In [44]:
import numpy as np
import pandas as pd
from script.data_pipeline import shinyDataFetcher
import plotly.graph_objects as go
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
from statsmodels.regression.rolling import RollingOLS
import matplotlib.pyplot as plt
import itertools
import warnings
import statsmodels.tsa.stattools as ts
from IPython.display import HTML, display
import itables

warnings.filterwarnings("ignore")

def write_table(df, filename):
    html = itables.to_html_datatable(df, display_logo_when_loading=True)
    with open(f'../docs/dataframe/{filename}.html', 'w') as f:
        f.write(html)

EV_stocks = ["TSLA", "RIVN", "LCID", "F", "GM", "NIO", "XPEV", "BYDDF", "LI"]
EV_stocks_data = {}
for stock in EV_stocks:
    EV_stocks_data[stock] = shinyDataFetcher(asset=stock, durationStr="3 Y", barSizeSetting="1 day").fetch_asset_data()

log_prices = {}
for ticker in EV_stocks:
    df_temp = EV_stocks_data[ticker].copy()
    df_temp["log_close"] = np.log(df_temp["close"])
    log_prices[ticker] = df_temp.set_index("timestamp")["log_close"].dropna()

# # Loop over all unique pairs
# for s1, s2 in itertools.combinations(EV_stocks, 2):
#     # Align datasets by timestamp
#     print(f"Processing pair: {s1} & {s2}")
#     df_pair = pd.concat([log_prices[s1], log_prices[s2]], axis=1).dropna()
#     if df_pair.empty:
#         continue

#     X = log_prices[s1]
#     Y = log_prices[s2]
#     X_const = sm.add_constant(X)  # Adds a constant term to the predictor
#     model = sm.OLS(Y, X_const).fit()
#     print(f"OLS Coefficients: {model.params.to_dict()}, OLS p-value: {model.pvalues[1]:.4f}, OLS R-squared: {model.rsquared:.4f}")

#     # Perform cointegration test
#     result = coint(df_pair.iloc[:, 0], df_pair.iloc[:, 1])
#     test_stat, p_value, crit_values = result

#     print(f"Test Statistic: {test_stat:.3f}, p-value: {p_value:.4f}", f"Critical Values: {crit_values}")
#     print(f"r-square > 0.5? {'Yes' if model.rsquared > 0.5 else 'No'}")
#     print(f"Is cointegrated? {'Yes' if p_value < 0.05 else 'No'}")
#     print("-" * 50)

In [4]:
fig = go.Figure()
asset_a = "TSLA"
asset_b = "RIVN"
fig.add_trace(go.Scatter(x=EV_stocks_data[asset_a]["timestamp"], y=np.log(EV_stocks_data[asset_a]["close"]), mode="lines", name=f"{asset_a}"))
fig.add_trace(go.Scatter(x=EV_stocks_data[asset_b]["timestamp"], y=np.log(EV_stocks_data[asset_b]["close"]), mode="lines", name=f"{asset_b}"))
fig.update_layout(title=f"{asset_a}  vs. {asset_b} Stock Log Prices", xaxis_title="timestamp", yaxis_title="Price (USD)", legend=dict(x=0, y=1))
fig.show()
fig.write_html("html_plot/log_price.html")

In [None]:
X = np.log(EV_stocks_data[asset_a]["close"])
Y = np.log(EV_stocks_data[asset_b]["close"])
X_const = sm.add_constant(X)  # Adds a constant term to the predictor
model = sm.OLS(Y, X_const).fit()
print(model.summary())


result = ts.coint(X, Y)
print("Cointegration test result:")
print("========================================")
print("Test Statistic:", result[0])
print("p-value:", result[1])

                            OLS Regression Results                            
Dep. Variable:                  close   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                  0.029
Method:                 Least Squares   F-statistic:                     23.15
Date:                Wed, 16 Apr 2025   Prob (F-statistic):           1.81e-06
Time:                        14:18:53   Log-Likelihood:                -372.81
No. Observations:                 753   AIC:                             749.6
Df Residuals:                     751   BIC:                             758.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.3888      0.305      4.555      0.0

In [6]:
def rolling_adf(spread_series, time_index, window=90):
    adf_stats = []
    p_values = []
    dates = []

    for i in range(window, len(spread_series)):
        window_data = spread_series.iloc[i - window : i]
        result = adfuller(window_data, maxlag=1, autolag=None)
        adf_stats.append(result[0])  # Test statistic
        p_values.append(result[1])  # p-value
        dates.append(time_index.iloc[i])
    return pd.DataFrame({"timestamp": dates, "adf_stat": adf_stats, "p_value": p_values})


trade_df = pd.merge(EV_stocks_data["RIVN"][["timestamp", "close"]], EV_stocks_data["TSLA"][["timestamp", "close"]], on="timestamp", suffixes=("_RIVN", "_TSLA"))
trade_df["log_close_TSLA"] = np.log(trade_df["close_TSLA"])
trade_df["log_close_RIVN"] = np.log(trade_df["close_RIVN"])
X = trade_df["log_close_TSLA"].astype(float)
Y = trade_df["log_close_RIVN"].astype(float)
X_with_const = sm.add_constant(X)
model = RollingOLS(endog=Y, exog=X_with_const, window=20)
rres = model.fit()
trade_df["hedge_ratio"] = rres.params["log_close_TSLA"]
trade_df["spread"] = trade_df["log_close_RIVN"] - trade_df["hedge_ratio"] * trade_df["log_close_TSLA"]
trade_df["z_score"] = (trade_df["spread"] - trade_df["spread"].rolling(window=20).mean()) / trade_df["spread"].rolling(window=20).std()
trade_df.dropna(inplace=True)
rolling_adf_df = rolling_adf(trade_df["spread"], trade_df["timestamp"], window=60)
trade_df = pd.merge(trade_df, rolling_adf_df, on="timestamp", how="left")
trade_df.dropna(inplace=True)

In [43]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(x=trade_df["timestamp"], y=trade_df["spread"], mode="lines", name=f"spread"))
fig.add_trace(go.Scatter(x=trade_df["timestamp"], y=trade_df["z_score"], mode="lines", name=f"z-score", opacity=0.4))
fig.add_hline(y=2, line_dash="dash", line_color="red", annotation_text="z-score upper threshold", annotation_position="top right", opacity=0.4)
fig.add_hline(y=-2, line_dash="dash", line_color="red", annotation_text="z-score lower threshold", annotation_position="bottom right", opacity=0.4)
fig.update_layout(title="spread through time")
fig.show()
# fig.write_html("html_plot/log_price.html")

In [7]:
fig = go.Figure()
rolling_window = 60
fig.add_trace(go.Scatter(x=rolling_adf_df["timestamp"], y=rolling_adf_df["p_value"], mode="lines", name="Rolling ADF p-value"))
fig.add_hline(y=0.05, line_dash="dash", line_color="red", annotation_text="Significance threshold (0.05)", annotation_position="bottom right")
fig.update_layout(title=f"Rolling ADF Test on Spread (window={rolling_window})", xaxis_title="timestamp", yaxis_title="p-value")
fig.show()
fig.write_html('html_plot/adf_test.html')

In [31]:
df = trade_df.copy()
df["position"] = 0
df["trade_entry"] = False
df["trade_exit"] = False
df["pnl"] = 0.0

in_position = False
position_type = 0
entry_index = None
entry_spread = None
entry_tsla_price = None
entry_rivn_price = None

blotter = []
ledger = []
notional = 10000
account_value = 100000
cumulative_pnl = 0.0
for i in range(len(df)):
    z = df["z_score"].iloc[i]
    spread = df["spread"].iloc[i]
    tsla_price = df["close_TSLA"].iloc[i]
    rivn_price = df["close_RIVN"].iloc[i]
    timestamp = df["timestamp"].iloc[i]
    beta = df["hedge_ratio"].iloc[i]
    adf_p_value = df["p_value"].iloc[i]

    if not in_position:

        if adf_p_value < 0.05 and z > 2:
            # short spread: short TSLA, long RIVN
            tsla_qty = -round(notional / tsla_price)
            rivn_qty = round((beta * notional) / rivn_price)
            df.at[df.index[i], "position"] = -1
            df.at[df.index[i], "trade_entry"] = True
            in_position = True
            position_type = -1
            entry_index = i
            entry_spread = spread
            entry_tsla_price = tsla_price
            entry_rivn_price = rivn_price
            blotter.append(
                {
                    "entry_time": timestamp,
                    "entry_tsla": tsla_price,
                    "entry_rivn": rivn_price,
                    "direction": "short TSLA, long RIVN",
                    "entry_spread": spread,
                    "hedge_ratio": beta,
                    "TSLA_qty": tsla_qty,
                    "RIVN_qty": rivn_qty,
                }
            )
        elif adf_p_value < 0.05 and z < -2:
            # long spread: long TSLA, short RIVN
            tsla_qty = round(notional / tsla_price)
            rivn_qty = -round((beta * notional) / rivn_price)
            df.at[df.index[i], "position"] = 1
            df.at[df.index[i], "trade_entry"] = True
            in_position = True
            position_type = 1
            entry_index = i
            entry_spread = spread
            entry_tsla_price = tsla_price
            entry_rivn_price = rivn_price
            entry_beta = beta
            blotter.append(
                {
                    "entry_time": timestamp,
                    "entry_tsla": tsla_price,
                    "entry_rivn": rivn_price,
                    "direction": "long TSLA, short RIVN",
                    "entry_spread": spread,
                    "hedge_ratio": beta,
                    "TSLA_qty": tsla_qty,
                    "RIVN_qty": rivn_qty,
                }
            )
    else:
        stop_loss_triggered = (z > 3 and position_type == -1) or (z < -3 and position_type == 1)
        mean_reversion_triggered = abs(z) < 0.5
        timeout_triggered = i - entry_index >= 10

        if stop_loss_triggered or mean_reversion_triggered or timeout_triggered:
            reason = "stop_loss" if stop_loss_triggered else "mean_reversion" if mean_reversion_triggered else "timeout"
            df.at[df.index[i], "position"] = 0
            df.at[df.index[i], "trade_exit"] = True

            # Get previous quantities
            tsla_qty = blotter[-1]["TSLA_qty"]
            rivn_qty = blotter[-1]["RIVN_qty"]

            pnl_tsla = (tsla_price - entry_tsla_price) * tsla_qty
            pnl_rivn = (rivn_price - entry_rivn_price) * rivn_qty
            pnl = pnl_tsla + pnl_rivn
            df.at[df.index[i], "pnl"] = pnl
            cumulative_pnl += pnl
            account_value += pnl

            blotter[-1].update({"exit_time": timestamp, "exit_tsla": tsla_price, "exit_rivn": rivn_price, "exit_spread": spread, "pnl": pnl, "reason": reason})

            in_position = False
            position_type = 0
            entry_index = None
            entry_spread = None
            entry_tsla_price = None
            entry_rivn_price = None
        else:
            df.at[df.index[i], "position"] = position_type

    ledger.append({"timestamp": timestamp, "account_value": account_value, "position": position_type, "daily_pnl": df.at[df.index[i], "pnl"], "cumulative_pnl": cumulative_pnl})

blotter_df = pd.DataFrame(blotter)
ledger_df = pd.DataFrame(ledger)

In [45]:
blotter_df
write_table(blotter_df, 'blotter')

In [46]:
ledger_df
write_table(ledger_df, 'ledger')

In [None]:
# Step-by-step trade logic
df = trade_df.copy()
df["position"] = 0  # 1: long spread, -1: short spread
df["trade_entry"] = False
df["trade_exit"] = False
df["pnl"] = 0.0

in_position = False
position_type = 0
entry_index = None
entry_spread = None

for i in range(len(df)):
    z = df["z_score"].iloc[i]
    spread = df["spread"].iloc[i]

    if not in_position:
        if z > 2:
            df.at[df.index[i], "position"] = -1
            df.at[df.index[i], "trade_entry"] = True
            in_position = True
            position_type = -1
            entry_index = i
            entry_spread = spread
        elif z < -2:
            df.at[df.index[i], "position"] = 1
            df.at[df.index[i], "trade_entry"] = True
            in_position = True
            position_type = 1
            entry_index = i
            entry_spread = spread
    else:
        stop_loss_triggered = (z > 3 and position_type == -1) or (z < -3 and position_type == 1)
        mean_reversion_triggered = abs(z) < 0.5
        timeout_triggered = i - entry_index >= 10

        if stop_loss_triggered or mean_reversion_triggered or timeout_triggered:
            df.at[df.index[i], "position"] = 0
            df.at[df.index[i], "trade_exit"] = True
            pnl = -(spread - entry_spread)
            df.at[df.index[i], "pnl"] = pnl
            in_position = False
            position_type = 0
            entry_index = None
            entry_spread = None
        else:
            df.at[df.index[i], "position"] = position_type
spread_mean = df["spread"].mean()
spread_std = df["spread"].std()
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df["spread"], mode="lines", name="Spread"))
fig.add_trace(go.Scatter(x=[df.index[0], df.index[-1]], y=[0, 0], mode="lines", name="Zero Line", line=dict(dash="dash", color="black")))
fig.add_trace(go.Scatter(x=[df.index[0], df.index[-1]], y=[spread_mean + 2 * spread_std, spread_mean + 2 * spread_std], mode="lines", name="+2σ", line=dict(dash="dash", color="red")))
fig.add_trace(go.Scatter(x=[df.index[0], df.index[-1]], y=[spread_mean - 2 * spread_std, spread_mean - 2 * spread_std], mode="lines", name="-2σ", line=dict(dash="dash", color="red")))
fig.add_trace(go.Scatter(x=[df.index[0], df.index[-1]], y=[spread_mean + 0.5 * spread_std, spread_mean + 0.5 * spread_std], mode="lines", name="+0.5σ", line=dict(dash="dash", color="green")))
fig.add_trace(go.Scatter(x=[df.index[0], df.index[-1]], y=[spread_mean - 0.5 * spread_std, spread_mean - 0.5 * spread_std], mode="lines", name="-0.5σ", line=dict(dash="dash", color="green")))
entries_df = df[df["trade_entry"]]
exits_df = df[df["trade_exit"]]
fig.add_trace(go.Scatter(x=entries_df.index, y=entries_df["spread"], mode="markers", name="Entry", marker=dict(color="blue", size=10)))
fig.add_trace(go.Scatter(x=exits_df.index, y=exits_df["spread"], mode="markers", name="Exit", marker=dict(color="purple", size=10)))
fig.update_layout(title="Spread and Trade Entry/Exit Points", xaxis_title="Timestamp", yaxis_title="Spread", template="plotly_white")
fig.show()
fig.write_html('html_plot/spread.html')

In [48]:
# Create cumulative PnL column
df["cumulative_pnl"] = df["pnl"].cumsum()

# Trade summary metrics
total_trades = df["trade_exit"].sum()
total_pnl = df["pnl"].sum()
winning_trades = df[df["pnl"] > 0]["pnl"].count()
losing_trades = df[df["pnl"] < 0]["pnl"].count()
avg_return = df["pnl"][df["pnl"] != 0].mean()
win_rate = winning_trades / total_trades if total_trades > 0 else np.nan
entry_dates = df.loc[df["trade_entry"], "timestamp"].reset_index(drop=True)
exit_dates = df.loc[df["trade_exit"], "timestamp"].reset_index(drop=True)
holding_durations = [(exit_date - entry_date).days for entry_date, exit_date in zip(entry_dates, exit_dates)]
avg_holding_period = np.mean(holding_durations) if holding_durations else np.nan


# Summary dictionary
summary = {
    "Total Trades": total_trades,
    "Total PnL": round(total_pnl, 4),
    "Winning Trades": winning_trades,
    "Losing Trades": losing_trades,
    "Win Rate (%)": round(win_rate * 100, 2) if not np.isnan(win_rate) else "N/A",
    "Average Trade Return": round(avg_return, 4) if not np.isnan(avg_return) else "N/A",
    "Average Holding Period (days)": round(avg_holding_period, 2) if not np.isnan(avg_holding_period) else "N/A",
}

# Plot cumulative PnL with Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=ledger_df.index, y=ledger_df["cumulative_pnl"], mode="lines", name="Cumulative PnL"))
fig.update_layout(title="Cumulative PnL Over Time", xaxis_title="Date", yaxis_title="Cumulative PnL")
fig.show()
fig.write_html("html_plot/cumulative_pnl.html")

In [14]:
df

Unnamed: 0,timestamp,close_RIVN,close_TSLA,log_close_TSLA,log_close_RIVN,hedge_ratio,spread,z_score,adf_stat,p_value,position,trade_entry,trade_exit,pnl,cumulative_pnl
60,2022-09-07,33.25,283.70,5.647917,3.504055,1.136397,-2.914224,-1.467068,-2.822955,0.055069,0,False,False,0.0,0.000000
61,2022-09-08,36.88,289.26,5.667326,3.607669,1.094585,-2.595703,-1.117577,-2.793230,0.059247,0,False,False,0.0,0.000000
62,2022-09-09,37.55,299.68,5.702715,3.625673,1.214359,-3.299473,-1.426614,-2.774224,0.062051,0,False,False,0.0,0.000000
63,2022-09-12,38.74,304.42,5.718408,3.656873,1.224588,-3.345824,-1.302861,-2.617943,0.089345,0,False,False,0.0,0.000000
64,2022-09-13,38.76,292.13,5.677199,3.657389,1.228464,-3.316846,-1.159201,-2.653474,0.082441,0,False,False,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709,2025-04-09,11.77,272.20,5.606537,2.465554,0.652059,-1.190237,-0.995133,-2.315998,0.166860,0,False,False,0.0,19.176221
710,2025-04-10,11.46,252.40,5.531015,2.438863,0.623627,-1.010428,-0.777499,-2.395515,0.143006,0,False,False,0.0,19.176221
711,2025-04-11,11.47,252.31,5.530658,2.439735,0.612836,-0.949651,-0.702230,-2.686553,0.076394,0,False,False,0.0,19.176221
712,2025-04-14,12.03,252.35,5.530817,2.487404,0.594488,-0.800603,-0.523876,-2.885937,0.047008,0,False,False,0.0,19.176221


In [10]:
import pprint
pprint.pprint(summary)

{'Average Holding Period (days)': np.float64(9.69),
 'Average Trade Return': np.float64(0.5479),
 'Losing Trades': np.int64(16),
 'Total PnL': np.float64(19.1762),
 'Total Trades': np.int64(35),
 'Win Rate (%)': np.float64(54.29),
 'Winning Trades': np.int64(19)}


In [11]:
trade_df.to_html('blotter.html')

'<table id="itables_817ad8db_4e43_4548_b32c_756cf7ab14c5" class="display nowrap" data-quarto-disable-processing="true" style="table-layout:auto;width:auto;margin:auto;caption-side:bottom">\n<thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>timestamp</th>\n      <th>close_RIVN</th>\n      <th>close_TSLA</th>\n      <th>log_close_TSLA</th>\n      <th>log_close_RIVN</th>\n      <th>hedge_ratio</th>\n      <th>spread</th>\n      <th>z_score</th>\n      <th>adf_stat</th>\n      <th>p_value</th>\n      <th>position</th>\n      <th>trade_entry</th>\n      <th>trade_exit</th>\n      <th>pnl</th>\n      <th>cumulative_pnl</th>\n    </tr>\n  </thead><tbody><tr>\n<td style="vertical-align:middle; text-align:left">\n\nLoading ITables v2.3.0 from the internet...\n(need <a href=https://mwouts.github.io/itables/troubleshooting.html>help</a>?)</td>\n</tr></tbody>\n</table>\n<link href="https://www.unpkg.com/dt_for_itables@2.2.0/dt_bundle.css" rel="stylesheet">\n<script type="mod

In [16]:
display(HTML(html))

Unnamed: 0,timestamp,close_RIVN,close_TSLA,log_close_TSLA,log_close_RIVN,hedge_ratio,spread,z_score,adf_stat,p_value,position,trade_entry,trade_exit,pnl,cumulative_pnl
Loading ITables v2.3.0 from the internet... (need help?),,,,,,,,,,,,,,,
