In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error



In [54]:
df = pd.read_parquet('data.parquet')
df

Unnamed: 0_level_0,banknifty,nifty,tte
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-01 09:15:00,0.286058,0.199729,27
2021-01-01 09:16:00,0.285381,0.200433,27
2021-01-01 09:17:00,0.284233,0.200004,27
2021-01-01 09:18:00,0.286104,0.199860,27
2021-01-01 09:19:00,0.285539,0.198951,27
...,...,...,...
2022-06-30 15:26:00,0.240701,0.214758,28
2022-06-30 15:27:00,0.240875,0.216558,28
2022-06-30 15:28:00,0.242115,0.216794,28
2022-06-30 15:29:00,0.243426,0.216455,28


In [55]:
df.fillna(df.mean(), inplace=True)
df

Unnamed: 0_level_0,banknifty,nifty,tte
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-01 09:15:00,0.286058,0.199729,27
2021-01-01 09:16:00,0.285381,0.200433,27
2021-01-01 09:17:00,0.284233,0.200004,27
2021-01-01 09:18:00,0.286104,0.199860,27
2021-01-01 09:19:00,0.285539,0.198951,27
...,...,...,...
2022-06-30 15:26:00,0.240701,0.214758,28
2022-06-30 15:27:00,0.240875,0.216558,28
2022-06-30 15:28:00,0.242115,0.216794,28
2022-06-30 15:29:00,0.243426,0.216455,28


In [56]:
df.columns

Index(['banknifty', 'nifty', 'tte'], dtype='object')

In [57]:
df['spread'] = df['banknifty'] - df['nifty']
df['z_score'] = (df['spread'] - df['spread'].mean()) / df['spread'].std()





In [58]:
#thresholds for entering and exiting trades
threshold = 1.0


In [59]:
df['Signal'] = 0  # Exit signal
df.loc[df['z_score'] > threshold, 'Signal'] = -1  # Sell signal
df.loc[df['z_score'] < -threshold, 'Signal'] = 1  # Buy signal

df


Unnamed: 0_level_0,banknifty,nifty,tte,spread,z_score,Signal
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 09:15:00,0.286058,0.199729,27,0.086329,0.587711,0
2021-01-01 09:16:00,0.285381,0.200433,27,0.084948,0.535421,0
2021-01-01 09:17:00,0.284233,0.200004,27,0.084229,0.508167,0
2021-01-01 09:18:00,0.286104,0.199860,27,0.086244,0.584473,0
2021-01-01 09:19:00,0.285539,0.198951,27,0.086588,0.597522,0
...,...,...,...,...,...,...
2022-06-30 15:26:00,0.240701,0.214758,28,0.025943,-1.699596,1
2022-06-30 15:27:00,0.240875,0.216558,28,0.024317,-1.761204,1
2022-06-30 15:28:00,0.242115,0.216794,28,0.025321,-1.723156,1
2022-06-30 15:29:00,0.243426,0.216455,28,0.026971,-1.660657,1


In [60]:
# calculation of the P/L 
df['P/L'] = df['spread'] * (df['tte'] ** 0.7)

In [61]:
total_pl = df['P/L'].sum()
sharpe_ratio = df['P/L'].mean() / df['P/L'].std()
drawdown = (1 - df['P/L'].cumsum() / df['P/L'].cumsum().cummax()).max()

print(total_pl,sharpe_ratio,drawdown)

319317.09410491225 1.748508827460007 5.873183546722949e-06


In [62]:
X=df[['banknifty', 'nifty', 'tte']]
Y=df['spread']

In [63]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, Y)
df['Predicted_Spread'] = model.predict(X)
df

Unnamed: 0_level_0,banknifty,nifty,tte,spread,z_score,Signal,P/L,Predicted_Spread
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-01 09:15:00,0.286058,0.199729,27,0.086329,0.587711,0,0.867184,0.086363
2021-01-01 09:16:00,0.285381,0.200433,27,0.084948,0.535421,0,0.853317,0.084907
2021-01-01 09:17:00,0.284233,0.200004,27,0.084229,0.508167,0,0.846089,0.084258
2021-01-01 09:18:00,0.286104,0.199860,27,0.086244,0.584473,0,0.866325,0.086301
2021-01-01 09:19:00,0.285539,0.198951,27,0.086588,0.597522,0,0.869786,0.086584
...,...,...,...,...,...,...,...,...
2022-06-30 15:26:00,0.240701,0.214758,28,0.025943,-1.699596,1,0.267320,0.025994
2022-06-30 15:27:00,0.240875,0.216558,28,0.024317,-1.761204,1,0.250560,0.024275
2022-06-30 15:28:00,0.242115,0.216794,28,0.025321,-1.723156,1,0.260910,0.025273
2022-06-30 15:29:00,0.243426,0.216455,28,0.026971,-1.660657,1,0.277912,0.026751


In [64]:
#check the model performance
print(model.score(X,Y))

0.9999944776907865


In [65]:
df['z_score_advanced'] = (df['Predicted_Spread'] - df['Predicted_Spread'].mean()) / df['Predicted_Spread'].std()
df['Signal_advanced'] = 0  # Exit signal
df.loc[df['z_score_advanced'] > threshold, 'Signal_advanced'] = -1  # Sell signal
df.loc[df['z_score_advanced'] < -threshold, 'Signal_advanced'] = 1  # Buy signal


In [66]:
df

Unnamed: 0_level_0,banknifty,nifty,tte,spread,z_score,Signal,P/L,Predicted_Spread,z_score_advanced,Signal_advanced
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-01-01 09:15:00,0.286058,0.199729,27,0.086329,0.587711,0,0.867184,0.086363,0.589055,0
2021-01-01 09:16:00,0.285381,0.200433,27,0.084948,0.535421,0,0.853317,0.084907,0.533891,0
2021-01-01 09:17:00,0.284233,0.200004,27,0.084229,0.508167,0,0.846089,0.084258,0.509317,0
2021-01-01 09:18:00,0.286104,0.199860,27,0.086244,0.584473,0,0.866325,0.086301,0.586684,0
2021-01-01 09:19:00,0.285539,0.198951,27,0.086588,0.597522,0,0.869786,0.086584,0.597396,0
...,...,...,...,...,...,...,...,...,...,...
2022-06-30 15:26:00,0.240701,0.214758,28,0.025943,-1.699596,1,0.267320,0.025994,-1.697767,1
2022-06-30 15:27:00,0.240875,0.216558,28,0.024317,-1.761204,1,0.250560,0.024275,-1.762857,1
2022-06-30 15:28:00,0.242115,0.216794,28,0.025321,-1.723156,1,0.260910,0.025273,-1.725069,1
2022-06-30 15:29:00,0.243426,0.216455,28,0.026971,-1.660657,1,0.277912,0.026751,-1.669088,1


In [67]:
df['P/L_advanced'] = df['Predicted_Spread'] * (df['tte'] ** 0.7)
total_pl_advanced = df['P/L_advanced'].sum()
sharpe_ratio_advanced = df['P/L_advanced'].mean() / df['P/L_advanced'].std()
drawdown_advanced = (1 - df['P/L_advanced'].cumsum() / df['P/L_advanced'].cumsum().cummax()).max()

print(total_pl_advanced,sharpe_ratio_advanced,drawdown_advanced)

319316.2190519026 1.7485648705737067 5.873208341333758e-06


In [68]:
print('Absolute P/L for base and advanced model respectively:',total_pl,',',total_pl_advanced)

Absolute P/L for base and advanced model respectively: 319317.09410491225 , 319316.2190519026


In [69]:
print('Sharpe Ratio for base and advanced model respectively:',sharpe_ratio,',',sharpe_ratio_advanced)

Sharpe Ratio for base and advanced model respectively: 1.748508827460007 , 1.7485648705737067


In [70]:
print('Drawdown for base and advanced model respectively:',drawdown,',',drawdown_advanced)

Drawdown for base and advanced model respectively: 5.873183546722949e-06 , 5.873208341333758e-06
