In [None]:
import pandas as pd

In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy.stats import zscore

In [None]:
# Load the input dataset from the parquet file in the working directory
# and render the resulting frame below the cell.
data_path = 'data.parquet'
df = pd.read_parquet(data_path)
display(df)

In [None]:
# Row-by-row spread between the two series: banknifty minus nifty.
df['spread'] = df['banknifty'].sub(df['nifty'])

In [None]:
# Base model: trade the spread off its full-sample z-score.
#   z > 1  -> signal +1,  z < -1 -> signal -1,  otherwise 0 (flat).
#
# nan_policy='omit' keeps missing spread values from poisoning the result:
# with scipy's default ('propagate') a single NaN in the input makes every
# z-score NaN, which in turn makes every signal 0 and the return series flat.
df['z_score'] = zscore(df['spread'], nan_policy='omit')
df['signal'] = np.where(df['z_score'] > 1, 1, np.where(df['z_score'] < -1, -1, 0))

# Act on the previous row's signal so a row never trades on its own z-score.
df['position'] = df['signal'].shift(1)

# NOTE(review): the scaling term is tte multiplied by 0.7. If the intended
# P/L weighting was tte raised to the power 0.7, this should be
# df['tte'] ** 0.7 -- confirm against the strategy specification.
df['strategy_returns'] = df['position'] * df['spread'] * (df['tte'] * 0.7)

base_model_pl = df['strategy_returns'].sum()

# Sharpe ratio: mean / std of the per-row returns, scaled by sqrt(number of rows).
strategy_returns_std = df['strategy_returns'].std()
if strategy_returns_std == 0 or np.isnan(strategy_returns_std):
    base_model_sharpe_ratio = np.nan
    print("Warning: Standard deviation is zero or invalid. Sharpe ratio cannot be calculated.")
else:
    base_model_sharpe_ratio = (df['strategy_returns'].mean() / strategy_returns_std) * np.sqrt(len(df))

# Maximum drawdown in P/L units: the largest drop of cumulative P/L below its
# running peak (a value <= 0). The earlier ratio form, cumsum / cummax, is
# undefined when the running peak is 0 and flips meaning when it is negative.
base_cumulative_pl = df['strategy_returns'].cumsum()
base_model_drawdown = (base_cumulative_pl - base_cumulative_pl.cummax()).min()

print("Base Model Performance:")
print(f"Profit/Loss: {base_model_pl:.2f}")
print(f"Sharpe Ratio: {base_model_sharpe_ratio:.2f}")
print(f"Drawdown: {base_model_drawdown:.2f}")

In [None]:
# 2. Improved model: Linear regression
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer  # not used in this cell any more; import kept in case other cells rely on it

feature_cols = ['banknifty', 'nifty', 'tte']

# Fit only on rows where every feature AND the target are present, and keep
# the mask so predictions can be written back to exactly those rows.
# Dropping NaN rows from X alone leaves X and y with different lengths (and
# NaN still in y), which makes fit() fail and makes the prediction array the
# wrong length for assignment back into df.
valid_rows = (df[feature_cols].notna().all(axis=1) & df['spread'].notna()).to_numpy()
X = df.loc[valid_rows, feature_cols]
y = df.loc[valid_rows, 'spread']

# NOTE(review): 'spread' is computed earlier in this notebook as
# banknifty - nifty, and both of those columns are features here, so the
# regression can reproduce the target exactly and the residuals are expected
# to be floating-point noise. Consider features that do not contain the
# target before relying on these results.
model = LinearRegression()
model.fit(X, y)

# Rows that could not be scored stay NaN.
df['predicted_spread'] = np.nan
df.loc[valid_rows, 'predicted_spread'] = model.predict(X)
df['residuals'] = df['spread'] - df['predicted_spread']

# +1 when the spread is above its fitted value, -1 when at or below it,
# 0 (flat) when there is no residual to act on.
df['improved_signal'] = np.where(
    df['residuals'].isna(), 0, np.where(df['residuals'] > 0, 1, -1)
)

# Act on the previous row's signal.
df['improved_position'] = df['improved_signal'].shift(1)
df['improved_strategy_returns'] = df['improved_position'] * df['residuals'] * (df['tte'] * 0.7)
improved_model_pl = df['improved_strategy_returns'].sum()

# Calculate Sharpe ratio with NaN handling
improved_strategy_returns_std = df['improved_strategy_returns'].std()
if improved_strategy_returns_std == 0 or np.isnan(improved_strategy_returns_std):
    improved_model_sharpe_ratio = np.nan
    print("Warning: Standard deviation is zero or invalid. Sharpe ratio cannot be calculated.")
else:
    improved_model_sharpe_ratio = (df['improved_strategy_returns'].mean() / improved_strategy_returns_std) * np.sqrt(len(df))

# Maximum drawdown in P/L units: largest drop of cumulative P/L below its
# running peak (a value <= 0). The ratio cumsum / cummax is undefined when the
# running peak is 0 and misleading when it is negative.
improved_cumulative_pl = df['improved_strategy_returns'].cumsum()
improved_model_drawdown = (improved_cumulative_pl - improved_cumulative_pl.cummax()).min()

# 3. Compare models
print("\nBase Model Performance:")
print(f"Profit/Loss: {base_model_pl:.2f}")
print(f"Sharpe Ratio: {base_model_sharpe_ratio:.2f}")
print(f"Drawdown: {base_model_drawdown:.2f}")

print("\nImproved Model Performance:")
print(f"Profit/Loss: {improved_model_pl:.2f}")
print(f"Sharpe Ratio: {improved_model_sharpe_ratio:.2f}")
print(f"Drawdown: {improved_model_drawdown:.2f}")