In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import zscore

# Load dataset
data = pd.read_parquet(r"C:\Users\Akarsha\Downloads\data (1).parquet")

# Drop rows with missing values
data.dropna(inplace=True)

# Assuming 'data' DataFrame contains columns: 'banknifty', 'nifty', 'tte'
# Assuming the dataset is sorted by timestamp and fills missing values

# Calculate spread
data['Spread'] = data['banknifty'] - data['nifty']

# Calculate z-score
data['Z_Score'] = zscore(data['Spread'])

# Feature Engineering
data['Spread_Ratio'] = data['banknifty'] / data['nifty']
data['Volatility_Difference'] = data['banknifty'] - data['nifty']

# Base model: Trading based on z-score
def base_model(data):
    positions = []
    for z in data['Z_Score']:
        if z > 1:
            positions.append(-1)  # Short Bank Nifty, Long Nifty
        elif z < -1:
            positions.append(1)   # Long Bank Nifty, Short Nifty
        else:
            positions.append(0)   # No position
    
    data['Position'] = positions
    data['PnL_Base_Model'] = data['Position'] * (data['tte'] ** 0.7) * data['Spread']
    
    # Calculate additional metrics
    pnl = data['PnL_Base_Model'].sum()
    sharpe_ratio = np.mean(data['PnL_Base_Model']) / np.std(data['PnL_Base_Model'])
    drawdown = np.min(np.cumsum(data['PnL_Base_Model']) - np.maximum.reduce(np.cumsum(data['PnL_Base_Model'])))
    
    return pnl, sharpe_ratio, drawdown

# Better model: Machine learning approach (Random Forest Regressor)
def better_model(data):
    # Feature engineering
    X = data[['banknifty', 'nifty', 'tte', 'Spread_Ratio', 'Volatility_Difference']]
    y = data['Spread']
    
    # Train-test split
    train_size = int(0.8 * len(data))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    
    # Train the model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    
    # Predict spread movements
    data['Predicted_Spread'] = model.predict(X)
    
    # Trading based on predicted spread movements
    data['Position'] = np.where(data['Predicted_Spread'] > 0, 1, -1)
    data['PnL_Better_Model'] = data['Position'] * (data['tte'] ** 0.7) * data['Spread']
    
    # Calculate additional metrics
    pnl = data['PnL_Better_Model'].sum()
    sharpe_ratio = np.mean(data['PnL_Better_Model']) / np.std(data['PnL_Better_Model'])
    drawdown = np.min(np.cumsum(data['PnL_Better_Model']) - np.maximum.reduce(np.cumsum(data['PnL_Better_Model'])))
    
    return pnl, sharpe_ratio, drawdown

# Compare models
base_model_pnl, base_model_sharpe, base_model_drawdown = base_model(data)
better_model_pnl, better_model_sharpe, better_model_drawdown = better_model(data)

# Summary of results and findings
print("Base Model:")
print("PnL:", base_model_pnl)
print("Sharpe Ratio:", base_model_sharpe)
print("Drawdown:", base_model_drawdown)

print("\nBetter Model:")
print("PnL:", better_model_pnl)
print("Sharpe Ratio:", better_model_sharpe)
print("Drawdown:", better_model_drawdown)


Base Model:
PnL: -56132.667890946526
Sharpe Ratio: -0.2444764973484154
Drawdown: -76113.93252003896

Better Model:
PnL: 317677.05380497477
Sharpe Ratio: 1.7557234186393573
Drawdown: -317676.1866209265
