In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Step 1: Data Collection (Simulated here)
# Normally, you would fetch this data using an API or CSV file
# Here we are creating a simple example DataFrame

# Simulated historical options data
np.random.seed(42)
dates = pd.date_range('2023-01-01', periods=100)
data = {
    'Date': dates,
    'Underlying_Price': np.random.uniform(15000, 17000, len(dates)),
    'Strike_Price': 16000,
    'Option_Price': np.random.uniform(100, 300, len(dates)),
    'Implied_Volatility': np.random.uniform(15, 25, len(dates)),
    'Delta': np.random.uniform(-1, 1, len(dates)),
    'Theta': np.random.uniform(-10, 10, len(dates)),
    'Gamma': np.random.uniform(0, 1, len(dates)),
    'Vega': np.random.uniform(0, 1, len(dates)),
    'Rho': np.random.uniform(-1, 1, len(dates)),
}
options_data = pd.DataFrame(data)

# Display the first few rows
print(options_data.head())

# Step 2: Feature Engineering
options_data['SMA_20'] = options_data['Option_Price'].rolling(window=20).mean()
options_data['Daily_Return'] = options_data['Option_Price'].pct_change()

# Drop rows with NaN values
options_data.dropna(inplace=True)

# Define the target variable (1 for price increase, 0 for decrease)
options_data['Target'] = (options_data['Option_Price'].shift(-1) > options_data['Option_Price']).astype(int)

# Select features and target
features = ['Underlying_Price', 'Implied_Volatility', 'Delta', 'Theta', 'Gamma', 'Vega', 'Rho', 'SMA_20', 'Daily_Return']
X = options_data[features]
y = options_data['Target']

# Step 3: Model Training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Step 4: Backtesting (Simplified)
initial_balance = 10000
balance = initial_balance
shares = 0

# Add predictions to the original dataset with correct alignment
options_data['Prediction'] = np.nan
predictions_df = pd.DataFrame(data={'Prediction': y_pred}, index=X_test.index)
options_data.update(predictions_df)
options_data['Prediction'].fillna(method='ffill', inplace=True)

# Simulate a simple backtest strategy
for i in range(len(options_data) - 1):
    if options_data['Prediction'].iloc[i] == 1 and balance > 0:
        # Buy the option
        shares = balance / options_data['Option_Price'].iloc[i]
        balance = 0
    elif options_data['Prediction'].iloc[i] == 0 and shares > 0:
        # Sell the option
        balance = shares * options_data['Option_Price'].iloc[i]
        shares = 0

# Final balance
final_balance = balance + (shares * options_data['Option_Price'].iloc[-1])  # Include any remaining shares in the final balance
print(f"Final Balance: {final_balance:.2f}, Profit: {final_balance - initial_balance:.2f}")


        Date  Underlying_Price  Strike_Price  Option_Price  \
0 2023-01-01      15749.080238         16000    106.285837   
1 2023-01-02      16901.428613         16000    227.282082   
2 2023-01-03      16463.987884         16000    162.871196   
3 2023-01-04      16197.316968         16000    201.714138   
4 2023-01-05      15312.037281         16000    281.513295   

   Implied_Volatility     Delta     Theta     Gamma      Vega       Rho  
0           21.420316 -0.896637 -7.937523  0.698162  0.168935  0.065179  
1           15.841400  0.062709  8.051058  0.536096  0.278590 -0.896353  
2           16.616287  0.081270  0.105047  0.309528  0.177010 -0.326791  
3           23.985542  0.274860  6.529149  0.813795  0.088703 -0.731171  
4           21.064291  0.452183 -3.599008  0.684731  0.120636 -0.873250  
Model Accuracy: 0.60
Final Balance: 5869.42, Profit: -4130.58


  options_data['Prediction'].fillna(method='ffill', inplace=True)
