<a href="https://colab.research.google.com/github/VCHULANI15/pricing-options-with-machine-learning/blob/main/Pricing_Options_With_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Importing necessary libraries for manipulating data
import numpy as np
import pandas as pd
import seaborn as sns

#Plot Library
import matplotlib.pyplot as plt
import plotly.express as px
from plotnine import *

#Sklearn Library
from sklearn.metrics import mean_absolute_error, accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, median_absolute_error, mean_squared_error, r2_score, mean_absolute_error, classification_report
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

#Torch Data
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from scipy.stats import norm
from itertools import product


In [3]:
np.random.seed(42)

n_samples = 100000

Spot=np.random.uniform(50,150,n_samples)
Strike=np.random.uniform(50,150,n_samples)
TTM=np.random.uniform(0.25,2,n_samples)

r=np.random.uniform(0.01,0.05,n_samples)

sigma = np.random.uniform(0.1, 0.4, n_samples) + (Strike < Spot) * np.random.uniform(0.05, 0.15, n_samples)

options_df = pd.DataFrame({
    'Spot Price': Spot,
    'Strike Price': Strike,
    'TTM': TTM,
    'Riks-Free Rate': r,
    'Volatility': sigma
})

In [13]:
def black_scholes_put_price(Spot, Strike, TTM, r, sigma, q =0):
  d1 = (np.log(Spot/Strike) + ( r- q + 0.5 * sigma ** 2) * TTM) / (sigma * np.sqrt(TTM))
  d2 = d1 - sigma * np.sqrt(TTM)

  put_price = Strike * np.exp(-r * TTM) * norm.cdf(-d2) - Spot * np.exp(-q * TTM) * norm.cdf(-d1)
  return put_price

options_df['BS_Put_Price'] = black_scholes_put_price(
    options_df['Spot Price'],
    options_df['Strike Price'],
    options_df['TTM'],
    options_df['Riks-Free Rate'],
    options_df['Volatility']
)

options_df.head()

Unnamed: 0,Spot Price,Strike Price,TTM,Riks-Free Rate,Volatility,BS_Put_Price,Adjusted Volatility,Adjusted_BS_Put_Price
0,87.454012,108.077904,0.744529,0.016282,0.366686,24.191026,0.366686,41.00133
1,145.071431,102.697165,1.052684,0.01382,0.295664,2.102809,0.354691,12.805663
2,123.199394,85.103695,0.423627,0.015518,0.229967,0.029113,0.306606,0.296458
3,109.865848,99.321266,1.031965,0.02894,0.525141,15.590019,0.533712,38.078891
4,65.601864,86.509664,0.605392,0.045381,0.230878,19.06552,0.230878,27.187674


In [14]:
delta = (options_df['Spot Price'] - options_df['Strike Price']).abs()
skew_factor = np.where(options_df['Strike Price'] < options_df['Spot Price'], delta / options_df['Strike Price'], 0)
options_df['Adjusted Volatility'] = options_df['Volatility'] + skew_factor * np.random.uniform(0.05, 0.2, n_samples)

options_df['Adjusted_BS_Put_Price'] = black_scholes_put_price(
    options_df['Spot Price'],
    options_df['Strike Price'],
    options_df['TTM'],
    options_df['Riks-Free Rate'],
    options_df['Volatility'],
    options_df['Adjusted Volatility']
)

options_df.head()

Unnamed: 0,Spot Price,Strike Price,TTM,Riks-Free Rate,Volatility,BS_Put_Price,Adjusted Volatility,Adjusted_BS_Put_Price
0,87.454012,108.077904,0.744529,0.016282,0.366686,24.191026,0.366686,41.00133
1,145.071431,102.697165,1.052684,0.01382,0.295664,2.102809,0.342648,12.233704
2,123.199394,85.103695,0.423627,0.015518,0.229967,0.029113,0.281432,0.250768
3,109.865848,99.321266,1.031965,0.02894,0.525141,15.590019,0.536543,38.208139
4,65.601864,86.509664,0.605392,0.045381,0.230878,19.06552,0.230878,27.187674


In [25]:
X = options_df[['Spot Price', 'Strike Price', 'TTM', 'Riks-Free Rate', 'Volatility']]

y = options_df['Adjusted_BS_Put_Price']

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.20, random_state=42)

X_train.shape, X_test.shape

((80000, 5), (20000, 5))

In [26]:
lr_model = LinearRegression()

lr_model.fit(X_train, y_train)

lr_predictions = lr_model.predict(X_test)

lr_mse = mean_squared_error(y_test, lr_predictions)
lr_mae = mean_absolute_error(y_test, lr_predictions)
lr_r2 = r2_score(y_test, lr_predictions)

lr_mse, lr_mae, lr_r2

(58.65204342046828, 6.092854620559996, 0.9067821860961106)

In [27]:
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

dt_predictions = dt_model.predict(X_test)

dt_mse = mean_squared_error(y_test, dt_predictions)

dt_mae = mean_absolute_error(y_test, dt_predictions)
dt_r2 = r2_score(y_test, dt_predictions)

dt_mse, dt_mae, dt_r2

(8.388591144985998, 2.1787879250087, 0.9866677086991955)

In [28]:
rf_model = RandomForestRegressor(n_estimators= 100, random_state = 42)

rf_model.fit(X_train, y_train)

rf_predictions = rf_model.predict(X_test)

rf_mse = mean_squared_error(y_test, rf_predictions)
rf_mae = mean_absolute_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)

rf_mse, rf_mae, rf_r2

(2.0595058544555345, 1.081118082052353, 0.9967267528584075)

In [29]:
gbr_model = GradientBoostingRegressor(random_state = 42)

gbr_model.fit(X_train, y_train)

gbr_predictions = gbr_model.predict(X_test)

gbr_mse = mean_squared_error(y_test, rf_predictions)
gbr_mae = mean_absolute_error(y_test, rf_predictions)
gbr_r2 = r2_score(y_test, rf_predictions)

gbr_mse, gbr_mae, gbr_r2

(2.0595058544555345, 1.081118082052353, 0.9967267528584075)

In [30]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

nn_model = MLPRegressor(hidden_layer_sizes =(100, 50), max_iter = 1000, random_state = 42)

nn_model.fit(X_train, y_train)
nn_predictions = nn_model.predict(X_test_scaled)

nn_mse = mean_squared_error(y_test, nn_predictions)
nn_mae = mean_absolute_error(y_test, nn_predictions)
nn_r2 = r2_score(y_test, nn_predictions)

nn_mse, nn_mae, nn_r2




(8205.653720925015, 72.06778126149392, -12.0415422363615)

In [33]:

results_df = pd.DataFrame({
'Model': ['Linear Regression', 'Decision Tree', 'Random Forest', 'Gradient␣Boosted Trees', 'Neural Network (MLP)'],
'Mean Squared Error': [lr_mse, dt_mse, rf_mse, gbr_mse, nn_mse],
'Mean Absolute Error': [lr_mae, dt_mae, rf_mae, gbr_mae, nn_mae],
'R-squared': [lr_r2, dt_r2, rf_r2, gbr_r2, nn_r2]
})
results_df


Unnamed: 0,Model,Mean Squared Error,Mean Absolute Error,R-squared
0,Linear Regression,58.652043,6.092855,0.906782
1,Decision Tree,8.388591,2.178788,0.986668
2,Random Forest,2.059506,1.081118,0.996727
3,Gradient␣Boosted Trees,2.059506,1.081118,0.996727
4,Neural Network (MLP),8205.653721,72.067781,-12.041542
