In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from fredapi import Fred
from datetime import datetime
from arch import arch_model

In [2]:
# Get current price of AAPL
ticker = 'AAPL' # AAPL Ticker

asset = yf.Ticker(ticker)
current_price = asset.history(period="1d")['Close'].iloc[-1]
print("Current Price of the Asset:", current_price)

Current Price of the Asset: 173.02999877929688


In [3]:
# Load Historical Options Data
# This is data describing the historical volatility at different dates in the past.
# The key for this data is 'date'.

# Load the CSV file into a pandas DataFrame
historical_volatility = pd.read_csv('volatility_history.csv')

# Drop rows where the value of the column 'act_symbol' is not equal to 'AAPL'
historical_volatility = historical_volatility[historical_volatility['act_symbol'] == 'AAPL']
print(historical_volatility.shape)
historical_volatility.head(10)



(707, 16)


Unnamed: 0,date,act_symbol,hv_current,hv_week_ago,hv_month_ago,hv_year_high,hv_year_high_date,hv_year_low,hv_year_low_date,iv_current,iv_week_ago,iv_month_ago,iv_year_high,iv_year_high_date,iv_year_low,iv_year_low_date
3,2019-02-09,AAPL,0.4373,0.5048,0.5061,0.5162,2019-01-04,0.1134,2018-06-22,0.228,0.2285,0.3371,0.4552,2018-12-24,0.1592,2018-05-25
534,2019-02-16,AAPL,0.2967,0.4373,0.4948,0.5162,2019-01-04,0.1134,2018-06-22,0.207,0.228,0.3109,0.4552,2018-12-24,0.1592,2018-05-25
1065,2019-02-23,AAPL,0.2726,0.2967,0.4738,0.5162,2019-01-04,0.1134,2018-06-22,0.1943,0.207,0.331,0.4552,2018-12-24,0.1592,2018-05-25
1596,2019-03-02,AAPL,0.2623,0.2726,0.5048,0.5162,2019-01-04,0.1134,2018-06-22,0.1893,0.1943,0.2383,0.4552,2018-12-24,0.1592,2018-05-25
2127,2019-03-09,AAPL,0.2541,0.2623,0.4373,0.5162,2019-01-04,0.1134,2018-06-22,0.2115,0.1893,0.2205,0.4552,2018-12-24,0.1592,2018-05-25
2658,2019-03-16,AAPL,0.173,0.2541,0.2967,0.5162,2019-01-04,0.1134,2018-06-22,0.1975,0.2115,0.22,0.4552,2018-12-24,0.1592,2018-05-25
3189,2019-03-23,AAPL,0.1864,0.173,0.2726,0.5162,2019-01-04,0.1134,2018-06-22,0.2492,0.1975,0.2039,0.4552,2018-12-24,0.1592,2018-05-25
3719,2019-03-30,AAPL,0.1926,0.1864,0.2627,0.5162,2019-01-04,0.1134,2018-06-22,0.2426,0.2492,0.1978,0.4552,2018-12-24,0.1592,2018-05-25
4250,2019-04-06,AAPL,0.1915,0.1926,0.2627,0.5162,2019-01-04,0.1134,2018-06-22,0.2818,0.2426,0.2045,0.4552,2018-12-24,0.1592,2018-05-25
4782,2019-04-13,AAPL,0.1938,0.1915,0.2531,0.5162,2019-01-04,0.1134,2018-06-22,0.2885,0.2818,0.1965,0.4552,2018-12-24,0.1592,2018-05-25


In [4]:
# Load Historical Options Data
# Keys: 'date', 'expiration', 'strike', 'call_put'

# Load the CSV file into a pandas DataFrame
historical_options = pd.read_csv('option_chain.csv')

# Drop rows where the value of the column 'act_symbol' is not equal to 'AAPL'
historical_options = historical_options[historical_options['act_symbol'] == 'AAPL']

print(historical_options.shape)
historical_options.head(10)

(93706, 13)


Unnamed: 0,date,act_symbol,expiration,strike,call_put,bid,ask,vol,delta,gamma,theta,vega,rho
0,2024-04-29,AAPL,2024-06-21,225.0,Put,51.1,51.9,0.415,-0.9635,0.0054,-0.0134,0.0512,-0.0614
1,2024-04-29,AAPL,2024-06-21,225.0,Call,0.11,0.13,0.3059,0.017,0.002,-0.0083,0.028,0.0041
2,2024-04-29,AAPL,2024-06-21,220.0,Put,46.1,47.05,0.3991,-0.9518,0.0062,-0.0165,0.0648,-0.0754
3,2024-04-29,AAPL,2024-06-21,220.0,Call,0.16,0.17,0.2965,0.0233,0.0028,-0.0106,0.0364,0.0056
4,2024-04-29,AAPL,2024-06-21,215.0,Put,40.95,41.9,0.3394,-0.9726,0.0067,-0.0081,0.041,-0.0402
5,2024-04-29,AAPL,2024-06-21,215.0,Call,0.22,0.23,0.2858,0.0316,0.0037,-0.0132,0.047,0.0076
6,2024-04-29,AAPL,2024-06-21,210.0,Put,35.9,36.9,0.3024,-0.9766,0.0076,-0.0057,0.0379,-0.0304
7,2024-04-29,AAPL,2024-06-21,210.0,Call,0.31,0.33,0.2761,0.0443,0.0051,-0.0169,0.0619,0.0107
8,2024-04-29,AAPL,2024-06-21,205.0,Put,31.0,31.95,0.2879,-0.9562,0.0094,-0.0101,0.0641,-0.0553
9,2024-04-29,AAPL,2024-06-21,205.0,Call,0.46,0.48,0.2672,0.0635,0.007,-0.0218,0.0823,0.0153


In [5]:
# Load Current Options Data
stock = yf.Ticker(ticker)
options_data = stock.option_chain()

# Fetch all available expiry dates
all_dates = stock.options  # This will fetch all available expiry dates
combined_df = pd.DataFrame()  # This will hold all the combined data

for date in all_dates:
    # Fetch the calls and puts for the current expiry date
    options_data = stock.option_chain(date)
    calls_df = options_data.calls
    puts_df = options_data.puts
    
    # Add a column for the expiration date to calls and puts DataFrames
    calls_df['expiration_date'] = date
    puts_df['expiration_date'] = date

    # Label each DataFrame
    calls_df['type'] = 'call'
    puts_df['type'] = 'put'
    
    # Concatenate calls and puts into one DataFrame and append to the combined DataFrame
    options_df = pd.concat([calls_df, puts_df])
    current_options = pd.concat([combined_df, options_df])

print(current_options.shape)
current_options.head() # Show the first few rows of the combined DataFrame, (was 'combined_df')

(77, 16)


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,expiration_date,type
0,AAPL261218C00050000,2024-05-02 19:41:23+00:00,50.0,126.53,124.5,129.5,3.830002,3.121436,3.0,9,0.598332,True,REGULAR,USD,2026-12-18,call
1,AAPL261218C00060000,2024-04-09 13:30:00+00:00,60.0,114.0,116.0,121.0,0.0,0.0,1.0,18,0.56873,True,REGULAR,USD,2026-12-18,call
2,AAPL261218C00070000,2024-04-30 17:09:00+00:00,70.0,110.4,107.5,112.5,0.0,0.0,20.0,161,0.536626,True,REGULAR,USD,2026-12-18,call
3,AAPL261218C00080000,2024-04-16 17:39:56+00:00,80.0,97.58,99.0,104.0,0.0,0.0,,100,0.503972,True,REGULAR,USD,2026-12-18,call
4,AAPL261218C00085000,2024-04-30 19:57:46+00:00,85.0,94.6,95.05,99.95,0.0,0.0,50.0,53,0.541692,True,REGULAR,USD,2026-12-18,call


In [6]:
# Convert 'lastTradeDate' column to datetime format
current_options['lastTradeDate'] = pd.to_datetime(current_options['lastTradeDate'])

# Create a new 'date' column with the desired format
current_options['date'] = current_options['lastTradeDate'].dt.strftime('%Y-%m-%d')

# Display the DataFrame with the new 'date' column
print(current_options.shape)
current_options.head(10)

(77, 17)


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,expiration_date,type,date
0,AAPL261218C00050000,2024-05-02 19:41:23+00:00,50.0,126.53,124.5,129.5,3.830002,3.121436,3.0,9,0.598332,True,REGULAR,USD,2026-12-18,call,2024-05-02
1,AAPL261218C00060000,2024-04-09 13:30:00+00:00,60.0,114.0,116.0,121.0,0.0,0.0,1.0,18,0.56873,True,REGULAR,USD,2026-12-18,call,2024-04-09
2,AAPL261218C00070000,2024-04-30 17:09:00+00:00,70.0,110.4,107.5,112.5,0.0,0.0,20.0,161,0.536626,True,REGULAR,USD,2026-12-18,call,2024-04-30
3,AAPL261218C00080000,2024-04-16 17:39:56+00:00,80.0,97.58,99.0,104.0,0.0,0.0,,100,0.503972,True,REGULAR,USD,2026-12-18,call,2024-04-16
4,AAPL261218C00085000,2024-04-30 19:57:46+00:00,85.0,94.6,95.05,99.95,0.0,0.0,50.0,53,0.541692,True,REGULAR,USD,2026-12-18,call,2024-04-30
5,AAPL261218C00090000,2024-04-30 19:58:08+00:00,90.0,91.0,91.0,94.65,0.0,0.0,150.0,161,0.502599,True,REGULAR,USD,2026-12-18,call,2024-04-30
6,AAPL261218C00095000,2024-05-02 19:31:06+00:00,95.0,89.03,87.0,91.5,5.330002,6.367983,40.0,10,0.503514,True,REGULAR,USD,2026-12-18,call,2024-05-02
7,AAPL261218C00100000,2024-05-02 19:49:22+00:00,100.0,85.4,83.0,86.85,2.400002,2.891568,5.0,413,0.478185,True,REGULAR,USD,2026-12-18,call,2024-05-02
8,AAPL261218C00105000,2024-04-30 19:53:09+00:00,105.0,80.75,79.0,84.0,0.0,0.0,1.0,34,0.48145,True,REGULAR,USD,2026-12-18,call,2024-04-30
9,AAPL261218C00110000,2024-04-22 15:39:44+00:00,110.0,71.05,75.85,79.35,0.0,0.0,12.0,114,0.456762,True,REGULAR,USD,2026-12-18,call,2024-04-22


In [7]:
# Risk Free Rate
# FRED
fred_api_key = '89cf57325599e66d278022b8e765d92c'

# Initialize the FRED API client
fred = Fred(api_key=fred_api_key)

# Get the three-month Treasury yield data
three_month_yield = fred.get_series('DTB3')

# Print the retrieved data
three_month_yield.tail()

2024-04-25    5.26
2024-04-26    5.25
2024-04-29    5.25
2024-04-30    5.25
2024-05-01    5.26
dtype: float64

In [8]:
# Define the current date and time
current_date = pd.Timestamp(datetime.utcnow())  # Using UTC time

In [9]:
# Get historical market data
historical_prices = asset.history(period="5y")

# Calculate returns
historical_prices['returns'] = np.log(historical_prices['Close'] / historical_prices['Close'].shift(1))

historical_prices['date'] = historical_prices.index
historical_prices['date'] = historical_prices['date'].dt.strftime('%Y-%m-%d')

# Calculating additional features from 'hist'
window_size = 21  # Define a window size for rolling computations
historical_prices['rolling_vol'] = historical_prices['returns'].rolling(window=window_size).std() * np.sqrt(252)  # Annualized volatility
historical_prices['sma_50'] = historical_prices['Close'].rolling(window=50).mean()
historical_prices['ema_21'] = historical_prices['Close'].ewm(span=21, adjust=False).mean()

# RSI Calculation
delta = historical_prices['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
historical_prices['rsi'] = 100 - (100 / (1 + rs))

historical_prices.dropna(inplace=True)
print(historical_prices.shape)
historical_prices.head()

(1210, 13)


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,returns,date,rolling_vol,sma_50,ema_21,rsi
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-07-15 00:00:00-04:00,49.30846,49.73851,49.286716,49.579056,67789600,0.0,0.0,0.009351,2019-07-15,0.179018,46.732801,48.252031,61.776239
2019-07-16 00:00:00-04:00,49.429267,49.796503,49.165923,49.407524,67467200,0.0,0.0,-0.003466,2019-07-16,0.176823,46.701694,48.357076,67.272746
2019-07-17 00:00:00-04:00,49.29879,49.550053,49.110341,49.129669,56430000,0.0,0.0,-0.00564,2019-07-17,0.178786,46.68077,48.427311,57.795209
2019-07-18 00:00:00-04:00,49.286715,49.740927,49.214234,49.687775,74162400,0.0,0.0,0.011296,2019-07-18,0.165411,46.69806,48.541899,61.830489
2019-07-19 00:00:00-04:00,49.719181,49.890719,48.890489,48.946056,83717200,0.0,0.0,-0.01504,2019-07-19,0.174739,46.700324,48.578641,58.888455


In [10]:
# Check data types of the columns involved in the merge
print(historical_options.dtypes)
print(historical_volatility.dtypes)
print(current_options.dtypes)
print(historical_prices.dtypes)

date           object
act_symbol     object
expiration     object
strike        float64
call_put       object
bid           float64
ask           float64
vol           float64
delta         float64
gamma         float64
theta         float64
vega          float64
rho           float64
dtype: object
date                  object
act_symbol            object
hv_current           float64
hv_week_ago          float64
hv_month_ago         float64
hv_year_high         float64
hv_year_high_date     object
hv_year_low          float64
hv_year_low_date      object
iv_current           float64
iv_week_ago          float64
iv_month_ago         float64
iv_year_high         float64
iv_year_high_date     object
iv_year_low          float64
iv_year_low_date      object
dtype: object
contractSymbol                    object
lastTradeDate        datetime64[ns, UTC]
strike                           float64
lastPrice                        float64
bid                              float64
ask              

In [11]:
# Convert 'date' columns in each DataFrame to date-only format
historical_options['date'] = pd.to_datetime(historical_options['date']).dt.date
historical_volatility['date'] = pd.to_datetime(historical_volatility['date']).dt.date
current_options['date'] = pd.to_datetime(current_options['date']).dt.date
historical_prices['date'] = pd.to_datetime(historical_prices['date']).dt.date

# Check the unique dates again to ensure conversion was successful
print("Unique dates in historical_options:", historical_options['date'].unique())
print("Unique dates in historical_volatility:", historical_volatility['date'].unique())
print("Unique dates in current_options:", current_options['date'].unique())
print("Unique dates in hist:", historical_prices['date'].unique())

Unique dates in historical_options: [datetime.date(2024, 4, 29) datetime.date(2024, 4, 26)
 datetime.date(2024, 4, 24) datetime.date(2024, 4, 22)
 datetime.date(2024, 4, 19) datetime.date(2024, 4, 17)
 datetime.date(2024, 4, 15) datetime.date(2024, 4, 12)
 datetime.date(2024, 4, 10) datetime.date(2024, 4, 8)
 datetime.date(2024, 4, 5) datetime.date(2024, 4, 3)
 datetime.date(2024, 4, 1) datetime.date(2024, 3, 29)
 datetime.date(2024, 3, 27) datetime.date(2024, 3, 25)
 datetime.date(2024, 3, 22) datetime.date(2024, 3, 20)
 datetime.date(2024, 3, 18) datetime.date(2024, 3, 15)
 datetime.date(2024, 3, 13) datetime.date(2024, 3, 11)
 datetime.date(2024, 3, 8) datetime.date(2024, 3, 6)
 datetime.date(2024, 3, 4) datetime.date(2024, 3, 1)
 datetime.date(2024, 2, 28) datetime.date(2024, 2, 26)
 datetime.date(2024, 2, 23) datetime.date(2024, 2, 21)
 datetime.date(2024, 2, 19) datetime.date(2024, 2, 16)
 datetime.date(2024, 2, 14) datetime.date(2024, 2, 12)
 datetime.date(2024, 2, 9) datetime.d

In [12]:
# Merge data
nn_data = pd.merge(historical_options, historical_volatility, on='date', how='inner')
nn_data = pd.merge(nn_data, historical_prices, on=['date'], how='left')

# Check the result
print("Number of rows in merged DataFrame:", nn_data.shape[0])
nn_data.dropna(inplace=True)
nn_data.head()

Number of rows in merged DataFrame: 92748


Unnamed: 0,date,act_symbol_x,expiration,strike,call_put,bid,ask,vol,delta,gamma,...,Low,Close,Volume,Dividends,Stock Splits,returns,rolling_vol,sma_50,ema_21,rsi
9488,2023-11-13,AAPL,2023-12-29,240.0,Put,54.65,55.85,0.4335,-0.9781,0.005,...,183.975254,184.564499,43627500.0,0.0,0.0,-0.008621,0.200322,175.934003,177.696671,68.893427
9489,2023-11-13,AAPL,2023-12-29,240.0,Call,0.01,0.16,0.3402,0.0203,0.0022,...,183.975254,184.564499,43627500.0,0.0,0.0,-0.008621,0.200322,175.934003,177.696671,68.893427
9490,2023-11-13,AAPL,2023-12-29,235.0,Put,49.8,50.85,0.4238,-0.9636,0.0057,...,183.975254,184.564499,43627500.0,0.0,0.0,-0.008621,0.200322,175.934003,177.696671,68.893427
9491,2023-11-13,AAPL,2023-12-29,235.0,Call,0.01,0.16,0.3165,0.0216,0.0025,...,183.975254,184.564499,43627500.0,0.0,0.0,-0.008621,0.200322,175.934003,177.696671,68.893427
9492,2023-11-13,AAPL,2023-12-29,230.0,Put,44.8,45.85,0.3934,-0.9613,0.0065,...,183.975254,184.564499,43627500.0,0.0,0.0,-0.008621,0.200322,175.934003,177.696671,68.893427


In [13]:
print(nn_data.columns)  # Check if 'volume' and 'openInterest' are in nn_data
print(historical_prices.columns)  # Check if 'volume' and 'openInterest' are in historical_prices


Index(['date', 'act_symbol_x', 'expiration', 'strike', 'call_put', 'bid',
       'ask', 'vol', 'delta', 'gamma', 'theta', 'vega', 'rho', 'act_symbol_y',
       'hv_current', 'hv_week_ago', 'hv_month_ago', 'hv_year_high',
       'hv_year_high_date', 'hv_year_low', 'hv_year_low_date', 'iv_current',
       'iv_week_ago', 'iv_month_ago', 'iv_year_high', 'iv_year_high_date',
       'iv_year_low', 'iv_year_low_date', 'Open', 'High', 'Low', 'Close',
       'Volume', 'Dividends', 'Stock Splits', 'returns', 'rolling_vol',
       'sma_50', 'ema_21', 'rsi'],
      dtype='object')
Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits',
       'returns', 'date', 'rolling_vol', 'sma_50', 'ema_21', 'rsi'],
      dtype='object')


In [14]:
nn_data.shape

(68768, 40)

In [15]:
# Define features and labels
features = nn_data[[
    'hv_week_ago',      # Historical volatility one week ago
    'hv_month_ago',     # Historical volatility one month ago
    'iv_week_ago',      # Implied volatility one week ago
    'iv_month_ago',     # Implied volatility one month ago
    'rolling_vol',      # Rolling calculated volatility
    'returns',          # Historical returns
    'gamma',            # Option Greek Gamma
    'vega',             # Option Greek Vega
    'High',             # Highest price of the day
    'Low',              # Lowest price of the day
    'Close',            # Closing price of the day
    'Open',             # Opening price of the day
    'Volume',           # Trading volume
    'rsi',              # Relative Strength Index
    'sma_50',           # 50-day Simple Moving Average
    'ema_21'            # 21-day Exponential Moving Average
]]

labels = nn_data[['hv_current']]

In [16]:
# Normalize features
scaler = StandardScaler()
features = scaler.fit_transform(features)
features = np.nan_to_num(features)
labels = np.nan_to_num(labels.values.flatten())

# Convert to PyTorch tensors
X = torch.tensor(features, dtype=torch.float32)
y = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)

In [17]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create DataLoader instances
train_data = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
test_data = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))
train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
test_loader = DataLoader(test_data, batch_size=8, shuffle=False)

# Define neural network architecture
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(X_train.shape[1], 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.out(x)
        return x

# Instantiate model
model = NeuralNet()

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=50):
    model.train()
    for epoch in range(num_epochs):
        for data, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

train_model(model, train_loader, criterion, optimizer)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.014448199421167374
Epoch 2, Loss: 0.002776874927803874
Epoch 3, Loss: 0.02925787679851055
Epoch 4, Loss: 0.009987548924982548
Epoch 5, Loss: 0.06696757674217224
Epoch 6, Loss: 0.008510539308190346
Epoch 7, Loss: 0.017226332798600197
Epoch 8, Loss: 0.05674970895051956
Epoch 9, Loss: 0.009878125041723251
Epoch 10, Loss: 0.011879777535796165
Epoch 11, Loss: 0.006962781306356192
Epoch 12, Loss: 0.009657783433794975
Epoch 13, Loss: 0.009651553817093372
Epoch 14, Loss: 0.008731670677661896
Epoch 15, Loss: 0.005040444433689117
Epoch 16, Loss: 0.04131799191236496
Epoch 17, Loss: 0.01943567581474781
Epoch 18, Loss: 0.007277412340044975
Epoch 19, Loss: 0.00927568506449461
Epoch 20, Loss: 0.014682046137750149
Epoch 21, Loss: 0.00947065744549036
Epoch 22, Loss: 0.1327025294303894
Epoch 23, Loss: 0.1286233365535736
Epoch 24, Loss: 0.06862455606460571
Epoch 25, Loss: 0.004122513812035322
Epoch 26, Loss: 0.07612945884466171
Epoch 27, Loss: 0.016719795763492584
Epoch 28, Loss: 0.00439

In [18]:
print(len(train_loader.dataset))
print(len(test_loader.dataset))

55014
13754


In [19]:
model.eval()
np.set_printoptions(threshold=np.inf)
predictions = model(torch.Tensor(X_test))
print(predictions.detach().numpy())

[[0.29714742]
 [0.32766315]
 [0.30749127]
 [0.31258872]
 [0.32059035]
 [0.32477242]
 [0.37780172]
 [0.31758752]
 [0.2960116 ]
 [0.295927  ]
 [0.2934246 ]
 [0.31055033]
 [0.30261075]
 [0.29714742]
 [0.30844074]
 [0.2958472 ]
 [0.2920522 ]
 [0.30276462]
 [0.29714742]
 [0.309969  ]
 [0.29480898]
 [0.30816028]
 [0.29579997]
 [0.31674966]
 [0.30261075]
 [0.29384804]
 [0.29557067]
 [0.29309613]
 [0.3183519 ]
 [0.30261075]
 [0.32288945]
 [0.2952323 ]
 [0.31110692]
 [0.29398856]
 [0.2933084 ]
 [0.3249524 ]
 [0.30282173]
 [0.30856603]
 [0.29539555]
 [0.30959705]
 [0.30336046]
 [0.29507506]
 [0.2936474 ]
 [0.29356208]
 [0.31906235]
 [0.29550073]
 [0.29421017]
 [0.3114007 ]
 [0.2970503 ]
 [0.3042095 ]
 [0.30261075]
 [0.30814448]
 [0.30867735]
 [0.3040944 ]
 [0.2923539 ]
 [0.29519546]
 [0.3114798 ]
 [0.30261075]
 [0.30724454]
 [0.29472643]
 [0.30886257]
 [0.29714742]
 [0.3216877 ]
 [0.31040722]
 [0.3162188 ]
 [0.29714742]
 [0.30261075]
 [0.29714742]
 [0.30712146]
 [0.30261075]
 [0.29387322]
 [0.32

In [20]:
def evaluate_model(model, test_loader):
    model.eval()
    with torch.no_grad():
        for data, targets in test_loader:
            outputs = model(data)
            loss = criterion(outputs, targets)
            print(f'Test Loss: {loss.item()}')

evaluate_model(model, test_loader)

Test Loss: 0.05733265355229378
Test Loss: 0.00494686933234334
Test Loss: 0.00844472274184227
Test Loss: 0.010916532948613167
Test Loss: 0.007910086773335934
Test Loss: 0.010875855572521687
Test Loss: 0.004035358317196369
Test Loss: 0.006742102559655905
Test Loss: 0.006351395510137081
Test Loss: 0.009686642326414585
Test Loss: 0.006924772635102272
Test Loss: 0.008903161622583866
Test Loss: 0.012315340340137482
Test Loss: 0.008850705809891224
Test Loss: 0.04923093691468239
Test Loss: 0.07964035123586655
Test Loss: 0.004294846206903458
Test Loss: 0.08765391260385513
Test Loss: 0.018444033339619637
Test Loss: 0.05555961653590202
Test Loss: 0.004708257503807545
Test Loss: 0.010337340645492077
Test Loss: 0.0064382548443973064
Test Loss: 0.004484796896576881
Test Loss: 0.015068101696670055
Test Loss: 0.013268559239804745
Test Loss: 0.0075418646447360516
Test Loss: 0.010518049821257591
Test Loss: 0.010426982305943966
Test Loss: 0.06297214329242706
Test Loss: 0.05407411605119705
Test Loss: 0.00

  return F.mse_loss(input, target, reduction=self.reduction)


In [21]:
def predict_volatility(model, data_loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for data, _ in data_loader:
            outputs = model(data)
            predictions.extend(outputs.squeeze().tolist())
    return predictions

predicted_volatility = predict_volatility(model, test_loader)
print(len(predicted_volatility))

13754


In [22]:
# BINOMIAL OPTION PRICING MODEL
# This implementation is for a simple European option using the Cox-Ross-Rubinstein binomial tree model.

def binomial_option_price(S, K, T, r, sigma, steps, option_type):
    # S: initial stock price
    # K: strike price
    # T: time to maturity
    # r: risk-free rate
    # sigma: volatility
    # steps: number of steps in the binomial tree
    # option_type: 'call' or 'put'
    
    # Calculate time step
    dt = T / steps

    # Calculate up and down factors
    u = np.exp(sigma * np.sqrt(dt))
    d = 1 / u

    # Calculate the probability of the price going up
    p = (np.exp(r * dt) - d) / (u - d)
    
    # Initialize arrays for stock prices and option values
    stock_price = np.zeros(steps + 1)
    option_value = np.zeros(steps + 1)
    
    # Generate stock prices at maturity
    stock_price[0] = S * d**steps
    for i in range(1, steps + 1):
        stock_price[i] = stock_price[i - 1] * (u / d)
    
    # Calculate option values at maturity
    if option_type == 'call':
        option_value = np.maximum(stock_price - K, 0)
    else:
        option_value = np.maximum(K - stock_price, 0)
    
    # Iterate backwards through the tree
    for i in range(steps - 1, -1, -1):
        for j in range(i + 1):
            option_value[j] = (p * option_value[j + 1] + (1 - p) * option_value[j]) * np.exp(-r * dt)
    
    return option_value[0]

In [23]:
# # Fit a basic GARCH(1,1) model
# garch_model = arch_model(hist['returns'].dropna(), vol='Garch', p=1, q=1)
# garch_result = garch_model.fit(update_freq=5)

# # Display the model summary
# print(garch_result.summary())

# # Forecast the next step
# forecast = garch_result.forecast(horizon=1)
# volatility = np.sqrt(forecast.variance.values[-1, :][0])

In [24]:
returns = nn_data['returns']

In [25]:
print(predicted_volatility)

[0.2971474230289459, 0.3276631534099579, 0.307491272687912, 0.31258872151374817, 0.3205903470516205, 0.3247724175453186, 0.37780171632766724, 0.3175875246524811, 0.2960115969181061, 0.2959269881248474, 0.2934246063232422, 0.310550332069397, 0.30261075496673584, 0.2971474230289459, 0.3084407448768616, 0.29584720730781555, 0.2920522093772888, 0.3027646243572235, 0.2971474230289459, 0.3099690079689026, 0.2948089838027954, 0.30816027522087097, 0.29579997062683105, 0.3167496621608734, 0.30261075496673584, 0.29384803771972656, 0.2955706715583801, 0.293096125125885, 0.3183518946170807, 0.30261075496673584, 0.32288944721221924, 0.29523229598999023, 0.31110692024230957, 0.29398855566978455, 0.29330840706825256, 0.3249523937702179, 0.30282172560691833, 0.30856603384017944, 0.29539555311203003, 0.30959704518318176, 0.3033604621887207, 0.29507505893707275, 0.2936474084854126, 0.2935620844364166, 0.31906235218048096, 0.2955007255077362, 0.29421016573905945, 0.31140071153640747, 0.2970502972602844, 

In [26]:
# Convert predicted volatility to initial variances (squared volatility)
initial_variances = np.array(predicted_volatility) ** 2

# GARCH Model
garch_model = arch_model(returns*100, vol='Garch', p=1, q=1)
garch_model.volatility.starting_vals = initial_variances[0]  # Setting the initial condition

# Fit the model
garch_result = garch_model.fit(update_freq=5)
print(garch_result.summary())

Iteration:      5,   Func. Count:     33,   Neg. LLF: 749147482.7910933
Iteration:     10,   Func. Count:     68,   Neg. LLF: 757958.7914420202
Iteration:     15,   Func. Count:     97,   Neg. LLF: 100021.40330764103
Iteration:     20,   Func. Count:    130,   Neg. LLF: 129365207799.27676
Optimization terminated successfully    (Exit mode 0)
            Current function value: 96133.13684576965
            Iterations: 25
            Function evaluations: 153
            Gradient evaluations: 22
                     Constant Mean - GARCH Model Results                      
Dep. Variable:                returns   R-squared:                       0.000
Mean Model:             Constant Mean   Adj. R-squared:                  0.000
Vol Model:                      GARCH   Log-Likelihood:               -96133.1
Distribution:                  Normal   AIC:                           192274.
Method:            Maximum Likelihood   BIC:                           192311.
                          

In [27]:
print(initial_variances)

[0.08829659 0.10736314 0.09455088 0.09771171 0.10277817 0.10547712
 0.14273414 0.10086184 0.08762287 0.08757278 0.086098   0.09644151
 0.09157327 0.08829659 0.09513569 0.08752557 0.08529449 0.09166642
 0.08829659 0.09608079 0.08691234 0.09496276 0.08749762 0.10033035
 0.09157327 0.08634667 0.08736202 0.08590534 0.10134793 0.09157327
 0.1042576  0.08716211 0.09678752 0.08642927 0.08602982 0.10559406
 0.091701   0.095213   0.08725853 0.09585033 0.09202757 0.08706929
 0.0862288  0.0861787  0.10180078 0.08732068 0.08655962 0.0969704
 0.08823888 0.09254342 0.09157327 0.09495302 0.0952817  0.09247341
 0.0854708  0.08714036 0.09701967 0.09157327 0.09439921 0.08686367
 0.09539609 0.08829659 0.10348298 0.09635264 0.09999433 0.08829659
 0.09157327 0.08829659 0.09432359 0.09157327 0.08636147 0.10306649
 0.10968227 0.09157327 0.08759012 0.08829659 0.09431941 0.0987023
 0.08595454 0.08716228 0.08569829 0.08514343 0.09433022 0.1018203
 0.09472398 0.0953487  0.0911227  0.08829659 0.09396565 0.1019827

In [28]:
from scipy.optimize import minimize
from arch import arch_model

# 'returns' is your Array of returns
def garch_param_optimizer(params):
    p, q = int(params[0]), int(params[1])
    garch_model = arch_model(returns*100, vol='Garch', p=max(1, p), q=max(1, q))
    garch_result = garch_model.fit(disp='off')
    
    # Calculate the mean squared error between actual and predicted volatility
    # actual_vol = garch_result.conditional_volatility
    predicted_volatility = garch_result.conditional_volatility
    mse = np.mean((labels - predicted_volatility) ** 2)
    return mse

# Initial guesses for p and q
initial_guess = [1, 1]

# Bounds to ensure p and q are integers and within a reasonable range
bounds = [(1, 5), (1, 5)]

# Optimize p and q
result = minimize(garch_param_optimizer, initial_guess, bounds=bounds, method='L-BFGS-B')

# Print the optimized p and q values
optimized_p, optimized_q = int(result.x[0]), int(result.x[1])
print(f"Optimized p: {optimized_p}, Optimized q: {optimized_q}")

# Fit the GARCH model with optimized parameters
optimized_garch_model = arch_model(returns*100, vol='Garch', p=optimized_p, q=optimized_q)
optimized_garch_result = optimized_garch_model.fit()
print(optimized_garch_result.summary())


Optimized p: 1, Optimized q: 1
Iteration:      1,   Func. Count:      6,   Neg. LLF: 66372467.64437784
Iteration:      2,   Func. Count:     16,   Neg. LLF: 1023624.7696857778
Iteration:      3,   Func. Count:     22,   Neg. LLF: 99251.94550711174
Iteration:      4,   Func. Count:     27,   Neg. LLF: 106081.38422514198
Iteration:      5,   Func. Count:     33,   Neg. LLF: 749147482.7910933
Iteration:      6,   Func. Count:     44,   Neg. LLF: 36535758279.5635
Iteration:      7,   Func. Count:     50,   Neg. LLF: 99096.76329198753
Iteration:      8,   Func. Count:     56,   Neg. LLF: 185586.5476043589
Iteration:      9,   Func. Count:     62,   Neg. LLF: 10477958.108570369
Iteration:     10,   Func. Count:     68,   Neg. LLF: 757958.7914420202
Iteration:     11,   Func. Count:     74,   Neg. LLF: 97353.93710753968
Iteration:     12,   Func. Count:     80,   Neg. LLF: 96169.10004243748
Iteration:     13,   Func. Count:     85,   Neg. LLF: 96149.8543788663
Iteration:     14,   Func. Count

In [33]:
# Last observed close price of the underlying asset
S = nn_data['Close'].iloc[-1]

# Annual risk-free rate from FRED (convert to decimal)
r = three_month_yield.iloc[-1] / 100

# Daily volatility estimated from GARCH model
sigma = garch_result.conditional_volatility.iloc[-1]  # Assuming this produces a scalar.

# Number of steps in the binomial tree
steps = 50

options = current_options.copy(deep=True)
options['expiration'] = pd.to_datetime(options['expiration_date'])
options['date'] = pd.to_datetime(options['date'])
options['TTM'] = (options['expiration'] - options['date']).dt.days / 365

# Initialize a new column for the calculated option prices
options['Option Price'] = 0

# Iterate over each row in the DataFrame to calculate option prices
for index, row in options.iterrows():
    K = row['strike']  # Strike price from the DataFrame
    T = row['TTM']  # Time to maturity from the DataFrame
    option_type = 'call' if row['type'] == 'C' else 'put'

    # Calculate option price for each row using the binomial model
    option_price = binomial_option_price(S, K, T, r, sigma, steps, option_type)
    options.loc[index, 'Option Price'] = option_price

# Display the updated DataFrame
options.head(20)

  options.loc[index, 'Option Price'] = option_price


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,expiration_date,type,date,expiration,TTM,Option Price
0,AAPL261218C00050000,2024-05-02 19:41:23+00:00,50.0,126.53,124.5,129.5,3.830002,3.121436,3.0,9,0.598332,True,REGULAR,USD,2026-12-18,call,2024-05-02,2026-12-18,2.630137,10.32764
1,AAPL261218C00060000,2024-04-09 13:30:00+00:00,60.0,114.0,116.0,121.0,0.0,0.0,1.0,18,0.56873,True,REGULAR,USD,2026-12-18,call,2024-04-09,2026-12-18,2.693151,14.97624
2,AAPL261218C00070000,2024-04-30 17:09:00+00:00,70.0,110.4,107.5,112.5,0.0,0.0,20.0,161,0.536626,True,REGULAR,USD,2026-12-18,call,2024-04-30,2026-12-18,2.635616,20.101276
3,AAPL261218C00080000,2024-04-16 17:39:56+00:00,80.0,97.58,99.0,104.0,0.0,0.0,,100,0.503972,True,REGULAR,USD,2026-12-18,call,2024-04-16,2026-12-18,2.673973,25.873386
4,AAPL261218C00085000,2024-04-30 19:57:46+00:00,85.0,94.6,95.05,99.95,0.0,0.0,50.0,53,0.541692,True,REGULAR,USD,2026-12-18,call,2024-04-30,2026-12-18,2.635616,28.977496
5,AAPL261218C00090000,2024-04-30 19:58:08+00:00,90.0,91.0,91.0,94.65,0.0,0.0,150.0,161,0.502599,True,REGULAR,USD,2026-12-18,call,2024-04-30,2026-12-18,2.635616,31.913092
6,AAPL261218C00095000,2024-05-02 19:31:06+00:00,95.0,89.03,87.0,91.5,5.330002,6.367983,40.0,10,0.503514,True,REGULAR,USD,2026-12-18,call,2024-05-02,2026-12-18,2.630137,34.915528
7,AAPL261218C00100000,2024-05-02 19:49:22+00:00,100.0,85.4,83.0,86.85,2.400002,2.891568,5.0,413,0.478185,True,REGULAR,USD,2026-12-18,call,2024-05-02,2026-12-18,2.630137,38.186021
8,AAPL261218C00105000,2024-04-30 19:53:09+00:00,105.0,80.75,79.0,84.0,0.0,0.0,1.0,34,0.48145,True,REGULAR,USD,2026-12-18,call,2024-04-30,2026-12-18,2.635616,41.628525
9,AAPL261218C00110000,2024-04-22 15:39:44+00:00,110.0,71.05,75.85,79.35,0.0,0.0,12.0,114,0.456762,True,REGULAR,USD,2026-12-18,call,2024-04-22,2026-12-18,2.657534,45.020696
