## Imports

In [15]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
import random
import seaborn as sns
from IPython.display import display
import matplotlib.pyplot as plt
# from matplotlib import animation
# from JSAnimation.IPython_display import display_animation
# import gym
import scipy.stats as stats
from scipy.stats import norm
from collections import namedtuple
import statistics
import time
import os
from collections import deque
%matplotlib inline

import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

import yfinance as yf
import datetime as dt

import pickle

## Classes
- To do feature engineering
- To simulate options data

In [28]:
def calculate_rsi(series, period=2):
    delta = series.diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=period).mean()
    RS = gain / loss.replace(0, 1e-8)  # Prevent division by zero
    RSI = 100 - (100 / (1 + RS))
    return RSI

def MA(series, n):
    return series.rolling(window=n).mean()

def EMA(series, n):
    return series.ewm(span=n, min_periods=n).mean()

def ROC(series, n):
    diff = series.diff(n)
    shifted = series.shift(n)
    roc = diff / shifted
    return roc

def MOM(series, n):
    return series.diff(n)

def STOK(close, low, high, n):
    lowest_low = low.rolling(window=n).min()
    highest_high = high.rolling(window=n).max()
    stok = ((close - lowest_low) / (highest_high - lowest_low)) * 100
    return stok

def STOD(close, low, high, n):
    stok = STOK(close, low, high, n)
    stod = stok.rolling(window=3).mean()  # Using a window of 3 for smoothing
    return stod

def fetch_vix(start_date, end_date):
    vix = yf.download('^VIX', start=start_date, end=end_date)
    return vix['Adj Close']

def fetch_treasury_rates(start_date, end_date):
    treasury_3m = pdr.get_data_fred('DGS3MO', start=start_date, end=end_date)
    return treasury_3m['DGS3MO']



In [29]:
class OptionDataGenerator:
    def __init__(self, stock_data, risk_free_data, num_options=100, T=30):
        """
        Initialize the option data generator.
        """
        self.stock_data = stock_data
        self.risk_free_data = risk_free_data
        self.num_options = num_options  # this might still be useful if you want to keep track of how many options are considered
        self.T = T

        # Calculate daily returns and 30-day rolling volatility
        returns = self.stock_data['Adj Close'].pct_change()
        self.stock_data['Volatility'] = returns.rolling(window=30, min_periods=1).std()
        self.stock_data['Volatility'] = self.stock_data['Volatility'].fillna(method='ffill').fillna(method='bfill')
        self.stock_data['Volatility'] = self.stock_data['Volatility'].clip(lower=1e-8)

    def generate_options(self, fixed_time_to_expiration=None, fixed_strike=None):
        """
        Generate a DataFrame of call options data for the same dates as stock_data.
        """
        options_data = []
        time_to_expiration = fixed_time_to_expiration or self.T  # Use fixed or default expiration

        for date in self.stock_data.index:
            stock_price = self.stock_data.loc[date, 'Adj Close']
            risk_free_rate = self.risk_free_data.loc[date]
            vol = self.stock_data.loc[date, 'Volatility']
            T_in_days = time_to_expiration

            strike = fixed_strike if fixed_strike is not None else stock_price * (1 + np.random.uniform(-0.05, 0.05))
            option_price, delta, gamma, theta, vega, rho = self._black_scholes_greeks(
                stock_price, strike, T_in_days, risk_free_rate, vol)

            # Scale each Greek by the number of options
            options_data.append([
                date, strike, vol, time_to_expiration, option_price,
                delta * self.num_options,  # scaled delta
                gamma * self.num_options,  # scaled gamma
                theta * self.num_options,  # scaled theta
                vega * self.num_options,   # scaled vega
                rho * self.num_options,    # scaled rho
                risk_free_rate
            ])

        # Create DataFrame with the same date index as stock_data
        options_df = pd.DataFrame(
            options_data,
            columns=['Date', 'strike', 'impliedVolatility', 'TimeToExpiration',
                     'OptionPrice', 'delta', 'gamma', 'theta', 'vega', 'rho', 'RiskFreeRate']
        )
        options_df['Date'] = pd.to_datetime(options_df['Date'])
        options_df.set_index('Date', inplace=True)

        return options_df

    def _black_scholes_greeks(self, S, K, T, r, sigma):
        """
        Calculate the Black-Scholes call option price and Greeks.
        """
        if T <= 0 or sigma <= 0 or S <= 0 or K <= 0:
            return 0, 0, 0, 0, 0, 0

        d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
        d2 = d1 - sigma * np.sqrt(T)

        call_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
        delta = norm.cdf(d1)
        gamma = norm.pdf(d1) / (S * sigma * np.sqrt(T))
        theta = (-S * norm.pdf(d1) * sigma / (2 * np.sqrt(T)) 
                 - r * K * np.exp(-r * T) * norm.cdf(d2))
        vega = S * norm.pdf(d1) * np.sqrt(T) / 100  # Per 1% change in volatility
        rho = K * T * np.exp(-r * T) * norm.cdf(d2) / 100  # Per 1% change in rate

        return call_price, delta, gamma, theta, vega, rho

## Generate data

In [30]:
# Fetch AAPL stock data for training
ticker = 'AAPL'
aapl_stock_train = yf.download(ticker, start="2010-01-01", end="2022-12-31")
# Exclude the first 30 days (for a 30-day window)
aapl_stock_train = aapl_stock_train.iloc[30:].copy()

# Fetch AAPL stock data for testing
aapl_stock_test = yf.download(ticker, start="2023-01-01", end="2024-12-31")
aapl_stock_test = aapl_stock_test.iloc[30:].copy()


aapl_stock_train.index = pd.to_datetime(aapl_stock_train.index)
aapl_stock_test.index = pd.to_datetime(aapl_stock_test.index)

df_stock_train = pd.DataFrame(aapl_stock_train)
df_stock_test = pd.DataFrame(aapl_stock_test)

# print("Testing stock train data:")
# print(df_stock_train.head())

# print("Testing stock test data:")
# print(df_stock_test.head())

[*********************100%%**********************]  1 of 1 completed


[*********************100%%**********************]  1 of 1 completed


In [40]:
# Use the entire stock data
data_train = df_stock_train.copy()
data_test = df_stock_test.copy()

# Apply indicators to training data
features_train = pd.DataFrame(index=data_train.index)
features_train["LogReturn"] = np.log(data_train['Adj Close'] / data_train['Adj Close'].shift(1))
features_train["RSI2"] = calculate_rsi(data_train["Adj Close"], period=2)
features_train["MA10"] = MA(data_train['Adj Close'], 10)
features_train["MA30"] = MA(data_train['Adj Close'], 30)
features_train["MA200"] = MA(data_train['Adj Close'], 200)
features_train["EMA10"] = EMA(data_train['Adj Close'], 10)
features_train["EMA30"] = EMA(data_train['Adj Close'], 30)
features_train["EMA200"] = EMA(data_train['Adj Close'], 200)
features_train["ROC10"] = ROC(data_train['Adj Close'], 10)
features_train["ROC30"] = ROC(data_train['Adj Close'], 30)
features_train["MOM10"] = MOM(data_train['Adj Close'], 10)
features_train["MOM30"] = MOM(data_train['Adj Close'], 30)
features_train["%K10"] = STOK(data_train['Adj Close'], data_train['Low'], data_train['High'], 10)
features_train["%D10"] = STOD(data_train['Adj Close'], data_train['Low'], data_train['High'], 10)
features_train["%K30"] = STOK(data_train['Adj Close'], data_train['Low'], data_train['High'], 30)
features_train["%D30"] = STOD(data_train['Adj Close'], data_train['Low'], data_train['High'], 30)
# Fetch VIX and Risk-Free Rates
vix_train = fetch_vix("2010-01-01", "2022-12-31")
risk_free_train = fetch_treasury_rates("2010-01-01", "2022-12-31")

features_train["VIX"] = vix_train.reindex(features_train.index).ffill()
features_train["RiskFreeRate"] = risk_free_train.reindex(features_train.index).ffill() / 100 / 365
features_train["Volume"] = data_train["Volume"]

# Apply indicators to testing data
features_test = pd.DataFrame(index=data_test.index)
features_test["LogReturn"] = np.log(data_test['Adj Close'] / data_test['Adj Close'].shift(1))
features_test["RSI2"] = calculate_rsi(data_test["Adj Close"], period=2)
features_test["MA10"] = MA(data_test['Adj Close'], 10)
features_test["MA30"] = MA(data_test['Adj Close'], 30)
features_test["MA200"] = MA(data_test['Adj Close'], 200)
features_test["EMA10"] = EMA(data_test['Adj Close'], 10)
features_test["EMA30"] = EMA(data_test['Adj Close'], 30)
features_test["EMA200"] = EMA(data_test['Adj Close'], 200)
features_test["ROC10"] = ROC(data_test['Adj Close'], 10)
features_test["ROC30"] = ROC(data_test['Adj Close'], 30)
features_test["MOM10"] = MOM(data_test['Adj Close'], 10)
features_test["MOM30"] = MOM(data_test['Adj Close'], 30)
features_test["%K10"] = STOK(data_test['Adj Close'], data_test['Low'], data_test['High'], 10)
features_test["%D10"] = STOD(data_test['Adj Close'], data_test['Low'], data_test['High'], 10)
features_test["%K30"] = STOK(data_test['Adj Close'], data_test['Low'], data_test['High'], 30)
features_test["%D30"] = STOD(data_test['Adj Close'], data_test['Low'], data_test['High'], 30)
# Fetch VIX and Risk-Free Rates
vix_test = fetch_vix("2023-01-01", "2024-12-31")
risk_free_test = fetch_treasury_rates("2023-01-01", "2024-12-31")

features_test["VIX"] = vix_test.reindex(features_test.index).ffill()
features_test["RiskFreeRate"] = risk_free_test.reindex(features_test.index).ffill() / 100 / 365
features_test["Volume"] = data_test["Volume"]

columns_not_in_features = df_stock_train.columns.difference(features_train.columns)

# Combine features with the original stock data, then fill any remaining NaN values
features_train = pd.concat([features_train, df_stock_train[columns_not_in_features]], axis=1).ffill().bfill()
features_test = pd.concat([features_test, df_stock_test[columns_not_in_features]], axis=1).ffill().bfill()

# Final check to confirm alignment
# print("Aligned Features Train Head:\n", features_train.head())
# print("Aligned Features Test Head:\n", features_test.head())


[*********************100%%**********************]  1 of 1 completed


[*********************100%%**********************]  1 of 1 completed


In [41]:
# Ensure 'RiskFreeRate' is aligned to the index of df_stock_train and df_stock_test
risk_free_train_aligned = features_train['RiskFreeRate'].reindex(df_stock_train.index).ffill().bfill() 
risk_free_test_aligned = features_test['RiskFreeRate'].reindex(df_stock_test.index).ffill().bfill() 

# Assign the aligned 'RiskFreeRate' to the stock dataframes
df_stock_train['RiskFreeRate'] = risk_free_train_aligned
df_stock_test['RiskFreeRate'] = risk_free_test_aligned

# Confirm the addition
# print("df_stock_train with RiskFreeRate:\n", df_stock_train.head())
# print("df_stock_test with RiskFreeRate:\n", df_stock_test.head())

In [42]:
# Initialize the OptionDataGenerator with stock data and risk-free rates
option_generator_train = OptionDataGenerator(
    stock_data=df_stock_train,
    risk_free_data=features_train["RiskFreeRate"],
    num_options=100,  # changed
    T=30  # Fixed time to expiration
)

option_generator_test = OptionDataGenerator(
    stock_data=df_stock_test,
    risk_free_data=features_test["RiskFreeRate"],
    num_options=100,
    T=30
)

# Generate options
call_options_train = option_generator_train.generate_options(fixed_time_to_expiration=30)
call_options_test = option_generator_test.generate_options(fixed_time_to_expiration=30)

# Since we have one option per date, we can merge directly
call_options_train.reset_index(inplace=True)
call_options_test.reset_index(inplace=True)

# Set Date as index for merging
call_options_train.set_index('Date', inplace=True)
call_options_test.set_index('Date', inplace=True)

# Select only the Greeks
greeks_train = call_options_train[['delta', 'gamma', 'theta', 'vega', 'rho']]
greeks_test = call_options_test[['delta', 'gamma', 'theta', 'vega', 'rho']]

# Combine features with the Greeks
features_train = pd.concat([features_train, greeks_train], axis=1)
features_test = pd.concat([features_test, greeks_test], axis=1)

# Select only the necessary columns from options data
options_train = call_options_train[['impliedVolatility', 'OptionPrice']]
options_test = call_options_test[['impliedVolatility', 'OptionPrice']]

# Rename columns to fit into the features DataFrame
options_train.rename(columns={'impliedVolatility': 'vol', 'OptionPrice': 'OptionPrice'}, inplace=True)
options_test.rename(columns={'impliedVolatility': 'vol', 'OptionPrice': 'OptionPrice'}, inplace=True)

# Combine option features with the stock features
features_train = pd.concat([features_train, options_train], axis=1)
features_test = pd.concat([features_test, options_test], axis=1)

# Fill any remaining NaN values (if any)
features_train.fillna(method='ffill', inplace=True)
features_test.fillna(method='ffill', inplace=True)

  self.stock_data['Volatility'] = self.stock_data['Volatility'].fillna(method='ffill').fillna(method='bfill')
  self.stock_data['Volatility'] = self.stock_data['Volatility'].fillna(method='ffill').fillna(method='bfill')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  options_train.rename(columns={'impliedVolatility': 'vol', 'OptionPrice': 'OptionPrice'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  options_test.rename(columns={'impliedVolatility': 'vol', 'OptionPrice': 'OptionPrice'}, inplace=True)
  features_train.fillna(method='ffill', inplace=True)
  features_test.fillna(method='ffill', inplace=True)


In [43]:
print(features_train.columns)

Index(['LogReturn', 'RSI2', 'MA10', 'MA30', 'MA200', 'EMA10', 'EMA30',
       'EMA200', 'ROC10', 'ROC30', 'MOM10', 'MOM30', '%K10', '%D10', '%K30',
       '%D30', 'VIX', 'RiskFreeRate', 'Volume', 'Adj Close', 'Close', 'High',
       'Low', 'Open', 'delta', 'gamma', 'theta', 'vega', 'rho', 'vol',
       'OptionPrice'],
      dtype='object')


In [44]:
print(features_train.shape)
print(features_test.shape)
print(df_stock_train.shape)
print(df_stock_test.shape)
print(call_options_train.shape)
print(call_options_test.shape)

(3242, 31)
(434, 31)
(3242, 8)
(434, 8)
(3242, 10)
(434, 10)


## Notebook Summary
We now have the following dataframes to be used for training / testing:
- features_train (which includes the use of greeks_train)
- features_test (which includes the use of greeks_test)
- df_stock_train
- df_stock_test
- call_options_train
- call_options_test
