In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.optimize import brentq
import matplotlib.pyplot as plt
from datetime import time


In [1004]:
#Global Variables

risk_free_rate=0.05
timeframe= '30min'
trading_sessions= 14

In [1001]:
# Data operations

#Load Data
def load_data(file_path):
    data = pd.read_csv(file_path, parse_dates=['start_ts'], usecols=['symbol','start_ts','open','high','low','close','volume','vwap'])
    data = data.dropna()
    return data

def strip_extra_trading_hours(data):
    
    data=data.reset_index()

    data = data.drop(data[data['start_ts'].dt.time< time(9,30,0)].index)
    data = data.drop(data[data['start_ts'].dt.time>= time(16,0,0)].index)

    return data

#Convert Time Frames, by default every row is a 1min tick
def convert_eqt_data(eqt,new_timeframe):  

    eqt = eqt.set_index('start_ts')

    eqt_data = eqt.resample(new_timeframe).agg(    
        close= ('close', 'last'),
        Weighed_Price_Volume= ('vwap', lambda x:(x* eqt.loc[x.index, 'volume']).sum()),
        volume= ('volume','sum')
    )

    eqt_data['vwap'] = eqt_data['Weighed_Price_Volume']/eqt_data['volume']
    eqt_data=eqt_data.drop(columns=['Weighed_Price_Volume'])

    #Strip off extra trading hours
    eqt_data=strip_extra_trading_hours(eqt_data)

    eqt_data= eqt_data.reset_index()
    eqt_data = eqt_data.dropna()
    return eqt_data

'''Computationally cumbersome- Assumptions
        1. Selected the strike just greater than spot may not be closest but should be fairly liquid
        2. TTE != 0DTE
        3. Trading only on option closes
'''
def convert_option_data(option,new_timeframe):  

    #Add columns 
    option_characteristics=pd.DataFrame((x.split('_') for x in option['symbol']), columns=['ticker','expiry_date','strike_price','option_type'])
    option_characteristics['expiry_date']=pd.to_datetime(option_characteristics['expiry_date'])
    option_characteristics['strike_price']= option_characteristics['strike_price'].astype(float)
    option_data = pd.concat([option,option_characteristics['option_type']], axis=1)

    # Data filtering
    option_data=option_data.drop(option_data[option_data['option_type']=='P'].index)

    #Create 2 df to store data
    options_close= option_data.pivot(index='start_ts', columns='symbol', values='close')
    options_volume= option_data.pivot(index='start_ts', columns='symbol', values='volume')
    
    options_close= options_close.resample(new_timeframe).last()
    options_volume= options_volume.resample(new_timeframe).sum()     

    options_close= strip_extra_trading_hours(options_close)

    return options_close

    

In [963]:
#Math Logic

# Black-Scholes formula
def black_scholes_price(S, K, T, r, sigma, option_type="C"):
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    if option_type == "C":
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    elif option_type == "P":
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

# Implied Volatility using Newton Raphson
def calculate_iv(option_price, S, K, T, r, option_type="C"):
    def bs_error(sigma):
        return black_scholes_price(S, K, T, r, sigma, option_type) - option_price

    try:
        return brentq(bs_error, 1e-6, 3)  # IV bounds: 0.000001 to 3
    except ValueError:
        return np.nan  #

#Find z-score for mean reversion
def calculate_zscore(data, lookback):
    mean = data.rolling(window=lookback).mean()
    std = data.rolling(window=lookback).std()
    zscore = (data - mean) / std
    return zscore

#Calculates sharpe for a pnl series
def calculate_sharpe_ratio(returns, risk_free_rate=0):
    excess_returns = returns - risk_free_rate
    sharpe_ratio= excess_returns.mean() / excess_returns.std()
    return sharpe_ratio

#Max drawdown of a pnl series
def calculate_drawdown(returns):
  cumulative_returns = (1 + returns).cumprod()
  peak = cumulative_returns.cummax()
  drawdown = (cumulative_returns - peak) / peak
  return drawdown

In [964]:
def generate_signals(data,lower_threshold=-1, upper_threshold=1, lookback=20):

    spread = data['asset1'] - data['asset2']
    positions = pd.Series(index=spread.index, data=0.0)
    pnl = pd.Series(index=spread.index, data=0.0)
    zscore=calculate_zscore(spread,lookback)
    
    in_position = False
    for i in range(lookback, len(spread)):
        if not in_position and zscore[i] < lower_threshold:
            positions[i] = 1 # Long trade
            in_position = True
        elif in_position and zscore[i] > upper_threshold:
            positions[i] = 0 # Close trade
            in_position = False
        elif not in_position and zscore[i] > upper_threshold:
            positions[i] = -1  #Short trade
            in_position = True
        elif in_position and zscore[i] < lower_threshold:
            positions[i] = 0 #Close trade
            in_position = False
        else:
            positions[i] = positions[i-1] #Hold
            
        if i > 0:
            pnl[i] = positions[i] * (spread[i] - spread[i - 1]) #Intratick pnl
            
    cumulative_pnl = pnl.cumsum()
    sharpe = calculate_sharpe_ratio(pnl[lookback:])
    drawdown = calculate_drawdown(pnl[lookback:])
    max_drawdown = drawdown.min()  

    results = pd.DataFrame({
        'zscore': zscore,
        'positions': positions,
        'pnl': pnl,
        'cumulative_pnl': cumulative_pnl
    })
    
    print(f"Sharpe Ratio: {sharpe}")
    print(f"Maximum Drawdown: {max_drawdown}")

    return results

In [1002]:
'''Equity trading hours from 4am to 8pm'''
eqt_path = 'CBOE Data/testeqt.csv'
eqt = load_data(eqt_path)

'''Filter the Data
    1. Convert to 30min timeframe
    2. Strip off extra trading hours
'''
eqt_data = convert_eqt_data(eqt,timeframe)
print(eqt_data.to_string())

    index            start_ts     close    volume        vwap
0      11 2023-01-03 09:30:00  126.9336  13683695  129.271024
1      12 2023-01-03 10:00:00  125.4600  10686686  126.092497
2      13 2023-01-03 10:30:00  125.5950   9023652  125.404657
3      14 2023-01-03 11:00:00  125.3400   8429008  125.209053
4      15 2023-01-03 11:30:00  124.7488   6110185  124.879579
5      16 2023-01-03 12:00:00  124.6554   4796729  124.763794
6      17 2023-01-03 12:30:00  124.5900   4015050  124.626831
7      18 2023-01-03 13:00:00  124.6500   4074701  124.427184
8      19 2023-01-03 13:30:00  124.8150   4116170  124.566492
9      20 2023-01-03 14:00:00  124.5726   3589739  124.678465
10     21 2023-01-03 14:30:00  124.6150   5941235  124.697510
11     22 2023-01-03 15:00:00  124.6200   5301049  124.698999
12     23 2023-01-03 15:30:00  125.0200  10287122  125.052305
13     59 2023-01-04 09:30:00  126.6788  12230060  126.671923
14     60 2023-01-04 10:00:00  125.9912   8927742  125.957751
15     6

In [1003]:
'''Options trading hours from 9.30am to 4pm = 14 Trading sessions'''
option_path = 'CBOE Data/testopt.csv'
option = load_data(option_path)

#Convert to 30min Timeframe
option_data = convert_option_data(option,timeframe)
print(option_data.to_string())



symbol            start_ts  AAPL_20230106_100_C  AAPL_20230106_105_C  AAPL_20230106_107_C  AAPL_20230106_108_C  AAPL_20230106_109_C  AAPL_20230106_110_C  AAPL_20230106_111_C  AAPL_20230106_112_C  AAPL_20230106_113_C  AAPL_20230106_114_C  AAPL_20230106_115_C  AAPL_20230106_116_C  AAPL_20230106_117_C  AAPL_20230106_118_C  AAPL_20230106_119_C  AAPL_20230106_120_C  AAPL_20230106_121_C  AAPL_20230106_122_C  AAPL_20230106_123_C  AAPL_20230106_124_C  AAPL_20230106_125_C  AAPL_20230106_126_C  AAPL_20230106_127_C  AAPL_20230106_128_C  AAPL_20230106_129_C  AAPL_20230106_130_C  AAPL_20230106_131_C  AAPL_20230106_132_C  AAPL_20230106_133_C  AAPL_20230106_134_C  AAPL_20230106_135_C  AAPL_20230106_136_C  AAPL_20230106_137_C  AAPL_20230106_138_C  AAPL_20230106_139_C  AAPL_20230106_140_C  AAPL_20230106_141_C  AAPL_20230106_142_C  AAPL_20230106_143_C  AAPL_20230106_144_C  AAPL_20230106_145_C  AAPL_20230106_146_C  AAPL_20230106_147_C  AAPL_20230106_148_C  AAPL_20230106_149_C  AAPL_20230106_150_C  AAPL_2

In [None]:


option_data_iv = calculate_iv(option_data['close'],eqt_data['close'],option_data['strike'], option_data['time_to_expiry'], risk_free_rate ,option_data['option_type'])
print(option_data_iv.head())
