In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.optimize import brentq
import matplotlib.pyplot as plt
from datetime import time


In [903]:
risk_free_rate=0.05
timeframe= '30min'
trading_sessions= 14

In [904]:
#Math Logic

# Black-Scholes formula
def black_scholes_price(S, K, T, r, sigma, option_type="C"):
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    if option_type == "C":
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    elif option_type == "P":
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

# Implied Volatility using Newton Raphson
def calculate_iv(option_price, S, K, T, r, option_type="C"):
    def bs_error(sigma):
        return black_scholes_price(S, K, T, r, sigma, option_type) - option_price

    try:
        return brentq(bs_error, 1e-6, 3)  # IV bounds: 0.000001 to 3
    except ValueError:
        return np.nan  #

#Find z-score for mean reversion
def calculate_zscore(data, lookback):
    mean = data.rolling(window=lookback).mean()
    std = data.rolling(window=lookback).std()
    zscore = (data - mean) / std
    return zscore

#Calculates sharpe for a pnl series
def calculate_sharpe_ratio(returns, risk_free_rate=0):
    excess_returns = returns - risk_free_rate
    sharpe_ratio= excess_returns.mean() / excess_returns.std()
    return sharpe_ratio

#Max drawdown of a pnl series
def calculate_drawdown(returns):
  cumulative_returns = (1 + returns).cumprod()
  peak = cumulative_returns.cummax()
  drawdown = (cumulative_returns - peak) / peak
  return drawdown

In [905]:
# Data operations

#Load Data
def load_data(file_path):
    data = pd.read_csv(file_path, parse_dates=['start_ts'], usecols=['symbol','start_ts','open','high','low','close','volume','vwap'])
    data = data.dropna()
    return data

#Convert Time Frames, by default every row is a 1min tick
def convert_eqt_data(eqt,new_timeframe):  

    #Strip off extra trading hours
    eqt = eqt.drop(eqt[eqt['start_ts'].dt.time< time(9,30,0)].index)
    eqt = eqt.drop(eqt[eqt['start_ts'].dt.time>= time(16,0,0)].index)

    eqt = eqt.set_index('start_ts')

    eqt_data = eqt.resample(new_timeframe).agg(    
        close= ('close', 'last'),
        Weighed_Price_Volume= ('vwap', lambda x:(x* eqt.loc[x.index, 'volume']).sum()),
        volume= ('volume','sum')
    )

    eqt_data['vwap'] = eqt_data['Weighed_Price_Volume']/eqt_data['volume']
    eqt_data=eqt_data.drop(columns=['Weighed_Price_Volume'])

    eqt_data=eqt_data.reset_index()
    eqt_data = eqt_data.dropna()
    return eqt_data

'''Computationally cumbersome- Assumptions
        1. Selected the strike just greater than spot may not be closest but should be fairly liquid
        2. TTE != 0DTE
        3. Trading only on option closes
'''
def convert_option_data(eqt,option,new_timeframe):  

    option=option.sort_values(by=['start_ts','expiry_date','strike_price'])
    option=option.reset_index(drop=True)

    results=[]
    
    for _,rows in eqt.iterrows():
        
        valid_expires = option[option['expiry_date'] > rows['start_ts']]
        closest_expires= valid_expires['expiry_date'].unique()[:2]

        otm_strikes= option[option['strike_price'] >= rows['close']]
        nearest_atm_strike= otm_strikes.iloc[0]['strike_price']

        filtered_options=option[(option['strike_price']==nearest_atm_strike) & (option['expiry_date'].isin(closest_expires))]
        # print(filtered_options.shape)

        option_data = (filtered_options.set_index('start_ts').groupby(['strike_price', 'expiry_date']).resample(new_timeframe).agg(
                close=('close', 'last'),
                volume=('volume', 'sum'),
            ).reset_index()
        )
        results.append(option_data)
    
    results=pd.concat(results, ignore_index=True).reset_index()
    results = results.dropna()
    
    return results

In [906]:
def generate_signals(data,lower_threshold=-1, upper_threshold=1, lookback=20):

    spread = data['asset1'] - data['asset2']
    positions = pd.Series(index=spread.index, data=0.0)
    pnl = pd.Series(index=spread.index, data=0.0)
    zscore=calculate_zscore(spread,lookback)
    
    in_position = False
    for i in range(lookback, len(spread)):
        if not in_position and zscore[i] < lower_threshold:
            positions[i] = 1 # Long trade
            in_position = True
        elif in_position and zscore[i] > upper_threshold:
            positions[i] = 0 # Close trade
            in_position = False
        elif not in_position and zscore[i] > upper_threshold:
            positions[i] = -1  #Short trade
            in_position = True
        elif in_position and zscore[i] < lower_threshold:
            positions[i] = 0 #Close trade
            in_position = False
        else:
            positions[i] = positions[i-1] #Hold
            
        if i > 0:
            pnl[i] = positions[i] * (spread[i] - spread[i - 1]) #Intratick pnl
            
    cumulative_pnl = pnl.cumsum()
    sharpe = calculate_sharpe_ratio(pnl[lookback:])
    drawdown = calculate_drawdown(pnl[lookback:])
    max_drawdown = drawdown.min()  

    results = pd.DataFrame({
        'zscore': zscore,
        'positions': positions,
        'pnl': pnl,
        'cumulative_pnl': cumulative_pnl
    })
    
    print(f"Sharpe Ratio: {sharpe}")
    print(f"Maximum Drawdown: {max_drawdown}")

    return results

No need to sort and all, we will use only ATM hence we will fetch for values in the option chain and hopefully around ATM we have good volumes.
Also lets straight work on IV spread implementation
Can we keep nearest strikes and scale orders accordingly? for more efficiency: right now we are just choosing the nearest strikes
We need to delta hedge again and again as everyday ATM strike selection will generate some delta

1min many strike, many expiry
filtered = atm, expiry (2) 1min 



In [907]:
'''Equity trading hours from 4am to 8pm'''
eqt_path = 'CBOE Data/testeqt.csv'
eqt = load_data(eqt_path)

'''Filter the Data
    1. Convert to 30min timeframe
    2. Strip off extra trading hours
'''
eqt_data = convert_eqt_data(eqt,timeframe)
print(eqt_data.head(50))

              start_ts     close    volume        vwap
0  2023-01-03 09:30:00  126.9336  13683695  129.271024
1  2023-01-03 10:00:00  125.4600  10686686  126.092497
2  2023-01-03 10:30:00  125.5950   9023652  125.404657
3  2023-01-03 11:00:00  125.3400   8429008  125.209053
4  2023-01-03 11:30:00  124.7488   6110185  124.879579
5  2023-01-03 12:00:00  124.6554   4796729  124.763794
6  2023-01-03 12:30:00  124.5900   4015050  124.626831
7  2023-01-03 13:00:00  124.6500   4074701  124.427184
8  2023-01-03 13:30:00  124.8150   4116170  124.566492
9  2023-01-03 14:00:00  124.5726   3589739  124.678465
10 2023-01-03 14:30:00  124.6150   5941235  124.697510
11 2023-01-03 15:00:00  124.6200   5301049  124.698999
12 2023-01-03 15:30:00  125.0200  10287122  125.052305


In [908]:
'''Options trading hours from 9.30am to 4pm = 14 Trading sessions'''
option_path = 'CBOE Data/testopt.csv'
option = load_data(option_path)

#Add columns 
option_characteristics=pd.DataFrame((x.split('_') for x in option['symbol']), columns=['ticker','expiry_date','strike_price','option_type'])
option_characteristics['expiry_date']=pd.to_datetime(option_characteristics['expiry_date'])
option_characteristics['strike_price']= option_characteristics['strike_price'].astype(float)
option_data = pd.concat([option,option_characteristics], axis=1)

#Strip off all put options
option_data=option_data.drop(option_data[option_data['option_type']=='P'].index)

#Convert to 30min Timeframe
option_data = convert_option_data(eqt_data, option_data,timeframe)
print(option_data.to_string())

# option_data['tte']=pd.to_timedelta(option_data['expiry_date']-option_data['start_ts'])


     index  strike_price expiry_date            start_ts  close  volume
0        0         127.0  2023-01-06 2023-01-03 09:30:00   2.57     618
1        1         127.0  2023-01-06 2023-01-03 10:00:00   1.95    5184
2        2         127.0  2023-01-06 2023-01-03 10:30:00   1.87    2553
3        3         127.0  2023-01-06 2023-01-03 11:00:00   1.71    2605
4        4         127.0  2023-01-06 2023-01-03 11:30:00   1.51    1224
5        5         127.0  2023-01-06 2023-01-03 12:00:00   1.46     750
6        6         127.0  2023-01-06 2023-01-03 12:30:00   1.42     761
7        7         127.0  2023-01-06 2023-01-03 13:00:00   1.50     864
8        8         127.0  2023-01-06 2023-01-03 13:30:00   1.50     647
9        9         127.0  2023-01-06 2023-01-03 14:00:00   1.43     298
10      10         127.0  2023-01-06 2023-01-03 14:30:00   1.44     829
11      11         127.0  2023-01-06 2023-01-03 15:00:00   1.41     254
12      12         127.0  2023-01-06 2023-01-03 15:30:00   1.52 

In [None]:


option_data_iv = calculate_iv(option_data['close'],security['close'],option_data['strike'], option_data['time_to_expiry'], risk_free_rate ,option_data['option_type'])
print(option_data_iv.head())
