## Project Title
Project description

The notebook is structured as follows:
1. 

## Import Libraries

In [101]:
# For data manipulation
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# For ignoring warnings
import warnings
warnings.simplefilter("ignore")

# For data visualisation
import matplotlib.pyplot as plt

# Helper functions
import sys
sys.path.append("..")

<a id='import'></a>
## Import the Data

In [107]:
# Load the daily options quotes and the daily S&P 500 index prices,
# using the first column of each file as the index
options_data = pd.read_csv('./data/options_daily_sp500_2018_2022.csv', index_col=0)
daily_data_SP500 = pd.read_csv('./data/daily_sp500_2018_2022.csv', index_col=0)

# Convert both indexes to datetime so the two frames can be aligned on date
options_data.index = pd.to_datetime(options_data.index)
daily_data_SP500.index = pd.to_datetime(daily_data_SP500.index)

# Left-join the underlying's OHLC prices onto the options data by index,
# then drop every row that has a missing value
options_data = pd.merge(
    options_data,
    daily_data_SP500[['Open', 'High', 'Low', 'Close']],
    how='left',
    left_index=True,
    right_index=True,
).dropna()

# Preview the merged data
options_data.head()

Unnamed: 0_level_0,STRIKE,C_IV,P_IV,C_LAST,P_LAST,Open,High,Low,Close
[QUOTE_DATE],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-02,2695.0,0.07549,0.06018,19.98,20.1,2683.72998,2695.889893,2682.360107,2695.810059
2018-01-03,2715.0,0.07249,0.05435,18.13,19.75,2697.850098,2714.370117,2697.77002,2713.060059
2018-01-04,2725.0,0.06976,0.06254,21.6,19.25,2719.310059,2729.290039,2719.070068,2723.98999
2018-01-05,2740.0,0.0746,0.06344,22.0,18.68,2731.330078,2743.449951,2727.919922,2743.149902
2018-01-08,2745.0,0.07366,0.06348,20.49,17.45,2742.669922,2748.51001,2737.600098,2747.709961


<a id='signals'></a>
## Generate Signals
In this section a signal is generated at the start of every month, where the forecasted volatility is compared to the implied volatility of options expiring at the end of that month. If the forecasted volatility is greater than the implied volatility, a straddle is bought, while if the forecasted volatility is less than the implied volatility, a straddle is sold.

### Setup

## Helper Functions

In [142]:
# Find the likelihood for given parameters
def garch_likelihood(params, returns, vol):
    """Objective minimised when fitting the GARCH(1,1)-style parameters.

    For every position ``i >= 1`` the variance is modelled as
    ``gamma * vol[0] + alpha * returns[i-1]**2 + beta * vol[i]`` and the
    objective accumulates ``log(variance) + returns[i]**2 / variance``.
    Smaller values mean a better fit, so the result can be passed straight
    to a minimiser.

    Parameters
    ----------
    params : sequence
        ``(gamma, alpha, beta)``; only the first three entries are read.
    returns : array-like
        One-dimensional series of returns.
    vol : array-like
        One-dimensional series of volatility estimates; it must have at
        least ``len(returns)`` entries.

    Returns
    -------
    float
        The accumulated objective (``0.0`` when fewer than two returns are
        supplied, because the loop body never runs).
    """
    gamma, alpha, beta = params[0], params[1], params[2]

    # Work on plain NumPy arrays so that every integer lookup below is
    # positional. Integer keys on a pandas Series with a non-integer index
    # (e.g. dates) only work through a deprecated positional fallback.
    returns = np.asarray(returns, dtype=float)
    vol = np.asarray(vol, dtype=float)

    objective = 0.0
    for i in range(1, len(returns)):
        variance = gamma * vol[0] + alpha * returns[i-1]**2 + beta * vol[i]
        objective += np.log(variance) + returns[i]**2 / variance
    return objective


# Starting guess for the three model parameters (gamma, alpha, beta)
initial_params = [0.1] * 3
# Each of the three parameters is bounded to the interval [0, 1]
params_bounds = [(0, 1)] * 3

# Find max likelihood parameters
def estimate_params(args):
    """Fit (gamma, alpha, beta) by minimising `garch_likelihood`.

    `args` is the tuple of extra arguments forwarded to `garch_likelihood`
    after the parameter vector, i.e. ``(returns, vol)``. The search starts
    from the module-level `initial_params`, is constrained by
    `params_bounds`, and uses SciPy's SLSQP method.

    Returns the optimiser's solution vector (``result.x``).
    """
    fit = minimize(
        garch_likelihood,
        initial_params,
        args=args,
        method='SLSQP',
        bounds=params_bounds,
    )
    return fit.x

def forecast_volatility(params, returns, vol):
    """One-step forecast from the fitted (gamma, alpha, beta) parameters.

    Evaluates ``gamma * vol[0] + alpha * returns[-1]**2 + beta * vol[-1]``,
    i.e. it combines the first volatility estimate, the square of the most
    recent return, and the most recent volatility estimate.

    Parameters
    ----------
    params : sequence
        ``(gamma, alpha, beta)``; only the first three entries are read.
    returns : array-like
        One-dimensional series of returns; only the last entry is used.
    vol : array-like
        One-dimensional series of volatility estimates; only the first and
        last entries are used.

    Returns
    -------
    float
        The forecast value.
    """
    gamma, alpha, beta = params[0], params[1], params[2]

    # Convert to NumPy arrays so that 0 and -1 are positions, not labels.
    # Integer keys on a date-indexed pandas Series only work through a
    # deprecated positional fallback.
    last_return = np.asarray(returns, dtype=float)[-1]
    vol = np.asarray(vol, dtype=float)

    return gamma * vol[0] + alpha * last_return**2 + beta * vol[-1]


### Backtesting Functions

In [143]:
# Three separate, initially empty dataframes that store the individual trade details,
# the tradebook, and the mark to market value of the current position
trade_details, trade_book, mark_to_market = (pd.DataFrame() for _ in range(3))

# Function to look up the premium of a single option leg
def premium(type, data):
    """Return the last traded price of one option leg.

    `type` equal to ``'CE'`` selects ``data['C_LAST']``; any other value
    selects ``data['P_LAST']``.
    """
    price_column = 'C_LAST' if type == 'CE' else 'P_LAST'
    return data[price_column]

# Function to setup a straddle
def new_straddle(data, direction):
    """Build a two-row dataframe describing a straddle (one call, one put).

    Both legs share the strike ``data.STRIKE``. The position is ``1`` for
    every leg when `direction` is ``'long'`` and ``-1`` for any other value.
    Each leg's premium comes from `premium`, evaluated on `data`.

    Returns a dataframe with the columns ``option_type``, ``strike``,
    ``position`` and ``premium``.
    """
    legs = ['CE', 'PE']
    side = 1 if direction == 'long' else -1
    return pd.DataFrame({
        'option_type': legs,
        'strike': data.STRIKE,
        'position': side,
        'premium': [premium(leg, data) for leg in legs],
    })

def m_to_m(mark_to_market, strategy, date):
    """Append `strategy`, stamped with `date`, to the mark-to-market frame.

    A ``'Date'`` column holding `date` is written onto `strategy` itself,
    so the caller's dataframe is modified. The frame passed in as
    `mark_to_market` is left untouched; the concatenation of it and
    `strategy` is returned.
    """
    strategy['Date'] = date
    frames = [mark_to_market, strategy]
    return pd.concat(frames)


In [179]:
# Sanity check: build a short straddle from the options row at position 252*4.
# The helper defined in this notebook is `new_straddle`; no function named `straddle` is defined here.
new_straddle(options_data.iloc[252*4], 'short')

Unnamed: 0,option_type,strike,position,premium
0,CE,4390.0,-1,85.52
1,PE,4390.0,-1,63.3


<a id='signals'></a>
## Generate Signals
In this section a signal is generated at the start of every month, where the forecasted volatility is compared to the implied volatility of options expiring at the end of that month. If the forecasted volatility is greater than the implied volatility, a straddle is bought, while if the forecasted volatility is less than the implied volatility, a straddle is sold.

In [165]:
# Run the strategy for the last year of data (rows from position 252*4 onwards).
# A date is evaluated whenever `last_signal` is at least 5; the counter is reset to 0
# only when a signal is actually set, and incremented on every date that is skipped.
last_year = options_data[252*4:]
# Start at 5 so that the very first date in `last_year` is evaluated
last_signal = 5
# Create a column to store signals (0 = no signal, 1 = forecast above both IVs, -1 = forecast below both IVs)
options_data['signal'] = 0
# Dictionary used to specify which aggregate function to apply to columns when changing from daily data to monthly data
columns_functions ={
    'Open': 'first',
    'Close': 'last',
    'High': 'max',
    'Low': 'min'
}

for date in last_year.index:
    if last_signal >= 5:
        # Use at most the last 252*4 rows up to `date` to forecast volatility.
        # NOTE(review): label slicing `[:date]` should include the row for `date` itself,
        # so the forecast may see that day's High/Low/Close — confirm this is intended.
        data = options_data[:date][-252*4:]
        data.index = pd.to_datetime(data.index)
        # Convert into monthly data to calculate monthly volatility
        monthly_data = data.resample('M').agg(columns_functions).dropna()
        # Estimate the volatility using the three different estimators.
        # Each estimator below is a single number that is broadcast into a constant column.
        
        # Close to Close volatility: standard deviation of monthly log returns
        monthly_returns = np.log(monthly_data['Close']/monthly_data['Close'].shift(1)).dropna()
        monthly_data['c_to_c'] = monthly_returns.std()
        
        # Parkinson (based on the log of the monthly High/Low ratio)
        # NOTE(review): the squared log ranges are summed, not averaged — confirm against the intended formula.
        high_low = np.log(monthly_data['High']/monthly_data['Low'])
        monthly_data['parkinson'] = np.sqrt((1/(4*np.log(2)))*(high_low**2).sum())
        
        # Garman-Klass
        # NOTE(review): uses sums rather than means, and close-to-close monthly returns in the
        # second term — confirm against the intended formula.
        monthly_data['garman_klass'] = np.sqrt((0.5)*((high_low**2).sum()) - (((2 * np.log(2))-1)*(monthly_returns**2).sum()))

        # Average Volatility: simple mean of the three estimator columns.
        # It has one more row than `monthly_returns`, whose first value was dropped.
        avg_vol = (monthly_data['garman_klass'] + monthly_data['parkinson'] + monthly_data['c_to_c']) / 3
        # Estimate GARCH(1,1) model parameters using max likelihood estimation and forecast volatility
        parameters = estimate_params((monthly_returns, avg_vol))
        # Debug output: prints the fitted parameter vector for every evaluated date
        print(parameters)
        forecasted_volatility = forecast_volatility(parameters, monthly_returns, avg_vol)
        # Compare with implied volatility: a signal is set only when the scaled forecast is on the
        # same side of BOTH the call IV and the put IV; otherwise nothing is set, the counter is
        # not reset, and the next date is evaluated again.
        # NOTE(review): the forecast is multiplied by 100 before the comparison — confirm that
        # C_IV / P_IV use the same scale and horizon as the monthly forecast.
        if forecasted_volatility*100 > options_data.loc[date].C_IV and forecasted_volatility*100 > options_data.loc[date].P_IV:
            # Mark every row dated from `date` through `date` + 4 calendar days.
            # NOTE(review): this is a calendar window, while `last_signal` counts rows of
            # `last_year`; fewer than five rows may be marked — confirm which is intended.
            options_data.loc[date:date+pd.DateOffset(days = 4), 'signal'] = 1
            last_signal = 0
        elif forecasted_volatility*100 < options_data.loc[date].C_IV and forecasted_volatility*100 < options_data.loc[date].P_IV:
            # Same calendar-day window as above, marked with -1
            options_data.loc[date:date+pd.DateOffset(days = 4), 'signal'] = -1
            last_signal = 0
    else:
        last_signal += 1
# Net sum of the signals over the backtest period (rows from position 252*4 onwards)
options_data[252*4:].signal.sum()

[0.00350117 0.19152429 0.00349314]
[0.00354855 0.1983202  0.00354873]
[0.00351103 0.18268399 0.00351491]
[0.00351709 0.2049588  0.00351693]
[0.00350995 0.21578499 0.00351018]
[0.00341312 0.18969467 0.00341248]
[0.0035642  0.18708944 0.00356309]
[0.003786   0.18821412 0.00379404]
[0.00947259 0.00841217 0.0094726 ]
[0.00367933 0.16812101 0.00368044]
[0.003764  0.1781879 0.0037656]
[0.00372618 0.17919421 0.00371941]
[0.01257687 0.01256904 0.01257687]
[0.00385951 0.18275143 0.00385274]
[0.00384609 0.17689521 0.00384305]
[0.00889588 0.00789839 0.00889583]
[0.0037009  0.18004339 0.0037043 ]
[0.01264796 0.01262263 0.01264796]


36

In [162]:
# Inspect the signals stored for the window from 2022-09-01 through 4 calendar days later
options_data['signal'].loc[
    pd.Timestamp("2022-09-01"):pd.Timestamp("2022-09-01") + pd.DateOffset(days=4)
]

 [QUOTE_DATE]
2022-09-01    1
2022-09-02    1
Name: signal, dtype: int64

## Backtest


In [176]:
# Backtest state: 0 means there is no open position
current_position = 0
# NOTE(review): this name shadows the built-in `exit`; it is kept unchanged in case later cells read it
exit = False
# Keep the signals in a DataFrame so that the P&L lives in a real column next to them.
# Assigning `.pnl = 0` on a Series would only set a Python attribute, not create a column.
backtest_data = options_data[["signal"]].copy()
backtest_data["pnl"] = 0.0

for date, signal in backtest_data["signal"].items():
    if current_position == 0:
        if signal == 1:
            # Select the options row by its date label with .loc;
            # `options_data[date]` would be treated as a column lookup and fail.
            straddle = new_straddle(options_data.loc[date], 'long')
            # Net premium of the straddle: sum of premium * position over both legs
            entry_price = round((straddle['premium']*straddle['position']).sum(), 2)
            

<a id='analytics'></a>
## Trade Level Analytics