# **ALPHA VOLATILITY GENERATION - SYSTEMATIC TRADING STRATEGIES PROJECT**

In [1]:
import numpy as np
from scipy.stats import norm
import pandas as pd

In [3]:
from abc import ABC, abstractmethod

In [4]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [5]:
df_train = pd.read_pickle("df_train.pkl")

In [6]:
df_validation = pd.read_pickle("df_validation.pkl")

In [7]:
df_test = pd.read_pickle("df_test.pkl")

In [8]:
df_price = pd.read_pickle("df_price.pkl")

In [9]:
df_option = pd.read_pickle("df_merged.pkl")

In [10]:
class Market_data:

    def __init__(self, df_train, df_validation, df_test, df_price, df_option):
        self.df_train = df_train.copy()
        self.df_validation = df_validation.copy()
        self.df_test = df_test.copy()
        self.df_price = df_price.copy()
        self.df_option = df_option.copy()

    
    def get_dataset(self, name):
        if name == "train":
            return self.df_train
        elif name == "validation":
            return self.df_validation
        elif name == "test":
            return self.df_test
        else:
            raise ValueError(f"Unknown dataset: {name}")


    def get_price_df(self):
        return self.df_price
    

    def get_row_price_date(self, date):
        ''' This function retrieves from the df_price DataFrame the row corresponding 
        to the date provided as a parameter.

        RETURNS
        -------
        type : pandas.core.series.Series
        '''
        rows = self.df_price[self.df_price["Date"] == date]
        if rows.empty:
            raise KeyError(f"Date {date} not found")
        return rows.iloc[0]


    def get_rows_price_date(self,date1,date2):
        '''This function retrieves from the DataFrame the spot prices for the 
        dates between date1 and date2.
        
        RETURNS
        -------
        type : pandas.core.frame.DataFrame
        '''
        filtered = self.df_price[(self.df_price['Date'] >= date1) & (self.df_price['Date'] <= date2)]
        return filtered

    def get_prices_date_list(self,date1,date2):
        '''This function returns the list of spot prices for dates between date1 and date2.

        RETURNS
        -------
        type : np.array
        '''
        filtered = self.get_rows_price_date(date1,date2)
        return filtered['Price'].to_numpy()


    def get_prices_list_lookahead(self, date, nb_period):
        ''' This function returns the list of spot prices for the nb_period preceding the date 
        provided as a parameter.

        RETURNS
        -------
        type : list
        '''
        list_date = self.df_price['Date'].tolist()
        id_date = list_date.index(date)
        filtered = self.df_price.iloc[id_date - nb_period : id_date + 1]
        return filtered['Price'].tolist()

    def get_realized_volatility(self,prices):
        '''This function calculates the annualized realized volatility for a 
        given list of stock prices provided as the parameter prices.

        RETURNS
        -------
        type : float
        '''
        log_returns = np.diff(np.log(prices))
        realized_vol = np.sqrt(np.mean(log_returns ** 2) * 252)
        return realized_vol

    
    def get_list_realized_volatility(self,date,nb_period):
        '''This function returns the list of realized volatilities, 
        each computed over nb_period days, for each day preceding the date 
        provided as a parameter.
        In other words : it returns the evolution of the realized volatility
        over the previous nb_period date preceding date

        RETURNS
        -------
        type : list of float
        '''
        vol = []
        list_date = self.df_price['Date'].tolist()
        id_date = list_date.index(date)
        list_date_lookahead = list_date[id_date - nb_period : id_date + 1] # list of nb_period date preceding id_date
        for d in list_date_lookahead:
            real_vol = self.get_realized_volatility(self.get_prices_list_lookahead(d,nb_period))
            vol.append(real_vol)
        return vol

    def get_list_IV(self,date, nb_period):
        '''This function returns the list of implied volatilities (IV) for 
        the nb_period dates preceding the given date. For each date, the IV 
        is computed as the average of the IVs of the options available on 
        that date.

        RETURNS
        -------
        type : list of float
        '''
        IV = []
        list_date = self.df_price['Date'].tolist()
        id_date = list_date.index(date)
        list_date_lookahead = list_date[id_date - nb_period : id_date + 1] # id_date is included, so the iv of the current straddle is included 

        df_train_filtered = self.df_train[self.df_train['Date'].isin(list_date_lookahead)]
        df_validation_filtered = self.df_validation[self.df_validation['Date'].isin(list_date_lookahead)]

        
        df_filtered_date = pd.concat([df_train_filtered, df_validation_filtered], ignore_index=True)

        mean_iv_by_date = df_filtered_date.groupby('Date')['IV'].mean()
        mean_iv_list = mean_iv_by_date.tolist()

        return mean_iv_list

In [348]:
#for i in range(len(df_price)):
    #print(f"date:{df_price['Date'].iloc[i]},  price:{df_price['Price'].iloc[i]}, Price_unsplited:{df_price['Price_unsplited'].iloc[i]}")

SyntaxError: incomplete input (2164930908.py, line 2)

In [157]:
class Backtester:

    def __init__(self, market_data, strategy):
        '''
        PARAMETERS
        ----------
        market_data
            Class Market_data
        strategy
            Class Strategy
        '''
        self.market_data = market_data
        self.strategy = strategy
    
    # ------------------------- PRICERS AND GREEKS --------------------------------------
    def black_scholes_call_price(self, S, K, T, r, sigma):
        if T <= 0:
            return max(S - K, 0)
        d1 = (np.log(S / K) + (r + 0.5 * sigma**2)* T) / (sigma * np.sqrt(T))
        d2 = d1 - sigma * np.sqrt(T)
        call_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
        return call_price

    def black_scholes_put_price(self, S, K, T, r, sigma):
        if T <= 0:
            return max(K - S, 0)
        d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
        d2 = d1 - sigma * np.sqrt(T)
        put_price = K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)
        return put_price

    def black_scholes_call_delta(self, S, K, T, r, sigma):
        if T <= 0:
            return 1.0 if S > K else 0.0
        d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
        return norm.cdf(d1)

    def black_scholes_put_delta(self, S, K, T, r, sigma):
        if T <= 0:
            return -1.0 if S < K else 0.0
        d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
        return norm.cdf(d1) - 1 
    # --------------------------------------------------------------------------------

    
    def run_gamma_scalping(self, straddle_row):
        '''

        PARAMETERS
        ----------
        straddle_row : 
            pandas.core.series.Series
        '''
        res = {}

        PNL_evolution = [] # evolution of the pnl at each time step of the delta hedging
        hedge_gain = []

        Straddle_t0 = straddle_row['Straddle']

        df_prices_lifetime = self.market_data.get_rows_price_date(straddle_row['Date'],straddle_row[' [EXPIRE_DATE]']) # We retrieve the df of the spot prices associated to the lifetime of the straddle

        # avoid splited stock problem
        if straddle_row['Date'] <= " 2020-07-31":
            date_to_price = dict(zip(df_prices_lifetime['Date'], df_prices_lifetime['Price_unsplited'])) # We create the dictionnary of date and associated unsplited price
        elif straddle_row['Date'] >= " 2020-09-01":
            date_to_price = dict(zip(df_prices_lifetime['Date'], df_prices_lifetime['Price'])) # We create the dictionnary of date and associated price
        else:
            return {'PNL':0, 'price_strad':0, 'gross_gain':0} 


        list_date = list(date_to_price.keys())

        r = 0.03
        iv_call = straddle_row[' [C_IV]']
        iv_put = straddle_row[' [P_IV]']

        # Hedging part
        for i in range(1, len(date_to_price)):
            S_tbefore = date_to_price[list_date[i-1]]
            S_t = date_to_price[list_date[i]]

            time_to_maturity_before = (len(list_date) - (i - 1)) / 252
            time_to_maturity = (len(list_date) - i) / 252

            C_tbefore = self.black_scholes_call_price(S_tbefore, straddle_row[' [STRIKE]'], time_to_maturity_before, r, iv_call)
            C_t = self.black_scholes_call_price(S_t, straddle_row[' [STRIKE]'], time_to_maturity, r, iv_call)
            P_tbefore = self.black_scholes_put_price(S_tbefore, straddle_row[' [STRIKE]'], time_to_maturity_before, r, iv_put)
            P_t = self.black_scholes_put_price(S_t, straddle_row[' [STRIKE]'], time_to_maturity, r, iv_put)

            Straddle_before = C_tbefore + P_tbefore
            Straddle_t = C_t + P_t

            delta_call_t = self.black_scholes_call_delta(S_tbefore, straddle_row[' [STRIKE]'], time_to_maturity_before, r, iv_call)
            delta_put_t = self.black_scholes_put_delta(S_tbefore, straddle_row[' [STRIKE]'], time_to_maturity_before, r, iv_put)

            hedge_value = (-delta_call_t * (S_t - S_tbefore) +
                        (delta_call_t * S_tbefore - C_tbefore) * (np.exp(r * (1/252)) - 1)
                        - delta_put_t * (S_t - S_tbefore) +
                        (delta_put_t * S_tbefore - P_tbefore) * (np.exp(r * (1/252)) - 1))

            hedge_gain.append(hedge_value)

            pnl_total_t = (Straddle_t - Straddle_t0) + np.sum(hedge_gain[:i])
            PNL_evolution.append(pnl_total_t)
        
        res['PNL'] = PNL_evolution[-1]
        res['price_strad'] = Straddle_t0
        res['gross_gain'] = PNL_evolution[-1] + Straddle_t0

        return res





    
    # ------------------------------------------- IV vs RV Backtesting ---------------------------------------------------------------
    def run_backtest_IVvsRV(self):
        '''This function backtests the predictive power of the strategy by computing the proportion
         of correct predictions between implied volatility (IV) and realized volatility (RV) on 
         df_train and df_validation
        '''
        df_filtered = self.market_data.df_train[self.market_data.df_train['Date'] > " 2016-03-01"]

        mask_trade = df_filtered.apply(self.strategy.should_trade, axis=1)
        mask_trade2 = df_validation.apply(self.strategy.should_trade, axis=1)

        df_trades = df_filtered[mask_trade].copy()
        df_trades2 = df_validation[mask_trade2].copy()

        df_trades["signal"] = df_trades.apply(self.strategy.get_signal, axis=1)
        df_trades2["signal"] = df_trades2.apply(self.strategy.get_signal, axis=1)

        df_trades["success"] = np.where(
            ((df_trades["signal"] == "LONG")  & (df_trades["vol_real"] > df_trades["IV"])) |
            ((df_trades["signal"] == "SHORT") & (df_trades["vol_real"] < df_trades["IV"])),
            1, 0
        )

        df_trades2["success"] = np.where(
            ((df_trades2["signal"] == "LONG")  & (df_trades2["vol_real"] > df_trades2["IV"])) |
            ((df_trades2["signal"] == "SHORT") & (df_trades2["vol_real"] < df_trades2["IV"])),
            1, 0
        )

        success_rate_train = df_trades["success"].mean() if not df_trades.empty else 0.0
        success_rate_validation = df_trades2["success"].mean() if not df_trades2.empty else 0.0


        print(f"The strategy achieved a success rate of {success_rate_train:.2%} on {len(df_trades)} straddle trades in df_train.")
        print(f"The strategy achieved a success rate of {success_rate_validation:.2%} on {len(df_trades2)} straddle trades in df_validation.")

        return None
    # --------------------------------------------------------------------------------------------------------------------------------       
        



    # ------------------------------------------- PNL generation backtesting ---------------------------------------------------------
    def run_row(self, straddle_row):
        trade_signal = "SKIP"
        res = {}
        if self.strategy.should_trade(straddle_row): # return a bool
            signal = self.strategy.get_signal(straddle_row)
            scalp = self.run_gamma_scalping(straddle_row)
            if signal == 'LONG' :
                res['PNL'] = scalp['PNL']
                res['trade_signal'] = 'LONG'
            else : # If signal == 'SHORT'
                res['PNL'] = -scalp['PNL']
                res['trade_signal'] = 'SHORT'
            res['price_strad'] = scalp['price_strad']
        else :
            res = {'PNL' : 0,  'trade_signal' : trade_signal, 'price_strad' : 0}
        #print(res)
        return res
 

    def run_backtest_train(self):
        PNL = 0
        Investment_capital = 0
        df_filtered = self.market_data.df_train[self.market_data.df_train['Date'] > " 2016-03-01"]
        for _, row in df_filtered.iterrows():
            res = self.run_row(row)
            PNL += res['PNL']
            Investment_capital += res['price_strad']
            #print(f"Date:{row['Date']}, PNL:{PNL}, Decision to trade:{res['trade_signal']}, iv:{row['IV']}, rv:{row['vol_real']}")
        ROI = PNL / Investment_capital if Investment_capital != 0 else 0
        
        print(f"PNL:{PNL}, ROI:{ROI*100} %")
        return None
    
    def run_backtest_validation(self):
        PNL = 0
        Investment_capital = 0
        for _,row in self.market_data.df_validation.iterrows():
            res = self.run_row(row)
            PNL += res['PNL']
            Investment_capital += res['price_strad']
            #print(f"Date:{row['Date']}, PNL:{PNL}, Decision to trade:{res['trade_signal']}, iv:{row['IV']}, rv:{row['vol_real']}")
        ROI = PNL / Investment_capital if Investment_capital != 0 else 0
        
        print(f"PNL:{PNL}, ROI:{ROI*100} %")
        return None
    # -----------------------------------------------------------------------------------------------------------------------------
    


In [530]:
mask_trade2 = df_validation.apply(regivrv.should_trade, axis=1)

In [545]:
for _,row in df_validation.iterrows():
    print(f"date:{row['Date']}, should_trade:{regivrv.should_trade(row)}")

date: 2021-01-21, should_trade:False
date: 2021-01-21, should_trade:False
date: 2021-01-21, should_trade:False
date: 2021-01-21, should_trade:False
date: 2021-01-21, should_trade:False
date: 2021-01-21, should_trade:False
date: 2021-01-22, should_trade:False
date: 2021-01-22, should_trade:False
date: 2021-01-22, should_trade:False
date: 2021-01-22, should_trade:False
date: 2021-01-22, should_trade:False
date: 2021-01-25, should_trade:False
date: 2021-01-25, should_trade:False
date: 2021-01-25, should_trade:False
date: 2021-01-25, should_trade:False
date: 2021-01-25, should_trade:False
date: 2021-01-26, should_trade:False
date: 2021-01-26, should_trade:False
date: 2021-01-26, should_trade:False
date: 2021-01-26, should_trade:False
date: 2021-01-26, should_trade:False
date: 2021-01-27, should_trade:False
date: 2021-01-27, should_trade:False
date: 2021-01-27, should_trade:False
date: 2021-01-27, should_trade:False
date: 2021-01-27, should_trade:False
date: 2021-01-27, should_trade:False
d

In [538]:
df_trades2 = df_validation[mask_trade2].copy()

In [None]:
df_trades2

In [None]:
back.run_backtest_train()

In [None]:
back.run_backtest_validation()

In [13]:
'''
market_data = MarketData(df_train, df_validation, df_test, df_price, df_option)
strategy = IVvsRVStrategy(lookahead_days=10, market_data)
backtester = Backtester(strategy, market_data)

results = backtester.run()
'''

'\nmarket_data = MarketData(df_train, df_validation, df_test, df_price, df_option)\nstrategy = IVvsRVStrategy(lookahead_days=10, market_data)\nbacktester = Backtester(strategy, market_data)\n\nresults = backtester.run()\n'

In [12]:
class Model(ABC):

    def __init__(self, market_data):
        self.market_data = market_data

        
    @abstractmethod
    def train_model(self):
        pass


In [13]:
class Regression(Model):

    def __init__(self, market_data, X, Y):
        super().__init__(market_data)
        self.X = X
        self.Y = Y
    
    def train_model(self):

        X = np.array(X).reshape(-1, 1)
        Y = np.array(Y)

        model = LinearRegression()
        model.fit(X, Y)

        return model

In [14]:
class BaseStrategy(ABC):

    def __init__(self, market_data):
        self.market_data = market_data

    @abstractmethod
    def generate_alpha(self, straddle_row):
        pass

    @abstractmethod
    def should_trade(self, straddle_row):
        pass

----

----

# **Regression network : Strategy based on a linear regression between implied volatility (IV) and realized volatility (RV)**

The strategy takes advantage of differences between implied volatility (IV) and realized volatility (RV) to find trading opportunities. For each option, the last 10 dates are considered, and a linear regression between IV and RV is performed to extract the slope, which reflects the recent trend. Here is the momentum strategy we have chosen **based on what worked best during the backtesting**  :

- If (the IV slope is above the 80th percentile of past slopes) **and** (the RV slope is below the 20th percentile), IV is overestimated compared to RV, and the strategy goes **LONG**.
- Conversely, if (the IV slope is below the 20th percentile) **and** (the RV slope is above the 80th percentile), IV is underestimated compared to RV, and the strategy goes **SHORT**.

The strategy exploits recent imbalances between implied and realized volatility, buying when the market overestimates volatility and selling when it underestimates it.


**DRAWBACK** : The main drawback of this strategy is that IV is aggregated using a simple daily average (by quote_date of the straddle). As a result, the strategy does not capture the term structure of volatility. The slope obtained from the regression over the last nb_period days only reflects the average IV trend, which is then compared to realized volatility (standard deviation of returns over the last nb_period days), calculated in the same way.

To sum up, the strategy does not take into account the term structure of straddles during the backtest. Moreover, the choice of quantiles was arbitrary, based on exploratory data analysis, and the window size (nb_period = 10) was also chosen in the same way.

In [15]:
class Regression_realvol(BaseStrategy):
    
    def __init__(self, market_data, nb_period):
        super().__init__(market_data)
        self.nb_period = nb_period
        self.quantile_high = np.quantile(self.compute_slope_train(), 0.75)
        self.quantile_low = np.quantile(self.compute_slope_train(), 0.25)

    def compute_slope_train(self):
        slope_train = []
        df_filtered = self.market_data.df_train[self.market_data.df_train['Date'] > " 2016-03-01"]
        for _, row in df_filtered.iterrows():
            Y = self.market_data.get_list_realized_volatility(row['Date'],self.nb_period)
            X = [i for i in range(1,len(Y)+1)]

            X = np.array(X).reshape(-1, 1)
            Y = np.array(Y)

            model = LinearRegression()
            model.fit(X, Y)

            slope_train.append(model.coef_[0])
        return slope_train


    def generate_alpha(self, straddle_row):
        Y = self.market_data.get_list_realized_volatility(straddle_row['Date'],self.nb_period)
        X = [i for i in range(1,len(Y)+1)]

        X = np.array(X).reshape(-1, 1)
        Y = np.array(Y)

        model = LinearRegression()
        model.fit(X, Y)

        '''
        print(f"Slope : {model.coef_[0]}")
        print(f"Intercept : {model.intercept_}")
        print(f"R² : {model.score(X, Y)}")

        plt.scatter(X, Y, color='blue', label='Données')
    
        # Tracé de la droite de régression
        Y_pred = model.predict(X)
        plt.plot(X, Y_pred, color='red', label='Droite de régression')
        
        plt.xlabel("Index")
        plt.ylabel("Volatilité réalisée")
        plt.title("Régression linéaire de la volatilité réalisée")
        plt.legend()
        plt.show()
        '''

        return model.coef_[0]

        

    def should_trade(self, straddle_row):
        alpha = self.generate_alpha(straddle_row)
        return (alpha > self.quantile_high) or (alpha < self.quantile_low)

    
    def get_signal(self, straddle_row):
        alpha = self.generate_alpha(straddle_row)
        if alpha > self.quantile_high: # If RV is over estimated
            return 'SHORT'
        elif alpha < self.quantile_low: # If RV is under estimated
            return 'LONG'



In [440]:
X = [0,1,2,3,4]
Y = [0,-4,-8,-12,-16]

X = np.array(X).reshape(-1, 1)
Y = np.array(Y)

model = LinearRegression()
model.fit(X, Y)
model.coef_[0]

-4.0

In [16]:
class Regression_IV(BaseStrategy):
    
    def __init__(self, market_data, nb_period):
        super().__init__(market_data)
        self.nb_period = nb_period
        self.quantile_high = np.quantile(self.compute_slope_train(), 0.75)
        self.quantile_low = np.quantile(self.compute_slope_train(), 0.25)


    def compute_slope_train(self):
        slope_train = []
        df_filtered = self.market_data.df_train[self.market_data.df_train['Date'] > " 2016-03-01"]
        for _, row in df_filtered.iterrows():
            Y = self.market_data.get_list_IV(row['Date'],self.nb_period)
            X = [i for i in range(1,len(Y)+1)]

            X = np.array(X).reshape(-1, 1)
            Y = np.array(Y)

            model = LinearRegression()
            model.fit(X, Y)

            slope_train.append(model.coef_[0])
        return slope_train


    def generate_alpha(self, straddle_row):
        Y = self.market_data.get_list_IV(straddle_row['Date'],self.nb_period)
        X = [i for i in range(1,len(Y)+1)]

        X = np.array(X).reshape(-1, 1)
        Y = np.array(Y)

        model = LinearRegression()
        model.fit(X, Y)

        '''
        print(f"Slope : {model.coef_[0]}")
        print(f"Intercept : {model.intercept_}")
        print(f"R² : {model.score(X, Y)}")

        
        plt.scatter(X, Y, color='blue', label='Données')
    
        
        # Tracé de la droite de régression
        Y_pred = model.predict(X)
        plt.plot(X, Y_pred, color='red', label='Droite de régression')
        
        plt.xlabel("Index")
        plt.ylabel("Volatilité réalisée")
        plt.title("Régression linéaire de la volatilité réalisée")
        plt.legend()
        plt.show()
        '''

        return model.coef_[0] # return the slop


    def should_trade(self, straddle_row):
        alpha = self.generate_alpha(straddle_row)
        return (alpha > self.quantile_high) or (alpha < self.quantile_low)

    
    def get_signal(self, straddle_row):
        alpha = self.generate_alpha(straddle_row)
        if alpha > self.quantile_high: # If IV is over estimated
            return 'LONG'
        elif alpha < self.quantile_low: # If IV is under estimated
            return 'SHORT'




In [130]:
class Regression_IVvsRV(BaseStrategy):
    
    def __init__(self, strat_iv, strat_rv, market_data, nb_period):
        super().__init__(market_data)
        self.strat_iv = strat_iv
        self.strat_rv = strat_rv
        self.nb_period = nb_period

    def generate_alpha(self, straddle_row):
        alpha_iv = self.strat_iv.generate_alpha(straddle_row)
        alpha_rv = self.strat_rv.generate_alpha(straddle_row)

        return [alpha_iv,alpha_rv] # IV:alpha[0] and RV:alpha[1]


    def should_trade(self, straddle_row):
        alpha = self.generate_alpha(straddle_row)
        condition1 = (alpha[0] > self.strat_iv.quantile_high) and (alpha[1] < self.strat_rv.quantile_low)
        condition2 = (alpha[0] < self.strat_iv.quantile_low) and (alpha[1] > self.strat_rv.quantile_high)
        return condition1 or condition2
        # If (IV is overestimated and RV is underestimated) or if (IV is underestimated and RV is overestimated) in terms of slope variation


    def get_signal(self, straddle_row):
        alpha = self.generate_alpha(straddle_row)
        if (alpha[0] > self.strat_iv.quantile_high) and (alpha[1] < self.strat_rv.quantile_low): 
            return 'LONG'
        elif (alpha[0] < self.strat_iv.quantile_low) and (alpha[1] > self.strat_rv.quantile_high): 
            return 'SHORT' 



In [18]:
data = Market_data(df_train,df_validation,df_test,df_price,df_option)

In [19]:
regrv = Regression_realvol(data, 10)

In [66]:
regiv = Regression_IV(data, 10)

In [131]:
regivrv = Regression_IVvsRV(regiv, regrv, data, 10)

In [158]:
back = Backtester(data,regivrv)

In [None]:
pnll = 0
df_filtered = df_train[df_train['Date'] > " 2016-03-01"]
for _,row in df_filtered.iterrows():
    res = back.run_row(row)
    pnll += res['PNL']
    print(f"date:{row['Date']}, signal:{res['trade_signal']}, iv:{row['IV']}, rv:{row['vol_real']}, pnl:{res['PNL']}")
print(f'PNL TOTAL = {pnll}')

In [125]:
print(back.run_backtest_IVvsRV()) # IV under RV over and IV over RV under
print(back.run_backtest_train())
print(back.run_backtest_validation())

The strategy achieved a success rate of 40.47% on 850 straddle trades in df_train.
The strategy achieved a success rate of 0.00% on 14 straddle trades in df_validation.
None
PNL:-127.29262945552922, ROI:-1.5535199632836612 %
None
PNL:-26.74093148660205, ROI:-23.681306665428668 %
None


In [150]:
print(back.run_backtest_IVvsRV()) # IV over RV under and IV under RV over
print(back.run_backtest_train())
print(back.run_backtest_validation())

The strategy achieved a success rate of 59.53% on 850 straddle trades in df_train.
The strategy achieved a success rate of 100.00% on 14 straddle trades in df_validation.
None
PNL:127.29262945552922, ROI:1.5535199632836612 %
None
PNL:26.74093148660205, ROI:23.681306665428668 %
None


In [69]:
print(back.run_backtest_IVvsRV()) # IV over RV over and IV under RV under
print(back.run_backtest_train())
print(back.run_backtest_validation())

The strategy achieved a success rate of 52.81% on 801 straddle trades in df_train.
The strategy achieved a success rate of 48.45% on 322 straddle trades in df_validation.
None
PNL:1865.0951936721578, ROI:15.4257130093092 %
None
PNL:-155.0146161200161, ROI:-6.42750114523191 %
None


In [77]:
print(back.run_backtest_IVvsRV()) # IV under RV under and IV over RV over
print(back.run_backtest_train())
print(back.run_backtest_validation())

The strategy achieved a success rate of 47.19% on 801 straddle trades in df_train.
The strategy achieved a success rate of 51.55% on 322 straddle trades in df_validation.
None
PNL:-1865.0951936721578, ROI:-15.4257130093092 %
None
PNL:155.0146161200161, ROI:6.42750114523191 %
None


In [None]:
back.run_backtest_IVvsRV()

The strategy achieved a success rate of 64.12% on 641 straddle trades in df_train.
The strategy achieved a success rate of 100.00% on 5 straddle trades in df_validation.


------

-----

# **Strategy based on skew**

In [None]:
df_mean = df_train.groupby(["Date", "day_to_maturity"])[["IV", "vol_real"]].mean().reset_index()

for date in df_mean['Date'].unique():
    subset = df_mean[df_mean['Date'] == date]
    plt.plot(subset['day_to_maturity'], subset['IV'], color ='blue', label=str(date))
    plt.plot(subset['day_to_maturity'], subset['vol_real'], color='red', label=str(date))
    plt.xlabel("Days to Maturity")
    plt.ylabel("Implied Volatility (IV)")
    plt.title(f"Average IV Term Structure {str(date)}")
    plt.legend()
    plt.show()



