# Process the data

Create firstly the long returns, then the short returns (which are used later on).
The files are all saved in the data/straddles folder.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pandas as pd
import numpy as np
import sqlalchemy as db
import sys
from scipy.interpolate import interp1d
import os
from datetime import datetime, timedelta
from scipy.stats import norm
import math

In [2]:
# File paths for the CSV files
file_path_1 = 'Data/Straddles/straddle_returns_unfinished.csv' #used for the return calculation with the betas
file_path_2 = 'Data/Straddles/full_atm_data.csv' #full_atm_data1.csv is the file with the straddle prices computed with the forward price

# Reading the CSV files into Pandas DataFrames
returns_df = pd.read_csv(file_path_1)
option_df = pd.read_csv(file_path_2)

In [3]:
# Option1: calculate the straddle price today and tomorrow, based on 0 deltas
#get straddle price
def calculate_straddle_price(row, n):
    total_delta = abs(row['delta_c']) + abs(row['delta_p'])
    w2 = abs(row['delta_c']) / total_delta  # Calculate weights so we have a delta of zero
    w1 = abs(row['delta_p']) / total_delta
    if n == 1:
        return w1*row['call_price']+w2*row['put_price']
    else:
        return w1*row['next_call_price']+w2*row['next_put_price']

In [4]:
# apply the function to every row (every date)
option_df['current_straddle_price'] = option_df.apply(lambda row: calculate_straddle_price(row, 1), axis=1)
option_df['next_straddle_price'] = option_df.apply(lambda row: calculate_straddle_price(row, 2), axis=1)
# Selecting specific columns and creating a new DataFrame
new_df = option_df[['date', 'days_to_exp', 'current_straddle_price', 'next_straddle_price']].copy()
new_df

Unnamed: 0,date,days_to_exp,current_straddle_price,next_straddle_price
0,1996-01-04,16,5.757318,5.109840
1,1996-01-04,44,9.137655,9.079675
2,1996-01-04,72,11.867779,12.087773
3,1996-01-04,170,20.729906,20.616067
4,1996-01-05,15,5.316093,4.830802
...,...,...,...,...
100036,2023-02-27,291,269.461335,263.662052
100037,2023-02-27,305,273.813965,270.076212
100038,2023-02-27,326,289.737643,284.109680
100039,2023-02-27,354,296.668519,291.005010


In [5]:
# now with the straddle prices, use the formula from the appendix to get the constant maturity returns
# formula has been changed such that the weights are correct (exchanged the weights)
# check if it is correctly interpolated, sometime it might be necessary to extrapolate..
def interpolate_straddle_price(T, df):
    df['date'] = pd.to_datetime(df['date'])  # Convert 'date' column to datetime if it's stored as a string
    #unique_dates = df['date'].dt.date.drop_duplicates()
    unique_dates = df['date'].drop_duplicates()
    return_data = []
    for date_value in unique_dates:
        # Filter the dataframe based on days to expiration exactly equal to the specified days
        subset_df = df[df['date'] == date_value].copy()
        date_value = date_value.strftime("%Y-%m-%d")
        exact_day = subset_df[subset_df['days_to_exp'] == T]
        next_day = df.loc[df['date'] > date_value, 'date'].min()
        if not exact_day.empty:
            # If there's an exact match, calculate the return using the exact match
            return_value = (exact_day['next_straddle_price'] / exact_day['current_straddle_price']) - 1
            return_data.append({'date': next_day, f"return_{T}": float(return_value.iloc[0])})
        else:
            # For interpolation, find two nearest days to expiration
            subset_df['days_diff'] = abs(subset_df['days_to_exp'] - T)
            nearest_two = subset_df.nsmallest(2, 'days_diff').sort_values('days_to_exp')
            # Calculate the interpolated price
            S1 = nearest_two.iloc[0]
            S2 = nearest_two.iloc[1]
            # correct weighting
            price_t1 = (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['next_straddle_price'] + (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['next_straddle_price']
            price_t = (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['current_straddle_price'] + (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['current_straddle_price']
            return_value = price_t1/price_t - 1
            return_data.append({'date': next_day, f"return_{T}": return_value})

    return pd.DataFrame(return_data)


In [6]:
# apply this function and save the results for all necessary maturities
results_30 = interpolate_straddle_price(30, new_df)
results_60 = interpolate_straddle_price(60, new_df)
results_90 = interpolate_straddle_price(90, new_df)
results_180 = interpolate_straddle_price(180, new_df)
results_270 = interpolate_straddle_price(270, new_df)
results_360 = interpolate_straddle_price(360, new_df)

# save the results concisely
merged_results = results_30.merge(results_60, on='date')
merged_results = merged_results.merge(results_90, on='date')
merged_results = merged_results.merge(results_180, on='date')
merged_results = merged_results.merge(results_270, on='date')
merged_results = merged_results.merge(results_360, on='date')
merged_results

Unnamed: 0,date,return_30,return_60,return_90,return_180,return_270,return_360
0,1996-01-05,-0.047362,0.009428,0.012776,-0.004282,-0.000102,0.001096
1,1996-01-08,-0.051276,-0.017906,-0.016097,-0.010467,-0.006793,-0.004988
2,1996-01-09,0.352578,0.133491,0.136242,0.035873,0.098269,0.071248
3,1996-01-10,0.120899,0.082100,0.084634,0.007288,0.007953,0.007787
4,1996-01-11,-0.096908,-0.070833,-0.071009,-0.057062,-0.028652,-0.014971
...,...,...,...,...,...,...,...
6826,2023-02-22,-0.049894,-0.036390,-0.029874,-0.019329,-0.015693,-0.012386
6827,2023-02-23,-0.042998,-0.034530,-0.028604,-0.022993,-0.020272,-0.016729
6828,2023-02-24,0.000661,0.007255,0.010188,0.008399,0.006145,0.002570
6829,2023-02-27,-0.051075,-0.040506,-0.030172,-0.018373,-0.015130,-0.012799


In [7]:
# save the results accordingly
#file_path = os.path.join("Data", "straddle_returns_correct.csv")
file_path = os.path.join("Data", "straddle_returns_forward.csv")
merged_results.to_csv(file_path, index=False) 
# test similarity to given data in compare_data notebook

In [17]:
# Option2: rerun the above, but now change the weights, so it matches the paper
# now with the straddle prices, use the formula from the appendix to get the constant maturity returns
# formula has been changed such that the weights are correct (exchanged the weights)
# check if it is correctly interpolated, sometime it might be necessary to extrapolate..
def interpolate_straddle_price(T, df):
    df['date'] = pd.to_datetime(df['date'])  # Convert 'date' column to datetime if it's stored as a string
    #unique_dates = df['date'].dt.date.drop_duplicates()
    unique_dates = df['date'].drop_duplicates()
    return_data = []
    for date_value in unique_dates:
        # Filter the dataframe based on days to expiration exactly equal to the specified days
        subset_df = df[df['date'] == date_value].copy()
        date_value = date_value.strftime("%Y-%m-%d")
        exact_day = subset_df[subset_df['days_to_exp'] == T]
        next_day = df.loc[df['date'] > date_value, 'date'].min()
        if not exact_day.empty:
            # If there's an exact match, calculate the return using the exact match
            return_value = (exact_day['next_straddle_price'] / exact_day['current_straddle_price']) - 1
            return_data.append({'date': next_day, f"return_{T}": float(return_value.iloc[0])})
        else:
            # For interpolation, find two nearest days to expiration
            subset_df['days_diff'] = abs(subset_df['days_to_exp'] - T)
            nearest_two = subset_df.nsmallest(2, 'days_diff').sort_values('days_to_exp')
            # Calculate the interpolated price
            S1 = nearest_two.iloc[0]
            S2 = nearest_two.iloc[1]
            #wrong weighting (in the paper)
            price_t1 = (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['next_straddle_price'] + (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['next_straddle_price']
            price_t = (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['current_straddle_price'] + (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['current_straddle_price']
            return_value = price_t1/price_t - 1
            return_data.append({'date': next_day, f"return_{T}": return_value})

    return pd.DataFrame(return_data)
# apply this function and save the results for all necessary maturities
results_30 = interpolate_straddle_price(30, new_df)
results_60 = interpolate_straddle_price(60, new_df)
results_90 = interpolate_straddle_price(90, new_df)
results_180 = interpolate_straddle_price(180, new_df)
results_270 = interpolate_straddle_price(270, new_df)
results_360 = interpolate_straddle_price(360, new_df)

# save the results concisely
merged_results1 = results_30.merge(results_60, on='date')
merged_results1 = merged_results1.merge(results_90, on='date')
merged_results1 = merged_results1.merge(results_180, on='date')
merged_results1 = merged_results1.merge(results_270, on='date')
merged_results1 = merged_results1.merge(results_360, on='date')
merged_results1

Unnamed: 0,date,return_30,return_60,return_90,return_180,return_270,return_360
0,1996-01-05,-0.047362,0.005933,0.002039,0.015582,0.008405,0.006920
1,1996-01-08,-0.054206,-0.025427,-0.026459,-0.019554,-0.008769,-0.013060
2,1996-01-09,0.462928,0.200836,0.197305,0.094419,0.037223,0.050676
3,1996-01-10,0.122686,0.083203,0.075302,0.008955,0.014869,0.012539
4,1996-01-11,-0.149936,-0.071963,-0.071734,-0.034765,-0.022992,-0.030964
...,...,...,...,...,...,...,...
6826,2023-02-22,-0.046649,-0.035337,-0.028813,-0.021796,-0.014255,-0.012686
6827,2023-02-23,-0.043014,-0.033473,-0.027998,-0.022898,-0.017985,-0.016541
6828,2023-02-24,-0.000559,0.006665,0.009079,0.007553,0.004692,0.002961
6829,2023-02-27,-0.052030,-0.041104,-0.030785,-0.020353,-0.012987,-0.009571


In [18]:
# save the results accordingly
file_path = os.path.join("Data", "straddle_returns_incorrect.csv")
merged_results1.to_csv(file_path, index=False) 
# test similarity to given data in compare_data notebook

In [20]:
# Option3: Calculate the straddle returns using the beta_call, like in the paper he refers to Expected Option Returns∗
# by Joshua D. Coval and Tyler Shumway
# codes run in the WRDS notebook, but it took a full night to run, so it is just here for completion

## continue here after having calculated the beta_c
# only keep columns in the dataset that are actually needed
#option_df = option_df[['']]
# Sort the DataFrame by 'date' and 'exdate' if it's not sorted already
unique_combinations = option_df[['date', 'days_to_exp']]
# Create a new DataFrame to store returns
returns_df = pd.DataFrame(columns=['date', 'days_to_exp', 'r_c', 'r_p', 'r_straddle'])

# Calculate returns for each date-exdate combination
for index, row in unique_combinations.iterrows():
    current_date = row['date']
    current_days_to_exp = row['days_to_exp']

    # Filter data for the current date-exdate combination
    mask = (option_df['date'] == current_date) & (option_df['days_to_exp'] == current_days_to_exp)
    current_data = option_df[mask].copy() 
    #current_data = df[(df['date'] == current_date) & (df['exdate'] == current_exdate)]

    # Calculate returns (r_c)
    current_data['r_c'] = (current_data['next_call_price'] / current_data['call_price']) - 1
    current_data['r_p'] = (current_data['next_put_price'] / current_data['put_price']) - 1
    call_part = (-current_data['call_price']*current_data['beta_c']+current_data['spotprice'])/(current_data['put_price']*current_data['beta_c'] - current_data['call_price']*current_data['beta_c']+current_data['spotprice']) * current_data['r_c']
    put_part = (current_data['put_price']*current_data['beta_c'])/(current_data['put_price']*current_data['beta_c'] - current_data['call_price']*current_data['beta_c']+current_data['spotprice']) * current_data['r_p']
    current_data['r_straddle'] = call_part + put_part
    

    # Store the results in returns_df
    returns_df = pd.concat([returns_df, current_data[['date', 'days_to_exp', 'r_straddle']]])

# Display the new DataFrame with returns
print(returns_df)

              date days_to_exp  r_c  r_p  r_straddle
0       1996-01-04          16  NaN  NaN    0.017857
1       1996-01-04          44  NaN  NaN    0.025000
2       1996-01-04          72  NaN  NaN    0.063830
3       1996-01-04         170  NaN  NaN    0.003597
4       1996-01-05          15  NaN  NaN   -0.337662
...            ...         ...  ...  ...         ...
100036  2023-02-27         291  NaN  NaN   -0.019481
100037  2023-02-27         305  NaN  NaN   -0.019666
100038  2023-02-27         326  NaN  NaN   -0.017961
100039  2023-02-27         354  NaN  NaN   -0.015870
100040  2023-02-27         382  NaN  NaN   -0.016778

[100041 rows x 5 columns]


In [21]:
# as it has already been done, I just import the file (see top line = returns_df) and interpolate the returns
def interpolate_straddle_returns(T, df):
    df['date'] = pd.to_datetime(df['date'])
    unique_dates = df['date'].drop_duplicates()
    return_data = []
    for date_value in unique_dates:
        # Filter the dataframe based on days to expiration exactly equal to the specified days
        subset_df = df[df['date'] == date_value].copy()
        date_value = date_value.strftime("%Y-%m-%d")
        exact_day = subset_df[subset_df['days_to_exp'] == T]
        next_day = df.loc[df['date'] > date_value, 'date'].min()
        if not exact_day.empty:
            # If there's an exact match, calculate the return using the exact match
            return_value = exact_day['r_straddle']
            return_data.append({'date': next_day, f"return_{T}": float(return_value)})
        else:
            # For interpolation, find two nearest days to expiration
            subset_df['days_diff'] = abs(subset_df['days_to_exp'] - T)
            nearest_two = subset_df.nsmallest(2, 'days_diff').sort_values('days_to_exp')
            # Calculate the interpolated price
            S1 = nearest_two.iloc[0]
            S2 = nearest_two.iloc[1]
            return_value = (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['r_straddle'] + (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['r_straddle']
            return_data.append({'date': next_day, f"return_{T}": return_value})

    return pd.DataFrame(return_data)

In [22]:
results_30 = interpolate_straddle_returns(30, returns_df)
results_60 = interpolate_straddle_returns(60, returns_df)
results_90 = interpolate_straddle_returns(90, returns_df)
results_180 = interpolate_straddle_returns(180, returns_df)
results_270 = interpolate_straddle_returns(270, returns_df)
results_360 = interpolate_straddle_returns(360, returns_df)
# save the results concisely
merged_results2 = results_30.merge(results_60, on='date')
merged_results2 = merged_results2.merge(results_90, on='date')
merged_results2 = merged_results2.merge(results_180, on='date')
merged_results2 = merged_results2.merge(results_270, on='date')
merged_results2 = merged_results2.merge(results_360, on='date')
merged_results2

TypeError: Column 'days_diff' has dtype object, cannot use method 'nsmallest' with this dtype

In [None]:
# save the results concisely
merged_results2 = results_30.merge(results_60, on='date')
merged_results2 = merged_results2.merge(results_90, on='date')
merged_results2 = merged_results2.merge(results_180, on='date')
merged_results2 = merged_results2.merge(results_270, on='date')
merged_results2 = merged_results2.merge(results_360, on='date')
merged_results2

In [None]:
# save the results accordingly
file_path = os.path.join("Data", "straddle_returns_beta.csv")
merged_results2.to_csv(file_path, index=False)
# test similarity to given data in compare_data notebook

## Short straddle returns

The short straddle returns have to be adjusted by the 20% margin as described in the paper at the end.

Note that there are two files saved: one with the correct weighting and one with the incorrect weighting. The name differs accordingly. For the short straddle returns, the correlation to his given data is higher for the correct weighting (in contrast to the long straddle returns, where it is the other way around).

In [23]:
#get straddle prices today and tomorrow based on the respective deltas of the calls and puts
#this section is reused from make_straddle_returns, where the normal straddle returns have been calculated
#based on zero delta
def calculate_straddle_price(row, n):
    total_delta = abs(row['delta_c']) + abs(row['delta_p'])
    w2 = abs(row['delta_c']) / total_delta  # Calculate weights so we have a delta of zero
    w1 = abs(row['delta_p']) / total_delta
    if n == 1:
        return w1*row['call_price']+w2*row['put_price']
    else:
        return w1*row['next_call_price']+w2*row['next_put_price']

# apply the function to every row (every date)
option_df['current_straddle_price'] = option_df.apply(lambda row: calculate_straddle_price(row, 1), axis=1)
option_df['next_straddle_price'] = option_df.apply(lambda row: calculate_straddle_price(row, 2), axis=1)
# Selecting specific columns and creating a new DataFrame
new_df = option_df[['date', 'days_to_exp', 'current_straddle_price', 'next_straddle_price', 'spotprice']].copy()
new_df

Unnamed: 0,date,days_to_exp,current_straddle_price,next_straddle_price,spotprice
0,1996-01-04,16,5.757318,5.109840,617.70
1,1996-01-04,44,9.137655,9.079675,617.70
2,1996-01-04,72,11.867779,12.087773,617.70
3,1996-01-04,170,19.602408,19.529090,617.70
4,1996-01-05,15,5.316093,4.830802,616.71
...,...,...,...,...,...
100036,2023-02-27,291,272.506327,267.009695,3982.24
100037,2023-02-27,305,265.404985,261.572730,3982.24
100038,2023-02-27,326,280.863293,275.551640,3982.24
100039,2023-02-27,354,298.541904,293.474002,3982.24


In [24]:
# now with the straddle prices, calculate the constant maturity straddle price for today and tomorrow and save it
# check the weighting! for the short straddles, the correlation to his data is higher when we use the correct weighting

def interpolate_straddle_price(T, df):
    df['date'] = pd.to_datetime(df['date'])  # Convert 'date' column to datetime if it's stored as a string
    unique_dates = df['date'].drop_duplicates()
    straddle_price_data = []
    for date_value in unique_dates:
        subset_df = df[df['date'] == date_value].copy()
        date_value = date_value.strftime("%Y-%m-%d")
        exact_day = subset_df[subset_df['days_to_exp'] == T]
        #next_day = df.loc[df['date'] > date_value, 'date'].min()
        if not exact_day.empty:
            straddle_price_data.append({
                'date': date_value,
                'spot_price': exact_day['spotprice'].iloc[0],
                f"current_straddle_price_{T}": exact_day['current_straddle_price'].iloc[0],
                f"next_straddle_price_{T}": exact_day['next_straddle_price'].iloc[0]
            })
        else:
            subset_df['days_diff'] = abs(subset_df['days_to_exp'] - T)
            nearest_two = subset_df.nsmallest(2, 'days_diff').sort_values('days_to_exp')
            S1 = nearest_two.iloc[0]
            S2 = nearest_two.iloc[1]
            #correct weighting
            price_t1 = (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['next_straddle_price'] + (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['next_straddle_price']
            price_t = (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['current_straddle_price'] + (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['current_straddle_price']
            straddle_price_data.append({
                'date': date_value,
                'spot_price': S1['spotprice'],
                f"current_straddle_price_{T}": price_t,
                f"next_straddle_price_{T}": price_t1
            })

    return pd.DataFrame(straddle_price_data)


In [25]:
# apply this function and save the results for all necessary maturities
results_30 = interpolate_straddle_price(30, new_df)
results_30['r_v_30'] = (results_30['current_straddle_price_30'] - results_30['next_straddle_price_30']) / (0.2 * results_30['spot_price'])
results_30['r_v_30'] = results_30['r_v_30'].shift(1)

results_60 = interpolate_straddle_price(60, new_df)
results_60['r_v_60'] = (results_60['current_straddle_price_60'] - results_60['next_straddle_price_60']) / (0.2 * results_60['spot_price'])
results_60['r_v_60'] = results_60['r_v_60'].shift(1)

results_90 = interpolate_straddle_price(90, new_df)
results_90['r_v_90'] = (results_90['current_straddle_price_90'] - results_90['next_straddle_price_90']) / (0.2 * results_90['spot_price'])
results_90['r_v_90'] = results_90['r_v_90'].shift(1)

results_180 = interpolate_straddle_price(180, new_df)
results_180['r_v_180'] = (results_180['current_straddle_price_180'] - results_180['next_straddle_price_180']) / (0.2 * results_180['spot_price'])
results_180['r_v_180'] = results_180['r_v_180'].shift(1)

results_270 = interpolate_straddle_price(270, new_df)
results_270['r_v_270'] = (results_270['current_straddle_price_270'] - results_270['next_straddle_price_270']) / (0.2 * results_270['spot_price'])
results_270['r_v_270'] = results_270['r_v_270'].shift(1)

results_360 = interpolate_straddle_price(360, new_df)
results_360['r_v_360'] = (results_360['current_straddle_price_360'] - results_360['next_straddle_price_360']) / (0.2 * results_360['spot_price'])
results_360['r_v_360'] = results_360['r_v_360'].shift(1)

# save the results concisely
merged_results = results_30[['date','spot_price', 'r_v_30']].merge(results_60[['date', 'r_v_60']], on='date')
merged_results = merged_results.merge(results_90[['date', 'r_v_90']], on='date')
merged_results = merged_results.merge(results_180[['date', 'r_v_180']], on='date')
merged_results = merged_results.merge(results_270[['date', 'r_v_270']], on='date')
merged_results = merged_results.merge(results_360[['date', 'r_v_360']], on='date')
merged_results = merged_results.drop(['spot_price'], axis=1)
merged_results = merged_results.rename(columns={'date': 'Date'})
merged_results = merged_results.dropna()
merged_results

Unnamed: 0,Date,r_v_30,r_v_60,r_v_90,r_v_180,r_v_270,r_v_360
1,1996-01-05,0.002855,-0.000816,-0.002624,0.000472,-0.000618,-0.001708
2,1996-01-08,0.003020,0.002114,0.004937,0.002639,0.001618,0.001343
3,1996-01-09,-0.020640,-0.011861,-0.030952,-0.005705,-0.011948,-0.026574
4,1996-01-10,-0.008543,-0.002753,-0.008712,-0.002454,-0.002654,-0.007116
5,1996-01-11,0.007298,0.007645,0.020254,0.005168,0.006527,0.006108
...,...,...,...,...,...,...,...
6826,2023-02-21,-0.007389,-0.006152,-0.006626,-0.004468,-0.003618,-0.003384
6827,2023-02-22,0.005773,0.005847,0.005882,0.006677,0.005165,0.004924
6828,2023-02-23,0.005049,0.005350,0.005396,0.008036,0.006186,0.006094
6829,2023-02-24,0.000285,-0.000949,-0.001635,-0.002590,-0.001648,-0.000988


In [26]:
merged_results.to_csv('Data/Straddles/short_straddle_returns_correct.csv', index=False)

In [27]:
#rerun with different weighting
#now wrong (like in the paper)

def interpolate_straddle_price(T, df):
    df['date'] = pd.to_datetime(df['date'])  # Convert 'date' column to datetime if it's stored as a string
    unique_dates = df['date'].drop_duplicates()
    straddle_price_data = []
    for date_value in unique_dates:
        subset_df = df[df['date'] == date_value].copy()
        date_value = date_value.strftime("%Y-%m-%d")
        exact_day = subset_df[subset_df['days_to_exp'] == T]
        #next_day = df.loc[df['date'] > date_value, 'date'].min()
        if not exact_day.empty:
            straddle_price_data.append({
                'date': date_value,
                'spot_price': exact_day['spotprice'].iloc[0],
                f"current_straddle_price_{T}": exact_day['current_straddle_price'].iloc[0],
                f"next_straddle_price_{T}": exact_day['next_straddle_price'].iloc[0]
            })
        else:
            subset_df['days_diff'] = abs(subset_df['days_to_exp'] - T)
            nearest_two = subset_df.nsmallest(2, 'days_diff').sort_values('days_to_exp')
            S1 = nearest_two.iloc[0]
            S2 = nearest_two.iloc[1]
            #correct weighting
            #price_t1 = (abs(S1['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['next_straddle_price'] + (abs(T - S2['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['next_straddle_price']
            #price_t = (abs(S1['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['current_straddle_price'] + (abs(T - S2['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['current_straddle_price']
            #wrong weighting
            price_t1 = (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['next_straddle_price'] + (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['next_straddle_price']
            price_t = (abs(T - S1['days_to_exp']) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S1['current_straddle_price'] + (abs(S2['days_to_exp'] - T) / abs(S2['days_to_exp'] - S1['days_to_exp'])) * S2['current_straddle_price']
            straddle_price_data.append({
                'date': date_value,
                'spot_price': S1['spotprice'],
                f"current_straddle_price_{T}": price_t,
                f"next_straddle_price_{T}": price_t1
            })

    return pd.DataFrame(straddle_price_data)


In [28]:
# apply this function and save the results for all necessary maturities
results_30 = interpolate_straddle_price(30, new_df)
results_30['r_v_30'] = (results_30['current_straddle_price_30'] - results_30['next_straddle_price_30']) / (0.2 * results_30['spot_price'])
results_30['r_v_30'] = results_30['r_v_30'].shift(1)

results_60 = interpolate_straddle_price(60, new_df)
results_60['r_v_60'] = (results_60['current_straddle_price_60'] - results_60['next_straddle_price_60']) / (0.2 * results_60['spot_price'])
results_60['r_v_60'] = results_60['r_v_60'].shift(1)

results_90 = interpolate_straddle_price(90, new_df)
results_90['r_v_90'] = (results_90['current_straddle_price_90'] - results_90['next_straddle_price_90']) / (0.2 * results_90['spot_price'])
results_90['r_v_90'] = results_90['r_v_90'].shift(1)

results_180 = interpolate_straddle_price(180, new_df)
results_180['r_v_180'] = (results_180['current_straddle_price_180'] - results_180['next_straddle_price_180']) / (0.2 * results_180['spot_price'])
results_180['r_v_180'] = results_180['r_v_180'].shift(1)

results_270 = interpolate_straddle_price(270, new_df)
results_270['r_v_270'] = (results_270['current_straddle_price_270'] - results_270['next_straddle_price_270']) / (0.2 * results_270['spot_price'])
results_270['r_v_270'] = results_270['r_v_270'].shift(1)

results_360 = interpolate_straddle_price(360, new_df)
results_360['r_v_360'] = (results_360['current_straddle_price_360'] - results_360['next_straddle_price_360']) / (0.2 * results_360['spot_price'])
results_360['r_v_360'] = results_360['r_v_360'].shift(1)

# save the results concisely
merged_results = results_30[['date','spot_price', 'r_v_30']].merge(results_60[['date', 'r_v_60']], on='date')
merged_results = merged_results.merge(results_90[['date', 'r_v_90']], on='date')
merged_results = merged_results.merge(results_180[['date', 'r_v_180']], on='date')
merged_results = merged_results.merge(results_270[['date', 'r_v_270']], on='date')
merged_results = merged_results.merge(results_360[['date', 'r_v_360']], on='date')
merged_results = merged_results.drop(['spot_price'], axis=1)
merged_results = merged_results.rename(columns={'date': 'Date'})
merged_results = merged_results.dropna()
merged_results

Unnamed: 0,Date,r_v_30,r_v_60,r_v_90,r_v_180,r_v_270,r_v_360
1,1996-01-05,0.002855,-0.000495,-0.000374,-0.001902,-0.002992,-0.004083
2,1996-01-08,0.003142,0.002154,0.005123,0.002513,0.001727,0.002332
3,1996-01-09,-0.022605,-0.015655,-0.039804,-0.009084,-0.007066,-0.021692
4,1996-01-10,-0.007527,-0.007223,-0.017653,-0.002057,-0.003246,-0.007708
5,1996-01-11,0.008899,0.006616,0.018454,0.006573,0.004793,0.008695
...,...,...,...,...,...,...,...
6826,2023-02-21,-0.007270,-0.006200,-0.006462,-0.004552,-0.003714,-0.003206
6827,2023-02-22,0.005773,0.005898,0.005796,0.006975,0.005058,0.004924
6828,2023-02-23,0.005049,0.005368,0.005517,0.007943,0.006293,0.006443
6829,2023-02-24,0.000182,-0.001003,-0.001669,-0.002740,-0.001592,-0.001141


In [29]:
merged_results.to_csv('Data/Straddles/short_straddle_returns_incorrect.csv', index=False)