In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
from statsmodels.api import OLS
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import Ridge
from statsmodels.tsa.stattools import coint
import functools
import itertools
from math import erf

In [8]:
# The default vol of 0.16 was obtained from fit_vol(0.1, 10000, 637.63), i.e. assuming an
# at-the-money strike at time 0 together with the option premium observed at time 0.
def fun_BS_quick(S = 10000, K = 10000, vol = 0.16, T = 1, r = 0, q = 0, ReturnDelta = False):
    """Black-Scholes value (or delta) of a European call with a continuous dividend yield.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        vol: Volatility, expressed in the same time unit as ``T``.
        T: Time to expiry.
        r: Continuously compounded risk-free rate.
        q: Continuous dividend yield on the underlying.
        ReturnDelta: When True, return the call delta instead of the price.

    Returns:
        The call price ``S*exp(-q*T)*N(d1) - K*exp(-r*T)*N(d2)``, or the delta
        ``exp(-q*T)*N(d1)`` when ``ReturnDelta`` is True.

    Notes:
        Inputs must be scalars because the normal CDF is built on ``math.erf``.
        With the default ``q = 0`` the result is the plain Black-Scholes call.
    """
    sqrt_T = np.sqrt(T)

    # The drift is the carry rate (r - q); the earlier version dropped q here.
    d1 = (np.log(S/K) + (r - q + vol**2/2)*T)/vol/sqrt_T
    d2 = d1 - vol*sqrt_T

    # Standard normal CDF expressed through the error function.
    normcdf = lambda x: (1 + erf(x/np.sqrt(2)))/2
    N1 = normcdf(d1)
    N2 = normcdf(d2)

    # Dividends discount the spot leg; the strike leg is discounted at r only.
    dividend_discount = np.exp(-q*T)

    if ReturnDelta:
        return dividend_discount*N1
    return S*dividend_discount*N1 - K*np.exp(-r*T)*N2
    
def fit_vol(vol_fit = 0.10, S = 9990, px = 620.5, T = 1, step = 0.0001, K = 10000, tol = 0.01, max_iter = 30):
    """Back out the volatility at which ``fun_BS_quick`` reproduces an observed premium.

    The solver is a fixed-step iteration: at each pass the volatility is moved by
    ``(px - model_price) * step``, so it rises when the model is too cheap and
    falls when the model is too expensive.

    Args:
        vol_fit: Starting guess for the volatility.
        S: Spot price of the underlying.
        px: Observed option premium to match.
        T: Time to expiry passed through to ``fun_BS_quick``.
        step: Multiplier applied to the pricing error at every update.
        K: Strike price. Defaults to 10000, the strike ``fun_BS_quick`` uses by default.
        tol: Absolute pricing error below which the iteration stops.
        max_iter: Maximum number of pricing evaluations.

    Returns:
        The last volatility iterate. No convergence flag is returned: if
        ``max_iter`` is exhausted first, the value may still miss ``tol``.
    """
    for _ in range(max_iter):
        px_new = fun_BS_quick(S=S, K=K, vol=vol_fit, T=T)
        if abs(px_new - px) < tol:
            break
        vol_fit = vol_fit + (px - px_new)*step
    return vol_fit

In [9]:
# Calibrate the time-0 volatility: start from 0.1, spot 10000, observed premium 637.63.
fit_vol(vol_fit=0.1, S=10000, px=637.63)

0.15999827732028848

In [10]:
def percentile_within_minmax(x):
    """Place the newest value of a window on a 0-100 scale between the window's min and max.

    Args:
        x: Sequence of window values; the last element is the one being ranked.

    Returns:
        ``(last - min) / (max - min) * 100``; NaN when the window has fewer than
        two values; 0 when every value in the window is equal.
    """
    if len(x) < 2:
        # A single observation has no range to be positioned in.
        return np.nan

    lo, hi = np.min(x), np.max(x)
    if hi == lo:
        # Flat window: report 0 instead of dividing by a zero range.
        return 0

    return (x[-1] - lo) / (hi - lo) * 100

In [5]:
# Round whose price files are loaded. Deliberately not named `round`: that would
# shadow the built-in round() function for every later cell of the notebook.
ROUND_NUMBER = 4
PRICES_DIR = f'../data/round-{ROUND_NUMBER}-island-data-bottle'


def _load_prices_for_day(day):
    """Read one day's semicolon-separated price file, indexed by its timestamp column."""
    return pd.read_csv(f'{PRICES_DIR}/prices_round_{ROUND_NUMBER}_day_{day}.csv', index_col='timestamp', sep=';')


prices_day_1_df = _load_prices_for_day(1)
prices_day_2_df = _load_prices_for_day(2)
prices_day_3_df = _load_prices_for_day(3)

In [6]:
# Stack the three daily price frames on top of each other (all products, all days).
Data_df = pd.concat([prices_day_1_df, prices_day_2_df, prices_day_3_df], axis=0)

# One frame per product, keeping only the day and a product-specific mid-price column.
is_coconut = Data_df['product'] == 'COCONUT'
is_coupon = Data_df['product'] == 'COCONUT_COUPON'
df_coconut_day_123 = Data_df.loc[is_coconut, ['day', 'mid_price']].rename(columns={'mid_price': 'coconut_mid_price'})
df_coupon_day_123 = Data_df.loc[is_coupon, ['day', 'mid_price']].rename(columns={'mid_price': 'coupon_mid_price'})

# Join the two products row by row. The key is (timestamp, day): 'timestamp' is the
# index name and 'day' a column, so both are needed to identify a row across days.
df_all_mid_prices_day_123 = pd.merge(df_coconut_day_123, df_coupon_day_123, on=['timestamp', 'day'])
   

In [11]:
# Implied volatility, solved independently for every row: fit_vol starts from a 0.15
# guess with T = 1 and matches the coupon mid price given that row's coconut mid price.
# (Original author's note: when solving for IV, the effect of changes in S on the
# option price is assumed away.)
def _row_implied_vol(row):
    return fit_vol(0.15, row['coconut_mid_price'], row['coupon_mid_price'], 1)


df_all_mid_prices_day_123['Implied_Volatility'] = df_all_mid_prices_day_123.apply(_row_implied_vol, axis=1)

# IV percentile, min-max version: where the latest IV sits inside its trailing window.
window_size = 250  # number of rows in the rolling window
iv_series = df_all_mid_prices_day_123['Implied_Volatility']
df_all_mid_prices_day_123['IV_Percentile'] = iv_series.rolling(window=window_size).apply(percentile_within_minmax, raw=True)

# Exponentially weighted mean of the IV series (span 20, recursive form).
df_all_mid_prices_day_123['IV_ema'] = iv_series.ewm(span=20, adjust=False).mean()

In [12]:
# Historical (realised) volatility of the coconut mid price.
# Log return between each row and the row before it in the merged frame.
coconut_mid = df_all_mid_prices_day_123['coconut_mid_price']
df_all_mid_prices_day_123['Returns'] = np.log(coconut_mid / coconut_mid.shift(1))

# Alternative way to set the initial vol (and then solve for K):
#   df_all_mid_prices_day_123['Returns'].iloc[:10000].std()*np.sqrt(252)*100

# Rolling standard deviation of the returns over 21, 63 and 126 rows, scaled by
# sqrt(252)*100.
# NOTE(review): sqrt(252)*100 equals sqrt(252 * 10_000); presumably this annualises
# per-row returns assuming 10,000 rows per day rather than converting to percent - confirm.
for hv_window in (21, 63, 126):
    df_all_mid_prices_day_123[f'HV{hv_window}'] = df_all_mid_prices_day_123['Returns'].rolling(hv_window).std()*np.sqrt(252)*100

In [28]:
# Exponentially weighted standard deviation of the returns, scaled like the rolling HV columns.
# NOTE(review): 'HV21_ema' uses span=20 while the other two columns use the span in
# their name (63, 126); kept exactly as found - confirm whether 21 was intended.
hv_ema_spans = {'HV21_ema': 20, 'HV63_ema': 63, 'HV126_ema': 126}
for hv_ema_col, hv_ema_span in hv_ema_spans.items():
    df_all_mid_prices_day_123[hv_ema_col] = df_all_mid_prices_day_123['Returns'].ewm(span=hv_ema_span, adjust=False).std()*np.sqrt(252)*100

In [29]:
# Show the last rows of the merged frame, including the derived IV and HV columns.
df_all_mid_prices_day_123.tail()

Unnamed: 0_level_0,day,coconut_mid_price,coupon_mid_price,Implied_Volatility,IV_Percentile,IV_ema,Returns,HV21,HV63,HV126,HV21_ema,HV63_ema,HV126_ema
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
999500,3,9883.5,577.5,0.160034,50.000714,0.160495,-5.1e-05,0.171229,0.149255,0.147499,0.167804,0.158377,0.153986
999600,3,9884.5,577.5,0.159906,47.421928,0.160439,0.000101,0.16576,0.150478,0.147499,0.168746,0.158598,0.154106
999700,3,9883.5,575.5,0.159527,39.742611,0.160352,-0.000101,0.170149,0.151462,0.148117,0.167304,0.158597,0.154218
999800,3,9884.0,575.5,0.159463,38.453818,0.160268,5.1e-05,0.167419,0.151854,0.148117,0.161915,0.156828,0.15334
999900,3,9882.5,575.5,0.159654,42.319601,0.160209,-0.000152,0.175742,0.154313,0.149553,0.169063,0.159894,0.155075


In [34]:
# Span-20 exponentially weighted mean of the returns: last rows only.
df_all_mid_prices_day_123['Returns'].ewm(span=20, adjust=False).mean().tail()

timestamp
999500   -0.000013
999600   -0.000002
999700   -0.000012
999800   -0.000006
999900   -0.000020
Name: Returns, dtype: float64

In [35]:
# Span-63 exponentially weighted mean of the returns: last rows only.
df_all_mid_prices_day_123['Returns'].ewm(span=63, adjust=False).mean().tail()

timestamp
999500   -3.795073e-06
999600   -5.148018e-07
999700   -3.660390e-06
999800   -1.965125e-06
999900   -6.646588e-06
Name: Returns, dtype: float64

In [36]:
# Span-126 exponentially weighted mean of the returns: last rows only.
df_all_mid_prices_day_123['Returns'].ewm(span=126, adjust=False).mean().tail()

timestamp
999500   -9.304928e-07
999600    6.774459e-07
999700   -9.265078e-07
999800   -1.152543e-07
999900   -2.503549e-06
Name: Returns, dtype: float64

In [15]:
# Average implied volatility over every row of the merged frame.
df_all_mid_prices_day_123['Implied_Volatility'].mean()

0.15911781070657252

In [53]:
# Quantile levels on the low side of the IV distribution: 10% to 40% in 2.5-point steps.
lower_q = [0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.275, 0.3, 0.325, 0.35, 0.375, 0.4]

# Implied-volatility value at each of those levels, in the same order as lower_q.
lower_quantiles = [
    df_all_mid_prices_day_123['Implied_Volatility'].quantile(level)
    for level in lower_q
]

In [54]:
# Mirror each lower level around 0.5 (q -> 1 - q) and sort the result ascending.
upper_q = sorted(1 - x for x in lower_q)

# Implied-volatility value at each mirrored level, in the same order as upper_q.
upper_quantiles = [
    df_all_mid_prices_day_123['Implied_Volatility'].quantile(level)
    for level in upper_q
]

In [55]:
# Both lists are built from the same set of levels, so their lengths should match.
len(lower_quantiles), len(upper_quantiles)

(13, 13)

In [57]:
# Implied-volatility values at the lower quantile levels (ordered as lower_q).
lower_quantiles

[0.1548126483102763,
 0.15524564478925557,
 0.15559389236534613,
 0.15593730205783163,
 0.15627209500721972,
 0.15654949361399448,
 0.15680083271943834,
 0.157015403816229,
 0.157224422027324,
 0.15743515099803368,
 0.15766677190957712,
 0.15787996694243617,
 0.15811622787186605]

In [58]:
# Implied-volatility values at the upper quantile levels (ordered as upper_q).
upper_quantiles

[0.15992161636675647,
 0.16015348901861012,
 0.16038658352235557,
 0.16064647332822493,
 0.16092613458422378,
 0.1612014438414916,
 0.16148954535368149,
 0.1618132410062684,
 0.16213397937019522,
 0.16248968170571057,
 0.1628304893697475,
 0.16321238977320887,
 0.16371509536720638]