# Dependencies

In [7]:
# pip install ripser

In [8]:
# pip install ordpy

In [9]:
import polars as pl
import numpy as np
import yfinance as yf
import warnings
warnings.filterwarnings('ignore')

import plotly.graph_objects as go
import plotly.express as px

import pandas as pd
from ripser import Rips
import persim
from ordpy import weighted_permutation_entropy

from hedging_strategy import hedging_strategies, portfolio, risk

# Market Crash Indicators

In [10]:
# Define index names and date range
index_names = ['^GSPC', '^DJI', '^IXIC', '^RUT']
start_date_string = '1997-01-01'
# start_date_string = '2021-01-01'
end_date_string = '2024-04-21'

# Download and prepare data
raw_data = yf.download(index_names, start=start_date_string, end=end_date_string)
df_close = raw_data['Adj Close'].dropna(axis='rows')
df_close.index = pd.to_datetime(raw_data.index.strftime('%Y-%m-%d'))
P = df_close.to_numpy()
r = np.log(np.divide(P[1:], P[:-1]))

# Handle NaN values that might appear after log return calculation
r = np.nan_to_num(r)  # Replace NaNs with zero (or you might choose to drop them)


# Define and compute Wasserstein distances and permutation entropy
rips = Rips(maxdim=2)
w = 20
n = len(raw_data)-(2*w)+1
wasserstein_dists = np.zeros((n,1))
perm_entropy = np.zeros(n)
hawkes_values = np.zeros(n)

# Define the Hawkes process function
def hawkes_process(data, decay):
    alpha = np.exp(-decay)
    output = np.zeros_like(data)
    output[0] = data[0]
    for t in range(1, len(data)):
        output[t] = alpha * output[t - 1] + (1 - alpha) * data[t]
    return output

for i in range(n):
    dgm1 = rips.fit_transform(r[i:i+w])
    dgm2 = rips.fit_transform(r[i+w+1:i+(2*w)+1])
    wasserstein_dists[i] = persim.wasserstein(dgm1[0], dgm2[0])
    # # Calculate permutation entropy for the window
    # perm_entropy[i] = permutation_entropy(r[i:i+(2*w)+1], dx=1, dy=1, taux=1, tauy=1, normalized=True)
    # Calculate permutation entropy for the window, ensuring data is appropriately shaped
    flat_data = r[i:i+(2*w)+1].flatten()  # Flatten the data
    perm_entropy[i] = weighted_permutation_entropy(flat_data, dx=10, normalized=True)
    hawkes_values[i] = hawkes_process(flat_data, decay=0.1)[-1]


df_close['Wasserstein_distances'] = np.concatenate(
    (
        np.zeros(
            len(df_close)
            -
            len(wasserstein_dists)
        ),
        wasserstein_dists.flatten()
    )
)

df_close['Permutation_Entropy'] = np.concatenate(
    (
        np.zeros(
            len(df_close)
            -
            len(perm_entropy)
        ),
        perm_entropy
    )
)

df_close['Hawkes_Process'] = np.concatenate(
    (
        np.zeros(
            len(df_close)
            -
            len(hawkes_values)
        ),
        hawkes_values
    )
)

df_close.reset_index(inplace = True)
df_close.to_csv('Data/prices_with_indicators.csv', index = False)

[*********************100%%**********************]  4 of 4 completed


Rips(maxdim=2, thresh=inf, coeff=2, do_cocycles=False, n_perm = None, verbose=True)


In [11]:
df = pd.read_csv('Data/prices_with_indicators.csv')

## Visualize indicators 

In [12]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x = df['Date'], 
        y = df['^GSPC'], 
        name='^GSPC', 
        yaxis='y1', 
        mode = 'lines'
    )
)

indicators = [
    'Wasserstein_distances',
    'Permutation_Entropy',
    'Hawkes_Process'
]
for col in indicators:
    
    fig.add_trace(
        go.Scatter(
            x = df['Date'], 
            y = df[col], 
            name = col, 
            yaxis = 'y2', 
            mode = 'lines'
        )
    )



fig.update_layout(
    title='Homology, Permutation Entropy, and Market Drawdowns with Peaks',
    xaxis=dict(title='Date'),
    yaxis=dict(
        title='^GSPC',
        side='left',
        showgrid=True
    ),
    yaxis2=dict(
        title='indicators',
        side='right',
        overlaying='y',
        showgrid=False
    ),
)


# Define Hedging indicator

In [13]:
df['hedge'] = np.where(
    (df['Wasserstein_distances'] > 0.17)
    &
    (df['Wasserstein_distances'].shift() < 0.17),
    1,0
)

hedge_events = (
    df
    [df['hedge'] == 1]
    [['Date', '^GSPC']]
    .to_dict(
        orient = 'records'
    )
)

# Portfolio Holding & Reading Options Data

In [14]:
spy = yf.download('SPY')[['Adj Close']] * 500 # assume we hold 500 shares
spy_pl = (
    pl.from_pandas(
        spy.reset_index()
    )
    .with_columns(
        pl.col('Date').cast(pl.Date)
    )
    .rename(
        {
            'Adj Close' : 'Holding'
        }
    )
)

[*********************100%%**********************]  1 of 1 completed


In [15]:
option_data = (
    pl.read_parquet(
        'Data/SPX_options_1996_2024.parquet'
    )
    .with_columns(
        pl.col('date').cast(pl.Date),
        pl.col('exdate').cast(pl.Date)
    )
)

# Excecute Hedging

In [None]:
all_var = pl.DataFrame()

for event in  hedge_events:
    hedge_date = event['Date']
    
    hedge = hedging_strategies.hedging(
        hedge_date = hedge_date,
        holding_period = 300,
        options = option_data,
        min_dte = 300,
        close = event['^GSPC']
    )

    long_put_value = hedge.buy_put()
    short_call_value = hedge.short_call()
    bear_put_spread_value = hedge.bear_put_spread(
        strike2 = (
            spy_pl
            .filter(
                pl.col('Date') 
                .is_between(
                    pl.lit(hedge_date).str.strptime(pl.Date, '%Y-%m-%d') - pl.duration(days = 60),
                    pl.lit(hedge_date).str.strptime(pl.Date, '%Y-%m-%d')
                )
                
            )
            ['Holding']
            .min()
            / 50
        )
    )

    port = portfolio.portfolio(
        hedge_date = hedge_date,
        holding_value = spy_pl
    )

    do_nothing = (
        port
        .compute_portfolio()
    )

    long_put = (
        port
        .compute_portfolio(
            hedge_value = (
                long_put_value
                .select(
                    'date', 'hedge_value'
                )
            )
        )
    )

    short_call = (
        port
        .compute_portfolio(
            hedge_value = (
                short_call_value
                .select(
                    'date', 'hedge_value'
                )
            )
        )
    )

    bear_put_spread = (
        port
        .compute_portfolio(
            hedge_value = (
                bear_put_spread_value
                .select(
                    'date', 'hedge_value'
                )
            )
        )
    )

    returns_list = [
        pl.Series(name = 'do_nothing', values = do_nothing['returns'].to_numpy().flatten()[1:]),
        pl.Series(name = 'long_put', values = long_put['returns'].to_numpy().flatten()[1:]),
        pl.Series(name = 'short_call', values = short_call['returns'].to_numpy().flatten()[1:]),
        pl.Series(name = 'bear_put_spread', values = bear_put_spread['returns'].to_numpy().flatten()[1:])
    ]
    
    risk_info = (
        risk.risk(
            returns = returns_list
        )
    )
    
    var = risk_info.compute_var(99)

    all_var = (
        pl.concat(
            [
                all_var,
                var
                .select(
                    '99_var'
                )
                .rename(
                    {
                        '99_var' : f'{hedge_date}_99_var'
                    }
                )
            ],
            how = 'horizontal'
        )
    )
    print('='*100)
    print(f'HEDGING ON {hedge_date}')    

    (
        risk_info
        .visualize_returns(
            bins = 1/100,
            title = hedge_date,
            n_rows = 1, 
            height = 400, 
            width = 1500
        )
    )
    
all_var = (
    all_var
    .insert_column(
        0, 
        var['strategy']
    )
)

all_var

HEDGING ON 1998-11-13


# Hedging Analysis

In [None]:
ranking = (
    all_var
    .with_columns(
        pl.all().exclude('strategy')
        .rank('ordinal', descending = True) # rank 1 has lowest VaR
    )
    .unpivot(
        pl.selectors.numeric(),
        index = 'strategy'
    )
    .rename(
        {
            'value' : 'rank'
        }
    )
    .group_by(
        'strategy', 'rank'
    )
    .count()
    .sort(
        by = ['rank', 'count'],
        descending = [False, True])
)

ranking

strategy,rank,count
str,u32,u32
"""long_put""",1,6
"""short_call""",1,5
"""bear_put_spread""",1,3
"""long_put""",2,6
"""short_call""",2,5
…,…,…
"""long_put""",3,2
"""bear_put_spread""",3,2
"""bear_put_spread""",4,7
"""do_nothing""",4,6


In [None]:
(
    px.imshow(
        all_var.to_pandas().set_index('strategy'),
        text_auto = '.1%', 
        aspect = 'auto', 
        color_continuous_scale = 'Reds_r', 
        title = '99% VaR For Each Strategy'
    )
)