# Modification of Market Crashes paper, using different time series
## By Harsha Somisetty

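Uses crude oil futures, the ten-year yield, and the dollar index (together with the S&P 500) as input time series, to better show the relationship between the L1 norm of the persistence landscape and S&P 500 prices.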

In [1]:
import yfinance as yf
import numpy as np
from numpy import linalg as LA
import pandas as pd


from gtda.time_series import SlidingWindow

from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def dates_to_index(dates, df):
    """Map one or more date labels to row positions in ``df``.

    Parameters
    ----------
    dates : str or iterable of str
        A single date label, or several of them. A lone string is treated
        as a one-element list.
    df : pandas.DataFrame
        Frame to look the dates up in (assumed to be indexed by sorted
        dates, so that ``df[:date]`` selects every row up to ``date``).

    Returns
    -------
    list of int
        For each date, ``len(df[:date]) - 1``: the position of the last row
        included in the slice up to that date. This is ``-1`` when the
        slice is empty.
    """
    if isinstance(dates, str):
        date_labels = [dates]
    else:
        date_labels = dates

    positions = []
    for label in date_labels:
        rows_up_to_label = df[:label]
        positions.append(len(rows_up_to_label) - 1)
    return positions

In [6]:
# Download daily price history and convert it to log returns.
#
# Tickers, in order: CL=F (crude oil futures), ^TNX (ten-year yield),
# DX-Y.NYB (dollar index), plus ^GSPC (the S&P series plotted later).
# The start date is presumably the earliest one with data for all series
# -- TODO confirm.
data = yf.download(
    "CL=F ^TNX DX-Y.NYB, ^GSPC",
    start="2000-08-23",
    end="2021-08-20",
    interval="1d",
    group_by="ticker",
)

# Columns are two-level; pick the "Adj Close" entry of the second level,
# which leaves one column per ticker.
adj_close = data.xs("Adj Close", level=1, axis=1)

# log(1 + simple return) for each ticker.
log_returns = np.log(adj_close.pct_change() + 1)

# Drop the first row (no previous price to compare against) and replace
# every remaining NaN with the constant -0.3.
# NOTE(review): -0.3 is a very large negative log return to inject for
# missing values -- confirm this placeholder is intended.
data_df = log_returns[1:].fillna(-.3)

# Candidate window sizes; not referenced by the cells visible here.
wins = [50, 100]

[*********************100%***********************]  4 of 4 completed


In [11]:
# Topological pipeline: sliding windows -> Vietoris-Rips persistence
# diagrams -> landscape amplitude with p=1.
x = data_df.to_numpy()
dates = data_df.index

# Windows of size 50 taken with stride 2; `dates` is resampled alongside so
# that `y_sw` carries the date labels matching the windows in `x_sw`.
SW = SlidingWindow(size=50, stride=2)
x_sw, y_sw = SW.fit_transform_resample(x, dates)

# Persistence diagrams in homology dimensions 0 and 1, computed with 6 jobs.
VR = VietorisRipsPersistence(homology_dimensions=[0, 1], n_jobs=6)
x_vr = VR.fit_transform(x_sw)

# Amplitude of each diagram under the "landscape" metric with p=1.
# Presumably one output column per homology dimension (later cells read
# column 1) -- TODO confirm.
Ampl = Amplitude(metric="landscape", metric_params={"p": 1})
norm = Ampl.fit_transform(x_vr)

In [9]:
# no stride
# NOTE(review): this cell has no stride setting of its own. The stride is
# fixed where `norm` and `y_sw` are computed (the SlidingWindow cell), so
# the label above only holds if that cell was last run with the matching
# setting -- confirm before trusting the plot.

# Plot S&P price, the normalised landscape norm and VIX on a shared time axis.
dates = ["2017-01-01", "2021-08-21"]
clean_data = yf.download(
    "^GSPC ^VIX",
    start=dates[0],
    end=dates[1],
    interval="1d",
    group_by="ticker",
).xs("Adj Close", level=1, axis=1)

# The price Series are aligned to `y_sw[1:]` by date label; the norm array
# is matched to that index by position, so both must have the same length.
# NOTE(review): the first 50 rows of each price series are dropped before
# alignment, so index dates that fall in that span get NaN prices --
# confirm this offset is still intended.
calced_df = pd.DataFrame(
    {
        "spy_price": clean_data["^GSPC"].iloc[50:],
        "norm": norm[:, 1][1:],  # column 1 of the amplitude output
        "vix_price": clean_data["^VIX"].iloc[50:],
    },
    index=y_sw[1:],
)

# Slice the plotting window once instead of re-slicing for every trace.
plot_df = calced_df.loc[dates[0]:dates[1]]

fig = make_subplots(
    rows=len(plot_df.columns),
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.01,
)
fig.update_layout(height=1000)

for i, col in enumerate(plot_df.columns, start=1):
    series = plot_df[col]
    y_values = series.to_numpy()
    if col == "norm":
        # Scale the norm by its maximum over the plotted window. The
        # vectorised Series.max() replaces the Python builtin max(), whose
        # result on an array containing NaN depends on element order.
        normed_norm = y_values / series.max()
        y_values = normed_norm
    fig.add_trace(go.Scatter(x=series.index, y=y_values, name=col), row=i, col=1)

fig.show()

# To change the plotted norm, change which homology dimension (which column
# of `norm`) is selected when building `calced_df`.

[*********************100%***********************]  2 of 2 completed


In [12]:
# stride
# NOTE(review): this cell has no stride setting of its own. The stride is
# fixed where `norm` and `y_sw` are computed (the SlidingWindow cell), so
# the label above only holds if that cell was last run with the matching
# setting -- confirm before trusting the plot.

# Plot S&P price, the normalised landscape norm and VIX on a shared time axis.
dates = ["2017-01-01", "2021-08-21"]
clean_data = yf.download(
    "^GSPC ^VIX",
    start=dates[0],
    end=dates[1],
    interval="1d",
    group_by="ticker",
).xs("Adj Close", level=1, axis=1)

# The price Series are aligned to `y_sw[1:]` by date label; the norm array
# is matched to that index by position, so both must have the same length.
# NOTE(review): the first 50 rows of each price series are dropped before
# alignment, so index dates that fall in that span get NaN prices --
# confirm this offset is still intended.
calced_df = pd.DataFrame(
    {
        "spy_price": clean_data["^GSPC"].iloc[50:],
        "norm": norm[:, 1][1:],  # column 1 of the amplitude output
        "vix_price": clean_data["^VIX"].iloc[50:],
    },
    index=y_sw[1:],
)

# Slice the plotting window once instead of re-slicing for every trace.
plot_df = calced_df.loc[dates[0]:dates[1]]

fig = make_subplots(
    rows=len(plot_df.columns),
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.01,
)
fig.update_layout(height=1000)

for i, col in enumerate(plot_df.columns, start=1):
    series = plot_df[col]
    y_values = series.to_numpy()
    if col == "norm":
        # Scale the norm by its maximum over the plotted window. The
        # vectorised Series.max() replaces the Python builtin max(), whose
        # result on an array containing NaN depends on element order.
        normed_norm = y_values / series.max()
        y_values = normed_norm
    fig.add_trace(go.Scatter(x=series.index, y=y_values, name=col), row=i, col=1)

fig.show()

# To change the plotted norm, change which homology dimension (which column
# of `norm`) is selected when building `calced_df`.

[*********************100%***********************]  2 of 2 completed
