In [1]:
import yfinance as yf
from tqdm import tqdm
import numpy as np
from numpy import linalg as LA
import pandas as pd
from datetime import datetime, timedelta, date


from gtda.time_series import TakensEmbedding
from gtda.time_series import SlidingWindow
from gtda.time_series import SingleTakensEmbedding

from gtda.time_series import PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude
from gtda.diagrams import PersistenceLandscape

from gtda.plotting import plot_point_cloud


from gtda.pipeline import make_pipeline


from sklearn import set_config
set_config(display='diagram')  # For HTML representations of pipelines

from sklearn.cluster import KMeans


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def dates_to_index(dates, df):
    """Return the row position of each date within a date-indexed frame.

    For every requested date ``d`` the result is ``len(df.loc[:d]) - 1``,
    i.e. the position of the last row whose index label is on or before
    ``d`` (label slicing is inclusive of the end label).

    Parameters
    ----------
    dates : str, datetime-like, or list-like of those
        One date or several. Any non-list-like scalar (a string, a
        ``datetime``, a ``pd.Timestamp``, ...) is treated as a one-element
        list.
    df : pandas.DataFrame or pandas.Series
        Object that supports label slicing by date; assumed to carry a
        sorted ``DatetimeIndex``.

    Returns
    -------
    list of int
        One position per input date, in input order. A date earlier than the
        first row yields ``-1`` because the label slice is empty.
    """
    # is_list_like is False for strings and for scalar datetimes, so every
    # kind of single date gets wrapped, not only a str.
    if not pd.api.types.is_list_like(dates):
        dates = [dates]
    # The loop variable is not called `date` so it cannot be confused with
    # the `date` class imported from datetime at the top of the notebook.
    return [len(df.loc[:d]) - 1 for d in dates]

In [2]:
# Download daily data for four US equity indices.
# ^GSPC = S&P 500, ^IXIC = Nasdaq Composite, ^DJI = Dow Jones Industrial
# Average, ^RUT = Russell 2000. (These are the indices themselves, not the
# SPY / QQQ ETFs.)
data = yf.download(
    "^GSPC ^DJI ^IXIC ^RUT",
    start="1992-01-01",
    end="2016-12-08",
    interval="1d",
    group_by="ticker",
    # Request unadjusted output explicitly so the "Adj Close" column selected
    # below is present regardless of the installed yfinance's default.
    auto_adjust=False,
)

# Daily log returns per index: log(1 + pct_change) == log(P_t / P_{t-1}).
# The first row is dropped because pct_change has no previous value there.
adj_close = data.xs("Adj Close", level=1, axis=1)
data_df = np.log(adj_close.pct_change() + 1)[1:]
# price_data = data_df[["^GSPC", "^DJI", "^IXIC", "^RUT"]]

# Presumably candidate sliding-window lengths (rows); in the visible cells it
# is only referenced from commented-out code.
wins = [50, 100]


[*********************100%***********************]  4 of 4 completed


In [3]:


# Row positions in data_df of the dates whose trailing windows are plotted.
window_end_idx = dates_to_index(["2000-5-08", "2000-5-09", "2000-5-31"], data_df)

layout = dict(
    width = 500,
    height = 250
)
plotly_params = {
    "layout": layout
}

# Number of rows per point cloud; same value as the SlidingWindow size used
# for the persistence computation.
window_size = 50

# 2-D point cloud: each row is one day's (^GSPC, ^IXIC) log-return pair.
returns_2d = data_df[["^GSPC", "^IXIC"]]

# The loop variable is deliberately not named `date`: that would rebind the
# `date` class imported from datetime for every later cell.
for end_idx in window_end_idx:
    # Clamp so a date near the start of the frame cannot produce a negative
    # start that wraps around to the end of the data.
    start_idx = max(end_idx - window_size, 0)
    # NOTE(review): the slice stops before end_idx, so the anchor date itself
    # is not part of the plotted window - confirm this is intended.
    cloud = returns_2d.iloc[start_idx:end_idx].to_numpy()
    plot_point_cloud(cloud, plotly_params=plotly_params).show()


In [4]:
# Return matrix (rows = days, columns = indices) and the matching date labels.
x = data_df.to_numpy()
dates = data_df.index

# Cut the return series into sliding windows of 50 rows; the date labels are
# resampled alongside so each window keeps an associated date in y_sw.
SW = SlidingWindow(size=50)
x_sw, y_sw = SW.fit_transform_resample(x, dates)

# Vietoris-Rips persistence diagrams (homology dimensions 0 and 1) for every
# window, each window being treated as a point cloud.
VR = VietorisRipsPersistence(
    homology_dimensions=[0, 1],
    n_jobs=6,
)
x_vr = VR.fit_transform(x_sw)

# PL = PersistenceLandscape()
# x_pl = PL.fit_transform(X = x_vr, y = y_sw)

# Landscape amplitude with p=1 for each diagram.
# Presumably yields one column per homology dimension - the later plotting
# cell reads column 1; confirm against the Amplitude documentation.
Ampl = Amplitude(
    metric="landscape",
    metric_params={"p": 1},
)
norm = Ampl.fit_transform(x_vr, y_sw)

In [5]:
# dates = ["2000-01-10"]
# inds = dates_to_index(dates)


# fig, axs = plt.subplots(1, 3)
# fig_size = 4
# fig.set_size_inches(fig_size*3, fig_size)

# axs[0].plot(np.arange(0, wins[0]), data_df.to_numpy()[inds[0]-50:inds[0]])
# axs[0].set_title("Price Returns")
# plot_diagrams(dgms, title="Persis Diagram", ax=axs[1])

# persim.landscapes.plot_landscape_simple(persim.landscapes.PersLandscapeExact(dgms, hom_deg=1), title="Persis Landscape", ax=axs[2])

# fig.suptitle(dates[0])
# plt.tight_layout()
# plt.show()

# TODO: adapt the commented-out code above to plot the price returns, the persistence diagram, and the persistence landscape side by side. This was done in a previous notebook for reference; it is not strictly needed here.

In [6]:
# Date range (start, end) used both for the download and for the plot.
dates = ["1996-02-01", "2000-02-01"]
clean_data = yf.download(
    "^GSPC ^VIX",
    start=dates[0],
    end=dates[1],
    interval="1d",
    group_by="ticker",
    # Request unadjusted output explicitly so the "Adj Close" column selected
    # below is present regardless of the installed yfinance's default.
    auto_adjust=False,
).xs("Adj Close", level=1, axis=1)

# The price Series are aligned to `index` by date label, so no positional
# offset is applied to them: slicing them with [50:] would only discard the
# first 50 trading days of prices from the plotted range.
calced_df = pd.DataFrame(
    {
        # "log_price": log_price[1:],
        "spy_price": clean_data["^GSPC"],
        # Column 1 of the amplitude array; to plot a different signal, change
        # which column (homology dimension, presumably) is selected here.
        "norm": norm[:,1][1:],
        "vix_price": clean_data["^VIX"],
        # "log_returns": np.diff(log_price),
    },
    index = y_sw[1:]
)

# Restrict to the plotted date range once instead of re-slicing per trace.
plot_df = calced_df[dates[0]:dates[1]]

# One stacked subplot per column, all sharing the date axis.
fig = make_subplots(rows=len(plot_df.columns), cols=1, shared_xaxes=True, vertical_spacing = 0.01)
fig.update_layout(
    height=1000,
)

for i, col in enumerate(plot_df.columns, start=1):
    series = plot_df[col]
    if col == "norm":
        # Scale the amplitude by its maximum over the plotted range;
        # Series.max() skips NaN, unlike the builtin max() on an ndarray.
        y_values = series.values / series.max()
    else:
        y_values = series.values
    fig.add_trace(go.Scatter(x=series.index, y=y_values, name=col), row=i, col=1)

fig.show()

[*********************100%***********************]  2 of 2 completed
