In [11]:
import yfinance as yf
from tqdm import tqdm
import numpy as np
from numpy import linalg as LA
import pandas as pd
from datetime import datetime, timedelta, date


from gtda.time_series import TakensEmbedding
from gtda.time_series import SlidingWindow
from gtda.time_series import SingleTakensEmbedding

from gtda.time_series import PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude
from gtda.diagrams import PersistenceLandscape


from gtda.pipeline import make_pipeline


from sklearn import set_config
set_config(display='diagram')  # For HTML representations of pipelines

from sklearn.cluster import KMeans


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Download daily price history from Yahoo Finance for four cryptocurrencies:
# BTC-USD, ETH-USD, LTC-USD and XRP-USD. Columns are grouped by ticker, so the
# frame has a two-level column index (ticker, price field).
#
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which removes the "Adj Close" column that is selected
# below (and again later in the notebook) and would raise a KeyError.
data = yf.download(
    "BTC-USD ETH-USD LTC-USD XRP-USD",
    start="2016-01-01",
    end="2018-02-02",
    interval="1d",
    group_by="ticker",
    auto_adjust=False,
)

# Daily log returns per ticker: log(1 + simple return). The first row is
# dropped because pct_change() has no previous price there and yields NaN.
data_df = np.log(data.xs("Adj Close", level=1, axis=1).pct_change()+1)[1:]

# NOTE(review): presumably candidate sliding-window sizes (the SlidingWindow
# below uses 50) -- not referenced by the cells visible here; confirm.
wins = [50, 100]

[*********************100%***********************]  4 of 4 completed


In [44]:
# Building blocks of the topological pipeline. Each transformer is configured
# once here and applied step by step in the cells that follow.

# Time-delay embedding of a single series with fixed (not searched)
# parameters and embedding dimension 4.
STE = SingleTakensEmbedding(
    parameters_type="fixed",
    dimension=4,
)

# Sliding windows of size 50 over the embedded points.
SW = SlidingWindow(
    size=50,
)

# Vietoris-Rips persistence with the library's default settings.
VR = VietorisRipsPersistence()

# Persistence landscapes with the library's default settings.
PL = PersistenceLandscape()

# Amplitude of each diagram under the "landscape" metric with p = 1.
Ampl = Amplitude(
    metric="landscape",
    metric_params=dict(p=1),
)

In [87]:
# Push the BTC-USD log-return series through the pipeline one stage at a time,
# carrying the dates along so every output can be matched to a date.
btc_log_returns = data_df["BTC-USD"]
x = np.transpose(btc_log_returns.to_numpy())
dates = btc_log_returns.index

# Stage 1: time-delay embedding; the *_resample variant also returns the
# target (here: the dates) resampled to match the transformed output.
x_STE, dates_ste = STE.fit_transform_resample(x, dates)

# Stage 2: sliding windows over the embedded points, again with resampled dates.
x_sw, y_sw = SW.fit_transform_resample(x_STE, dates_ste)

# Stage 3: one persistence diagram per window.
x_vr = VR.fit_transform(x_sw, y_sw)

# Stage 4: persistence landscapes computed from the diagrams.
x_pl = PL.fit_transform(x_vr, y_sw)

# Uncomment to inspect the diagram / landscape of a single window:
# VR.plot(x_vr, sample=1)
# PL.plot(x_pl, sample=1)

In [185]:
# Maximum of the landscape entry at index 1, taken for every window.
landscape_peaks = np.array([np.max(window[1]) for window in x_pl])

# Current value (windows 1..n-1) and the value one window earlier (0..n-2).
# Despite its name, c0_derivative holds the lagged peak, not a derivative;
# the change between consecutive windows is formed in c1 below.
c0_function = landscape_peaks[1:]
c0_derivative = landscape_peaks[:-1]

# Peak plus the absolute one-window change of the peak.
window_to_window_change = np.abs(c0_function - c0_derivative)
c1 = c0_function + window_to_window_change

# First column of the landscape amplitudes of the persistence diagrams.
amplitudes = Ampl.fit_transform(x_vr, y_sw)
l1 = amplitudes[:, 0]

# Log of the BTC-USD adjusted close, keeping only the trailing entries so
# the array has as many elements as there are landscapes.
btc_adj_close = data.xs("Adj Close", level=1, axis=1)["BTC-USD"]
log_price = np.log(btc_adj_close).to_numpy()[-len(x_pl):]

# px.line(c1)

In [188]:
# TODO: plot every derived series against its date, then cluster with k-means.
#
# Collect the derived series in one frame indexed by window date. The first
# window is dropped throughout ([1:]) so that all columns have the same length
# as c1, which needs the previous window's value.
calced_columns = {}
calced_columns["log_price"] = log_price[1:]
# Optional extra columns, currently disabled:
# calced_columns["log_returns"] = np.diff(log_price)
# calced_columns["l1"] = l1[1:]
# calced_columns["l1_diff"] = np.diff(l1)
calced_columns["c1"] = c1
calced_columns["c0_deri"] = c0_derivative
calced_columns["c0_fun"] = c0_function

calced_df = pd.DataFrame(calced_columns, index=y_sw[1:])

In [189]:
# Draw every column of calced_df in its own panel, stacked vertically and
# sharing the x axis so the series can be compared date by date.
series_names = list(calced_df.keys())

fig = make_subplots(
    rows=len(series_names),
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.01,
)
# Panel titles can be enabled by passing subplot_titles=calced_df.keys() above.
fig.update_layout(height=1000)

for panel_row, series_name in enumerate(series_names, start=1):
    series = calced_df[series_name]
    trace = go.Scatter(x=series.index, y=series.values, name=series_name)
    fig.add_trace(trace, row=panel_row, col=1)

fig.show()

# To fix the data, change which homology dimension is being used.

In [174]:
# Features used for clustering, stored one feature per row:
# row 0 is log_price, row 1 is c1.
kmeans_predata = calced_df[["log_price", "c1"]].to_numpy().T

# Divide each feature by its own maximum, then transpose back so that each
# row is one sample and each column one feature.
scaled_features = [feature / feature.max() for feature in kmeans_predata]
kmeans_postdata = np.array(scaled_features).T


In [177]:
# Partition the scaled (log_price, c1) samples into 5 clusters.
# random_state=0 makes the centroid initialisation repeatable. n_init is
# pinned to 10 because the library default changed from 10 to "auto" in
# scikit-learn 1.4 (with a FutureWarning in 1.2-1.3); leaving it implicit
# makes the resulting labels depend on the installed version.
kmeans = KMeans(n_clusters=5, random_state=0, n_init=10).fit(kmeans_postdata)

In [178]:
# Show the cluster index assigned to each row of kmeans_postdata, in the same
# order as the rows of calced_df from which the features were taken.
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2,
       2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,