In [1]:
import yfinance as yf
from tqdm import tqdm
import numpy as np
from numpy import linalg as LA
import pandas as pd
from datetime import datetime, timedelta, date


from gtda.time_series import TakensEmbedding
from gtda.time_series import SlidingWindow
from gtda.time_series import SingleTakensEmbedding

from gtda.time_series import PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude
from gtda.diagrams import PersistenceLandscape


from gtda.pipeline import make_pipeline


from sklearn import set_config
set_config(display='diagram')  # For HTML representations of pipelines

from sklearn.cluster import KMeans


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Download daily price data for four crypto pairs quoted in USD:
# Bitcoin, Ethereum, Litecoin and XRP.
# auto_adjust=False is passed explicitly because the "Adj Close" column
# selected below only exists when yfinance is not auto-adjusting prices,
# and newer yfinance releases changed the default of this flag.
data = yf.download(
    "BTC-USD ETH-USD LTC-USD XRP-USD",
    start="2016-01-01",
    end="2018-02-02",
    interval="1d",
    group_by="ticker",
    auto_adjust=False,
)

# Daily log returns per ticker, computed as log(1 + simple return).
# The first row is dropped: pct_change() has no previous observation there.
data_df = np.log(data.xs("Adj Close", level=1, axis=1).pct_change() + 1)[1:]

# Candidate window lengths (not referenced by the cells visible here).
wins = [50, 100]

[*********************100%***********************]  4 of 4 completed


In [3]:
# giotto-tda transformers used by the cells below, one per pipeline stage.

# Time-delay embedding of a single series, embedding dimension fixed at 4.
STE = SingleTakensEmbedding(
    dimension=4,
    parameters_type="fixed",
)

# Sliding windows of size 50 over the embedded points.
SW = SlidingWindow(
    size=50,
)

# Vietoris-Rips persistence with the library's default settings.
VR = VietorisRipsPersistence()

# Persistence landscapes with the library's default settings.
PL = PersistenceLandscape()

# Amplitude of each diagram under the "landscape" metric with p = 1.
Ampl = Amplitude(
    metric="landscape",
    metric_params=dict(p=1),
)

In [4]:
# Push the BTC-USD log-return series through the pipeline one stage at a time.
# The dates are resampled alongside the values so each output row keeps a
# timestamp.
x = data_df["BTC-USD"].to_numpy()  # 1-D array, so no transpose is needed
dates = data_df["BTC-USD"].index

# Stage 1: time-delay embedding (values and dates resampled together).
x_STE, dates_ste = STE.fit_transform_resample(x, dates)

# Stage 2: sliding windows over the embedded points.
x_sw, y_sw = SW.fit_transform_resample(X=x_STE, y=dates_ste)

# Stage 3: persistence diagrams, one per window.
x_vr = VR.fit_transform(x_sw, y_sw)

# Stage 4: persistence landscapes computed from the diagrams.
x_pl = PL.fit_transform(x_vr, y_sw)

# Optional visual checks of a single sample:
# VR.plot(x_vr, sample=1)
# PL.plot(x_pl, sample=1)

In [5]:
# Maximum of row 1 of each window's landscape array.
# NOTE(review): row 1 is presumably the H1 landscape under the default
# homology dimensions -- confirm against the giotto-tda version in use.
# The maxima are computed once and sliced twice; previously the identical
# comprehension over x_pl was evaluated two times.
c0_all = np.array([np.max(arr[1]) for arr in x_pl])
c0_function = c0_all[1:]     # value for window t
c0_derivative = c0_all[:-1]  # value for window t-1 (a lagged copy, not yet a difference)

# c1 = current value plus the absolute one-step change.
c1 = c0_function + np.abs(c0_function - c0_derivative)

# First column of the landscape amplitude of every diagram.
l1 = Ampl.fit_transform(X=x_vr, y=y_sw)[:, 0]

# Log prices, aligned positionally by keeping the last len(x_pl) entries.
# NOTE(review): this assumes the resampled windows are aligned to the end of
# the series -- verify against y_sw.
log_price = np.log(data.xs("Adj Close", level=1, axis=1)["BTC-USD"]).to_numpy()[-len(x_pl):]

# px.line(c1)

In [6]:
# Inspect log_price: the last len(x_pl) BTC-USD log prices.
log_price

array([6.04200554, 6.05198325, 6.05101567, 6.0687774 , 6.06962628,
       6.07190102, 6.08152687, 6.07562872, 6.04970754, 6.04417796,
       6.01844477, 5.99288855, 6.01054878, 6.02664107, 6.0257983 ,
       6.02794108, 6.03340035, 6.04427045, 6.02011029, 6.02602297,
       6.03173756, 6.03267843, 6.03311257, 6.0417322 , 6.01505412,
       6.0172395 , 6.02527402, 6.02419067, 6.0356943 , 6.03557948,
       6.03163196, 6.0335106 , 6.03534986, 6.05623354, 6.05027809,
       6.03192486, 6.02783507, 6.03243614, 6.03538571, 6.04233109,
       6.04240476, 6.04368691, 6.0498042 , 6.04834805, 6.04676915,
       6.04108532, 6.03885137, 6.04397159, 6.04614921, 6.05253613,
       6.04910592, 6.05039836, 6.06311757, 6.06511454, 6.05771797,
       6.06050309, 6.07651547, 6.08992659, 6.10796896, 6.09972909,
       6.10987408, 6.12808022, 6.13432168, 6.14437658, 6.09737069,
       6.10704736, 6.12051055, 6.10550279, 6.11340559, 6.0973302 ,
       6.10992288, 6.10193644, 6.10473969, 6.13036307, 6.12803

In [13]:
# TODO: plot every derived series
# TODO: plot against dates
# TODO: k-means clustering

# Date range (inclusive) shown in the figure.
# NOTE: this rebinds `dates`, which an earlier cell used for the return
# series' index; re-run that cell before re-running the pipeline.
dates = ["2016-01-01", "2018-08-06"]

# One row per window, starting from the second one: every column is either a
# first difference or sliced with [1:] so that all lengths agree.
calced_df = pd.DataFrame(
    {
        "log_price": log_price[1:],
        "log_returns": np.diff(log_price),
        "l1": l1[1:],
        "l1_diff": np.diff(l1),
        "c1": c1,
        "c0_deri": c0_derivative,
        "c0_fun": c0_function,
    },
    index=y_sw[1:],
)

# Slice the plotting range once; the original re-sliced the frame several
# times on every loop iteration.
plot_df = calced_df.loc[dates[0]:dates[1]]

# One subplot row per column, all sharing the x axis.
fig = make_subplots(
    rows=len(calced_df.columns),
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.01,
)
fig.update_layout(height=1000)
print(calced_df.keys())

for i, col in enumerate(calced_df.columns, start=1):
    series = plot_df[col]
    y_values = series.values
    if col == "norm":
        # A column named "norm" is scaled by its maximum before plotting.
        # No such column exists in calced_df at present.
        y_values = y_values / max(y_values)
    fig.add_trace(go.Scatter(x=series.index, y=y_values, name=col), row=i, col=1)

fig.show()

Index(['log_price', 'log_returns', 'l1', 'l1_diff', 'c1', 'c0_deri', 'c0_fun'], dtype='object')


In [None]:
# Clustering features arranged one row per feature: row 0 is log_price,
# row 1 is c1.
kmeans_predata = calced_df[["log_price", "c1"]].to_numpy().T

# Divide each feature row by its own maximum, then flip back to one row per
# sample for the clustering step.
kmeans_postdata = (kmeans_predata / kmeans_predata.max(axis=1, keepdims=True)).T


In [177]:
# Fit 5 clusters on the scaled (log_price, c1) features.
# n_init is pinned explicitly: scikit-learn changed its default, so leaving it
# unset makes the fitted clusters depend on the installed version even though
# random_state is fixed.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=0).fit(kmeans_postdata)