In [300]:
import sktime
import pandas as pd
from sktime.transformations.panel.rocket import MiniRocket
import statsmodels
import joblib
from sktime.datatypes._panel._convert import(
    from_2d_array_to_nested,
    from_nested_to_2d_array,
    is_nested_dataframe
)
from sklearn.linear_model import RidgeCV
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
import numpy as np
import random

In [301]:
train = pd.read_csv("/Users/ryanl/Desktop/site/forecaster/api/BTC-USD.csv",
                            usecols=['Date', 'Close'],
                            parse_dates=['Date'],
                            infer_datetime_format=True)
train = train.set_index('Date').to_period("D")

In [302]:
def clean_dataset(df):
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)

clean_dataset(train)
train.round(2)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2014-09-17,457.33
2014-09-18,424.44
2014-09-19,394.80
2014-09-20,408.90
2014-09-21,398.82
...,...
2021-11-13,64469.53
2021-11-14,65466.84
2021-11-15,63557.87
2021-11-16,60161.25


In [303]:
y = train["Close"]

In [304]:
fourier = CalendarFourier(freq="Q", order=24)
dp = DeterministicProcess(
    index = train.index,
    constant = False,
    order=2,
    seasonal=True,
    #fourier=12,
    additional_terms=[fourier],
    drop=True,
)

In [305]:
x = dp.in_sample()
X_2d = x.values
X_nested = from_2d_array_to_nested(X_2d)
minirocket = MiniRocket()
minirocket.fit(X_nested)
X_nested_transform = minirocket.transform(X_nested)

In [306]:
model = RidgeCV(alphas=np.logspace(-3, 3, 10), normalize=True)


In [307]:
model.fit(X_nested_transform, y)

RidgeCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
        normalize=True)

In [308]:
from sklearn.metrics import mean_squared_log_error
def RMSLE(y_true: np.ndarray, y_pred: np.ndarray) -> np.float64:
    return np.sqrt(mean_squared_log_error(y_true, y_pred))

In [309]:
y_pred = model.predict(X_nested_transform)
y_pred[y_pred<0] = 0.0
print(RMSLE(y, y_pred))

0.4375407642898116


In [310]:
epoch = 0
freqlist = ['Q', 'M', 'D', 'A', 'B', 'W', 'H']
import warnings
warnings.filterwarnings("ignore")
best_model = [1, '', 0]

while(RMSLE(y, y_pred) > 0.025):
    randorder = np.random.randint(6, 24)
    randfreq = random.choices(freqlist)[0]
    fourier = CalendarFourier(freq=randfreq, order=randorder)
    dp = DeterministicProcess(
    index = train.index,
    constant = False,
    order=2,
    seasonal=True,
    additional_terms=[fourier],
    drop=True,
    )
    x = dp.in_sample()
    X_2d = x.values
    X_nested = from_2d_array_to_nested(X_2d)
    minirocket = MiniRocket()
    minirocket.fit(X_nested)
    X_nested_transform = minirocket.transform(X_nested)
    model = RidgeCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    model.fit(X_nested_transform, y)
    y_pred = model.predict(X_nested_transform)
    y_pred[y_pred<0] = 0.0
    loss = RMSLE(y, y_pred)
    if loss < best_model[0]:
        best_model = [loss, randfreq, randorder]
        joblib.dump(model, "./models/forecaster1.joblib")
        print("Epoch: " + str(epoch) + ", Loss: " + str(loss) + ", Order: " + str(randorder) + ", Freq: " + randfreq)
    epoch+=1
        

Epoch: 0, Loss: 0.041530572880363166, Order: 13, Freq: H
Epoch: 8, Loss: 0.040975802811998405, Order: 23, Freq: H
Epoch: 28, Loss: 0.03743084531330884, Order: 16, Freq: B
Epoch: 37, Loss: 0.032092687689842254, Order: 17, Freq: W
Epoch: 203, Loss: 0.027558222502020474, Order: 23, Freq: D


KeyboardInterrupt: 

In [None]:
print(best_model)

[0.03359373238922596, 'H', 13]
