In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import data as DATADATA
import keras.saving
from keras.models import Sequential, load_model
from keras.layers import Input, LSTM, Dense
from keras.models import Model
from tensorflow.keras.optimizers import Adam


def build_uncertainty_model():
    inputs = Input(shape=(60, 1))
    x = LSTM(128, return_sequences=True)(inputs)
    x = LSTM(64, return_sequences=False)(x)
    x = Dense(25, activation='relu')(x)
    outputs = Dense(2)(x)  # [mean, log_variance]
    return Model(inputs, outputs)

from keras.models import clone_model
from sklearn.preprocessing import MinMaxScaler
import os
import tensorflow as tf

@keras.saving.register_keras_serializable()
def gaussian_nll(y_true, y_pred):
    mean = y_pred[:, 0]
    log_var = y_pred[:, 1]
    precision = tf.exp(-log_var)
    return tf.reduce_mean(0.5 * (log_var + tf.square(y_true - mean) * precision))


os.makedirs('models-KaggleSMA', exist_ok=True)

tickers = ['SOLUSDT', 'BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'DOGEUSDT']
scalers = {}  # Save scalers per ticker for later testing

# Step 1: Build combined training dataset
combined_x_train = []
combined_y_train = []

for ticker in tickers:
    df = DATADATA.load_asset(ticker, sampling='1d')
    df = DATADATA.subset(df, start=pd.Timestamp('2022-01-01'), end=pd.Timestamp('2024-03-01'))
    df['Returns'] = df['Close'].pct_change()
    df.dropna(inplace=True)

    data = df.filter(['Returns'])
    dataset = data.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)
    scalers[ticker] = scaler

    for i in range(60, len(scaled_data)):
        combined_x_train.append(scaled_data[i - 60:i, 0])
        combined_y_train.append(scaled_data[i, 0])

# Convert combined dataset to numpy arrays
combined_x_train = np.array(combined_x_train)
combined_y_train = np.array(combined_y_train)
combined_x_train = np.reshape(combined_x_train, (combined_x_train.shape[0], combined_x_train.shape[1], 1))

# Step 2: Train base model
base_model = build_uncertainty_model()
base_model.compile(optimizer='adam', loss=gaussian_nll)
base_model.fit(combined_x_train, combined_y_train, batch_size=8, epochs=16)
base_model.save('models-KaggleSMA/base_model_uncertainty.keras')


Epoch 1/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 31ms/step - loss: -1.4410
Epoch 2/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 27ms/step - loss: -1.9333
Epoch 3/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9126
Epoch 4/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9250
Epoch 5/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9863
Epoch 6/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9482
Epoch 7/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9683
Epoch 8/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9230
Epoch 9/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: -1.9531
Epoch 10/16
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [2]:
from tensorflow.keras.optimizers import Adam


# Step 3: Fine-tune separate model per ticker
for ticker in tickers:
    df = DATADATA.load_asset(ticker, sampling='1d')
    df = DATADATA.subset(df, start=pd.Timestamp('2022-01-01'), end=pd.Timestamp('2024-03-01'))
    df['Returns'] = df['Close'].pct_change()
    df.dropna(inplace=True)

    data = df.filter(['Returns'])
    dataset = data.values

    scaler = scalers[ticker]
    scaled_data = scaler.transform(dataset)

    x_train = []
    y_train = []
    for i in range(60, len(scaled_data)):
        x_train.append(scaled_data[i - 60:i, 0])
        y_train.append(scaled_data[i, 0])

    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # Clone base model architecture and load weights
    fine_tuned_model = clone_model(base_model)
    fine_tuned_model.set_weights(base_model.get_weights())

    # Freeze base layers
    for layer in fine_tuned_model.layers:
        layer.trainable = False

    # Add a new trainable output layer
    model = build_uncertainty_model()
    model.set_weights(base_model.get_weights())
    model.compile(optimizer='adam', loss=gaussian_nll)
    for layer in model.layers[:-1]:
        layer.trainable = False

    model.compile(optimizer=Adam(1e-3), loss=gaussian_nll)
    model.fit(x_train, y_train, epochs=4)

    # Phase 2: unfreeze base layers but use lower LR
    for layer in model.layers:
        layer.trainable = True

    model.compile(optimizer=Adam(1e-4), loss=gaussian_nll)
    model.fit(x_train, y_train, epochs=4)

    model.save(f'models-KaggleSMA/lstm_uncertainty_variable_model_{ticker}.keras')

    # Predict mean and stddev
    preds = model.predict(x_train)
    pred_mean = preds[:, 0]
    pred_std = np.sqrt(np.exp(preds[:, 1]))

    pred_mean_rescaled = scaler.inverse_transform(pred_mean.reshape(-1, 1))

    valid = data[60:].copy()
    valid['PredictedMean'] = pred_mean_rescaled
    valid['PredictedStd'] = pred_std
    valid['Z-Score'] = (valid['Returns'] - valid['PredictedMean']) / valid['PredictedStd']
    display(valid.head(10))



Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: -2.0558
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: -2.0185
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: -2.0961
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.0960
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: -2.0346
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.1485
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.0829
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.0971
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.050258,0.00515,0.07271,-0.762048
2022-03-04,-0.076344,-0.000277,0.075982,-1.001126
2022-03-05,0.014832,-0.008329,0.079109,0.292774
2022-03-06,-0.056454,-0.011514,0.077306,-0.581322
2022-03-07,-0.034409,-0.00478,0.075528,-0.392297
2022-03-08,0.008082,-0.006465,0.076526,0.190094
2022-03-09,0.069728,-0.007825,0.07596,1.02097
2022-03-10,-0.058937,-0.003833,0.072967,-0.75519
2022-03-11,-0.026548,0.002195,0.073691,-0.390041
2022-03-12,0.008801,-0.006546,0.07726,0.198648


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: -1.8667
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: -1.8868
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.8247
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.8860
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: -1.8604
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.9337
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -1.8647
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -1.8635
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.032784,0.009432,0.087797,-0.480834
2022-03-04,-0.077857,0.000455,0.099379,-0.788012
2022-03-05,0.006368,-0.005539,0.106306,0.11201
2022-03-06,-0.024802,-0.008513,0.100901,-0.161435
2022-03-07,-0.011265,-0.002846,0.094143,-0.089429
2022-03-08,0.019549,-0.001091,0.093483,0.220787
2022-03-09,0.082908,-0.001231,0.092443,0.91017
2022-03-10,-0.060076,0.00144,0.085895,-0.716175
2022-03-11,-0.017565,0.006785,0.089651,-0.271606
2022-03-12,0.002009,-0.002934,0.100681,0.049094


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: -1.7555
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -1.7453
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.7033
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.8831
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: -1.8374
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.9006
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -1.8369
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.7623
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.038445,0.009474,0.092553,-0.517755
2022-03-04,-0.074767,0.003836,0.100947,-0.778662
2022-03-05,0.016418,-0.002815,0.108526,0.177223
2022-03-06,-0.042722,-0.006943,0.104269,-0.343138
2022-03-07,-0.023584,-0.001556,0.099634,-0.221093
2022-03-08,0.034061,-0.001332,0.100302,0.352864
2022-03-09,0.058623,-0.001607,0.097343,0.618728
2022-03-10,-0.044107,0.002964,0.091371,-0.51516
2022-03-11,-0.01912,0.007002,0.094664,-0.275941
2022-03-12,0.00467,-0.000415,0.10269,0.049517


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: -2.4184
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.6264
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -2.5776
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -2.3654
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: -2.6492
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.4466
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.5815
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.6166
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.022375,0.007679,0.050429,-0.595975
2022-03-04,-0.05163,0.005192,0.05074,-1.11985
2022-03-05,0.057949,0.001517,0.051076,1.104859
2022-03-06,-0.038329,-0.000293,0.050303,-0.756125
2022-03-07,-0.007447,0.002586,0.050381,-0.199137
2022-03-08,0.001389,0.002206,0.050364,-0.016221
2022-03-09,0.062578,0.001965,0.050304,1.20494
2022-03-10,-0.038783,0.003881,0.049777,-0.857105
2022-03-11,0.090749,0.006869,0.05022,1.670259
2022-03-12,-0.021422,0.007369,0.049732,-0.578936


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: -2.1448
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.1772
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.0328
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: -2.1720
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: -2.1356
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.2300
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.2028
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.1784
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.024812,0.007879,0.065486,-0.499205
2022-03-04,-0.054742,0.004519,0.066888,-0.885975
2022-03-05,0.019576,-0.001394,0.06757,0.310342
2022-03-06,-0.0344,-0.005015,0.066046,-0.444914
2022-03-07,-0.03314,-0.003221,0.065274,-0.458357
2022-03-08,0.002571,-0.003819,0.065086,0.098177
2022-03-09,0.038462,-0.00447,0.06442,0.666432
2022-03-10,-0.03786,-0.001212,0.063483,-0.577294
2022-03-11,-0.011976,0.002012,0.064301,-0.217536
2022-03-12,-0.006926,-0.000618,0.064991,-0.09706
