In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import data as DATADATA
import keras.saving
from keras.models import Sequential, load_model
from keras.layers import Input, LSTM, Dense
from keras.models import Model
from tensorflow.keras.optimizers import Adam


def build_uncertainty_model():
    inputs = Input(shape=(60, 1))
    x = LSTM(128, return_sequences=True)(inputs)
    x = LSTM(64, return_sequences=False)(x)
    x = Dense(25, activation='relu')(x)
    outputs = Dense(2)(x)  # [mean, log_variance]
    return Model(inputs, outputs)

from keras.models import clone_model
from sklearn.preprocessing import MinMaxScaler
import os
import tensorflow as tf

@keras.saving.register_keras_serializable()
def gaussian_nll(y_true, y_pred):
    mean = y_pred[:, 0]
    log_var = y_pred[:, 1]
    precision = tf.exp(-log_var)
    return tf.reduce_mean(0.5 * (log_var + tf.square(y_true - mean) * precision))


os.makedirs('models-KaggleSMA', exist_ok=True)

tickers = ['SOLUSDT', 'BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'DOGEUSDT',
           'ADAUSDT', 'ALGOUSDT', 'ATOMUSDT', 'AVAXUSDT', 'BCHUSDT',
           'DOTUSDT', 'EOSUSDT', 'LINKUSDT', 'LTCUSDT', 'MATICUSDT',
           'NEOUSDT', 'PEPEUSDT', 'UNIUSDT', 'XLMUSDT', 'TUSDT']
scalers = {}  # Save scalers per ticker for later testing

# Step 1: Build combined training dataset
combined_x_train = []
combined_y_train = []

for ticker in tickers:
    df = DATADATA.load_asset(ticker, sampling='1d')
    df = DATADATA.subset(df, start=pd.Timestamp('2022-01-01'), end=pd.Timestamp('2024-03-01'))
    df['Returns'] = df['Close'].pct_change()
    df.dropna(inplace=True)

    data = df.filter(['Returns'])
    dataset = data.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)
    scalers[ticker] = scaler

    for i in range(60, len(scaled_data)):
        combined_x_train.append(scaled_data[i - 60:i, 0])
        combined_y_train.append(scaled_data[i, 0])

# Convert combined dataset to numpy arrays
combined_x_train = np.array(combined_x_train)
combined_y_train = np.array(combined_y_train)
combined_x_train = np.reshape(combined_x_train, (combined_x_train.shape[0], combined_x_train.shape[1], 1))

# Step 2: Train base model
base_model = build_uncertainty_model()
base_model.compile(optimizer='adam', loss=gaussian_nll)
base_model.fit(combined_x_train, combined_y_train, batch_size=8, epochs=16)
base_model.save('models-KaggleSMA/base_model_uncertainty.keras')


https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2025-03.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2025-02.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2025-01.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-12.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-11.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-10.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-09.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-08.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-07.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-06.zip
https://data.binance.vision/data/spot/monthly/klines/ADAUSDT/1d/ADAUSDT-1d-2024-05.zip
https://data.binance.vision/data/spot/month

In [2]:
from tensorflow.keras.optimizers import Adam

tickers = ['SOLUSDT', 'BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'DOGEUSDT']

# Step 3: Fine-tune separate model per ticker
for ticker in tickers:
    df = DATADATA.load_asset(ticker, sampling='1d')
    df = DATADATA.subset(df, start=pd.Timestamp('2022-01-01'), end=pd.Timestamp('2024-03-01'))
    df['Returns'] = df['Close'].pct_change()
    df.dropna(inplace=True)

    data = df.filter(['Returns'])
    dataset = data.values

    scaler = scalers[ticker]
    scaled_data = scaler.transform(dataset)

    x_train = []
    y_train = []
    for i in range(60, len(scaled_data)):
        x_train.append(scaled_data[i - 60:i, 0])
        y_train.append(scaled_data[i, 0])

    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # Clone base model architecture and load weights
    fine_tuned_model = clone_model(base_model)
    fine_tuned_model.set_weights(base_model.get_weights())

    # Freeze base layers
    for layer in fine_tuned_model.layers:
        layer.trainable = False

    # Add a new trainable output layer
    model = build_uncertainty_model()
    model.set_weights(base_model.get_weights())
    model.compile(optimizer='adam', loss=gaussian_nll)
    for layer in model.layers[:-1]:
        layer.trainable = False

    model.compile(optimizer=Adam(1e-3), loss=gaussian_nll)
    model.fit(x_train, y_train, epochs=4)

    # Phase 2: unfreeze base layers but use lower LR
    for layer in model.layers:
        layer.trainable = True

    model.compile(optimizer=Adam(1e-4), loss=gaussian_nll)
    model.fit(x_train, y_train, epochs=4)

    model.save(f'models-KaggleSMA/lstm_uncertainty_variable_model_{ticker}.keras')

    # Predict mean and stddev
    preds = model.predict(x_train)
    pred_mean = preds[:, 0]
    pred_std = np.sqrt(np.exp(preds[:, 1]))

    pred_mean_rescaled = scaler.inverse_transform(pred_mean.reshape(-1, 1))

    valid = data[60:].copy()
    valid['PredictedMean'] = pred_mean_rescaled
    valid['PredictedStd'] = pred_std
    valid['Z-Score'] = (valid['Returns'] - valid['PredictedMean']) / valid['PredictedStd']
    display(valid.head(10))



Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: -2.1295
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.1408
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.1723
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -2.1512
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: -2.1353
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.1982
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.1924
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.1167
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.050258,0.000561,0.069353,-0.732763
2022-03-04,-0.076344,-0.000405,0.064698,-1.173745
2022-03-05,0.014832,-0.013695,0.077576,0.367738
2022-03-06,-0.056454,-0.009242,0.07841,-0.602125
2022-03-07,-0.034409,-0.002244,0.073414,-0.438134
2022-03-08,0.008082,-0.004125,0.070233,0.173809
2022-03-09,0.069728,-0.005261,0.067642,1.108619
2022-03-10,-0.058937,-0.002684,0.068526,-0.82089
2022-03-11,-0.026548,-0.000107,0.069156,-0.382325
2022-03-12,0.008801,-0.006188,0.064996,0.230615


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: -1.9006
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -1.9486
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.9556
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.8716
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: -1.9340
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.8921
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: -1.9209
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.9624
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.032784,0.002949,0.101909,-0.350636
2022-03-04,-0.077857,0.002131,0.094528,-0.846176
2022-03-05,0.006368,0.011425,0.206708,-0.024466
2022-03-06,-0.024802,0.007741,0.193753,-0.16796
2022-03-07,-0.011265,-0.001856,0.158957,-0.059193
2022-03-08,0.019549,-0.002663,0.13685,0.162312
2022-03-09,0.082908,-0.002625,0.114714,0.745626
2022-03-10,-0.060076,-0.003252,0.114654,-0.495616
2022-03-11,-0.017565,0.003601,0.128381,-0.164862
2022-03-12,0.002009,-0.00852,0.133589,0.078814


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: -1.9316
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.9427
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -1.8928
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -1.9053
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: -1.9168
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.9153
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -1.9182
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -1.9373
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.038445,-0.000931,0.097222,-0.385861
2022-03-04,-0.074767,0.00221,0.083012,-0.927306
2022-03-05,0.016418,-0.005815,0.102303,0.217328
2022-03-06,-0.042722,-0.011018,0.118907,-0.266626
2022-03-07,-0.023584,-0.003129,0.107078,-0.191032
2022-03-08,0.034061,-0.005094,0.105938,0.369605
2022-03-09,0.058623,-0.003927,0.095123,0.657566
2022-03-10,-0.044107,-0.003524,0.09967,-0.407184
2022-03-11,-0.01912,0.001917,0.10912,-0.192786
2022-03-12,0.00467,-0.00787,0.105802,0.118519


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: -2.3967
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -2.6703
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: -2.4250
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.6861
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: -2.3148
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.5534
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.3549
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: -2.5018
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.022375,0.0057,0.048843,-0.574809
2022-03-04,-0.05163,-0.000747,0.046288,-1.099269
2022-03-05,0.057949,-0.003668,0.045145,1.364881
2022-03-06,-0.038329,-0.011443,0.042668,-0.630102
2022-03-07,-0.007447,-0.003152,0.042902,-0.10011
2022-03-08,0.001389,-0.008251,0.04564,0.211227
2022-03-09,0.062578,-0.008808,0.046689,1.52897
2022-03-10,-0.038783,-0.007795,0.047457,-0.652973
2022-03-11,0.090749,0.00421,0.048597,1.78075
2022-03-12,-0.021422,-0.00684,0.048513,-0.30058


Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: -2.1296
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.2104
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.3063
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: -2.1748
Epoch 1/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: -2.2320
Epoch 2/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.2818
Epoch 3/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.2943
Epoch 4/4
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: -2.2516
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Unnamed: 0_level_0,Returns,PredictedMean,PredictedStd,Z-Score
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-03,-0.024812,0.002812,0.062759,-0.440167
2022-03-04,-0.054742,-0.001966,0.060356,-0.874415
2022-03-05,0.019576,-0.006881,0.061998,0.426741
2022-03-06,-0.0344,-0.010077,0.060932,-0.39919
2022-03-07,-0.03314,-0.003955,0.058388,-0.499844
2022-03-08,0.002571,-0.006161,0.058628,0.148935
2022-03-09,0.038462,-0.008187,0.057776,0.807402
2022-03-10,-0.03786,-0.00532,0.060151,-0.540967
2022-03-11,-0.011976,0.001033,0.064316,-0.202261
2022-03-12,-0.006926,-0.006092,0.062631,-0.013317
