In [1]:
import pandas as pd
import numpy as np
from app.modules import prep_data, train_model, select_features, build_dset, build_LSTM, build_BLSTM, build_AttentiveBLSTM
from app.modules import gen_test_df, gen_importance_df
from app.modules.lstm import save_model, load_model, get_lime_df, prep_data
from app.modules.models_meta import pred_models as models_dict
from app.api import GoogleTrends
import datetime as dt
import time

import lime
import lime.lime_tabular
from keras.callbacks import EarlyStopping

# Load the cached historical data from CSV; this is quicker than rebuilding it on every run.
data = pd.read_csv('hist_data_all.csv', parse_dates=['Date'], index_col='Date')

# Build one DataFrame per pre-selected coin, keyed by ticker.
# 'Close' is kept as the last column because the steps below treat the
# last column as the target when adding the BTC price feature.
dsets = {}
for t in list(data.Ticker.unique()):
    df = data.query('Ticker==@t')
    dsets[t] = df[['High', 'Low', 'Volume', 'FVX', 'TNX', 'TYX', 'Gtrend', 'Close']]

# BTC-USD is required further down (its close price becomes a feature for the
# other coins), so fail fast with a clear message if it is missing. This replaces
# a bare `dsets['BTC-USD']` expression that displayed nothing mid-cell.
if 'BTC-USD' not in dsets:
    raise KeyError("'BTC-USD' not found among the tickers in hist_data_all.csv")

# Apply simple feature selection. The return value is discarded, so this
# presumably prunes the frames inside `dsets` in place — TODO confirm.
select_features(dsets)

# Expose the BTC close price as an extra 'BTC' feature for every other coin,
# keeping 'Close' as the last column of each frame.
btc_price = dsets['BTC-USD']['Close'].rename('BTC')
for coin in list(dsets):
    if coin == 'BTC-USD':
        continue
    coin_df = dsets[coin]
    # Everything except the last column counts as an original feature.
    feature_cols = coin_df.columns[:-1].tolist()
    merged = coin_df.merge(btc_price, how='left', left_index=True, right_index=True)
    dsets[coin] = merged[feature_cols + ['BTC', 'Close']]

# Print a schema summary for every coin. DataFrame.info() writes its report to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() emitted a stray "None" line after each summary.
for coin, df in dsets.items():
    print(coin + '\n')
    df.info()

# Window length passed to prep_data for every coin.
timesteps = 60

# Run prep_data once per coin and keep the results keyed by ticker.
prep_dsets = {}
for coin, df in dsets.items():
    prep_dsets[coin] = prep_data(df, timesteps)

BTC-USD

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2849 entries, 2014-09-17 to 2022-06-28
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Volume  2849 non-null   int64  
 1   Gtrend  2849 non-null   float64
 2   TYX     2849 non-null   float64
 3   Close   2849 non-null   float64
dtypes: float64(3), int64(1)
memory usage: 175.8 KB
None
ETH-USD

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1702 entries, 2017-11-09 to 2022-06-28
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Volume  1702 non-null   int64  
 1   Gtrend  1702 non-null   float64
 2   FVX     1702 non-null   float64
 3   BTC     1702 non-null   float64
 4   Close   1702 non-null   float64
dtypes: float64(4), int64(1)
memory usage: 79.8 KB
None
LTC-USD

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2863 entries, 2014-09-17 to 2022-06-28
Data columns (total 5 columns):
 #   Co

# BTC - Bitcoin

In [None]:
# Select the coin to work on and unpack its prepared train/test data.
cc = 'BTC-USD'
timesteps = 60
ticker = cc[:3]  # first three characters of the pair symbol, i.e. 'BTC'
scope = '1 day ahead'
(
    X_train, y_train,
    X_test, y_test,
    xscaler, yscaler,
    test_dates,
) = prep_dsets[cc]

# Quick sanity check of the four split shapes, printed on one line.
print(*(split.shape for split in (X_train, y_train, X_test, y_test)))

In [None]:
# Rebuild the BTC LSTM architecture, restore its saved weights, fine-tune it
# and save the result with the '_prod' suffix.
model_id = 'Deep Learning LSTM'

# Input shape of the network; presumably 60 lags x 4 features, matching the
# saved-weights name 'BTC_LSTM_60_lags_4_fts' — TODO confirm.
input_shape = (60, 4)
model = build_LSTM(
    input_shape,
    num_rnns=2,
    dim_rnn=100,
    dense_units=100,
    drop=False,
    drop_rate=0.1,
)
model = load_model(model, 'BTC_LSTM_60_lags_4_fts', 'models/')

# Early stopping on val_loss: needs an improvement of at least 1e-4 within
# 20 epochs, and restores the best weights seen when it stops.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=20,
    verbose=0,
    mode='auto',
    restore_best_weights=True,
)

# NOTE(review): the model is fitted on X_test/y_test and validated on the same
# data, so val_loss is measured on the training samples and early stopping
# cannot detect overfitting — confirm this is intended for the production refresh.
history = model.fit(
    X_test,
    y_test,
    validation_data=(X_test, y_test),
    callbacks=[monitor],
    verbose=2,
    epochs=50,
    batch_size=1,
)

save_model(model, coin_ticker=ticker, suffix='_prod')

# Retrain and Save All Models

In [2]:
# Where the already-evaluated weights are read from and where the retrained
# production weights are written to.
test_models_path = './app/dashboard/test_models/'
prod_models_path = './app/dashboard/prod_models/'

# CSV files that accumulate predictions and feature importances.
predictions_csv = './models/predictions.csv'
importance_csv = './models/ft_importance.csv'

# Walk models_dict (coin -> model -> scope -> metadata) lazily, in the same
# order as three nested loops would.
retrain_jobs = (
    (coin_label, model_label, model_meta)
    for coin_label, coin_meta in models_dict.items()
    for model_label, mdl_dict in coin_meta.items()
    for scope_label, model_meta in mdl_dict.items()
)

for coin_label, model_label, model_meta in retrain_jobs:
    print('Retraining {} {}'.format(coin_label, model_label))

    # Production model metadata.
    builder_func = model_meta['builder_func']
    builder_kwargs = model_meta['builder_kwargs']
    lags = model_meta['lags']
    n_features = model_meta['n_features']
    ticker = model_meta['ticker']
    cc = ticker + '-USD'

    # Build the untrained architecture, then restore the stored weights.
    rebuilt_model = builder_func(in_shape=(lags, n_features), **builder_kwargs)
    # Drops the last five characters of the stored id
    # (presumably a '_prod' suffix — TODO confirm).
    model_id = model_meta['model_id'][:-5]
    model = load_model(rebuilt_model, model_id=model_id, root_path=test_models_path)

    # Prepared data for this coin.
    # NOTE(review): prep_dsets was built earlier with a fixed window of 60, so
    # `lags` is assumed to be 60 for every model — TODO confirm.
    timesteps = lags
    scope = '1 day ahead'
    X_train, y_train, X_test, y_test, xscaler, yscaler, test_dates = prep_dsets[cc]

    # Only Bitcoin models get their predictions and importances appended to the CSVs.
    if coin_label == 'BTC - Bitcoin':
        test_df = gen_test_df(
            model,
            X_test,
            y_test,
            yscaler,
            coin_name=coin_label,
            test_dates=test_dates,
            lags=timesteps,
            model_id=model_label,
            pred_scope=scope,
        )
        idf = gen_importance_df(model, dsets[cc], timesteps, ticker, model_label, scope)

        # Append the new predictions to the stored ones and write the file back.
        preds_df = pd.concat([
            pd.read_csv(predictions_csv, parse_dates=['Date'], index_col='Date'),
            test_df,
        ])
        preds_df.to_csv(predictions_csv, index=True)

        # Same append-and-rewrite for the feature importances.
        imp_df = pd.concat([pd.read_csv(importance_csv), idf])
        imp_df.to_csv(importance_csv, index=False)

        # LIME export is currently disabled:
        # limedf = get_lime_df(model, model_label, X_train, X_test, dsets, test_dates, coin_label, scope, yscaler)
        # lime_all = pd.read_csv('./models/lime.csv')
        # lime_all = pd.concat([lime_all, limedf])
        # lime_all.to_csv('./models/lime.csv', index=False)

    # Retrain the model for production.
    # NOTE(review): fitting and validating on the same X_test/y_test means
    # val_loss is measured on the training samples — confirm this is intended.
    monitor = EarlyStopping(
        monitor='val_loss',
        min_delta=1e-4,
        patience=20,
        verbose=0,
        mode='auto',
        restore_best_weights=True,
    )
    history = model.fit(
        X_test,
        y_test,
        validation_data=(X_test, y_test),
        callbacks=[monitor],
        verbose=2,
        epochs=50,
        batch_size=1,
    )

    save_model(model, coin_ticker=ticker, suffix='_prod',  path=prod_models_path)

Retraining NMC - Namecoin Deep Learning LSTM
Epoch 1/50
365/365 - 12s - loss: 0.0011 - mse: 0.0011 - val_loss: 9.4505e-04 - val_mse: 9.4505e-04 - 12s/epoch - 33ms/step
Epoch 2/50
365/365 - 10s - loss: 8.6781e-04 - mse: 8.6781e-04 - val_loss: 6.1447e-04 - val_mse: 6.1447e-04 - 10s/epoch - 28ms/step
Epoch 3/50
365/365 - 10s - loss: 8.2952e-04 - mse: 8.2952e-04 - val_loss: 5.3482e-04 - val_mse: 5.3482e-04 - 10s/epoch - 27ms/step
Epoch 4/50
365/365 - 10s - loss: 7.8110e-04 - mse: 7.8110e-04 - val_loss: 6.9679e-04 - val_mse: 6.9679e-04 - 10s/epoch - 27ms/step
Epoch 5/50
365/365 - 10s - loss: 8.1171e-04 - mse: 8.1171e-04 - val_loss: 5.4734e-04 - val_mse: 5.4734e-04 - 10s/epoch - 27ms/step
Epoch 6/50
365/365 - 10s - loss: 6.6740e-04 - mse: 6.6740e-04 - val_loss: 5.1291e-04 - val_mse: 5.1291e-04 - 10s/epoch - 27ms/step
Epoch 7/50
365/365 - 10s - loss: 7.8765e-04 - mse: 7.8765e-04 - val_loss: 5.9200e-04 - val_mse: 5.9200e-04 - 10s/epoch - 27ms/step
Epoch 8/50
365/365 - 10s - loss: 7.0820e-04 - 

In [2]:
import sqlite3 as sql
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy.types import Integer, Date, DateTime, Float, DATE

In [3]:
# Open the local SQLite database; PARSE_DECLTYPES lets sqlite3 convert values
# based on the declared column types.
conn = sql.connect('database.db', detect_types=sql.PARSE_DECLTYPES)

# Load the accumulated predictions and drop repeated rows.
# drop_duplicates() ignores the index, so 'Date' is moved back into a regular
# column *before* de-duplicating; otherwise rows from different dates that
# happen to share all other values would be collapsed into one.
preddf = (
    pd.read_csv('./models/predictions.csv', parse_dates=['Date'], index_col='Date')
    .reset_index()
    .drop_duplicates()
    .reset_index(drop=True)
)

# Load the accumulated feature importances, dropping repeated rows, and display them.
imp = pd.read_csv('./models/ft_importance.csv').drop_duplicates()
imp

Unnamed: 0,Feature,Importance,Metric,Coin,Model,Scope
0,Gtrend t-1,-0.000718,mae,ETH,Deep Learning LSTM,1 day ahead
1,Gtrend t-2,-0.000207,mae,ETH,Deep Learning LSTM,1 day ahead
2,Gtrend t-3,-0.000182,mae,ETH,Deep Learning LSTM,1 day ahead
3,Gtrend t-4,-0.000174,mae,ETH,Deep Learning LSTM,1 day ahead
4,FVX t-4,-0.000145,mae,ETH,Deep Learning LSTM,1 day ahead
...,...,...,...,...,...,...
4555,TYX t-2,0.000041,mse,BTC,Deep Learning LSTM,1 day ahead
4556,Close t-4,0.000134,mse,BTC,Deep Learning LSTM,1 day ahead
4557,Close t-3,0.000815,mse,BTC,Deep Learning LSTM,1 day ahead
4558,Close t-2,0.004886,mse,BTC,Deep Learning LSTM,1 day ahead


In [None]:
# Fetch the BTC-USD Google Trends rows dated after 2022-06-24.
# The values are bound as query parameters instead of being written as
# double-quoted literals: SQLite treats double-quoted tokens as identifiers and
# only falls back to string literals as a legacy quirk that can be disabled.
gt_data_local = pd.read_sql(
    'SELECT * FROM google_trend_hist WHERE (Ticker = ?) AND (Date > DATE(?))',
    conn,
    params=('BTC-USD', '2022-06-24'),
)

In [12]:
# Read the whole feature_importance_ep table back from SQLite and display it.
pd.read_sql(sql='SELECT * FROM feature_importance_ep', con=conn)

Unnamed: 0,Feature,Importance,Metric,Coin,Model,Scope
0,Gtrend t-1,-0.000718,mae,ETH,Deep Learning LSTM,1 day ahead
1,Gtrend t-2,-0.000207,mae,ETH,Deep Learning LSTM,1 day ahead
2,Gtrend t-3,-0.000182,mae,ETH,Deep Learning LSTM,1 day ahead
3,Gtrend t-4,-0.000174,mae,ETH,Deep Learning LSTM,1 day ahead
4,FVX t-4,-0.000145,mae,ETH,Deep Learning LSTM,1 day ahead
...,...,...,...,...,...,...
4555,TYX t-2,0.000041,mse,BTC,Deep Learning LSTM,1 day ahead
4556,Close t-4,0.000134,mse,BTC,Deep Learning LSTM,1 day ahead
4557,Close t-3,0.000815,mse,BTC,Deep Learning LSTM,1 day ahead
4558,Close t-2,0.004886,mse,BTC,Deep Learning LSTM,1 day ahead


In [21]:
# Load the stored LIME explanations and replace spaces in the column names
# with underscores.
limedf = (
    pd.read_csv('./models/lime.csv', parse_dates=['Date_dt'])
    .rename(columns=lambda col: col.replace(' ', '_'))
)

In [22]:
# Display the LIME frame (the notebook renders the cell's last expression).
limedf

Unnamed: 0,Feature,LIME_Weight,Predicted_Close_t+1,Date_dt,Model,Coin,Scope,Date
0,Close_t-1,266.882340,266.882340,2022-03-01,Deep Learning LSTM,ETH - Ethereum,1 day ahead,2022-03-01
1,Close_t-2,255.579509,255.579509,2022-03-01,Deep Learning LSTM,ETH - Ethereum,1 day ahead,2022-03-01
2,Close_t-0,237.614962,237.614962,2022-03-01,Deep Learning LSTM,ETH - Ethereum,1 day ahead,2022-03-01
3,Close_t-3,229.199775,229.199775,2022-03-01,Deep Learning LSTM,ETH - Ethereum,1 day ahead,2022-03-01
4,Close_t-4,200.510933,200.510933,2022-03-01,Deep Learning LSTM,ETH - Ethereum,1 day ahead,2022-03-01
...,...,...,...,...,...,...,...,...
24295,Close_t-5,360.500732,360.500732,2022-06-28,Deep Learning LSTM,BTC - Bitcoin,1 day ahead,2022-06-28
24296,Gtrend_t-0,267.166592,267.166592,2022-06-28,Deep Learning LSTM,BTC - Bitcoin,1 day ahead,2022-06-28
24297,Volume_t-49,253.033055,253.033055,2022-06-28,Deep Learning LSTM,BTC - Bitcoin,1 day ahead,2022-06-28
24298,Close_t-57,248.060930,248.060930,2022-06-28,Deep Learning LSTM,BTC - Bitcoin,1 day ahead,2022-06-28


In [4]:
# Reload the historical data and keep only the date, Gtrend and ticker columns.
gtrend = (
    pd.read_csv('hist_data_all.csv', parse_dates=['Date'])
    .loc[:, ['Date', 'Gtrend', 'Ticker']]
)