In [1]:
import pandas as pd
import numpy as np
from app.modules import prep_data, train_model, select_features, build_dset, build_LSTM, build_BLSTM, build_AttentiveBLSTM
from app.modules import gen_test_df, gen_importance_df
from app.modules.lstm import save_model, series_to_supervised, load_model, get_lime_df, train_model, prep_data
from app.modules.models_meta import pred_models as models_dict
from app.api import GoogleTrends
import datetime as dt
import time

import lime
import lime.lime_tabular
from keras.callbacks import EarlyStopping

# Load the cached historical data from CSV instead of rebuilding it every time
data = pd.read_csv('hist_data_all.csv', parse_dates=['Date'], index_col='Date')

# One frame per pre-selected coin, restricted to the columns listed below
dset_columns = ['High', 'Low', 'Volume', 'FVX', 'TNX', 'TYX', 'Gtrend', 'Close']
dsets = {
    coin_ticker: data[data['Ticker'] == coin_ticker][dset_columns]
    for coin_ticker in data['Ticker'].unique()
}
dsets['BTC-USD']

# Apply simple feature selection.
# The return value is not used, so this presumably updates `dsets` in place -- confirm.
select_features(dsets)
dsets['BTC-USD']

# Run prep_data on every coin with the same number of timesteps
timesteps = 60
prep_dsets = {}
for coin, df in dsets.items():
    prep_dsets[coin] = prep_data(df, timesteps)

(2849, 4)
(1698, 4)
(2849, 4)
(2849, 4)
(2849, 4)
(2849, 4)


# BTC - Bitcoin

In [2]:
# Coin handled by the following cells and the settings used for it
cc = 'BTC-USD'
timesteps = 60
# Take everything before the '-' so that symbols which are not exactly three
# characters long are not truncated (the previous cc[:3] assumed 3 letters).
ticker = cc.split('-')[0]
scope = '1 day ahead'
# Unpack the prepared train/test arrays, scalers and test dates for this coin
X_train, y_train, X_test, y_test, xscaler, yscaler, test_dates = prep_dsets[cc]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(2424, 60, 4) (2424,) (365, 60, 4) (365,)


In [3]:
model_id = 'Deep Learning LSTM'

# Build the network, then pass it to load_model together with the stored
# model name and folder (presumably restores the saved weights -- confirm).
lstm_kwargs = dict(
    num_rnns=2,
    dim_rnn=100,
    dense_units=100,
    drop=False,
    drop_rate=0.1,
)
model = build_LSTM((60, 4), **lstm_kwargs)
model = load_model(model, 'BTC_LSTM_60_lags_4_fts', 'models/')

# Early stopping on val_loss: min_delta 1e-4, patience 50, best weights restored
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=50,
    verbose=0,
    mode='auto',
    restore_best_weights=True,
)

# NOTE(review): X_test / y_test are used both for fitting and as
# validation_data, so val_loss is measured on the data being trained on.
# Confirm this is intended for the production retrain.
fit_kwargs = dict(
    validation_data=(X_test, y_test),
    callbacks=[monitor],
    verbose=2,
    epochs=100,
    batch_size=1,
)
history = model.fit(X_test, y_test, **fit_kwargs)

# Store the retrained model under the '_prod' suffix (no explicit path is passed here)
save_model(model, coin_ticker=ticker, suffix='_prod')

Epoch 1/100
365/365 - 124s - loss: 0.0023 - mse: 0.0023 - val_loss: 0.0014 - val_mse: 0.0014 - 124s/epoch - 340ms/step
Epoch 2/100
365/365 - 109s - loss: 0.0021 - mse: 0.0021 - val_loss: 0.0011 - val_mse: 0.0011 - 109s/epoch - 299ms/step
Epoch 3/100
365/365 - 111s - loss: 0.0016 - mse: 0.0016 - val_loss: 8.9511e-04 - val_mse: 8.9511e-04 - 111s/epoch - 303ms/step
Epoch 4/100
365/365 - 111s - loss: 0.0014 - mse: 0.0014 - val_loss: 0.0013 - val_mse: 0.0013 - 111s/epoch - 303ms/step
Epoch 5/100
365/365 - 110s - loss: 0.0013 - mse: 0.0013 - val_loss: 9.9531e-04 - val_mse: 9.9531e-04 - 110s/epoch - 302ms/step
Epoch 6/100
365/365 - 109s - loss: 0.0012 - mse: 0.0012 - val_loss: 8.7117e-04 - val_mse: 8.7117e-04 - 109s/epoch - 300ms/step
Epoch 7/100
365/365 - 110s - loss: 0.0010 - mse: 0.0010 - val_loss: 9.9598e-04 - val_mse: 9.9598e-04 - 110s/epoch - 300ms/step
Epoch 8/100
365/365 - 104s - loss: 0.0012 - mse: 0.0012 - val_loss: 0.0015 - val_mse: 0.0015 - 104s/epoch - 285ms/step
Epoch 9/100
365/

# Retrain and Save All Models

In [3]:
test_models_path = './app/dashboard/test_models/'
prod_models_path = './app/dashboard/prod_models/'

# (coin, model) combinations that this loop leaves out
skip_pairs = {('BTC - Bitcoin', 'Deep Learning LSTM')}

# Early-stopping settings shared by every retraining run
early_stop_kwargs = dict(
    monitor='val_loss',
    min_delta=1e-4,
    patience=20,
    verbose=0,
    mode='auto',
    restore_best_weights=True,
)

for coin_label, coin_meta in models_dict.items():
    for model_label, mdl_dict in coin_meta.items():
        if (coin_label, model_label) in skip_pairs:
            continue

        for scope_label, model_meta in mdl_dict.items():
            print('Retraining {} {}'.format(coin_label, model_label))

            # Production model metadata
            builder_func = model_meta['builder_func']
            builder_kwargs = model_meta['builder_kwargs']
            lags = model_meta['lags']
            n_features = model_meta['n_features']
            ticker = model_meta['ticker']
            cc = ticker + '-USD'

            # Build a fresh, untrained model with the recorded input shape
            rebuilt_model = builder_func(in_shape=(lags, n_features), **builder_kwargs)

            # Load the stored model from the test folder. The id drops its last
            # five characters (presumably the '_prod' suffix -- confirm).
            model_id = model_meta['model_id'][:-5]
            model = load_model(rebuilt_model, model_id=model_id, root_path=test_models_path)

            # Prepared data for this coin
            timesteps = lags
            scope = '1 day ahead'
            X_train, y_train, X_test, y_test, xscaler, yscaler, test_dates = prep_dsets[cc]

            # Retrain for production. A new callback is created for each run.
            # NOTE(review): the same X_test / y_test serve as training and
            # validation data here -- confirm this is intended.
            monitor = EarlyStopping(**early_stop_kwargs)
            history = model.fit(
                X_test,
                y_test,
                validation_data=(X_test, y_test),
                callbacks=[monitor],
                verbose=2,
                epochs=50,
                batch_size=1,
            )

            save_model(model, coin_ticker=ticker, suffix='_prod', path=prod_models_path)

Retraining BTC - Bitcoin Bidirectional LSTM
Epoch 1/50
365/365 - 15s - loss: 9.3919e-04 - mse: 9.3919e-04 - val_loss: 0.0012 - val_mse: 0.0012 - 15s/epoch - 40ms/step
Epoch 2/50
365/365 - 11s - loss: 8.7307e-04 - mse: 8.7307e-04 - val_loss: 6.1388e-04 - val_mse: 6.1388e-04 - 11s/epoch - 31ms/step
Epoch 3/50
365/365 - 11s - loss: 8.1913e-04 - mse: 8.1913e-04 - val_loss: 7.0730e-04 - val_mse: 7.0730e-04 - 11s/epoch - 30ms/step
Epoch 4/50
365/365 - 11s - loss: 8.9674e-04 - mse: 8.9674e-04 - val_loss: 5.8496e-04 - val_mse: 5.8496e-04 - 11s/epoch - 30ms/step
Epoch 5/50
365/365 - 11s - loss: 7.2891e-04 - mse: 7.2891e-04 - val_loss: 8.2022e-04 - val_mse: 8.2022e-04 - 11s/epoch - 30ms/step
Epoch 6/50
365/365 - 11s - loss: 7.1766e-04 - mse: 7.1766e-04 - val_loss: 5.5644e-04 - val_mse: 5.5644e-04 - 11s/epoch - 30ms/step
Epoch 7/50
365/365 - 11s - loss: 7.9198e-04 - mse: 7.9198e-04 - val_loss: 5.3001e-04 - val_mse: 5.3001e-04 - 11s/epoch - 30ms/step
Epoch 8/50
365/365 - 11s - loss: 7.9897e-04 - m

In [27]:
import sqlite3 as sql
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy.types import Integer, Date, DateTime, Float, DATE
# Move the 'Date' index back into a column and keep only the Google Trends columns
history = data.reset_index().loc[:, ['Date', 'Ticker', 'Gtrend']]

In [28]:
# Connect to the local SQLite file with declared-type parsing enabled
conn = sql.connect('database.db', detect_types=sql.PARSE_DECLTYPES)
# Replace the table with the current history; the frame index is not written
history.to_sql(name='google_trend_hist', con=conn, if_exists='replace', index=False)


In [40]:
# Bind the filter values as parameters instead of embedding them in double
# quotes: SQLite parses "..." as an identifier and only falls back to a string
# literal as a legacy quirk that can be disabled in some builds.
gt_query = 'SELECT * FROM google_trend_hist WHERE (Ticker = ?) AND (Date > DATE(?))'
gt_data_local = pd.read_sql(gt_query, conn, params=('BTC-USD', '2022-06-24'))

In [41]:
# Latest date among the rows returned by the query
gt_data_local.loc[:, 'Date'].max()

Timestamp('2022-06-28 00:00:00')