# Artificial Neural Network Methods

## Imports 

In [None]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [None]:
# Ask TensorFlow for the visible GPUs and switch each one to memory-growth mode
# before any of them is initialised.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Listing the logical devices is only used for the summary printed below.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        # (e.g. when this cell is re-run in a live kernel); report and carry on.
        print(e)

In [None]:
import sys; sys.path.insert(0, '..')

from src import preprocess as dp
from src import models

In [None]:
DEBUG = True
N_ASSETS = 1
EPOCHS = 200
WINDOW_SIZE = 15
BATCH_SIZE = 1024
PCT_VALIDATION = 10 # last 10% of the data are used as validation set

## Data Loading

Set the correct path for the data folder

In [None]:
#data_path = '/kaggle/input/'
data_path = '../data/'

Import asset details

In [None]:
asset_details = pd.read_csv(data_path + 'g-research-crypto-forecasting/asset_details.csv')

In [None]:
asset_details.sort_values(by='Asset_ID')

Import training data

In [None]:
train = pd.read_csv(data_path + "g-research-crypto-forecasting/train.csv")
train

In [None]:
train = train[train.Asset_ID == 1].copy()

In [None]:
# Convert timestamp
train['timestamp'] = pd.to_datetime(train['timestamp'], unit='s')
train

In [None]:
# Smaller dataset for debugging
if DEBUG:
    train = train[train.timestamp.dt.year == 2021].copy()

## Data Pre-Processing

###  Feature Engineering

In [None]:
# ctrain_processed = dp.process_all_assets(train)
train_robust = dp.process_all_assets(train, scaler=RobustScaler())

In [None]:
#ctrain_processed_minmax_sc = dp.process_all_assets(train, scaler=MinMaxScaler())
train_minmax = dp.process_all_assets(train, scaler=MinMaxScaler())

In [None]:
train_robust.head()

In [None]:
train_minmax.head()

In [None]:
train_robust[train_robust.is_real==0].shape

In [None]:
feature_cols = train_robust.columns.drop(['Asset_ID', 'Target', 'timestamp', 'is_real'])

In [None]:
feature_cols

In [None]:
len(feature_cols)

### Time Series Reconstruction

In [1]:
def train_val_batches(df):
    """Split a processed frame into train/validation batch generators.

    The trailing ``PCT_VALIDATION`` percent of the time steps (rounded up)
    form the validation set; all earlier steps form the training set.

    Args:
        df: Processed data holding a ``Target`` column and every column named
            in ``feature_cols``. Rows are assumed to be ordered by time with
            ``N_ASSETS`` consecutive rows per time step -- TODO confirm
            against ``dp.process_all_assets``.

    Returns:
        A ``(train_generator, val_generator)`` tuple, each produced by
        ``models.sample_generator`` called with ``length=WINDOW_SIZE`` and
        ``batch_size=BATCH_SIZE``.
    """
    # Targets: one row per time step, one column per asset.
    targets = df['Target'].to_numpy().reshape(-1, N_ASSETS)

    # Features: (time steps, assets, features).
    train_data = df[feature_cols].to_numpy()
    train_data = train_data.reshape(-1, N_ASSETS, train_data.shape[-1])

    # Train / validation split.
    # PCT_VALIDATION is a percentage, so scale by 100 instead of dividing the
    # length by it (the two only coincide when the value is 10). The double
    # negation is an integer ceil, which keeps the rounding of the previous
    # `-len(train_data) // PCT_VALIDATION` expression.
    n_steps = len(train_data)
    n_val = -(-n_steps * PCT_VALIDATION // 100)
    # An explicit split index avoids the `[:-0]` pitfall when n_val is 0.
    split = n_steps - n_val

    X_train, X_test = train_data[:split], train_data[split:]
    y_train, y_test = targets[:split], targets[split:]

    # Batch generators
    train_generator = models.sample_generator(
        X_train, y_train, length=WINDOW_SIZE, batch_size=BATCH_SIZE
    )
    val_generator = models.sample_generator(
        X_test, y_test, length=WINDOW_SIZE, batch_size=BATCH_SIZE
    )

    return train_generator, val_generator

In [None]:
train_generator, val_generator = train_val_batches(train_robust)
print(f'Sample shape: {train_generator[0][0].shape}')
print(f'Target shape: {train_generator[0][1].shape}')

In [None]:
train_generator_mm, val_generator_mm = train_val_batches(train_minmax)
print(f'Sample shape: {train_generator_mm[0][0].shape}')
print(f'Target shape: {train_generator_mm[0][1].shape}')

In [None]:
# Fix TensorFlow's global seed for this session.
tf.random.set_seed(0)

# Stop when val_loss has not improved for 7 epochs and restore the best weights.
estop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=7,
    verbose=0,
    mode='min',
    restore_best_weights=True,
)

# X_train only exists inside train_val_batches, so the decay horizon is derived
# from the generator instead: half the number of batches per epoch, which is
# approximately the former 0.5 * len(X_train) / BATCH_SIZE.
# Assumes models.sample_generator returns an object supporting len()
# (e.g. a keras Sequence) -- TODO confirm.
decay_steps = 0.5 * len(train_generator)
scheduler = keras.optimizers.schedules.ExponentialDecay(1e-3, decay_steps, 1e-3)

# NOTE(review): LearningRateScheduler invokes the schedule once per epoch, so
# the schedule's "step" argument is the epoch index here -- confirm this is the
# intended decay speed.
lr = keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

## LSTM

In [None]:
performances_df = pd.DataFrame()

### Model 1

Robust Scaler

In [None]:
model_1 = models.get_model_LSTM(train_generator, N_ASSETS)
model_1.summary()

In [None]:
epochs = EPOCHS
history_1 = model_1.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_1 = models.plot_training_history(history_1)
fig_1.show()

In [None]:
predictions_1 = model_1.predict(val_generator)

In [None]:
# Score the Model 1 validation predictions.
# NOTE(review): y_test is a local variable of train_val_batches and is only
# assigned at notebook level in the "Change features" section, so this cell
# fails with NameError on a top-to-bottom run -- the validation targets need to
# be exposed before this point.
model_1_performance = models.prediction_details(predictions=predictions_1,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_1.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_1_performance], ignore_index=True)
performances_df

MinMax Scaler

In [None]:
model_1_mm = models.get_model_LSTM(train_generator_mm, N_ASSETS)
model_1_mm.summary()

In [None]:
epochs = EPOCHS
history_1_mm = model_1_mm.fit(train_generator_mm, validation_data=(val_generator_mm), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_1_mm = models.plot_training_history(history_1_mm)
fig_1_mm.show()

In [None]:
predictions_1_mm = model_1_mm.predict(val_generator_mm)

In [None]:
model_1_mm_performance = models.prediction_details(predictions=predictions_1_mm,
                                                   y_test=y_test,
                                                   window_size=WINDOW_SIZE,
                                                   asset_details=asset_details,
                                                   model_name=model_1_mm.name + '_mm',
                                                   assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_1_mm_performance], ignore_index=True)
performances_df

### Model 2

Multivariate 2-Layered Bidirectional LSTM

In [None]:
model_2 = models.get_model_Bidirectional_2_layer_LSTM(train_generator, N_ASSETS)
model_2.summary()

In [None]:
epochs = EPOCHS
history_2 = model_2.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_2 = models.plot_training_history(history_2)
fig_2.show()

In [None]:
predictions_2 = model_2.predict(val_generator)

In [None]:
model_2_performance = models.prediction_details(predictions=predictions_2,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_2.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_2_performance], ignore_index=True)
performances_df

### Model 3

2 LSTM

In [None]:
model_3 = models.get_model_Double_LSTM(train_generator, N_ASSETS)
model_3.summary()

In [None]:
epochs = EPOCHS
history_3 = model_3.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_3 = models.plot_training_history(history_3)
fig_3.show()

In [None]:
predictions_3 = model_3.predict(val_generator)

In [None]:
model_3_performance = models.prediction_details(predictions=predictions_3,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_3.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_3_performance], ignore_index=True)
performances_df

### Model 4

In [None]:
model_4 = models.get_model_LSTM_dropout(train_generator, N_ASSETS)
model_4.summary()

In [None]:
epochs = EPOCHS
history_4 = model_4.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_4 = models.plot_training_history(history_4)
fig_4.show()

In [None]:
predictions_4 = model_4.predict(val_generator)

In [None]:
model_4_performance = models.prediction_details(predictions=predictions_4,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_4.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_4_performance], ignore_index=True)
performances_df

### Model 5

In [None]:
model_5 = models.get_model_Conv1D_Double_LSTM(train_generator, N_ASSETS)
model_5.summary()

In [None]:
epochs = EPOCHS
history_5 = model_5.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_5 = models.plot_training_history(history_5)
fig_5.show()

In [None]:
predictions_5 = model_5.predict(val_generator)

In [None]:
model_5_performance = models.prediction_details(predictions=predictions_5,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_5.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_5_performance], ignore_index=True)
performances_df

### Model 6

In [None]:
model_6 = models.get_model_Triple_LSTM(train_generator, N_ASSETS)
model_6.summary()

In [None]:
epochs = EPOCHS
history_6 = model_6.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_6 = models.plot_training_history(history_6)
fig_6.show()

In [None]:
predictions_6 = model_6.predict(val_generator)

In [None]:
model_6_performance = models.prediction_details(predictions=predictions_6,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_6.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_6_performance], ignore_index=True)
performances_df

### TCN

Can't run it locally :(

In [None]:
model_7 = models.get_model_TCN(train_generator, N_ASSETS)
model_7.summary()

In [None]:
epochs = EPOCHS
history_7 = model_7.fit(train_generator, validation_data=(val_generator), epochs=epochs, callbacks=[lr, estop])

In [None]:
fig_7 = models.plot_training_history(history_7)
fig_7.show()

In [None]:
predictions_7 = model_7.predict(val_generator)

In [None]:
# Score the TCN (model_7) validation predictions under their own name: the
# next cell appends model_7_performance, and assigning to model_6_performance
# here would overwrite the Model 6 results instead.
model_7_performance = models.prediction_details(predictions=predictions_7,
                                                y_test=y_test,
                                                window_size=WINDOW_SIZE,
                                                asset_details=asset_details,
                                                model_name=model_7.name,
                                                assets=train.Asset_ID.unique())

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat adds the rows instead.
# Assumes models.prediction_details returns a DataFrame -- TODO confirm.
performances_df = pd.concat([performances_df, model_7_performance], ignore_index=True)
performances_df

### Change features

In [None]:
ctrain_mini.columns



In [None]:
# Re-run the preprocessing with the extended feature set (more_feat=True).
# NOTE(review): ctrain_mini is not assigned in any visible cell of this
# notebook; presumably it is a stale name for the (DEBUG-reduced) `train`
# frame -- confirm before running.
ctrain_processed_more_feat = dp.process_all_assets(ctrain_mini,more_feat=True)

In [None]:
ctrain_processed_more_feat.head()

In [None]:
# Use the notebook-level N_ASSETS constant; no `Constants` object is imported
# or defined in this notebook.
# Targets: one row per time step, one column per asset.
targets = ctrain_processed_more_feat['Target'].to_numpy().reshape(-1, N_ASSETS)

# Every remaining column is treated as a model feature.
features = ctrain_processed_more_feat.columns.drop(['Asset_ID', 'Target', 'group_num', 'is_real'])

# NOTE(review): this rebinds `train`, which earlier cells use as the raw
# training frame (e.g. train.Asset_ID); re-running those cells afterwards
# will fail.
train = ctrain_processed_more_feat[features]

# Features: (time steps, assets, features).
train_data = train.values.reshape(-1, N_ASSETS, train.shape[-1])


In [None]:
X_train, X_test = train_data[:-len(train_data)//PCT_VALIDATION], train_data[-len(train_data)//PCT_VALIDATION:]
y_train, y_test = targets[:-len(train_data)//PCT_VALIDATION], targets[-len(train_data)//PCT_VALIDATION:]

In [None]:
train_generator = models.sample_generator(X_train, y_train, length=WINDOW_SIZE, batch_size=BATCH_SIZE)
val_generator = models.sample_generator(X_test, y_test, length=WINDOW_SIZE, batch_size=BATCH_SIZE)

In [None]:
# Build the single-LSTM model for the extended feature set.
# NOTE(review): every other model in this notebook is built with
# models.get_model_*(train_generator, N_ASSETS); confirm that `get_modell`
# exists in src/models and really takes the raw (X_train, y_train) arrays.
model_more_feat_1lstm = models.get_modell(X_train, y_train)

In [None]:
history_model_more_feat_1lstm = model_more_feat_1lstm.fit(train_generator, validation_data=(val_generator), epochs=20, callbacks=[lr])

models.plot_training_history(history_model_more_feat_1lstm)


In [None]:

predictions_model_more_feat_1lstm = model_more_feat_1lstm.predict(val_generator)

In [None]:
# Score the extended-feature model the same way as the other models:
# `asset_details` is the frame loaded at the top of the notebook (no `info`
# variable is defined), and window_size is passed explicitly so it matches the
# WINDOW_SIZE used to build the generators.
perf_df_model_more_feat_1lstm = models.prediction_details(predictions=predictions_model_more_feat_1lstm,
                                                          y_test=y_test,
                                                          window_size=WINDOW_SIZE,
                                                          asset_details=asset_details,
                                                          model_name='LSTM_1layer_more_fear',
                                                          assets=range(N_ASSETS))

In [None]:
perf_df_model_more_feat_1lstm