In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
%load_ext autoreload
%autoreload 2
plt.style.use('ggplot')

2021-12-29 11:51:58.242829: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2


In [2]:
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
# Put the project checkout on the import path so that `config.get` can be imported.
# NOTE(review): this is a hard-coded absolute path; the notebook cannot import `cfg`
# on another machine or account without editing it.
import sys 
sys.path.append("/scratch/izar/kapps/DEX-Cyclic-Arbitrage/")
from config.get import cfg

# Train test split

In [24]:
# Load the raw feature array from the file configured under cfg['files']['raw_features'].
# presumably one (3, 600, 2) tensor per sample, matching the model input — TODO confirm
X = np.load(cfg['files']["raw_features"])

In [26]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. The row indices are split together
# with the features so every train/test row can be traced back to its position in X.
test_size = 0.3
N = X.shape[0]
X_train, X_test, train_id, test_id = train_test_split(
    X, np.arange(N), test_size=test_size, random_state=123
)
# Read the training-set size from the split itself: recomputing it as
# int(N * (1 - test_size)) is not guaranteed to match the size sklearn picked.
N_train = X_train.shape[0]
assert N_train + X_test.shape[0] == N

# Feature scaling (a StandardScaler fitted on X_train) is currently disabled,
# so the "*_scaled" names used by later cells are plain aliases of the raw splits.
X_train_scaled = X_train
X_test_scaled = X_test

In [27]:
# Persist both splits and their row indices to the paths configured in cfg['files'].
for _file_key, _split_array in (
    ('raw_test_features', X_test),
    ('raw_train_features', X_train),
    ('raw_test_ids', test_id),
    ('raw_train_ids', train_id),
):
    np.save(cfg['files'][_file_key], _split_array)

PermissionError: [Errno 13] Permission denied: '/scratch/izar/kapps/DEX-Cyclic-Arbitrage/data/ML_features/raw_test_features.npy'

# Model definition

In [4]:
import talos

In [28]:
def build_model(params=None):
    """Build and compile the convolutional autoencoder.

    The architecture is fixed: inputs of shape (3, 600, 2) are compressed by two
    conv/max-pool stages into a (1, 100, 1) code, which the decoder upsamples
    back to (3, 600, 2).

    Parameters
    ----------
    params : dict, optional
        Hyperparameter dictionary, accepted so the function can be used as a
        model builder by the tuning code. It is currently not read, so it
        defaults to None and `build_model()` can be called directly.

    Returns
    -------
    keras.Model
        The compiled autoencoder (Adam optimizer, mean-squared-error loss).
    """
    # build encoder: (3, 600, 2) -> (1, 200, 8) -> (1, 100, 4) -> (1, 100, 1)
    input_img = keras.Input(shape=(3, 600, 2))
    x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(input_img)
    x = layers.MaxPooling2D((3, 3), padding='same')(x)
    x = layers.Conv2D(4, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    encoded = layers.Conv2D(1, (2, 2), activation='relu', padding='same')(x)
    # build decoder: the two upsampling steps undo the (2, 2) and (3, 3) poolings
    # (the first pooling already collapsed the height to 1, hence the (1, 2) factor)
    x = layers.Conv2D(4, (2, 2), activation='relu', padding='same')(encoded)
    x = layers.UpSampling2D((1, 2))(x)
    x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((3, 3))(x)
    # NOTE(review): the ReLU output cannot reproduce negative inputs — confirm the
    # features are non-negative.
    decoded = layers.Conv2D(2, (3, 3), activation='relu', padding='same')(x)
    # combine encoder and decoder
    autoencoder = keras.Model(input_img, decoded)
    # NOTE(review): 'accuracy' is tracked alongside the MSE loss although this is a
    # reconstruction task — confirm it is wanted.
    autoencoder.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error')
    return autoencoder

In [29]:
# Build the fixed-architecture autoencoder. No hyperparameters are supplied here,
# so None is passed explicitly for `params`.
autoencoder = build_model(None)
autoencoder.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 3, 600, 2)]       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 3, 600, 8)         152       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 1, 200, 8)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 1, 200, 4)         292       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 1, 100, 4)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 1, 100, 1)         17        
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 1, 100, 4)        

# Training

In [None]:
# fit function for talos
def fit_model(X_train, y_train, X_val, y_val, params):
    """Train one autoencoder for a single hyperparameter combination.

    Written to be passed as the `model` argument of `talos.Scan`.

    Parameters
    ----------
    X_train, y_train : array-like
        Training inputs and targets. For the autoencoder the targets are the
        inputs themselves, so the caller should pass the same array for both.
    X_val, y_val : array-like
        Validation inputs and targets, used to monitor the validation loss.
    params : dict
        Hyperparameters for this run; 'batch_size' and 'epochs' are read here
        and the whole dictionary is forwarded to `build_model`.

    Returns
    -------
    tuple
        `(history, model)`: the Keras History object returned by `fit` and the
        trained model.
    """
    # build model
    autoencoder = build_model(params)
    # fit model on the data handed in by the caller (not on notebook globals)
    out = autoencoder.fit(
        X_train, y_train,
        batch_size=params['batch_size'],
        epochs=params['epochs'],
        shuffle=True,
        validation_data=(X_val, y_val),
        verbose=0,
        callbacks=[talos.utils.early_stopper(params['epochs'])],
    )
    # show the training-loss curve of this run
    plt.plot(out.history["loss"])
    plt.show()
    return out, autoencoder

Epoch 1/500
Epoch 2/500
Epoch 3/500

## Hyperparameter tuning

In [None]:
from tensorflow.keras.activations import relu, elu
# Hyperparameter grid for the scan: each key maps to the list of candidate
# values to try for that hyperparameter.
p = dict(
    activation=['relu', 'elu'],
    losses=['logcosh'],
    shapes=['brick'],
    first_neuron=[32],
    dropout=[0.2, 0.3],
    batch_size=[32, 64],
    epochs=[1],
)

In [None]:
# Run the hyperparameter scan over the grid `p`.
# The autoencoder is trained to reconstruct its input, so the training features
# are passed as both x and y. No explicit validation arrays are given: Talos then
# holds out part of x/y for validation itself, which keeps the test split unseen.
scan_object = talos.Scan(
    x=X_train_scaled, y=X_train_scaled, params=p,
    model=fit_model,
    experiment_name='ml4f-autoencoder',
)

In [None]:
# Evaluate up to `n_models` of the scanned models on held-out data using 5 shuffled folds.
# NOTE(review): `x_val` / `y_val` are not defined in any cell visible in this notebook —
# confirm which hold-out arrays are intended before running.
# NOTE(review): metric='f1score' with average='binary' looks like a classification setup,
# while the model is compiled with a mean-squared-error loss — confirm these settings.
scan_object.evaluate_models(
    x_val=x_val,
    y_val=y_val,
    n_models=10,
    metric='f1score',
    folds=5,
    shuffle=True,
    average='binary',
    asc=False
)

In [None]:
# Display the `data` attribute of the scan (presumably the per-run results table — TODO confirm).
scan_object.data

# Final evaluation

In [10]:
from sklearn.metrics import mean_squared_error


def print_mse(compute_pred = lambda x :autoencoder.predict(x)):
    """Print the reconstruction MSE on the train and test splits.

    `compute_pred` maps an input array to its reconstruction; by default it is
    the `predict` method of the notebook-level `autoencoder`. Inputs and
    reconstructions are flattened to 1-D before the error is computed.
    """
    def _flat_mse(samples):
        # element-wise error over every value of every sample
        return mean_squared_error(samples.reshape(-1), compute_pred(samples).reshape(-1))

    ae_of_train = _flat_mse(X_train_scaled)
    ae_of_test = _flat_mse(X_test_scaled)
    print(f"MSE train : {ae_of_train} \nMSE test  : {ae_of_test}")


print_mse()

MSE train : 25541.59590293752 
MSE test  : 26834.885744139337


In [11]:
from sklearn import decomposition

# PCA baseline for the autoencoder: 100 components, the same number of values
# as the (1, 100, 1) code produced by the encoder.
pca = decomposition.PCA(n_components=100)
# Flatten every sample to one row. The row count is read from each array itself
# rather than from N / N_train computed in an earlier cell, so this cell does not
# depend on that state being present and consistent.
tX_train = X_train_scaled.reshape(X_train_scaled.shape[0], -1)
tX_test = X_test_scaled.reshape(X_test_scaled.shape[0], -1)
pca.fit(tX_train)
# Reconstruction error: project onto the components and map back to feature space.
pca_train = mean_squared_error(tX_train, pca.inverse_transform(pca.transform(tX_train)))
pca_test = mean_squared_error(tX_test, pca.inverse_transform(pca.transform(tX_test)))
print(f"MSE train : {pca_train} \nMSE test  : {pca_test}")

MSE train : 1237.185732071038 
MSE test  : 1838.7659055421211


In [12]:
# Persist the trained autoencoder under the relative path 'autoencoder_model'
# (resolved against the notebook's current working directory).
autoencoder.save('autoencoder_model')

2021-12-21 15:20:06.729392: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


# Load model

In [13]:
# Reload the saved autoencoder and the raw feature array, so the cells below
# can be run without retraining the model.
autoencoder = keras.models.load_model('autoencoder_model')
X = np.load(cfg['files']["raw_features"])

In [6]:
# Index of the last layer that belongs to the encoder half of the autoencoder.
encoding_layer = 5


def extract_layers(main_model, starting_layer_ix, ending_layer_ix):
    """Assemble a Sequential model from a contiguous slice of `main_model`'s layers.

    Layers with index `starting_layer_ix` through `ending_layer_ix` (both
    inclusive) are appended in order. The layer objects themselves are added,
    not copies, so the returned model uses the same layers as `main_model`.
    """
    sub_model = keras.Sequential()
    wanted_indices = range(starting_layer_ix, ending_layer_ix + 1)
    for layer in map(lambda ix: main_model.get_layer(index=ix), wanted_indices):
        sub_model.add(layer)
    return sub_model


# Extract the encoder part of the autoencoder.
encoder = extract_layers(autoencoder, 0, encoding_layer)