In [None]:
# Load the pre-built train and test sets; each frame is indexed by its
# 'Matching Time' column.
df_train_tmp = pd.read_csv('data/sets/train_set.csv').set_index('Matching Time')
df_test = pd.read_csv('data/sets/test_set.csv').set_index('Matching Time')

In [None]:
def train_validation_split(data, train_end_index):
    """Split a DataFrame by row position into train and validation parts.

    The split is purely positional (no shuffling): the first
    ``train_end_index`` rows form the train part and the remaining rows form
    the validation part. The existing index (e.g. 'Matching Time') is kept
    as-is, whatever its name.

    Args:
        data: DataFrame to split.
        train_end_index: Number of leading rows assigned to the train part.

    Returns:
        Tuple ``(train, validation)`` of independent DataFrame copies.
    """
    # Slice positionally instead of resetting and re-setting the index, so the
    # function no longer requires the index to be named 'Matching Time'.
    # Copies keep later edits of the splits from touching `data`.
    train = data.iloc[:train_end_index].copy()
    validation = data.iloc[train_end_index:].copy()
    return train, validation

In [None]:
# Fraction of rows (counted from the start) that go to the training split;
# the remaining rows become the validation split.
train_to_val_ratio = 0.8
# Row position at which the frame is cut.
train_index = int(len(df_train_tmp_reduced) * train_to_val_ratio)
# NOTE(review): df_train_tmp_reduced is not defined in the cells visible here
# (only df_train_tmp is loaded) — confirm the cell that creates it still exists.
df_train, df_val = train_validation_split(df_train_tmp_reduced, train_index)

In [None]:
# Sanity check: the two printed row counts should be equal if the split
# kept every row of the source frame.
print(len(df_train)+len(df_val))
print(len(df_train_tmp_reduced))

In [None]:
def create_x_y(df, window=10):
    """Turn consecutive rows of `df` into flattened windows and their labels.

    The frame is cut into non-overlapping chunks of `window` consecutive rows.
    For each chunk, the feature columns (every column except the first two and
    the last two) of all its rows are concatenated row by row into one flat
    vector, and the label is the 'TREND_MID_PRICE' value of the chunk's last
    row. Trailing rows that do not fill a whole chunk are discarded.

    Unlike a row-by-row loop that only flushes a chunk when the next one
    starts, the final complete chunk is kept as well.

    Args:
        df: DataFrame containing a 'TREND_MID_PRICE' column.
        window: Number of consecutive rows per sample (default 10).

    Returns:
        Tuple ``(X, y)`` of numpy arrays: X has shape
        ``(n_windows, window * n_feature_columns)`` and y has shape
        ``(n_windows,)``.

    Raises:
        ValueError: If `window` is not a positive integer.
    """
    if window <= 0:
        raise ValueError("window must be a positive integer")

    # Feature columns: everything except the first two and last two columns.
    features = df.iloc[:, 2:len(df.columns) - 2].to_numpy()
    labels = df['TREND_MID_PRICE'].to_numpy()

    n_windows = len(df) // window
    n_used = n_windows * window  # rows that belong to a complete window

    # Row-major reshape concatenates the rows of each window in order.
    X_set = features[:n_used].reshape(n_windows, window * features.shape[1]).copy()
    # Label of a window is taken from its last row.
    y_set = labels[window - 1:n_used:window].copy()
    return X_set, y_set

In [None]:
# Build the windowed features and labels for the training split, then encode
# the labels with to_categorical using 3 as the class count (the output layer
# of keras_model has 3 units).
X_train, y_train = create_x_y(df_train)
y_train = to_categorical(y_train, 3)

In [None]:
# Same preparation as for the training split, applied to the validation split.
X_val, y_val = create_x_y(df_val)
y_val = to_categorical(y_val, 3)

In [None]:
# Same preparation as for the training split, applied to the test set.
X_test, y_test = create_x_y(df_test)
y_test = to_categorical(y_test, 3)

In [None]:
def keras_model(n_layers, units, learning_rate, alpha_rate, input_dim=400):
    """Build and compile a dense softmax classifier.

    The model definition is kept separate from the tuner so it can be reused
    for both hyperparameter search and the final refit.

    Architecture: an input of `input_dim` features, then `n_layers` blocks of
    Dense (LeakyReLU activation) followed by Dropout(0.5), then a 3-unit
    softmax output. The model is compiled with categorical cross-entropy and
    an Adam optimizer using `learning_rate`.

    Args:
        n_layers: Number of hidden Dense/Dropout blocks.
        units: Nested sequence whose first element holds the layer widths,
            i.e. the width of hidden layer ``i`` is ``units[0][i]``.
        learning_rate: Learning rate given to the Adam optimizer.
        alpha_rate: `alpha` argument of the LeakyReLU activation.
        input_dim: Length of each flat input vector (default 400).
            # NOTE(review): 400 presumably equals 10 rows x 40 feature
            # columns from create_x_y — confirm against the data.

    Returns:
        The compiled Keras model.
    """
    layer_units = units[0]  # widths are nested one level deep (see Args)
    model = Sequential()
    model.add(layers.Input(shape=(input_dim,)))
    # Add the hidden blocks one by one, each with its own width
    for i in range(n_layers):
        model.add(layers.Dense(units=layer_units[i], activation=LeakyReLU(alpha=alpha_rate)))
        model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=3, activation='softmax'))
    # Pass the learning rate explicitly: the string 'adam' would always use
    # the optimizer's default rate and ignore the tuned value.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    )
    return model

class HyperRegressor(keras_tuner.HyperModel):
    """KerasTuner hypermodel wrapping `keras_model` for a fixed network depth.

    The number of hidden layers is fixed per instance; the width of every
    hidden layer, the learning rate and the LeakyReLU alpha are searched.
    """

    def __init__(self, n_layers, *args, **kwargs):
        """Store the depth and forward all remaining arguments to the parent.

        Args:
            n_layers: Number of hidden layers of the models this instance builds.
            *args, **kwargs: Passed unchanged to `keras_tuner.HyperModel`.
        """
        self.n_layers = n_layers
        super().__init__(*args, **kwargs)

    def build(self, hp):
        """Declare the search space and build a model for the sampled values.

        Args:
            hp: HyperParameters object provided by the tuner.

        Returns:
            The compiled model returned by `keras_model`.
        """
        # One width hyperparameter per hidden layer: units_1, units_2, ...
        layer_units = [
            hp.Int(f'units_{i + 1}', min_value=16, max_value=256, step=16)
            for i in range(self.n_layers)
        ]
        # keras_model reads the widths as units[0][i], so the list is wrapped
        # in a 1-tuple on purpose.
        units = (layer_units,)
        learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
        alpha_rate = hp.Float("alpha_rate", min_value=1e-4, max_value=1e-1, sampling="log")
        return keras_model(self.n_layers, units, learning_rate, alpha_rate)

    def fit(self, hp, model, x, y, validation_data, **kwargs):
        """Train `model` and return the objective value for the tuner.

        Args:
            hp: HyperParameters of the current trial (unused here).
            model: Model returned by `build`.
            x, y: Training inputs and one-hot targets.
            validation_data: Tuple ``(x_val, y_val)`` used only to score the
                trained model; it is not forwarded to `model.fit`.
            **kwargs: Forwarded to `model.fit` (e.g. epochs, batch_size,
                callbacks).

        Returns:
            A single float to minimize: the cross-entropy summed over all
            validation samples.
        """
        model.fit(x, y, **kwargs)
        x_val, y_val = validation_data
        # Clip probabilities away from 0 so log() stays finite; an exact 0
        # would otherwise give 0 * -inf = NaN (or inf) as the objective.
        y_pred = np.clip(model.predict(x_val), 1e-7, 1.0)
        return -np.sum(y_val * np.log(y_pred))

In [None]:
########################
# CONSTANTS DEFINITION #
########################

# Passed as `max_trials` to RandomSearch in tune_model.
MAX_TRIALS = 20
# Passed as `executions_per_trial` to RandomSearch in tune_model.
EXECUTION_PER_TRIAL = 3
# Passed as `epochs` to tuner.search in tune_model.
EPOCHS = 10
# Passed as `batch_size` to tuner.search in tune_model.
BATCH_SIZE = 16

def tune_model(n_layers=2):
    """Run a random hyperparameter search for networks with `n_layers` hidden layers.

    Uses the notebook-level X_train / y_train for fitting and X_val / y_val as
    the validation data handed to the hypermodel. Results are written under
    the 'IA_QR' directory, overwriting any previous project of the same name.

    Args:
        n_layers: Number of hidden layers of the searched architecture.

    Returns:
        The RandomSearch tuner after the search has finished.
    """
    # Stop a run once the monitored 'loss' has not improved for 3 epochs
    early_stopping = keras.callbacks.EarlyStopping(monitor='loss', patience=3)

    tuner = RandomSearch(
        hypermodel=HyperRegressor(n_layers),
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        overwrite=True,
        directory='IA_QR',
        project_name=f'NN_new_{n_layers}',
    )

    search_options = {
        'batch_size': BATCH_SIZE,
        'epochs': EPOCHS,
        'validation_data': (X_val, y_val),
        'callbacks': [early_stopping],
    }
    tuner.search(X_train, y_train, **search_options)
    return tuner

In [None]:
# Run one search per network depth (1 to 4 hidden layers) and keep, for each
# depth, the best hyperparameters, the best model and the tuner itself.
models = []
parameters = []
tuners = []
for n in range(1,5):
    tuner = tune_model(n)
    # Call the method and keep the best HyperParameters object; appending the
    # bare attribute would store the bound method instead of its result.
    parameters.append(tuner.get_best_hyperparameters(1)[0])
    models.append(tuner.get_best_models(1)[0])
    tuners.append(tuner)

In [None]:
# Print the best hyperparameter values found by each tuner, in search order.
for i in range(len(tuners)):
    print(tuners[i].get_best_hyperparameters()[0].values)

In [None]:
# Persist the best models and the tuners to disk with pickle.
# NOTE(review): pickling Keras models / tuner objects may not round-trip on
# every library version — verify these files can be loaded back.
with open('data/models.pkl','wb') as f:
    pickle.dump(models,f)
with open('data/tuners.pkl','wb') as f:
    pickle.dump(tuners,f)

In [None]:
def format_units(buffer_dict):
    """Convert tuned hyperparameters into keyword arguments for `keras_model`.

    Collects every ``units_<k>`` entry into one list ordered by ``k`` and
    nests it one level deep, because `keras_model` reads the width of layer
    ``i`` as ``units[0][i]``.

    Args:
        buffer_dict: Object exposing a ``values`` mapping of hyperparameter
            names to values and supporting ``buffer_dict[name]`` lookup.

    Returns:
        Dict with keys 'units' (``[[u_1, u_2, ...]]``), 'learning_rate' and
        'alpha_rate'.
    """
    prefix = 'units_'
    # Sort by the numeric suffix so layer order never depends on the
    # iteration order of the mapping.
    indexed_units = sorted(
        (int(key[len(prefix):]), value)
        for key, value in buffer_dict.values.items()
        if key.startswith(prefix) and key[len(prefix):].isdigit()
    )
    # Create new dict with the format keras_model expects
    return {
        'units': [[value for _, value in indexed_units]],
        'learning_rate': buffer_dict['learning_rate'],
        'alpha_rate': buffer_dict['alpha_rate'],
    }

# Rebuild one model per tuner from its best hyperparameters, refit it on the
# training data and record its metrics on the train, validation and test sets.
models_refitted = []
results = {}
for i in range(len(models)):
    # Build and refit model with best params
    callback = keras.callbacks.EarlyStopping(monitor='loss', patience=3)
    best_hps = format_units(tuners[i].get_best_hyperparameters()[0])
    # 'units' is nested as [[u_1, u_2, ...]]; count the inner list, since the
    # outer list always has length 1.
    n_layers = len(best_hps['units'][0]) # Get num of hidden layers
    model = keras_model(n_layers, **best_hps) # Rebuild model
    model.fit(X_train, y_train, epochs=100, batch_size=BATCH_SIZE, verbose=True, callbacks=[callback])
    models_refitted.append(model)
    # Evaluate train, validation and test
    train_result = model.evaluate(X_train, y_train, batch_size=BATCH_SIZE)
    val_result = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE)
    test_result = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
    results[f'NN{i + 1}'] = {'train': train_result, 'val': val_result, 'test': test_result}

In [None]:
# Persist the refitted models to disk with pickle.
# NOTE(review): pickling Keras models may not round-trip on every library
# version — verify this file can be loaded back.
with open('data/models_refitted.pkl','wb') as f:
    pickle.dump(models_refitted,f)

In [None]:
# Display the evaluation results: one entry per model ('NN1', 'NN2', ...),
# each holding its 'train', 'val' and 'test' values.
results

In [None]:
def format_y_pred(y_pred):
    """Convert per-class scores into trend labels.

    For each row the position of the largest score is mapped to a trend:
    position 0 -> 0, position 1 -> -1, any other position -> 1.

    Args:
        y_pred: Iterable of score vectors (e.g. the 2-D array returned by
            ``model.predict``).

    Returns:
        List of ints in {0, -1, 1}, one per input row.
    """
    # Results go into their own list: reusing the name `y_pred` would discard
    # the input and make the function always return [].
    index_to_trend = {0: 0, 1: -1}
    return [index_to_trend.get(int(np.argmax(trend)), 1) for trend in y_pred]

In [None]:
# NOTE(review): this notebook defines format_y_pred in two consecutive cells;
# this later definition is the one in effect, and one of the two can be removed.
def format_y_pred(y_pred):
    """Convert per-class scores into trend labels.

    For each row the position of the largest score is mapped to a trend:
    position 0 -> 0, position 1 -> -1, any other position -> 1.

    Args:
        y_pred: Iterable of score vectors (e.g. the 2-D array returned by
            ``model.predict``).

    Returns:
        List of ints in {0, -1, 1}, one per input row.
    """
    # Results go into their own list: reusing the name `y_pred` would discard
    # the input and make the function always return [].
    index_to_trend = {0: 0, 1: -1}
    return [index_to_trend.get(int(np.argmax(trend)), 1) for trend in y_pred]