#  Ubiquant Market Prediction with DNN

Hey, thanks for viewing my Kernel!

If you like my work, please leave an upvote: it is really appreciated and motivates me to offer more content to the Kaggle community! 😊

## Import Packages

In [None]:
import os
import pandas as pd
import numpy as np
import gc
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from scipy import stats

## Import dataset

In [None]:
%%time
# Feature column names f_0 ... f_{n_features - 1}
# (presumably matching the columns of the pickle — verify against the data).
n_features = 300
features = [f'f_{i}' for i in range(n_features)]
# NOTE(review): unpickling can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
# Preview the first rows of the loaded frame.
train.head()

In [None]:
# (rows, columns) of the frame before any column is split off below.
train.shape

In [None]:
# pop() removes the column from `train` and returns it as a Series.
investment_id = train.pop("investment_id")
investment_id.head()

In [None]:
# Remove the time_id column from `train`; the popped values are discarded.
_ = train.pop("time_id")

In [None]:
# Split the regression target off `train`; what remains in `train` is
# presumably only the f_* feature columns — verify against the pickle schema.
y = train.pop("target")
y.head()

## Create an IntegerLookup layer for investment_id input

In [None]:
%%time
# Distinct investment ids present in the training data.
investment_ids = list(investment_id.unique())
# One slot more than the number of known ids
# (presumably for IntegerLookup's out-of-vocabulary index — confirm).
investment_id_size = len(investment_ids) + 1
investment_id_lookup_layer = layers.IntegerLookup(max_tokens=investment_id_size)
# Build the layer's vocabulary from the unique ids.
investment_id_lookup_layer.adapt(pd.DataFrame({"investment_ids":investment_ids}))

## Make TensorFlow dataset

In [None]:
import tensorflow as tf
def preprocess(X, y):
    """Identity mapping for one dataset element.

    Args:
        X: The model inputs of the element.
        y: The target of the element.

    Returns:
        The tuple ``(X, y)``, unchanged.
    """
    element = (X, y)
    return element
def make_dataset(feature, investment_id, y, batch_size=1024, mode="train"):
    """Build a batched ``tf.data.Dataset`` of ``((investment_id, feature), y)``.

    Args:
        feature: Feature rows, sliced along the first axis.
        investment_id: Investment ids, aligned row-for-row with ``feature``.
        y: Targets, aligned row-for-row with ``feature``.
        batch_size: Number of rows per batch.
        mode: ``"train"`` enables shuffling; any other value keeps row order.

    Returns:
        A cached, (optionally shuffled,) batched and prefetched dataset.
    """
    ds = tf.data.Dataset.from_tensor_slices(((investment_id, feature), y))
    ds = ds.map(preprocess)
    # Cache BEFORE shuffling: a cache placed after shuffle() replays the order
    # produced during the first epoch, so later epochs are never reshuffled.
    ds = ds.cache()
    if mode == "train":
        ds = ds.shuffle(4096)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

## Modeling

In [None]:
def get_model(n_features=300):
    """Build and compile the two-input regression network.

    The network has two branches that are concatenated and fed through a
    regularized dense head ending in a single linear output:

    * id branch: investment id -> ``investment_id_lookup_layer`` ->
      32-dimensional embedding -> three ``Dense(64)`` layers;
    * feature branch: feature vector -> three ``Dense(256)`` layers.

    Relies on the notebook-level ``investment_id_lookup_layer`` and
    ``investment_id_size`` being defined before it is called.

    Args:
        n_features: Width of the feature input. Defaults to 300, the value
            that was previously hard-coded.

    Returns:
        A ``tf.keras.Model`` compiled with Adam and MSE loss, tracking
        MSE, MAE, MAPE and RMSE.
    """
    def swish_dense(units, **kwargs):
        # Every hidden layer shares the same activation and initializer.
        return layers.Dense(units, activation='swish',
                            kernel_initializer='random_normal', **kwargs)

    investment_id_inputs = tf.keras.Input((1, ), dtype=tf.uint16)
    features_inputs = tf.keras.Input((n_features, ), dtype=tf.float16)

    # Id branch. `input_length` is omitted: it is deprecated in recent Keras
    # and unnecessary here because the input shape already fixes the length.
    investment_id_x = investment_id_lookup_layer(investment_id_inputs)
    investment_id_x = layers.Embedding(investment_id_size, 32)(investment_id_x)
    # Flatten the (1, 32) embedding output to a 32-vector per sample.
    investment_id_x = layers.Reshape((-1, ))(investment_id_x)
    for _ in range(3):
        investment_id_x = swish_dense(64)(investment_id_x)

    # Feature branch.
    feature_x = features_inputs
    for _ in range(3):
        feature_x = swish_dense(256)(feature_x)

    # Joint head with L2-regularized kernels, narrowing down to one output.
    x = layers.Concatenate(axis=1)([investment_id_x, feature_x])
    for units in (512, 128, 32):
        x = swish_dense(units, kernel_regularizer="l2")(x)
    output = layers.Dense(1)(x)

    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[investment_id_inputs, features_inputs], outputs=[output])
    model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model

In [None]:
# Build the network once to inspect its layers and tensor shapes.
model = get_model()
model.summary()
keras.utils.plot_model(model, show_shapes=True)


In [None]:
def _plot_lr_search(losses, lrs, best_index, max_th):
    """Plot the loss curve (clipped at ``max_th``) and the learning-rate sweep.

    The epoch with the lowest loss is marked with a vertical red line and
    annotated with its learning rate and loss.
    """
    epoch_axis = np.arange(1, len(losses) + 1)
    # History lists are 0-based while the x axis counts epochs from 1.
    best_epoch = best_index + 1

    _, ax = plt.subplots(figsize=(16, 8))
    # Clip exploding losses so the useful part of the curve stays readable.
    ax.plot(epoch_axis, np.where(losses > max_th, max_th, losses), label='Loss', lw=3)
    ax2 = ax.twinx()
    ax2.plot(epoch_axis, lrs, label='Learning rate', color='orange', lw=3)

    ax.set_title('Evaluation metrics', size=20)
    ax.set_xlabel('Epoch', size=14)
    ax.set_ylabel('Loss', size=14)
    ax2.set_ylabel('Learning Rate', size=14)
    ax.axhline(y=max_th, color='black', linestyle='--')
    ax.axvline(x=best_epoch, ymin=0, ymax=1, color='red', linestyle='--')
    ax.text(best_epoch + 1, 1, 'lr  :' + str(lrs[best_index]), color='r', fontsize=14)
    ax.text(best_epoch + 1, 2, 'loss:' + str(losses[best_index]), color='r', fontsize=14)
    ax2.legend()
    ax.legend()


def get_best_lr(model, train_ds, epochs=100, min_lr=1e-3, verbose=0, plot=True, max_th=5):
    """Run a learning-rate sweep and return the rate with the lowest loss.

    The model is trained for ``epochs`` epochs while the learning rate grows
    exponentially from ``min_lr`` towards ``min_lr * 10 ** 2.5``. The model
    passed in is trained in place.

    Args:
        model: Compiled Keras model to fit.
        train_ds: Training data passed straight to ``model.fit``.
        epochs: Number of sweep epochs (one learning rate per epoch).
        min_lr: Learning rate used for the first epoch.
        verbose: Verbosity forwarded to ``model.fit``.
        plot: When true, draw the loss and learning-rate curves.
        max_th: Losses above this value are clipped in the plot only.

    Returns:
        The learning rate of the epoch with the lowest training loss. It is
        returned whether or not ``plot`` is set.
    """
    def schedule(epoch):
        return min_lr * 10 ** (epoch / epochs * 2.5)

    model_history = model.fit(
        train_ds, epochs=epochs, verbose=verbose,
        callbacks=[tf.keras.callbacks.LearningRateScheduler(schedule)],
    )
    history = model_history.history
    losses = np.asarray(history['loss'], dtype=float)

    # Depending on the Keras version the scheduler logs the rate under 'lr'
    # or 'learning_rate'; fall back to recomputing it from the schedule.
    lrs = history.get('lr') or history.get('learning_rate')
    if not lrs:
        lrs = [schedule(epoch) for epoch in range(len(losses))]

    # Ignore NaN losses from diverged epochs; plain argmin would select them.
    if np.all(np.isnan(losses)):
        best_index = 0
    else:
        best_index = int(np.nanargmin(losses))
    best_lr = lrs[best_index]

    if plot:
        _plot_lr_search(losses, lrs, best_index, max_th)

    return best_lr

In [None]:
# Uncomment to run the sweep on the first 10,000 rows instead of the full data.
#train_ds = make_dataset(train.iloc[:10000, :], investment_id[:10000], y[:10000])
train_ds = make_dataset(train, investment_id, y)
# Rebuild the model so the sweep runs on a newly constructed network.
model = get_model()
# 20-epoch sweep with the learning rate starting at 1e-4.
best_lr = get_best_lr(model, train_ds, epochs=20, min_lr=1e-4, verbose=0, plot=True)

In [None]:
# Display the learning rate returned by the sweep.
best_lr