# If you like my model, please upvote ⬆️⬆️⬆️

This notebook ensembles four models published by other users and one of [my own](https://www.kaggle.com/code/nataliasz/ump-multimodal-nn-with-time-id).<br>

Thank you [@老肥](https://www.kaggle.com/librauee) and [@shigeeeru](https://www.kaggle.com/shigeeeru) for publishing your models, and [@Lonnie](https://www.kaggle.com/lonnieqin) for publishing the dataset pickle 😊

- [老肥](https://www.kaggle.com/librauee) models: 
    - [https://www.kaggle.com/datasets/librauee/dnnmodelnew](https://www.kaggle.com/datasets/librauee/dnnmodelnew)
    - [https://www.kaggle.com/code/librauee/dnn-base](https://www.kaggle.com/code/librauee/dnn-base)
    - [https://www.kaggle.com/code/librauee/train-dnn-v2-10fold](https://www.kaggle.com/code/librauee/train-dnn-v2-10fold)
<br>
- [shigeeeru](https://www.kaggle.com/shigeeeru) model: 
     - [https://www.kaggle.com/code/shigeeeru/prediction-including-spatial-info-with-conv1d](https://www.kaggle.com/code/shigeeeru/prediction-including-spatial-info-with-conv1d)
<br>
- [NataeSz](https://www.kaggle.com/nataliasz)
    - [https://www.kaggle.com/code/nataliasz/ump-multimodal-nn-with-time-id](https://www.kaggle.com/code/nataliasz/ump-multimodal-nn-with-time-id)
- [Lonnie](https://www.kaggle.com/lonnieqin) dataset:
     - [https://www.kaggle.com/datasets/lonnieqin/ubiquant-market-prediction-half-precision-pickle](https://www.kaggle.com/datasets/lonnieqin/ubiquant-market-prediction-half-precision-pickle)

# Table of Contents
1. [Load UMP data](#Load-UMP-data)
2. [EDA](#EDA)
3. [Preprocess](#Preprocess)
    - [Drop Short Investments](#Drop-Short-Investments)
    - [Make TensorFlow Dataset](#Make-TensorFlow-Dataset)
4. [Model](#Model)
    - [Build Models](#Build-Models)
    - [Load models weights](#Load-models-weights)
    - [Train the Model](#Train-the-Model)
5. [Predict and Submit](#Predict-and-Submit)

In [None]:
import numpy as np
import pandas as pd
import os
import gc
import tensorflow as tf
from tensorflow.keras import layers

# Load UMP data

The size of the original csv dataset is 18.55 GB.<br>
Thanks to [@Lonnie](https://www.kaggle.com/lonnieqin), we can load a smaller [pickle of the dataset](https://www.kaggle.com/datasets/lonnieqin/ubiquant-market-prediction-half-precision-pickle) 📈 <br>

In [None]:
%%time
# Number of feature columns and their names (f_0 ... f_299).
n_features = 300
features = [f'f_{i}' for i in range(n_features)]
# Load the training frame from the pickled dataset rather than the raw CSV.
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
# Last expression of the cell: shows the first rows of the frame.
train.head()

## EDA

In [None]:
# Print a summary of the training frame's columns.
train.info()

In [None]:
# Histogram of the target values (100 bins); the trailing ';' suppresses the cell's text output.
train['target'].hist(bins = 100, figsize = (20,6));

In [None]:
# Histogram of the number of rows recorded for each investment_id.
train.groupby(['investment_id'])['time_id'].count().hist(bins = 100, figsize = (16,6));

In [None]:
# Histogram of the number of rows recorded for each time_id.
train.groupby(['time_id'])['investment_id'].count().hist(bins = 100, figsize = (20,6));

# Preprocess

### Drop Short Investments
Let's remove some short investments. They appear to be less representative.<br>
Statistical methods like [IQR](https://towardsdatascience.com/why-1-5-in-iqr-method-of-outlier-detection-5d07fdc82097) don't catch all of the investments that may distort the results, so I have dropped the shortest 2% of them instead.

In [None]:
# Number of rows recorded for each investment_id.
short_investments = train.groupby(['investment_id'])['time_id'].count()
# Threshold: 2% of the number of distinct investments, used below as a minimum row count.
short_investments_count = len(short_investments) *0.02
# investment_ids whose row count is below that threshold.
short_investments = short_investments[short_investments < short_investments_count].index
# Row labels of `train` that belong to those investments.
# NOTE(review): this index is only computed here; no visible cell removes these rows from `train`.
short_investments = train[train['investment_id'].isin(short_investments)].index

### Make TensorFlow Dataset

In [None]:
# Split the id columns and the target off `train`. pop() removes each column
# in place, so afterwards `train` holds only the remaining columns.
investment_id = train.pop('investment_id')
time_id = train.pop("time_id")
y = train.pop("target")

In [None]:
def make_dataset(investment_id, feature, time_id, y=None, batch_size=1024):
    """Build a batched, cached and prefetched ``tf.data.Dataset``.

    Args:
        investment_id: Investment ids, one per row.
        feature: Feature values, one row per sample.
        time_id: Time ids, one per row.
        y: Optional targets. When given, each element is
            ``((investment_id, feature, time_id), y)``; otherwise each
            element is just ``(investment_id, feature, time_id)``.
        batch_size: Number of rows per batch.

    Returns:
        The resulting ``tf.data.Dataset``.

    NOTE(review): without ``y`` the element is a bare 3-tuple, which Keras may
    unpack as ``(x, y, sample_weight)`` -- confirm before passing such a
    dataset to ``Model.predict``.
    """
    inputs = (investment_id, feature, time_id)
    tensors = inputs if y is None else (inputs, y)

    dataset = tf.data.Dataset.from_tensor_slices(tensors)
    dataset = dataset.batch(batch_size)
    dataset = dataset.cache()
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# Training set as a batched dataset of ((investment_id, features, time_id), target);
# the remaining columns of `train` are used as the features.
train_ds = make_dataset(investment_id=investment_id, feature=train, time_id=time_id, y=y)

In [None]:
# Distinct investment ids present in the training data.
investment_ids = list(investment_id.unique())
# One slot more than the number of distinct ids -- presumably for the
# out-of-vocabulary index; confirm against the IntegerLookup documentation.
investment_id_size = len(investment_ids) + 1
# Lookup layer applied to the raw investment id input in get_model, get_model2 and get_model3.
investment_id_lookup_layer = layers.IntegerLookup(max_tokens=investment_id_size)
# Build the layer's vocabulary from the training ids, with the work placed on the CPU.
with tf.device("cpu"):
    investment_id_lookup_layer.adapt(investment_id)

# Model

### Build Models

In [None]:
def get_model():
    """Build and compile the two-input dense model.

    Inputs are an investment id (shape ``(1,)``, uint16) and 300 features
    (float16). The id goes through the shared lookup layer, a 32-wide
    embedding and three Dense(64) layers; the features go through three
    Dense(256) layers. Both branches are concatenated and fed to an
    L2-regularised head (512 -> 128 -> 32) ending in a single linear unit.

    Returns:
        A ``tf.keras.Model`` compiled with Adam(0.001) and MSE loss.
    """
    id_in = tf.keras.Input((1, ), dtype=tf.uint16)
    feat_in = tf.keras.Input((300, ), dtype=tf.float16)

    # Investment-id branch.
    id_branch = investment_id_lookup_layer(id_in)
    id_branch = layers.Embedding(investment_id_size, 32, input_length=1)(id_branch)
    id_branch = layers.Reshape((-1, ))(id_branch)
    for _ in range(3):
        id_branch = layers.Dense(64, activation='swish')(id_branch)

    # Feature branch.
    feat_branch = feat_in
    for _ in range(3):
        feat_branch = layers.Dense(256, activation='swish')(feat_branch)

    # Joint head.
    merged = layers.Concatenate(axis=1)([id_branch, feat_branch])
    for width in (512, 128, 32):
        merged = layers.Dense(width, activation='swish', kernel_regularizer="l2")(merged)
    prediction = layers.Dense(1)(merged)

    net = tf.keras.Model(inputs=[id_in, feat_in], outputs=[prediction])
    net.compile(
        optimizer=tf.optimizers.Adam(0.001),
        loss='mse',
        metrics=['mse', "mae", "mape"],
    )
    return net


def get_model2():
    """Build and compile the deeper two-input dense model with dropout.

    Same inputs as ``get_model``. The id branch has four Dense(64) layers;
    the feature branch has four Dense(256) layers followed by Dropout(0.65).
    The L2-regularised head is 512 -> 128 -> 32 -> 32, then Dropout(0.75)
    and a single linear unit.

    Returns:
        A ``tf.keras.Model`` compiled with Adam(0.001) and MSE loss.
    """
    id_in = tf.keras.Input((1, ), dtype=tf.uint16)
    feat_in = tf.keras.Input((300, ), dtype=tf.float16)

    # Investment-id branch (no dropout is active on this branch).
    id_branch = investment_id_lookup_layer(id_in)
    id_branch = layers.Embedding(investment_id_size, 32, input_length=1)(id_branch)
    id_branch = layers.Reshape((-1, ))(id_branch)
    for _ in range(4):
        id_branch = layers.Dense(64, activation='swish')(id_branch)

    # Feature branch.
    feat_branch = feat_in
    for _ in range(4):
        feat_branch = layers.Dense(256, activation='swish')(feat_branch)
    feat_branch = layers.Dropout(0.65)(feat_branch)

    # Joint head; dropout is applied only after the last hidden layer.
    merged = layers.Concatenate(axis=1)([id_branch, feat_branch])
    for width in (512, 128, 32, 32):
        merged = layers.Dense(width, activation='swish', kernel_regularizer="l2")(merged)
    merged = layers.Dropout(0.75)(merged)
    prediction = layers.Dense(1)(merged)

    net = tf.keras.Model(inputs=[id_in, feat_in], outputs=[prediction])
    net.compile(
        optimizer=tf.optimizers.Adam(0.001),
        loss='mse',
        metrics=['mse', "mae", "mape"],
    )
    return net


def get_model3():
    """Build and compile the heavily regularised two-input model.

    Inputs are an investment id (uint16) and 300 features (float32 here).
    Every hidden layer in the id branch and the head is paired with
    Dropout(0.5); the single-unit output is passed through
    BatchNormalization(axis=1).

    Returns:
        A ``tf.keras.Model`` compiled with Adam(0.001) and MSE loss.
    """
    id_in = tf.keras.Input((1, ), dtype=tf.uint16)
    feat_in = tf.keras.Input((300, ), dtype=tf.float32)

    # Investment-id branch: Dense -> Dropout, twice.
    id_branch = investment_id_lookup_layer(id_in)
    id_branch = layers.Embedding(investment_id_size, 32, input_length=1)(id_branch)
    id_branch = layers.Reshape((-1, ))(id_branch)
    for width in (64, 32):
        id_branch = layers.Dense(width, activation='swish')(id_branch)
        id_branch = layers.Dropout(0.5)(id_branch)

    # Feature branch: Dense -> Dropout twice, then a final Dense without dropout.
    feat_branch = feat_in
    for width in (256, 128):
        feat_branch = layers.Dense(width, activation='swish')(feat_branch)
        feat_branch = layers.Dropout(0.5)(feat_branch)
    feat_branch = layers.Dense(64, activation='swish')(feat_branch)

    # Joint head: dropout before each L2-regularised layer and once more at the end.
    merged = layers.Concatenate(axis=1)([id_branch, feat_branch])
    for width in (64, 32, 16):
        merged = layers.Dropout(0.5)(merged)
        merged = layers.Dense(width, activation='swish', kernel_regularizer="l2")(merged)
    merged = layers.Dropout(0.5)(merged)

    prediction = layers.Dense(1)(merged)
    prediction = layers.BatchNormalization(axis=1)(prediction)

    net = tf.keras.Model(inputs=[id_in, feat_in], outputs=[prediction])
    net.compile(
        optimizer=tf.optimizers.Adam(0.001),
        loss='mse',
        metrics=['mse', "mae", "mape"],
    )
    return net

def get_model5():
    """Build and compile the features-only Conv1D model.

    The 300 float16 features pass through Dense(256) + Dropout(0.1), are
    reshaped to a length-256 sequence with one channel, and then run through
    five Conv1D -> BatchNormalization -> LeakyReLU stages. The flattened
    result feeds an L2-regularised head (512 -> 128 -> 32, each followed by
    Dropout(0.1)) ending in a single linear unit.

    Returns:
        A ``tf.keras.Model`` compiled with Adam(0.001) and MSE loss.
    """
    feat_in = tf.keras.Input((300, ), dtype=tf.float16)

    # Dense projection, then add a channel axis for the convolutions.
    seq = layers.Dense(256, activation='swish')(feat_in)
    seq = layers.Dropout(0.1)(seq)
    seq = layers.Reshape((-1, 1))(seq)

    # (filters, strides) for each convolution stage; kernel size is always 4.
    conv_stages = ((16, 1), (16, 4), (64, 1), (64, 4), (64, 2))
    for n_filters, stride in conv_stages:
        seq = layers.Conv1D(filters=n_filters, kernel_size=4, strides=stride, padding='same')(seq)
        seq = layers.BatchNormalization()(seq)
        seq = layers.LeakyReLU()(seq)

    hidden = layers.Flatten()(seq)

    # Dense head: each hidden layer is followed by light dropout.
    for width in (512, 128, 32):
        hidden = layers.Dense(width, activation='swish', kernel_regularizer="l2")(hidden)
        hidden = layers.Dropout(0.1)(hidden)
    prediction = layers.Dense(1)(hidden)

    net = tf.keras.Model(inputs=[feat_in], outputs=[prediction])
    net.compile(
        optimizer=tf.optimizers.Adam(0.001),
        loss='mse',
        metrics=['mse', "mae", "mape"],
    )
    return net


In [None]:
def get_model6():
    """Build and compile the three-input model that also uses ``time_id``.

    Inputs, in order: ``investment_id`` (shape ``(1,)``, uint16),
    ``features`` (shape ``(300,)``, float16) and ``time_id`` (shape ``(1,)``,
    uint16). Each input has its own Dense/Dropout branch; the branches are
    concatenated and mapped to a single linear unit.

    Returns:
        A ``tf.keras.Model`` named ``model_with_time_id``, compiled with
        RMSprop and MSE loss.
    """
    # Investment-id branch: the raw id is fed straight into a Dense layer.
    investment_in = tf.keras.Input(shape=(1,), dtype=tf.uint16, name='investment_id')
    investment_branch = layers.Dense(64, activation='relu')(investment_in)
    investment_branch = layers.Dropout(0.2)(investment_branch)

    # Feature branch: two Dense layers, each followed by dropout.
    features_in = tf.keras.Input(shape=(300,), dtype=tf.float16, name='features')
    features_branch = features_in
    for width, rate in ((512, 0.25), (256, 0.2)):
        features_branch = layers.Dense(width, activation='relu')(features_branch)
        features_branch = layers.Dropout(rate)(features_branch)

    # Time-id branch: same shape as the investment-id branch.
    time_in = tf.keras.Input(shape=(1,), dtype=tf.uint16, name='time_id')
    time_branch = layers.Dense(64, activation='relu')(time_in)
    time_branch = layers.Dropout(0.2)(time_branch)

    joined = layers.concatenate([investment_branch, features_branch, time_branch], axis=-1)
    prediction = layers.Dense(1)(joined)

    net = tf.keras.models.Model(
        [investment_in, features_in, time_in],
        prediction,
        name='model_with_time_id',
    )
    net.compile(optimizer='rmsprop', loss='mse', metrics=['mse', 'mae', 'mape'])
    return net

In [None]:
# Run a garbage-collection pass before the models are built.
gc.collect()

In [None]:
# Instantiate the time_id model and print its layer summary.
model6 = get_model6()
model6.summary()

In [None]:
# Render the model graph with tensor shapes shown.
tf.keras.utils.plot_model(model6, show_shapes=True)

### Load models weights

In [None]:
# Restore saved weights for the time_id model from the attached Kaggle input.
model6.load_weights('../input/ump-multimodal-nn-with-time-id/ns_model_with_time_id.tf')

In [None]:
# Ensemble 1: models that take (investment_id, features) as input.
models = []

# 5 saved models built with get_model.
for i in range(5):
    model = get_model()
    model.load_weights(f'../input/dnn-base/model_{i}')
    models.append(model)

# 10 saved models built with get_model2.
for i in range(10):
    model = get_model2()
    model.load_weights(f'../input/train-dnn-v2-10fold/model_{i}')
    models.append(model)
    
    
# 10 saved models built with get_model3.
for i in range(10):
    model = get_model3()
    model.load_weights(f'../input/dnnmodelnew/model_{i}')
    models.append(model)
    
    
# Ensemble 2: features-only Conv1D models.
models2 = []
    
# 5 saved models built with get_model5.
for i in range(5):
    model = get_model5()
    model.load_weights(f'../input/prediction-including-spatial-info-with-conv1d/model_{i}.tf')
    models2.append(model)

In [None]:
# def get_model_dr04():
#     features_inputs = tf.keras.Input((300, ), dtype=tf.float32)
    
#     feature_x = layers.Dense(256, activation='swish')(features_inputs)
#     feature_x = layers.Dropout(0.4)(feature_x)
#     feature_x = layers.Dense(128, activation='swish')(feature_x)
#     feature_x = layers.Dropout(0.4)(feature_x)
#     feature_x = layers.Dense(64, activation='swish')(feature_x)
    
#     x = layers.Concatenate(axis=1)([feature_x])
#     x = layers.Dropout(0.4)(x)
#     x = layers.Dense(64, activation='swish', kernel_regularizer="l2")(x)
#     x = layers.Dropout(0.4)(x)
#     x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
#     x = layers.Dropout(0.4)(x)
#     x = layers.Dense(16, activation='swish', kernel_regularizer="l2")(x)
#     x = layers.Dropout(0.4)(x)
#     output = layers.Dense(1)(x)
#     output = tf.keras.layers.BatchNormalization(axis=1)(output)
#     model = tf.keras.Model(inputs=[features_inputs], outputs=[output])
#     model.compile(optimizer=tf.optimizers.Adam(0.001),  loss = correlationLoss, metrics=[correlationMetric])
#     return model

# dr=0.3

# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     print("Name:", gpu.name, "  Type:", gpu.device_type)
    

# def correlationMetric(x, y, axis=-2):
#     """Metric returning the Pearson correlation coefficient of two tensors over some axis, default -2."""
#     x = tf.convert_to_tensor(x)
#     y = math_ops.cast(y, x.dtype)
#     n = tf.cast(tf.shape(x)[axis], x.dtype)
#     xsum = tf.reduce_sum(x, axis=axis)
#     ysum = tf.reduce_sum(y, axis=axis)
#     xmean = xsum / n
#     ymean = ysum / n
#     xvar = tf.reduce_sum( tf.math.squared_difference(x, xmean), axis=axis)
#     yvar = tf.reduce_sum( tf.math.squared_difference(y, ymean), axis=axis)
#     cov = tf.reduce_sum( (x - xmean) * (y - ymean), axis=axis)
#     corr = cov / tf.sqrt(xvar * yvar)
#     return tf.constant(1.0, dtype=x.dtype) - corr


# def correlationLoss(x,y, axis=-2):
#     """Loss function that maximizes the pearson correlation coefficient between the predicted values and the labels,
#     while trying to have the same mean and variance"""
#     x = tf.convert_to_tensor(x)
#     y = math_ops.cast(y, x.dtype)
#     n = tf.cast(tf.shape(x)[axis], x.dtype)
#     xsum = tf.reduce_sum(x, axis=axis)
#     ysum = tf.reduce_sum(y, axis=axis)
#     xmean = xsum / n
#     ymean = ysum / n
#     xsqsum = tf.reduce_sum( tf.math.squared_difference(x, xmean), axis=axis)
#     ysqsum = tf.reduce_sum( tf.math.squared_difference(y, ymean), axis=axis)
#     cov = tf.reduce_sum( (x - xmean) * (y - ymean), axis=axis)
#     corr = cov / tf.sqrt(xsqsum * ysqsum)
#     return tf.convert_to_tensor( K.mean(tf.constant(1.0, dtype=x.dtype) - corr ) , dtype=tf.float32 )


# gc.collect()

    
# models3 = []

# for index in range(10):
#     model = get_model_dr04()
#     model.load_weights(f"../input/model10mse/model_{index}")
#     models3.append(model)
    

### Train the Model

In [None]:
# history = model6.fit(train_ds, epochs=40)  # callbacks=early_stop

In [None]:
# model6.save_weights(f'ns_{model6.name}.tf')

# Predict and Submit

In [None]:
def inference(models, ds):
    """Average the predictions of several models on the same dataset.

    Args:
        models: Iterable of objects exposing ``predict(ds)``.
        ds: Input passed unchanged to every model's ``predict``.

    Returns:
        The element-wise mean of all prediction arrays (mean over the model
        axis), so the result has the shape of a single model's prediction.
    """
    per_model = [member.predict(ds) for member in models]
    return np.mean(per_model, axis=0)

In [None]:
def preprocess_test(investment_id, feature):
    """Pair the two model inputs with a constant placeholder label.

    Returns:
        ``((investment_id, feature), 0)``.
    """
    model_inputs = (investment_id, feature)
    placeholder_label = 0
    return model_inputs, placeholder_label

def preprocess_test_s(feature):
    """Pair a single feature input with a constant placeholder label.

    Returns:
        ``(feature, 0)`` -- the feature is passed through as-is, not wrapped
        in a tuple.
    """
    placeholder_label = 0
    return feature, placeholder_label

# def preprocess_test(*args):
#     return (args), 0

def make_test_dataset(feature, investment_id, batch_size=1024):
    """Build the inference dataset for the (investment_id, features) models.

    Each element is mapped through ``preprocess_test`` to
    ``((investment_id, feature), 0)`` before batching.

    Args:
        feature: Feature values, one row per sample.
        investment_id: Investment ids, one per row.
        batch_size: Number of rows per batch.

    Returns:
        A batched, cached and prefetched ``tf.data.Dataset``.
    """
    pairs = tf.data.Dataset.from_tensor_slices((investment_id, feature))
    labelled = pairs.map(preprocess_test)
    batched = labelled.batch(batch_size)
    return batched.cache().prefetch(tf.data.experimental.AUTOTUNE)

def make_test_dataset2(feature, batch_size=1024):
    """Build the inference dataset for the features-only models.

    Elements are the raw feature rows; no placeholder label is attached.

    Args:
        feature: Feature values, one row per sample.
        batch_size: Number of rows per batch.

    Returns:
        A batched, cached and prefetched ``tf.data.Dataset``.
    """
    rows = tf.data.Dataset.from_tensor_slices(feature)
    batched = rows.batch(batch_size)
    return batched.cache().prefetch(tf.data.AUTOTUNE)

def make_test_dataset3(feature, batch_size=1024):
    """Build a features-only inference dataset with a placeholder label.

    Each element is mapped through ``preprocess_test_s`` to ``(feature, 0)``
    before batching.

    Args:
        feature: Feature values, one row per sample.
        batch_size: Number of rows per batch.

    Returns:
        A batched, cached and prefetched ``tf.data.Dataset``.
    """
    rows = tf.data.Dataset.from_tensor_slices(feature)
    labelled = rows.map(preprocess_test_s)
    batched = labelled.batch(batch_size)
    return batched.cache().prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
import ubiquant

# Competition API: iter_test() yields one (test_df, sample_prediction_df) pair
# at a time, and env.predict() submits the filled-in predictions for that pair.
env = ubiquant.make_env()
iter_test = env.iter_test()

for (test_df, sample_prediction_df) in iter_test:

    # Ensemble 1: dense models fed with (investment_id, features).
    # Every model ends in Dense(1), so predictions have shape (n, 1). Take
    # column 0 so that all blend components are 1-D: adding an (n, 1) array to
    # an (n,) array would broadcast to (n, n) instead of blending row by row.
    ds = make_test_dataset(test_df[features], test_df["investment_id"])
    p1 = inference(models, ds)[:, 0]

    # Ensemble 2: features-only Conv1D models.
    ds2 = make_test_dataset2(test_df[features])
    p2 = inference(models2, ds2)[:, 0]

    # A third ensemble (models3 / make_test_dataset3) is currently disabled.

    # time_id is taken as the integer before the first '_' in row_id.
    test_time_id = test_df['row_id'].str.split('_', expand=True).get(key=0).astype(int)
    # The time_id model is fed the three inputs directly, in the order it declares them.
    p6 = model6.predict([test_df['investment_id'], test_df[features], test_time_id])[:, 0]

    # Weighted blend of the three 1-D prediction vectors (weights sum to 1).
    sample_prediction_df['target'] = p1 * 0.29 + p2 * 0.59 + p6 * 0.12
    env.predict(sample_prediction_df)