In [None]:
import os
import gc
import random
import numpy as np
import pandas as pd
from scipy import stats
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.model_selection import KFold
from tensorflow.keras import backend as K
from keras.utils.generic_utils import get_custom_objects

In [None]:
# Names of the 300 feature columns: "f_0" ... "f_299".
features = ["f_" + str(idx) for idx in range(300)]

# NOTE(review): read_pickle executes arbitrary code embedded in the file; only
# load pickles from a trusted source.
train = pd.read_pickle("../input/ubiquant-market-prediction-half-precision-pickle/train.pkl")

# Detach the id and target columns (in this order) so that `train` is left
# holding only the remaining columns.
investment_id, time_id, y = (
    train.pop(column) for column in ("investment_id", "time_id", "target")
)

In [None]:
from tensorflow.python.ops import math_ops
def correlation(x, y, axis=-2):
    """Return ``1 - r``, where ``r`` is the Pearson correlation of ``x`` and ``y``.

    Despite its name, this function does not return the correlation itself:
    the result is one minus the coefficient, so 0 means perfectly correlated,
    1 means uncorrelated and 2 means perfectly anti-correlated.

    Args:
        x: Tensor-like; converted with ``tf.convert_to_tensor``.
        y: Tensor-like; cast to the dtype of ``x``.
        axis: Axis along which the samples are laid out (default -2).

    Returns:
        A tensor of dtype ``x.dtype`` with ``axis`` reduced away. Where either
        input has zero variance along ``axis`` the correlation is treated as 0
        (result 1) instead of propagating NaN from a 0/0 division.
    """
    x = tf.convert_to_tensor(x)
    y = tf.cast(y, x.dtype)
    n = tf.cast(tf.shape(x)[axis], x.dtype)

    # keepdims=True keeps the reduced axis as size 1 so the means broadcast
    # back against the inputs for any rank / axis, not only the leading axis.
    xmean = tf.reduce_sum(x, axis=axis, keepdims=True) / n
    ymean = tf.reduce_sum(y, axis=axis, keepdims=True) / n

    xvar = tf.reduce_sum(tf.math.squared_difference(x, xmean), axis=axis)
    yvar = tf.reduce_sum(tf.math.squared_difference(y, ymean), axis=axis)

    cov = tf.reduce_sum((x - xmean) * (y - ymean), axis=axis)
    # divide_no_nan yields 0 where the denominator is 0 (constant input).
    corr = tf.math.divide_no_nan(cov, tf.sqrt(xvar * yvar))
    return tf.constant(1.0, dtype=x.dtype) - corr

In [None]:
def _build_cnn_model(dropout_rate):
    """Build and compile the Dense -> Conv1D stack -> Dense regression model.

    The four public ``get_model_cnn_dr*`` builders differ only in their dropout
    rate, so they all delegate here. Layers are created in exactly the same
    order as in the original hand-written builders, which keeps the model
    structure compatible with the previously saved weights.

    Args:
        dropout_rate: Rate used for every ``Dropout`` layer in the model.

    Returns:
        A compiled ``tf.keras.Model`` taking a (300,) float16 feature vector
        and producing a single regression output.
    """
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)

    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    feature_x = layers.Dropout(dropout_rate)(feature_x)

    # Treat the 256 dense activations as a length-256 sequence with 1 channel.
    feature_x = layers.Reshape((-1, 1))(feature_x)
    for filters, strides in ((16, 1), (16, 4), (64, 1), (64, 4), (64, 2)):
        feature_x = layers.Conv1D(filters=filters, kernel_size=4, strides=strides, padding='same')(feature_x)
        feature_x = layers.BatchNormalization()(feature_x)
        feature_x = layers.LeakyReLU()(feature_x)

    feature_x = layers.Flatten()(feature_x)

    x = feature_x
    for units in (512, 128, 32):
        x = layers.Dense(units, activation='swish', kernel_regularizer="l2")(x)
        x = layers.Dropout(dropout_rate)(x)
    output = layers.Dense(1)(x)

    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(1e-4), loss='mse', metrics=['mse', "mae", "mape", rmse, correlation])
    return model


# The original builders read a module-level `dr` when they were *called*, not
# when they were defined, so every model ended up with the last value assigned
# (0.7). Binding the rate as a default argument gives each builder its own
# rate. Dropout is inactive during `predict`, so inference with existing
# weights is unaffected.
def get_model_cnn_dr01(dropout_rate=0.1):
    """Build the CNN model with dropout rate 0.1 (override via ``dropout_rate``)."""
    return _build_cnn_model(dropout_rate)


def get_model_cnn_dr03(dropout_rate=0.3):
    """Build the CNN model with dropout rate 0.3 (override via ``dropout_rate``)."""
    return _build_cnn_model(dropout_rate)


def get_model_cnn_dr05(dropout_rate=0.5):
    """Build the CNN model with dropout rate 0.5 (override via ``dropout_rate``)."""
    return _build_cnn_model(dropout_rate)


def get_model_cnn_dr07(dropout_rate=0.7):
    """Build the CNN model with dropout rate 0.7 (override via ``dropout_rate``)."""
    return _build_cnn_model(dropout_rate)


# Kept for backward compatibility: the original cell left this global set to
# 0.7 after running. The builders above no longer read it.
dr = 0.7

In [None]:
# Candidate investment ids 1..6999; the lookup vocabulary is capped at one more
# than that count.
investment_ids = list(range(1, 7000))
investment_id_size = 1 + len(investment_ids)

# Learn the id -> index vocabulary from the ids present in the training data.
investment_id_lookup_layer = layers.IntegerLookup(max_tokens=investment_id_size)
investment_id_lookup_layer.adapt(pd.DataFrame({"investment_id": investment_id}))

In [None]:
# Candidate time ids 1..4999; the lookup vocabulary is capped at one more than
# that count.
time_ids = list(range(1, 5000))
time_id_size = 1 + len(time_ids)

# Learn the id -> index vocabulary from the time ids present in the training data.
time_id_lookup_layer = layers.IntegerLookup(max_tokens=time_id_size)
time_id_lookup_layer.adapt(pd.DataFrame({"time_id": time_id}))

In [None]:
def hard_swish(x):
    """Hard-swish activation: ``x * relu6(x + 3) / 6``."""
    gate = K.relu(x + 3., max_value=6.) / 6.
    return x * gate

def get_model_ae():
    """Build and compile the three-input regression model.

    Inputs (in order): investment id (shape (1,), uint16), time id (shape (1,),
    uint16) and a 300-wide float16 feature vector. Each id is mapped through
    its module-level ``IntegerLookup`` layer and an embedding followed by a
    small dense tower; the features pass through a dense/batch-norm/dropout
    stack. The three branches are concatenated and fed to a dense head with a
    single output.

    Relies on the module-level ``investment_id_lookup_layer``,
    ``investment_id_size``, ``time_id_lookup_layer`` and ``time_id_size``.

    Returns:
        A ``keras.Model`` compiled with Adam (learning rate 1e-4), MSE loss and
        mse / mae / mape / rmse metrics.
    """
    investment_id_inputs = keras.Input((1, ), dtype=tf.uint16)
    time_id_inputs = keras.Input((1,), dtype=tf.uint16)
    features_inputs = keras.Input((300, ), dtype=tf.float16)
    
    # Investment-id branch: lookup -> 32-d embedding -> three Dense(64) layers
    # with heavy dropout between them.
    investment_id_x = investment_id_lookup_layer(investment_id_inputs)
    investment_id_x = layers.Embedding(investment_id_size, 32, input_length=1, embeddings_initializer=tf.keras.initializers.GlorotNormal())(investment_id_x)
    investment_id_x = layers.Reshape((-1, ))(investment_id_x)
    investment_id_x = layers.Dense(64, activation=hard_swish)(investment_id_x)
    investment_id_x = tf.keras.layers.Dropout(0.7)(investment_id_x)
    investment_id_x = layers.Dense(64, activation=hard_swish)(investment_id_x)
    investment_id_x = tf.keras.layers.Dropout(0.7)(investment_id_x)
    investment_id_x = layers.Dense(64, activation=hard_swish)(investment_id_x)
    
    # Time-id branch: lookup -> 8-d embedding -> three Dense(16) layers.
    time_id_x = time_id_lookup_layer(time_id_inputs)
    time_id_x = layers.Embedding(time_id_size, 8, input_length=1, embeddings_initializer=tf.keras.initializers.GlorotNormal())(time_id_x)
    time_id_x = layers.Reshape((-1, ))(time_id_x)
    time_id_x = layers.Dense(16, activation=hard_swish)(time_id_x)
    time_id_x = tf.keras.layers.Dropout(0.7)(time_id_x)
    time_id_x = layers.Dense(16, activation=hard_swish)(time_id_x)
    time_id_x = tf.keras.layers.Dropout(0.7)(time_id_x)
    time_id_x = layers.Dense(16, activation=hard_swish)(time_id_x)
    
    # Feature branch.
    feature_x = layers.Dense(256, activation=hard_swish, kernel_initializer=tf.keras.initializers.GlorotNormal())(features_inputs)
    # NOTE(review): the GaussianNoise output assigned here is overwritten on the
    # next line, which reads `feature_x` again, so the noise layer is not part
    # of the model graph. Left as-is because weights saved from this exact
    # structure are loaded elsewhere in the file -- confirm before rewiring.
    encoder = tf.keras.layers.GaussianNoise(0.5)(feature_x)
    encoder = tf.keras.layers.Dense(256, activation=hard_swish)(feature_x)
    encoder = tf.keras.layers.experimental.SyncBatchNormalization()(encoder)
    encoder = tf.keras.layers.Activation(hard_swish)(encoder)
    decoder = tf.keras.layers.Dropout(0.7)(encoder)
    decoder = tf.keras.layers.Dense(256, name = 'decoder')(decoder)
    x_ae = tf.keras.layers.Dense(256)(decoder)
    x_ae = tf.keras.layers.experimental.SyncBatchNormalization()(x_ae)
    x_ae = tf.keras.layers.Activation(hard_swish)(x_ae)
    x_ae = tf.keras.layers.Dropout(0.7)(x_ae)
    x_ae = tf.keras.layers.Dense(256, activation=tf.keras.layers.LeakyReLU(0.3))(x_ae)    
    
    # Merge the three branches and regress to a single value.
    x = layers.Concatenate(axis=1)([investment_id_x, time_id_x, x_ae])
    x = layers.Dense(512, activation=hard_swish, kernel_regularizer='l2')(x)
    x = layers.Dense(128, activation=hard_swish, kernel_regularizer='l2')(x)
    x = layers.Dense(32, activation=hard_swish, kernel_regularizer='l2')(x)
    output = layers.Dense(1)(x)
    
    rmse = keras.metrics.RootMeanSquaredError(name='rmse')
    model = keras.Model(inputs=[investment_id_inputs, time_id_inputs, features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=1e-4), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model

In [None]:
# Final models
from tqdm import tqdm


def _load_saved_models(subdir, n_models):
    """Load ``model_0 .. model_{n_models-1}`` saved as full Keras models.

    Args:
        subdir: Directory name under ``../input/finalmodels/final-models``.
        n_models: Number of consecutively numbered models to load.

    Returns:
        List of loaded models, in index order.
    """
    return [
        keras.models.load_model(f"../input/finalmodels/final-models/{subdir}/model_{i}")
        for i in tqdm(range(n_models))
    ]


def _load_cnn_models(build_fn, subdir, n_models):
    """Rebuild the CNN architecture and restore weights for each saved fold.

    The CNN models were saved as weights only, so each one is rebuilt with
    ``build_fn`` before ``load_weights`` is called.

    Args:
        build_fn: Zero-argument callable returning a freshly built model.
        subdir: Directory name under ``../input/finalmodels/final-models``.
        n_models: Number of consecutively numbered weight files to load.

    Returns:
        List of models with restored weights, in index order.
    """
    restored = []
    for i in tqdm(range(n_models)):
        model = build_fn()
        model.load_weights(f"../input/finalmodels/final-models/{subdir}/model_{i}.tf")
        restored.append(model)
    return restored


# Auto-encoder models that take investment id, time id and features.
models_ae_wt_1 = _load_saved_models("ae-wt-01", 5)
models_ae_wt_2 = _load_saved_models("ae-wt-03", 5)
models_ae_wt_3 = _load_saved_models("ae-wt-05", 5)
models_ae_wt_4 = _load_saved_models("ae-wt-07", 5)

models_ae_upd_drgn_1 = _load_saved_models("ae-update-drgn-0-05", 5)
models_ae_upd_drgn_2 = _load_saved_models("ae-update-drgn-05-1", 5)

# GRU models: three saved models per configuration.
models_gru_1 = _load_saved_models("gru-01", 3)
models_gru_2 = _load_saved_models("gru-03", 3)
models_gru_3 = _load_saved_models("gru-05", 3)
models_gru_4 = _load_saved_models("gru-07", 3)

# CNN models: weights only, one builder per dropout configuration.
models_cnn_1 = _load_cnn_models(get_model_cnn_dr01, "cnn-01", 5)
models_cnn_2 = _load_cnn_models(get_model_cnn_dr03, "cnn-03", 5)
models_cnn_3 = _load_cnn_models(get_model_cnn_dr05, "cnn-05", 5)
models_cnn_4 = _load_cnn_models(get_model_cnn_dr07, "cnn-07", 5)

In [None]:
# Supplemental models: each variant is a single saved model, wrapped in a
# one-element list so it can be passed to the same inference helpers as the
# multi-model groups.
models_supp_0 = [
    keras.models.load_model("../input/suppmodels/supp-models/supp-ae-single-100-epochs/model_0")
]
models_supp_1 = [
    keras.models.load_model("../input/suppmodels/supp-models/supp-ae-single-500-epochs/model_0")
]
models_supp_2 = [
    keras.models.load_model("../input/suppmodels/supp-models/supp-ae-single-ada03/model_0")
]
models_supp_3 = [
    keras.models.load_model("../input/suppmodels/supp-models/supp-ae-single-henorm-prelu/model_0")
]
models_supp_4 = [
    keras.models.load_model("../input/suppmodels/supp-models/supp-ae-single-sgd-expdec/model_0")
]
models_supp_5 = [
    keras.models.load_model("../input/suppmodels/supp-models/supp-ae-single-swat/model_0")
]

# This variant was saved as weights only, so the architecture is rebuilt first.
m_6 = get_model_ae()
m_6.load_weights("../input/suppmodels/supp-models/supp-ae-single-full-data/model_ae.tf")
models_supp_6 = [m_6]

In [None]:
def preprocess_test(investment_id, feature):
    """Pack one sample as ``((investment_id, feature), 0)``; the 0 is a placeholder label."""
    model_inputs = (investment_id, feature)
    placeholder_label = 0
    return model_inputs, placeholder_label
def make_test_dataset(feature, investment_id, batch_size=1024):
    """Build a batched, cached, prefetched dataset of ``((investment_id, feature), 0)``.

    Note the argument order: ``feature`` comes first here, but each element is
    emitted with ``investment_id`` first.
    """
    slices = tf.data.Dataset.from_tensor_slices((investment_id, feature))
    batched = slices.map(preprocess_test).batch(batch_size)
    return batched.cache().prefetch(tf.data.experimental.AUTOTUNE)
def inference(models, ds, ds_wt=None):
    """Average the predictions of ``models``, with an optional fallback dataset.

    Each model first predicts on ``ds``. If that raises ``ValueError`` and a
    fallback ``ds_wt`` was supplied, the model predicts on ``ds_wt`` instead.
    When no fallback is available the original ``ValueError`` is re-raised
    rather than being masked by a second failing call on ``None``.

    Args:
        models: Iterable of objects exposing ``predict(dataset)``.
        ds: Primary dataset passed to ``predict``.
        ds_wt: Optional fallback dataset used when ``ds`` is rejected.

    Returns:
        Element-wise mean of all model predictions (``np.mean(..., axis=0)``).

    Raises:
        ValueError: If a model rejects ``ds`` and no ``ds_wt`` was given, or if
            it also rejects ``ds_wt``.
    """
    y_preds = []
    for model in models:
        try:
            y_pred = model.predict(ds)
        except ValueError:
            if ds_wt is None:
                # No alternative input to try; surface the real error.
                raise
            y_pred = model.predict(ds_wt)
        y_preds.append(y_pred)
    return np.mean(y_preds, axis=0)

def preprocess_test_gru_cnn(feature):
    """Pair a feature sample with a placeholder label: ``(feature, 0)``."""
    placeholder_label = 0
    return feature, placeholder_label
def make_test_dataset_gru_cnn(feature, batch_size=1024):
    """Build a batched, cached, prefetched dataset of ``(feature, 0)`` elements."""
    slices = tf.data.Dataset.from_tensor_slices(feature)
    batched = slices.map(preprocess_test_gru_cnn).batch(batch_size)
    return batched.cache().prefetch(tf.data.experimental.AUTOTUNE)
def inference_gru_cnn(models, ds):
    """Return the element-wise mean of every model's prediction on ``ds``."""
    per_model = [member.predict(ds) for member in models]
    return np.mean(per_model, axis=0)

def preprocess_test_wt(investment_id, time_id, feature):
    """Pack one sample as ``((investment_id, time_id, feature), 0)``; the 0 is a placeholder label."""
    model_inputs = (investment_id, time_id, feature)
    placeholder_label = 0
    return model_inputs, placeholder_label
def make_test_dataset_wt(investment_id, time_id, feature, batch_size=1024):
    """Build a batched, cached, prefetched dataset of ``((investment_id, time_id, feature), 0)``."""
    slices = tf.data.Dataset.from_tensor_slices((investment_id, time_id, feature))
    batched = slices.map(preprocess_test_wt).batch(batch_size)
    return batched.cache().prefetch(tf.data.experimental.AUTOTUNE)
def inference_wt(models, ds):
    """Return the element-wise mean of every model's prediction on ``ds``."""
    per_model = [member.predict(ds) for member in models]
    return np.mean(per_model, axis=0)

In [None]:
import ubiquant

env = ubiquant.make_env()
iter_test = env.iter_test()
lst_t = 1211
for (test_df, sample_prediction_df) in iter_test:
    # NOTE(review): lst_t is never advanced, so every test batch is stamped
    # with the same time_id (1212) -- confirm this is intended.
    test_df["time_id"] = lst_t + 1

    # One dataset per input signature used by the model families.
    ds_wt = make_test_dataset_wt(test_df["investment_id"], test_df["time_id"], test_df[features])
    ds_st = make_test_dataset(test_df[features], test_df["investment_id"])
    ds_of = make_test_dataset_gru_cnn(test_df[features])

    ensemble_predictions = [
        # ae with varying dropouts and gaussian noise
        inference(models_ae_upd_drgn_1, ds_st),
        inference(models_ae_upd_drgn_2, ds_st),
        # aes with time ids
        inference_wt(models_ae_wt_1, ds_wt),
        inference_wt(models_ae_wt_2, ds_wt),
        inference_wt(models_ae_wt_3, ds_wt),
        inference_wt(models_ae_wt_4, ds_wt),
        # gru without time id | investment id
        inference_gru_cnn(models_gru_1, ds_of),
        inference_gru_cnn(models_gru_2, ds_of),
        inference_gru_cnn(models_gru_3, ds_of),
        inference_gru_cnn(models_gru_4, ds_of),
        # cnn without time id or investment id
        inference_gru_cnn(models_cnn_1, ds_of),
        inference_gru_cnn(models_cnn_2, ds_of),
        inference_gru_cnn(models_cnn_3, ds_of),
        inference_gru_cnn(models_cnn_4, ds_of),
        # supplemental models (fall back to ds_wt if ds_st is rejected)
        inference(models_supp_0, ds_st, ds_wt),
        inference(models_supp_1, ds_st, ds_wt),
        inference(models_supp_2, ds_st, ds_wt),
        inference(models_supp_3, ds_st, ds_wt),
        inference(models_supp_4, ds_st, ds_wt),
        inference(models_supp_5, ds_st, ds_wt),
        inference(models_supp_6, ds_wt),
    ]

    # Stack the per-group predictions side by side (one column per group) and
    # take the unweighted mean across groups.
    t_f = np.hstack(np.array(ensemble_predictions))

    sample_prediction_df['target'] = np.mean(t_f, axis=1)
    env.predict(sample_prediction_df)