In [1]:
# Keras models: autoencoder feature extractor + tuned MLP (definitions and weight loading)

In [2]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold

from tqdm import tqdm
from random import choices
import random

import kerastuner as kt

In [3]:
# Fill values added in place of NaNs in the Keras input row during inference
# (presumably the per-feature training means, going by the file name — confirm;
# the array must broadcast against a (1, n_features) row).
keras_f_mean = np.load('../input/npyfolder/keras_f_mean_online.npy')

In [4]:
def create_autoencoder(input_dim,output_dim,noise=0.05):
    """Build a noisy autoencoder with an auxiliary label head and compile it.

    Graph: input -> BatchNormalization -> GaussianNoise(noise)
    -> Dense(64, relu) -> Dropout(0.2) -> Dense(input_dim) named 'decoded'.
    The label head (Dense(32, relu) -> BatchNormalization -> Dropout(0.2)
    -> Dense(output_dim, sigmoid) named 'label_output') is stacked on top of
    the 'decoded' tensor.

    Args:
        input_dim: Number of input features; also the width of the
            'decoded' reconstruction layer.
        output_dim: Number of sigmoid units in the label head.
        noise: Value passed to the GaussianNoise layer.

    Returns:
        Tuple ``(autoencoder, encoder)``. ``autoencoder`` outputs
        ``[decoded, label_output]`` and is compiled with Adam(0.001), MSE on
        'decoded' and binary cross-entropy on 'label_output'. ``encoder``
        shares the same layers and is left uncompiled; note that it outputs
        the 'decoded' reconstruction (width ``input_dim``), not the 64-unit
        hidden activation.
    """
    features_in = Input(input_dim)

    # Normalise, corrupt with noise, then squeeze through the 64-unit layer.
    hidden = BatchNormalization()(features_in)
    hidden = GaussianNoise(noise)(hidden)
    hidden = Dense(64, activation='relu')(hidden)

    # Reconstruction of the input from the hidden representation.
    reconstruction = Dropout(0.2)(hidden)
    reconstruction = Dense(input_dim, name='decoded')(reconstruction)

    # Supervised head fed by the reconstruction.
    label_head = Dense(32, activation='relu')(reconstruction)
    label_head = BatchNormalization()(label_head)
    label_head = Dropout(0.2)(label_head)
    label_head = Dense(output_dim, activation='sigmoid', name='label_output')(label_head)

    encoder = Model(inputs=features_in, outputs=reconstruction)
    autoencoder = Model(inputs=features_in, outputs=[reconstruction, label_head])

    # Loss keys must match the output layer names above.
    losses = {'decoded': 'mse', 'label_output': 'binary_crossentropy'}
    autoencoder.compile(optimizer=Adam(0.001), loss=losses)
    return autoencoder, encoder

In [5]:
def create_model(hp,input_dim,output_dim,encoder):
    """Build and compile the classifier that sits on top of ``encoder``.

    The encoder output is concatenated with the raw input, batch-normalised
    and passed through ``num_layers`` blocks of
    Dense -> BatchNormalization -> swish -> Dropout, followed by a sigmoid
    output layer.

    Args:
        hp: Object exposing ``Int`` and ``Float`` (a kerastuner-style
            hyperparameter container). It supplies 'init_dropout',
            'num_layers', 'num_units_{i}', 'dropout_{i}', 'lr' and
            'label_smoothing'.
        input_dim: Shape argument handed to ``Input``.
        output_dim: Number of sigmoid output units.
        encoder: Callable Keras model/layer applied to the input tensor.

    Returns:
        A compiled ``Model`` using Adam, binary cross-entropy with label
        smoothing, and an AUC metric named 'auc'.
    """
    inputs = Input(input_dim)

    # Use both raw and encoded features.
    encoded = encoder(inputs)
    net = Concatenate()([encoded, inputs])
    net = BatchNormalization()(net)
    net = Dropout(hp.Float('init_dropout',0.0,0.5))(net)

    n_hidden = hp.Int('num_layers',1,3)
    for layer_idx in range(n_hidden):
        width = hp.Int(f'num_units_{layer_idx}',64,256)
        net = Dense(width)(net)
        net = BatchNormalization()(net)
        net = Lambda(tf.keras.activations.swish)(net)
        rate = hp.Float(f'dropout_{layer_idx}',0.0,0.5)
        net = Dropout(rate)(net)

    outputs = Dense(output_dim, activation='sigmoid')(net)
    model = Model(inputs=inputs, outputs=outputs)

    optimizer = Adam(hp.Float('lr',0.00001,0.1,default=0.001))
    loss = BinaryCrossentropy(label_smoothing=hp.Float('label_smoothing',0.0,0.1))
    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=[tf.keras.metrics.AUC(name = 'auc')],
    )
    return model

In [6]:
# Rebuild the autoencoder graph (130 inputs, 5 label outputs) so that the saved
# weights can be loaded into the matching `encoder` architecture.
autoencoder, encoder = create_autoencoder(130,5,noise=0.1) 
encoder.load_weights('../input/modelfolder/encoder.hdf5')
# Freeze the encoder so models built on top of it do not update its weights.
encoder.trainable = False

In [7]:
# Bind the fixed dimensions (130 inputs, 5 outputs) and the frozen encoder so a
# model can be rebuilt from a hyperparameter object alone.
model_fn = lambda hp: create_model(hp,130,5,encoder)

# NOTE(review): `xx` is not defined in any visible cell — these look like
# redacted placeholders. Replace them with the fold count and seed that match
# the saved artifacts before running, otherwise this cell raises NameError.
FOLDS =xx
SEED = xx

keras_models = []
# Hyperparameters for the given seed (presumably a kerastuner HyperParameters
# object saved at tuning time — confirm). read_pickle unpickles the file, so
# only load artifacts from a trusted source.
hp = pd.read_pickle(f'../input/modelfolder/best_hp_{SEED}.pkl')
# Rebuild one model per fold and load that fold's fine-tuned weights.
for f in range(FOLDS):
    model = model_fn(hp)
    model.load_weights(f'../input/modelfolder/model_{SEED}_{f}_finetune.hdf5')
    keras_models.append(model)

In [8]:
# Keep only the last two fold models for inference. Despite the singular name,
# `keras_model` is a list; the inference loop iterates over it.
keras_model= keras_models[-2:]

In [9]:
# LightGBM models: imports, feature list and model loading
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from random import choices
from sklearn.model_selection import train_test_split
import lightgbm as lgbm

In [10]:
# Raw feature column names: feature_0 ... feature_129.
feat_cols = [f'feature_{i}' for i in range(130)]
# LightGBM input columns: the raw features followed by the two engineered
# columns computed in the inference loop. Concatenation yields a new list, so
# `feat_cols` itself is left untouched.
features = feat_cols + ['feature_stock_id_sum', 'feature_1_2_cross']

In [11]:
import os, sys, gc, time, warnings, pickle, psutil, random
# Load the five pickled LightGBM models (lgb_model_0.bin ... lgb_model_4.bin).
# NOTE: pickle.load can execute arbitrary code contained in the file; only load
# model artifacts that come from a trusted source.
lgb_models = []
for i in range(5):
    model_path = '../input/modelfolder/lgb_model_'+str(i)+'.bin'
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(model_path, 'rb') as model_file:
        clf = pickle.load(model_file)
    lgb_models.append(clf)

In [12]:
import janestreet
# Create the competition environment and its test iterator. The inference loop
# consumes `env_iter` as (test_df, pred_df) pairs and submits each prediction
# frame back through `env.predict`.
env = janestreet.make_env()
env_iter = env.iter_test()

In [13]:
# Column layout assumed for each test frame: weight, feature_0..feature_129, date.
# NOTE(review): this order is assumed rather than read from the frame — confirm
# it matches what the environment actually yields.
test_df_columns = ['weight', *(f'feature_{i}' for i in range(130)), 'date']
# Positions of the raw feature columns inside that layout, used to slice the
# feature values out of a raw row by position.
index_features = [
    position
    for position, column in enumerate(test_df_columns)
    if column in feat_cols
]

In [14]:
import gc
# Ensemble settings: blend weights for the two model families and the decision
# threshold applied to the blended score (tune e.g. 0.499 / 0.501).
KERAS_WEIGHT = 0.5
LGB_WEIGHT = 0.5
ACTION_THRESHOLD = 0.5
# Sentinel used to replace NaNs in the LightGBM input.
LGB_NAN_FILL = -9999

# Main inference loop: for every (test_df, pred_df) pair produced by the
# environment, score the row with both model families, blend the scores and
# write the resulting 0/1 action into pred_df before submitting it.
for (test_df, pred_df) in tqdm(env_iter):
    if test_df['weight'].values[0]>0:
        # --- Keras ---
        # Slice the raw feature values by position (relies on the column
        # layout described by `test_df_columns`) into a (1, n_features) row.
        keras_x_tt = test_df.values[0][index_features].reshape(1, -1)
        # A NaN sum means at least one value needs imputing; NaNs are zeroed
        # and the corresponding entry of keras_f_mean is added in their place.
        if np.isnan(keras_x_tt.sum()):
            keras_x_tt = np.nan_to_num(keras_x_tt) + np.isnan(keras_x_tt) * keras_f_mean
        # Average over the Keras models, then take the median over the outputs.
        keras_pred = np.mean([model(keras_x_tt, training = False).numpy() for model in keras_model],axis=0)
        keras_pred = np.median(keras_pred)

        # --- LightGBM ---
        # Build the engineered columns on a copy so the frame handed out by the
        # environment is not mutated in place. NaNs are filled only after the
        # engineered columns are computed, so a NaN input yields the sentinel.
        lgb_frame = test_df.assign(
            feature_stock_id_sum=test_df['feature_41'] + test_df['feature_42'] + test_df['feature_43'],
            feature_1_2_cross=test_df['feature_1']/(test_df['feature_2']+1e-5),
        )
        lgb_x_tt = lgb_frame.loc[:, features].fillna(LGB_NAN_FILL).values
        lgb_pred = np.median([model.predict(lgb_x_tt) for model in lgb_models])

        # --- Blend and decide ---
        final_pred = KERAS_WEIGHT*keras_pred + LGB_WEIGHT*lgb_pred
        pred_df['action'] = int(final_pred >= ACTION_THRESHOLD)
    else:
        # Rows without positive weight are never acted on. Assign through the
        # column label (as in the branch above) rather than writing into
        # `.values`, which is not guaranteed to write through to the frame and
        # is read-only under pandas copy-on-write.
        pred_df['action'] = 0
    env.predict(pred_df)

15219it [05:22, 47.19it/s] 
