In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow
np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input

from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import tensorflow_addons as tfa
from sklearn.metrics import log_loss
import tensorflow as tf

data_train = pd.read_csv('train_features.csv')
data_test = pd.read_csv('test_features.csv')
data_train_target_ns = pd.read_csv('train_targets_nonscored.csv')
data_train_target_s = pd.read_csv('train_targets_scored.csv')
sub = pd.read_csv('sample_submission.csv')

In [2]:
def preprocess(df):
    df.loc[:, 'cp_type'] = df.loc[:, 'cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df.loc[:, 'cp_dose'] = df.loc[:, 'cp_dose'].map({'D1': 0, 'D2': 1})
    df.loc[:, 'cp_time'] = df.loc[:, 'cp_time'].map({24: 0, 48: 1, 72:2})
    del df['sig_id']
    return df

train = preprocess(data_train)
test = preprocess(data_test)

del data_train_target_s['sig_id']

In [3]:
def create_model(num_columns):
    model = Sequential()
    model.add(Input(num_columns))
    model.add(BatchNormalization())
    model.add(Dense(2048, activation='relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(1024, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    
    model.add(Dense(206, activation='sigmoid'))
    
    optimizer = tfa.optimizers.Lookahead('adam',sync_period=10)
    
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])
    
    model.summary()
    return model

In [4]:
def metric(y_true, y_pred):
    metrics = []
    for _target in data_train_target_s.columns:
        metrics.append(log_loss(y_true.loc[:, _target], y_pred.loc[:, _target].astype(float), labels=[0,1]))
    return np.mean(metrics)

In [5]:
N_STARTS = 2
tf.random.set_seed(42)

res = data_train_target_s.copy()
sub.loc[:, data_train_target_s.columns] = 0
sub.loc[:, data_train_target_s.columns] = 0

for seed in range(N_STARTS):
    for n, (train_idx, test_idx) in enumerate(KFold(n_splits=5, random_state=seed, shuffle=True).split(data_train_target_s, data_train_target_s)):
        print(f'Fold {n}')
    
        model = create_model(875)
        checkpoint_path = f'repeat:{seed}_Fold:{n}.h5'
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-4, mode='min')
        cb_checkpt = ModelCheckpoint(checkpoint_path, monitor = 'val_loss', verbose = 0, save_best_only = True,
                                     save_weights_only = True, mode = 'min')
        model.fit(train.values[train_idx],
                  data_train_target_s.values[train_idx],
                  validation_data=(train.values[test_idx], data_train_target_s.values[test_idx]),
                  epochs=25, batch_size=128,
                  callbacks=[reduce_lr_loss, cb_checkpt], verbose=1
                 )
        
        model.load_weights(checkpoint_path)
        test_predict = model.predict(test.values)
        val_predict = model.predict(train.values[test_idx])
        
        sub.loc[:, data_train_target_s.columns] += test_predict
        res.loc[test_idx, data_train_target_s.columns] += val_predict
        print('')
    
sub.loc[:, data_train_target_s.columns] /= ((n+1) * N_STARTS)
res.loc[:, data_train_target_s.columns] /= N_STARTS

Fold 0
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 875)               3500      
_________________________________________________________________
dense (Dense)                (None, 2048)              1794048   
_________________________________________________________________
dropout (Dropout)            (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)         

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Fold 2
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_8 (Batch (None, 875)               3500      
_________________________________________________________________
dense_10 (Dense)             (None, 2048)              1794048   
_________________________________________________________________
dropout_8 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization

Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Fold 3
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_12 (Batc (None, 875)               3500      
_________________________________________________________________
dense_15 (Dense)             (None, 2048)              1794048   
_________________________________________________________________
dropout_12 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_13 (Batc (None, 1024)              4096      
_________________________________________________________________
dropout_13 (Dropout)         (None

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Fold 0
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_20 (Batc (None, 875)               3500      
_________________________________________________________________
dense_25 (Dense)             (None, 2048)              1794048   
_________________________________________________________________
dropout_20 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_26 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization

Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Fold 1
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_24 (Batc (None, 875)               3500      
_________________________________________________________________
dense_30 (Dense)             (None, 2048)              1794048   
_________________________________________________________________
dropout_24 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_31 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_25 (Batc (None, 1024)              4096      
_________________________________________________________________
dropout_25 (Dropout)         (None

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Fold 3
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_32 (Batc (None, 875)               3500      
_________________________________________________________________
dense_40 (Dense)             (None, 2048)              1794048   
_________________________________________________________________
dropout_32 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_41 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization

Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Fold 4
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_36 (Batc (None, 875)               3500      
_________________________________________________________________
dense_45 (Dense)             (None, 2048)              1794048   
_________________________________________________________________
dropout_36 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_46 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_37 (Batc (None, 1024)              4096      
_________________________________________________________________
dropout_37 (Dropout)         (None

In [6]:
print(f'OOF Metric: {metric(data_train_target_s, res)}')

OOF Metric: 0.004675162921113734


In [7]:
sub.loc[test['cp_type']==1, data_train_target_s.columns] = 0

In [8]:
sub.to_csv('submission.csv', index=False)