In [1]:
!pip install keras
!pip install sklearn
!pip install matplotlib
!pip install -U -q PyDrive

Collecting sklearn
  Downloading https://files.pythonhosted.org/packages/1e/7a/dbb3be0ce9bd5c8b7e3d87328e79063f8b263b2b1bfa4774cb1147bfcd3f/sklearn-0.0.tar.gz
Building wheels for collected packages: sklearn
  Running setup.py bdist_wheel for sklearn ... [?25l- done
[?25h  Stored in directory: /content/.cache/pip/wheels/76/03/bb/589d421d27431bcd2c6da284d5f2286c8e3b2ea3cf1594c074
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0


In [2]:
# Clear the Keras backend session before anything is built in this notebook run.
from keras import backend as K
K.clear_session()

Using TensorFlow backend.


In [0]:
# Install a TensorFlow session for Keras whose GPU options have `allow_growth`
# enabled (GPU memory is then grown on demand instead of being reserved up front).
# `K` is re-imported here so the cell can be run on its own.
from keras import backend as K
cfg = K.tf.ConfigProto()
cfg.gpu_options.allow_growth = True
K.set_session(K.tf.Session(config=cfg))

In [0]:
# Authenticate the Colab user and build a PyDrive client (`drive`) that later
# cells use to fetch files from Google Drive.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Interactive Colab login; must run before the default credentials are requested.
auth.authenticate_user()
gauth = GoogleAuth()
# Reuse the application-default credentials obtained by the login above.
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
# Fetch the helper script `colab_setup.py` from Google Drive by its file id and
# save it into the working directory.
file_import = drive.CreateFile({'id':'1p1bsltfTcIrZ_kfE6kwGTPzcdXorHbb2'})
file_import.GetContentFile('colab_setup.py') 
# The import has to come after the download: the module does not exist locally before it.
from colab_setup import setup

# NOTE(review): `setup` is opaque from here; presumably it downloads the helper
# modules and .npy data files that show up in the directory listing — confirm in colab_setup.py.
setup(drive)

In [6]:
# List the working directory to check which helper modules and data files are present.
!ls

colab_setup.py			 roc_auc_callback.py	X_train.npy
custom_fast_text_embeddings.npy  sample_submission.csv	X_val.npy
datalab				 train_model.py		y_test.npy
fast_text_embeddings.npy	 X_submission.npy	y_train_full.npy
plot_history.py			 X_test.npy		y_train.npy
__pycache__			 X_train_full.npy	y_val.npy


In [0]:
import numpy as np
import os

from plot_history import plot_history
from roc_auc_callback import RocAucCallback
from train_model import train_with_cv, train_with_submitting, evaluate_on_test

from keras.models import Model
from keras.layers import *
from keras.layers.merge import concatenate

In [0]:
def yoon_kim_conv_layer(filtersNumber, inputLayer, kernel_sizes=(1, 2, 3, 5), dropout_rate=0.2):
    """Build a block of parallel 1D convolutions with different kernel widths.

    For every width in ``kernel_sizes`` an independent branch
    ``Conv1D -> BatchNormalization -> MaxPooling1D`` is applied to the same
    ``inputLayer``. The branch outputs are concatenated and passed through a
    single ``Dropout`` layer. (The name presumably refers to Yoon Kim's
    multi-width CNN for sentence classification.)

    Args:
        filtersNumber: Number of filters used by each ``Conv1D`` branch.
        inputLayer: Keras tensor that every branch is applied to.
        kernel_sizes: Iterable of convolution window widths, one branch per
            entry. The default reproduces the previously hard-coded widths
            1, 2, 3 and 5.
        dropout_rate: Rate of the ``Dropout`` applied to the merged branches;
            the default 0.2 matches the previous hard-coded value.

    Returns:
        The Keras tensor produced by the final ``Dropout`` layer.

    Raises:
        ValueError: If ``kernel_sizes`` is empty.
    """
    kernel_sizes = tuple(kernel_sizes)
    if not kernel_sizes:
        raise ValueError("kernel_sizes must contain at least one kernel size")

    # Layers are created branch by branch (conv, batch norm, pool) so that the
    # creation order -- and therefore Keras' automatic layer naming -- is the
    # same as in the original unrolled version.
    branches = []
    for kernel_size in kernel_sizes:
        conv = Conv1D(
            filtersNumber,
            kernel_size,
            activation='elu',
            padding='same',
            kernel_initializer='he_uniform',
        )(inputLayer)
        normalized = BatchNormalization()(conv)
        branches.append(MaxPooling1D()(normalized))

    # Keras' concatenate requires at least two inputs, so a single branch is
    # forwarded as-is.
    features = concatenate(branches) if len(branches) > 1 else branches[0]
    return Dropout(dropout_rate)(features)

In [9]:
# Build and compile the classifier: frozen pre-trained embeddings -> bidirectional GRU,
# whose output feeds (a) global average/max pooling and (b) a stack of multi-kernel
# convolution blocks; both feature sets are merged and classified by two dense layers.

# Hyperparameters.
# maxWords: first argument of Embedding (vocabulary size of the embedding table).
# maxSequenceLengthInWords: length of each input sequence.
# embeddingDimension: size of each embedding vector.
# filtersNumber: base filter count; the conv blocks below use 1x, 2x, 4x and 8x this value.
maxWords = 30000
maxSequenceLengthInWords = 400
embeddingDimension = 300
filtersNumber = 64

input_layer = Input(shape=(maxSequenceLengthInWords,))

# Embedding weights are loaded from a saved matrix and kept frozen (trainable = False).
embedding_layer = Embedding(
    maxWords, 
    output_dim=embeddingDimension, 
    input_length=maxSequenceLengthInWords,
    weights = [np.load('custom_fast_text_embeddings.npy')],
    trainable = False
)(input_layer)
embedding_dropout = SpatialDropout1D(0.2)(embedding_layer)

# Bidirectional GRU returning the full sequence (summary output: (None, 400, 300)).
bidirectional = Bidirectional(GRU(150, dropout = 0.2, recurrent_dropout = 0.2, return_sequences=True))(embedding_dropout)
bidirectional_normalization = BatchNormalization()(bidirectional)
# NOTE: only the convolution stack consumes `bidirectional_dropout`; the two global
# pooling layers below read the batch-normalized GRU output directly.
bidirectional_dropout = SpatialDropout1D(0.2)(bidirectional_normalization)
bidirectional_avg_pool = GlobalAveragePooling1D()(bidirectional_normalization)
bidirectional_max_pool = GlobalMaxPooling1D()(bidirectional_normalization)
bidirectional_gru_outs = concatenate([bidirectional_avg_pool, bidirectional_max_pool])

# Three stacked multi-kernel conv blocks with 64, 128 and 256 filters per branch.
block_1_features = yoon_kim_conv_layer(filtersNumber, bidirectional_dropout)
block_2_features = yoon_kim_conv_layer(filtersNumber * 2, block_1_features)
block_3_features = yoon_kim_conv_layer(filtersNumber * 4, block_2_features)

# Block 4 repeats the same four kernel widths (1, 2, 3, 5) with 512 filters, but ends
# each branch with GlobalMaxPooling1D instead of MaxPooling1D.
block_4_conv_1 = Conv1D(filtersNumber * 8, 1, activation = 'elu', padding = 'same', kernel_initializer = 'he_uniform')(block_3_features)
block_4_batchnorm1 = BatchNormalization()(block_4_conv_1)
block_4_max_pool1 = GlobalMaxPooling1D()(block_4_batchnorm1)

block_4_conv_2 = Conv1D(filtersNumber * 8, 2, activation = 'elu', padding = 'same', kernel_initializer = 'he_uniform')(block_3_features)
block_4_batchnorm2 = BatchNormalization()(block_4_conv_2)
block_4_max_pool2 = GlobalMaxPooling1D()(block_4_batchnorm2)

block_4_conv_3 = Conv1D(filtersNumber * 8, 3, activation = 'elu', padding = 'same', kernel_initializer = 'he_uniform')(block_3_features)
block_4_batchnorm3 = BatchNormalization()(block_4_conv_3)
block_4_max_pool3 = GlobalMaxPooling1D()(block_4_batchnorm3)

block_4_conv_4 = Conv1D(filtersNumber * 8, 5, activation = 'elu', padding = 'same', kernel_initializer = 'he_uniform')(block_3_features)
block_4_batchnorm4 = BatchNormalization()(block_4_conv_4)
block_4_max_pool4 = GlobalMaxPooling1D()(block_4_batchnorm4)

block_4_features = concatenate([block_4_max_pool1, block_4_max_pool2, block_4_max_pool3, block_4_max_pool4])
block_4_features = Dropout(0.2)(block_4_features)

# Merge the convolutional features with the pooled GRU features.
features = concatenate([block_4_features, bidirectional_gru_outs])
features_dropout = Dropout(0.2)(features)

# Classification head: two Dense(256) layers, each followed by batch norm and dropout.
dense_1 = Dense(256, activation = 'elu')(features_dropout)
dense_1_normalization = BatchNormalization()(dense_1)
dense_1_dropout = Dropout(0.2)(dense_1_normalization)

dense_2 = Dense(256, activation = 'elu')(dense_1_dropout)
dense_2_normalization = BatchNormalization()(dense_2)
dense_2_dropout = Dropout(0.2)(dense_2_normalization)

# Six sigmoid outputs, trained with binary cross-entropy (see compile below).
output_layer = Dense(6, activation='sigmoid')(dense_2_dropout)

model = Model(inputs=[input_layer], outputs=[output_layer])
            
model.summary()

model.compile(
    loss='binary_crossentropy', 
    optimizer='Adam',
    metrics=['accuracy']
)

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 400)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 400, 300)     9000000     input_1[0][0]                    
__________________________________________________________________________________________________
spatial_dropout1d_1 (SpatialDro (None, 400, 300)     0           embedding_1[0][0]                
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 400, 300)     405900      spatial_dropout1d_1[0][0]        
__________________________

In [0]:
# Train the model with the helper from train_model.py and keep the returned history.
# NOTE(review): judging by the log output, rocEvery=2 reports ROC-AUC after every
# second epoch — confirm against train_with_cv.
history = train_with_cv(model, batchSize=64, rocEvery = 2)

Train on 102124 samples, validate on 25532 samples
Epoch 1/32

Epoch 2/32
  4992/102124 [>.............................] - ETA: 50:12 - loss: 0.0550 - acc: 0.9801



roc-auc: 0.9725 - roc-auc_val: 0.971                                                                                                    
Epoch 3/32
 17344/102124 [====>.........................] - ETA: 43:42 - loss: 0.0506 - acc: 0.9806



Epoch 4/32
 20160/102124 [====>.........................] - ETA: 43:17 - loss: 0.0476 - acc: 0.9816



roc-auc: 0.9879 - roc-auc_val: 0.9847                                                                                                    
Epoch 5/32
 17280/102124 [====>.........................] - ETA: 44:46 - loss: 0.0457 - acc: 0.9826



Epoch 6/32
 20160/102124 [====>.........................] - ETA: 43:24 - loss: 0.0435 - acc: 0.9832



roc-auc: 0.9904 - roc-auc_val: 0.9878                                                                                                    
Epoch 7/32
 17280/102124 [====>.........................] - ETA: 44:23 - loss: 0.0426 - acc: 0.9833



Epoch 8/32
 20160/102124 [====>.........................] - ETA: 42:52 - loss: 0.0415 - acc: 0.9837



roc-auc: 0.9928 - roc-auc_val: 0.9889                                                                                                    
Epoch 9/32
 17280/102124 [====>.........................] - ETA: 44:17 - loss: 0.0385 - acc: 0.9848



Epoch 10/32
 20096/102124 [====>.........................] - ETA: 43:21 - loss: 0.0387 - acc: 0.9850







```
# The model appears to fit best when trained for roughly 8 to 12 epochs.

```



In [0]:
# Visualize the training run using the helper imported from plot_history.py.
plot_history(history)

In [0]:
# Print the result of evaluate_on_test for the trained model; which metric(s) it
# returns is defined in train_model.py (not visible here).
print(evaluate_on_test(model))