In [14]:
%config IPCompleter.greedy=True

In [15]:
import pandas as pd

# Training metadata: the 'Label' column is the target used for fitting.
df_train = pd.read_csv('train_kaggle.csv')
# Submission template: its 'Id' column is later used to locate the test files
# and a 'Predicted' column is added to it before it is written out.
df_test = pd.read_csv('sample_solution.csv')
# Target vector as a plain NumPy array.
Y = df_train['Label'].values

In [16]:
import numpy as np

# NOTE(review): allow_pickle=True lets np.load unpickle Python objects, which
# can execute arbitrary code. Only load 'allData.npy' from a trusted source.
# The result is 1-D (one entry per training sample); presumably each entry is
# a 2-D (timesteps, 102) array of varying length -- TODO confirm.
dataframes = np.load('allData.npy', allow_pickle = True)
dataframes.shape

(18662,)

In [71]:
# Width (number of columns) of each of the nine feature groups; sums to 102.
feature_length = [8,4,16,16,8,12,16,12,10]
# Flags choosing which feature groups get their own branch in the model.
selected_features = [True, True, True, True, True, False, False, False, False]
# Half-open [start, stop) column range of each feature group.
feature_pos = [[0,8],[8,12],[12,28],[28,44],[44,52],[52,64],[64,80],[80,92],[92,102]]

def process(dfs, feature):
    """Extract the columns of one feature group from every sample.

    Parameters
    ----------
    dfs : iterable of 2-D numpy arrays
        One array per sample; columns are indexed by ``feature_pos``.
    feature : int
        Index of the feature group (position in ``feature_length`` /
        ``feature_pos``).

    Returns
    -------
    list of numpy.ndarray
        One float64 array per sample with shape
        ``(rows, feature_length[feature])``.
    """
    length = feature_length[feature]
    # feature_pos is a list of [start, stop] pairs, so it must be indexed with
    # feature_pos[feature] (tuple indexing such as feature_pos[feature, 0]
    # raises TypeError on a list).
    start, stop = feature_pos[feature]
    data = []
    for df in dfs:
        # Copy into a fresh float buffer so the caller's array is not aliased.
        out = np.zeros((df.shape[0], length))
        out[:, 0:length] = df[:, start:stop]
        data.append(out)
    return data

def pad_data(dfs, padsize=500):
    """Bring every 2-D array to exactly ``padsize`` rows and stack them.

    Arrays shorter than ``padsize`` are zero-padded at the end (extra rows are
    appended after the data); arrays with ``padsize`` rows or more keep only
    their last ``padsize`` rows.

    Parameters
    ----------
    dfs : iterable of 2-D numpy arrays
        All arrays must share the same number of columns.
    padsize : int, optional
        Target number of rows per sample (default 500).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dfs), padsize, n_columns)``.

    Raises
    ------
    ValueError
        If ``padsize`` is not positive, or (from ``np.stack``) if ``dfs`` is
        empty or the column counts differ.
    """
    # df[-0:] would return the whole array instead of an empty one, so a
    # non-positive padsize cannot be honoured by the truncation branch.
    if padsize < 1:
        raise ValueError("padsize must be a positive integer")

    data = []
    for df in dfs:
        diff = padsize - df.shape[0]
        if diff > 0:
            # Too short: append `diff` rows of zeros, leave columns untouched.
            df = np.pad(df, [(0, diff), (0, 0)], 'constant')
        else:
            # Long enough: keep the most recent `padsize` rows.
            df = df[-padsize:]
        data.append(df)

    return np.stack(data)

In [54]:
def load_test_dataframe(id):
    """Read one test sample from ``test/test/<id>.npy`` as a DataFrame.

    ``id`` is substituted into the file name; the loaded array becomes the
    DataFrame's data.
    """
    path = "test/test/{}.npy".format(id)
    return pd.DataFrame(data=np.load(path))

# Load every test sample listed in the submission template, in template order.
# A comprehension with `sample_id` avoids rebinding the builtin `id` in the
# notebook's global namespace, which the previous for-loop did.
testdatas = [
    load_test_dataframe(sample_id).values
    for sample_id in df_test['Id']
]

In [55]:
# Pad/truncate every sample to a common length and stack into 3-D tensors.
XTrain = pad_data(dataframes)
# pad_data only iterates over its argument, so the list is passed directly;
# wrapping variable-length arrays in np.array() is unnecessary and raises on
# recent NumPy versions for ragged input.
XTest = pad_data(testdatas)

# Cache both tensors. The test file must receive XTest (it previously
# received XTrain, silently corrupting the cached test set).
np.save('XTrain.npy', XTrain)
np.save('XTest.npy', XTest)

In [56]:
# Sanity check: (samples, padded timesteps, feature columns).
XTrain.shape

(18662, 500, 102)

In [57]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the training samples for validation; the fixed seed keeps
# the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    XTrain,
    Y,
    test_size=0.1,
    random_state=42,
)

In [58]:
# Sanity check: the training part of the split (90% of the samples).
X_train.shape

(16795, 500, 102)

In [77]:
import tensorflow as tf
from tensorflow import keras

# Metrics tracked during training. The name 'auc' matters: Keras reports the
# validation value as 'val_auc', which is what early stopping monitors.
_binary_accuracy = keras.metrics.BinaryAccuracy(name='accuracy')
_roc_auc = keras.metrics.AUC(name='auc')
METRICS = [_binary_accuracy, _roc_auc]

def get_simple_conv(out):
    """Attach a small LeNet-style 1-D conv classifier to the tensor ``out``.

    Two Conv1D -> MaxPooling1D -> Dropout stages are followed by a flatten,
    two ReLU dense layers (120 and 84 units) and a single sigmoid unit.

    Returns the symbolic output tensor of the final sigmoid layer.
    """
    L = keras.layers
    # Layers are listed in the exact order they are applied.
    stack = [
        L.Conv1D(filters=6, kernel_size=5, padding='same', activation='relu'),
        L.MaxPooling1D(pool_size=2),
        L.Dropout(0.4),
        L.Conv1D(filters=16, kernel_size=5, padding='same', activation='relu'),
        L.MaxPooling1D(pool_size=2),
        L.Dropout(0.4),
        L.Flatten(),
        L.Dense(120, activation='relu'),
        L.Dense(84, activation='relu'),
        L.Dense(1, activation='sigmoid'),
    ]
    for layer in stack:
        out = layer(out)
    return out

# Full input: (padded timesteps, all feature columns).
inputs = keras.Input(shape=(X_train.shape[1], X_train.shape[2]))

# One independent conv sub-network per selected feature group. Each branch
# sees only its own column range and ends in a single sigmoid unit.
branch_outputs = []
for i, (start, stop) in enumerate(feature_pos):
    if not selected_features[i]:
        continue
    branch_outputs.append(get_simple_conv(inputs[:, :, start:stop]))

if not branch_outputs:
    raise ValueError("selected_features must enable at least one feature group")

# Ensemble the branches by averaging their (batch, 1) sigmoid outputs.
# keras.layers.Average is numerically identical to concatenating the branch
# outputs, taking the mean over axis 1 and reshaping to (-1, 1), but stays
# inside the Keras layer API instead of applying raw TensorFlow ops to
# symbolic tensors. Merge layers need at least two inputs, so a single
# branch is used directly.
if len(branch_outputs) == 1:
    out = branch_outputs[0]
else:
    out = keras.layers.Average()(branch_outputs)

model = keras.Model(inputs=inputs, outputs=out)

# Binary classification: sigmoid output with binary cross-entropy.
model.compile(
      optimizer=keras.optimizers.Adam(),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=METRICS)

# Take a look at the model summary
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           [(None, 500, 102)]   0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice_95 (T [(None, 500, 8)]     0           input_15[0][0]                   
__________________________________________________________________________________________________
tf_op_layer_strided_slice_96 (T [(None, 500, 4)]     0           input_15[0][0]                   
__________________________________________________________________________________________________
tf_op_layer_strided_slice_97 (T [(None, 500, 16)]    0           input_15[0][0]                   
___________________________________________________________________________________________

In [78]:
# Stop once validation AUC has not improved for 10 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

In [79]:
from tensorflow.keras.utils import to_categorical

# Upper bound on training length; early stopping normally ends the run sooner.
EPOCHS = 100

# Train on the 90% split and evaluate on the held-out 10% after every epoch.
baseline_history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    callbacks=[early_stopping],
)

Train on 16795 samples, validate on 1867 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 00048: early stopping


In [80]:
# Score the padded test tensor with the trained model.
YTest = model.predict(XTest)
# Column 0 of the prediction array is stored as the 'Predicted' column
# (the model is built with a single output column).
df_test['Predicted'] = YTest[:, 0]
df_test

Unnamed: 0,Id,Predicted
0,0,0.999736
1,1,0.961501
2,2,0.001920
3,3,0.970701
4,4,0.451361
5,5,0.137144
6,6,0.000174
7,7,0.645911
8,8,0.999999
9,9,1.000000


In [81]:
# Write the submission file; index=False keeps the row index out of the CSV.
df_test.to_csv('test.csv', index=False)