In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, Input
from keras.models import Model
from keras.layers import Embedding, Dense, LSTM, RepeatVector
from keras.layers.wrappers import TimeDistributed
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras import regularizers
from keras.callbacks import ReduceLROnPlateau, EarlyStopping




# Load data

In [None]:
# Read the feature table and the label table (paths are relative to the
# notebook's working directory).
data_df, label_df = (
    pd.read_csv(csv_name) for csv_name in ('x_train.csv', 'y_train.csv')
)

# Prepare data for training

In [None]:
# Feature columns, in the order they appear along the last axis of X.
HISTONE_MARKS = ['H3K4me3', 'H3K4me1', 'H3K36me3', 'H3K9me3', 'H3K27me3']

# Build one (rows_per_gene, n_marks) matrix per gene. groupby sorts its keys by
# default, so X_ser is ordered by ascending GeneId.
grp = data_df.groupby('GeneId')
X_ser = grp.apply(lambda gene_rows: np.array(gene_rows[HISTONE_MARKS]))

# Shape: (n_genes, rows_per_gene, n_marks). np.stack raises if any gene has a
# different number of rows than the others.
X = np.stack(X_ser.tolist())

# The CNN expects (n_genes, n_marks, rows_per_gene, 1). The two middle axes must
# be *swapped* with a transpose: a plain reshape to the same target shape only
# re-chunks the row-major buffer, which would mix values from different marks
# into every row instead of giving one row per mark.
X_CNN = np.transpose(X, (0, 2, 1))[..., np.newaxis]

# NOTE(review): this assumes label_df has one row per gene, in the same
# (ascending GeneId) order as X_ser -- confirm, or align labels on GeneId.
y = label_df.Prediction.values

In [None]:
# Hold out 20% of the genes for validation. random_state pins the shuffle so a
# Restart & Run All reproduces the same split (and therefore comparable scores).
X_train, X_test, y_train, y_test = train_test_split(
    X_CNN, y, test_size=0.2, random_state=42
)

# CNN Training

In [None]:
# Each sample is fed to the CNN as a 5 x 100 single-channel "image"
# (channels-last layout: height=5, width=100, channels=1).
inp = Input(shape=(5, 100, 1))

# The pooling layers use the same (default) data_format as the Conv2D layers.
# Forcing data_format="channels_first" on a channels-last tensor makes the pool
# window slide over the filter axis instead of the spatial axes.
# Pool windows are (1, k): after a 3x3 'valid' convolution the height is only
# 3 (then 1), so only the width axis can be down-sampled.
x = Conv2D(32, (3, 3), activation='relu')(inp)   # -> (3, 98, 32)
x = Dropout(0.2)(x)
x = MaxPooling2D(pool_size=(1, 4))(x)            # -> (3, 24, 32)
x = Conv2D(32, (3, 3), activation='relu')(x)     # -> (1, 22, 32)
x = Dropout(0.2)(x)
x = MaxPooling2D(pool_size=(1, 2))(x)            # -> (1, 11, 32)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)

# Single sigmoid unit: binary classification.
out = Dense(1, activation='sigmoid')(x)

# One optimizer object, actually passed to compile(). The learning rate is the
# one the model was really being trained with (0.01).
adam = Adam(lr=0.01)

# Multiply the learning rate by 0.4 when training accuracy has not improved for
# 5 epochs, never going below 0.0005.
# NOTE(review): 'acc' is the log key older Keras releases use for
# metrics=['accuracy']; newer releases log 'accuracy' -- confirm against the
# installed version.
reduce_lr = ReduceLROnPlateau(monitor='acc', factor=0.4,
                              patience=5, min_lr=0.0005, verbose=1)

model = Model(inputs=inp, outputs=out)
model.compile(loss='binary_crossentropy',
              metrics=['accuracy'], optimizer=adam)

In [None]:
# Train the CNN for 40 epochs, reporting metrics on the held-out split after
# each epoch and letting reduce_lr adjust the learning rate.
model.fit(
    X_train,
    y_train,
    epochs=40,
    callbacks=[reduce_lr],
    validation_data=(X_test, y_test),
)

# LSTM Training

In [None]:
# The LSTM consumes the un-reshaped X. Hold out 20% for validation;
# random_state pins the shuffle so re-running the notebook reproduces the split.
X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Each sample is a sequence of 100 steps with 5 features per step.
inp = Input(shape=(100, 5))

# Two stacked LSTM layers. The first returns the full sequence so that the
# second can consume it; the second must therefore be applied to `x`, not to
# `inp` (applying it to `inp` silently drops the first layer from the graph).
# No Embedding layer: the previous Embedding call was applied to a stale `x`
# defined before `inp`, and its result was overwritten straight away.
x = LSTM(254, dropout=0.2, return_sequences=True)(inp)
x = LSTM(254, dropout=0.2)(x)

# Fully connected head. l2(0) applies a zero-strength penalty, i.e. it is
# currently a no-op kept as a tuning hook.
x = Dense(254, activation='relu', kernel_regularizer=regularizers.l2(0))(x)
x = Dense(100, activation='relu', kernel_regularizer=regularizers.l2(0))(x)
x = Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0))(x)
x = Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0))(x)


# Keras 2 keyword names (inputs/outputs, epochs), matching the CNN cells.
model_lstm = Model(inputs=inp, outputs=x)
model_lstm.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])

# Multiply the learning rate by 0.4 when training accuracy has not improved for
# 2 epochs, never going below 0.0005.
# NOTE(review): 'acc' is the log key older Keras releases use for
# metrics=['accuracy']; newer releases log 'accuracy' -- confirm against the
# installed version.
reduce_lr = ReduceLROnPlateau(monitor='acc', factor=0.4,
                              patience=2, min_lr=0.0005, verbose=1)


history = model_lstm.fit(
    X_train_lstm, y_train_lstm,
    validation_data=(X_test_lstm, y_test_lstm),
    epochs=100, batch_size=100, shuffle=True,
    callbacks=[reduce_lr],
)