In [None]:
import numpy as np 
import pandas as pd 
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, Flatten, Conv2D, MaxPooling2D
from keras.utils import normalize, to_categorical
from keras import optimizers
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import ExtraTreesClassifier

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
import warnings 
warnings.simplefilter('ignore')

In [None]:
# python generator which takes X, y and batch_size as parameters and returns batches of X and y
def gen(X, y, batch_size = 8):
    """Yield endless ``(X_batch, y_batch)`` pairs, cycling through the samples in order.

    Parameters
    ----------
    X : sequence or ndarray
        Samples, indexed along the first axis.
    y : sequence or ndarray
        Targets aligned with ``X`` (``y[k]`` belongs to ``X[k]``).
    batch_size : int, default 8
        Number of samples per yielded batch.

    Yields
    ------
    tuple of (ndarray, ndarray)
        ``batch_size`` consecutive samples and their targets. When the end of
        the data is reached the generator wraps around to index 0, so a batch
        may contain the tail of one pass and the head of the next.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``X`` is empty.
    """
    n_samples = len(X)
    if n_samples == 0:
        raise ValueError("gen() needs at least one sample")

    i = 0
    while True:
        # Modulo wrap-around: every index 0..n_samples-1 is served, including
        # the last one (the previous `i == len(X)-1` reset skipped it).
        batch_idx = [(i + k) % n_samples for k in range(batch_size)]
        i = (i + batch_size) % n_samples

        X_b = [X[j] for j in batch_idx]
        y_b = [y[j] for j in batch_idx]
        yield np.asarray(X_b), np.asarray(y_b)

In [None]:
# reading the data: comma-separated file loaded from a path relative to the notebook
fname = '../input/dlproject.csv'
df = pd.read_csv(fname, sep = ',')
# column names as a numpy array (not referenced again in the cells visible here)
features = np.array(df.columns)

In [None]:
# Pull the frame into a numpy matrix: every column but the last is a feature (X),
# the last column is the label (y).
data = df.values
X, y = data[:, :-1], data[:, -1]

In [None]:
# Rank the features by the importance an extra-trees ensemble assigns to them.
forest = ExtraTreesClassifier(n_estimators=100, random_state=78)
forest.fit(X, y)

importances = forest.feature_importances_
# spread of each feature's importance across the individual trees
std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
# feature column positions, most important first
indices = np.argsort(importances)[::-1]

print("Feature ranking:")
f = 0
while f < X.shape[1]:
    feature_idx = indices[f]
    print("%d. feature %d (%f)" % (f + 1, feature_idx, importances[feature_idx]))
    f += 1

In [None]:
# using only 16 most important features
# `indices` is ordered by decreasing importance, so its first 16 entries are the
# top-ranked columns. One fancy-index selection picks them in rank order and
# replaces the column-by-column hstack loop, which re-copied X1 on every pass.
X1 = data[:, indices[:16]]

In [None]:
# sanity check: X1 was assembled from 16 columns, so expect (n_samples, 16)
X1.shape

In [None]:
# cast the label column (taken from df.values) to integers before one-hot encoding
# NOTE(review): rebinds `y` in place, so later cells see the integer version only
y = y.astype(int)

In [None]:
# number of distinct values in the `label` column
# presumably `label` is the last column that was used as y — confirm against the CSV
len(df.label.unique())

In [None]:
# one-hot encode the integer labels into 9 columns (the dataset has 9 classes)
# NOTE(review): 9 is hard-coded here and in every output layer below
y = to_categorical(y, num_classes = 9)

In [None]:
# normalizing the features: order = 2 with axis = 0 rescales each feature column
# by its L2 norm taken over all rows
# NOTE(review): this runs before the train/cv/test split below, so the norms are
# computed with held-out rows included — consider normalising from training rows only
X1 = normalize(X1, order = 2, axis = 0)

In [None]:
# Split into training (50%), cross-validation (25%) and test (25%) sets.
# First cut: half of the data for training, the other half held back.
X_train, X_cvtest, y_train, y_cvtest = train_test_split(
    X1,
    y,
    test_size=0.5,
    random_state=78,
    stratify=y,
)
# Second cut: the held-back half is divided evenly into cv and test.
X_cv, X_test, y_cv, y_test = train_test_split(
    X_cvtest,
    y_cvtest,
    test_size=0.5,
    random_state=78,
    stratify=y_cvtest,
)

In [None]:
# shapes of the three splits, in the order train / cv / test
for split in (X_train, X_cv, X_test):
    print(split.shape)

In [None]:
# batch_size and epochs for models without hyper-parameter tuning
# NOTE(review): both names are rebound to lists in the grid-search section further
# down, so re-run this cell before re-running any of the training cells below
batch_size = 128
epochs = 10

In [None]:
# 1-layer neural network: a single softmax layer mapping the 16 selected
# features straight to the 9 class probabilities
model = Sequential()
model.add(Dense(9, activation='softmax', input_shape=(16,)))
model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['accuracy'])
# steps_per_epoch must be a whole number of batches; round up so the final
# partial batch is still drawn. `epochs` replaces the deprecated Keras-1
# keyword `nb_epoch`.
model.fit_generator(gen(X_train, y_train, batch_size),
                    steps_per_epoch = int(np.ceil(len(X_train) / batch_size)),
                    epochs = epochs)

In [None]:
# Evaluate on the test split. `steps` has to be an integer batch count, so round
# up to include the final partial batch (the generator wraps around to fill it).
test_loss, test_acc = model.evaluate_generator(gen(X_test, y_test, batch_size),
                                               steps = int(np.ceil(len(X_test) / batch_size)))
print("Accuracy score with 1 layer:", test_acc)
print("Loss with 1 layer:", test_loss)

In [None]:
# 4-layer neural network: three ReLU hidden layers (9, 8, 6 units) followed by
# a 9-way softmax output
model = Sequential()
model.add(Dense(9,input_shape=(16,),activation='relu'))
model.add(Dense(8,activation='relu'))
model.add(Dense(6,activation='relu'))
model.add(Dense(9,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer="adam", metrics=['accuracy'])
# steps_per_epoch must be a whole number of batches; round up so the final
# partial batch is still drawn. `epochs` replaces the deprecated Keras-1
# keyword `nb_epoch`.
model.fit_generator(gen(X_train, y_train, batch_size),
                    steps_per_epoch = int(np.ceil(len(X_train) / batch_size)),
                    epochs = epochs)

In [None]:
# Evaluate on the test split. `steps` has to be an integer batch count, so round
# up to include the final partial batch (the generator wraps around to fill it).
test_loss, test_acc = model.evaluate_generator(gen(X_test, y_test, batch_size),
                                               steps = int(np.ceil(len(X_test) / batch_size)))
print("Accuracy score with 4 layers:", test_acc)
print("Loss with 4 layer:", test_loss)

In [None]:
# Conv1D expects (samples, steps, channels): append a single-channel axis to the
# cross-validation features.
nrows, ncols = X_cv.shape
X_cv1 = np.reshape(X_cv, (nrows, ncols, 1))

In [None]:
# Conv1D expects (samples, steps, channels): append a single-channel axis to the
# training features.
nrows, ncols = X_train.shape
X_train1 = np.reshape(X_train, (nrows, ncols, 1))

In [None]:
# Conv1D expects (samples, steps, channels): append a single-channel axis to the
# test features.
nrows, ncols = X_test.shape
X_test1 = np.reshape(X_test, (nrows, ncols, 1))

In [None]:
# shapes of the reshaped splits, in the order cv / train / test
for split in (X_cv1, X_train1, X_test1):
    print(split.shape)

In [None]:
# convolutional network without hyper-parameter tuning:
# two blocks of (Conv1D x2 -> MaxPooling1D), then a dense head with softmax output
model = Sequential()
model.add(Conv1D(64, (3), input_shape=(16,1), activation='relu'))
model.add(Conv1D(64, (3), activation='relu'))
model.add(MaxPooling1D(2))
model.add(Conv1D(64, (3), activation='relu'))
model.add(Conv1D(64, (3), activation='relu'))
model.add(MaxPooling1D(2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(9, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer="adam", metrics=['accuracy'])
# steps_per_epoch must be a whole number of batches; round up so the final
# partial batch is still drawn. `epochs` replaces the deprecated Keras-1
# keyword `nb_epoch`.
model.fit_generator(gen(X_train1, y_train, batch_size),
                    steps_per_epoch = int(np.ceil(len(X_train) / batch_size)),
                    epochs = epochs)

In [None]:
# Evaluate on the test split. `steps` has to be an integer batch count, so round
# up to include the final partial batch (the generator wraps around to fill it).
test_loss, test_acc = model.evaluate_generator(gen(X_test1, y_test, batch_size),
                                               steps = int(np.ceil(len(X_test) / batch_size)))
print("Accuracy score with ConvNet 1D:", test_acc)
print("Loss with ConvNet:", test_loss)

In [None]:
# model builder handed to KerasClassifier when tuning epochs and batch size
def clf():
    """Build and compile the 1D ConvNet with a fixed 'adam' optimizer.

    Returns
    -------
    Sequential
        Compiled model taking inputs of shape (16, 1) and producing a 9-way
        softmax, trained with categorical cross-entropy and tracking accuracy.
    """
    hidden_act, output_act = 'relu', 'softmax'
    net = Sequential([
        # first convolutional block
        Conv1D(64, 3, input_shape=(16, 1), activation=hidden_act),
        Conv1D(64, 3, activation=hidden_act),
        MaxPooling1D(2),
        # second convolutional block
        Conv1D(64, 3, activation=hidden_act),
        Conv1D(64, 3, activation=hidden_act),
        MaxPooling1D(2),
        # dense classification head
        Flatten(),
        Dense(64, activation=hidden_act),
        Dense(9, activation=output_act),
    ])
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net

In [None]:
# candidate values of epochs and batch_size for the grid search
# NOTE(review): this rebinds `epochs` and `batch_size` from the scalars used by the
# earlier training cells to lists; those cells will not work if re-run after this one
epochs = [25, 50, 75, 100]
batch_size = [8, 32, 64, 128]

In [None]:
# parameter grid: every combination of the batch_size and epochs candidates
param_grid = dict(batch_size = batch_size, epochs = epochs)

In [None]:
# scikit-learn wrapper around the clf() builder; epochs and batch_size are not set
# here because the parameter grid supplies them (rebinds `model`)
model = KerasClassifier(build_fn = clf, verbose = 0)

In [None]:
# tuning convolutional network for epochs and batch_size
# The search is fitted on the cross-validation split only (X_cv1, y_cv); no `cv` or
# `scoring` argument is passed, so scikit-learn's defaults apply.
# NOTE(review): n_jobs = -1 requests all available cores; Keras models can be
# problematic under process-level parallelism — confirm this runs in the target environment
grid = GridSearchCV(estimator = model, param_grid = param_grid, n_jobs = -1)
grid_result = grid.fit(X_cv1, y_cv)
print("Best score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

In [None]:
# model builder handed to KerasClassifier when tuning the optimizer
def clf1(optimizer = 'adam'):
    """Build and compile the 1D ConvNet with a selectable optimizer.

    Parameters
    ----------
    optimizer : str, default 'adam'
        Optimizer identifier passed unchanged to ``model.compile``.

    Returns
    -------
    Sequential
        Compiled model taking inputs of shape (16, 1) and producing a 9-way
        softmax, trained with categorical cross-entropy and tracking accuracy.
    """
    hidden_act, output_act = 'relu', 'softmax'
    net = Sequential([
        # first convolutional block
        Conv1D(64, 3, input_shape=(16, 1), activation=hidden_act),
        Conv1D(64, 3, activation=hidden_act),
        MaxPooling1D(2),
        # second convolutional block
        Conv1D(64, 3, activation=hidden_act),
        Conv1D(64, 3, activation=hidden_act),
        MaxPooling1D(2),
        # dense classification head
        Flatten(),
        Dense(64, activation=hidden_act),
        Dense(9, activation=output_act),
    ])
    net.compile(optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net

In [None]:
# candidate optimizer names for the grid search; each string is forwarded to
# clf1(optimizer=...) and from there to model.compile
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']

In [None]:
# parameter grid: one candidate per optimizer name (rebinds `param_grid`)
param_grid = dict(optimizer = optimizer)

In [None]:
# scikit-learn wrapper around clf1() with epochs and batch_size fixed at 100 and 32
# presumably the best values reported by the previous search — confirm against its output
model = KerasClassifier(build_fn=clf1, epochs=100, batch_size=32, verbose=0)

In [None]:
# tuning convolutional network for optimizer
# The search is fitted on the cross-validation split only (X_cv1, y_cv); no `cv` or
# `scoring` argument is passed, so scikit-learn's defaults apply.
# NOTE(review): n_jobs = -1 requests all available cores; Keras models can be
# problematic under process-level parallelism — confirm this runs in the target environment
grid = GridSearchCV(estimator = model, param_grid = param_grid, n_jobs = -1)
grid_result = grid.fit(X_cv1, y_cv)
print("Best score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

In [None]:
# model builder handed to KerasClassifier when tuning the learning rate
def clf2(l_rate = 0.01):
    """Build and compile the 1D ConvNet using Adamax with a selectable learning rate.

    Parameters
    ----------
    l_rate : float, default 0.01
        Learning rate given to ``optimizers.Adamax``.

    Returns
    -------
    Sequential
        Compiled model taking inputs of shape (16, 1) and producing a 9-way
        softmax, trained with categorical cross-entropy and tracking accuracy.
    """
    hidden_act, output_act = 'relu', 'softmax'
    net = Sequential([
        # first convolutional block
        Conv1D(64, 3, input_shape=(16, 1), activation=hidden_act),
        Conv1D(64, 3, activation=hidden_act),
        MaxPooling1D(2),
        # second convolutional block
        Conv1D(64, 3, activation=hidden_act),
        Conv1D(64, 3, activation=hidden_act),
        MaxPooling1D(2),
        # dense classification head
        Flatten(),
        Dense(64, activation=hidden_act),
        Dense(9, activation=output_act),
    ])
    net.compile(optimizer=optimizers.Adamax(lr=l_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net

In [None]:
# candidate learning rates for the grid search; each value is forwarded to clf2(l_rate=...)
l_rate = [0.01, 0.1, 0.3, 0.5]

In [None]:
# parameter grid: one candidate per learning rate (rebinds `param_grid`)
param_grid = dict(l_rate = l_rate)

In [None]:
# scikit-learn wrapper around clf2() with epochs and batch_size fixed at 100 and 32;
# only the learning rate varies in this search
model = KerasClassifier(build_fn=clf2, epochs=100, batch_size=32, verbose=0)

In [None]:
# tuning convolutional network for learning rate
# The search is fitted on the cross-validation split only (X_cv1, y_cv); no `cv` or
# `scoring` argument is passed, so scikit-learn's defaults apply.
# NOTE(review): n_jobs = -1 requests all available cores; Keras models can be
# problematic under process-level parallelism — confirm this runs in the target environment
grid = GridSearchCV(estimator = model, param_grid = param_grid, n_jobs = -1)
grid_result = grid.fit(X_cv1, y_cv)
print("Best score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

In [None]:
# convolutional network with best values for epochs, batch_size, optimizer and learning rate
# after hyper-parameter tuning (Adamax, lr 0.01, batch size 32, 100 epochs)
model = Sequential()
model.add(Conv1D(64, (3), input_shape=(16,1), activation='relu'))
model.add(Conv1D(64, (3), activation='relu'))
model.add(MaxPooling1D(2))
model.add(Conv1D(64, (3), activation='relu'))
model.add(Conv1D(64, (3), activation='relu'))
model.add(MaxPooling1D(2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(9, activation='softmax'))
optimizer = optimizers.Adamax(lr=0.01)
model.compile(loss='categorical_crossentropy',optimizer=optimizer, metrics=['accuracy'])
# steps_per_epoch must be a whole number of batches; round up so the final
# partial batch is still drawn. `epochs` replaces the deprecated Keras-1
# keyword `nb_epoch`.
model.fit_generator(gen(X_train1, y_train, 32),
                    steps_per_epoch = int(np.ceil(len(X_train) / 32)),
                    epochs = 100)

In [None]:
# Evaluate the tuned network on the test split with the same batch size (32) used
# for training. `steps` has to be an integer batch count, so round up to include
# the final partial batch (the generator wraps around to fill it).
test_loss, test_acc = model.evaluate_generator(gen(X_test1, y_test, 32),
                                               steps = int(np.ceil(len(X_test) / 32)))
print("Accuracy score with ConvNet with Hyper-parameter tuning:", test_acc)
print("Loss with ConvNet with Hyper-parameter tuning::", test_loss)