In [None]:
# #######################################################################
# Importing necessary libraries for creating model

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from sklearn.model_selection import train_test_split   # For test train data spliting
import pandas as pd
import numpy as np
import keras

In [None]:
# Read the complete dataset from its CSV file into a pandas DataFrame.
csv_path = '/home/sshuser2/gesis.csv'
data = pd.read_csv(csv_path)

In [None]:
# #######################################################################
# Separating features and labels, then dropping the DataFrame to free memory

# Column 1 is used as the class label (two classes, one-hot encoded below).
y = data.iloc[:, 1].values

# Columns 3 onward hold 10,000 values per row, reshaped to (samples, 100, 100, 1)
# -- presumably flattened single-channel 100x100 images; confirm against the CSV.
# np.array(...) takes a copy so the result does not keep the DataFrame's buffer alive.
df_x = np.array(data.iloc[:, 3:].values.reshape(len(data), 100, 100, 1))

del data

df_y = np.array(keras.utils.to_categorical(y, num_classes=2))

In [None]:
# #######################################################################
# Hold out 20% of the samples as a test set (one random split, fixed seed)
# NOTE(review): this split is neither stratified nor a k-fold procedure.

x_train, x_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    random_state=4,
)

# The full arrays are no longer referenced after the split.
del df_x, df_y

In [None]:
# #############################################################################
# Building the Convolutional Neural Network (CNN) model

model = Sequential()

# Feature extractor: five (3x3 conv + ReLU -> 2x2 max-pool) stages whose filter
# count halves each time (64, 32, 16, 8, 4). Only the first layer declares the
# input shape. Assuming Keras' default 'valid' padding, the 100x100 input is
# reduced to a 1x1 map with 4 channels by the last pooling layer.
model.add(Convolution2D(64, 3, data_format='channels_last', activation='relu',
                        input_shape=(100, 100, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
for n_filters in (32, 16, 8, 4):
    model.add(Convolution2D(n_filters, 3, data_format='channels_last', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: 100 sigmoid units with 50% dropout, then a 2-way softmax.
model.add(Flatten())
model.add(Dense(100))
model.add(Activation('sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# #############################################################################
# Training the model: a single fit of 20 epochs with mini-batches of 200 samples
# NOTE(review): no k-fold cross validation happens here; the held-out split is
# simply scored after every epoch as validation data.

# `v` is the History object returned by fit; its per-epoch metrics are read later.
v = model.fit(
    x_train,
    y_train,
    batch_size=200,
    epochs=20,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the trained model on the held-out split; the returned values (loss and
# the compiled accuracy metric) are displayed as the cell output.
model.evaluate(x=x_test, y=y_test)

In [None]:
# #############################################################################
# Predicting gender and reporting the precision, recall F1-score

from sklearn.metrics import classification_report, confusion_matrix

# One-hot label rows -> integer class ids, computed once and shared by both reports.
y_true = np.argmax(y_test, axis=1)

# `Sequential.predict_classes` is deprecated and missing from newer Keras releases.
# For a multi-unit softmax output it was just the argmax of `predict`, so this
# form gives the same class ids and runs on old and new Keras alike.
y_pred = np.argmax(model.predict(x_test, verbose=0), axis=-1)

# con_mat is reused by the accuracy computation that follows in the notebook.
con_mat = confusion_matrix(y_true, y_pred)
print(classification_report(y_true, y_pred))

In [None]:
# Overall accuracy: correctly classified samples (the confusion-matrix diagonal)
# divided by the total number of evaluated samples.
n_correct = con_mat.diagonal().sum()
n_total = con_mat.sum()
accuracy = n_correct / n_total

print("Total accuracy is:")
print('{percent:.2%}'.format(percent=accuracy))

In [None]:
# #############################################################################
# Reporting a 90% t-based confidence interval around the mean training accuracy
# NOTE(review): the samples are the per-epoch training accuracies of one fit,
# not the scores of k cross-validation folds -- interpret the interval accordingly.

from scipy.stats import sem, t
# `scipy.mean` was only a deprecated alias of `numpy.mean` and is not available
# in newer SciPy releases; import the NumPy function directly instead.
from numpy import mean

# Older Keras releases record the metric as 'acc', newer ones as 'accuracy'.
history = v.history
ac = history['acc'] if 'acc' in history else history['accuracy']

confidence = 0.90
n = len(ac)
m = mean(ac)
std_err = sem(ac)
# Half-width of the two-sided interval from Student's t with n - 1 degrees of freedom.
h = std_err * t.ppf((1 + confidence) / 2, n - 1)
print("%.3f%% (+/- %.3f%%)" % (m*100, h*100))