In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import mean_squared_error

from tensorflow import keras
from keras import Sequential, layers

# Importing dataset

In [None]:
# Training data: space-separated file without a header row; skipinitialspace
# discards the blanks that follow each delimiter.
df_train = pd.read_csv(
    'monks_3_train.csv',
    header=None,
    sep=' ',
    skipinitialspace=True,
)
df_train.head(10)

In [None]:
# Remove column 7 before encoding (presumably a row identifier — verify
# against the data file).
df_train = df_train.drop(columns=[7])
df_train.head()

In [None]:
# One-hot encode the six attribute columns (labels 1..6); column 0 is left
# untouched.
df_train = pd.get_dummies(data=df_train, columns=range(1, 7))

# Only the last expression of a cell is displayed, so print the shape
# explicitly and let the preview be the cell's value.
print(df_train.shape)
df_train.head(10)

In [None]:
# Feature columns sit at positions 1..17; position 0 is read as the target
# in a later cell.
feature_positions = list(range(1, 18))
X_train = df_train.iloc[:, feature_positions]
X_train.head()

# Partitioning

In [None]:
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# no longer fails once X_train has already been converted.  The dtype is made
# explicit instead of depending on what pd.get_dummies produced.
X_train = np.asarray(X_train, dtype=np.float32)

In [None]:
# Target vector: the first column of the encoded frame.
target_label = df_train.columns[0]
y_train = df_train[target_label]
y_train.head()

In [None]:
# Convert the target to a plain NumPy array for scikit-learn / Keras.
y_train = np.asarray(y_train)
y_train

# Neural network

In [None]:
def create_model():
    """Build a fresh, uncompiled binary classifier.

    Architecture: 17 inputs -> Dense(6, relu) named "layer1" ->
    Dense(1, sigmoid) named "output".

    Returns:
        A new ``Sequential`` model; the caller is responsible for compiling it.
    """
    network = Sequential()
    # Hidden layer; input_shape fixes the expected number of features to 17.
    network.add(layers.Dense(6, activation="relu", name="layer1", input_shape=(17,)))
    # Single sigmoid unit: the output is a value in (0, 1).
    network.add(layers.Dense(1, activation='sigmoid', name="output"))
    return network

## Grid search

In [None]:
# Stratified 5-fold splitter; shuffling is seeded so the folds are the same on
# every run.
split = StratifiedKFold(
    random_state=0,
    shuffle=True,
    n_splits=5,
)

In [None]:
# Hyper-parameter grid for RMSprop: learning rate, momentum and weight decay.
l_rates = [10 ** -exponent for exponent in range(2, 4)]
momentum = [step * 0.1 for step in range(3)]
decays = [0, 0.001]

# Materialise the folds once so that every configuration is evaluated on
# exactly the same train/validation partitions.
splits = list(split.split(X_train, y_train))
X_train_p = [X_train[fit_rows] for fit_rows, _ in splits]
y_train_p = [y_train[fit_rows] for fit_rows, _ in splits]
X_val_p = [X_train[held_rows] for _, held_rows in splits]
y_val_p = [y_train[held_rows] for _, held_rows in splits]

# (learning rate, momentum, decay) -> (mean, std) of the per-fold
# last-epoch validation accuracy.
score_means_opt_parameters = {}

for l_r in tqdm(l_rates):
    for m in momentum:
        for d in tqdm(decays):
            scores = []
            fold_data = zip(X_train_p, y_train_p, X_val_p, y_val_p)
            for fold, (X_fold, y_fold, X_fold_val, y_fold_val) in enumerate(fold_data):
                # A brand-new model per fold: no weights are carried over.
                model = create_model()
                rmsprop = keras.optimizers.RMSprop(learning_rate=l_r, momentum=m, weight_decay=d)
                model.compile(
                    optimizer=rmsprop,
                    loss='binary_crossentropy',
                    metrics=['accuracy'],
                )
                history = model.fit(
                    X_fold,
                    y_fold,
                    validation_data=(X_fold_val, y_fold_val),
                    epochs=300,
                    batch_size=10,
                    verbose=0,
                )
                # Score the fold by the accuracy reached at the final epoch.
                final_val_accuracy = history.history['val_accuracy'][-1]
                scores.append(final_val_accuracy)
            score_means_opt_parameters[(l_r, m, d)] = (np.mean(scores), np.std(scores))


## Computing scores

In [None]:
# Keep the configurations whose mean CV accuracy is at least 0.7 and rank
# them, best first, by the pessimistic score "mean - std".
score_means_buone = sorted(
    (entry for entry in score_means_opt_parameters.items() if entry[1][0] >= 0.7),
    key=lambda entry: entry[1][0] - entry[1][1],
    reverse=True,
)

# Shortlist: every ranked configuration whose mean accuracy is not below the
# top entry's "mean - std".  The ranking order is preserved.
score_means_topcut = []
if score_means_buone:
    top_mean, top_std = score_means_buone[0][1]
    cutoff = top_mean - top_std
    score_means_topcut = [entry for entry in score_means_buone if entry[1][0] >= cutoff]



## Plotting the best models' accuracy

In [None]:
# Stratified 80/20 hold-out split used to plot the learning curves of the
# shortlisted configurations.  random_state is fixed (same seed as the CV
# splitter) so the split, and therefore the plots, are reproducible.
X_trains, X_val, y_trains, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    stratify=y_train,
    shuffle=True,
    random_state=0,
)

In [None]:
# Show the shortlisted entries: ((learning rate, momentum, decay), (mean, std)).
score_means_topcut

In [None]:
# Retrain every shortlisted configuration on the hold-out split and plot its
# accuracy curves.  The loop walks score_means_topcut itself: the shortlist is
# filtered on the mean while score_means_buone is ordered by "mean - std", so
# the shortlist is not guaranteed to be the first len(score_means_topcut)
# entries of score_means_buone and indexing the latter could pick the wrong
# configurations.
for candidate in score_means_topcut:
    print(candidate)
    cand_lr, cand_momentum, cand_decay = candidate[0]
    model = create_model()
    model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=cand_lr,
                                                     momentum=cand_momentum,
                                                     weight_decay=cand_decay),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    history = model.fit(X_trains, y_trains, epochs=300, batch_size=10, validation_data=(X_val, y_val), verbose=0)
    accuracy_train = history.history['accuracy']
    accuracy_val = history.history['val_accuracy']
    print('Max train accuracy:', max(accuracy_train))
    print('Max val accuracy:', max(accuracy_val))
    print('Final val accuracy:',accuracy_val[-1])
    plt.plot(accuracy_train, label='training')
    plt.plot(accuracy_val, label='validation')
    # Title each figure with its hyper-parameters so the plots can be told apart.
    plt.title(f'lr={cand_lr}, momentum={cand_momentum}, weight_decay={cand_decay}')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.show()

# Model assessment

In [None]:
# Test data, parsed with the same options as the training file.
# NOTE(review): unlike 'monks_3_train.csv' this path has no '.csv' extension —
# confirm the file name on disk.
test = pd.read_csv(
    "monks_3_test",
    header=None,
    sep=" ",
    skipinitialspace=True,
)

In [None]:
# Remove column 7, as done for the training frame.
test = test.drop(columns=[7])

In [None]:
# Attribute columns 1..6 are one-hot encoded; column 0 holds the labels.
attribute_cols = list(range(1, 7))
X_test = pd.get_dummies(test[attribute_cols], columns=attribute_cols)
y_test = test[0]

# Explicit float dtype instead of whatever pd.get_dummies defaults to.
X_test = np.asarray(X_test, dtype=np.float32)
y_test = np.asarray(y_test)

# The test frame is encoded independently of the training frame, so a category
# missing from the test file would silently yield fewer columns.  Fail early.
assert X_test.shape[1] == X_train.shape[1], (
    "test and training sets have a different number of one-hot features"
)

In [None]:
# Refit a fresh model on the whole training set using the first shortlisted
# configuration (the shortlist is ordered by mean_acc - std_acc, best first).
model = create_model()
best_lr, best_momentum, best_decay = score_means_topcut[0][0]
model.compile(
    optimizer=keras.optimizers.RMSprop(
        learning_rate=best_lr,
        momentum=best_momentum,
        weight_decay=best_decay,
    ),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# NOTE(review): the test set is passed as validation_data, so every "val"
# number and the 'validation' curve below are measured on the test data.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=300,
    batch_size=10,
    verbose=0,
)

accuracy_train = history.history['accuracy']
accuracy_val = history.history['val_accuracy']
print('Max accuracy val',max(accuracy_val))
print('Final accuracy val', accuracy_val[-1])

for curve, curve_label in ((accuracy_train, 'training'), (accuracy_val, 'validation')):
    plt.plot(curve, label=curve_label)
plt.legend()
plt.xlabel('Epoche')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Loss curves of the final fit, read from the Keras training history.
loss_train = history.history['loss']
loss_val = history.history['val_loss']
print(loss_val[-1])

fig, ax = plt.subplots()
ax.plot(loss_train, label='training')
ax.plot(loss_val, label='validation')
# Horizontal line at the lowest monitored loss: a curve rising above it
# afterwards is a sign of overfitting.
ax.axhline(min(loss_val), color='orange')
ax.legend()
ax.set_xlabel('Epoche')
ax.set_ylabel('Loss')
plt.show()

In [None]:
# Mean squared error between the network outputs and the training labels.
# scikit-learn's signature is (y_true, y_pred); the value is the same either
# way for MSE, but the documented order avoids surprises if the metric is
# ever swapped for a non-symmetric one.
y_pred_train = model.predict(X_train)
mean_squared_error(y_train, y_pred_train)

In [None]:
# Mean squared error between the network outputs and the test labels.
# Arguments follow scikit-learn's documented (y_true, y_pred) order.
y_pred = model.predict(X_test)
mean_squared_error(y_test, y_pred)