**Basic Libraries**

In [None]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay


from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns

**Preprocessing data**

*   Import Dataset

In [None]:
fileName = 'data.xlsx'

# Load the spreadsheet. If the file is missing we report it and re-raise:
# every later cell needs `sheetValues`, so continuing would only turn the
# real problem into a confusing NameError a few lines further down.
try:
    sheetValues = pd.read_excel(fileName)
except FileNotFoundError:
    print(' .. could not find file:', fileName)
    raise

print(' .. successful parsing of file:', fileName)
print("Column headings:")
print(sheetValues.columns)

# Features: every column except the last two, as a NumPy array.
inputData = sheetValues.iloc[:, :-2].to_numpy()
# Labels: the second-to-last column.
outputData = sheetValues.iloc[:, -2]

print(' .. we have', inputData.shape[0], 'available paradigms.')
print(' .. each paradigm has', inputData.shape[1], 'features')
print(' ... the distribution for the available class lebels is:')

# Count instances per *actual* label value. Iterating over range(n_classes)
# would silently assume the labels are encoded as 0..K-1 and miscount any
# other encoding (for example labels 1 and 2).
classLabels, classCounts = np.unique(outputData, return_counts=True)
for classIdx, tmpCount in zip(classLabels, classCounts):
    # Multiply by 100 so the number printed next to '%' really is a percentage.
    tmpPercentage = 100 * tmpCount / len(outputData)
    print(' .. class', str(classIdx), 'has', str(tmpCount), 'instances', '(','{:.2f}'.format(tmpPercentage), '%)')

*   Split to training and test set

In [None]:
# Hold out 30% of the samples as a test set; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    inputData,
    outputData,
    test_size=0.3,
    random_state=0,
)

*   Normalize data points

In [None]:
# Min-max scale the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into training.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

**Visualization: t-SNE projection of the dataset**

In [None]:
# Project the (unscaled) feature matrix onto two t-SNE components.
tsne = TSNE(n_components=2, verbose=1, random_state=0)
z = tsne.fit_transform(inputData)

# Collect the labels and both embedding coordinates in one frame for plotting.
df = pd.DataFrame({
    "y": outputData,
    "comp-1": z[:, 0],
    "comp-2": z[:, 1],
})

# Scatter plot of the embedding, coloured by class label.
ax = sns.scatterplot(
    data=df,
    x="comp-1",
    y="comp-2",
    hue=df.y.tolist(),
    palette=sns.color_palette("bright", 2),
)
ax.set(title="Data in T-SNE projection")
plt.show()

**Decision Tree Classifier**

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Fit a depth-limited decision tree on the scaled training data.
clf = DecisionTreeClassifier(max_depth=6, random_state=0)
clf.fit(X_train, y_train)

# Predict on both splits so the train and test scores can be compared.
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)

# Confusion matrix on the test split. Row/column order and tick labels come
# from the classes the model was fitted on rather than a hard-coded
# ["1", "2"], so the plot stays correct for any label encoding or class count.
confMatrix = confusion_matrix(y_test, y_pred_test, labels=clf.classes_)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=clf.classes_)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
pre_train = precision_score(y_train, y_pred_train, average='macro')
pre_test = precision_score(y_test, y_pred_test, average='macro')
rec_train = recall_score(y_train, y_pred_train, average='macro')
rec_test = recall_score(y_test, y_pred_test, average='macro')
f1_train = f1_score(y_train, y_pred_train, average='macro')
f1_test = f1_score(y_test, y_pred_test, average='macro')

# Print the scores
print('Accuracy scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of Decision Tree classifier are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))

# Decision tree graph (top 3 levels only, to keep it readable).
# feature_names / class_names are passed as plain lists of strings: some
# scikit-learn releases reject a pandas Index here, and the class names must
# follow the fitted class order instead of being hard-coded.
treeFeatureNames = [str(col) for col in sheetValues.columns[:-2]]
treeClassNames = [str(label) for label in clf.classes_]
plt.figure(figsize=(15, 10))
_ = tree.plot_tree(clf, filled=True, feature_names=treeFeatureNames, max_depth=3,
                   class_names=treeClassNames, proportion=True, fontsize=10)
plt.show()

**k-Nearest Neighbors**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 7-nearest-neighbours classifier on the scaled training data.
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, y_train)

# Predict on both splits so the train and test scores can be compared.
y_pred_train = knn.predict(X_train)
y_pred_test = knn.predict(X_test)

# Confusion matrix on the test split. Labels are taken from the fitted model
# instead of a hard-coded ["1", "2"], so the axes always match the data.
confMatrix = confusion_matrix(y_test, y_pred_test, labels=knn.classes_)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=knn.classes_)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
pre_train = precision_score(y_train, y_pred_train, average='macro')
pre_test = precision_score(y_test, y_pred_test, average='macro')
rec_train = recall_score(y_train, y_pred_train, average='macro')
rec_test = recall_score(y_test, y_pred_test, average='macro')
f1_train = f1_score(y_train, y_pred_train, average='macro')
f1_test = f1_score(y_test, y_pred_test, average='macro')

# Print the scores
print('Accuracy scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of k-Nearest Neighbors classifier are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))

**Linear Discriminant Analysis**

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Fit Linear Discriminant Analysis (default settings) on the scaled training data.
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)

# Predict on both splits so the train and test scores can be compared.
y_pred_train = lda.predict(X_train)
y_pred_test = lda.predict(X_test)

# Confusion matrix on the test split. Labels are taken from the fitted model
# instead of a hard-coded ["1", "2"], so the axes always match the data.
confMatrix = confusion_matrix(y_test, y_pred_test, labels=lda.classes_)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=lda.classes_)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
pre_train = precision_score(y_train, y_pred_train, average='macro')
pre_test = precision_score(y_test, y_pred_test, average='macro')
rec_train = recall_score(y_train, y_pred_train, average='macro')
rec_test = recall_score(y_test, y_pred_test, average='macro')
f1_train = f1_score(y_train, y_pred_train, average='macro')
f1_test = f1_score(y_test, y_pred_test, average='macro')

# Print the scores
print('Accuracy scores of Linear Discriminant Analysis are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of Linear Discriminant Analysis are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of Linear Discriminant Analysis are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of Linear Discriminant Analysis are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))

**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit logistic regression on the scaled training data; class_weight='balanced'
# re-weights samples inversely to class frequency.
logreg = LogisticRegression(class_weight='balanced')
logreg.fit(X_train, y_train)

# Predict on both splits so the train and test scores can be compared.
y_pred_train = logreg.predict(X_train)
y_pred_test = logreg.predict(X_test)

# Confusion matrix on the test split. Labels are taken from the fitted model
# instead of a hard-coded ["1", "2"], so the axes always match the data.
confMatrix = confusion_matrix(y_test, y_pred_test, labels=logreg.classes_)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=logreg.classes_)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
pre_train = precision_score(y_train, y_pred_train, average='macro')
pre_test = precision_score(y_test, y_pred_test, average='macro')
rec_train = recall_score(y_train, y_pred_train, average='macro')
rec_test = recall_score(y_test, y_pred_test, average='macro')
f1_train = f1_score(y_train, y_pred_train, average='macro')
f1_test = f1_score(y_test, y_pred_test, average='macro')

# Print the scores
print('Accuracy scores of LogisticRegression are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of LogisticRegression are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of LogisticRegression are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of  LogisticRegression are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))

**Naive Bayes**

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes classifier on the scaled training data.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict on both splits so the train and test scores can be compared.
y_pred_train = gnb.predict(X_train)
y_pred_test = gnb.predict(X_test)

# Confusion matrix on the test split. Labels are taken from the fitted model
# instead of a hard-coded ["1", "2"], so the axes always match the data.
confMatrix = confusion_matrix(y_test, y_pred_test, labels=gnb.classes_)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=gnb.classes_)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
pre_train = precision_score(y_train, y_pred_train, average='macro')
pre_test = precision_score(y_test, y_pred_test, average='macro')
rec_train = recall_score(y_train, y_pred_train, average='macro')
rec_test = recall_score(y_test, y_pred_test, average='macro')
f1_train = f1_score(y_train, y_pred_train, average='macro')
f1_test = f1_score(y_test, y_pred_test, average='macro')

# Print the scores
print('Accuracy scores of Naive Bayes classifier are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of Naive Bayes classifier are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of Naive Bayes classifier are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of Naive Bayes classifier are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))

**Support Vector Machine (SVM)**

In [None]:
from sklearn.svm import SVC

# Fit a support vector classifier with a sigmoid kernel on the scaled training data.
svm = SVC(kernel='sigmoid')
svm.fit(X_train, y_train)

# Predict on both splits so the train and test scores can be compared.
y_pred_train = svm.predict(X_train)
y_pred_test = svm.predict(X_test)

# Confusion matrix on the test split. Labels are taken from the fitted model
# instead of a hard-coded ["1", "2"], so the axes always match the data.
confMatrix = confusion_matrix(y_test, y_pred_test, labels=svm.classes_)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=svm.classes_)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
pre_train = precision_score(y_train, y_pred_train, average='macro')
pre_test = precision_score(y_test, y_pred_test, average='macro')
rec_train = recall_score(y_train, y_pred_train, average='macro')
rec_test = recall_score(y_test, y_pred_test, average='macro')
f1_train = f1_score(y_train, y_pred_train, average='macro')
f1_test = f1_score(y_test, y_pred_test, average='macro')

# Print the scores
print('Accuracy scores of SVM classifier are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of SVM classifier are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of SVM classifier are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of SVM classifier are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))

**Neural Networks**

In [None]:
import keras
from keras.optimizers import SGD, Adam

# Map the original class labels onto consecutive indices 0..K-1.
# Calling to_categorical directly on labels such as {1, 2} creates an extra,
# never-observed "class 0" column; the network can then predict that phantom
# class, which breaks the confusion-matrix plot and distorts the macro scores.
nnClasses, y_train_idx = np.unique(y_train, return_inverse=True)
nnNumClasses = len(nnClasses)
# keras.utils.to_categorical is the public location of this helper; the
# keras.utils.np_utils module path is not available in newer Keras releases.
y_train_onehot = keras.utils.to_categorical(y_train_idx, num_classes=nnNumClasses)

# One hidden layer of 64 ReLU units, followed by one output unit per class.
# Softmax yields a proper probability distribution over the classes, which is
# what the categorical cross-entropy loss expects.
CustomModel = keras.models.Sequential()
CustomModel.add(keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu'))
CustomModel.add(keras.layers.Dense(nnNumClasses, activation='softmax'))

# Compile the model, using accuracy to measure model performance.
opt = Adam(learning_rate=0.01)
CustomModel.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training data (silent training loop).
CustomModel.fit(X_train, y_train_onehot, epochs=300, batch_size=100, verbose=0)

# Predict class probabilities for both splits and translate the arg-max index
# back to the original label values, so they are comparable with y_train/y_test.
y_pred_train = CustomModel.predict(X_train)
classes_train = nnClasses[np.argmax(y_pred_train, axis=1)]
y_pred_test = CustomModel.predict(X_test)
classes_test = nnClasses[np.argmax(y_pred_test, axis=1)]

# Confusion matrix on the test split, with rows/columns and tick labels in the
# order of the labels seen during training.
confMatrix = confusion_matrix(y_test, classes_test, labels=nnClasses)
cmDisplay = ConfusionMatrixDisplay(confusion_matrix=confMatrix, display_labels=nnClasses)
cmDisplay.plot()
plt.show()

# Calculate the scores (macro average: every class weighs the same).
acc_train = accuracy_score(y_train, classes_train)
acc_test = accuracy_score(y_test, classes_test)
pre_train = precision_score(y_train, classes_train, average='macro')
pre_test = precision_score(y_test, classes_test, average='macro')
rec_train = recall_score(y_train, classes_train, average='macro')
rec_test = recall_score(y_test, classes_test, average='macro')
f1_train = f1_score(y_train, classes_train, average='macro')
f1_test = f1_score(y_test, classes_test, average='macro')

# Print the scores
print('Accuracy scores of Neural Networks are:',
      'train: {:.2f}'.format(acc_train), 'and test:{:.2f}.'.format(acc_test))
print('Precision scores of Neural Networks are:',
      'train: {:.2f}'.format(pre_train), 'and test:{:.2f}.'.format(pre_test))
print('Recall scores of Neural Networks are:',
      'train: {:.2f}'.format(rec_train), 'and test:{:.2f}.'.format(rec_test))
print('F1 scores of Neural Networks are:',
      'train: {:.2f}'.format(f1_train), 'and test: {:.2f}.'.format(f1_test))
