In [1]:
import pandas as pd
import numpy as np

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import sklearn.neural_network as nn
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

%matplotlib widget

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz

import os
import csv

os.environ["PATH"] += os.pathsep + 'C:\\Program Files (x86)\\Graphviz\\bin\\'

directory = os.getcwd()

In [2]:
accuracy = [0.0 for x in range(1,(5+1))]
precision = [0.0 for x in range(1,(5+1))]
recall = [0.0 for x in range(1,(5+1))]
f1score = [0.0 for x in range(1,(5+1))]

for i in range(1,(5+1)):
    print('Data: ' + str(i))
    with open(directory+'\\..\\Data Cleaning\\train-'+str(i)+'.csv', 'r') as csvfile:
        dataset = list(csv.reader(csvfile))

    datasetLength = len(dataset)
    dataLabels = dataset[0]
    dataset = dataset[1:datasetLength]
    npDataset = np.array(dataset, dtype='float64')

    X_train = npDataset[:,0:(len(dataLabels)-1)]
    Y_train = npDataset[:,len(dataLabels)-1]

    with open(directory+'\\..\\Data Cleaning\\test-'+str(i)+'.csv', 'r') as csvfile:
        dataset = list(csv.reader(csvfile))

    datasetLength = len(dataset)
    dataLabels = dataset[0]
    dataset = dataset[1:datasetLength]
    npDataset = np.array(dataset, dtype='float64')

    X_test = npDataset[:,0:(len(dataLabels)-1)]
    Y_test = npDataset[:,len(dataLabels)-1]


    ### model
    logisticRegr = LogisticRegression(C=1e5, max_iter=100, solver='newton-cg')
    logisticRegr.fit(X_train, Y_train)

    from sklearn.utils import check_array
    check_array(X_train, dtype='numeric')
    check_array(X_test, dtype='numeric')

    lrTrain = logisticRegr.predict(X_train)
    lrTest = logisticRegr.predict(X_test)

    ### model
    num_estimators = 101
    depth = None

    randomForest = RandomForestClassifier(n_estimators=num_estimators, max_depth=depth)
    randomForest.fit(X_train, Y_train)

    rfTrain = randomForest.predict(X_train)
    rfTest = randomForest.predict(X_test)

    X_train = np.append(X_train, [[x] for x in lrTrain], axis = 1)
    X_test = np.append(X_test, [[x] for x in lrTest], axis = 1)
    X_train = np.append(X_train, [[x] for x in rfTrain], axis = 1)
    X_test = np.append(X_test, [[x] for x in rfTest], axis = 1)

    nnModel = nn.MLPClassifier(hidden_layer_sizes=(50,100,50))
    nnModel.fit(X_train, Y_train)

    Y_pred = nnModel.predict(X_test)
    accuracy[i-1] = accuracy_score(Y_test, Y_pred)
    precision[i-1] = precision_score(Y_test, Y_pred, pos_label=float(1))
    recall[i-1] = recall_score(Y_test, Y_pred, pos_label=float(1))
    f1score[i-1] = f1_score(Y_test, Y_pred, pos_label=float(1))
    print("Accuracy:", accuracy[i-1])
    print("Precision:", precision[i-1])
    print("Recall:", recall[i-1])
    print("F1-Score:", f1score[i-1])

    # Create the confusion matrix
    cm = confusion_matrix(Y_test, Y_pred)
    ConfusionMatrixDisplay(confusion_matrix=cm).plot()

print("\nAverage:\n")
print("Accuracy:", np.mean(accuracy))
print("Precision:", np.mean(precision))
print("Recall:", np.mean(recall))
print("F1-Score:", np.mean(f1score))

Data: 1
Accuracy: 0.7080912323302402
Precision: 0.6839584996009577
Recall: 0.6579654510556622
F1-Score: 0.6707102328311485
Data: 2
Accuracy: 0.7040152632035382
Precision: 0.6811506739086702
Recall: 0.6494054468738013
F1-Score: 0.6648993618065782
Data: 3
Accuracy: 0.7106929147515393
Precision: 0.6762490087232356
Recall: 0.6669925694172859
F1-Score: 0.6715888954518605
Data: 4
