# LOAD DATASET
---

In [27]:
import numpy as np
import pandas as pd


def convert_if_possible(val):
    """Coerce ``val`` to a number when it parses as one.

    Returns an ``int`` when the parsed value has no fractional part, a
    ``float`` when it does, and ``val`` itself, unchanged, when ``float(val)``
    raises ``ValueError``.
    """
    try:
        number = float(val)
    except ValueError:
        # Not numeric text: hand the value back untouched.
        return val
    return int(number) if number.is_integer() else number


# MODES are "grades" or "bc"
MODE = "grades"

train_file = f"{MODE}_train.csv"
test_file = f"{MODE}_test.csv"

# Columns 1-9 are routed through convert_if_possible so that integers written
# without a decimal point in the file end up as ints in the pandas DataFrame.
# The test split is read first, then the training split.
test_df, train_df = (
    pd.read_csv(
        csv_path,
        sep=" ",
        converters={col: convert_if_possible for col in range(1, 10)},
    )
    for csv_path in (test_file, train_file)
)

# Neural Network
---

In [28]:
from FCNN import (
    initialize_network,
    predict,
    predict_multiclass,
    train_network,
    read_configuration_file,
    accuracy_score,
    F1_score,
)

def _rows_with_int_label(frame):
    """Return the frame's rows as lists, with the last entry of each row cast to int."""
    return [
        list(record[:-1]) + [int(record[-1])]
        for record in frame.itertuples(index=False, name=None)
    ]


train_dataset = _rows_with_int_label(train_df)
test_dataset = _rows_with_int_label(test_df)

NUM_EPOCHS = 500

# Every column except the last one is counted as an input.
# NOTE(review): in "grades" mode the evaluation cell treats the last n_outputs
# columns of each row as labels, so this count may include label columns --
# confirm against how FCNN slices each row.
n_inputs = len(train_dataset[0]) - 1

if MODE == "bc":
    n_outputs = 1
else:
    n_outputs = 4

# The literal 1 is presumably the hidden-layer size (verify in FCNN); the 0.1
# is reported as `lrate` in the training log.
network = initialize_network(n_inputs, 1, n_outputs)
trained_network = train_network(network, train_dataset, 0.1, NUM_EPOCHS, n_outputs)

print(f"All of the hyperparameters of this NN are: {trained_network}")

>epoch=0, lrate=0.100, error=364.435
>epoch=1, lrate=0.100, error=329.420
>epoch=2, lrate=0.100, error=326.723
>epoch=3, lrate=0.100, error=325.118
>epoch=4, lrate=0.100, error=324.031
>epoch=5, lrate=0.100, error=323.303
>epoch=6, lrate=0.100, error=322.799
>epoch=7, lrate=0.100, error=322.419
>epoch=8, lrate=0.100, error=322.095
>epoch=9, lrate=0.100, error=321.775
>epoch=10, lrate=0.100, error=321.414
>epoch=11, lrate=0.100, error=320.962
>epoch=12, lrate=0.100, error=320.344
>epoch=13, lrate=0.100, error=319.430
>epoch=14, lrate=0.100, error=317.940
>epoch=15, lrate=0.100, error=315.199
>epoch=16, lrate=0.100, error=309.436
>epoch=17, lrate=0.100, error=295.126
>epoch=18, lrate=0.100, error=266.603
>epoch=19, lrate=0.100, error=235.158
>epoch=20, lrate=0.100, error=209.583
>epoch=21, lrate=0.100, error=190.593
>epoch=22, lrate=0.100, error=176.628
>epoch=23, lrate=0.100, error=166.130
>epoch=24, lrate=0.100, error=158.008
>epoch=25, lrate=0.100, error=151.556
>epoch=26, lrate=0.100

In [29]:
# TESTING ON UNSEEN DATA

# Both modes score every row with the same call, so the predictions are
# collected once up front (test split first, then training split).
test_predictions = [predict(trained_network, row) for row in test_dataset]
train_predictions = [predict(trained_network, row) for row in train_dataset]

if MODE != "bc":
    # We now have the last four columns of each row to compare to the
    # predictions. We can define accuracy as follows:
    # (1) Overall accuracy = (A + D) / (A + B + C + D); this is the fraction of
    # examples that are correctly predicted with respect to the current class.
    # Overall accuracy is generally considered a poor evaluation metric for
    # Boolean classification tasks. If most examples do not belong to most
    # classes, a system can achieve a high overall accuracy by trivially
    # predicting that all examples belong to no classes.
    accuracy_on_test_data = accuracy_score(
        [row[-n_outputs:] for row in test_dataset],
        test_predictions,
    )
    accuracy_on_train_data = accuracy_score(
        [row[-n_outputs:] for row in train_dataset],
        train_predictions,
    )

    print(f"Accuracy on training data: {accuracy_on_train_data}")
    print()
    print(f"Accuracy on test data: {accuracy_on_test_data}")
else:
    # Binary mode: the label is the single last column. accuracy_score is
    # given each label wrapped in a one-element list, F1_score the bare label.
    accuracy_on_test_data = accuracy_score(
        [[row[-1]] for row in test_dataset],
        test_predictions,
    )
    accuracy_on_train_data = accuracy_score(
        [[row[-1]] for row in train_dataset],
        train_predictions,
    )
    f1_test = F1_score([row[-1] for row in test_dataset], test_predictions)
    f1_train = F1_score([row[-1] for row in train_dataset], train_predictions)

    print(f"Accuracy on training data: {accuracy_on_train_data}")
    print(f"F1 score on training data: {f1_train}")
    print()
    print(f"Accuracy on test data: {accuracy_on_test_data}")
    print(f"F1 score on test data: {f1_test}")

Accuracy on training data: 81.93979933110369

Accuracy on test data: 86.86868686868688


In [38]:
# Side-by-side view of the training labels and the network's predictions.
# "Actual" holds the last n_outputs columns of each training row.
# n_outputs is only ever set to 1 or 4 in this notebook, so the former
# `n_outputs == 2` special case could never run and has been removed.
table_df = pd.DataFrame(
    {
        "Actual": [row[-n_outputs:] for row in train_dataset],
        "Predicted": train_predictions,
    }
)

table_df.head()

Unnamed: 0,Actual,Predicted
0,"[1, 1, 1, 0]","[1, 1, 1, 0]"
1,"[1, 1, 1, 0]","[1, 1, 1, 0]"
2,"[1, 1, 1, 0]","[1, 1, 1, 0]"
3,"[1, 1, 1, 0]","[1, 1, 1, 0]"
4,"[1, 1, 0, 0]","[1, 1, 0, 0]"


In [42]:
# Keep only the rows of table_df where the label and the prediction differ.
is_mismatch = table_df["Actual"] != table_df["Predicted"]
false_predictions = table_df[is_mismatch]
print(f"Number of false predictions: {len(false_predictions)}")

Number of false predictions: 54


In [30]:
# plot confusion matrix
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# The confusion matrix is only drawn in binary-classification ("bc") mode;
# for any other MODE this cell produces no output.
if MODE == "bc":
    # normalize="true" asks scikit-learn to normalize over the true labels
    # (the rows of the matrix).
    cm = confusion_matrix(
        [[row[-1]] for row in test_dataset], test_predictions, normalize="true"
    )

    ax = sns.heatmap(
        cm,
        annot=True,
        xticklabels=["0", "1"],
        yticklabels=["0", "1"],
        cmap="Blues",
        cbar=False,
    )
    # Previously this line was a bare `ax.set_title` attribute access (no call),
    # so the figure was rendered without a title.
    ax.set_title("Confusion matrix on test data (normalized by true class)")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    plt.show()
    



In [24]:
# Print a table where [row[-4:] for row in train_dataset] is the left side and train_predictions is the right side.

Unnamed: 0,Actual,Predicted
0,"[1, 1, 1, 0]","[1, 1, 1, 0]"
1,"[1, 1, 1, 0]","[1, 1, 1, 0]"
2,"[1, 1, 1, 0]","[1, 1, 1, 0]"
3,"[1, 1, 1, 0]","[1, 1, 1, 0]"
4,"[1, 1, 0, 0]","[1, 1, 0, 0]"
