# LOAD DATASET
---

In [1]:
import numpy as np
import pandas as pd


def convert_if_possible(val):
    """Convert a raw cell value to ``int`` or ``float`` when it parses as a number.

    Args:
        val: The raw value to convert; typically the cell text handed to a
            ``pd.read_csv`` converter.

    Returns:
        An ``int`` when the value is integer-valued (``"3"`` and ``"3.0"`` both
        give ``3``), a ``float`` for any other numeric value, and ``val``
        itself, unchanged, when it cannot be interpreted as a number.
    """
    if isinstance(val, str):
        try:
            # Parse integer literals directly. Routing them through float()
            # first silently corrupts integers larger than 2**53.
            return int(val)
        except ValueError:
            pass  # Not a plain integer literal; fall through to the float path.

    try:
        float_val = float(val)
    except (TypeError, ValueError):
        # Not numeric at all (or not a type float() accepts): leave untouched.
        return val

    # Integer-valued decimals such as "3.0" are still stored as int.
    return int(float_val) if float_val.is_integer() else float_val


# Dataset selector: the valid MODES are "grades" or "bc".
MODE = "bc"

train_file = f"{MODE}_train.csv"
test_file = f"{MODE}_test.csv"

# Route the columns keyed 1 through 9 through convert_if_possible so that every
# integer written in the file without a "." is stored as an integer in the
# pandas DataFrame. The test file is read first, then the training file.
test_df, train_df = (
    pd.read_csv(
        csv_path,
        converters={column: convert_if_possible for column in range(1, 10)},
        sep=" ",
    )
    for csv_path in (test_file, train_file)
)

# Neural Network
---

In [2]:
from FCNN import (
    initialize_network,
    predict,
    predict_multiclass,
    train_network,
    read_configuration_file,
    accuracy_score,
    F1_score,
)

def _rows_with_int_label(frame):
    """Return the frame's rows as lists, with the last value of each row cast to int."""
    return [
        list(record[:-1]) + [int(record[-1])]
        for record in frame.itertuples(index=False, name=None)
    ]


train_dataset = _rows_with_int_label(train_df)
test_dataset = _rows_with_int_label(test_df)


NUM_EPOCHS = 100
LEARNING_RATE = 0.1  # presumably the "lrate" echoed in the training log - confirm

# Every column except the last one is counted as a network input.
# NOTE(review): the evaluation cell compares against row[-4:] when MODE is not
# "bc", so this count may include label columns in that mode - confirm against FCNN.
n_inputs = len(train_dataset[0]) - 1
n_outputs = 4 if MODE != "bc" else 2

# NOTE(review): the middle argument (1) is presumably the hidden-layer size - confirm.
network = initialize_network(n_inputs, 1, n_outputs)
trained_network = train_network(
    network, train_dataset, LEARNING_RATE, NUM_EPOCHS, n_outputs
)

print(f"All of the hyperparameters of this NN are: {trained_network}")

>epoch=0, lrate=0.100, error=196.807
>epoch=1, lrate=0.100, error=192.955
>epoch=2, lrate=0.100, error=191.175
>epoch=3, lrate=0.100, error=187.025
>epoch=4, lrate=0.100, error=181.304
>epoch=5, lrate=0.100, error=172.599
>epoch=6, lrate=0.100, error=160.575
>epoch=7, lrate=0.100, error=146.716
>epoch=8, lrate=0.100, error=132.984
>epoch=9, lrate=0.100, error=120.706
>epoch=10, lrate=0.100, error=110.344
>epoch=11, lrate=0.100, error=101.810
>epoch=12, lrate=0.100, error=94.813
>epoch=13, lrate=0.100, error=89.041
>epoch=14, lrate=0.100, error=84.229
>epoch=15, lrate=0.100, error=80.167
>epoch=16, lrate=0.100, error=76.695
>epoch=17, lrate=0.100, error=73.691
>epoch=18, lrate=0.100, error=71.063
>epoch=19, lrate=0.100, error=68.742
>epoch=20, lrate=0.100, error=66.673
>epoch=21, lrate=0.100, error=64.812
>epoch=22, lrate=0.100, error=63.127
>epoch=23, lrate=0.100, error=61.590
>epoch=24, lrate=0.100, error=60.180
>epoch=25, lrate=0.100, error=58.878
>epoch=26, lrate=0.100, error=57.670

In [3]:
# TESTING ON UNSEEN DATA (the training data is scored as well, for comparison)

# Predictions are produced by the same call in both modes, so they are computed
# once up front: test set first, then training set.
test_predictions = [predict(trained_network, sample) for sample in test_dataset]
train_predictions = [predict(trained_network, sample) for sample in train_dataset]

is_binary = MODE == "bc"

if is_binary:
    # The expected label is the last value of each row.
    test_targets = [sample[-1] for sample in test_dataset]
    train_targets = [sample[-1] for sample in train_dataset]
else:
    # The last four columns of each row are compared to the predictions.
    # Accuracy is defined as:
    #   overall accuracy = (A + D) / (A + B + C + D)
    # i.e. the fraction of examples that are correctly predicted with respect
    # to the current class. Overall accuracy is generally considered a poor
    # evaluation metric for Boolean classification tasks: if most examples do
    # not belong to most classes, a system can achieve a high overall accuracy
    # by trivially predicting that all examples belong to no classes.
    test_targets = [sample[-4:] for sample in test_dataset]
    train_targets = [sample[-4:] for sample in train_dataset]

accuracy_on_test_data = accuracy_score(test_targets, test_predictions)
accuracy_on_train_data = accuracy_score(train_targets, train_predictions)

train_report = [f"Accuracy on training data: {accuracy_on_train_data}"]
test_report = [f"Accuracy on test data: {accuracy_on_test_data}"]

if is_binary:
    # The F1 score is only reported in "bc" mode.
    f1_test = F1_score(test_targets, test_predictions)
    f1_train = F1_score(train_targets, train_predictions)
    train_report.append(f"F1 score on training data: {f1_train}")
    test_report.append(f"F1 score on test data: {f1_test}")

# Training metrics, a blank separator line, then test metrics.
for line in (*train_report, "", *test_report):
    print(line)

TypeError: predict() takes 2 positional arguments but 3 were given

In [6]:
# Show a table with the actual label(s) of every training row on the left and
# the matching entry of train_predictions on the right.

pd.set_option("display.max_rows", None)

if n_outputs == 2:
    # Two outputs: the actual label is the single last value of the row.
    actual_labels = [str(sample[-1]) for sample in train_dataset]
else:
    # Otherwise the actual labels are the last n_outputs values of the row.
    actual_labels = [str(sample[-n_outputs:]) for sample in train_dataset]

table_df = pd.DataFrame({"Actual": actual_labels, "Predicted": train_predictions})

table_df

Unnamed: 0,Actual,Predicted
0,"[1, 1, 1, 0]","[0, 1, 0, 0]"
1,"[1, 1, 1, 0]","[0, 1, 0, 0]"
2,"[1, 1, 1, 0]","[0, 1, 0, 0]"
3,"[1, 1, 1, 0]","[0, 1, 0, 0]"
4,"[1, 1, 0, 0]","[0, 1, 0, 0]"
5,"[1, 1, 1, 1]","[0, 1, 0, 0]"
6,"[1, 1, 0, 0]","[0, 1, 0, 0]"
7,"[1, 1, 1, 1]","[0, 1, 0, 0]"
8,"[1, 1, 1, 1]","[0, 1, 0, 0]"
9,"[1, 1, 1, 0]","[0, 1, 0, 0]"
