# LOAD DATASET
---

In [27]:
import numpy as np
import pandas as pd


def convert_if_possible(val):
    """Convert ``val`` to a number when it represents one, else return it as-is.

    Args:
        val: The raw cell value, normally a string handed over by a
            ``pd.read_csv`` converter.

    Returns:
        An ``int`` when the value is a whole number (``"3"`` and ``"3.0"``
        both give ``3``), a ``float`` for any other numeric value, and the
        original ``val`` unchanged when it cannot be read as a number.
    """
    # Parse integer-looking strings directly: going through float() first
    # silently rounds integers above 2**53 (float has a 53-bit mantissa).
    if isinstance(val, str):
        try:
            return int(val)
        except ValueError:
            pass  # not a plain integer literal; fall through to float parsing

    try:
        float_val = float(val)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric input such as None.
        return val

    # nan and inf report is_integer() == False, so they stay floats.
    if float_val.is_integer():
        return int(float_val)
    return float_val


# Dataset selector: MODE is either "grades" or "bc".
MODE = "grades"

train_file = f"{MODE}_train.csv"
test_file = f"{MODE}_test.csv"

# Column positions 1 through 9 are routed through convert_if_possible so that
# whole-number values end up in the DataFrame as ints instead of floats.
CONVERTED_COLUMNS = range(1, 10)

test_df = pd.read_csv(
    test_file,
    sep=" ",
    converters=dict.fromkeys(CONVERTED_COLUMNS, convert_if_possible),
)
train_df = pd.read_csv(
    train_file,
    sep=" ",
    converters=dict.fromkeys(CONVERTED_COLUMNS, convert_if_possible),
)

# Neural Network
---

In [28]:
from FCNN import initialize_network, predict, train_network, read_configuration_file

from helpers import compute_evaluation_metrics

# Flatten each DataFrame row into a plain list; only the final column is cast to int.
train_dataset = [
    list(record[:-1]) + [int(record[-1])]
    for record in train_df.itertuples(index=False, name=None)
]
test_dataset = [
    list(record[:-1]) + [int(record[-1])]
    for record in test_df.itertuples(index=False, name=None)
]


NUM_EPOCHS = 1000
LEARNING_RATE = 0.1  # appears as "lrate" in the training log printed below

n_outputs = 4 if MODE != "bc" else 1

# NOTE(review): only the last column is excluded from the input count here, yet
# the evaluation cell treats the last `n_outputs` columns of each row as targets.
# For MODE == "grades" (n_outputs == 4) this may feed label columns to the network
# as inputs -- confirm against how FCNN slices each row before trusting the metrics.
n_inputs = len(train_dataset[0]) - 1

# NOTE(review): the middle argument (1) is presumably the hidden-layer size --
# confirm in FCNN.initialize_network.
network = initialize_network(n_inputs, 1, n_outputs)
trained_network = train_network(
    network, train_dataset, LEARNING_RATE, NUM_EPOCHS, n_outputs
)

print(f"All of the hyperparameters of this NN are: {trained_network}")

>epoch=0, lrate=0.100, error=318.784
>epoch=1, lrate=0.100, error=311.551
>epoch=2, lrate=0.100, error=302.174
>epoch=3, lrate=0.100, error=283.718
>epoch=4, lrate=0.100, error=254.433
>epoch=5, lrate=0.100, error=225.631
>epoch=6, lrate=0.100, error=202.904
>epoch=7, lrate=0.100, error=186.055
>epoch=8, lrate=0.100, error=173.535
>epoch=9, lrate=0.100, error=164.015
>epoch=10, lrate=0.100, error=156.583
>epoch=11, lrate=0.100, error=150.632
>epoch=12, lrate=0.100, error=145.755
>epoch=13, lrate=0.100, error=141.675
>epoch=14, lrate=0.100, error=138.199
>epoch=15, lrate=0.100, error=135.191
>epoch=16, lrate=0.100, error=132.553
>epoch=17, lrate=0.100, error=130.212
>epoch=18, lrate=0.100, error=128.115
>epoch=19, lrate=0.100, error=126.221
>epoch=20, lrate=0.100, error=124.497
>epoch=21, lrate=0.100, error=122.920
>epoch=22, lrate=0.100, error=121.468
>epoch=23, lrate=0.100, error=120.126
>epoch=24, lrate=0.100, error=118.879
>epoch=25, lrate=0.100, error=117.718
>epoch=26, lrate=0.100

In [29]:
# TESTING ON UNSEEN DATA

# Run the trained network over every row: the test split first, then the train split.
test_predictions = [predict(trained_network, sample) for sample in test_dataset]
train_predictions = [predict(trained_network, sample) for sample in train_dataset]

# The expected values are the last `n_outputs` entries of each row.
train_targets = [sample[-n_outputs:] for sample in train_dataset]
test_targets = [sample[-n_outputs:] for sample in test_dataset]

# Score each split against its own predictions.
train_evals = compute_evaluation_metrics(train_targets, train_predictions)
test_evals = compute_evaluation_metrics(test_targets, test_predictions)

# Report the train metrics, a blank separator line, then the test metrics.
print(f"Evaluations for the {MODE} dataset on the train set are:")
print(train_evals)
print()
print(f"Evaluations for the {MODE} dataset on the test set are:")
print(test_evals)

Evaluations for the grades dataset on the train set are:
[{'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'F1': 1.0}, {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'F1': 1.0}, {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'F1': 1.0}, {'accuracy': 0.8193979933110368, 'precision': 0, 'recall': 0.0, 'F1': 0}, {'micro_accuracy': 0.9548494983277592, 'micro_precision': 1.0, 'micro_recall': 0.9137380191693291, 'micro_F1': 0.9549248747913188}, {'macro_accuracy': 0.9548494983277592, 'macro_precision': 0.75, 'macro_recall': 0.75, 'macro_F1': 0.75}]

Evaluations for the grades dataset on the test set are:
[{'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'F1': 1.0}, {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'F1': 1.0}, {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'F1': 1.0}, {'accuracy': 0.8686868686868687, 'precision': 0, 'recall': 0.0, 'F1': 0}, {'micro_accuracy': 0.9671717171717171, 'micro_precision': 1.0, 'micro_recall': 0.9297297297297298, 'micro_F1': 0.96358543

In [30]:
# Expected outputs for the train split: the last `n_outputs` entries of every row.
train_res = [row[-n_outputs:] for row in train_dataset]

# Guard against stale kernel state: a prediction list left over from a different
# dataset would otherwise be compared against the wrong rows.
assert len(train_predictions) == len(train_res), (
    "train_predictions and train_dataset have different lengths"
)

# Number of incorrect predictions: a row counts as incorrect when its prediction
# is not equal to its whole target slice.
train_incorrect = sum(
    predicted != expected
    for predicted, expected in zip(train_predictions, train_res)
)
print(f"Number of incorrect predictions on the train set: {train_incorrect}")

Number of incorrect predictions on the train set: 54
