In [16]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split, GridSearchCV
from skorch import NeuralNetClassifier
import torch.nn as nn
import torch.nn.functional as F

In [17]:
# Redefine your model
class SimpleNNClassifier(nn.Module):
    def __init__(self, input_size, hidden1=512, hidden2=256, num_classes=10):
        super(SimpleNNClassifier, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden1)
        self.layer2 = nn.Linear(hidden1, hidden2)
        self.output = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = self.output(x)
        return x

In [18]:
# Load the data
data_path = "C:\\Users\\htoll\\Desktop\\Uni\\Y3S1\\COMP4702\\report\\Cleaned_data.csv"
data = pd.read_csv(data_path)
X = data.drop(columns=["Species_Population"]).astype(np.float32)
y = data["Species_Population"].astype(np.int64)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [19]:
net = NeuralNetClassifier(
    SimpleNNClassifier,
    criterion=torch.nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    lr=0.001,
    batch_size=64,
    max_epochs=50,  # Increase number of epochs from 10 to 50
    module__input_size=X_train.shape[1],
    module__num_classes=len(y.unique()),
    iterator_train__shuffle=True,
    device="cuda" if torch.cuda.is_available() else "cpu",  # for GPU support
)

param_grid = {
    "module__hidden1": [
        2**n for n in range(5, 10)
    ],  # Different sizes for the first hidden layer
    "module__hidden2": [
        2**n for n in range(3, 10)
    ],  # Different sizes for the second hidden layer
}

In [20]:
# Set up GridSearchCV
grid = GridSearchCV(net, param_grid, refit=True, cv=3, scoring="accuracy")
grid.fit(X_train.values, y_train.values)  # Make sure to pass numpy arrays

# Output the results
print("Best parameters found: ", grid.best_params_)
print("Best score: ", grid.best_score_)

# Predict and evaluate the model
y_pred = grid.predict(X_test.values)
from sklearn.metrics import accuracy_score

print("Test Accuracy: ", accuracy_score(y_test, y_pred))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m6.0857[0m       [32m0.1027[0m        [35m3.6552[0m  0.1058
      2        [36m3.1593[0m       [32m0.1081[0m        [35m2.9322[0m  0.0685
      3        [36m2.8039[0m       0.1081        [35m2.6144[0m  0.0745
      4        [36m2.5349[0m       0.0649        [35m2.4516[0m  0.0562
      5        [36m2.4094[0m       0.0811        [35m2.3575[0m  0.0496
      6        [36m2.3440[0m       0.0865        [35m2.3294[0m  0.0450
      7        [36m2.3200[0m       0.0919        [35m2.3121[0m  0.0480
      8        [36m2.3083[0m       0.1081        [35m2.3061[0m  0.0394
      9        [36m2.3055[0m       [32m0.1514[0m        2.3069  0.0420
     10        [36m2.3047[0m       0.1081        [35m2.3050[0m  0.0400
     11        2.3059       0.1081        [35m2.3048[0m  0.0390
     12        [36m2.3034[0m       0.1081        