In [None]:
# Automatically reload modules when they change
%reload_ext autoreload
%autoreload 2

Loading dataset

In [None]:
from nn import *
from sklearn.datasets import load_wine
import matplotlib.pyplot as plt
import pandas as pd
import copy


In [None]:

X, y = wine_data = load_wine(return_X_y=True, as_frame=True)

In [None]:
X.shape, y.shape

In [None]:
X.describe(), y.describe()

In [None]:
X.info(), y.info()

Data exploration: Domain Knowledge, Data Cleaning/Preprocessing, Feature Creation/Construction/Transformation (Sums, Average, Check Existence, Log/Exp), Feature Encoding/Standardization (Scaling, One-Hot-Encodind, Flattening), Feature Selection (Identify Most Relevant Features)

In [None]:
plt.scatter(X[["alcohol"]], X[["ash"]], c=y, cmap='bwr', edgecolors='k')
plt.xlabel("Alcohol")
plt.ylabel("Ash")
plt.title("Wine Dataset")
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = StandardScaler()
min_max_scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
# X_scaled = pd.DataFrame(min_max_scaler.fit_transform(X_scaled), columns=X.columns)
X_scaled.describe()

In [None]:
y = pd.get_dummies(y)
y.describe()

Data Split

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42
)
print(len(X_train), len(y_train), len(X_test), len(y_test), len(X_val), len(y_val))

In [None]:
X_train, X_val, X_test, y_train, y_val, y_test = X_train.values.tolist(), X_val.values.tolist(), X_test.values.tolist(), y_train.values.tolist(), y_val.values.tolist(), y_test.values.tolist()

In [None]:
input_dim = X.shape[1]
output_dim = y.shape[1]

print(input_dim, output_dim)

Selecting Model

In [None]:
nn = NeuralNetwork([
    FullyConnected(input_dim, 16, init=RandomNormal(-0.1, 0.1)),
    ReLU(),
    FullyConnected(16, 32, init=RandomNormal(-0.1, 0.1)),
    Dropout(0.2),
    ReLU(),
    FullyConnected(32, 64, init=RandomNormal(-0.1, 0.1)),
    Dropout(0.2),
    ReLU(),
    FullyConnected(64, output_dim, init=RandomNormal(-0.1, 0.1)),
    Softmax()
])

Hyperparameter Turning (Use Cross Validation) 
* Loss Function
* Optimizer (learning_rate, weight_decay, ...)
* Number of Epoch
* Model Specific (Initializations, Number of Neurons/Layers)

In [None]:
learning_rate = 0.003
weight_decay = 0.00

num_epochs = 1000

loss_fn = CrossEntropyLoss()
optimizer = Adam(learning_rate=learning_rate, weight_decay=weight_decay)
metric = Accuracy()

Train Model With Best Hyperparameters (Use Cross Validation to Select the Best Model)

In [None]:
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
# Debug block for NN that outputs softmax probabilities

import numpy as np
from sklearn.metrics import accuracy_score

# 1. Take a small batch
X_sample = X_train[:5]
Y_sample = y_train[:5]  # assume one-hot encoded

nn.init_params()

# 2. Forward pass (network already outputs softmax probs)
Yhat = nn.forward(X_sample)
print("NN output (softmax probabilities) - first 5 samples:")
for i, row in enumerate(Yhat):
    print(f"Sample {i}: {row}")

# 3. Labels
print("\nLabels (first 5 samples):")
for i, row in enumerate(Y_sample):
    print(f"Sample {i}: {row}")

# 4. Compute loss
loss_val = loss_fn.loss(Y_sample, Yhat)
print("\nLoss value:", loss_val)

# 5. Backward pass (gradient w.r.t softmax outputs)
grad = loss_fn.backward()
print("\nGradients (first 5 samples):")
for i, row in enumerate(grad[:5]):
    print(f"Sample {i}: {row}")

# 6. Gradient statistics
grad_array = np.array(grad)
print("\nGradient stats: min =", grad_array.min(), 
      "max =", grad_array.max(), 
      "mean =", grad_array.mean())

# 7. Predicted classes and accuracy
y_pred = np.argmax(Yhat, axis=1)
y_true = np.argmax(Y_sample, axis=1)
acc = accuracy_score(y_true, y_pred)
print("\nPredicted classes:", y_pred)
print("True classes:     ", y_true)
print("Validation accuracy:", acc)


In [125]:
nn.init_params()

best_model = None
best_val_acc = 0.0

for epoch in range(num_epochs):
    nn.train(X_train, y_train, loss_fn, optimizer)
    loss = loss_fn.loss_value

    if (epoch + 1) % 100 == 0 or epoch == 0:
        prob_distribution = nn.forward(X_val, training=False)
        pred_classes = np.array(prob_distribution).argmax(axis=1)
        true_classes = np.array(y_val).argmax(axis=1)
        val_acc = accuracy_score(true_classes, pred_classes)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}")

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = copy.deepcopy(nn)

Epoch 900/1000, Loss: 0.1452, Val Acc: 0.9167
Epoch 1000/1000, Loss: 0.1818, Val Acc: 0.9444


Evaluate the Model with Test

In [126]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np

prob_distribution = best_model.forward(X_test, training=False)
pred_classes = np.array(prob_distribution).argmax(axis=1)
true_classes = np.array(y_test).argmax(axis=1)
accuracy = accuracy_score(true_classes, pred_classes)

f1 = f1_score(true_classes, pred_classes, average='weighted')
precision = precision_score(true_classes, pred_classes, average='weighted')
recall = recall_score(true_classes, pred_classes, average='weighted')

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test F1 Score: {f1:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")

Test Accuracy: 0.9444
Test F1 Score: 0.9423
Test Precision: 0.9514
Test Recall: 0.9444
