In [1]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter('ignore')

In [2]:
# If on MacBook:
if torch.cuda.is_available():
    device = "cuda" # Use NVIDIA GPU (if available)
elif torch.backends.mps.is_available():
    device = "mps" # Use Apple Silicon GPU (if available)
else:
    device = "cpu" # Default to CPU if no GPU is available
    
print(device)

mps


In [3]:
# Step 1: Create a non-linear dataset (make_moons)

# make_moons generates a two-class dataset that is non-linear and slightly challenging but simple to work with

X, y = make_moons(
    n_samples=1000
    , noise=0.2
    , random_state=27)

In [4]:
# Step 2: Split the data into training and testing sets

# Random state ensures reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2
    , random_state=27
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((800, 2), (200, 2), (800,), (200,))

In [5]:
# Step 3: Preprocess the data
# Standardize the features to ensure faster and more stable training of the neural network

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Convert the data to PyTorch tensors for compatibility with PyTorch

# NOTE: In some classifications models, 'y' dtype needs to be a 'longTensor' (torch.long) when using CrossEntropyLoss, as this loss_fn needs an index value for the classification, hence needs an integer, not float;

X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

In [9]:
# Step 4: Define the neural network model

# This is a simple feedforward neural network with 2 hidden layers
class MoonsNN(nn.Module):
    def __init__(self):
        super(MoonsNN, self).__init__()
        # Input layer: 2 features as input
        # Hidden layer 1: 16 neurons with ReLU activation
        # Hidden layer 2: 8 neurons with ReLU activation
        # Output layer: 2 neurons (one for each class) with Softmax activation
        self.fc1 = nn.Linear(2, 16)  # First hidden layer
        self.fc2 = nn.Linear(16, 8)  # Second hidden layer
        self.fc3 = nn.Linear(8, 2)   # Output layer

    def forward(self, x):
        x = torch.relu(self.fc1(x))  # Apply ReLU activation to the first hidden layer
        x = torch.relu(self.fc2(x))  # Apply ReLU activation to the second hidden layer
        x = self.fc3(x)              # Output layer (logits)
        return x

In [11]:
# Initialize the model and move it to the selected device (CPU/GPU)
model = MoonsNN().to(device)
model

MoonsNN(
  (fc1): Linear(in_features=2, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=8, bias=True)
  (fc3): Linear(in_features=8, out_features=2, bias=True)
)

In [12]:
# Step 5: Define the loss function and optimizer
# CrossEntropyLoss is used for classification problems

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(
    model.parameters(), lr=0.01)  # Adam optimizer with a learning rate of 0.01

In [13]:
# Step 6: Train the model
# Number of epochs (iterations over the entire dataset)
n_epochs = 100

for epoch in range(n_epochs):
    # Set the model to training mode
    model.train()

    # Forward pass: compute the predictions
    y_pred = model(X_train_tensor)

    # Compute the loss
    loss_fn = criterion(y_pred, y_train_tensor)
    acc = accuracy_score(y_true=y_train_tensor.detach().cpu(), y_pred=y_pred.detach().cpu()) * 100

    # Zero the gradients from the previous step
    optimizer.zero_grad()

    # Backward pass: compute the gradients
    loss.backward()

    # Update the model parameters
    optimizer.step()
    
    '''
        ### Testing
    model.eval()
    with torch.inference_mode():
        # 1. Forward pass
        test_logits = model(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))

        # 2. Caculate loss/accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_score(y_true=y_test.detach().cpu(), y_pred=test_pred.detach().cpu()) * 100
#         test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
    '''
    # Step 7: Test the model
    # Set the model to evaluation mode
    model.eval()
    
    # Disable gradient computation during testing for efficiency
    with torch.no_grad():
        # Forward pass on the test set
        y_test_pred = model(X_test_tensor)
        # Get the class with the highest score (logits)
        y_test_pred_classes = torch.argmax(y_test_pred, axis=1)

    # Print the loss every 10 epochs
    # Print out what's happening every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")
        
        
#     if (epoch + 1) % 10 == 0:
#         print(f"Epoch [{epoch + 1}/{n_epochs}], Loss: {loss.item():.4f}")

Epoch [10/100], Loss: 0.4783
Epoch [20/100], Loss: 0.3700
Epoch [30/100], Loss: 0.3063
Epoch [40/100], Loss: 0.2799
Epoch [50/100], Loss: 0.2464
Epoch [60/100], Loss: 0.2092
Epoch [70/100], Loss: 0.1661
Epoch [80/100], Loss: 0.1261
Epoch [90/100], Loss: 0.0986
Epoch [100/100], Loss: 0.0826


In [None]:
# Step 7: Test the model
# Set the model to evaluation mode
model.eval()

# Disable gradient computation during testing for efficiency
with torch.no_grad():
    # Forward pass on the test set
    y_test_pred = model(X_test_tensor)
    # Get the class with the highest score (logits)
    y_test_pred_classes = torch.argmax(y_test_pred, axis=1)
