<a href="https://colab.research.google.com/github/christophergaughan/PyTorch/blob/main/PyTorch_cvlassification_exercises.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Device-agnostic code

In [None]:
import torch
from torch import nn

# Device-agnostic setup: use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Run the `nvidia-smi` shell command to report the GPU status of this runtime
# (on a machine without an NVIDIA driver this presumably just prints a shell error - confirm)
!nvidia-smi

In [None]:
# Create a dataset with Scikit-Learn's make_moons()
import sklearn
from sklearn.datasets import make_moons
# Number of 2-D points to generate for the moons dataset
n_samples = 1000

# Generate the noisy two-moons data: X holds the point coordinates, y the 0/1 moon labels.
# The fixed random_state makes the generated points repeatable.
X, y = make_moons(n_samples=n_samples, noise=0.1, random_state=42)


In [None]:
# Turn data into a DataFrame
import pandas as pd
# One row per sample: the two coordinates plus the class label
moons = pd.DataFrame(dict(X1=X[:, 0], X2=X[:, 1], label=y))
moons.head()

In [None]:
import matplotlib.pyplot as plt

# Scatter the two feature columns against each other, coloured by class label
# (the trailing semicolon suppresses the PathCollection repr in the cell output)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu);

In [None]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

# Hold out 20% of the samples as a test set (no normalisation happens here).
# X and y are still NumPy arrays at this point; the same split is recomputed on
# tensors in a later cell, which overwrites these four variables.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# The data is in numpy arrays, we need to turn into pytorch tensors
import torch
# Convert features and labels to float32 tensors.
# torch.as_tensor accepts both NumPy arrays and tensors, so this cell can be re-run
# safely; torch.from_numpy raises a TypeError once X / y have already been converted.
X = torch.as_tensor(X, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

In [None]:
# Split data randomly
from sklearn.model_selection import train_test_split

# 80/20 train/test split of the tensors; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Define the neural network model for binary classification
class MoonModelV2(nn.Module):
    """Small fully connected network for binary classification.

    Two hidden ``Linear`` + ``ReLU`` stages feed a final ``Linear`` layer that
    returns raw logits (no sigmoid is applied), so the output is meant to be
    used with a logits-based loss such as ``nn.BCEWithLogitsLoss``.

    Args:
        in_features: Number of input features per sample. Defaults to 2.
        hidden_units: Width of both hidden layers. Defaults to 64.
        out_features: Number of logits produced per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 2, hidden_units: int = 64, out_features: int = 1):
        super().__init__()
        # Layers are created in the same order as before, so calling MoonModelV2()
        # with the defaults yields the same architecture and parameter names.

        # Input -> first hidden layer, followed by a ReLU non-linearity
        self.layer_1 = nn.Linear(in_features, hidden_units)
        self.relu_1 = nn.ReLU()

        # First hidden -> second hidden layer, followed by a ReLU non-linearity
        self.layer_2 = nn.Linear(hidden_units, hidden_units)
        self.relu_2 = nn.ReLU()

        # Second hidden -> output layer (raw logits)
        self.layer_3 = nn.Linear(hidden_units, out_features)

    def forward(self, x):
        """Run ``x`` through both hidden stages and return the output-layer logits.

        Args:
            x: Input tensor whose last dimension has ``in_features`` entries.

        Returns:
            Tensor of raw logits whose last dimension has ``out_features`` entries.
        """
        x = self.relu_1(self.layer_1(x))
        x = self.relu_2(self.layer_2(x))
        return self.layer_3(x)  # Raw logits; the sigmoid is left to the loss / caller



In [None]:
# Build the model and place its parameters on the selected device (GPU if available, otherwise CPU)
model_1a = MoonModelV2()
model_1a = model_1a.to(device)

# The training and test tensors must live on the same device as the model
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)


In [None]:
# Function to initialize weights for the linear layers
def initialize_weights(m):
    """Re-initialise a module in place if it is an ``nn.Linear`` layer.

    Intended to be passed to ``nn.Module.apply``, which calls it once per
    submodule. Modules that are not ``nn.Linear`` are left untouched.

    Args:
        m: The module to (possibly) re-initialise.

    Returns:
        None. The module's parameters are modified in place.
    """
    if not isinstance(m, nn.Linear):
        return

    # Xavier/Glorot uniform initialisation for the weight matrix.
    # NOTE(review): Kaiming/He initialisation is the variant usually paired with ReLU;
    # Xavier is kept here so existing behaviour does not change.
    nn.init.xavier_uniform_(m.weight)

    # Linear layers created with bias=False have m.bias set to None, which
    # nn.init.zeros_ cannot handle, so only zero the bias when it exists.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Seed the RNG *before* drawing the initial weights so every run starts from the same
# parameters. The seed set in the training cell comes too late for that: by then the
# weights have already been sampled, and the full-batch loop itself draws no random numbers.
torch.manual_seed(42)

# Apply the weight initialization to every submodule of the model
# (apply() returns the model, which is displayed as the cell output)
model_1a.apply(initialize_weights)


In [None]:
# Binary cross-entropy computed directly on raw logits
# (the sigmoid is applied inside the loss, so the model must not apply one itself)
loss_fn = nn.BCEWithLogitsLoss()

# Adam optimizer over all of the model's parameters, with a learning rate of 0.01
optimizer = torch.optim.Adam(model_1a.parameters(), lr=0.01)


In [None]:
# Fix the CPU and CUDA RNG seeds before training
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Number of full-batch passes over the training set; the model is small, so this is cheap
epochs = 100

for epoch in range(epochs):
    # ---- Training step ----
    model_1a.train()

    # Forward pass: raw logits, squeezed so their shape matches the labels
    y_logits = model_1a(X_train).squeeze()
    loss = loss_fn(y_logits, y_train.squeeze())

    # Clear stale gradients, backpropagate, then take one optimizer step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ---- Evaluation step (gradient tracking disabled) ----
    model_1a.eval()
    with torch.no_grad():
        test_logits = model_1a(X_test).squeeze()
        test_loss = loss_fn(test_logits, y_test.squeeze())

        # Logits -> probabilities -> hard 0/1 predictions
        test_pred = torch.sigmoid(test_logits).round()

        # Percentage of test samples classified correctly
        test_acc = 100 * test_pred.eq(y_test.squeeze()).float().mean().item()

    # Report progress every 10 epochs
    if not epoch % 10:
        print(f"Epoch {epoch}: Loss = {loss:.4f}, Test Loss = {test_loss:.4f}, Test Acc = {test_acc:.2f}%")

In [None]:
# Final predictions on the test set: logits -> sigmoid probabilities -> rounded 0/1 labels
model_1a.eval()
with torch.inference_mode():
    y_preds = model_1a(X_test).sigmoid().round().squeeze()

# Show the first ten predictions next to the first ten true labels
y_preds[:10], y_test[:10]

In [None]:
import requests
from pathlib import Path

# 1. (Optional) Remove the existing (likely invalid) helper_functions.py
# !rm helper_functions.py

# 2. Use the *raw* GitHub URL so the response body is the Python source itself
url_to_download = "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py"
helper_path = Path("helper_functions.py")

if helper_path.is_file():
    print("helper_functions.py already exists, skipping download")
else:
    print("Downloading helper_functions.py")
    # Bound the wait, and fail loudly on an HTTP error *before* touching the disk,
    # so an error page is never saved as helper_functions.py (which would then be
    # skipped as "already exists" on the next run and break the import below).
    request = requests.get(url_to_download, timeout=30)
    request.raise_for_status()
    helper_path.write_bytes(request.content)


In [None]:
from helper_functions import plot_predictions, plot_decision_boundary


In [None]:
# Plot the learned decision boundary side by side for the train and test splits
plt.figure(figsize=(12, 6))
for position, (title, features, labels) in enumerate(
    [("Train", X_train, y_train), ("Test", X_test, y_test)],
    start=1,
):
    plt.subplot(1, 2, position)
    plt.title(title)
    plot_decision_boundary(model_1a, features, labels)