In [None]:
# Q1 & Q2: Setup and Reproducibility
# Import required libraries following ISLP conventions
import numpy as np, pandas as pd, random, torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from pytorch_lightning import seed_everything
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torchinfo import summary
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from pathlib import Path

# Seed the random number generators from a single named constant so that
# repeated runs give consistent results. Later cells that need a seed
# (e.g. the train/test split's random_state) can reuse SEED instead of
# repeating the literal.
SEED = 57
seed_everything(SEED, workers=True)

# Ask PyTorch for deterministic algorithms; warn_only=True is passed so that an
# operation without a deterministic implementation warns instead of raising.
torch.use_deterministic_algorithms(True, warn_only=True)

print("✓ All libraries imported and seeds set for reproducibility")


In [None]:
# Q1 Part A: Data Preparation - Load and split Wage dataset
# Load the Wage dataset from ISLP package
# NOTE(review): this import sits mid-notebook rather than in the import cell at
# the top; it still runs on a fresh kernel, but consider consolidating it there.
from ISLP import load_data
# presumably a pandas DataFrame containing the columns listed below — TODO confirm
Wage = load_data('Wage')

# Select features and target as specified
# Predictors: year, age, education, jobclass
# Target: wage

# Encode categorical variables (education and jobclass) using one-hot encoding
# This converts categories into binary columns that neural networks can process

# Relevant Prints

In [None]:
# Q1 Part A (continued): Split data 75/25 and standardize
# Split into training (75%) and test (25%) sets
# random_state=57 ensures reproducible splits (matching global seed)


# Standardize features: subtract mean and divide by std
# IMPORTANT: Fit scaler ONLY on training data to prevent data leakage
# The test set should be transformed using training statistics



In [None]:
# Q1 Part A (continued): Convert to PyTorch tensors and create data module
# Convert numpy arrays to PyTorch tensors with appropriate dtypes
# float32 is standard for neural network training (balance of precision and speed)
# IMPORTANT: Keep targets as 1D tensors (no reshape) for regression


# Create TensorDatasets that pair features with targets


# Create SimpleDataModule with 25% validation split and batch size of 64
# validation=0.25 means 25% of training data is used for validation
# batch_size=64 means the model processes 64 samples at a time
from ISLP.torch import SimpleDataModule

# Get one batch from training data to verify shapes

# Relevant Prints


In [None]:
# Q1 Part B: Model Definition - Neural network for wage regression
class WageModel(nn.Module):
    """
    Feedforward neural network for wage prediction.

    Architecture:
      - Input layer: number of features from standardized data
      - Hidden layer 1: 64 units with ReLU activation and 30% dropout
      - Hidden layer 2: 32 units with ReLU activation and 30% dropout
      - Output layer: 1 unit (continuous wage prediction)

    Dropout prevents overfitting by randomly deactivating neurons during training.
    ReLU (Rectified Linear Unit) adds non-linearity: f(x) = max(0, x)

    Args:
        input_size: Number of predictor columns in the feature matrix
            (i.e. the width of one row after encoding and standardization).
    """

    def __init__(self, input_size):
        super().__init__()
        # Flatten keeps the model usable if a batch arrives with extra
        # trailing dimensions; for a (batch, features) input it is a no-op.
        self.flatten = nn.Flatten()
        self.sequential = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """Return one wage prediction per row of ``x``.

        Args:
            x: Float tensor whose first dimension is the batch and whose
                remaining dimensions flatten to ``input_size`` features.

        Returns:
            1-D tensor of length ``batch``. The trailing unit dimension of the
            output layer is flattened away so predictions line up with 1-D
            regression targets.
        """
        x = self.flatten(x)
        return torch.flatten(self.sequential(x))


In [None]:

# Instantiate the model with correct input dimension


# Test model with one batch to verify output shape
# Relevant Prints


# Print detailed model summary showing layers, parameters, and output shapes



In [None]:
# Q1 Part C: Training - Train the wage regression model
from ISLP.torch import SimpleModule, ErrorTracker
from pytorch_lightning.loggers import CSVLogger
from torchmetrics import MeanAbsoluteError
from torch.optim import Adam

# Create Adam optimizer with learning rate of 0.001 as specified

# Create SimpleModule for regression task
# SimpleModule.regression() automatically uses MSELoss for loss function
# metrics: Mean Absolute Error (more interpretable than MSE)


# Setup logging to track training progress
# CSVLogger saves metrics to a CSV file for later analysis
# ErrorTracker monitors both training and validation errors


# Create PyTorch Lightning Trainer
# max_epochs: number of passes through the entire training dataset
# logger: saves training history
# callbacks: ErrorTracker monitors performance
# deterministic: ensures reproducible results
from pytorch_lightning import Trainer



# Train the model
# fit() runs the training loop: forward pass, compute loss, backward pass, update weights

# Evaluate on test set

# Extract and report test MAE (rounded to 2 decimal places)


In [None]:
# Q1 Part D: Learning Curve & Interpretation
# Load training log from CSV file saved during training
# Use the logger's experiment.metrics_file_path to get the correct version path

# Extract training and validation MAE across epochs
# PyTorch Lightning logs training metrics as 'train_mae_epoch' and validation as 'valid_mae'

# Create learning curve plot

#Interpretation (print the whole thing cleanly)
print("\nInterpretation:")


In [None]:
# Q2 Part A: Data Preparation - Load image dataset
# Define image transformations following ISLP conventions
# Compose chains multiple transformations together
# Preprocessing pipeline applied to every image as it is read:
#   1. resize to 64x64 pixels
#   2. convert the PIL image to a tensor (0-1 range)
#   3. normalize each RGB channel with (x - 0.5) / 0.5, i.e. to range [-1, 1]
transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(std=[0.5, 0.5, 0.5], mean=[0.5, 0.5, 0.5]),
    ]
)

# Dataset locations. ImageFolder expects the layout root/class_name/image_files.
train_path = 'cats_vs_dogs_dataset/train'
test_path = 'cats_vs_dogs_dataset/test'

# Build one ImageFolder per split, sharing the same preprocessing.
# NOTE(review): labels are presumably assigned from the sorted class-folder
# names (so `cats` -> 0, `dogs` -> 1 if the folders are named that way) —
# confirm with `train_image_dataset.class_to_idx`.
train_image_dataset = datasets.ImageFolder(train_path, transform=transform)
test_image_dataset = datasets.ImageFolder(test_path, transform=transform)

# Create DataLoaders for batch processing
# shuffle=True re-randomizes the training order every epoch, so the model cannot pick up on the order in which samples are stored
# shuffle=False for the test set (evaluation metrics do not depend on sample order)

# Relevant Prints

# Get one batch to verify shapes + print 


In [None]:
# Q2 Part A (continued): Display 6 sample images with labels
# Helper function to denormalize images for display
# Our normalization: (x - 0.5) / 0.5, so to reverse: x * 0.5 + 0.5
def denormalize(tensor, mean=0.5, std=0.5):
    """Undo ``(x - mean) / std`` normalization so an image can be visualized.

    With the default arguments this reverses a normalization of
    ``(x - 0.5) / 0.5``, mapping values in [-1, 1] back to [0, 1].

    Args:
        tensor: Normalized image tensor.
        mean: Mean that was subtracted during normalization. Either a scalar
            or a tensor that broadcasts against ``tensor`` (e.g. shape
            ``(C, 1, 1)`` for per-channel values).
        std: Standard deviation that was divided by during normalization;
            same conventions as ``mean``.

    Returns:
        A new tensor equal to ``tensor * std + mean``; the input is not
        modified in place.
    """
    return tensor * std + mean

# Get a batch of images and labels

# Create figure with 2 rows, 3 columns

#Loop to denormalize and plot 

# Finish the figure: add an overall title, tighten the layout, write the
# figure to a 300-dpi PNG, then display it.
plt.suptitle(
    'Sample Images from Cats vs Dogs Dataset',
    y=0.98,
    fontweight='bold',
    fontsize=14,
)
plt.tight_layout()
plt.savefig('sample_images.png', bbox_inches='tight', dpi=300)
plt.show()

print("✓ Sample images displayed and saved to 'sample_images.png'")


In [None]:
# Q2 Part A (continued): Convert to TensorDataset format
# SimpleDataModule expects TensorDatasets, so we convert from ImageFolder format

# Extract all images and labels from ImageFolder datasets
def dataset_to_tensors(dataset, batch_size=256):
    """Materialize a map-style dataset of (image, label) pairs as two tensors.

    The dataset is read once, in order, in chunks of ``batch_size`` samples,
    and the chunks are concatenated. The result does not depend on
    ``batch_size``; it only bounds how many samples are collated at a time.

    Args:
        dataset: Dataset supporting ``len()`` whose items are
            ``(image, label)`` pairs that the default DataLoader collation
            can stack (e.g. an ImageFolder with a ToTensor transform).
        batch_size: Number of samples collated per step while reading.

    Returns:
        Tuple ``(images, labels)`` of tensors whose first dimension is
        ``len(dataset)``, in the dataset's own order.

    Raises:
        ValueError: If the dataset is empty, since there is nothing to
            stack and the output shape cannot be determined.
    """
    if len(dataset) == 0:
        raise ValueError("Cannot convert an empty dataset to tensors")

    # shuffle=False keeps the tensors aligned with the dataset's index order.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    image_batches = []
    label_batches = []
    for images, labels in loader:
        image_batches.append(images)
        label_batches.append(labels)

    return torch.cat(image_batches), torch.cat(label_batches)

# Read each ImageFolder dataset once and hold the result as in-memory tensors.
print("Converting datasets to tensors...")
train_images, train_labels = dataset_to_tensors(train_image_dataset)
test_images, test_labels = dataset_to_tensors(test_image_dataset)

# Create TensorDatasets
# Each TensorDataset pairs the image tensor with the label tensor of its split.
train_tensor_dataset = TensorDataset(train_images, train_labels)
test_tensor_dataset = TensorDataset(test_images, test_labels)

# Create SimpleDataModule with 25% validation split
# This will automatically split training data into train/validation

#Relevant Prints

# Verify batch shape from DataModule

#Relevant Prints 


In [None]:
# Q2 Part B: Model Definition - Convolutional Neural Network
class CatDogCNN(nn.Module):
    """
    Convolutional Neural Network for binary image classification.

    Architecture:
      Conv Block 1:
        - Conv2d: 3→16 channels, 3×3 kernel, padding=1 (preserves size)
        - ReLU activation
        - MaxPool2d: 2×2 (reduces spatial size by half: 64×64 → 32×32)

      Conv Block 2:
        - Conv2d: 16→32 channels, 3×3 kernel, padding=1
        - ReLU activation
        - MaxPool2d: 2×2 (reduces size: 32×32 → 16×16)

      Fully Connected Layers:
        - Flatten: 32×16×16 = 8192 → flattened vector
        - Linear: 8192 → 128 neurons
        - ReLU activation
        - Dropout: 30% regularization
        - Linear: 128 → 2 (output logits for 2 classes)

    Why CNNs for images?
      - Convolutional layers detect spatial patterns (edges, textures, shapes)
      - Pooling layers reduce dimensionality while preserving features
      - Local connectivity exploits spatial structure unlike fully connected layers

    The first linear layer is sized for 3×64×64 inputs; other image sizes
    will not match its 8192 input features.
    """

    def __init__(self):
        super().__init__()
        # Two conv blocks: each keeps the spatial size (padding=1) and then
        # halves it with 2×2 max pooling.
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Classifier head operating on the flattened 32×16×16 feature map.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 2),
        )

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: Float tensor of shape ``(batch, 3, 64, 64)``.

        Returns:
            Tensor of shape ``(batch, 2)`` holding unnormalized logits
            (no softmax is applied here).
        """
        return self.classifier(self.features(x))
 

In [None]:

# Instantiate the model

# Test with one batch + print output shape 

# Print detailed model summary (layers, parameters, output shapes)



In [None]:
# Q2 Part C: Training - Train the CNN classifier

# Create Adam optimizer with learning rate of 0.001 as specified
from torch.optim import Adam

# Create SimpleModule for classification
# num_classes=2: binary classification (cat vs dog)
# SimpleModule.classification() automatically uses:
#   - loss_fn: CrossEntropyLoss (standard for classification)
#   - metrics: Accuracy (percentage of correct predictions)

# Setup logging

# Create Trainer for CNN

# Train the CNN

# Evaluate on test set

# Extract and report test accuracy (rounded to 2 decimal places)



In [None]:
# Q2 Part D: Learning Curve & Interpretation
# Load CNN training log from CSV file saved during training

# Extract training and validation accuracy

# PyTorch Lightning logs training accuracy as 'train_accuracy_epoch' and validation as 'valid_accuracy'

# Create learning curve plot

# Interpretation (print the whole thing cleanly)
print("\nInterpretation:")
