In [12]:
# Install the third-party packages used by this notebook; `%pip` targets the running kernel's environment.
# NOTE(review): versions are unpinned, so a fresh install may pull different releases — consider pinning.
%pip install torch torchvision matplotlib pandas numpy seaborn scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [13]:
import torch
from torchvision import transforms
import torch.nn as nn
from torch.utils.data import DataLoader
import time
from datetime import datetime
import os
from collections import Counter
import numpy as np
import json
import matplotlib.pyplot as plt
from src.data import WasteClassificationDataset
from src.helper_functions import plot_random_image_and_label, print_model_parameter_breakdown, save_model, save_config_to_json, save_run_to_csv
from src.model import NeuralNetwork, train_model, evaluate_model
from src.plots import plot_loss_curves, plot_accuracy_curves, plot_confusion_matrix, plot_combined_metrics, plot_f1_curves, plot_precision_curves, plot_recall_curves
from src.metrics import print_metrics_summary, print_classification_report
from src.config import TRANSFORM_CONFIG, MODEL_CONFIG, MODEL_ARCHITECTURE_CONFIG
from src.helper_functions import EarlyStopping
from src.config import calculate_input_size

In [14]:
# Select the compute device used for the model and the training loop.
# `torch.accelerator` is only present in newer PyTorch releases; on older
# builds the attribute lookup would raise AttributeError, so fall back to the
# long-standing CUDA / MPS availability checks before settling on the CPU.
if hasattr(torch, "accelerator"):
    if torch.accelerator.is_available():
        device = torch.accelerator.current_accelerator().type
    else:
        device = "cpu"
elif torch.cuda.is_available():
    device = "cuda"
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


# Creating Custom Dataset:

A custom Dataset class must implement three functions: `__init__`, `__len__`, and `__getitem__`.

In [15]:
# train_transforms = transforms.Compose([
#     transforms.Resize(TRANSFORM_CONFIG["image_size"]),
#     transforms.ToTensor(),
#     transforms.Normalize(
#         mean=TRANSFORM_CONFIG["normalize_mean"], 
#         std=TRANSFORM_CONFIG["normalize_std"]
#     )
# ])

# test_transforms = transforms.Compose([
#     transforms.Resize(TRANSFORM_CONFIG["image_size"]),
#     transforms.ToTensor(),
#     transforms.Normalize(
#         mean=TRANSFORM_CONFIG["normalize_mean"], 
#         std=TRANSFORM_CONFIG["normalize_std"]
#     )
# ])

In [16]:
# train_dataset = WasteClassificationDataset("./data/train", transform=train_transforms)
# test_dataset = WasteClassificationDataset("./data/test", transform=test_transforms)

# train_loader = DataLoader(train_dataset, batch_size=MODEL_CONFIG["batch_size"], shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=MODEL_CONFIG["batch_size"], shuffle=False)



In [17]:
# plot_random_image_and_label(
#     train_dataset, 
#     train_dataset.classes,
#     mean=TRANSFORM_CONFIG["normalize_mean"],
#     std=TRANSFORM_CONFIG["normalize_std"]
# )

In [18]:
# # Plot sample images from each class using the plot_stuff method
# train_dataset.plot_stuff(num_samples_per_class=4, show_transformed=True)


# EDA

In [19]:
# labels = [label for _, label in train_dataset]
# class_counts = Counter(labels)
# class_names = train_dataset.classes
# counts = {class_names[k]: v for k, v in class_counts.items()}
# counts
# # no class imbalance

In [20]:
# def plot_average_images(dataset, classes):
#     # Initialize sums
#     # Assuming images are 224x224 based on your config
#     class_sums = {c: np.zeros((224, 224, 3)) for c in classes}
#     class_counts = {c: 0 for c in classes}

#     print("Calculating average images... this might take a minute.")
    
#     for img, label_idx in dataset:
#         # Image comes in as Tensor (C, H, W) -> Convert to numpy (H, W, C)
#         img = img.permute(1, 2, 0).numpy()
        
#         # If your images are already normalized (mean=0, std=1), 
#         # they might look weird. Un-normalize them for visualization if needed.
#         # For just checking structure, raw values are fine.
        
#         label_name = classes[label_idx]
#         class_sums[label_name] += img
#         class_counts[label_name] += 1

#     plt.figure(figsize=(15, 5))
#     for i, class_name in enumerate(classes):
#         if class_counts[class_name] > 0:
#             avg_img = class_sums[class_name] / class_counts[class_name]
            
#             # Normalize to 0-1 for display
#             avg_img = (avg_img - avg_img.min()) / (avg_img.max() - avg_img.min())
            
#             plt.subplot(1, 4, i+1)
#             plt.imshow(avg_img)
#             plt.title(f"Average {class_name}")
#             plt.axis('off')
#     plt.show()

# # Run the function
# plot_average_images(train_dataset, train_dataset.classes)

In [21]:
# from sklearn.manifold import TSNE
# import seaborn as sns
# import pandas as pd

# def visualize_embeddings(dataset, num_samples=500):
#     print("Extracting data for t-SNE...")
#     images = []
#     labels = []
    
#     # Get a subset to save time
#     indices = np.random.choice(len(dataset), num_samples, replace=False)
    
#     for idx in indices:
#         img, label = dataset[idx]
#         # Flatten the image: (3, 224, 224) -> 150528
#         images.append(img.numpy().flatten())
#         labels.append(dataset.classes[label])
    
#     # Convert to numpy array
#     X = np.array(images)
    
#     # Run t-SNE
#     print("Running t-SNE (this converts 150k pixels -> 2 coordinates)...")
#     tsne = TSNE(n_components=2, random_state=42, init='pca', learning_rate='auto')
#     X_embedded = tsne.fit_transform(X)
    
#     # Plot
#     df = pd.DataFrame(X_embedded, columns=['x', 'y'])
#     df['label'] = labels
    
#     plt.figure(figsize=(10, 8))
#     sns.scatterplot(data=df, x='x', y='y', hue='label', palette='tab10', alpha=0.7)
#     plt.title("t-SNE Projection of Raw Images")
#     plt.show()

# # Run it (might take 1-2 mins)
# visualize_embeddings(train_dataset)

# Introduction
- summarize your work, clearly stating the project's objectives. 

<div style = "color:blue">
    We are given a Waste Dataset, divided into a training set and a test set, each having four class labels (glass, metal, paper, and plastic). Our goal is to develop a Deep Neural Network that classifies each image correctly into one of these types. 
    For this project, I will be using PyTorch for my implementation.
</div>

# Deep Learning Architecture:

Explain your Deep Learning Architecture: What kind of Deep Learning architecture did you use in your project, and why? Number of layers? Number of hidden units? etc.


<div style = "color:purple">
https://medium.com/@sahin.samia/mastering-the-basics-of-torch-nn-a-comprehensive-guide-to-pytorchs-neural-network-module-9f2d704e8c7f <br>

torch.nn = It provides a rich collection of tools to define, manage, and manipulate deep learning models efficiently. With torch.nn, you can construct everything from simple feedforward networks to highly complex architectures like convolutional and recurrent neural networks. <br>

Every model in PyTorch is essentially a subclass of nn.Module, making it the essential blueprint for neural network design. It provides:

Initialization (`__init__`): Define the layers and components of your network. <br>
Forward Pass (forward): Specify how data flows through the layers of your network. <br>
Parameter Management: Automatically tracks and optimizes model parameters.<br>

<br>
- Common Layers in nn <br>

1. nn.Linear: Fully Connected Layer <br>
Purpose: Applies a linear transformation: y = xW^T + b <br>
Common Use: Used in feedforward networks for dense connections between layers.<br>

2. nn.ReLU: Activation Function <br>
Purpose: Applies the ReLU function: $\mathrm{ReLU}(x) = \max(0, x)$ <br>
Common Use: Adds non-linearity after linear layers. <br>

3. nn.Dropout: Dropout Regularization <br>
Purpose: Randomly sets a fraction of input neurons to zero during training to prevent overfitting. <br>
Common Use: Used after fully connected layers or in deeper architectures. <br>
</div>

In [22]:
# image, label = train_dataset[0]
# image.shape # need to flatten the image



# Loss function:

Which loss function did you select to use and why?


Plot the change in your loss function (both training loss and test loss) vs epoch. Your code shall generate the “Change of Training and Test losses vs Epoch” graph.

Discuss these loss curves

<div style = "color:purple">
    A loss function is used to quantify the difference between the predicted and the actual (target) values. <br><br>
    Common Loss Functions <br>
    1. nn.MSELoss: Mean Squared Error Loss<br>
    Purpose: Measures the average squared difference between the predicted values and the actual target values. <br>
    Common Use: Used in regression tasks where the goal is to predict continuous values. <br><br>
    2. nn.CrossEntropyLoss: Cross-Entropy Loss <br>
    Purpose: Combines nn.LogSoftmax and nn.NLLLoss to calculate the cross-entropy between predicted probabilities and true class labels. <br>
    Common Use: Used in multi-class classification tasks. <br><br>
    3. nn.BCELoss: Binary Cross-Entropy for binary classification problems. <br><br>
    4. nn.BCEWithLogitsLoss: Combines a Sigmoid activation with Binary Cross-Entropy, more numerically stable than using nn.BCELoss with a separate Sigmoid.<br><br>
    5. nn.HingeEmbeddingLoss: Useful for tasks like similarity learning or when using SVM-like objectives.<br><br>
    6. nn.SmoothL1Loss: Combines MSE and L1 losses to handle outliers more gracefully.<br><br>
</div>


<div style = "color:darkblue">
    Optimizers and torch.optim Integration <br><br>
    Models built with torch.nn automatically track their trainable parameters (e.g., weights and biases). <br>
    These parameters are passed to an optimizer from torch.optim to compute and apply parameter updates. <br><br>
    torch.nn defines the structure and forward computation of the model. <br>
    torch.optim takes care of adjusting the model's parameters during training based on the gradients computed by backpropagation. <br><br>
    torch.nn computes the forward pass and loss.<br>
    Gradients are computed via loss.backward().<br>
    torch.optim updates parameters using optimizer.step().<br><br>
    1. Stochastic Gradient Descent (SGD)<br>
        Description: Updates parameters by moving them in the direction of the negative gradient of the loss function.<br>
        Configuration:<br>
        lr (learning rate): Controls the step size for updates.<br>
        momentum: Adds inertia to the updates, helping to overcome small local minima.<br><br>
    2.  Adam Optimizer <br>
        Description: Combines the benefits of SGD with momentum and adaptive learning rates. <br>
        Configuration: <br>
        lr: Learning rate.<br>
        betas: Coefficients for computing moving averages of gradient and its square (default: (0.9, 0.999)).<br>
        eps: Small value to prevent division by zero.<br><br>
    3. RMSprop<br>
        Description: Similar to Adam but uses only squared gradients to adjust learning rates.<br>
        Common Use: Suitable for recurrent neural networks.<br><br>
    4. Adagrad: Adjusts learning rates for individual parameters based on their updates.<br><br>
    5. AdamW: A variant of Adam with decoupled weight decay for better generalization.<br><br>
</div>

<img src="./images/hyperparametersopt.png" alt="Machine Learning Recipe" width="800">

<!-- TODO: empty <img> placeholder — add a src/alt or remove it -->

# Hyperparameters

<img src = "./images/hyperparameters.png">


The number of layers


Number of nodes at each layer


Learning rate

<div style = "color:blue">
We should slowly reduce alpha (the learning rate) as training progresses.

<img src = "./images/lr1.png">
<img src = "./images/lr2.png">

</div>


“Mini batch” size
- What is the mini-batch size you used in your code?


- Why did you select this value?



Activation functions
- What is the activation function(s) used in your code?



- Why did you select them to use?



Optimization function
- Which optimization function did you use? Why?



- What are the optimization parameters used?


Regularization function 
- Did you use any regularization function?




- Why used/ not used?




- If used, what are the regularization parameters?




Epochs
- What is the number of epochs used?




- Why did you select this value?





In [23]:
# Load the hyperparameter configurations that drive the search loop.
# JSON is read as UTF-8 explicitly so free-text fields (e.g. "notes") decode
# the same way on every platform instead of depending on the locale encoding.
config_path = 'hyperparameters.json'
with open(config_path, 'r', encoding='utf-8') as f:
    hyperparameter_configs = json.load(f)

# The search loop enumerates a list of config dicts. Accept a file holding a
# single config object as well, so it is not iterated key-by-key.
if isinstance(hyperparameter_configs, dict):
    hyperparameter_configs = [hyperparameter_configs]

In [24]:
# Hyperparameter search.
#
# For every entry in `hyperparameter_configs` this cell:
#   1. creates a dedicated run directory under RUNS_BASE_DIR,
#   2. pushes the entry's values into the shared config dicts,
#   3. builds datasets, loaders, model, optimizer, scheduler and early stopping,
#   4. trains, then saves curves, confusion matrix, weights and config,
#   5. appends a summary row to model_runs.csv and tracks the best run.
#
# NOTE(review): the test split is passed to train_model together with the
# scheduler and early stopping, and is also used to pick the best run, so the
# reported test accuracy is presumably optimistic — consider carving out a
# validation split for these decisions.
RUNS_BASE_DIR = "runs"
DEFAULT_WEIGHT_DECAY = 0.01  # applied when a config entry omits "weight_decay"
DEFAULT_SEED = 42            # applied when a config entry omits "seed"


def _build_transform(image_size):
    """Build the resize -> tensor -> normalize pipeline for one dataset split.

    Args:
        image_size: Target size handed to ``transforms.Resize``.

    Returns:
        A ``transforms.Compose`` that uses the normalization statistics
        currently stored in ``TRANSFORM_CONFIG``.
    """
    return transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=TRANSFORM_CONFIG["normalize_mean"],
            std=TRANSFORM_CONFIG["normalize_std"]
        )
    ])


best_test_accuracy = 0.0
best_run_id = None
best_run_dir = None
os.makedirs(RUNS_BASE_DIR, exist_ok=True)
for run_idx, hp_config in enumerate(hyperparameter_configs):
    # Create unique run ID and directory
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    run_id = f"run_{run_idx+1}_{timestamp}"
    run_dir = os.path.join(RUNS_BASE_DIR, run_id)
    os.makedirs(run_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print(f"Starting {run_id}")
    print(f"Run directory: {run_dir}")
    print(f"Config: {hp_config}")
    print(f"{'='*60}\n")

    # Seed the RNGs per run so weight init and batch shuffling start from the
    # same state for every configuration (best effort; some accelerator
    # kernels may still be non-deterministic).
    run_seed = hp_config.get("seed", DEFAULT_SEED)
    torch.manual_seed(run_seed)
    np.random.seed(run_seed)

    # Update configs
    MODEL_CONFIG["learning_rate"] = hp_config["learning_rate"]
    MODEL_CONFIG["batch_size"] = hp_config["batch_size"]
    MODEL_CONFIG["epochs"] = hp_config["epochs"]
    # Resolve the default here: indexing hp_config directly would raise
    # KeyError before any later fallback could apply.
    MODEL_CONFIG["weight_decay"] = hp_config.get("weight_decay", DEFAULT_WEIGHT_DECAY)
    MODEL_ARCHITECTURE_CONFIG["hidden_sizes"] = hp_config["hidden_sizes"]
    MODEL_ARCHITECTURE_CONFIG["dropout_rates"] = hp_config["dropout_rates"]
    TRANSFORM_CONFIG["image_size"] = tuple(hp_config["image_size"])
    # NOTE(review): MODEL_ARCHITECTURE_CONFIG["input_size"] is not recomputed
    # when image_size changes; `calculate_input_size` is imported but unused.
    # Confirm the configured input_size matches every image_size in the file.

    # Train and test use the same preprocessing (no augmentation).
    train_transforms = _build_transform(TRANSFORM_CONFIG["image_size"])
    test_transforms = _build_transform(TRANSFORM_CONFIG["image_size"])

    train_dataset = WasteClassificationDataset("./data/train", transform=train_transforms)
    test_dataset = WasteClassificationDataset("./data/test", transform=test_transforms)

    train_loader = DataLoader(train_dataset, batch_size=MODEL_CONFIG["batch_size"], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=MODEL_CONFIG["batch_size"], shuffle=False)

    model = NeuralNetwork(
        input_size=MODEL_ARCHITECTURE_CONFIG["input_size"],
        hidden_sizes=MODEL_ARCHITECTURE_CONFIG["hidden_sizes"],
        num_classes=MODEL_ARCHITECTURE_CONFIG["num_classes"],
        dropout_rates=MODEL_ARCHITECTURE_CONFIG["dropout_rates"]
    ).to(device)

    # optimizer = torch.optim.Adam(model.parameters(), lr=MODEL_CONFIG["learning_rate"])
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=MODEL_CONFIG["learning_rate"],
        weight_decay=MODEL_CONFIG["weight_decay"]
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',           # minimize the loss
        factor=0.5,           # reduce LR by half
        patience=5,           # wait 5 epochs before reducing
    )

    criterion = nn.CrossEntropyLoss()

    # Early-stopping settings are optional per config entry.
    es_config = hp_config.get("early_stopping", {})
    early_stopping = EarlyStopping(
        patience=es_config.get("patience", 7),
        min_delta=es_config.get("min_delta", 0.001),
        mode=es_config.get("mode", "min"),
        restore_best_weights=es_config.get("restore_best_weights", True)
    )

    start_time = time.time()
    history = train_model(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        num_epochs=MODEL_CONFIG["epochs"],
        test_loader=test_loader,
        verbose=True,
        scheduler=scheduler,
        early_stopping=early_stopping
    )
    total_time = time.time() - start_time

    # NOTE(review): these are the metrics of the LAST recorded epoch. When
    # early stopping restores the best weights, the saved model presumably
    # corresponds to an earlier epoch — confirm and align if needed.
    final_train_metrics = {
        'loss': history['train_losses'][-1],
        'accuracy': history['train_accuracies'][-1],
        'f1_macro': history['train_f1_macro'][-1],
        'precision_macro': history['train_precision_macro'][-1],
        'recall_macro': history['train_recall_macro'][-1],
    }

    final_test_metrics = {
        'loss': history['test_losses'][-1],
        'accuracy': history['test_accuracies'][-1],
        'f1_macro': history['test_f1_macro'][-1],
        'precision_macro': history['test_precision_macro'][-1],
        'recall_macro': history['test_recall_macro'][-1],
    }

    plot_loss_curves(history, save_path=os.path.join(run_dir, "loss_curve.png"), show=False)
    plot_accuracy_curves(history, save_path=os.path.join(run_dir, "accuracy_curve.png"), show=False)
    plot_f1_curves(history, save_path=os.path.join(run_dir, "f1_curve.png"), show=False)
    plot_precision_curves(history, save_path=os.path.join(run_dir, "precision_curve.png"), show=False)
    plot_recall_curves(history, save_path=os.path.join(run_dir, "recall_curve.png"), show=False)

    test_eval = evaluate_model(model, test_loader, criterion, device)
    plot_confusion_matrix(
        test_eval['y_true'],
        test_eval['y_pred'],
        class_names=train_dataset.classes,
        save_path=os.path.join(run_dir, "confusion_matrix.png"),
        show=False
    )
    # The figures are already on disk; close them so they do not pile up in
    # memory (and in the cell output) across runs.
    plt.close("all")

    model_path = save_model(model, run_dir, final_test_metrics, model_name="model.pth")

    full_config = {
        'hyperparameters': hp_config,
        'model_config': MODEL_CONFIG,
        'architecture_config': MODEL_ARCHITECTURE_CONFIG,
        'transform_config': TRANSFORM_CONFIG,
        'seed': run_seed,
    }
    save_config_to_json(full_config, run_dir, filename="config.json")

    run_data = {
        'run_id': run_id,
        'run_dir': run_dir,  # Include directory path
        'timestamp': datetime.now().isoformat(),
        'total_time': total_time,
        'hyperparameters': hp_config,
        'config': {
            'input_size': MODEL_ARCHITECTURE_CONFIG["input_size"],
            'num_classes': MODEL_ARCHITECTURE_CONFIG["num_classes"],
        },
        'train_metrics': final_train_metrics,
        'test_metrics': final_test_metrics,
        "notes": hp_config.get("notes", "")
    }
    save_run_to_csv(run_data, csv_path="model_runs.csv")

    # Keep track of the run with the highest recorded test accuracy.
    if final_test_metrics['accuracy'] > best_test_accuracy:
        best_test_accuracy = final_test_metrics['accuracy']
        best_run_id = run_id
        best_run_dir = run_dir
        # Optionally create a symlink or copy to "best_model" directory
        print(f"\n*** New best model! Test Accuracy: {best_test_accuracy:.4f} ***\n")

    print(f"Run {run_id} completed in {total_time:.2f} seconds")
    print(f"Test Accuracy: {final_test_metrics['accuracy']:.4f}")
    print(f"All artifacts saved to: {run_dir}\n")

print(f"\n{'='*60}")
print("Hyperparameter search complete!")
print(f"Best run: {best_run_id}")
print(f"Best run directory: {best_run_dir}")
print(f"Best test accuracy: {best_test_accuracy:.4f}")
print(f"{'='*60}")


Starting run_1_20251129_193314
Run directory: runs/run_1_20251129_193314
Config: {'learning_rate': 0.0005, 'batch_size': 32, 'epochs': 60, 'hidden_sizes': [512, 256, 128], 'dropout_rates': [0.4, 0.4, 0.3], 'weight_decay': 0.01, 'notes': 'Lower Dropout and Higher Hidden Size', 'image_size': [64, 64], 'early_stopping': {'patience': 10, 'min_delta': 0.0009, 'mode': 'min', 'restore_best_weights': True}}

[Dataset] Loaded 1540 examples from 4 classes: ['glass', 'metal', 'paper', 'plastic']
[Dataset] Loaded 100 examples from 4 classes: ['glass', 'metal', 'paper', 'plastic']
Epoch [1/60] - LR: 0.000500
  Train - Loss: 1.5001, Acc: 0.2857, F1: 0.2837
  Test  - Loss: 1.2704, Acc: 0.4700, F1: 0.4403
  Early Stop Counter: 0/10
--------------------------------------------------
Epoch [2/60] - LR: 0.000500
  Train - Loss: 1.3572, Acc: 0.3519, F1: 0.3497
  Test  - Loss: 1.2395, Acc: 0.3800, F1: 0.3230
  Early Stop Counter: 0/10
--------------------------------------------------
Epoch [3/60] - LR: 0

<img src="./images/biasvariance.png" alt="Machine Learning Recipe" width="800">


<p> I got a high training error, which points to high bias (underfitting), so I may need to increase my model size. L1 regularization is not typically used in neural networks.</p>

We do not use dropout regularization at test time.

<ul>
<li>Data Augmentation</li>
<li>Early Stopping</li>
<li>Normalization of train sets</li>
</ul>

# Explanation of the developed code

Explain the essential functions/classes you developed in your code.

Explain how to run your code if you are using the “command prompt” or any environment other than VS Code and Jupyter. 

<div style = "color:blue">
- WasteClassificationDataset Class
This class was created by inheriting from the Dataset class provided by PyTorch. It implements the three methods required by the PyTorch Dataset interface: `__init__`, `__len__`, and `__getitem__`.

The images were organized by class folders(paper, metal, glass, plastic), which made it easier to create class labels for the dataset.

This could've been done using base Python as well, but PyTorch provides additional helpful tools like automatic differentiation for backpropagation, optimizers, etc., which is why we need PyTorch to convert the dataset into tensors.

</div>


# Hyperparameter Optimization 

Optimize your “learning rate” and “number of layers”.
What is the effect of “optimization” on “learning rate” and “number of layers”?  (indicate starting values and the optimized values)





How did you optimize your hyperparameters?






What are the optimization results (show the numerical values and state how you improved them)




# Discuss the Performance of Your Model:

- Confusion matrix 

What is the metric selected to evaluate your model?


What is the best metric value obtained? 


What kind of error(s) do you have in your model? Discuss your errors, state what they mean. 


Due to the errors stated above, what shall be done to improve your model’s performance?


Which techniques have you applied to improve your model’s performance? 


Do you have overfitting or underfitting in your model? Which processes did you apply to prevent underfitting/overfitting? 



# Error analysis and misclassified examples

Please discuss the error analysis you performed in your project.


# Deep Learning Strategies

What are the Deep Learning Strategies you performed in this project? 




If they exist, why did you select these strategies to apply? 


# Discussion and Conclusion

Discuss your results and draw conclusions from your work. State what you learned in this project. 
