#### Andrew Taylor
#### atayl136
#### EN705.601.83 Applied Machine Learning
## Homework 14 - CNN for Intel Image Classification Dataset

In [4]:
# Question 1: Classify Intel Dataset with a CNN

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

class IntelImageClassifier(nn.Module):
    def __init__(self):
        super(IntelImageClassifier, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 9 * 9, 512)
        self.fc2 = nn.Linear(512, 6)

        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        # Convolutional layers with max pooling
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.pool(self.relu(self.conv4(x)))

        # Flatten the output for the fully connected layers
        x = x.view(-1, 256 * 9 * 9)

        # Fully connected layers
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return x

# Hyperparameters
learning_rate = 0.001
batch_size = 64
num_epochs = 10


import os
import cv2
from torch.utils.data import Dataset

class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):  # Make sure this matches with the argument names
        super(CustomImageDataset, self).__init__()
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.labels = {class_name: i for i, class_name in enumerate(self.classes)}
        self.images = []
        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            self.images += [(os.path.join(class_dir, file), self.labels[class_name]) 
                            for file in os.listdir(class_dir) if file.endswith('.jpg')]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

        if self.transform:
            image = self.transform(image)

        return image, label

# Transformation
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
])

# Load dataset
train_dataset = CustomImageDataset(root_dir='archive/seg_train/seg_train', transform=transform)  # root_dir is used here
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomImageDataset(root_dir='archive/seg_test/seg_test', transform=transform)  # root_dir is used here
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


# Initialize the model, loss function, and optimizer
model = IntelImageClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Function to evaluate the model
def evaluate_model(model, data_loader):
    model.eval()
    all_labels = []
    all_preds = []
    
    with torch.no_grad():
        for images, labels in data_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_preds.extend(predicted.numpy())
            
    return all_labels, all_preds

# Evaluate the model
y_true, y_pred = evaluate_model(model, test_loader)

# Classification report
print("Classification Report:")
print(classification_report(y_true, y_pred))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)

   


Epoch [1/10], Loss: 0.3722
Epoch [2/10], Loss: 0.6137
Epoch [3/10], Loss: 0.4249
Epoch [4/10], Loss: 0.3207
Epoch [5/10], Loss: 0.6800
Epoch [6/10], Loss: 0.2104
Epoch [7/10], Loss: 0.2726
Epoch [8/10], Loss: 0.0532
Epoch [9/10], Loss: 0.1805
Epoch [10/10], Loss: 0.0104
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.82      0.79       437
           1       0.95      0.91      0.93       474
           2       0.67      0.84      0.75       553
           3       0.79      0.72      0.75       525
           4       0.85      0.80      0.82       510
           5       0.88      0.75      0.81       501

    accuracy                           0.81      3000
   macro avg       0.82      0.81      0.81      3000
weighted avg       0.81      0.81      0.81      3000

Confusion Matrix:
[[358   5  17   6  12  39]
 [  9 430  23   3   2   7]
 [  6   2 467  56  21   1]
 [  3   1 113 377  30   1]
 [ 12   5  52  33 407   1]
 [ 80  11 

In [5]:
def calculate_accuracy(model, data_loader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No gradients needed
        for images, labels in data_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

# Calculate the reclassification accuracy on the training set
train_accuracy = calculate_accuracy(model, train_loader)
print(f'Training Reclassification Accuracy: {train_accuracy * 100:.2f}%')


Training Reclassification Accuracy: 97.03%


In [6]:
# Question 2: Add Regularization and Dropout to the CNN

class IntelImageClassifier(nn.Module):
    def __init__(self):
        super(IntelImageClassifier, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Dropout layer
        self.dropout = nn.Dropout(0.1)

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 9 * 9, 512)
        self.fc2 = nn.Linear(512, 6)

        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        # Convolutional layers with max pooling and dropout
        x = self.dropout(self.pool(self.relu(self.conv1(x))))
        x = self.dropout(self.pool(self.relu(self.conv2(x))))
        x = self.dropout(self.pool(self.relu(self.conv3(x))))
        x = self.dropout(self.pool(self.relu(self.conv4(x))))

        # Flatten the output for the fully connected layers
        x = x.view(-1, 256 * 9 * 9)

        # Fully connected layers with dropout
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)

        return x

    # Initialize the model
model = IntelImageClassifier()

# L2 Regularization is added here
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)  # 1e-5 is a commonly used value for weight_decay

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    total_loss = 0

    for images, labels in train_loader:
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')


# Evaluate the model
y_true, y_pred = evaluate_model(model, test_loader)

# Classification report
print("Classification Report:")
print(classification_report(y_true, y_pred))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)


Epoch [1/10], Loss: 1.1136
Epoch [2/10], Loss: 0.7735
Epoch [3/10], Loss: 0.6185
Epoch [4/10], Loss: 0.5321
Epoch [5/10], Loss: 0.4601
Epoch [6/10], Loss: 0.3961
Epoch [7/10], Loss: 0.3342
Epoch [8/10], Loss: 0.2840
Epoch [9/10], Loss: 0.2436
Epoch [10/10], Loss: 0.2160
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.76      0.80       437
           1       0.94      0.96      0.95       474
           2       0.87      0.65      0.74       553
           3       0.80      0.80      0.80       525
           4       0.69      0.93      0.80       510
           5       0.86      0.85      0.85       501

    accuracy                           0.82      3000
   macro avg       0.83      0.83      0.82      3000
weighted avg       0.83      0.82      0.82      3000

Confusion Matrix:
[[333  13   3   6  26  56]
 [  0 454   3   5   4   8]
 [  7   4 360  69 108   5]
 [  4   1  41 418  61   0]
 [  8   2   5  18 475   2]
 [ 45  11 

In [7]:
# Question 3: Include Batch Normalization and Early Stopping Features

class IntelImageClassifier(nn.Module):
    def __init__(self):
        super(IntelImageClassifier, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Dropout layer
        self.dropout = nn.Dropout(0.1)

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 9 * 9, 512)
        self.fc2 = nn.Linear(512, 6)

        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        # Convolutional layers with max pooling, batch normalization and dropout
        x = self.dropout(self.pool(self.bn1(self.relu(self.conv1(x)))))
        x = self.dropout(self.pool(self.bn2(self.relu(self.conv2(x)))))
        x = self.dropout(self.pool(self.bn3(self.relu(self.conv3(x)))))
        x = self.dropout(self.pool(self.bn4(self.relu(self.conv4(x)))))

        # Flatten the output for the fully connected layers
        x = x.view(-1, 256 * 9 * 9)

        # Fully connected layers with dropout
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)

        return x

class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.delta = delta

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

from torch.utils.data.dataset import random_split

# Assuming train_dataset is already created
total_size = len(train_dataset)
val_size = int(0.2 * total_size)  # for example, 20% of the training set
train_size = total_size - val_size

train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Create data loaders for training and validation sets
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

            
# Initialize the model, criterion, optimizer, and early stopping
model = IntelImageClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
early_stopping = EarlyStopping(patience=5, delta=0.01)

for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation phase
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    # Early stopping check
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        break

    print(f'Epoch {epoch+1}, Training Loss: {train_loss/len(train_loader):.4f}, Validation Loss: {val_loss/len(val_loader):.4f}')

from sklearn.metrics import classification_report, confusion_matrix

# Function to evaluate the model
def evaluate_model(model, data_loader):
    model.eval()
    all_labels = []
    all_preds = []
    
    with torch.no_grad():
        for images, labels in data_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_preds.extend(predicted.numpy())
            
    return all_labels, all_preds

# Evaluate the model
y_true, y_pred = evaluate_model(model, test_loader)

# Classification report
print("Classification Report:")
print(classification_report(y_true, y_pred))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)



Epoch 1, Training Loss: 1.6037, Validation Loss: 1.0408
Epoch 2, Training Loss: 0.6798, Validation Loss: 0.8027
Epoch 3, Training Loss: 0.5589, Validation Loss: 0.6662
Epoch 4, Training Loss: 0.4668, Validation Loss: 0.6224
Epoch 5, Training Loss: 0.3975, Validation Loss: 0.5756
Epoch 6, Training Loss: 0.3480, Validation Loss: 0.6165
Epoch 7, Training Loss: 0.2933, Validation Loss: 0.5674
Epoch 8, Training Loss: 0.2549, Validation Loss: 0.6493
Epoch 9, Training Loss: 0.2084, Validation Loss: 0.6559
Epoch 10, Training Loss: 0.2270, Validation Loss: 0.6068
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.83      0.78       437
           1       0.92      0.97      0.95       474
           2       0.82      0.73      0.77       553
           3       0.80      0.74      0.77       525
           4       0.75      0.85      0.80       510
           5       0.86      0.78      0.82       501

    accuracy                        

As shown above, the regularization and drop-out improved the model slightly, but the batch normalization and early stopping brought performance back down. I'm not sure why.