# 3-Layer CNN for Colored MNIST Classification

This notebook implements a 3-layer Convolutional Neural Network to train and test on the colored MNIST dataset.

## 1. Import Required Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Report the runtime environment and choose the compute device:
# the GPU when CUDA is usable, otherwise the CPU.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda') if cuda_ready else torch.device('cpu')

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
print(f"Using device: {device}")

PyTorch version: 2.9.0+cu126
CUDA available: True
Using device: cuda


## 2. Load Data from NPZ Files

In [2]:
# Each archive is opened in a `with` block: np.load on an .npz file returns a
# lazily-read NpzFile that keeps its file handle open until it is closed.
# Indexing inside the block materialises the arrays, so they stay valid after
# the archive has been closed.

# Load training data
with np.load('colored_data/train_data.npz') as train_data:
    X_train = train_data['images']
    y_train = train_data['labels']

# Load test dataset 1
with np.load('colored_data/test_data.npz') as test_data1:
    X_test1 = test_data1['images']
    y_test1 = test_data1['labels']

# Load test dataset 2
with np.load('data_gr/test_data.npz') as test_data2:
    X_test2 = test_data2['images']
    y_test2 = test_data2['labels']

print(f"Training data shape: {X_train.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"\nTest dataset 1 shape: {X_test1.shape}")
print(f"Test dataset 1 labels shape: {y_test1.shape}")
print(f"\nTest dataset 2 shape: {X_test2.shape}")
print(f"Test dataset 2 labels shape: {y_test2.shape}")
print(f"\nNumber of classes: {len(np.unique(y_train))}")

FileNotFoundError: [Errno 2] No such file or directory: 'colored_data/train_data.npz'

## 3. Preprocess and Prepare Data

In [None]:
# NOTE: this cell rebinds X_train / X_test1 / X_test2 (and y_train), so running
# it a second time without re-running the load cell rescales and re-splits
# data that has already been processed.

# Scale pixel values by 1/255 and store them as float32
X_train, X_test1, X_test2 = (
    split.astype('float32') / 255.0 for split in (X_train, X_test1, X_test2)
)

# Hold out 10% of the training samples for validation (fixed seed for a repeatable split)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)


def _to_nchw(images):
    """Wrap an (N, H, W, C) float array as an (N, C, H, W) float tensor."""
    return torch.FloatTensor(images).permute(0, 3, 1, 2)


# Image tensors in channel-first layout
X_train_tensor = _to_nchw(X_train)
X_val_tensor = _to_nchw(X_val)
X_test1_tensor = _to_nchw(X_test1)
X_test2_tensor = _to_nchw(X_test2)

# Integer class labels
y_train_tensor, y_val_tensor, y_test1_tensor, y_test2_tensor = map(
    torch.LongTensor, (y_train, y_val, y_test1, y_test2)
)

# Pair images with labels
batch_size = 128
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test1_dataset = TensorDataset(X_test1_tensor, y_test1_tensor)
test2_dataset = TensorDataset(X_test2_tensor, y_test2_tensor)

# Only the training loader shuffles; evaluation loaders keep dataset order so
# predictions line up with the original arrays.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test1_loader, test2_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=False)
    for dataset in (val_dataset, test1_dataset, test2_dataset)
)

print(f"Training set: {X_train_tensor.shape}")
print(f"Validation set: {X_val_tensor.shape}")
print(f"Test set 1: {X_test1_tensor.shape}")
print(f"Test set 2: {X_test2_tensor.shape}")
print(f"Input shape: {X_train_tensor.shape[1:]}")

## 4. Build the 3-Layer CNN Model

In [None]:
class CNN3Layer(nn.Module):
    """Three convolutional blocks followed by a two-layer classifier head.

    Each block is Conv2d(kernel 3, padding 1) -> ReLU -> MaxPool2d(2, 2), with
    32, 64 and 64 output channels respectively. The flattened features then go
    through Linear(64 * 3 * 3 -> 128) -> ReLU -> Dropout(0.5) ->
    Linear(128 -> num_classes).

    The 64 * 3 * 3 input size of the first linear layer requires a 3x3 feature
    map after the third pooling step, e.g. 28x28 inputs (28 -> 14 -> 7 -> 3).

    Args:
        num_classes: number of output logits.
        in_channels: number of channels in the input images. Defaults to 3,
            the value that was previously hard-coded.
    """

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()

        # Convolutional blocks (attribute names are kept stable so previously
        # saved state_dicts continue to load).
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Classifier head
        self.fc1 = nn.Linear(64 * 3 * 3, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits, one row per input sample.

        Args:
            x: batch of images laid out as (N, in_channels, H, W).
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        # Collapse everything except the batch dimension. Unlike .view(),
        # flatten also accepts non-contiguous tensors.
        x = torch.flatten(x, start_dim=1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# Seed PyTorch's global RNG before the model is built so the initial weights
# are the same every time this cell runs. 42 matches the random_state used for
# the train/validation split.
# NOTE(review): some GPU kernels may still be non-deterministic; see the
# PyTorch reproducibility notes if bit-exact runs are required.
torch.manual_seed(42)

# Initialize model and move to device
model = CNN3Layer(num_classes=10).to(device)

# Display model architecture
print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters()):,}")

## 5. Define the Loss Function and Optimizer

In [None]:
# Training objective and parameter-update rule used by the training loop
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Three status lines, one per argument
print(
    "Model compiled successfully!",
    "Optimizer: Adam",
    "Loss function: CrossEntropyLoss",
    sep="\n",
)

## 6. Train the Model

In [None]:
# Training parameters
epochs = 10

# Per-epoch history, consumed later by the training-history plots
train_loss_history = []
train_acc_history = []
val_loss_history = []
val_acc_history = []


def _run_epoch(loader, training):
    """Make one full pass over `loader` and return (mean loss, accuracy).

    With training=True the model is switched to train mode and the optimizer
    is stepped after every batch. With training=False the model is in eval
    mode and gradient tracking is disabled.

    The loss is averaged per sample: each batch loss is weighted by its batch
    size, so a smaller final batch does not skew the epoch average the way a
    plain mean of batch means would.
    """
    model.train(training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            if training:
                optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, target)

            if training:
                loss.backward()
                optimizer.step()

            batch_n = target.size(0)
            # criterion yields the batch-mean loss; scale back to a batch sum
            loss_sum += loss.item() * batch_n
            n_correct += (output.argmax(dim=1) == target).sum().item()
            n_seen += batch_n

    return loss_sum / n_seen, n_correct / n_seen


# Training loop: one optimisation pass, then one validation pass per epoch
for epoch in range(epochs):
    avg_train_loss, train_accuracy = _run_epoch(train_loader, training=True)
    avg_val_loss, val_accuracy = _run_epoch(val_loader, training=False)

    # Store history
    train_loss_history.append(avg_train_loss)
    train_acc_history.append(train_accuracy)
    val_loss_history.append(avg_val_loss)
    val_acc_history.append(val_accuracy)

    # Print epoch results
    print(f'Epoch [{epoch+1}/{epochs}] - '
          f'Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f} - '
          f'Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}')

print("\nTraining completed!")

## 7. Evaluate on Test Dataset 1

In [None]:
# Evaluate on test dataset 1 (no learning - evaluation mode only)
model.eval()
test1_loss = 0.0      # running sum of per-sample losses
test1_correct = 0
test1_total = 0

with torch.no_grad():
    for data, target in test1_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = criterion(output, target)

        batch_n = target.size(0)
        # criterion yields the batch-mean loss; weighting by batch size keeps a
        # smaller final batch from being over-represented in the average.
        test1_loss += loss.item() * batch_n
        predicted = output.argmax(dim=1)
        test1_total += batch_n
        test1_correct += (predicted == target).sum().item()

# Per-sample averages over the whole test set
avg_test1_loss = test1_loss / test1_total
test1_accuracy = test1_correct / test1_total

print(f"\nTest Dataset 1 Results:")
print(f"Test Loss: {avg_test1_loss:.4f}")
print(f"Test Accuracy: {test1_accuracy:.4f} ({test1_accuracy*100:.2f}%)")

## 8. Predictions and Visualization - Test Dataset 1

In [None]:
# Collect the predicted and true label of every sample in test dataset 1
# (inference only: eval mode, no gradient tracking)
model.eval()
test1_predictions, test1_targets = [], []

with torch.no_grad():
    for data, target in test1_loader:
        logits = model(data.to(device))
        test1_predictions.extend(logits.argmax(dim=1).cpu().numpy())
        test1_targets.extend(target.numpy())

predicted_classes_test1 = np.array(test1_predictions)
y_test1_array = np.array(test1_targets)

# Show the first 20 test images in a 4x5 grid; each title gives the predicted
# and the true label, coloured by whether they agree
num_samples = 20
fig, axes = plt.subplots(4, 5, figsize=(12, 10))
axes = axes.ravel()

for i, ax in enumerate(axes[:num_samples]):
    ax.imshow(X_test1[i])
    ax.axis('off')

    pred_label = predicted_classes_test1[i]
    true_label = y_test1_array[i]
    color = 'red' if pred_label != true_label else 'green'

    ax.set_title(f'Pred: {pred_label}\nTrue: {true_label}',
                 fontsize=10, color=color, fontweight='bold')

plt.suptitle('Test Dataset 1 - Sample Predictions (Green=Correct, Red=Incorrect)',
             fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Overall hit count across the full test set
total = len(y_test1_array)
correct = np.sum(predicted_classes_test1 == y_test1_array)
print(f"\nTest Dataset 1 - Correct predictions: {correct}/{total} ({correct/total*100:.2f}%)")

## 9. Evaluate on Test Dataset 2

In [None]:
# Evaluate on test dataset 2 (no learning - evaluation mode only)
model.eval()
test2_loss = 0.0      # running sum of per-sample losses
test2_correct = 0
test2_total = 0

with torch.no_grad():
    for data, target in test2_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = criterion(output, target)

        batch_n = target.size(0)
        # criterion yields the batch-mean loss; weighting by batch size keeps a
        # smaller final batch from being over-represented in the average.
        test2_loss += loss.item() * batch_n
        predicted = output.argmax(dim=1)
        test2_total += batch_n
        test2_correct += (predicted == target).sum().item()

# Per-sample averages over the whole test set
avg_test2_loss = test2_loss / test2_total
test2_accuracy = test2_correct / test2_total

print(f"\nTest Dataset 2 Results:")
print(f"Test Loss: {avg_test2_loss:.4f}")
print(f"Test Accuracy: {test2_accuracy:.4f} ({test2_accuracy*100:.2f}%)")

## 10. Predictions and Visualization - Test Dataset 2

In [None]:
# Collect the predicted and true label of every sample in test dataset 2
# (inference only: eval mode, no gradient tracking)
model.eval()
test2_predictions, test2_targets = [], []

with torch.no_grad():
    for data, target in test2_loader:
        logits = model(data.to(device))
        test2_predictions.extend(logits.argmax(dim=1).cpu().numpy())
        test2_targets.extend(target.numpy())

predicted_classes_test2 = np.array(test2_predictions)
y_test2_array = np.array(test2_targets)

# Show the first 20 test images in a 4x5 grid; each title gives the predicted
# and the true label, coloured by whether they agree
num_samples = 20
fig, axes = plt.subplots(4, 5, figsize=(12, 10))
axes = axes.ravel()

for i, ax in enumerate(axes[:num_samples]):
    ax.imshow(X_test2[i])
    ax.axis('off')

    pred_label = predicted_classes_test2[i]
    true_label = y_test2_array[i]
    color = 'red' if pred_label != true_label else 'green'

    ax.set_title(f'Pred: {pred_label}\nTrue: {true_label}',
                 fontsize=10, color=color, fontweight='bold')

plt.suptitle('Test Dataset 2 - Sample Predictions (Green=Correct, Red=Incorrect)',
             fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Overall hit count across the full test set
total = len(y_test2_array)
correct = np.sum(predicted_classes_test2 == y_test2_array)
print(f"\nTest Dataset 2 - Correct predictions: {correct}/{total} ({correct/total*100:.2f}%)")

## 11. Visualize Training History

In [None]:
# Side-by-side panels: accuracy on the left, loss on the right
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training vs validation accuracy per epoch
acc_ax.plot(train_acc_history, label='Training Accuracy', marker='o')
acc_ax.plot(val_acc_history, label='Validation Accuracy', marker='s')
acc_ax.set_title('Epoch-wise Training vs Validation Accuracy', fontsize=14, fontweight='bold')
acc_ax.set_xlabel('Epoch', fontsize=12)
acc_ax.set_ylabel('Accuracy', fontsize=12)
acc_ax.legend(loc='lower right')
acc_ax.grid(True, alpha=0.3)

# Right panel: training vs validation loss per epoch
loss_ax.plot(train_loss_history, label='Training Loss', marker='o')
loss_ax.plot(val_loss_history, label='Validation Loss', marker='s')
loss_ax.set_title('Epoch-wise Training vs Validation Loss', fontsize=14, fontweight='bold')
loss_ax.set_xlabel('Epoch', fontsize=12)
loss_ax.set_ylabel('Loss', fontsize=12)
loss_ax.legend(loc='upper right')
loss_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 12. Save the Model (Optional)

In [None]:
# Persist the trained weights to disk so the model can be reloaded without retraining.
import os
# NOTE(review): this creates a `models/` directory, but the checkpoint below is
# written to the current working directory, not into `models/`. Confirm which
# location is intended before changing either line.
os.makedirs('models', exist_ok=True)

# Save only the learned parameters (state_dict), not the pickled module object;
# the CNN3Layer class definition is needed again at load time.
torch.save(model.state_dict(), 'task1_v1.pth')
print("Model saved successfully!")

# To load the model later (map_location lets a checkpoint saved on GPU load on CPU):
# model = CNN3Layer(num_classes=10)
# model.load_state_dict(torch.load('task1_v1.pth', map_location=device))
# model.to(device)
# model.eval()

## 13. Confusion Matrix

In [None]:
# sklearn.metrics is not imported by the notebook's first cell, so it is imported here.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# The ten digit classes. Passing them explicitly keeps the matrix 10x10 even if
# some digit never appears among the labels or predictions, which the fixed
# display labels and the per-class loop below both rely on.
class_labels = list(range(10))

# Set model to evaluation mode
model.eval()

# Get predictions on test set
# NOTE(review): the notebook defines test1_loader and test2_loader but no
# `test_loader`, so the original reference raised NameError. Test dataset 1 is
# assumed here; substitute test2_loader to inspect test dataset 2 instead.
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test1_loader:
        outputs = model(images.to(device))
        predicted = outputs.argmax(dim=1)
        all_preds.extend(predicted.cpu().numpy())
        # Labels are only collected, never used on the device, so no transfer is needed
        all_labels.extend(labels.numpy())

# Compute confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(all_labels, all_preds, labels=class_labels)

# Display confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap='Blues', ax=ax, values_format='d')
plt.title('Confusion Matrix', fontsize=16)
plt.tight_layout()
plt.show()

# Print classification metrics per class: share of each true digit predicted
# correctly. Classes with no true samples are skipped to avoid dividing by zero.
print("\nPer-class Accuracy:")
for i in class_labels:
    if cm[i].sum() > 0:
        class_acc = cm[i, i] / cm[i].sum() * 100
        print(f"Digit {i}: {class_acc:.2f}%")