<a href="https://colab.research.google.com/github/ak-hannou/compsci-4ml3/blob/main/kaggle-final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch  # Core PyTorch library for tensor operations
from torchvision import datasets
from torchvision.transforms import v2  # Datasets and transformations for computer vision
import torch.nn as nn  # Neural network components
import torch.optim as optim  # Optimization algorithms
import numpy as np  # Numerical operations
import matplotlib.pyplot as plt  # Plotting
from torch.utils.data import DataLoader, Dataset, random_split, Subset, TensorDataset  # Data handling utilities
import pandas as pd

In [36]:
# Training hyper-parameters shared by the data loaders, the optimizer and the training loop.
batch_size = 64        # samples per mini-batch
learning_rate = 1e-4   # step size handed to the optimizer
num_epochs = 250       # number of full passes over the training set

In [3]:
# Per-channel mean / std handed to v2.Normalize for both the train and test pipelines.
cifar100_mean = (0.5071, 0.4867, 0.4408)
cifar100_std = (0.2675, 0.2565, 0.2761)

# Training pipeline: random flip / rotation on the raw image, then conversion to a
# float32 tensor scaled to [0, 1] and per-channel normalisation.
# v2.ToTensor() is deprecated in torchvision's v2 API; ToImage + ToDtype(scale=True)
# is the documented replacement and yields the same [0, 1] float32 values.
train_transform = v2.Compose([
    v2.RandomHorizontalFlip(),
    v2.RandomRotation(20),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(cifar100_mean, cifar100_std),
])

# Evaluation pipeline: no augmentation, same conversion and normalisation as training.
test_transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(cifar100_mean, cifar100_std),
])

train_dataset = datasets.CIFAR100(
    root='./data',             # Change this path if needed
    train=True,                # Training split
    download=True,             # Download the archive if it is not already in `root`
    transform=train_transform  # Augment + normalise
)
test_dataset = datasets.CIFAR100(
    root='./data',             # Change this path if needed
    train=False,               # Held-out test split
    download=True,             # Download the archive if it is not already in `root`
    transform=test_transform   # Normalise only
)

# Batch the two splits; only the training data is reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True         # Shuffle data for training
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False        # Keep a fixed order for evaluation
)




Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:13<00:00, 12.8MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [37]:
class CIFARClassifier(nn.Module):
    """Convolutional classifier: two conv-conv-pool-dropout stages plus a two-layer head.

    ``fc1`` takes 16384 flattened features, which is what a 3x32x32 input is
    reduced to after the two 2x2 poolings (256 channels x 8 x 8). The last layer
    emits 100 raw scores per sample; no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 3 -> 128 -> 128 channels, then halve height and width.
        self.conv1 = nn.Conv2d(3, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(p=0.2)

        # Stage 2: 128 -> 256 -> 256 channels, then halve height and width again.
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(p=0.2)

        # Classification head: flatten -> 512 hidden units -> 100 scores.
        self.fc1 = nn.Linear(in_features=16384, out_features=512)
        self.dropout3 = nn.Dropout(p=0.2)
        self.batch_norm = nn.BatchNorm1d(num_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=100)

        # One stateless activation module reused after every conv / hidden layer.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return a ``(batch, 100)`` tensor of class scores for the image batch ``x``."""
        # Stage 1
        h = self.relu(self.conv1(x))
        h = self.relu(self.conv2(h))
        h = self.dropout1(self.pool1(h))

        # Stage 2
        h = self.relu(self.conv3(h))
        h = self.relu(self.conv4(h))
        h = self.dropout2(self.pool2(h))

        # Collapse channels and spatial dims into one feature vector per sample.
        h = h.view(h.shape[0], -1)

        # Head: dropout is applied to the hidden activations before batch norm.
        h = self.relu(self.fc1(h))
        h = self.batch_norm(self.dropout3(h))
        return self.fc2(h)

In [38]:
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network and move its parameters onto the chosen device.
model = CIFARClassifier()
model = model.to(device)
print(device)

# Loss and optimizer. CrossEntropyLoss applies log-softmax itself, so the model outputs raw scores.
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(params=model.parameters(), lr=learning_rate)

cuda


In [39]:
def evaluate(model, data_loader, criterion):
    """Compute the accuracy and per-sample mean loss of ``model`` over ``data_loader``.

    The model is switched to evaluation mode for the duration of the call and is
    then put back into whichever mode (train / eval) it was in beforehand, so the
    function can be called both from inside a training loop and on a model that
    is already in eval mode.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to per-class scores of
            shape ``(batch, num_classes)``.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning the *mean*
            loss of the batch (the default reduction of ``nn.CrossEntropyLoss``).

    Returns:
        ``(accuracy, avg_loss)``: accuracy as a percentage in ``[0, 100]`` and
        the loss averaged over all samples.

    Raises:
        ValueError: if ``data_loader`` yields no samples.
    """
    # Run on the device the model lives on instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    loss_sum = 0.0
    try:
        # Parameters are not updated during evaluation, so no gradients are needed.
        with torch.no_grad():
            for images_batch, labels_batch in data_loader:
                images_batch = images_batch.to(model_device)
                labels_batch = labels_batch.to(model_device)
                batch_count = labels_batch.size(0)

                outputs = model(images_batch)  # Forward pass
                # The predicted label is the output with the highest activation.
                predicted = outputs.argmax(dim=1)
                total += batch_count
                correct += (predicted == labels_batch).sum().item()

                # Weight each batch mean by its size so that a shorter final
                # batch does not count as much as a full one.
                loss_sum += criterion(outputs, labels_batch).item() * batch_count
    finally:
        # Restore the caller's mode even if the loop above raised.
        model.train(was_training)

    if total == 0:
        raise ValueError("data_loader yielded no samples")

    accuracy = 100 * correct / total
    avg_loss = loss_sum / total
    return accuracy, avg_loss

In [40]:
# Optimise the network and record train / test metrics once per epoch.
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

model.train()
for epoch in range(num_epochs):
    # One optimisation pass over the training set.
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()                     # drop gradients from the previous step
        loss = criterion(model(inputs), targets)  # forward pass + loss
        loss.backward()                           # back-propagate
        optimizer.step()                          # update the weights

    # Measure both splits after the epoch; evaluate() leaves the model in
    # training mode, so the next epoch can start straight away.
    train_accuracy, train_loss = evaluate(model, train_loader, criterion)
    test_accuracy, test_loss = evaluate(model, test_loader, criterion)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

    print(f'Epoch {epoch+1:02d}/{num_epochs:02d} - Train Loss: {train_loss:.6f}, Train Acc: {train_accuracy:.2f}%')
    print(f'            - Test Loss: {test_loss:.6f}, Test Acc: {test_accuracy:.2f}%')
    print("-" * 60)

Epoch 01/250 - Train Loss: 3.312363, Train Acc: 23.24%
            - Test Loss: 3.300554, Test Acc: 23.86%
------------------------------------------------------------
Epoch 02/250 - Train Loss: 2.865386, Train Acc: 31.12%
            - Test Loss: 2.838215, Test Acc: 31.91%
------------------------------------------------------------
Epoch 03/250 - Train Loss: 2.350265, Train Acc: 40.78%
            - Test Loss: 2.379840, Test Acc: 39.79%
------------------------------------------------------------
Epoch 04/250 - Train Loss: 2.045105, Train Acc: 48.47%
            - Test Loss: 2.159988, Test Acc: 45.28%
------------------------------------------------------------
Epoch 05/250 - Train Loss: 1.845189, Train Acc: 51.56%
            - Test Loss: 1.995529, Test Acc: 48.28%
------------------------------------------------------------
Epoch 06/250 - Train Loss: 1.617640, Train Acc: 57.38%
            - Test Loss: 1.862278, Test Acc: 50.77%
-----------------------------------------------------

In [41]:
# Persist the trained weights so the model can be reloaded without retraining.
torch.save(model.state_dict(), "state")

# Load the competition test set: an "ID" column plus one column per pixel value.
test_data = pd.read_csv("test.csv")

test_ids = test_data["ID"]
image_data = test_data.drop(columns=["ID"]).values

num_samples = image_data.shape[0]
# NOTE(review): assumes every row is stored channel-first (3 x 32 x 32) — confirm against the CSV spec.
images = image_data.reshape(num_samples, 3, 32, 32).astype('float32')

# The network was trained on inputs scaled to [0, 1] and then normalised with
# cifar100_mean / cifar100_std; feeding it raw CSV values would put the inputs
# on a different scale from anything it saw during training, so the same
# preprocessing is applied here.
# NOTE(review): negative values are taken to mean the CSV is already normalised,
# in which case it is left untouched — confirm the CSV's value range.
if images.size and images.min() >= 0.0:
    if images.max() > 1.0:  # raw 0-255 pixel intensities
        images = images / 255.0
    channel_mean = np.asarray(cifar100_mean, dtype='float32').reshape(1, 3, 1, 1)
    channel_std = np.asarray(cifar100_std, dtype='float32').reshape(1, 3, 1, 1)
    images = (images - channel_mean) / channel_std

images_tensor = torch.tensor(images)

In [42]:
# Wrap the CSV images for batched inference. This rebinds test_dataset / test_loader,
# which until now referred to the CIFAR-100 test split. Order is kept (no shuffling)
# so predictions stay aligned with test_ids.
test_dataset = TensorDataset(images_tensor)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [43]:
# Predict one class index per image in the CSV test set.
predictions = []

model.eval()  # inference mode for dropout and batch norm
with torch.no_grad():
    # The loader wraps a single-tensor TensorDataset, so each item holds exactly one tensor.
    for (batch,) in test_loader:
        logits = model(batch.to(device))
        batch_labels = logits.argmax(dim=1)  # index of the highest score per sample
        predictions.extend(batch_labels.cpu().numpy())

In [44]:
# Pair every test ID with its predicted label and write the result without the index column.
submission_columns = {"ID": test_ids, "Label": predictions}
submission = pd.DataFrame(submission_columns)
submission.to_csv("submission.csv", index=False)

print("Predictions saved to submission.csv")

Predictions saved to submission.csv
