<a href="https://colab.research.google.com/github/jeet1912/ms/blob/main/ds677assignments/DS677_Week3HW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Week3 Homework1 Image Classification


# Check GPU Type

In [None]:
!nvidia-smi

Thu Sep 12 18:17:10 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Download Data


In [None]:
!pip install gdown --upgrade
!gdown --id '19ZlT0qm-3rdMRe60ya25xiN-ELOohr8M' --output data.zip

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 5.1.0
    Uninstalling gdown-5.1.0:
      Successfully uninstalled gdown-5.1.0
Successfully installed gdown-5.2.0
Downloading...
From (original): https://drive.google.com/uc?id=19ZlT0qm-3rdMRe60ya25xiN-ELOohr8M
From (redirected): https://drive.google.com/uc?id=19ZlT0qm-3rdMRe60ya25xiN-ELOohr8M&confirm=t&uuid=19556538-6ccb-431d-9471-42281c40860f
To: /content/data.zip
100% 1.03G/1.03G [00:23<00:00, 43.3MB/s]


In [None]:
! unzip data.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data/train/5_1294.jpg   
  inflating: data/train/2_237.jpg    
  inflating: data/train/5_908.jpg    
  inflating: data/train/2_223.jpg    
  inflating: data/train/7_5.jpg      
  inflating: data/train/9_286.jpg    
  inflating: data/train/5_1280.jpg   
  inflating: data/train/10_97.jpg    
  inflating: data/train/6_161.jpg    
  inflating: data/train/3_756.jpg    
  inflating: data/train/2_545.jpg    
  inflating: data/train/0_695.jpg    
  inflating: data/train/3_811.jpg    
  inflating: data/train/2_1150.jpg   
  inflating: data/train/9_319.jpg    
  inflating: data/train/5_883.jpg    
  inflating: data/train/3_37.jpg     
  inflating: data/train/4_448.jpg    
  inflating: data/train/5_897.jpg    
  inflating: data/train/3_23.jpg     
  inflating: data/train/2_1144.jpg   
  inflating: data/train/8_678.jpg    
  inflating: data/train/3_805.jpg    
  inflating: data/train/5_129.jpg    
  inflating: data/train

# Import Packages

In [None]:
_exp_name = "sample"

In [None]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
from torchvision.transforms import v2
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from sklearn.model_selection import KFold
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [None]:
def same_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

seed = 1213
same_seeds(seed)

# Transforms
Torchvision provides lots of useful utilities for image preprocessing, data *wrapping* as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [None]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = v2.Compose([
    v2.Resize((128, 128)),
    v2.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = v2.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    v2.RandomResizedCrop((128,128), scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=v2.InterpolationMode.BICUBIC),
    # You may add some transforms here.
    v2.Pad(2, fill=255),
    v2.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=v2.InterpolationMode.BILINEAR, fill=0),
    # ToTensor() should be the last one of the transforms.
    v2.ToTensor(),
    v2.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]),
])




# Datasets
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [None]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files

        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)

        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label

        return im,label

# Model

In [None]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input dimension [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

# Configurations

In [None]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# The number of batch size.
batch_size = 64

# The number of training epochs.
n_epochs = 3

# If no improvement in 'patience' epochs, early stop.
patience = 5

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

# Dataloader

In [None]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset("./data/train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset("./data/valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# Start Training

In [None]:
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for train_index, valid_index in kfold.split(train_set):  # Iterate over folds

    # Create train and validation subsets
    train_subset = Subset(train_set, train_index)
    valid_subset = Subset(train_set, valid_index)

    # Create DataLoaders for train and validation subsets
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)

    # Reset model state (optional)
    model.load_state_dict(model.state_dict())

    for epoch in range(n_epochs):

      # ---------- Training ----------
      # Make sure the model is in train mode before training.
      model.train()

      # These are used to record information in training.
      train_loss = []
      train_accs = []

      for batch in tqdm(train_loader):

          # A batch consists of image data and corresponding labels.
          imgs, labels = batch
          #imgs = imgs.half()
          #print(imgs.shape,labels.shape)

          # Forward the data. (Make sure data and model are on the same device.)
          logits = model(imgs.to(device))

          # Calculate the cross-entropy loss.
          # We don't need to apply softmax before computing cross-entropy as it is done automatically.
          loss = criterion(logits, labels.to(device))

          # Gradients stored in the parameters in the previous step should be cleared out first.
          optimizer.zero_grad()

          # Compute the gradients for parameters.
          loss.backward()

          # Clip the gradient norms for stable training.
          grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

          # Update the parameters with computed gradients.
          optimizer.step()

          # Compute the accuracy for current batch.
          acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

          # Record the loss and accuracy.
          train_loss.append(loss.item())
          train_accs.append(acc)

      train_loss = sum(train_loss) / len(train_loss)
      train_acc = sum(train_accs) / len(train_accs)

      # Print the information.
      print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

      # ---------- Validation ----------
      # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
      model.eval()

      # These are used to record information in validation.
      valid_loss = []
      valid_accs = []

      # Iterate the validation set by batches.
      for batch in tqdm(valid_loader):

          # A batch consists of image data and corresponding labels.
          imgs, labels = batch
          #imgs = imgs.half()

          # We don't need gradient in validation.
          # Using torch.no_grad() accelerates the forward process.
          with torch.no_grad():
              logits = model(imgs.to(device))

          # We can still compute the loss (but not the gradient).
          loss = criterion(logits, labels.to(device))

          # Compute the accuracy for current batch.
          acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

          # Record the loss and accuracy.
          valid_loss.append(loss.item())
          valid_accs.append(acc)
          #break

      # The average loss and accuracy for entire validation set is the average of the recorded values.
      valid_loss = sum(valid_loss) / len(valid_loss)
      valid_acc = sum(valid_accs) / len(valid_accs)

      # Print the information.
      print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


      # update logs
      if valid_acc > best_acc:
          with open(f"./{_exp_name}_log.txt","a"):
              print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
      else:
          with open(f"./{_exp_name}_log.txt","a"):
              print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


      # save models
      if valid_acc > best_acc:
          print(f"Best model found at epoch {epoch}, saving model")
          torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
          best_acc = valid_acc
          stale = 0
      else:
          stale += 1
          if stale > patience:
              print(f"No improvment {patience} consecutive epochs, early stopping")
              break

  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 001/003 ] loss = 1.71786, acc = 0.39428


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 001/003 ] loss = 1.75713, acc = 0.39378
[ Valid | 001/003 ] loss = 1.75713, acc = 0.39378 -> best
Best model found at epoch 0, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 002/003 ] loss = 1.65701, acc = 0.41757


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 002/003 ] loss = 1.96574, acc = 0.36214
[ Valid | 002/003 ] loss = 1.96574, acc = 0.36214


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 003/003 ] loss = 1.62120, acc = 0.43700


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 003/003 ] loss = 1.68678, acc = 0.41514
[ Valid | 003/003 ] loss = 1.68678, acc = 0.41514 -> best
Best model found at epoch 2, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 001/003 ] loss = 1.56543, acc = 0.45381


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 001/003 ] loss = 1.63454, acc = 0.45235
[ Valid | 001/003 ] loss = 1.63454, acc = 0.45235 -> best
Best model found at epoch 0, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 002/003 ] loss = 1.51749, acc = 0.47156


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 002/003 ] loss = 1.56738, acc = 0.45218
[ Valid | 002/003 ] loss = 1.56738, acc = 0.45218


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 003/003 ] loss = 1.45645, acc = 0.49830


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 003/003 ] loss = 1.63253, acc = 0.44658
[ Valid | 003/003 ] loss = 1.63253, acc = 0.44658


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 001/003 ] loss = 1.44808, acc = 0.50158


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 001/003 ] loss = 1.43564, acc = 0.49480
[ Valid | 001/003 ] loss = 1.43564, acc = 0.49480 -> best
Best model found at epoch 0, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 002/003 ] loss = 1.39241, acc = 0.52389


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 002/003 ] loss = 1.45551, acc = 0.48623
[ Valid | 002/003 ] loss = 1.45551, acc = 0.48623


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 003/003 ] loss = 1.34409, acc = 0.54189


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 003/003 ] loss = 1.41692, acc = 0.50983
[ Valid | 003/003 ] loss = 1.41692, acc = 0.50983 -> best
Best model found at epoch 2, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 001/003 ] loss = 1.35426, acc = 0.53169


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 001/003 ] loss = 1.43508, acc = 0.52556
[ Valid | 001/003 ] loss = 1.43508, acc = 0.52556 -> best
Best model found at epoch 0, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 002/003 ] loss = 1.30024, acc = 0.55035


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 002/003 ] loss = 1.42828, acc = 0.51044
[ Valid | 002/003 ] loss = 1.42828, acc = 0.51044


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 003/003 ] loss = 1.27505, acc = 0.56875


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 003/003 ] loss = 1.38690, acc = 0.52603
[ Valid | 003/003 ] loss = 1.38690, acc = 0.52603 -> best
Best model found at epoch 2, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 001/003 ] loss = 1.26530, acc = 0.56773


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 001/003 ] loss = 1.27537, acc = 0.56912
[ Valid | 001/003 ] loss = 1.27537, acc = 0.56912 -> best
Best model found at epoch 0, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 002/003 ] loss = 1.24447, acc = 0.57644


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 002/003 ] loss = 1.22924, acc = 0.58514
[ Valid | 002/003 ] loss = 1.22924, acc = 0.58514 -> best
Best model found at epoch 1, saving model


  0%|          | 0/124 [00:00<?, ?it/s]

[ Train | 003/003 ] loss = 1.20475, acc = 0.58438


  0%|          | 0/31 [00:00<?, ?it/s]

[ Valid | 003/003 ] loss = 1.27508, acc = 0.56236
[ Valid | 003/003 ] loss = 1.27508, acc = 0.56236


# Dataloader for test

In [None]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = FoodDataset("./data/test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Testing and generate prediction CSV

In [None]:
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

  model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))


  0%|          | 0/24 [00:00<?, ?it/s]

In [None]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Label"] = prediction
df.to_csv("submission.csv",index = False)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
df.to_csv('/content/drive/MyDrive/submission.csv', index=False)