<a href="https://colab.research.google.com/github/Zachary-Wenhao/IML-Team-Attack-Diffusion-Model/blob/data-preparation/Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classifier


In [None]:
! pip install torch torchvision



In [None]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import models

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Display the device selected above.
device

device(type='cuda')

# EfficientNet-B0 Loading

# Fine-tuning on Tiny ImageNet (Classifier)

In [None]:
!pip install datasets
!pip install numpy

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [None]:


import numpy as np
import torch
from datasets import load_dataset

from torch.utils.data import Dataset
import torchvision.transforms as transforms
# Define data transforms (resize to 224x224 for EfficientNet).
# Some images in the dataset are not RGB, so single-channel tensors are expanded
# to three channels before the 3-channel Normalize is applied.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0) == 1 else x),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

from datasets import load_dataset

# Load the Tiny ImageNet train and validation splits from the Hugging Face Hub.
imagenet_train = load_dataset('Maysee/tiny-imagenet', split='train')
imagenet_val_combined = load_dataset('Maysee/tiny-imagenet', split='valid')

# Halve the validation split into validation and test sets, stratified by label.
# The fixed seed keeps the partition identical across kernel restarts, so
# validation/test results are comparable between runs.
imagenet_val_test = imagenet_val_combined.train_test_split(
    test_size=0.5,
    stratify_by_column='label',
    seed=42,
)
imagenet_val = imagenet_val_test['train']
imagenet_test = imagenet_val_test['test']

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

dataset_infos.json:   0%|          | 0.00/3.52k [00:00<?, ?B/s]

(…)-00000-of-00001-1359597a978bc4fa.parquet:   0%|          | 0.00/146M [00:00<?, ?B/s]

(…)-00000-of-00001-70d52db3c749a935.parquet:   0%|          | 0.00/14.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [None]:
# Inspect the features and row count of the combined validation split.
imagenet_val_combined

Dataset({
    features: ['image', 'label'],
    num_rows: 10000
})

In [None]:
import torch
from torch.utils.data import Dataset


class ImageNetDataset(Dataset):
    """Map-style PyTorch dataset over an indexable image-classification dataset.

    Each item is an ``(image, label)`` pair read from the ``'image'`` and
    ``'label'`` fields of the wrapped dataset's rows.
    """

    def __init__(self, huggingface_dataset, transform=None):
        """
        Args:
            huggingface_dataset: Our ImageNet dataset from huggingface; any
                indexable object whose rows expose 'image' and 'label' keys.
            transform: Optional callable applied to each image before it is
                returned.
        """
        self.dataset = huggingface_dataset
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at ``idx``."""
        # Index the underlying dataset only once: every row access fetches the
        # whole example again, so two lookups would double the per-sample cost.
        sample = self.dataset[idx]
        image = sample['image']
        label = sample['label']

        # Apply the transform if specified
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
from torch.utils.data import DataLoader
from torchvision import transforms


# Preprocessing shared by the training and validation sets: resize to 224x224,
# convert to a tensor, expand single-channel images to three channels, then
# normalise each channel with the given mean/std.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Some images are not RGB; tile a lone channel three times so that
        # Normalize always receives a 3-channel tensor.
        transforms.Lambda(lambda img: img if img.size(0) != 1 else img.repeat(3, 1, 1)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Training batches are reshuffled on every pass over the data.
train_dataset = ImageNetDataset(huggingface_dataset=imagenet_train, transform=transform)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Validation batches keep a fixed order.
val_dataset = ImageNetDataset(huggingface_dataset=imagenet_val, transform=transform)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [None]:
import tqdm
def train(model, train_dataloader, criterion, optimizer):
  """Run one training epoch and report the per-sample loss and accuracy.

  Args:
    model: Network to optimise; it is switched to training mode here.
    train_dataloader: Iterable yielding ``(images, labels)`` batches.
    criterion: Loss callable invoked as ``criterion(outputs, labels)``.
    optimizer: Optimizer used to update the model after each batch.

  Returns:
    Tuple ``(mean_loss, accuracy)`` averaged over the samples seen this epoch.

  Note:
    Batches are moved to the module-level ``device``.
  """
  model.train()
  train_loss = 0.0
  num_correct = 0
  num_seen = 0
  for images, labels in tqdm.tqdm(train_dataloader, dynamic_ncols=True, leave=False, position=0):
    images, labels = images.to(device), labels.to(device)

    # Clear gradients before the backward pass so nothing left over from an
    # earlier call can leak into this update.
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    batch_size = labels.size(0)
    # Assumes the criterion returns the batch mean (the nn.CrossEntropyLoss
    # default): weight it by the batch size so that dividing by the sample
    # count below yields a true per-sample average instead of a value scaled
    # down by the batch size.
    train_loss += loss.item() * batch_size
    num_correct += (torch.argmax(outputs, dim=1) == labels).sum().item()
    num_seen += batch_size

  # Normalise by the samples actually processed; guard against an empty loader.
  denom = max(num_seen, 1)
  mean_loss = train_loss / denom
  accuracy = num_correct / denom
  print(f"Train loss: {mean_loss} | Accuracy: {accuracy}")
  return mean_loss, accuracy

def test(model, val_dataloader, criterion):
  """Evaluate the model on a dataloader without updating its weights.

  Args:
    model: Network to evaluate; it is switched to evaluation mode here.
    val_dataloader: Iterable yielding ``(images, labels)`` batches.
    criterion: Loss callable invoked as ``criterion(outputs, labels)``.

  Returns:
    Tuple ``(mean_loss, accuracy)`` averaged over the samples evaluated.

  Note:
    Batches are moved to the module-level ``device``.
  """
  model.eval()
  correct = 0
  test_loss = 0.0
  total = 0
  with torch.no_grad():
    for images, labels in tqdm.tqdm(val_dataloader, dynamic_ncols=True, leave=False, position=0):
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      loss = criterion(outputs, labels)

      batch_size = labels.size(0)
      # Assumes the criterion returns the batch mean (the nn.CrossEntropyLoss
      # default): weight by batch size so the final division by the sample
      # count gives a per-sample average.
      test_loss += loss.item() * batch_size
      predicted = outputs.argmax(dim=1)
      total += batch_size
      correct += (predicted == labels).sum().item()

  # Guard against an empty loader instead of dividing by zero.
  denom = max(total, 1)
  mean_loss = test_loss / denom
  accuracy = correct / denom
  print(f"Test loss: {mean_loss} | Accuracy: {accuracy}")
  return mean_loss, accuracy

def run(model, n_epoch, train_dataloader, val_dataloader, criterion, optimizer):
  """Alternate training and evaluation for ``n_epoch`` epochs.

  Each epoch calls ``train`` on ``train_dataloader`` and then ``test`` on
  ``val_dataloader``, recording the loss and accuracy both return.

  Returns:
    Four lists with one entry per epoch, in this order: train losses,
    train accuracies, test losses, test accuracies.
  """
  train_losses, train_accs, test_losses, test_accs = [], [], [], []
  for epoch_idx in range(n_epoch):
    print(f"Epoch: {epoch_idx}")
    tr_loss, tr_acc = train(model, train_dataloader, criterion, optimizer)
    te_loss, te_acc = test(model, val_dataloader, criterion)

    # Record this epoch's metrics in their respective histories.
    recorded = (
      (train_losses, tr_loss),
      (train_accs, tr_acc),
      (test_losses, te_loss),
      (test_accs, te_acc),
    )
    for series, value in recorded:
      series.append(value)

    print(f"Epoch {epoch_idx+1}/{n_epoch}, Test Accuracy: {te_acc:.4f}")
  return train_losses, train_accs, test_losses, test_accs


In [None]:
# Load EfficientNet-B0 with its ImageNet-1k weights. The `weights=` enum replaces
# the deprecated `pretrained=True` flag and selects the same checkpoint.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Swap the final linear layer for one sized to the 200 target classes.
num_classes = 200
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 135MB/s] 


In [None]:
# Cross-entropy objective for multi-class classification.
criterion = nn.CrossEntropyLoss()
# Adam updates every model parameter (the whole network is fine-tuned).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Number of train/evaluate cycles to run.
n_epoch = 10

# Fine-tune the model and keep the per-epoch metrics for the plots below.
train_losses, train_accs, test_losses, test_accs = run(
    model=model,
    n_epoch=n_epoch,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    criterion=criterion,
    optimizer=optimizer,
)

Epoch: 0




Epoch 1/10, Accuracy: 0.7616
Epoch: 1




KeyboardInterrupt: 

In [None]:
# Plot training vs. validation accuracy per epoch.
# The x-axis is derived from the recorded history (1-based, matching the
# "Epoch k/n" log lines) so the plot cannot go out of sync with n_epoch.
epochs = list(range(1, len(train_accs) + 1))

fig, ax = plt.subplots()
ax.plot(epochs, train_accs, label='train acc', color='blue')
ax.plot(epochs, test_accs, label='test acc', color='red')

# Label the axes so the figure stands on its own.
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('acc')

# Show legend to differentiate between the lines
ax.legend()

# Display the plot
plt.show()

In [None]:
# Plot training vs. validation loss per epoch.
# The x-axis is derived from the recorded history (1-based, matching the
# "Epoch k/n" log lines) so the plot cannot go out of sync with n_epoch.
epochs = list(range(1, len(train_losses) + 1))

fig, ax = plt.subplots()
ax.plot(epochs, train_losses, label='train loss', color='blue')
ax.plot(epochs, test_losses, label='test loss', color='red')

# Label the axes so the figure stands on its own.
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('loss')

# Show legend to differentiate between the lines
ax.legend()

# Display the plot
plt.show()