## Step 0: Set up the SimpleNN model
As you have practiced to implement simple neural networks in Homework 1, we just prepare the implementation for you.

In [1]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from google.colab import drive

from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

import numpy as np

# useful libraries
import torchvision
import torchvision.transforms as transforms
drive.mount('/content/drive')
import os
os.chdir("/content/drive/My Drive/ece661/final_project/")


Mounted at /content/drive


In [2]:
import torchvision.models as models
import torch.nn as nn


# Define the base encoder (e.g., ResNet)
# base_encoder = models.resnet50(pretrained=False)
# Remove the final fully connected layer
# base_encoder = nn.Sequential(*list(base_encoder.children())[:-1])


class ResNetEncoder(nn.Module):
    def __init__(self, base_model='resnet18'):
        super(ResNetEncoder, self).__init__()

        # Load the pre-trained base model
        if base_model == 'resnet18':
            self.base_model = models.resnet18(pretrained=True)
        # Add more models as needed

        # Modify the first convolution layer
        self.base_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)

        # Remove the fully connected layer
        self.base_model.fc = nn.Identity()

    def forward(self, x):
        # Forward through the base model
        x = self.base_model.conv1(x)
        x = self.base_model.bn1(x)
        x = self.base_model.relu(x)

        # Skip the maxpool layer
        x = self.base_model.layer1(x)
        x = self.base_model.layer2(x)
        x = self.base_model.layer3(x)
        x = self.base_model.layer4(x)

        # Global average pooling
        x = self.base_model.avgpool(x)
        x = torch.flatten(x, 1)

        return x



# Define the projection head
class ProjectionHead(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(ProjectionHead, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.fc1(x)
        return x

# Assuming output of ResNet50 is 2048, hidden layer of 512, and output of 128
projection_head = ProjectionHead(input_dim=512, output_dim=4)

# Define the SimCLR model
class RotNet(nn.Module):
    def __init__(self, encoder, projection_head):
        super(RotNet, self).__init__()
        self.encoder = encoder
        self.projection_head = projection_head

    def forward(self, x):
        x = self.encoder(x)
        x = torch.flatten(x, start_dim=1)  # Flatten the output
        x = self.projection_head(x)
        return x



In [3]:
def model_size_in_mib(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.numel() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.numel() * buffer.element_size()
    total_size = param_size + buffer_size
    total_size_mib = total_size / (1024 ** 2)  # Convert bytes to MiB
    return total_size_mib

## Step 1: Set up preprocessing functions
Preprocessing is very important as discussed in the lecture.
You will need to write preprocessing functions with the help of *torchvision.transforms* in this step.
You can find helpful tutorial/API at [here](https://pytorch.org/vision/stable/transforms.html).

## Step 2: Set up dataset and dataloader



In [4]:
class RotNetDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, _ = self.dataset[idx]

        # Randomly choose a rotation angle (0, 90, 180, 270 degrees)
        rotation_label = np.random.choice([0, 1, 2, 3])
        rotated_image = image.rotate(rotation_label * 90)

        # Convert PIL image to tensor
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        rotated_image = transform(rotated_image)

        return rotated_image, rotation_label

In [5]:
# Data transformation
mean = (0.4914, 0.4822, 0.4465)
std = (0.2470, 0.2435, 0.2616)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


# Load CIFAR10 dataset
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

# Load the dataset
original_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=None)
original_rotational_detection_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=None)

# Wrap it with RotNetDataset
rotnet_dataset = RotNetDataset(original_dataset)
detection_dataset = RotNetDataset(original_rotational_detection_dataset)

# DataLoader
trainloader = torch.utils.data.DataLoader(rotnet_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
detectionloader = torch.utils.data.DataLoader(detection_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


## Step 3: Instantiate your model and deploy it to GPU devices.


In [12]:
# specify the device for computation
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Initialize the model
base_encoder = ResNetEncoder()
model = RotNet(base_encoder, projection_head)

model = model.to(device)

next(model.parameters()).device

#############################################

device(type='cuda', index=0)

## Step 4: Set up the loss function and optimizer


In [7]:
import torch.nn as nn
import torch.optim as optim

import torch.nn as nn
import torch.optim as optim

# Hyperparameters
INITIAL_LR = 0.5
MOMENTUM = 0.9
REG = 1e-4

# Initialize the NT-Xent loss (contrastive loss) for SimCLR
temperature = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()

# Add optimizer
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)


## Step 5: Start the training process.



In [None]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
DECAY_EPOCHS = 10
DECAY = 0.9
current_learning_rate = INITIAL_LR
EPOCHS = 50
CHECKPOINT_FOLDER = "/content/drive/My Drive/ece661/final_project/saved_model_rotnet"

# Training loop
start_time = time.time()
for epoch in range(50, 50+EPOCHS):

    # if epoch % DECAY_EPOCHS == 0 and epoch != 0:
    #     current_learning_rate *= DECAY
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = current_learning_rate
    #     print("Current learning rate has decayed to %f" % current_learning_rate)

    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(trainloader):

        optimizer.zero_grad()

        # Concatenate the pairs of images along the batch dimension
        image = image.to(device)
        label = label.to(device)

        ## Forward pass and calculate loss
        representations = model(image)

        # Calculate loss using both halves
        loss = criterion(representations, label)
        total_loss += loss.item()

        # Backward and optimize
        loss.backward()
        optimizer.step()



    avg_loss = total_loss / len(trainloader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS+50, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS+50, end_time-start_time))
    start_time = end_time


    correct_examples = 0
    total_examples = 0
    val_loss = 0

    model.eval()


    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(detectionloader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(detectionloader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))


    # Save checkpoint
    if not os.path.exists(CHECKPOINT_FOLDER):
        os.makedirs(CHECKPOINT_FOLDER)
    torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'rotnet_epoch_%d.pth' % epoch))

print("=" * 50)
print("==> Training complete")

==> Training starts!
Epoch [51/100], Loss: 0.4001
Epoch [51/100], Time: 23.8746
Validation loss: 0.0110, Validation accuracy: 0.8168
Epoch [52/100], Loss: 0.3862
Epoch [52/100], Time: 28.3914
Validation loss: 0.0254, Validation accuracy: 0.8114
Epoch [53/100], Loss: 0.3816
Epoch [53/100], Time: 28.5796
Validation loss: 0.0153, Validation accuracy: 0.8035
Epoch [54/100], Loss: 0.3804
Epoch [54/100], Time: 28.3502
Validation loss: 0.0068, Validation accuracy: 0.8075
Epoch [55/100], Loss: 0.3792
Epoch [55/100], Time: 28.3964
Validation loss: 0.0133, Validation accuracy: 0.8043
Epoch [56/100], Loss: 0.3671
Epoch [56/100], Time: 28.7215
Validation loss: 0.0043, Validation accuracy: 0.8300
Epoch [57/100], Loss: 0.3627
Epoch [57/100], Time: 29.0695
Validation loss: 0.0135, Validation accuracy: 0.8100
Epoch [58/100], Loss: 0.3632
Epoch [58/100], Time: 28.6798
Validation loss: 0.0116, Validation accuracy: 0.8131
Epoch [59/100], Loss: 0.3645
Epoch [59/100], Time: 29.1189
Validation loss: 0.0178,

In [8]:
model.load_state_dict(torch.load('/content/drive/My Drive/ece661/final_project/saved_model_rotnet/rotnet_epoch_99.pth'))

<All keys matched successfully>

# Step 6 Linear Evaluation

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model.projection_head = nn.Linear(512, 10)
model = model.to(device)

for param in model.encoder.parameters():
    param.requires_grad = False

In [None]:
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

random.seed(0)
torch.manual_seed(0)

transform_classify = transforms.Compose(
                            [transforms.ToTensor(),
                             transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])])



tra_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)
val_set = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform_classify)

normal_train_loader = DataLoader(
    tra_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)
normal_validate_loader = DataLoader(
    val_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
INITIAL_LR = 0.001
MOMENTUM = 0.09
REG = 1e-4

DECAY_EPOCHS = 8
DECAY = 0.75
current_learning_rate = INITIAL_LR
EPOCHS = 23
CHECKPOINT_FOLDER = "/content/drive/My Drive/ece661/final_project/saved_model"

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, EPOCHS):

    if epoch % DECAY_EPOCHS == 0 and epoch != 0:
        current_learning_rate *= DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader):

        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(normal_train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



print("=" * 50)
print("==> Training complete")

# Save checkpoint
if not os.path.exists(CHECKPOINT_FOLDER):
    os.makedirs(CHECKPOINT_FOLDER)
torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'rotnet_linear_eval.pth'))

==> Training starts!
Epoch [1/23], Loss: 1.5796
Epoch [1/23], Time: 4.9916
Validation loss: 0.0451, Validation accuracy: 0.4756
Epoch [2/23], Loss: 1.5762
Epoch [2/23], Time: 5.9922
Validation loss: 0.0427, Validation accuracy: 0.4740
Epoch [3/23], Loss: 1.5708
Epoch [3/23], Time: 6.1978
Validation loss: 0.0406, Validation accuracy: 0.4756
Epoch [4/23], Loss: 1.5699
Epoch [4/23], Time: 6.0361
Validation loss: 0.0389, Validation accuracy: 0.4733
Epoch [5/23], Loss: 1.5725
Epoch [5/23], Time: 6.0276
Validation loss: 0.0443, Validation accuracy: 0.4727
Epoch [6/23], Loss: 1.5722
Epoch [6/23], Time: 6.0397
Validation loss: 0.0320, Validation accuracy: 0.4716
Epoch [7/23], Loss: 1.5712
Epoch [7/23], Time: 6.2754
Validation loss: 0.0339, Validation accuracy: 0.4757
Epoch [8/23], Loss: 1.5690
Epoch [8/23], Time: 6.1549
Validation loss: 0.0383, Validation accuracy: 0.4751
Current learning rate has decayed to 0.000750
Epoch [9/23], Loss: 1.5694
Epoch [9/23], Time: 6.3158
Validation loss: 0.0477

# Step 7: Classifier Finetune

In [13]:
model.load_state_dict(torch.load('/content/drive/My Drive/ece661/final_project/saved_model_rotnet/rotnet_epoch_99.pth'))

<All keys matched successfully>

In [20]:
# Define the new classifier as a separate module
class RotNetClassifier(nn.Module):
    def __init__(self, input_features=512, features=200, num_classes=10):
        super(RotNetClassifier, self).__init__()
        self.fc1 = nn.Linear(input_features, features)
        self.bn1 = nn.BatchNorm1d(features)
        self.relu1 = nn.ReLU()

        self.fc2 = nn.Linear(features, features)
        self.bn2 = nn.BatchNorm1d(features)
        self.relu2 = nn.ReLU()

        self.fc3 = nn.Linear(features, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.fc3(x)
        return x

In [21]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

classifier = RotNetClassifier()

model.projection_head = classifier
model = model.to(device)

In [22]:
model

RotNet(
  (encoder): ResNetEncoder(
    (base_model): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): B

In [15]:
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

random.seed(0)
torch.manual_seed(0)

transform_classify = transforms.Compose(
                            [transforms.ToTensor(),
                             transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])])



tra_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)
val_set = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform_classify)


classifier_raw_set_50000 = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)

size = 5000

indices = torch.randperm(len(classifier_raw_set_50000))[:size].tolist()
subset = torch.utils.data.Subset(classifier_raw_set_50000, indices)


normal_train_loader = DataLoader(
    subset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)
normal_validate_loader = DataLoader(
    val_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
INITIAL_LR = 0.5
MOMENTUM = 0.9
REG = 1e-4

DECAY_EPOCHS = 8
DECAY = 0.75
current_learning_rate = INITIAL_LR
EPOCHS = 20
CHECKPOINT_FOLDER = "/content/drive/My Drive/ece661/final_project/saved_model"

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, EPOCHS):

    if epoch % DECAY_EPOCHS == 0 and epoch != 0:
        current_learning_rate *= DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader):

        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(normal_train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



print("=" * 50)
print("==> Training complete")

# Save checkpoint
if not os.path.exists(CHECKPOINT_FOLDER):
    os.makedirs(CHECKPOINT_FOLDER)
torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'ROTNET_with_decoder_size_%d.pth' % size))

==> Training starts!
Epoch [1/20], Loss: 0.1508
Epoch [1/20], Time: 1.8194
Validation loss: 0.0908, Validation accuracy: 0.7498
Epoch [2/20], Loss: 0.0766
Epoch [2/20], Time: 3.3094
Validation loss: 0.1370, Validation accuracy: 0.5887
Epoch [3/20], Loss: 1.6997
Epoch [3/20], Time: 3.2417
Validation loss: 2.2714, Validation accuracy: 0.1011
Epoch [4/20], Loss: 1.1509
Epoch [4/20], Time: 3.1981
Validation loss: 0.1163, Validation accuracy: 0.4768
Epoch [5/20], Loss: 0.6596
Epoch [5/20], Time: 3.2477
Validation loss: 0.0365, Validation accuracy: 0.6121


Validation loss: 0.0073, Validation accuracy: 0.7585 for 5000

Validation loss: 0.0027, Validation accuracy: 0.7719
for 7500
> 缩进块



In [None]:
model