## Step 0: Set up the SimpleNN model
As you have practiced to implement simple neural networks in Homework 1, we just prepare the implementation for you.

In [1]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from google.colab import drive

from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

import numpy as np

# useful libraries
import torchvision
import torchvision.transforms as transforms
drive.mount('/content/drive')
import os
os.chdir("/content/drive/MyDrive/ece661/final_project/")


Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
import torchvision.models as models

class ResNetEncoder(nn.Module):
    def __init__(self, base_model='resnet50'):
        super(ResNetEncoder, self).__init__()

        # Load the pre-trained base model
        if base_model == 'resnet50':
            self.base_model = models.resnet50(pretrained=True)

        # Modify the first convolution layer if needed
        # self.base_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)

        # Remove the fully connected layer
        self.base_model.fc = nn.Identity()

    def forward(self, x):
        # Forward through the base model
        x = self.base_model(x)
        return x

# Define the projection head
class ProjectionHead(nn.Module):
    def __init__(self, input_dim=2048, output_dim=4):
        super(ProjectionHead, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.fc1(x)
        return x

# Define the RotNet model
class RotNet(nn.Module):
    def __init__(self, encoder, projection_head):
        super(RotNet, self).__init__()
        self.encoder = encoder
        self.projection_head = projection_head

    def forward(self, x):
        x = self.encoder(x)
        x = torch.flatten(x, start_dim=1)  # Flatten the output
        x = self.projection_head(x)
        return x


In [3]:
def model_size_in_mib(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.numel() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.numel() * buffer.element_size()
    total_size = param_size + buffer_size
    total_size_mib = total_size / (1024 ** 2)  # Convert bytes to MiB
    return total_size_mib

In [None]:
# print(f"ResNet-18 size: {model_size_in_mib(model)} MiB")

NameError: ignored

## Step 1: Set up preprocessing functions
Preprocessing is very important as discussed in the lecture.
You will need to write preprocessing functions with the help of *torchvision.transforms* in this step.
You can find helpful tutorial/API at [here](https://pytorch.org/vision/stable/transforms.html).

## Step 2: Set up dataset and dataloader



In [4]:
class RotNetDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, _ = self.dataset[idx]

        # Randomly choose a rotation angle (0, 90, 180, 270 degrees)
        rotation_label = np.random.choice([0, 1, 2, 3])
        rotated_image = image.rotate(rotation_label * 90)

        # Convert PIL image to tensor
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        rotated_image = transform(rotated_image)

        return rotated_image, rotation_label

In [5]:
# Data transformation
mean = (0.4914, 0.4822, 0.4465)
std = (0.2470, 0.2435, 0.2616)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


# Load CIFAR10 dataset
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

# Load the dataset
original_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=None)
original_rotational_detection_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=None)

# Wrap it with RotNetDataset
rotnet_dataset = RotNetDataset(original_dataset)
detection_dataset = RotNetDataset(original_rotational_detection_dataset)

# DataLoader
trainloader = torch.utils.data.DataLoader(rotnet_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
detectionloader = torch.utils.data.DataLoader(detection_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


## Step 3: Instantiate your model and deploy it to GPU devices.


In [25]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Initialize the base encoder and projection head
base_encoder = ResNetEncoder()
projection_head = ProjectionHead(input_dim=2048, output_dim=4)

# Initialize the RotNet model
model = RotNet(base_encoder, projection_head)

# Move the model to the specified device
model = model.to(device)

# Verify the device
print(next(model.parameters()).device)


cuda:0


In [7]:
print(f"ResNet-50 size: {model_size_in_mib(model)} MiB")

ResNet-50 size: 89.91033172607422 MiB


## Step 4: Set up the loss function and optimizer


In [8]:
import torch.nn as nn
import torch.optim as optim

import torch.nn as nn
import torch.optim as optim

# Hyperparameters
INITIAL_LR = 0.5
MOMENTUM = 0.9
REG = 1e-4

# Initialize the NT-Xent loss (contrastive loss) for SimCLR
temperature = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()

# Add optimizer
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)


## Step 5: Start the training process.



In [None]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
DECAY_EPOCHS = 10
DECAY = 0.9
current_learning_rate = INITIAL_LR
EPOCHS = 100
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ece661/final_project/saved_model_rotnet"

# Training loop
start_time = time.time()
for epoch in range(EPOCHS):

    # if epoch % DECAY_EPOCHS == 0 and epoch != 0:
    #     current_learning_rate *= DECAY
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = current_learning_rate
    #     print("Current learning rate has decayed to %f" % current_learning_rate)

    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(trainloader):

        optimizer.zero_grad()

        # Concatenate the pairs of images along the batch dimension
        image = image.to(device)
        label = label.to(device)

        ## Forward pass and calculate loss
        representations = model(image)

        # Calculate loss using both halves
        loss = criterion(representations, label)
        total_loss += loss.item()

        # Backward and optimize
        loss.backward()
        optimizer.step()



    avg_loss = total_loss / len(trainloader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time


    correct_examples = 0
    total_examples = 0
    val_loss = 0

    model.eval()


    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(detectionloader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(detectionloader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))


    # Save checkpoint
    if not os.path.exists(CHECKPOINT_FOLDER):
        os.makedirs(CHECKPOINT_FOLDER)
    torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'rotnet_epoch_%d.pth' % epoch))

print("=" * 50)
print("==> Training complete")

==> Training starts!
Epoch [1/100], Loss: 2.0426
Epoch [1/100], Time: 35.9546
Validation loss: 0.0349, Validation accuracy: 0.2588
Epoch [2/100], Loss: 1.3751
Epoch [2/100], Time: 34.1123
Validation loss: 0.0342, Validation accuracy: 0.2723
Epoch [3/100], Loss: 1.3663
Epoch [3/100], Time: 34.2305
Validation loss: 0.0348, Validation accuracy: 0.3007
Epoch [4/100], Loss: 1.3003
Epoch [4/100], Time: 35.8389
Validation loss: 0.0318, Validation accuracy: 0.4027
Epoch [5/100], Loss: 1.1651
Epoch [5/100], Time: 34.4657
Validation loss: 0.0276, Validation accuracy: 0.4699
Epoch [6/100], Loss: 1.1325
Epoch [6/100], Time: 34.4168
Validation loss: 0.0255, Validation accuracy: 0.5142
Epoch [7/100], Loss: 1.1107
Epoch [7/100], Time: 32.9174
Validation loss: 0.0232, Validation accuracy: 0.5119
Epoch [8/100], Loss: 1.1004
Epoch [8/100], Time: 33.1349
Validation loss: 0.0330, Validation accuracy: 0.5123
Epoch [9/100], Loss: 1.0851
Epoch [9/100], Time: 32.1909
Validation loss: 0.0298, Validation accura

In [None]:
model.load_state_dict(torch.load('/content/drive/MyDrive/ece661/final_project/saved_model_rotnet/rotnet_epoch_99.pth'))

<All keys matched successfully>

# Step 6: Linear Evaluation

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model.projection_head = nn.Linear(2048, 10)
model = model.to(device)

for param in model.encoder.parameters():
    param.requires_grad = False

In [None]:
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

random.seed(0)
torch.manual_seed(0)

transform_classify = transforms.Compose(
                            [transforms.ToTensor(),
                             transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])])



tra_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)
val_set = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform_classify)


classifier_raw_set_50000 = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)

size = 10000

indices = torch.randperm(len(classifier_raw_set_50000))[:size].tolist()
subset = torch.utils.data.Subset(classifier_raw_set_50000, indices)





normal_train_loader = DataLoader(
    subset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)
normal_validate_loader = DataLoader(
    val_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
INITIAL_LR = 0.5
MOMENTUM = 0.9
REG = 1e-4

DECAY_EPOCHS = 8
DECAY = 0.75
current_learning_rate = INITIAL_LR
EPOCHS = 20
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ece661/final_project/saved_model_rotnet"

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, EPOCHS):

    if epoch % DECAY_EPOCHS == 0 and epoch != 0:
        current_learning_rate *= DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader):

        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(normal_train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



print("=" * 50)
print("==> Training complete")

# Save checkpoint
if not os.path.exists(CHECKPOINT_FOLDER):
    os.makedirs(CHECKPOINT_FOLDER)
torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'retnet_linear_eval.pth'))

==> Training starts!
Epoch [1/20], Loss: 2.0997
Epoch [1/20], Time: 2.9236
Validation loss: 0.0536, Validation accuracy: 0.2120
Epoch [2/20], Loss: 2.0573
Epoch [2/20], Time: 5.5360
Validation loss: 0.0558, Validation accuracy: 0.2301
Epoch [3/20], Loss: 2.0440
Epoch [3/20], Time: 7.2666
Validation loss: 0.0569, Validation accuracy: 0.2323
Epoch [4/20], Loss: 2.0326
Epoch [4/20], Time: 5.3200
Validation loss: 0.0573, Validation accuracy: 0.2482
Epoch [5/20], Loss: 2.0014
Epoch [5/20], Time: 5.9454
Validation loss: 0.0533, Validation accuracy: 0.2333
Epoch [6/20], Loss: 2.0150
Epoch [6/20], Time: 7.5056
Validation loss: 0.0465, Validation accuracy: 0.2441
Epoch [7/20], Loss: 1.9967
Epoch [7/20], Time: 5.4371
Validation loss: 0.0445, Validation accuracy: 0.2557
Epoch [8/20], Loss: 1.9960
Epoch [8/20], Time: 7.2493
Validation loss: 0.0570, Validation accuracy: 0.2581
Current learning rate has decayed to 0.375000
Epoch [9/20], Loss: 1.9788
Epoch [9/20], Time: 5.4144
Validation loss: 0.0564

Validation loss: 0.0073, Validation accuracy: 0.7585 for 5000

Validation loss: 0.0027, Validation accuracy: 0.7719
for 7500
> 缩进块



# Step 7: Classifier Finetune

In [26]:
model.load_state_dict(torch.load('/content/drive/MyDrive/ece661/final_project/saved_model_rotnet/rotnet_epoch_99.pth'))

<All keys matched successfully>

In [27]:
# Define the new classifier as a separate module
class RotNetClassifier(nn.Module):
    def __init__(self, input_features=2048, features=200, num_classes=10):
        super(RotNetClassifier, self).__init__()
        self.fc1 = nn.Linear(input_features, features)
        self.bn1 = nn.BatchNorm1d(features)
        self.relu1 = nn.ReLU()

        self.fc2 = nn.Linear(features, features)
        self.bn2 = nn.BatchNorm1d(features)
        self.relu2 = nn.ReLU()

        self.fc3 = nn.Linear(features, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.fc3(x)
        return x

In [28]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

classifier = RotNetClassifier()

model.projection_head = classifier
model = model.to(device)

In [29]:
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

random.seed(0)
torch.manual_seed(0)

transform_classify = transforms.Compose(
                            [transforms.ToTensor(),
                             transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])])



tra_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)
val_set = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform_classify)


classifier_raw_set_50000 = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)

size = 50000

indices = torch.randperm(len(classifier_raw_set_50000))[:size].tolist()
subset = torch.utils.data.Subset(classifier_raw_set_50000, indices)


normal_train_loader = DataLoader(
    subset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)
normal_validate_loader = DataLoader(
    val_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [30]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
INITIAL_LR = 0.5
MOMENTUM = 0.9
REG = 1e-4

DECAY_EPOCHS = 8
DECAY = 0.75
current_learning_rate = INITIAL_LR
EPOCHS = 20
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ece661/final_project/saved_model_rotnet"

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, EPOCHS):

    if epoch % DECAY_EPOCHS == 0 and epoch != 0:
        current_learning_rate *= DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader):

        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(normal_train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



print("=" * 50)
print("==> Training complete")

# Save checkpoint
if not os.path.exists(CHECKPOINT_FOLDER):
    os.makedirs(CHECKPOINT_FOLDER)
torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'ROTNET_with_decoder_size_%d.pth' % size))

==> Training starts!
Epoch [1/20], Loss: 1.3000
Epoch [1/20], Time: 20.3440
Validation loss: 0.0410, Validation accuracy: 0.6006
Epoch [2/20], Loss: 0.9170
Epoch [2/20], Time: 23.4999
Validation loss: 0.0358, Validation accuracy: 0.6231
Epoch [3/20], Loss: 0.8043
Epoch [3/20], Time: 21.8906
Validation loss: 0.0250, Validation accuracy: 0.6589
Epoch [4/20], Loss: 0.7452
Epoch [4/20], Time: 24.3264
Validation loss: 0.0202, Validation accuracy: 0.6612
Epoch [5/20], Loss: 0.6829
Epoch [5/20], Time: 22.7442
Validation loss: 0.0287, Validation accuracy: 0.6562
Epoch [6/20], Loss: 0.6428
Epoch [6/20], Time: 22.9242
Validation loss: 0.0199, Validation accuracy: 0.6938
Epoch [7/20], Loss: 0.6085
Epoch [7/20], Time: 22.5653
Validation loss: 0.0139, Validation accuracy: 0.7220
Epoch [8/20], Loss: 0.5802
Epoch [8/20], Time: 21.4286
Validation loss: 0.0256, Validation accuracy: 0.6896
Current learning rate has decayed to 0.375000
Epoch [9/20], Loss: 0.4678
Epoch [9/20], Time: 23.0314
Validation los