## Step 0: Set up the SimpleNN model
As you have practiced to implement simple neural networks in Homework 1, we just prepare the implementation for you.

In [43]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir("/content/drive/MyDrive/ece661/final_project/")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
import torchvision.models as models
import torch.nn as nn

def modify_resnet50_for_cifar10(model):
    # Replace the first convolutional layer
    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    # Remove the first max pooling layer
    model.maxpool = nn.Identity()
    return model

base_encoder = models.resnet50(pretrained=False)
base_encoder = modify_resnet50_for_cifar10(base_encoder)
base_encoder = nn.Sequential(*list(base_encoder.children())[:-1])

# Define the projection head
class ProjectionHead(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ProjectionHead, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Assuming output of ResNet50 is 2048, hidden layer of 512, and output of 128
projection_head = ProjectionHead(input_dim=2048, hidden_dim=512, output_dim=128)

# Define the SimCLR model
class SimCLR(nn.Module):
    def __init__(self, encoder, projection_head):
        super(SimCLR, self).__init__()
        self.encoder = encoder
        self.projection_head = projection_head

    def forward(self, x):
        x = self.encoder(x)
        x = torch.flatten(x, start_dim=1)  # Flatten the output
        x = self.projection_head(x)
        return x

# Initialize the model
model = SimCLR(base_encoder, projection_head)


In [None]:
print("base_encoder:", base_encoder)

base_encoder: Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Identity()
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=

In [45]:
class NTXentLoss(torch.nn.Module):
    def __init__(self, temperature, device):
        super(NTXentLoss, self).__init__()
        self.temperature = temperature
        self.device = device
        self.cossim = torch.nn.CosineSimilarity(dim=2)

    def forward(self, z_i, z_j):
        N = 2 * z_i.size(0)  # Dynamically calculate based on input size
        z = torch.cat((z_i, z_j), dim=0)

        sim = self.cossim(z.unsqueeze(1), z.unsqueeze(0)) / self.temperature
        sim_i_j = torch.diag(sim, z_i.size(0))
        sim_j_i = torch.diag(sim, -z_i.size(0))

        # Ensure the mask is created based on the dynamic size
        mask = torch.ones((N, N), dtype=bool, device=self.device)
        mask = mask.fill_diagonal_(0)
        for i in range(z_i.size(0)):
            mask[i, z_i.size(0) + i] = 0
            mask[z_i.size(0) + i, i] = 0

        positive_samples = torch.cat((sim_i_j, sim_j_i), dim=0).reshape(N, 1)
        negative_samples = sim[mask].reshape(N, -1)

        labels = torch.zeros(N, device=self.device).long()
        logits = torch.cat((positive_samples, negative_samples), dim=1)
        loss = F.cross_entropy(logits, labels)
        return loss


## Step 1: Set up preprocessing functions
Preprocessing is very important as discussed in the lecture.
You will need to write preprocessing functions with the help of *torchvision.transforms* in this step.
You can find helpful tutorial/API at [here](https://pytorch.org/vision/stable/transforms.html).

## Step 2: Set up dataset and dataloader



In [46]:
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

class SimCLRDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, label = self.dataset[idx]

        # Apply the transformation to get two augmented versions of the same image
        if self.transform:
            image1 = self.transform(image)
            image2 = self.transform(image)

        return image1, image2

In [47]:
# useful libraries
import torchvision
import torchvision.transforms as transforms

# Data transformation
mean = (0.4914, 0.4822, 0.4465)
std = (0.2470, 0.2435, 0.2616)

from torchvision import transforms

def get_color_distortion(s=1.0):
    # s is the strength of color distortion.
    color_jitter = transforms.ColorJitter(0.8*s, 0.8*s, 0.8*s, 0.2*s)
    rnd_color_jitter = transforms.RandomApply([color_jitter], p=0.8)
    rnd_gray = transforms.RandomGrayscale(p=0.2)
    color_distort = transforms.Compose([
        rnd_color_jitter,
        rnd_gray
    ])
    return color_distort

# Data transformation for CIFAR-10
transform_train = transforms.Compose([
    # Inception-style crop and resize to 32x32
    transforms.RandomResizedCrop(32, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)),
    transforms.RandomHorizontalFlip(),
    get_color_distortion(s=0.5),  # Color distortion with strength 0.5
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


transform_classify = transforms.Compose(
                            [transforms.ToTensor(),
                             transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])])

# Load CIFAR10 dataset
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256

train_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=None)
train_dataset = SimCLRDataset(train_set, transform=transform_train)

tra_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_classify)
val_set = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform_classify)

# DataLoader for SimCLR
train_loader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


## Step 3: Instantiate your model and deploy it to GPU devices.


In [48]:
# specify the device for computation
#############################################
# Determine if a GPU is available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = model.to(device)

next(model.parameters()).device

#############################################

device(type='cuda', index=0)

## Step 4: Set up the loss function and optimizer


In [49]:
import torch.nn as nn
import torch.optim as optim

import torch.nn as nn
import torch.optim as optim

# Hyperparameters
INITIAL_LR = 0.5
MOMENTUM = 0.9
REG = 1e-4

# Initialize the NT-Xent loss (contrastive loss) for SimCLR
temperature = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = NTXentLoss(temperature, device)

# Add optimizer
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)


## Step 5: Start the training process.



In [None]:
import torch.nn as nn
import torch.optim as optim

model.load_state_dict(torch.load('/content/drive/MyDrive/ece661/final_project/saved_model/simclr_epoch_59.pth'))

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
current_learning_rate = INITIAL_LR
EPOCHS = 40
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ece661/final_project/saved_model"

# Training loop
for epoch in range(60, 60+EPOCHS):
    model.train()
    total_loss = 0

    for batch_idx, (image1, image2) in enumerate(train_loader):
        # Check if image1 and image2 have the same dimensions
        if image1.dim() != 4 or image2.dim() != 4:
            raise ValueError(f"Expected images to have 4 dimensions. Got {image1.dim()} and {image2.dim()}")

        optimizer.zero_grad()

        image1 = image1.to(device)
        image2 = image2.to(device)

        # Concatenate the pairs of images along the batch dimension
        concatenated_images = torch.cat((image1, image2), dim=0)

        ## Forward pass and calculate loss
        representations = model(concatenated_images)

        # Split the representations into two halves
        z_i, z_j = torch.split(representations, representations.shape[0] // 2, dim=0)

        # Calculate loss using both halves
        loss = criterion(z_i, z_j)
        total_loss += loss.item()

        # Backward and optimize
        loss.backward()
        optimizer.step()



    avg_loss = total_loss / len(train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS+60, avg_loss))

    # Save checkpoint
    if not os.path.exists(CHECKPOINT_FOLDER):
        os.makedirs(CHECKPOINT_FOLDER)
    torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'simclr_epoch_%d.pth' % epoch))

print("=" * 50)
print("==> Training complete")

==> Training starts!
Epoch [61/100], Loss: 4.8436
Epoch [62/100], Loss: 4.8421
Epoch [63/100], Loss: 4.8464
Epoch [64/100], Loss: 4.8400
Epoch [65/100], Loss: 4.8376
Epoch [66/100], Loss: 4.8331
Epoch [67/100], Loss: 4.8341
Epoch [68/100], Loss: 4.8372
Epoch [69/100], Loss: 4.8345
Epoch [70/100], Loss: 4.8340
Epoch [71/100], Loss: 4.8383
Epoch [72/100], Loss: 4.8369
Epoch [73/100], Loss: 4.8292
Epoch [74/100], Loss: 4.8314
Epoch [75/100], Loss: 4.8276
Epoch [76/100], Loss: 4.8316
Epoch [77/100], Loss: 4.8285
Epoch [78/100], Loss: 4.8332
Epoch [79/100], Loss: 4.8288
Epoch [80/100], Loss: 4.8228
Epoch [81/100], Loss: 4.8290
Epoch [82/100], Loss: 4.8207
Epoch [83/100], Loss: 4.8250
Epoch [84/100], Loss: 4.8258
Epoch [85/100], Loss: 4.8219
Epoch [86/100], Loss: 4.8213
Epoch [87/100], Loss: 4.8227
Epoch [88/100], Loss: 4.8195
Epoch [89/100], Loss: 4.8183
Epoch [90/100], Loss: 4.8199
Epoch [91/100], Loss: 4.8221
Epoch [92/100], Loss: 4.8163
Epoch [93/100], Loss: 4.8190
Epoch [94/100], Loss: 

In [None]:
model.load_state_dict(torch.load('/content/drive/MyDrive/ece661/final_project/saved_model/simclr_epoch_99.pth'))

<All keys matched successfully>

In [None]:
model.projection_head = nn.Linear(2048, 10)
model = model.to(device)

In [39]:

normal_train_loader = DataLoader(
    tra_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)
normal_validate_loader = DataLoader(
    val_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)


In [None]:
import torch.nn as nn
import torch.optim as optim

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
DECAY_EPOCHS = 5
DECAY = 0.8
current_learning_rate = INITIAL_LR
EPOCHS = 20
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ece661/final_project/saved_model"

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, 20):

    # if epoch % DECAY_EPOCHS == 0 and epoch != 0:
    #     current_learning_rate *= DECAY
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = current_learning_rate
    #     print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader):
        # Check if image1 and image2 have the same dimensions
        # if image1.dim() != 4 or image2.dim() != 4:
        #     raise ValueError(f"Expected images to have 4 dimensions. Got {image1.dim()} and {image2.dim()}")

        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



print("=" * 50)
print("==> Training complete")

# Save checkpoint
if not os.path.exists(CHECKPOINT_FOLDER):
    os.makedirs(CHECKPOINT_FOLDER)
torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'simclr_with_decoder_epoch_%d.pth' % epoch))

==> Training starts!
Epoch [1/20], Loss: 0.7591
Epoch [1/20], Time: 160.9204
Validation loss: 0.0188, Validation accuracy: 0.7111
Epoch [2/20], Loss: 0.5068
Epoch [2/20], Time: 166.8976
Validation loss: 0.0407, Validation accuracy: 0.7356
Epoch [3/20], Loss: 0.4046
Epoch [3/20], Time: 167.5639
Validation loss: 0.0123, Validation accuracy: 0.7821
Epoch [4/20], Loss: 0.3500
Epoch [4/20], Time: 167.6127
Validation loss: 0.0343, Validation accuracy: 0.7460
Epoch [5/20], Loss: 0.3000
Epoch [5/20], Time: 167.5665
Validation loss: 0.0357, Validation accuracy: 0.7798
Epoch [6/20], Loss: 0.2679
Epoch [6/20], Time: 167.6067
Validation loss: 0.0141, Validation accuracy: 0.7672
Epoch [7/20], Loss: 0.2440
Epoch [7/20], Time: 166.7209
Validation loss: 0.0477, Validation accuracy: 0.7554
Epoch [8/20], Loss: 0.2258
Epoch [8/20], Time: 166.4949
Validation loss: 0.0143, Validation accuracy: 0.8230
Epoch [9/20], Loss: 0.2045
Epoch [9/20], Time: 167.4019
Validation loss: 0.0071, Validation accuracy: 0.828

In [None]:
# sample_sizes = [2500, 5000, 7500, 10000]
# sample_sizes = [500, 1000, 2000, 3000, 4000]
sample_size = 50000
model.load_state_dict(torch.load('/content/drive/MyDrive/ece661/final_project/saved_model/simclr_epoch_99.pth'))

model.projection_head = nn.Linear(2048, 10)
model = model.to(device)

In [None]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset


# 选择子集
subset_indices = torch.randperm(len(tra_set))[:sample_size]
train_subset = Subset(tra_set, subset_indices)

# 创建新的DataLoader
normal_train_loader_splite = DataLoader(
    train_subset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=4
)


print(f"==> Training with {sample_size} samples starts!")
# print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
DECAY_EPOCHS = 5
DECAY = 0.8
current_learning_rate = INITIAL_LR
EPOCHS = 20
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ece661/final_project/saved_model"

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, EPOCHS):

    # if epoch % DECAY_EPOCHS == 0 and epoch != 0:
    #     current_learning_rate *= DECAY
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = current_learning_rate
    #     print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader_splite):
        # Check if image1 and image2 have the same dimensions
        # if image1.dim() != 4 or image2.dim() != 4:
        #     raise ValueError(f"Expected images to have 4 dimensions. Got {image1.dim()} and {image2.dim()}")

        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



    print("=" * 50)

# Save checkpoint
if not os.path.exists(CHECKPOINT_FOLDER):
    os.makedirs(CHECKPOINT_FOLDER)
torch.save(model.state_dict(), os.path.join(CHECKPOINT_FOLDER, 'New_simclr_with_decoder_epoch_%d_size_%d.pth' % (20, sample_size)))
print(f"==> Training with {sample_size} samples complete")

==> Training with 50000 samples starts!
Epoch [1/20], Loss: 0.7550
Epoch [1/20], Time: 154.1889
Validation loss: 0.0403, Validation accuracy: 0.6672
Epoch [2/20], Loss: 0.4984
Epoch [2/20], Time: 171.7484
Validation loss: 0.0120, Validation accuracy: 0.7618
Epoch [3/20], Loss: 0.4083
Epoch [3/20], Time: 171.8981
Validation loss: 0.0075, Validation accuracy: 0.7626
Epoch [4/20], Loss: 0.3430
Epoch [4/20], Time: 172.6007
Validation loss: 0.0084, Validation accuracy: 0.8198
Epoch [5/20], Loss: 0.2983
Epoch [5/20], Time: 171.5429
Validation loss: 0.0238, Validation accuracy: 0.7361
Epoch [6/20], Loss: 0.2671
Epoch [6/20], Time: 172.0915
Validation loss: 0.0201, Validation accuracy: 0.7719
Epoch [7/20], Loss: 0.2332
Epoch [7/20], Time: 173.0508
Validation loss: 0.0419, Validation accuracy: 0.7385
Epoch [8/20], Loss: 0.2139
Epoch [8/20], Time: 171.5201
Validation loss: 0.0291, Validation accuracy: 0.7696
Epoch [9/20], Loss: 0.1925
Epoch [9/20], Time: 171.7919
Validation loss: 0.0241, Validat

In [50]:
sample_sizes = 10000
model.load_state_dict(torch.load('/content/drive/MyDrive/ece661/final_project/saved_model/simclr_epoch_99.pth'))

model.projection_head = nn.Linear(2048, 10)
model = model.to(device)

In [51]:
for param in model.encoder.parameters():
    param.requires_grad = False


In [52]:
import torch.nn as nn
import torch.optim as optim

# Uncomment when continuing...
# model.load_state_dict(torch.load('./trained_SIMCLR_model_scripted_classifier.pt'))
model = model.to(device)

# initial learning rate
INITIAL_LR = 0.01
# momentum for optimizer
MOMENTUM = 0.9
# L2 regularization strength
REG = 0

print("==> Training starts!")
print("=" * 50)

# Learning rate decay setup
DECAY_EPOCHS = 7
DECAY = 0.9
current_learning_rate = INITIAL_LR
EPOCHS = 20

# Training loop
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)
for epoch in range(0, EPOCHS):

    # if epoch % DECAY_EPOCHS == 0 and epoch != 0:
    #     current_learning_rate *= DECAY
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = current_learning_rate
    #     print("Current learning rate has decayed to %f" % current_learning_rate)


    model.train()
    total_loss = 0

    for batch_idx, (image, label) in enumerate(normal_train_loader):
        # Check if image1 and image2 have the same dimensions
        image = image.to(device)
        label = label.to(device)

        # compute the output and loss
        output = model.forward(image)
        loss = criterion(output, label)
        train_loss = loss.detach()

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        total_loss += train_loss


    avg_loss = total_loss / len(normal_train_loader)
    print("Epoch [%d/%d], Loss: %.4f" % (epoch + 1, EPOCHS, avg_loss))
    end_time = time.time()
    print("Epoch [%d/%d], Time: %.4f" % (epoch + 1, EPOCHS, end_time-start_time))
    start_time = end_time

    model.eval()
    correct_examples = 0
    total_examples = 0
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(normal_validate_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            output = model.forward(inputs)
            loss = criterion(output, targets)
            val_loss = loss.detach()

            # count the number of correctly predicted samples in the current batch
            _, predicted_results = torch.max(output, 1)
            correct_predictions = (predicted_results == targets)
            correct_examples += correct_predictions.sum().item()
            total_examples += len(targets)
            ####################################

    avg_loss = val_loss / len(normal_validate_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))



print("=" * 50)
print("==> Training complete")

# Save checkpoint
torch.save(model.state_dict(), '/content/drive/MyDrive/ece661/final_project/saved_model/trained_SIMCLR_classifier_linear_eval.pt')

==> Training starts!
Epoch [1/20], Loss: 1.5312
Epoch [1/20], Time: 49.6071
Validation loss: 0.0245, Validation accuracy: 0.6682
Epoch [2/20], Loss: 1.0794
Epoch [2/20], Time: 56.8878
Validation loss: 0.0284, Validation accuracy: 0.6799
Epoch [3/20], Loss: 0.9815
Epoch [3/20], Time: 57.5189
Validation loss: 0.0279, Validation accuracy: 0.6895
Epoch [4/20], Loss: 0.9345
Epoch [4/20], Time: 57.4437
Validation loss: 0.0154, Validation accuracy: 0.6949
Epoch [5/20], Loss: 0.9053
Epoch [5/20], Time: 57.4022
Validation loss: 0.0345, Validation accuracy: 0.7000
Epoch [6/20], Loss: 0.8860
Epoch [6/20], Time: 57.0577
Validation loss: 0.0287, Validation accuracy: 0.7013
Epoch [7/20], Loss: 0.8710
Epoch [7/20], Time: 57.3084
Validation loss: 0.0193, Validation accuracy: 0.7072
Epoch [8/20], Loss: 0.8576
Epoch [8/20], Time: 57.3659
Validation loss: 0.0249, Validation accuracy: 0.7067
Epoch [9/20], Loss: 0.8467
Epoch [9/20], Time: 57.3925
Validation loss: 0.0159, Validation accuracy: 0.7090
Epoch [