https://github.com/sthalles/SimCLR/blob/simclr-refactor/feature_eval/mini_batch_logistic_regression_evaluator.ipynb

In [2]:
import torch
import sys
import numpy as np
import os
import matplotlib.pyplot as plt
import torchvision

In [3]:
!pip install gdown

Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Collecting beautifulsoup4 (from gdown)
  Downloading beautifulsoup4-4.12.2-py3-none-any.whl (142 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.0/143.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting soupsieve>1.2 (from beautifulsoup4->gdown)
  Downloading soupsieve-2.4.1-py3-none-any.whl (36 kB)
Collecting PySocks!=1.5.7,>=1.5.6 (from requests[socks]->gdown)
  Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)
Installing collected packages: soupsieve, PySocks, beautifulsoup4, gdown
Successfully installed PySocks-1.7.1 beautifulsoup4-4.12.2 gdown-4.7.1 soupsieve-2.4.1


In [4]:
def get_file_id_by_model(folder_name):
  file_id = {'resnet18_100-epochs_stl10': '14_nH2FkyKbt61cieQDiSbBVNP8-gtwgF',
             'resnet18_100-epochs_cifar10': '1lc2aoVtrAetGn0PnTkOyFzPCIucOJq7C',
             'resnet50_50-epochs_stl10': '1ByTKAUsdm_X7tLcii6oAEl5qFRqRMZSu'}
  return file_id.get(folder_name, "Model not found.")

In [5]:
folder_name = 'resnet18_100-epochs_cifar10'
file_id = get_file_id_by_model(folder_name)
print(folder_name, file_id)

resnet18_100-epochs_cifar10 1lc2aoVtrAetGn0PnTkOyFzPCIucOJq7C


In [22]:
# download and extract model files
os.system('gdown https://drive.google.com/uc?id={}'.format(file_id))
os.system('mv {}.zip pretrained/'.format(folder_name))
os.system('unzip pretrained/{} -d pretrained/'.format(folder_name))

Downloading...
From (uriginal): https://drive.google.com/uc?id=1lc2aoVtrAetGn0PnTkOyFzPCIucOJq7C
From (redirected): https://drive.google.com/uc?id=1lc2aoVtrAetGn0PnTkOyFzPCIucOJq7C&confirm=t&uuid=876148ef-8213-469c-a29c-ffe5c75d1bf0
To: /home/josegfer/ggnn/resnet18_100-epochs_cifar10.zip
100%|██████████| 101M/101M [00:09<00:00, 11.2MB/s] 


Archive:  pretrained/resnet18_100-epochs_cifar10.zip
  inflating: pretrained/checkpoint_0100.pth.tar  
  inflating: pretrained/config.yml   
  inflating: pretrained/events.out.tfevents.1610901418.4cb2c837708d.2683796.0  
  inflating: pretrained/run.log      


0

In [23]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import datasets

In [24]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

Using device: cuda


In [26]:
model = torchvision.models.resnet18(pretrained=False, num_classes=10).to(device)

In [28]:
checkpoint = torch.load('pretrained/checkpoint_0100.pth.tar', map_location=device)
state_dict = checkpoint['state_dict']

for k in list(state_dict.keys()):

  if k.startswith('backbone.'):
    if k.startswith('backbone') and not k.startswith('backbone.fc'):
      # remove prefix
      state_dict[k[len("backbone."):]] = state_dict[k]
  del state_dict[k]

In [29]:
log = model.load_state_dict(state_dict, strict=False)
assert log.missing_keys == ['fc.weight', 'fc.bias']

In [32]:
BATCH_SIZE = 256

In [33]:
train_dataset = datasets.CIFAR10('./data', train=True, download=True,
                                transform=transforms.ToTensor())

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                        num_workers=0, drop_last=False, shuffle=True)

test_dataset = datasets.CIFAR10('./data', train=False, download=True,
                                transform=transforms.ToTensor())

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                        num_workers=10, drop_last=False, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [34]:
# freeze all layers but the last fc
for name, param in model.named_parameters():
    if name not in ['fc.weight', 'fc.bias']:
        param.requires_grad = False

parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
assert len(parameters) == 2  # fc.weight, fc.bias

In [35]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=0.0008)
criterion = torch.nn.CrossEntropyLoss().to(device)

In [36]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [37]:
epochs = 100
for epoch in range(epochs):
  top1_train_accuracy = 0
  for counter, (x_batch, y_batch) in enumerate(train_loader):
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    logits = model(x_batch)
    loss = criterion(logits, y_batch)
    
    top1 = accuracy(logits, y_batch, topk=(1,))
    top1_train_accuracy += top1[0]

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  top1_train_accuracy /= (counter + 1)
  top1_accuracy = 0
  top5_accuracy = 0
  for counter, (x_batch, y_batch) in enumerate(test_loader):
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    logits = model(x_batch)
  
    top1, top5 = accuracy(logits, y_batch, topk=(1,5))
    top1_accuracy += top1[0]
    top5_accuracy += top5[0]
  
  top1_accuracy /= (counter + 1)
  top5_accuracy /= (counter + 1)
  print(f"Epoch {epoch}\tTop1 Train accuracy {top1_train_accuracy.item()}\tTop1 Test accuracy: {top1_accuracy.item()}\tTop5 test acc: {top5_accuracy.item()}")

Epoch 0	Top1 Train accuracy 48.626033782958984	Top1 Test accuracy: 56.73828125	Top5 test acc: 95.048828125
Epoch 1	Top1 Train accuracy 58.77471160888672	Top1 Test accuracy: 59.013671875	Top5 test acc: 95.48828125
Epoch 2	Top1 Train accuracy 60.48469161987305	Top1 Test accuracy: 60.341796875	Top5 test acc: 95.8203125
Epoch 3	Top1 Train accuracy 61.465240478515625	Top1 Test accuracy: 61.005859375	Top5 test acc: 96.064453125
Epoch 4	Top1 Train accuracy 62.026065826416016	Top1 Test accuracy: 61.54296875	Top5 test acc: 96.03515625
Epoch 5	Top1 Train accuracy 62.578125	Top1 Test accuracy: 62.001953125	Top5 test acc: 96.19140625
Epoch 6	Top1 Train accuracy 63.00502014160156	Top1 Test accuracy: 62.177734375	Top5 test acc: 96.25
Epoch 7	Top1 Train accuracy 63.3729248046875	Top1 Test accuracy: 62.587890625	Top5 test acc: 96.171875
Epoch 8	Top1 Train accuracy 63.54153060913086	Top1 Test accuracy: 62.5390625	Top5 test acc: 96.2890625
Epoch 9	Top1 Train accuracy 63.81496810913086	Top1 Test accuracy

In [58]:
train_loader = DataLoader(train_dataset, batch_size = train_dataset.__len__() // 5, shuffle = False)
test_loader = DataLoader(test_dataset, batch_size = test_dataset.__len__(), shuffle = False)

In [59]:
yhat = torch.tensor([])
model.eval()
with torch.no_grad():
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        yhat = torch.cat((yhat, logits.cpu()), 0)
torch.save(yhat, 'data/yhat_train.pt')

In [54]:
model.eval()
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        yhat = model(x)
torch.save(yhat.cpu(), 'data/yhat_test.pt')

In [56]:
x.shape, y.shape, yhat.shape

(torch.Size([50000, 3, 32, 32]), torch.Size([50000]), torch.Size([10000, 10]))