# Prepare Dataset

In [None]:
!pip install kaggle
!echo '{"username":"drdrshao","key":"4d9b39053b1a5d361c41dc5d14a5501d"}' > /root/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c cs480winter2022
!unzip cs480winter2022.zip

## Note:
1. The dataset is split as train and test set
2. In train dataset, there are 5 pics for each label
3. In test dataset, there are only pics waiting to be marked

# Import necessary libraries

In [3]:
import os
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.nn import functional as F
import torch.optim as optim

# Load dataset

In [4]:
from torchvision.transforms.transforms import RandomVerticalFlip
input_path = "5_shot/5_shot/"
normalize = transforms.Normalize(mean=[0.4205, 0.4369, 0.4446],
                  std=[0.1140, 0.1138, 0.1145])

data_transforms = {
    'train':
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize
        
    ]),
    'test':
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize
    ])
}

image_datasets = {
    'train': 
    datasets.ImageFolder(input_path + 'train', data_transforms['train']),
}

dataloaders = {
    'train':
    torch.utils.data.DataLoader(image_datasets['train'],
                  batch_size=32,
                  shuffle=True,
                  num_workers=0)
}

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

# Model

In [6]:
model = models.resnet50(pretrained=True).to(device)
    
for param in model.parameters():
    param.requires_grad = False   
    
model.fc = nn.Sequential(
        nn.Linear(2048, 22)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters())

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [7]:
def train_model(model, criterion, optimizer, num_epochs=3):
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(image_datasets[phase])
            epoch_acc = running_corrects.double() / len(image_datasets[phase])

            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
                                                        epoch_loss,
                                                        epoch_acc))
    return model

In [8]:
model_trained = train_model(model, criterion, optimizer, num_epochs=140)

Epoch 1/140
----------
train loss: 3.1811, acc: 0.0545
Epoch 2/140
----------
train loss: 2.7226, acc: 0.1909
Epoch 3/140
----------
train loss: 2.3262, acc: 0.5727
Epoch 4/140
----------
train loss: 1.9675, acc: 0.7273
Epoch 5/140
----------
train loss: 1.5904, acc: 0.8909
Epoch 6/140
----------
train loss: 1.3332, acc: 0.9182
Epoch 7/140
----------
train loss: 1.0584, acc: 0.9636
Epoch 8/140
----------
train loss: 0.7901, acc: 0.9818
Epoch 9/140
----------
train loss: 0.6644, acc: 1.0000
Epoch 10/140
----------
train loss: 0.5385, acc: 0.9909
Epoch 11/140
----------
train loss: 0.4182, acc: 0.9818
Epoch 12/140
----------
train loss: 0.2875, acc: 1.0000
Epoch 13/140
----------
train loss: 0.2833, acc: 1.0000
Epoch 14/140
----------
train loss: 0.1921, acc: 1.0000
Epoch 15/140
----------
train loss: 0.1834, acc: 1.0000
Epoch 16/140
----------
train loss: 0.1439, acc: 1.0000
Epoch 17/140
----------
train loss: 0.1153, acc: 1.0000
Epoch 18/140
----------
train loss: 0.1099, acc: 1.0000
E

In [9]:
from PIL import Image
validation_img_paths = []
for i in range(517):
  validation_img_paths.append(input_path + 'test/{0}.jpg'.format(i))
img_list = [Image.open(img_path) for img_path in validation_img_paths]

validation_batch = torch.stack([data_transforms['test'](img).to(device)
                  for img in img_list])



In [10]:
pred_logits_tensor = model(validation_batch)
pred_logits_tensor

tensor([[ 6.1249, -3.8705, -1.5837,  ..., -1.5303, -1.6536, -2.7447],
        [ 9.5199, -1.8672, -3.7043,  ..., -1.8341, -3.4324, -4.6263],
        [ 7.5376, -4.2895, -0.5940,  ...,  0.2937, -0.2039, -4.6322],
        ...,
        [-3.3767,  0.7200, -6.8059,  ..., -3.7916, -6.3538, -3.2238],
        [-3.5396, -2.1822, -4.6488,  ..., -3.3869, -4.7890, -1.4765],
        [-2.3685, -0.7151, -7.5703,  ..., -2.4664, -3.9343, -3.1692]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [24]:
pred_probs = F.softmax(pred_logits_tensor, dim=1).cpu().data.numpy()

dic = {}
tmp = image_datasets['train'].class_to_idx
for i in tmp:
  dic[tmp[i]] = i
res = list(map(lambda x: int(dic[x]), pred_probs.argmax(axis=1)))
print(res)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 2, 2, 21, 2, 2, 2, 2, 21, 2, 2, 2, 2, 2, 21, 2, 16, 2, 2, 2, 5, 2, 12, 2, 2, 2, 2, 21, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 4, 4, 18, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 5, 5, 4, 18, 4, 4, 5, 5, 5, 8, 19, 5, 5, 5, 5, 5, 4, 5, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 10, 7, 6, 7, 7, 7, 7, 7, 16, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 16, 16, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1

In [25]:
# output
f = open('submission.csv', 'w')
f.write('id,category\n')
for i, c in enumerate(res):
  f.write(str(i)+','+str(c)+"\n")
f.close()

ans = []
#test for ground truth
f = open('ground_truth.csv', 'w')
f.write('id,category\n')
for i in range(28):
  f.write(str(i)+",0\n")
  ans.append(0)
for i in range(28, 55):
  f.write(str(i)+",1\n")
  ans.append(1)
for i in range(55, 84):
  f.write(str(i)+",2\n")
  ans.append(2)
for i in range(84, 101):
  f.write(str(i)+",3\n")
  ans.append(3)
for i in range(101, 130):
  f.write(str(i)+",4\n")
  ans.append(4)
for i in range(130, 147):
  f.write(str(i)+",5\n")
  ans.append(5)
for i in range(147, 171):
  f.write(str(i)+",6\n")
  ans.append(6)
for i in range(171, 196):
  f.write(str(i)+",7\n")
  ans.append(7)
for i in range(196, 223):
  f.write(str(i)+",8\n")
  ans.append(8)
for i in range(223, 242):
  f.write(str(i)+",9\n")
  ans.append(9)
for i in range(242, 263):
  f.write(str(i)+",10\n")
  ans.append(10)
for i in range(263, 290):
  f.write(str(i)+",11\n")
  ans.append(11)
for i in range(290, 307):
  f.write(str(i)+",12\n")
  ans.append(12)
for i in range(307, 327):
  f.write(str(i)+",13\n")
  ans.append(13)
for i in range(327, 352):
  f.write(str(i)+",14\n")
  ans.append(14)
for i in range(352, 377):
  f.write(str(i)+",15\n")
  ans.append(15)
for i in range(377, 398):
  f.write(str(i)+",16\n")
  ans.append(16)
for i in range(398, 427):
  f.write(str(i)+",17\n")
  ans.append(17)
for i in range(427, 452):
  f.write(str(i)+",18\n")
  ans.append(18)
for i in range(452, 469):
  f.write(str(i)+",19\n")
  ans.append(19)
for i in range(469, 492):
  f.write(str(i)+",20\n")
  ans.append(20)
for i in range(492, 517):
  f.write(str(i)+",21\n")
  ans.append(21)
f.close()

In [26]:
# Compare
corr = 0
for i in range(517):
  if ans[i] == res[i]:
    corr += 1
print(corr / 517)

0.851063829787234


## Submit

In [27]:
# UPLOAD DANGER!!
!kaggle competitions submit -c cs480winter2022 -f submission.csv -m "Final Attempt"

  0% 0.00/3.18k [00:00<?, ?B/s]100% 3.18k/3.18k [00:00<00:00, 16.1kB/s]
Successfully submitted to UW-CS480-winter22