In [2]:
from google.colab import drive
drive.mount('/content/drive')

FOLDERNAME = 'learn'
assert FOLDERNAME is not None, "[!] Enter the foldername."


import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd /content/drive/My\ Drive/$FOLDERNAME/datasets/
!bash get_datasets.sh
%cd /content/drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/learn/datasets
/content/drive/My Drive/learn


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

import numpy as np

USE_GPU = True
dtype = torch.float32

if USE_GPU and torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

print_every = 100
print('using device:', device)

using device: cuda


In [4]:

NUM_TRAIN = 49000

transform = T.Compose([
              T.ToTensor(),
              T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
          ])

cifar10_train = dset.CIFAR10('./datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size = 64,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./datasets', train=True, download=True,
                             transform=transform)
loader_val = DataLoader(cifar10_train, batch_size = 64,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./datasets', train=False, download=True,
                             transform=transform)
loader_test = DataLoader(cifar10_test, batch_size = 64)

In [18]:
NUM_TINY_TRAIN = 5000
NUM_TINY_VAL = 5200
loader_train_tiny = DataLoader(cifar10_train, batch_size = 64,
                               sampler=sampler.SubsetRandomSampler(range(NUM_TINY_TRAIN)))
loader_val_tiny = DataLoader(cifar10_train, batch_size = 64,
                             sampler=sampler.SubsetRandomSampler(range(NUM_TINY_TRAIN, NUM_TINY_VAL)))

In [4]:
def flatten(x):
  N = x.shape[0]
  return x.view(N, -1)

In [14]:
def check_accuracy(loader, model):
  if loader.dataset.train:
      print('Checking accuracy on validation set')
  else:
      print('Checking accuracy on test set')

  num_correct = 0
  num_samples = 0
  model.eval()
  with torch.no_grad():
      for x, y in loader:
          x = x.to(device=device, dtype=dtype)
          y = y.to(device=device, dtype=torch.long)
          scores = model(x)
          _, preds = scores.max(1)
          num_correct += (preds==y).sum()
          num_samples += preds.size(0)
      acc = float(num_correct) / num_samples
      print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100*acc))

In [25]:
def train(model, params, epochs=1):
  model = model.to(device=device)
  for e in range(epochs):
      print(f"epoch {e} / {epochs}:")
      for t, (x, y) in enumerate(loader_train):
          model.train()
          x = x.to(device=device, dtype=dtype)
          y = y.to(device=device, dtype=torch.long)

          scores = model(x)
          loss = F.cross_entropy(scores, y)

          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

          if t % print_every == 0:
            print('Iteration %d, loss = %.4f' % (t, loss.item()))
            check_accuracy(loader_val, model)
            print()

In [10]:
loader_train_tiny

<torch.utils.data.dataloader.DataLoader at 0x7a5b585025a0>

In [22]:
torch.manual_seed(42)

class DeepNet(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.w1 = nn.Linear(input_size, hidden_size[0])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w2 = nn.Linear(hidden_size[0], hidden_size[1])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w3 = nn.Linear(hidden_size[1], hidden_size[2])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w4 = nn.Linear(hidden_size[2], hidden_size[3])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w5 = nn.Linear(hidden_size[3], hidden_size[4])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w6 = nn.Linear(hidden_size[4], num_classes)

  def forward(self, x):
    x = flatten(x)
    x = F.relu(self.w1(x))
    x = F.relu(self.w2(x))
    x = F.relu(self.w3(x))
    x = F.relu(self.w4(x))
    x = F.relu(self.w5(x))
    scores = self.w6(x)
    return scores

def testDeepNet():
  input_size = 50
  hidden_size = [100, 100, 100, 100, 100]
  x = torch.zeros((64, input_size), dtype=dtype)
  model = DeepNet(input_size, hidden_size, 10)
  scores = model(x)
  print(scores.size()) #[64, 10]
testDeepNet()


torch.Size([64, 10])


In [25]:
learning_rate = 9e-4
hidden_size = [256, 256, 128, 64, 32]

model = DeepNet(3 * 32 * 32, hidden_size, 10)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

train(model, optimizer)

Iteration 0, loss = 2.3208
Checking accuracy on validation set
Got 79 / 1000 correct (7.90)

Iteration 100, loss = 1.8002
Checking accuracy on validation set
Got 335 / 1000 correct (33.50)

Iteration 200, loss = 1.7219
Checking accuracy on validation set
Got 363 / 1000 correct (36.30)

Iteration 300, loss = 1.7700
Checking accuracy on validation set
Got 404 / 1000 correct (40.40)

Iteration 400, loss = 1.7041
Checking accuracy on validation set
Got 441 / 1000 correct (44.10)

Iteration 500, loss = 1.5875
Checking accuracy on validation set
Got 436 / 1000 correct (43.60)

Iteration 600, loss = 1.6532
Checking accuracy on validation set
Got 431 / 1000 correct (43.10)

Iteration 700, loss = 1.5248
Checking accuracy on validation set
Got 473 / 1000 correct (47.30)



In [27]:
torch.manual_seed(42)

class ResNet(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.w1 = nn.Linear(input_size, hidden_size[0])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w2 = nn.Linear(hidden_size[0], hidden_size[1])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w3 = nn.Linear(hidden_size[1], hidden_size[2])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w4 = nn.Linear(hidden_size[2], hidden_size[3])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w5 = nn.Linear(hidden_size[3], hidden_size[4])
    nn.init.kaiming_normal_(self.w1.weight)
    self.w6 = nn.Linear(hidden_size[4], num_classes)

  def forward(self, x):
    x = flatten(x)
    x = F.relu(self.w1(x)) + self.w1(x)
    x = F.relu(self.w2(x)) + self.w2(x)
    x = F.relu(self.w3(x)) + self.w3(x)
    x = F.relu(self.w4(x)) + self.w4(x)
    x = F.relu(self.w5(x)) + self.w5(x)
    scores = self.w6(x) + self.w6(x)
    return scores

def testResNet():
  input_size = 50
  hidden_size = [100, 100, 100, 100, 100]
  x = torch.zeros((64, input_size), dtype=dtype)
  model = DeepNet(input_size, hidden_size, 10)
  scores = model(x)
  print(scores.size()) #[64, 10]
testDeepNet()


torch.Size([64, 10])


In [29]:
learning_rate = 9e-4
hidden_size = [256, 256, 128, 64, 32]

model = ResNet(3 * 32 * 32, hidden_size, 10)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

train(model, optimizer)

Iteration 0, loss = 3.4114
Checking accuracy on validation set
Got 170 / 1000 correct (17.00)

Iteration 100, loss = 1.7818
Checking accuracy on validation set
Got 352 / 1000 correct (35.20)

Iteration 200, loss = 2.1045
Checking accuracy on validation set
Got 370 / 1000 correct (37.00)

Iteration 300, loss = 1.8096
Checking accuracy on validation set
Got 371 / 1000 correct (37.10)

Iteration 400, loss = 1.5503
Checking accuracy on validation set
Got 397 / 1000 correct (39.70)

Iteration 500, loss = 1.6976
Checking accuracy on validation set
Got 391 / 1000 correct (39.10)

Iteration 600, loss = 1.7590
Checking accuracy on validation set
Got 411 / 1000 correct (41.10)

Iteration 700, loss = 1.5825
Checking accuracy on validation set
Got 435 / 1000 correct (43.50)



In [33]:
class ResNet2(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2, num_classes):
       super().__init__()
       self.conv1 = nn.Conv2d(in_channel, channel_1, 5, padding=2, bias=True)
       self.conv2 = nn.Conv2d(channel_1, channel_2, 3, padding=1, bias=True)
       self.proj1 = nn.Conv2d(in_channel, channel_1, 5, padding=2, bias=False)
       self.proj2 = nn.Conv2d(channel_1, channel_2, 3, padding=1, bias=False)
       self.fc = nn.Linear(channel_2 * 32 * 32, num_classes, bias=False)
       nn.init.kaiming_normal_(self.conv1.weight)
       nn.init.kaiming_normal_(self.conv2.weight)
       nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
      x = self.conv2(F.relu(self.conv1(x))) + self.proj2(self.proj1(x))
      x = F.relu(x)
      x = flatten(x)
      scores = self.fc(x)
      return scores

def testResNet2():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)
    model = ResNet2(3, 12, 8, 10)
    scores = model(x)
    print(scores.size())

testResNet2()

torch.Size([64, 10])


In [37]:
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16

model = ResNet2(3, channel_1, channel_2, 10)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

train(model, optimizer)

Iteration 0, loss = 3.9482
Checking accuracy on validation set
Got 86 / 1000 correct (8.60)

Iteration 100, loss = 1.7070
Checking accuracy on validation set
Got 330 / 1000 correct (33.00)

Iteration 200, loss = 1.8507
Checking accuracy on validation set
Got 377 / 1000 correct (37.70)

Iteration 300, loss = 1.7634
Checking accuracy on validation set
Got 412 / 1000 correct (41.20)

Iteration 400, loss = 1.7366
Checking accuracy on validation set
Got 426 / 1000 correct (42.60)

Iteration 500, loss = 1.5221
Checking accuracy on validation set
Got 466 / 1000 correct (46.60)

Iteration 600, loss = 1.5241
Checking accuracy on validation set
Got 459 / 1000 correct (45.90)

Iteration 700, loss = 1.5697
Checking accuracy on validation set
Got 372 / 1000 correct (37.20)



In [34]:

class ThreeLayerConvNet(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2, num_classes):
       super().__init__()
       self.conv1 = nn.Conv2d(in_channel, channel_1, 5, padding=2, bias=True)
       self.conv2 = nn.Conv2d(channel_1, channel_2, 3, padding=1, bias=True)
       self.fc = nn.Linear(channel_2 * 32 * 32, num_classes, bias=False)
       nn.init.kaiming_normal_(self.conv1.weight)
       nn.init.kaiming_normal_(self.conv2.weight)
       nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
      x = F.relu(self.conv1(x))
      x = F.relu(self.conv2(x))
      x = flatten(x)
      scores = self.fc(x)
      return scores

def test_ThreadLayerConvNet():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)
    model = ThreeLayerConvNet(3, 12, 8, 10)
    scores = model(x)
    print(scores.size())

test_ThreadLayerConvNet()

torch.Size([64, 10])


In [36]:
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16

model = ThreeLayerConvNet(3, channel_1, channel_2, 10)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

train(model, optimizer)

Iteration 0, loss = 3.0150
Checking accuracy on validation set
Got 109 / 1000 correct (10.90)

Iteration 100, loss = 2.2864
Checking accuracy on validation set
Got 145 / 1000 correct (14.50)

Iteration 200, loss = 2.3062
Checking accuracy on validation set
Got 137 / 1000 correct (13.70)

Iteration 300, loss = 2.1634
Checking accuracy on validation set
Got 231 / 1000 correct (23.10)

Iteration 400, loss = 2.0540
Checking accuracy on validation set
Got 263 / 1000 correct (26.30)

Iteration 500, loss = 1.8451
Checking accuracy on validation set
Got 285 / 1000 correct (28.50)

Iteration 600, loss = 1.8052
Checking accuracy on validation set
Got 354 / 1000 correct (35.40)

Iteration 700, loss = 1.6616
Checking accuracy on validation set
Got 382 / 1000 correct (38.20)



In [11]:
class Block(nn.Module):
  def __init__(self, in_channel, out_channel, stride=1):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(out_channel)
    self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(out_channel)
    self.shortcut = nn.Identity()
    if stride != 1 or in_channel != out_channel:
      self.shortcut = nn.Sequential(nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                                    nn.BatchNorm2d(out_channel))

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out

def testblock():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)  # minibatch size 64, image size [3, 32, 32]
    model = Block(in_channel=3, out_channel=12, stride=1)
    scores = model(x)
    print(scores.size())
testblock()

torch.Size([64, 12, 32, 32])


In [12]:
class ResNet3(nn.Module):
  def __init__(self, block):
     super().__init__()
     self.in_channel=64
     self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
     self.bn1= nn.BatchNorm2d(64)
     self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
     self.layer1 = self._make_block(block, 64, num_blocks[0], stride=1)
     self.layer2 = self._make_block(block, 128, num_blocks[1], stride=2)
     self.layer3 = self._make_block(block, 256, num_blocks[2], stride=2)
     self.layer4 = self._make_block(block, 512, num_blocks[3], stride=2)
     self.avgpool = nn.AdaptiveAvgPool2d((1,1))
     self.fc = nn.Linear(512, num_classes)

  def _make_block(self, block, out_channel, blocks, stride):
    layers = []
    layers.append(block(self.in_channel, out_channel, stride))
    self.in_channel = out_channel
    for _ in range(1, blocks):
      layers.append(block(out_channel, out_channel))
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.maxpool(out)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = self.avgpool(out)
    out = torch.flatten(out, 1)
    out = self.fc(out)
    return out


num_classes=10
num_blocks = [3, 4, 6, 3]
def testResNet3():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)
    model = ResNet3(Block)
    scores = model(x)
    print(scores.size())

testResNet3() # [64, 10]


torch.Size([64, 10])


In [26]:
learning_rate = 2e-4
num_classes=10
num_blocks = [3, 4, 6, 3]
model = ResNet3(Block).to(device)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

print_every = 200
train(model, optimizer, epochs=5)

epoch 0 / 5:
Iteration 0, loss = 2.5820
Checking accuracy on validation set
Got 112 / 1000 correct (11.20)

Iteration 200, loss = 1.4906
Checking accuracy on validation set
Got 456 / 1000 correct (45.60)

Iteration 400, loss = 1.2746
Checking accuracy on validation set
Got 510 / 1000 correct (51.00)

Iteration 600, loss = 1.4656
Checking accuracy on validation set
Got 539 / 1000 correct (53.90)

epoch 1 / 5:
Iteration 0, loss = 0.9280
Checking accuracy on validation set
Got 575 / 1000 correct (57.50)

Iteration 200, loss = 1.2993
Checking accuracy on validation set
Got 613 / 1000 correct (61.30)

Iteration 400, loss = 1.2437
Checking accuracy on validation set
Got 604 / 1000 correct (60.40)

Iteration 600, loss = 1.1063
Checking accuracy on validation set
Got 676 / 1000 correct (67.60)

epoch 2 / 5:
Iteration 0, loss = 0.7674
Checking accuracy on validation set
Got 652 / 1000 correct (65.20)

Iteration 200, loss = 0.7208
Checking accuracy on validation set
Got 640 / 1000 correct (64.00