In [1]:
from google.colab import drive
drive.mount('/content/drive')

FOLDERNAME = 'learn'
assert FOLDERNAME is not None, "error"

import sys
sys.path.append('/content/drive/My drive/{}'.format(FOLDERNAME))

%cd /content/drive/My\ Drive /%FOLDERNAME/datasets/
!bash get_datasets.sh
%cd /content/drive/My\ Drive/%FOLDERNAME

Mounted at /content/drive
[Errno 2] No such file or directory: '/content/drive/My Drive /%FOLDERNAME/datasets/'
/content
bash: get_datasets.sh: No such file or directory
[Errno 2] No such file or directory: '/content/drive/My Drive/%FOLDERNAME'
/content


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F
import numpy as np
# Runtime configuration shared by the cells below.
USE_GPU = True        # set to False to force CPU execution
dtype = torch.float32  # floating-point type used for input tensors
print_every = 100      # number of iterations between progress reports

# Use CUDA only when it is both requested and actually available.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

print('using device:', device)

using device: cuda


In [3]:
# Number of training-split images used for training; the rest of the
# training split is held out for validation.
NUM_TRAIN = 49000

# Convert images to tensors and normalize each channel.
# NOTE(review): the constants are presumably the CIFAR-10 per-channel
# mean / std -- confirm if the dataset changes.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Training data: the first NUM_TRAIN indices of the training split.
cifar10_train = dset.CIFAR10('./datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# Validation data: the remaining indices of the same training split.
# The loader wraps cifar10_val (the original wrapped cifar10_train and left
# cifar10_val unused), and the upper bound comes from the dataset length
# instead of a hard-coded 50000.
cifar10_val = dset.CIFAR10('./datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64,
                        sampler=sampler.SubsetRandomSampler(
                            range(NUM_TRAIN, len(cifar10_val))))

# Test data: the separate split selected with train=False, read in order.
cifar10_test = dset.CIFAR10('./datasets', train=False, download=True,
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

100%|██████████| 170M/170M [00:16<00:00, 10.6MB/s]


In [4]:
def check_accuracy(loader, model):
  """Evaluate `model` on every batch of `loader` and print the accuracy.

  Args:
    loader: iterable of (inputs, labels) batches. `loader.dataset.train` is
      read only to choose the printed header ("validation" vs "test").
    model: an nn.Module mapping an input batch to per-class scores; the
      predicted class is the argmax over dimension 1.

  Returns:
    The fraction of correctly classified samples as a float in [0, 1], or
    0.0 when the loader yields no samples.

  Side effects:
    Puts the model in eval mode (it is not switched back) and prints a
    summary line. Reads the module-level `device` and `dtype`.
  """
  if loader.dataset.train:
    print('Checking accuracy on validation set')
  else:
    print('Checking accuracy on test set')

  num_correct = 0
  num_samples = 0
  model.eval()
  with torch.no_grad():
    for x, y in loader:
      x = x.to(device=device, dtype=dtype)
      # Labels are class indices: keep them integral instead of casting to
      # the floating-point input dtype.
      y = y.to(device=device, dtype=torch.long)
      scores = model(x)
      _, preds = scores.max(1)
      # .item() keeps the running count a plain Python int.
      num_correct += (preds == y).sum().item()
      num_samples += preds.size(0)

  # Guard against an empty loader instead of raising ZeroDivisionError.
  acc = float(num_correct) / num_samples if num_samples > 0 else 0.0
  print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100*acc))
  return acc

In [30]:
def train(model, params, epochs=1):
  """Train `model` on `loader_train` with cross-entropy loss.

  Args:
    model: nn.Module mapping an input batch to per-class scores.
    params: the optimizer that updates the model's parameters (callers in
      this notebook pass an `optim.AdamW` instance). The parameter name is
      kept for backward compatibility.
    epochs: number of full passes over `loader_train`.

  Side effects:
    Moves the model to the module-level `device`, updates its parameters,
    and every `print_every` iterations prints the loss and runs
    `check_accuracy` on `loader_val`.
  """
  # Use the optimizer that was passed in rather than a same-named global.
  optimizer = params
  model = model.to(device=device)
  for e in range(epochs):
    print(f"epoch {e+1} / {epochs}:")
    for t, (x, y) in enumerate(loader_train):
      # check_accuracy() switches the model to eval mode, so training mode
      # is re-enabled at the start of every iteration.
      model.train()
      x = x.to(device=device, dtype=dtype)
      y = y.to(device=device, dtype=torch.long)

      scores = model(x)
      loss = F.cross_entropy(scores, y)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if t % print_every == 0:
        print('Iteration %d, loss = %.4f' % (t, loss.item()))
        check_accuracy(loader_val, model)
        print()

In [6]:
torch.manual_seed(42)

class MLPNet(nn.Module):
  """Fully connected classifier with three ReLU hidden layers.

  Args:
    input_size: number of features per sample after flattening.
    hidden_size: indexable of hidden widths; entries 0, 1 and 2 are used.
    num_classes: number of output scores per sample.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    # Widths of consecutive layers, input first and class scores last.
    widths = [input_size, hidden_size[0], hidden_size[1], hidden_size[2], num_classes]
    # nn.Linear includes a bias term by default.
    self.w1 = nn.Linear(widths[0], widths[1])
    self.w2 = nn.Linear(widths[1], widths[2])
    self.w3 = nn.Linear(widths[2], widths[3])
    self.w4 = nn.Linear(widths[3], widths[4])
    self.flatten = nn.Flatten()

  def forward(self, x):
    """Return class scores for the batch `x`; no activation on the output."""
    hidden = self.flatten(x)
    for layer in (self.w1, self.w2, self.w3):
      hidden = F.relu(layer(hidden))
    return self.w4(hidden)

def testMLP():
  """Shape check: push an all-zero batch through MLPNet and print the size."""
  batch_size, feature_count = 64, 50
  net = MLPNet(feature_count, [100, 100, 100], 10)
  dummy = torch.zeros((batch_size, feature_count), dtype=dtype)
  print(net(dummy).size())
testMLP()

torch.Size([64, 10])


In [20]:
# Hyperparameters for the MLP run.
learning_rate = 1e-3
hidden_size = [128, 128, 64]  # widths of the three hidden layers

# Input size 3 * 32 * 32: presumably the flattened 3x32x32 image -- TODO confirm.
model = MLPNet(3 * 32 * 32, hidden_size, 10)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# The optimizer is passed as train's second positional argument; epochs
# keeps its default of 1.
train(model, optimizer)

epoch 1 / 1:
Iteration 0, loss = 2.3268
Checking accuracy on validation set
Got 120 / 1000 correct (12.00)

Iteration 100, loss = 1.6313
Checking accuracy on validation set
Got 365 / 1000 correct (36.50)

Iteration 200, loss = 1.8362
Checking accuracy on validation set
Got 395 / 1000 correct (39.50)

Iteration 300, loss = 1.4359
Checking accuracy on validation set
Got 411 / 1000 correct (41.10)

Iteration 400, loss = 1.7349
Checking accuracy on validation set
Got 408 / 1000 correct (40.80)

Iteration 500, loss = 1.4389
Checking accuracy on validation set
Got 453 / 1000 correct (45.30)

Iteration 600, loss = 1.7878
Checking accuracy on validation set
Got 461 / 1000 correct (46.10)

Iteration 700, loss = 1.5371
Checking accuracy on validation set
Got 465 / 1000 correct (46.50)



In [7]:
class ConvNet(nn.Module):
  """Two-layer convolutional classifier followed by a linear layer.

  Both convolutions are padded so the spatial size is preserved, so the
  classifier sees `channel_2 * height * width` features.

  Args:
    in_channel: number of channels of the input images.
    channel_1: output channels of the first (5x5) convolution.
    channel_2: output channels of the second (3x3) convolution.
    num_classes: number of output scores per sample.
    input_size: spatial size of the input as (height, width), or a single
      int for square inputs. Defaults to (32, 32), the value that was
      previously hard-coded.
  """

  def __init__(self, in_channel, channel_1, channel_2, num_classes,
               input_size=(32, 32)):
    super().__init__()
    if isinstance(input_size, int):
      input_size = (input_size, input_size)
    height, width = input_size

    self.conv1 = nn.Conv2d(in_channel, channel_1, 5, padding=2, bias=True)
    self.conv2 = nn.Conv2d(channel_1, channel_2, 3, padding=1, bias=True)
    self.flatten = nn.Flatten()
    # The classifier has no bias term, as in the original definition.
    self.fc = nn.Linear(channel_2 * height * width, num_classes, bias=False)

  def forward(self, x):
    """Return class scores for a batch of images `x`."""
    x = self.conv2(F.relu(self.conv1(x)))
    x = F.relu(x)
    x = self.flatten(x)
    out = self.fc(x)
    return out

def testConvNet():
  """Shape check: run an all-zero 64x3x32x32 batch through ConvNet."""
  net = ConvNet(3, 16, 16, 10)
  dummy = torch.zeros((64, 3, 32, 32), dtype=dtype)
  print(net(dummy).size())
testConvNet()

torch.Size([64, 10])


In [9]:
# Hyperparameters for the ConvNet run.
learning_rate = 9e-4
in_channel, channel_1, channel_2, num_classes = 3, 64, 32, 10

# Rebinds the module-level `model` and `optimizer` used by earlier runs.
model = ConvNet(in_channel, channel_1, channel_2, num_classes)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# The optimizer is passed as train's second positional argument; epochs
# keeps its default of 1.
train(model, optimizer)

epoch 1 / 1:
Iteration 0, loss = 2.3414
Checking accuracy on validation set
Got 123 / 1000 correct (12.30)

Iteration 100, loss = 1.8324
Checking accuracy on validation set
Got 437 / 1000 correct (43.70)

Iteration 200, loss = 1.5288
Checking accuracy on validation set
Got 441 / 1000 correct (44.10)

Iteration 300, loss = 1.1818
Checking accuracy on validation set
Got 497 / 1000 correct (49.70)

Iteration 400, loss = 1.6356
Checking accuracy on validation set
Got 512 / 1000 correct (51.20)

Iteration 500, loss = 1.4074
Checking accuracy on validation set
Got 560 / 1000 correct (56.00)

Iteration 600, loss = 1.3649
Checking accuracy on validation set
Got 562 / 1000 correct (56.20)

Iteration 700, loss = 1.3823
Checking accuracy on validation set
Got 567 / 1000 correct (56.70)



In [7]:
from collections import OrderedDict

class Transition(nn.Sequential):
  """BatchNorm -> ReLU -> 1x1 conv -> 2x2 average pool, applied in order.

  Args:
    in_channel: number of channels entering the transition.
    out_channel: number of channels produced by the 1x1 convolution.
  """

  def __init__(self, in_channel, out_channel):
    super().__init__()
    stages = (
        ('norm', nn.BatchNorm2d(in_channel)),
        ('relu', nn.ReLU(inplace=True)),
        ('conv', nn.Conv2d(in_channel, out_channel, 1, bias=False)),
        ('pool', nn.AvgPool2d(2, 2)),
    )
    # Registration order is the execution order of the Sequential.
    for label, stage in stages:
      self.add_module(label, stage)

In [8]:
class DenseLayer(nn.Module):
  """Bottleneck layer: BN -> ReLU -> 1x1 conv -> BN -> ReLU -> 3x3 conv.

  Args:
    in_channel: total channels of the concatenated input.
    growth_rate: number of channels this layer produces.
    bottleneck_size: the 1x1 convolution outputs
      `growth_rate * bottleneck_size` channels.
    drop_rate: dropout probability applied to the output; values <= 0
      disable dropout.
  """

  def __init__(self, in_channel, growth_rate, bottleneck_size, drop_rate):
    super().__init__()
    inner_channels = growth_rate * bottleneck_size

    self.norm1 = nn.BatchNorm2d(in_channel)
    self.relu1 = nn.ReLU(inplace=True)
    self.conv1 = nn.Conv2d(in_channel, inner_channels, 1, bias=False)

    self.norm2 = nn.BatchNorm2d(inner_channels)
    self.relu2 = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(inner_channels, growth_rate, 3, padding=1, bias=False)

    self.dropout = float(drop_rate)

  def forward(self, x):
    """Accept a single tensor or a sequence of tensors to concatenate on dim 1."""
    feature_maps = x if not torch.is_tensor(x) else [x]
    stacked = torch.cat(feature_maps, 1)
    bottleneck = self.conv1(self.relu1(self.norm1(stacked)))
    output = self.conv2(self.relu2(self.norm2(bottleneck)))

    if self.dropout > 0:
      return F.dropout(output, p=self.dropout, training=self.training)
    return output

In [9]:
class DenseBlock(nn.ModuleDict):
  """Stack of DenseLayers in which each layer receives all earlier outputs.

  Layer k (1-based) is registered as `denselayer{k}` and is built for
  `in_channel + (k - 1) * growth_rate` input channels.

  Args:
    num_layers: number of DenseLayers in the block.
    in_channel: channels of the block input.
    growth_rate, bottleneck_size, drop_rate: forwarded to every DenseLayer.
  """

  def __init__(self, num_layers, in_channel, growth_rate, bottleneck_size, drop_rate):
    super().__init__()
    channels = in_channel
    for index in range(1, num_layers + 1):
      self.add_module(
          f'denselayer{index}',
          DenseLayer(channels, growth_rate, bottleneck_size, drop_rate))
      # Each layer contributes growth_rate channels to the next layer's input.
      channels += growth_rate

  def forward(self,x):
    """Return the input and every layer's output concatenated on dim 1."""
    collected = [x]
    for layer in self.values():
      collected.append(layer(collected))
    return torch.cat(collected, 1)

In [10]:
class DenseNet(nn.Module):
  """Densely connected classifier built from DenseBlocks and Transitions.

  Args:
    in_channel: number of channels produced by the stem convolution (the
      network input itself is fixed to 3 channels).
    growth_rate: channels added by every layer of a DenseBlock.
    bottleneck_size: bottleneck multiplier forwarded to each DenseBlock.
    block_config: number of layers in each DenseBlock; may be empty.
    drop_rate: dropout probability forwarded to each DenseBlock.
    num_classes: number of output scores per sample.
  """

  def __init__(self, in_channel=32, growth_rate=16, bottleneck_size=4,
               block_config=(6,12,8), drop_rate=0, num_classes=10):
    super().__init__()
    self.features = nn.Sequential(OrderedDict([
        ('conv0', nn.Conv2d(3, in_channel, 7, padding=3, bias=False)),
        ('norm0', nn.BatchNorm2d(in_channel)),
        ('relu0', nn.ReLU(inplace=True)),
    ]))
    num_features = in_channel
    num_blocks = len(block_config)
    for i, num_layers in enumerate(block_config):
      block = DenseBlock(num_layers, num_features, growth_rate, bottleneck_size, drop_rate)
      self.features.add_module(f'denseblock{i+1}', block)
      num_features += num_layers * growth_rate

      # Every block except the last is followed by a Transition that halves
      # the channel count.
      if i != num_blocks - 1:
        trans = Transition(num_features, num_features // 2)
        self.features.add_module(f'transition{i+1}', trans)
        num_features = num_features // 2

    # Index of the closing norm/relu pair. Equal to the previous `i + 2`
    # for non-empty configs, but derived from the config length so an empty
    # `block_config` no longer fails on an unbound loop variable.
    final_index = num_blocks + 1
    self.features.add_module(f'norm{final_index}', nn.BatchNorm2d(num_features))
    self.features.add_module(f'relu{final_index}', nn.ReLU(inplace=True))
    self.fc = nn.Linear(num_features, num_classes)
    self.flatten = nn.Flatten()

    # Weight initialisation; Linear weights keep their default init and
    # only the bias is zeroed.
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.constant_(m.bias, 0)

  def forward(self, x):
    """Return class scores: features -> global average pool -> linear."""
    out = F.adaptive_avg_pool2d(self.features(x), (1, 1))
    out = self.fc(self.flatten(out))

    return out


In [11]:
def testDenseNet():
  """Shape check: run an all-zero 64x3x32x32 batch through a default DenseNet."""
  net = DenseNet()
  dummy = torch.zeros((64, 3, 32, 32), dtype=dtype)
  print(net(dummy).size())
testDenseNet()

torch.Size([64, 10])


In [31]:
# Hyperparameters for the DenseNet run.
learning_rate = 2e-4

# DenseNet with its default configuration, moved to the selected device.
# Rebinds the module-level `model` and `optimizer` used by earlier runs.
model = DenseNet().to(device)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# The optimizer is passed as train's second positional argument.
train(model, optimizer, epochs=20)

epoch 1 / 20:
Iteration 0, loss = 2.2961
Checking accuracy on validation set
Got 132 / 1000 correct (13.20)

Iteration 100, loss = 1.8829
Checking accuracy on validation set
Got 363 / 1000 correct (36.30)

Iteration 200, loss = 1.6674
Checking accuracy on validation set
Got 403 / 1000 correct (40.30)

Iteration 300, loss = 1.5245
Checking accuracy on validation set
Got 442 / 1000 correct (44.20)

Iteration 400, loss = 1.4496
Checking accuracy on validation set
Got 467 / 1000 correct (46.70)

Iteration 500, loss = 1.4042
Checking accuracy on validation set
Got 508 / 1000 correct (50.80)

Iteration 600, loss = 1.3717
Checking accuracy on validation set
Got 519 / 1000 correct (51.90)

Iteration 700, loss = 1.3722
Checking accuracy on validation set
Got 532 / 1000 correct (53.20)

epoch 2 / 20:
Iteration 0, loss = 1.2395
Checking accuracy on validation set
Got 544 / 1000 correct (54.40)

Iteration 100, loss = 1.3114
Checking accuracy on validation set
Got 545 / 1000 correct (54.50)

Iterat

In [32]:
# Validation accuracy of whichever network is currently bound to `model`
# (presumably the DenseNet from the preceding cell -- confirm cell order).
check_accuracy(loader_val, model)

Checking accuracy on validation set
Got 739 / 1000 correct (73.90)
