Section 2

In [11]:
!pip install torch
!pip install torchvision
!pip install torchsummary
!pip install skorch



In [12]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import optimizer
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torchsummary import summary
import gc
import time
import skorch

#from d2l import torch as d2l
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [13]:
seed = 0
torch.manual_seed(seed)

<torch._C.Generator at 0x1d123466090>

In [14]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                        nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [37]:
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes = 10):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Sequential(
                        nn.Conv2d(1, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 32, layers[0], stride = 1)
        self.layer1 = self._make_layer(block, 64, layers[1], stride = 2)
        self.layer2 = self._make_layer(block, 128, layers[2], stride = 2)
        self.layer3 = self._make_layer(block, 256, layers[3], stride = 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(256, num_classes)
        
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
    
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [29]:
model = ResNet(ResidualBlock, [2,2,2,2]).to(device)
summary(model, (1, 48, 48))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 24, 24]           3,200
       BatchNorm2d-2           [-1, 64, 24, 24]             128
              ReLU-3           [-1, 64, 24, 24]               0
         MaxPool2d-4           [-1, 64, 12, 12]               0
            Conv2d-5           [-1, 32, 12, 12]          18,464
       BatchNorm2d-6           [-1, 32, 12, 12]              64
              ReLU-7           [-1, 32, 12, 12]               0
            Conv2d-8           [-1, 32, 12, 12]           9,248
       BatchNorm2d-9           [-1, 32, 12, 12]              64
           Conv2d-10           [-1, 32, 12, 12]           2,080
      BatchNorm2d-11           [-1, 32, 12, 12]              64
             ReLU-12           [-1, 32, 12, 12]               0
    ResidualBlock-13           [-1, 32, 12, 12]               0
           Conv2d-14           [-1, 32,

In [18]:
imsize = 48

dataTrain = datasets.FashionMNIST(
    root="dataset",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Resize((imsize,imsize), antialias=True)])
)
dataValid = datasets.FashionMNIST(
    root="dataset",
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Resize((imsize,imsize), antialias=True)])
)


print(len(dataValid[0][0]))

valid_y = np.array([y for x,y in iter(dataValid)])
test_y = np.array([y for x,y in iter(dataTrain)])

1


Implementing Trainer

In [8]:
def predict(model, loader_valid):
  with torch.no_grad():
    correct = 0
    total = 0
    for x,y in loader_valid:
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      _, predicted = torch.max(output.data, 1)
      total += y.size(0)
      correct += (predicted == y).sum().item()
      del x,y,output
  return 100*(correct/total)


#CLASStorch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, 
#weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False)
def trainer(epochs, dataloader, model, lr, loader_valid):
  optimizer = torch.optim.SGD(model.parameters(), lr)
  loss_f = nn.CrossEntropyLoss()
  print("Epoch, Loss, Time, val_acc")

  for epoch in range(epochs):
    start = time.perf_counter()
    for x,y in dataloader:
      x = x.to(device)
      y = y.to(device)
      pred = model(x)
      loss = loss_f(pred, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      del x, y
      torch.cuda.empty_cache()
      gc.collect()
    end = time.perf_counter()
    acc = predict(model, loader_valid)
    print(epoch + 1,",", loss.item(),",", end-start,",", acc)


In [9]:
epochs = 10
num_w = [2]
lr = [0.1]
b_size = 1028

for i in range(0,len(num_w)):
    loader_train = DataLoader(dataTrain, batch_size=b_size, shuffle=True, num_workers=num_w[i])
    loader_valid = DataLoader(dataValid, batch_size=b_size, shuffle=True, num_workers=num_w[i])

    for j in range(0, len(lr)):
        print("Num_W:", num_w[i], " LR:", lr[j])
        model = ResNet(ResidualBlock, [2,2,2,2]).to(device)
        trainer(epochs ,loader_train ,model, lr[j], loader_valid)

Num_W: 2  LR: 0.1
Epoch, Loss, Time, val_acc
1 , 0.38257092237472534 , 57.15927200001897 , 83.45


KeyboardInterrupt: 

Final Output

In [20]:
def predict(model, loader_valid):
  with torch.no_grad():
    correct = 0
    total = 0
    for x,y in loader_valid:
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      _, predicted = torch.max(output.data, 1)
      total += y.size(0)
      correct += (predicted == y).sum().item()
      del x,y,output
  return 100*(correct/total)


#CLASStorch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, 
#weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False)
def trainer(epochs, dataloader, model, lr, loader_valid):
  optimizer = torch.optim.SGD(model.parameters(), lr)
  scaler = torch.cuda.amp.GradScaler()
  loss_f = nn.CrossEntropyLoss()
  print("Epoch, Loss, Time, val_acc")

  for epoch in range(epochs):
    start = time.perf_counter()
    for x,y in dataloader:
      x = x.to(device)
      y = y.to(device)
      with torch.cuda.amp.autocast():
        pred = model(x)
        loss = loss_f(pred, y)
      optimizer.zero_grad()
      scaler.scale(loss).backward()
      scaler.step(optimizer)
      scaler.update()
      del x, y
      torch.cuda.empty_cache()
      gc.collect()
    end = time.perf_counter()
    acc = predict(model, loader_valid)
    print(epoch + 1,",", loss.item(),",", end-start,",", acc)

In [38]:
epochs = 10
num_w = [3]
lr = [0.1]
b_size = 4500

for i in range(0,len(num_w)):
    loader_train = DataLoader(dataTrain, batch_size=b_size, shuffle=True, num_workers=num_w[i])
    loader_valid = DataLoader(dataValid, batch_size=b_size, shuffle=True, num_workers=num_w[i])

    for j in range(0, len(lr)):
        print("Num Workers:", num_w[i], " LR:", lr[j])
        model = ResNet(ResidualBlock, [2,2,2,2]).to(device)
        trainer(epochs ,loader_train ,model, lr[j], loader_valid)

Num Workers: 3  LR: 0.1
Epoch, Loss, Time, val_acc
1 , 0.7405751943588257 , 8.531301799986977 , 73.42999999999999
2 , 0.4162430465221405 , 7.911559400003171 , 82.87
3 , 0.37104809284210205 , 7.915440799988573 , 84.78
4 , 0.3938436210155487 , 8.018956500018248 , 85.35000000000001
5 , 0.3397643566131592 , 7.939368500025012 , 84.83000000000001
6 , 0.2798999845981598 , 7.979829399992013 , 87.16000000000001
7 , 0.26615262031555176 , 8.02372060000198 , 87.83999999999999
8 , 0.24686041474342346 , 7.987532500002999 , 87.06
9 , 0.2520042359828949 , 7.888644099992234 , 88.25
10 , 0.22613894939422607 , 7.996164200012572 , 88.64999999999999
