In [1]:
import torch
import torch.nn as nn
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch import optim

# For reproducability
torch.manual_seed(0)

<torch._C.Generator at 0x7f79c2343050>

# Full Description of the Convolutional Layer

In [2]:
layer = nn.Conv2d(in_channels = 3,
                  out_channels = 64,
                  kernel_size = (5, 5),
                  stride = 2,
                  padding = 1
                  )

# Closing the Loop on MNIST with Convolutional Networks

In [3]:
class MNISTConvNet(nn.Module):
  def __init__(self):
    super(MNISTConvNet, self).__init__()
    self.conv1 = nn.Sequential(
        nn.Conv2d(1, 32, 5, padding='same'),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.conv2 = nn.Sequential(
        nn.Conv2d(32, 64, 5, padding='same'),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.fc1 = nn.Sequential(
        nn.Flatten(),
        nn.Linear(7*7*64, 1024),
        nn.Dropout(0.5),
        nn.Linear(1024, 10)
    )

  def forward(self, x):
    x = self.conv1(x)
    x = self.conv2(x)
    return self.fc1(x)

In [4]:
trainset = MNIST('.', train=True, download=True, 
                      transform=ToTensor())
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [5]:
lr = 1e-4
num_epochs = 40

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MNISTConvNet().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

In [6]:
for epochs in range(num_epochs):
  running_loss = 0.0
  num_correct = 0
  for inputs, labels in trainloader:
    optimizer.zero_grad()
    outputs = model(inputs.to(device))
    loss = loss_fn(outputs, labels.to(device))
    loss.backward()
    running_loss += loss.item()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    num_correct += (idx == labels.to(device)).sum().item()
  print('Loss: {} Accuracy: {}'.format(running_loss/len(trainloader),
        num_correct/len(trainloader)))

Loss: 2.296968076529025 Accuracy: 8.176972281449894
Loss: 2.2871928072687404 Accuracy: 10.66950959488273
Loss: 2.27689610081695 Accuracy: 13.422174840085288
Loss: 2.265633254163047 Accuracy: 16.12366737739872
Loss: 2.2538989195183143 Accuracy: 18.872068230277186
Loss: 2.2400965372890806 Accuracy: 21.818763326226012
Loss: 2.2241890799007944 Accuracy: 24.513859275053306
Loss: 2.206875492260654 Accuracy: 26.886993603411515
Loss: 2.1858361100337143 Accuracy: 29.32089552238806
Loss: 2.1611065976401127 Accuracy: 31.600213219616204
Loss: 2.131434441121148 Accuracy: 33.833688699360344
Loss: 2.0956501632865305 Accuracy: 35.80383795309169
Loss: 2.0523823859340853 Accuracy: 37.11727078891258
Loss: 1.9991100185207213 Accuracy: 38.440298507462686
Loss: 1.9336972034562117 Accuracy: 39.817697228144986
Loss: 1.8544687518178782 Accuracy: 40.87953091684435
Loss: 1.7600860210624076 Accuracy: 41.90618336886994
Loss: 1.6509935147980892 Accuracy: 42.89658848614072
Loss: 1.5301751270731374 Accuracy: 44.09061

# Image Preprocessing Pipelines Enable More Robust Models

In [7]:
from torchvision import transforms

transform = transforms.Normalize(mean = (0.1307,),
                                 std = (0.3081,)
                                 )

In [8]:
transform = transforms.Compose([
      transforms.RandomCrop(224),
      transforms.RandomHorizontalFlip(),
      transforms.ColorJitter(brightness=0,
                             contrast=0,
                             saturation=0,
                             hue=0),
      transforms.ToTensor(),
      transforms.Normalize(mean = (0.1307,),
                           std = (0.3081,)
                           )
      ])

# Accelerating Training with Batch Normalization


In [9]:
layer = nn.BatchNorm2d(num_features=32,
                       eps=1e-05,
                       momentum=0.1,
                       affine = True,
                       track_running_stats = True)

In [10]:
layer = nn.BatchNorm1d(num_features=32)

#Group normalization for memory constrained learning tasks


In [11]:
layer = nn.GroupNorm(num_groups=1,
                     num_channels=32)

#Building a Convolutional Network for CIFAR-10


In [12]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 3, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),
        )
        self.block2 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(9216, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(128,10),
            nn.BatchNorm1d(10)
        )

    def forward(self, x):
        x = self.block1(x)
        return self.block2(x)

# Building a residual network with superhuman vision

In [13]:
from torchvision.models import resnet34

model = resnet34()

In [14]:
class ResidualBlock(nn.Module):
  def __init__(self, in_layers, out_layers, downsample=None):
    super(ResidualBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_layers, out_layers,
                           kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(out_layers)
    self.conv2 = nn.Conv2d(out_layers, out_layers,
                           kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(out_layers)
    self.downsample = downsample
    self.relu = nn.ReLU(inplace=True)

  def forward(self, inp):
    # Residual block
    out = self.conv1(inp)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    
    if self.downsample:
      inp = self.downsample(inp)
    
    # Shortcut connection
    out += inp
    return out

In [15]:
downsample = nn.Sequential(
      nn.Conv2d(64, 128, kernel_size=1, stride=1, bias=False),
      nn.BatchNorm2d(128)
    )

In [16]:
class ResNet34(nn.Module):
  def __init__(self):
    super(ResNet34, self).__init__()

    self.conv1 = nn.Sequential(
      nn.Conv2d(3, 64, kernel_size=7,
                stride=2, padding=3, bias=False),
      nn.BatchNorm2d(64),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=3,
                   stride=2, padding=1)
    )

    # Note that each ResidualBlock has 2 conv layers
    # 3 blocks in a row, 6 conv layers
    self.comp1 = nn.Sequential(
      ResidualBlock(64, 64),
      ResidualBlock(64, 64),
      ResidualBlock(64, 64)
    )

    # 4 blocks in a row, 8 conv layers
    downsample1 = nn.Sequential(
      nn.Conv2d(64, 128, kernel_size=1,
             stride=1, bias=False),
      nn.BatchNorm2d(128)
    )
    self.comp2 = nn.Sequential(
      ResidualBlock(64, 128, downsample=downsample1),
      ResidualBlock(128, 128),
      ResidualBlock(128, 128),
      ResidualBlock(128, 128)
    )
    
    # 6 blocks in a row, 12 conv layers
    downsample2 = nn.Sequential(
      nn.Conv2d(128, 256, kernel_size=1, stride=1, bias=False),
      nn.BatchNorm2d(256)
    )
    self.comp3 = nn.Sequential(
      ResidualBlock(128, 256, downsample=downsample2),
      ResidualBlock(256, 256),
      ResidualBlock(256, 256),
      ResidualBlock(256, 256),
      ResidualBlock(256, 256),
      ResidualBlock(256, 256),
    )
    
    # 3 blocks in a row, 6 conv layers
    downsample3 = nn.Sequential(
      nn.Conv2d(256, 512, kernel_size=1, stride=1, bias=False),
      nn.BatchNorm2d(512)
    )
    self.comp4 = nn.Sequential(
      ResidualBlock(256, 512, downsample=downsample3),
      ResidualBlock(512, 512),
      ResidualBlock(512, 512)   
    )

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    # ImageNet classifier: 1000 classes
    self.fc = nn.Linear(512, 1000)

  def forward(self, inp):
    out = self.conv1(inp)
    
    out = self.comp1(out)
    out = self.comp2(out)
    out = self.comp3(out)
    out = self.comp4(out)

    out = self.avgpool(out)
    out = torch.flatten(out, 1)
    out = self.fc(out)

    return out