In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.nn.functional as F

In [2]:
Transform =  transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])
trainingdata = torchvision.datasets.CIFAR10(root='./ data', train=True, download=True, transform=Transform) 
testdata = torchvision.datasets.CIFAR10(root='./data',  train=False, download=True, transform=Transform)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
from torch.utils.data import DataLoader
import torch.nn.functional as F
VAL_SIZE = 0.1
BATCH_SIZE = 64
# generate indices: instead of the actual data we pass in integers instead
train_indices, val_indices, _, _ = train_test_split(
    range(len(trainingdata)),
    trainingdata.targets,
    stratify=trainingdata.targets,
    test_size=VAL_SIZE,
)

# generate subset based on indices
train_split = Subset(trainingdata, train_indices)
val_split = Subset(trainingdata, val_indices)

# create batches
trainDataLoader = DataLoader(train_split, batch_size=BATCH_SIZE, shuffle=True)
valDataLoader = DataLoader(val_split, batch_size=BATCH_SIZE, shuffle=True)
testDataLoader = DataLoader(testdata, batch_size=BATCH_SIZE, shuffle=True)

In [4]:
# trainDataLoader = DataLoader(trainingdata, batch_size= 64, shuffle=True)
# testDataLoader = DataLoader(testdata, batch_size= 64, shuffle=False)

In [5]:
N = 4 # Residual Layers
B = [2,2,2,2] # B[i] - Residual blocks in Residual layer i.
C = [] # C[i] - Channels in Residual layer i
c_0 = 42
for i in range(N):
  C.append(c_0)
  c_0 = 2*c_0
F_k = [3,3,3,3] # Conv. Kernel size in Residual layer i.
K = [1,1,1,1] # Skip connection kernel Size in Residual layer i.
P = 4 # Average pool kernel size
print("# of Residual Layers: ",N)
print("# of Residual Blocks in each Layer: ",B)
print("# of Channels in each Residual layer: ",C)
print("Conv. Kernel size in Residual layer: ",F_k)
print("Skip connection kernel Size in Residual layers: ",K)
print("Average pool kernel size: ",P)

# of Residual Layers:  4
# of Residual Blocks in each Layer:  [2, 2, 2, 2]
# of Channels in each Residual layer:  [42, 84, 168, 336]
Conv. Kernel size in Residual layer:  [3, 3, 3, 3]
Skip connection kernel Size in Residual layers:  [1, 1, 1, 1]
Average pool kernel size:  4


In [6]:
class BasicBlock(nn.Module):

    def __init__(self, in_planes, planes, f, k, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=f, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=f,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes,
                          kernel_size=k, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [7]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 28

        self.conv1 = nn.Conv2d(3, 28, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(28)
        self.layer1 = self._make_layer(block, C[0], num_blocks[0],F_k[0],K[0], stride=1)
        self.layer2 = self._make_layer(block, C[1], num_blocks[1],F_k[1],K[1], stride=2)
        self.layer3 = self._make_layer(block, C[2], num_blocks[2],F_k[2], K[2], stride=2)
        self.layer4 = self._make_layer(block, C[3], num_blocks[3],F_k[3], K[3], stride=2)
        self.linear = nn.Linear(C[3], num_classes)

    def _make_layer(self, block, planes, num_blocks, f, k, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, f, k, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        # print("out size: ",out.size())
        out = self.layer1(out)
        # print("out size: ",out.size())
        out = self.layer2(out)
        # print("out size: ",out.size())
        out = self.layer3(out)
        out = self.layer4(out)
        # print("out size: ",out.size())
        out = F.avg_pool2d(out, P)
        # print("out size: ",out.size())
        out = out.view(out.size(0), -1)
        # print("out size: ",out.size())
        out = self.linear(out)
        return out

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
def project1_model():
    return ResNet(BasicBlock, B)

In [10]:
model=project1_model()
model.to(device)

ResNet(
  (conv1): Conv2d(3, 28, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(28, 42, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(42, 42, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(28, 42, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(42, 42, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track

In [11]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
    # torch.numel() returns number of elements in a tensor

print(count_parameters(model)/1000000)

4.811502


In [12]:
print(model)

ResNet(
  (conv1): Conv2d(3, 28, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(28, 42, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(42, 42, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(28, 42, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(42, 42, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(42, eps=1e-05, momentum=0.1, affine=True, track

In [13]:
# define loss function, optimizer

Loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
save_loss = {'train':[], 'val':[]}
save_acc = {'train':[], 'val':[]}

In [14]:
print("Training Model")

Training Model


In [15]:
# Training with Validation
import numpy as np
epochs = 5
min_valid_loss = np.inf
 
for e in range(epochs):
    train_loss = 0.0
    train_corrects = 0
    #for data, labels in trainloader:
    for i, (data, labels) in enumerate(trainDataLoader):
        # Transfer Data to GPU if available
        # if torch.cuda.is_available():
        #     data, labels = data.cuda(), labels.cuda()
        data = data.to(device)
        labels = labels.to(device)
        # Clear the gradients
        optimizer.zero_grad()
        # Forward Pass
        target = model(data)
        # Find the Loss
        loss = Loss(target,labels)
        # Calculate gradients
        loss.backward()
        # Update Weights
        optimizer.step()
        # Calculate Loss
        train_loss += loss.item()
        _, preds = torch.max(target,1) 
        train_corrects += torch.sum(preds == labels.data)
     
    valid_loss = 0.0
    val_corrects = 0
    model.eval()     # Optional when not using Model Specific layer
    #for data, labels in validloader:
    for i, (data, labels) in enumerate(valDataLoader):
        # # Transfer Data to GPU if available
        # if torch.cuda.is_available():
        #     data, labels = data.cuda(), labels.cuda()
        data = data.to(device)
        labels = labels.to(device)
        # Forward Pass
        target = model(data)
        # Find the Loss
        loss = Loss(target,labels)
        # Calculate Loss
        valid_loss += loss.item()
        _, preds = torch.max(target,1) 
        val_corrects += torch.sum(preds == labels.data)
 
    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(trainDataLoader)} \t\t Validation Loss: {valid_loss / len(testDataLoader)}')
    print("Train accuracy: ", train_corrects/len(trainDataLoader))
    print("Val Accuracy: ", val_corrects/len(valDataLoader))
     
    if min_valid_loss > valid_loss:
        # print(f'Validation Loss Decreased({min_valid_loss:.6f\}--->{valid_loss:.6f}) \t Saving The Model')
        print("'Validation Loss Decreased: From ",min_valid_loss," to ",valid_loss)
        print("Saving Model....")
        min_valid_loss = valid_loss
         
        # Saving State Dict
        torch.save(model.state_dict(), 'saved_model.pth')

Epoch 1 		 Training Loss: 2.0088346058672126 		 Validation Loss: 0.9034363114909761
Train accuracy:  tensor(16.6321, device='cuda:0')
Val Accuracy:  tensor(21.0886, device='cuda:0')
'Validation Loss Decreased: From  inf  to  141.83950090408325
Saving Model....
Epoch 2 		 Training Loss: 1.7384563427079807 		 Validation Loss: 1.523610168201908
Train accuracy:  tensor(22.5810, device='cuda:0')
Val Accuracy:  tensor(13.9873, device='cuda:0')
Epoch 3 		 Training Loss: 1.5983568208122796 		 Validation Loss: 0.8345347149356915
Train accuracy:  tensor(26.0185, device='cuda:0')
Val Accuracy:  tensor(24.5696, device='cuda:0')
'Validation Loss Decreased: From  141.83950090408325  to  131.02195024490356
Saving Model....
Epoch 4 		 Training Loss: 1.5043471518226645 		 Validation Loss: 1.1422029186965554
Train accuracy:  tensor(28.3722, device='cuda:0')
Val Accuracy:  tensor(19.1646, device='cuda:0')
Epoch 5 		 Training Loss: 1.4350544356487014 		 Validation Loss: 0.9432683659207289
Train accuracy: 

In [16]:
test_loss = 0.0
model.eval()     # Optional when not using Model Specific layer
corrects = 0
#for data, labels in validloader:
for i, (data, labels) in enumerate(testDataLoader):
    # # Transfer Data to GPU if available
    # if torch.cuda.is_available():
    #     data, labels = data.cuda(), labels.cuda()
    data = data.to(device)
    labels = labels.to(device)
    # Forward Pass
    target = model(data)
    # Find the Loss
    loss = Loss(target,labels)
    # Calculate Loss
    test_loss += loss.item()
    _, preds = torch.max(target,1) 
    corrects += torch.sum(preds == labels.data)
print(corrects.float() / len(testDataLoader))

tensor(23.7580, device='cuda:0')


In [17]:
print("Test Loss is: ",test_loss/len(testDataLoader))

Test Loss is:  1.8708397400606969
