In [113]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import math
import joblib

In [114]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Plot theme for every figure in the notebook. The original assigned "darkgrid"
# to a variable that was never used; it is now actually handed to seaborn.
style = "darkgrid"
sns.set(style=style)

In [115]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [116]:
    # Seed PyTorch's global random number generator with a fixed value (0).
    # NOTE(review): this top-level statement carries stray leading indentation —
    # confirm it is intentional; a plain .py export would reject it.
    torch.manual_seed(0)

In [117]:
# Mini-batch size used by the data loaders, and the number of target classes
# the classifier head predicts.
batch_size, num_classes = 64, 10

In [118]:
# Preprocessing applied to every image: resize to 32, convert to a tensor,
# then normalise each of the three channels with the mean / std below.
_channel_mean = [0.4914, 0.4822, 0.4465]
_channel_std = [0.2023, 0.1994, 0.2010]
_pipeline_steps = [
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
all_transforms = transforms.Compose(_pipeline_steps)

In [119]:
# Open the CIFAR-10 train and test splits under ./data (downloading them when
# requested) with the shared preprocessing pipeline attached.
# `download` is a boolean flag; the original passed the string 'True', which
# only behaved correctly because any non-empty string is truthy.
train_ds = torchvision.datasets.CIFAR10(root='./data', train=True, transform=all_transforms, download=True)
test_ds = torchvision.datasets.CIFAR10(root='./data', train=False, transform=all_transforms, download=True)

In [120]:
# Randomly carve a 5,000-sample validation subset out of the training data;
# the remaining 45,000 samples stay in train_ds (which is rebound here, so this
# cell is not safe to run twice in a row).
_split_lengths = [45000, 5000]
train_ds, val_ds = torch.utils.data.random_split(train_ds, _split_lengths)

In [121]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Fix: the original wired val_loader to test_ds and test_loader to val_ds, so
# early stopping and checkpoint selection were driven by the test set (data
# leakage) and the final "test" score was measured on the validation subset.
# Each loader now wraps the dataset its name says it does.
# Evaluation does not depend on sample order, so shuffling is switched off.
val_loader = torch.utils.data.DataLoader(val_ds, batch_size=batch_size, shuffle=False)

test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [122]:
class ResBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages wrapped by a skip connection.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by both convolutions.
        identity_downsample: optional module applied to the block input before
            it is added back, for the case where the main branch output and the
            input do not have the same dimensions. ``None`` adds the input as is.
        stride: stride of the first convolution (the second always uses 1).
    """

    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super().__init__()
        # Stage 1: may change the channel count and, through `stride`, the spatial size.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, dilation=1,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        # Stage 2: keeps the channel count; stride 1 with padding 1 keeps the spatial size.
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, dilation=1,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        # Optional transform for the skip branch (see class docstring).
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        shortcut = x

        # Main branch: conv -> bn -> relu -> conv -> bn.
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Bring the skip branch to the main branch's dimensions when a module was supplied.
        if self.identity_downsample is not None:
            shortcut = self.identity_downsample(shortcut)

        # Merge both branches, then apply the final activation.
        return self.relu(branch + shortcut)

In [123]:
class ResNet18(nn.Module):
    """ResNet-18-style classifier with compound depth / width scaling.

    The depth factor ``d = alpha ** phi`` scales the number of ResBlocks per
    stage (``ceil(2 * d)``), and the width factor ``w = beta ** phi`` scales the
    channel count of every convolution (``int(base * w)``).

    Args:
        image_channels: number of channels of the input images.
        num_classes: size of the final linear layer's output.
        phi: compound scaling exponent (default 1, as originally hard-coded).
        alpha: base of the depth factor (default 1.1, as originally hard-coded).
        beta: base of the width factor (default 1.2, as originally hard-coded).
    """

    def __init__(self, image_channels, num_classes, phi=1, alpha=1.1, beta=1.2):
        # Initialise nn.Module before any method is called on ``self``.
        super().__init__()
        # d is the depth scaling factor, whereas w is the width scaling factor
        d, w = self.calculate_scalingFactors(alpha, beta, phi)

        # Kept from the original for compatibility; not read anywhere in this class.
        self.in_channels = 64

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max-pool.
        stem_width = int(64 * w)
        self.conv1 = nn.Conv2d(image_channels, stem_width, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(stem_width)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Width scaling multiplies the number of filters of every stage.
        self.layer1 = self.make_layer(int(64 * w), int(64 * w), 1, d)
        self.layer2 = self.make_layer(int(64 * w), int(128 * w), 2, d)
        self.layer3 = self.make_layer(int(128 * w), int(256 * w), 2, d)
        self.layer4 = self.make_layer(int(256 * w), int(512 * w), 2, d)

        # Global average pool to 1x1, then the classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(int(512 * w), num_classes)

    def make_layer(self, in_channels, out_channels, stride, depth_scaling):
        """Build one stage as an ``nn.Sequential`` of ResBlocks.

        The first block maps ``in_channels`` to ``out_channels`` with the given
        stride; the remaining blocks keep ``out_channels`` at stride 1. The
        stage holds ``ceil(2 * depth_scaling)`` blocks, and always at least one.

        Fix: the original wrapped the extra blocks in a new ``nn.Sequential``
        whose result was thrown away, so every stage contained exactly one
        block and ``depth_scaling`` had no effect.
        """
        repeats = 2
        layer_repeat = math.ceil(depth_scaling * repeats)

        # The skip branch needs a projection whenever the first block changes
        # the spatial size (stride) or the channel count; only the first block
        # of a stage can do either.
        identity_downsample = None
        if stride != 1 or in_channels != out_channels:
            identity_downsample = self.identity_downsample(in_channels, out_channels, stride)

        blocks = [ResBlock(in_channels, out_channels, identity_downsample, stride)]
        blocks.extend(ResBlock(out_channels, out_channels) for _ in range(layer_repeat - 1))
        return nn.Sequential(*blocks)

    def identity_downsample(self, in_channels, out_channels, stride=2):
        """Return the skip-branch projection: a 3x3 convolution plus batch norm.

        ``stride`` defaults to 2, the value the original hard-coded, so existing
        two-argument calls behave as before.
        """
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
        )

    def calculate_scalingFactors(self, alpha, beta, phi):
        """Return ``(alpha ** phi, beta ** phi)`` as (depth factor, width factor)."""
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return depth_factor, width_factor

    def forward(self, x):
        """Run the stem, the four stages, pooling and the linear head; returns the fc output."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Collapse everything but the batch dimension before the linear layer.
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

In [124]:
# Re-seed PyTorch's global RNG right before the network is constructed, then
# build the classifier for 3-channel inputs with `num_classes` outputs.
torch.manual_seed(0)
model = ResNet18(3, num_classes)
# Move the parameters to the selected device. As the cell's last expression,
# the value returned by .to() is what the notebook displays.
model.to(device)

In [125]:
# Optimiser step size, and the upper bound on the number of training epochs.
learning_rate, num_epochs = 0.001, 100

In [126]:
# Early-stopping budget: how many times in a row the validation loss may rise.
patience = 5

# Number of mini-batches the training loader yields per epoch.
total_step = len(train_loader)

# Cross-entropy classification loss.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters at the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [127]:

# Per-call history filled in by validation(): accuracy in percent and mean loss.
test_accu = []
test_losses = []

# Best accuracy (percent) seen by validation() so far; gates checkpoint writes.
curr_acc = 0


def validation(model, device, valid_loader, loss_function, checkpoint_path='filename.pkl'):
    """Evaluate ``model`` on ``valid_loader`` and return the mean per-batch loss.

    Side effects:
        * appends the accuracy (percent) to ``test_accu`` and the mean loss to
          ``test_losses``;
        * when the accuracy beats the best value seen so far (``curr_acc``),
          updates ``curr_acc`` and dumps the whole model to ``checkpoint_path``
          with joblib;
        * restores the train / eval mode the model had on entry.

    Fixes relative to the original:
        * the loss is computed with the ``loss_function`` argument instead of
          the global ``criterion``, and the stray ``optimizer.zero_grad()``
          (meaningless under ``no_grad``) is gone;
        * the recorded loss is this function's own mean loss, not the global
          ``train_loss``, and it is averaged over ``valid_loader`` rather than
          ``train_loader``;
        * ``curr_acc`` is actually updated, so the checkpoint holds the best
          model instead of being overwritten on every call;
        * the model is no longer left in eval mode for the caller.

    Args:
        model: network to evaluate.
        device: device the batches are moved to.
        valid_loader: sized iterable of ``(images, labels)`` batches.
        loss_function: callable ``(outputs, labels) -> scalar loss tensor``.
        checkpoint_path: where the best model is written.

    Raises:
        ValueError: if ``valid_loader`` has no batches.
    """
    global curr_acc

    num_batches = len(valid_loader)
    if num_batches == 0:
        raise ValueError('valid_loader has no batches to evaluate')

    was_training = model.training
    model.eval()

    loss_total = 0.0
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in valid_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss_total += loss_function(outputs, labels).item()

                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    test_loss = loss_total / num_batches
    accu = 100. * correct / total if total else 0.0
    test_accu.append(accu)
    test_losses.append(test_loss)

    if accu > curr_acc:
        curr_acc = accu
        # NOTE: joblib pickles the full module object; loading it later
        # requires the model's class definitions to be importable.
        joblib.dump(model, checkpoint_path)

    return test_loss

In [128]:
torch.manual_seed(0)

# Per-epoch training history: accuracy in percent and mean batch loss.
train_accu = []
train_losses = []

the_last_loss = math.inf   # validation loss of the previous epoch
trigger_times = 0          # consecutive epochs whose validation loss went up
epoch_cnt = 0              # number of epochs started (used by the plotting cells)

for epoch in range(num_epochs):
    # Fix: validation() switches the model to eval mode, and the original only
    # called model.train() once before the loop, so every epoch after the first
    # was trained in eval mode. Re-enter training mode on each epoch.
    model.train()

    epoch_loss = []
    running_loss = 0
    correct = 0
    total = 0
    epoch_cnt += 1
    # Load the data in batches using the train_loader object.
    for images, labels in train_loader:
        # Move tensors to the configured device.
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        epoch_loss.append(batch_loss)
        running_loss += batch_loss
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    accu = 100. * correct / total
    train_accu.append(accu)
    train_losses.append(train_loss)

    # Early stopping: stop once the validation loss has risen `patience`
    # epochs in a row (each epoch is compared with the one before it).
    the_current_loss = validation(model, device, val_loader, criterion)
    print('The current loss:', the_current_loss)

    if the_current_loss > the_last_loss:
        trigger_times += 1
        print('trigger times:', trigger_times)

        if trigger_times >= patience:
            print('Early stopping!\nStart to test process.')
            break
    else:
        print('trigger times: 0')
        trigger_times = 0

    the_last_loss = the_current_loss

    # Reports the loss of the last mini-batch of the epoch.
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [129]:

# Training loss per epoch. The x-axis is derived from the recorded history so
# it always matches the number of points (the original built it with
# linspace(...).astype(int), which relies on float truncation).
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(train_losses) + 1), train_losses, label='Loss')
ax.set(xlabel='Epoch', ylabel='Loss', title='Training loss')
# Fix: plt.legend('Loss', ...) treated the string as a sequence of labels, so
# the legend showed only the letter 'L'. The label now comes from the line itself.
ax.legend(loc='upper right')

fig.savefig('TrainLoss.png')

In [130]:
# Training accuracy (percent) per epoch; x-axis derived from the history length.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(train_accu) + 1), train_accu, label='Accuracy')
ax.set(xlabel='Epoch', ylabel='Accuracy (%)', title='Training accuracy')
# Fix: plt.legend('Accuracy', ...) treated the string as a sequence of labels,
# so the legend showed only the letter 'A'.
ax.legend(loc='upper right')
fig.savefig('TrainAcc.png')

In [131]:
# Loss recorded by validation() after each epoch; x-axis derived from the
# history length so it always matches the number of points.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(test_losses) + 1), test_losses, label='Loss')
ax.set(xlabel='Epoch', ylabel='Loss', title='Evaluation loss')
# Fix: plt.legend('Loss', ...) treated the string as a sequence of labels, so
# the legend showed only the letter 'L'.
ax.legend(loc='upper right')

fig.savefig('TestLoss.png')

In [132]:
# Accuracy (percent) recorded by validation() after each epoch.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(test_accu) + 1), test_accu, label='Accuracy')
ax.set(xlabel='Epoch', ylabel='Accuracy (%)', title='Evaluation accuracy')
# Fix: plt.legend('Accuracy', ...) treated the string as a sequence of labels,
# so the legend showed only the letter 'A'.
ax.legend(loc='upper right')
fig.savefig('TestAcc.png')

In [133]:
# model.eval()
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in train_loader:
#         images = images.to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
    
#     print('Accuracy of the network on the {} train images: {} %'.format(50000, 100 * correct / total))

In [134]:
# Replace the in-memory model with the checkpoint stored in 'filename.pkl'
# (the file validation() writes with joblib.dump).
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# checkpoint files you produced yourself.
model = joblib.load('filename.pkl')

In [135]:
# Final evaluation: top-1 accuracy of the loaded model over test_loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Fix: the original always printed "10000" regardless of how many images the
# loader actually yielded; report the number that was really evaluated, and
# avoid dividing by zero on an empty loader.
accuracy = 100 * correct / total if total else float('nan')
print('Accuracy of the network on the {} test images: {} %'.format(total, accuracy))