In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np
from tempfile import TemporaryDirectory

USE_GPU = True
dtype = torch.float32 # We will be using float throughout this tutorial.

# Seed the random number generators up front so that weight initialisation and
# the random batch order start from the same state on every fresh run.
# (GPU kernels may still introduce small run-to-run differences.)
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use CUDA only when it is both requested and available; otherwise use the CPU.
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

cuda


In [7]:
NUM_TRAIN = 40000                  # examples of the training set used for training
BATCH_SIZE = 64                    # minibatch size shared by all three loaders
DATA_DIR = '../cs231n/datasets'    # root directory passed to every CIFAR10 dataset

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform that
# converts each image to a tensor and then normalises every RGB channel by
# subtracting a mean and dividing by a standard deviation (both hardcoded).
transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset:
# indices [0, NUM_TRAIN) are used for training, the remainder for validation.
cifar10_train = dset.CIFAR10(DATA_DIR, train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=BATCH_SIZE,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10(DATA_DIR, train=True, download=True,
                           transform=transform)
# Derive the end of the validation range from the dataset itself instead of
# hardcoding the size of the training set.
val_indices = range(NUM_TRAIN, len(cifar10_val))
loader_val = DataLoader(cifar10_val, batch_size=BATCH_SIZE,
                        sampler=sampler.SubsetRandomSampler(val_indices))

cifar10_test = dset.CIFAR10(DATA_DIR, train=False, download=True,
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [8]:
# Look-up tables keyed by split name: the DataLoader for each split and the
# number of examples that split contains.
dataloaders = {
    'train': loader_train,
    'val': loader_val,
    'test': loader_test,
}
dataset_sizes = {
    'train': NUM_TRAIN,
    # The val split is whatever remains of the training set after NUM_TRAIN.
    'val': len(cifar10_val) - NUM_TRAIN,
    'test': len(cifar10_test),
}

for split_name, num_examples in dataset_sizes.items():
    print(split_name, num_examples)

train 40000
val 10000
test 10000


In [9]:
def train_model(model,optimizer,criterion,num_epochs=1):
    """Train ``model`` and report loss and accuracy for the train and val splits.

    Every epoch makes one pass over ``dataloaders['train']`` (updating the
    weights) followed by one pass over ``dataloaders['val']`` (no updates),
    and prints the per-sample average loss and the accuracy of each phase.

    Args:
        model: network to optimise; it is moved to the module-level ``device``.
        optimizer: optimizer that is stepped once per training batch.
        criterion: called as ``criterion(outputs, labels)``; must return a
            scalar tensor. The value is weighted by the batch size below, so
            it is assumed to be a per-batch mean.
        num_epochs: number of train + val passes to run.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    model = model.to(device)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-'*10)

        for phase in ['train','val']:
            is_train = phase == 'train'
            # train(True) enables training mode, train(False) is eval mode.
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs,labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                # The arg-max of the raw scores is the predicted class, so no
                # softmax is needed to compute accuracy.
                pred_labels = outputs.argmax(dim=1)
                running_corrects += (pred_labels == labels).sum().item()
                num_samples += batch_size

            # Average over the samples actually seen, so the statistics stay
            # correct even if the loaders or samplers are changed.
            denominator = max(num_samples, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

In [11]:
def test_model(model):

    model = model.to(device)
    model.eval()
    running_corrects = 0

    with torch.no_grad():

        for inputs, labels in dataloaders['test']:

            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            probs = nn.functional.softmax(logits,dim=1)
            pred_labels = torch.argmax(probs,dim=1)

            running_corrects += torch.sum(pred_labels == labels)

    print(running_corrects / dataset_sizes['test'])           

In [12]:
class MyModel(nn.Module):
    """Small convolutional classifier: four conv-BN-ReLU stages and a linear head.

    The network applies two conv-BN-ReLU stages, a 2x2 max-pool, two more
    conv-BN-ReLU stages, a second 2x2 max-pool, and finally flattens the
    feature map into a single fully connected layer that produces the class
    scores. All convolutions are 3x3 with padding 1, so only the two pooling
    layers change the spatial size.

    Args:
        in_channels: number of channels of the input images (default 3).
        num_classes: number of output scores (default 10).
        input_size: height and width of the (square) input images
            (default 32).

    Raises:
        ValueError: if ``input_size`` is smaller than 4, because nothing would
            be left after the two pooling layers.
    """

    def __init__(self, in_channels=3, num_classes=10, input_size=32):
        super().__init__()

        # Each of the two 2x2 max-pools halves the spatial size (rounding down).
        feature_size = input_size // 4
        if feature_size < 1:
            raise ValueError(f'input_size must be at least 4, got {input_size}')

        self.conv1 = nn.Conv2d(in_channels,32,3,padding=1)
        self.conv2 = nn.Conv2d(32,64,3,padding=1)
        self.conv3 = nn.Conv2d(64,128,3,padding=1)
        self.conv4 = nn.Conv2d(128,256,3,padding=1)
        self.fc = nn.Linear(feature_size*feature_size*256,num_classes)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        self.flatten = nn.Flatten()


    def forward(self,x):
        """Return the class scores for a batch of images ``x``."""
        # Stage 1 and 2, then the first pooling step.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.pool(x)
        # Stage 3 and 4, then the second pooling step.
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear head.
        x = self.flatten(x)
        scores = self.fc(x)

        return scores

In [13]:
# Put the model on the target device before building the optimizer, so the
# optimizer is constructed over parameters that already live where training
# will run.
model = MyModel().to(device)
optimizer = optim.Adam(model.parameters(),weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

train_model(model, optimizer, criterion, num_epochs=10)

Epoch 1/10
----------
train Loss: 1.6236 Acc: 0.4687
val Loss: 1.1400 Acc: 0.5987
Epoch 2/10
----------
train Loss: 0.9710 Acc: 0.6590
val Loss: 0.9146 Acc: 0.6785
Epoch 3/10
----------
train Loss: 0.7801 Acc: 0.7292
val Loss: 0.8544 Acc: 0.7123
Epoch 4/10
----------
train Loss: 0.6441 Acc: 0.7787
val Loss: 0.7306 Acc: 0.7529
Epoch 5/10
----------
train Loss: 0.5456 Acc: 0.8122
val Loss: 0.6539 Acc: 0.7801
Epoch 6/10
----------
train Loss: 0.4577 Acc: 0.8418
val Loss: 0.6503 Acc: 0.7827
Epoch 7/10
----------
train Loss: 0.3699 Acc: 0.8749
val Loss: 0.7214 Acc: 0.7797
Epoch 8/10
----------
train Loss: 0.2896 Acc: 0.8992
val Loss: 0.6094 Acc: 0.8095
Epoch 9/10
----------
train Loss: 0.2210 Acc: 0.9232
val Loss: 0.7512 Acc: 0.7829
Epoch 10/10
----------
train Loss: 0.1668 Acc: 0.9425
val Loss: 0.6635 Acc: 0.8093


In [14]:
# Evaluate the trained model on the CIFAR-10 test split; the accuracy is printed.
test_model(model)

tensor(0.8005, device='cuda:0')
