In [3]:
## RUN THIS CELL
import torch
import torchvision
import torchvision.transforms as transforms

## Create Datasets

In [4]:
# create a transformation that changes are images to Tensors, 
# and then maps the images from the range [0, 1] to the range [-1, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [5]:
## RUN THIS CELL
training_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [6]:
## RUN THIS CELL
test_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [7]:
training_loader = torch.utils.data.DataLoader(
    training_set,
    batch_size=4,
    shuffle=True,
    num_workers=1,
)

In [8]:
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=4,
    shuffle=False,
    num_workers=1,
)

In [9]:
## Classes in our dataset
classes = [
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

## Define our Network

In [10]:
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

In [11]:
## our network

class Net(torch.nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=8,
            kernel_size=5,
            padding=5,
            stride=2
        )
        self.pool1 = nn.MaxPool2d(
            kernel_size=8,
            stride=2,
        )
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=5,
            padding=5,
        )
        self.pool2 = nn.MaxPool2d(
            kernel_size=8,
            stride=2,
        )
        self.output_shape = (int(32/2/2), int(32/2/2), 16)
        self.fc1 = nn.Linear(np.prod(self.output_shape), 128)
        self.output_fc = nn.Linear(128, len(classes))
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, np.prod(self.output_shape))
        
        x = F.relu(self.fc1(x))
        x = self.output_fc(x)
        return x

In [12]:
net = Net()

## Define the loss function and optimizer

In [13]:
import torch.optim as optim

In [14]:
loss_func = nn.CrossEntropyLoss()

In [15]:
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

## Train the network

In [16]:
NUM_EPOCHS = 1
PRINT_FREQ = 100

In [1]:
for epoch in range(NUM_EPOCHS):
    current_loss = 0.0
    
    for i, data in enumerate(training_loader, 0):
        inputs, labels = data
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        
        
        current_loss += loss.item()
        if i % PRINT_FREQ == 0:
            print(f'[{epoch},{i}] loss: {current_loss / PRINT_FREQ:.3f}')
            current_loss = 0.0

NameError: name 'NUM_EPOCHS' is not defined

## Test network on the training data

In [26]:
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

In [27]:
accuracy = 100 * correct / total

In [28]:
print(f'Accuracy on the 10000 test images: {accuracy:.2f}')

Accuracy on the 10000 test images: 47.78


## Damn that sucks, what can we do better?

### Answer: Transfer Learning

In [29]:
# We want to use a pretrained imagenet model and transfer it to our CIFAR task.
# First we need to recreate our datasets so that the input images to the network "look like"
# imagenet images.
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize([224, 224]), #imagenet images are 224x224
    transforms.ToTensor(),
    transforms.Normalize( # pretrained network was trained with these params
        mean=mean,
        std=std, 
    ),
])

In [30]:
training_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [31]:
test_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [32]:
training_loader = torch.utils.data.DataLoader(
    training_set,
    batch_size=4,
    shuffle=True,
    num_workers=1,
)

In [33]:
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=4,
    shuffle=False,
    num_workers=1,
)

## Now we pull in our pretrained network

In [34]:
## RUN THIS CELL
vgg16 = torchvision.models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\vamsh/.torch\models\vgg16-397923af.pth
553433881it [00:16, 34113899.29it/s]


In [35]:
# Now we want to make it so that gradients don't get calculated for the parameters
# in the pretrained model.
for param in vgg16.parameters():
    param.require_grad = False

In [36]:
vgg16.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace)
  (2): Dropout(p=0.5)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace)
  (5): Dropout(p=0.5)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [37]:
# Now we want to change the final fully connected layer in the model
# since imagenet has 1000 classes and we only want to predict 10

# get number of input features to last layer
num_features = vgg16.classifier[6].in_features
# overwrite existing fc with new one
vgg16.classifier[6] = nn.Linear(num_features, 10)

In [38]:
# look at a summary of our network
import torchsummary
torchsummary.summary(vgg16, (3, 224, 224))

ModuleNotFoundError: No module named 'torchsummary'

In [None]:
## Now we need to copy our code for the loss function, training, etc from above
# NOTE: it would have been better practice to make that code into functions so that
# we wouldn't have to copy and paste code around.

In [40]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)

In [42]:
PRINT_FREQ = 20
from tqdm import tqdm
for epoch in range(NUM_EPOCHS):
    current_loss = 0.0
    pbar = tqdm(enumerate(training_loader, 0), total=len(training_loader))
    for i, data in pbar:
        inputs, labels = data

        optimizer.zero_grad()
        outputs = vgg16(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        
        current_loss += loss.item()
        if i % PRINT_FREQ == 0:
            pbar.set_description(f'loss: {current_loss / PRINT_FREQ:.3f}')
            current_loss = 0.0


  0%|                                                                                        | 0/12500 [00:00<?, ?it/s]
loss: 0.121:   0%|                                                                           | 0/12500 [00:11<?, ?it/s]
loss: 0.121:   0%|                                                                | 1/12500 [00:11<39:09:17, 11.28s/it]
loss: 0.121:   0%|                                                                | 2/12500 [00:22<39:16:35, 11.31s/it]
loss: 0.121:   0%|                                                                | 3/12500 [00:33<38:30:31, 11.09s/it]
loss: 0.121:   0%|                                                                | 4/12500 [00:44<38:33:46, 11.11s/it]
loss: 0.121:   0%|                                                                | 5/12500 [00:55<38:49:00, 11.18s/it]
loss: 0.121:   0%|                                                                | 6/12500 [01:06<38:47:00, 11.17s/it]

KeyboardInterrupt: 

In [None]:
# Test Model on Test data
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = vgg16(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        if total > 100:
            break
accuracy = 100 * correct / total
print(f'Accuracy on the {total} test images: {accuracy:.2f}')

In [None]:
# Save model
## Save the model
torch.save(
    {
        'epoch': 1,
        'state_dict': vgg16.state_dict(),
        'optimizer' : optimizer.state_dict(),
    },
    'model.pth.tar'
)

In [None]:
# load model
def load_model(filename):
    checkpoint = torch.load(filename, map_location='cpu')
    vgg16.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

In [None]:
load_model('model.pth.tar')

In [None]:
dataiter = iter(test_loader)

In [None]:
images, labels = dataiter.next()

In [None]:
import matplotlib.pyplot as plt

In [None]:
def inverse_transform(img):
    print(img.shape)
    npimg = img.numpy()
    npimg = npimg * np.array(std).reshape((-1, 1, 1)) + np.array(mean).reshape((-1, 1, 1))
    return np.transpose(npimg, (1, 2, 0))

In [None]:
plt.imshow(inverse_transform(torchvision.utils.make_grid(images)))

In [None]:
print(' '.join(classes[label] for label in labels))

In [None]:
outputs = vgg16(images)

In [None]:
_, predicted = torch.max(outputs.data, 1)

In [None]:
print(' '.join(classes[label] for label in predicted))