In [1]:
import numpy as np
import pandas as pd
import os
import tarfile
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader 
from torch.utils.data import Dataset
from torchvision.datasets.utils import download_url
from torchvision.transforms import ToTensor
from torchvision.datasets import ImageFolder

In [2]:
cifar10 = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
download_url(cifar10, '.')


Downloading https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz to ./cifar10.tgz


HBox(children=(FloatProgress(value=0.0, max=135107811.0), HTML(value='')))




In [3]:
with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
  tar.extractall(path='./data')

In [4]:
print(os.listdir('./data/cifar10/train'))

['deer', 'cat', 'horse', 'ship', 'frog', 'dog', 'truck', 'bird', 'airplane', 'automobile']


In [5]:
main_directory = './data/cifar10/'

In [6]:
batch_size = 4
learning_rate = 1e-3

In [7]:
train = ImageFolder(main_directory + 'train', transform = ToTensor())
test_set = ImageFolder(main_directory + 'test', transform = ToTensor())

#from torch.utils.data import random_split
#train_set, val_set = random_split(train, [int(0.95*len(train)), int(len(train) - int(0.95*len(train)))])

train_loader = DataLoader(train, batch_size = batch_size, shuffle = True, pin_memory = True)
#val_loader = DataLoader(val_set, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = True, pin_memory = True)

In [8]:
print(f'Number of files in training set: {len(train)}')
#print(f'Number of files in validation set: {len(val_set)}')
print(f'Number of files in testing set: {len(test_set)}')

Number of files in training set: 50000
Number of files in testing set: 10000


In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [10]:
model = torchvision.models.vgg16(pretrained = True)
model.to(device)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9)

In [12]:
for epoch in range(2):

    running_loss = 0.0
    for idx, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if idx % 1000 == 999:   
            print('[%d, %5d] loss: %.5f' %
                  (epoch + 1, idx + 1, running_loss / 1000))
            running_loss = 0.0

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[1,  1000] loss: 2.16586
[1,  2000] loss: 1.42507
[1,  3000] loss: 1.14836
[1,  4000] loss: 1.05320
[1,  5000] loss: 0.94879
[1,  6000] loss: 0.89776
[1,  7000] loss: 0.84329
[1,  8000] loss: 0.81111
[1,  9000] loss: 0.76002
[1, 10000] loss: 0.74172
[1, 11000] loss: 0.74801
[1, 12000] loss: 0.72878
[2,  1000] loss: 0.58454
[2,  2000] loss: 0.59858
[2,  3000] loss: 0.58244
[2,  4000] loss: 0.55053
[2,  5000] loss: 0.60740
[2,  6000] loss: 0.55195
[2,  7000] loss: 0.54548
[2,  8000] loss: 0.55329
[2,  9000] loss: 0.53009
[2, 10000] loss: 0.54035
[2, 11000] loss: 0.52775
[2, 12000] loss: 0.53623


In [13]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device = device)
            y = y.to(device = device)
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        print(f'correct samples: {num_correct} / {num_samples} ---- accuracy:{float(num_correct)/float(num_samples)*100}')
    model.train()

print("Training set accuracy:")
check_accuracy(train_loader, model)

print("Testing set accuracy:")
check_accuracy(test_loader, model)

Training set accuracy:
correct samples: 42338 / 50000 ---- accuracy:84.676
Testing set accuracy:
correct samples: 7951 / 10000 ---- accuracy:79.51
