In [6]:
import torch
import torch.nn as nn
import torchvision.models as model
import numpy as np
import torchvision.datasets as datasets
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.optim import lr_scheduler


# Run on the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

root = './data'      # directory the CIFAR-10 files are stored in
download = True      # fetch the dataset into `root` when it is not already there
batch_size = 64
cuda = torch.cuda.is_available()
# Extra DataLoader options are only passed when CUDA is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

# ToTensor turns each PIL image into a float tensor in [0, 1]. The pretrained
# torchvision network used further down was trained on inputs normalized with
# the ImageNet per-channel mean/std, so the same statistics are applied here
# (instead of the generic 0.5/0.5 scaling to [-1, 1]).
imagenet_mean = (0.485, 0.456, 0.406)
imagenet_std = (0.229, 0.224, 0.225)
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Training split: reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(root, train=True, download=download,
                     transform=cifar_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Held-out split: fixed order, same preprocessing as the training split.
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(root, train=False, download=download,
                     transform=cifar_transform),
    batch_size=batch_size, shuffle=False, **kwargs)

# Start from a VGG-11 with pretrained weights. (A ResNet-18 could be used
# instead; its classification head is exposed as `.fc` rather than
# `.classifier`, so the head-replacement lines below would need adapting.)
model_ft = models.vgg11(pretrained=True)

# Freeze every pretrained weight. Modules created after this point get fresh
# parameters that still require gradients, so only those will be trained.
for weight in model_ft.parameters():
  weight.requires_grad = False

# Show each top-level child module with its position, then the whole network.
for position, child in enumerate(model_ft.children()):
  print(position, '->', child)

print(model_ft)

# Input width of the first layer of the original classifier stack, i.e. the
# size of the flattened feature vector the new head has to accept.
first_head_layer = model_ft.classifier[0]
num_ftrs = first_head_layer.in_features

# Replace the whole classifier stack with a single linear layer producing
# 10 scores per sample (one per CIFAR-10 class).
model_ft.classifier = nn.Linear(num_ftrs, 10)

# Move the assembled network to the selected device.
model = model_ft.to(device)

# Cross-entropy computed directly on the raw class scores the network outputs.
criterion = nn.CrossEntropyLoss()

# Only parameters that still require gradients are handed to the optimizer.
# The frozen pretrained weights never receive a gradient, so listing them
# would only add parameters SGD has to skip on every step.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 scheduler steps; the schedule only
# advances when exp_lr_scheduler.step() is called (intended: once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Number of batches between two training progress lines.
log_interval = 100
def train(data_loader, epoch):
    """Run one training pass over ``data_loader``.

    Relies on the module-level ``model``, ``optimizer``, ``criterion``,
    ``device`` and ``log_interval``.

    Args:
        data_loader: iterable yielding ``(features, labels)`` batches; it must
            also support ``len()`` and expose a ``dataset`` attribute, both of
            which are used for the progress line.
        epoch: epoch number, used only in the progress line.
    """
    model.train()

    for batch_idx, (features, labels) in enumerate(data_loader):
        features, labels = features.to(device), labels.to(device)

        optimizer.zero_grad()  # discard gradients left over from the previous batch
        output = model(features)
        loss = criterion(output, labels)
        loss.backward()  # calculate gradients
        optimizer.step()  # update network parameters

        # Report progress every `log_interval` batches (including batch 0).
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.00f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(features), len(data_loader.dataset),
                100. * batch_idx / len(data_loader), loss.item()))


def test(data_loader):
    """Evaluate the module-level ``model`` on ``data_loader`` and print a summary.

    Relies on the module-level ``model``, ``criterion`` and ``device``.

    Args:
        data_loader: iterable yielding ``(features, labels)`` batches; it must
            also support ``len()`` and expose a ``dataset`` attribute, both of
            which are used for the averages in the summary line.
    """
    model.eval()
    test_loss = 0
    correct = 0

    # No parameter is updated during evaluation, so gradient tracking is
    # switched off for the forward passes.
    with torch.no_grad():
        for features, labels in data_loader:
            features, labels = features.to(device), labels.to(device)
            output = model(features)
            test_loss += criterion(output, labels).item()
            pred = output.max(1)[1]  # index of the highest class score per sample
            # Accumulate a plain Python int so the count and the percentage
            # below are computed and formatted as numbers, not tensors.
            correct += pred.eq(labels).sum().item()

    test_loss /= len(data_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))

# Number of full passes over the training set.
epochs = 1
for epoch in range(1, epochs + 1):
    train(train_loader, epoch)
    test(test_loader)
    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates of that epoch; without this call the StepLR decay never happens.
    exp_lr_scheduler.step()

Files already downloaded and verified
Files already downloaded and verified
0 -> Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace=True)
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): ReLU(inplace=True)
  (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (14): ReLU(inplace=True)
  (15): MaxPool2d(kernel_size=2, stride=2, padding=0, 

KeyboardInterrupt: ignored