In [15]:
from torch.nn import functional as F

from PIL import Image
from glob import glob
from scipy.io import loadmat

import torch.nn as nn
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.utils.data as data
#import torchvision.transforms.functional as F

import torch
import torchvision
import os
import random

#### Load the data

In [10]:
class Crowd(data.Dataset):
    def __init__(self, root_path, crop_size, downsample_ratio=8):
        self.root_path = root_path
        self.trans = transforms.Compose([transforms.ToTensor(),transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) # transform to Tensors of normalized range [-1, 1]
        self.im_list = sorted(glob(os.path.join(self.root_path, '*.jpg')))
        self.annot_list = sorted(glob(os.path.join(self.root_path, '*.mat')))
        print('number of img: {}'.format(len(self.im_list))) 


    def __len__(self):
        return len(self.im_list)

    def __getitem__(self, item):
        img_path = self.im_list[item]
        ann_path = img_path.replace('.jpg', '_ann.mat')
        img = Image.open(img_path).convert('RGB')
        keypoints = loadmat(ann_path)['annPoints']
        img = self.trans(img)
        name = os.path.basename(img_path).split('.')[0]
        return img, len(keypoints), name

In [11]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters 
num_epochs = 5
batch_size = 4
learning_rate = 0.001
downsample_ratio=8
crop_size = 512

#train_dataset = Crowd('data/Train/', crop_size, downsample_ratio)
#test_dataset = Crowd('data/Test/', crop_size, downsample_ratio)

transformation = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform =transformation)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform =transformation)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,shuffle=False)

#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


170499072it [00:23, 7261278.31it/s]                                


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


#### Build the Model

In [12]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # -> n, 3, 32, 32
        x = self.pool(F.relu(self.conv1(x)))  # -> n, 6, 14, 14
        x = self.pool(F.relu(self.conv2(x)))  # -> n, 16, 5, 5
        x = x.view(-1, 16 * 5 * 5)            # -> n, 400
        x = F.relu(self.fc1(x))               # -> n, 120
        x = F.relu(self.fc2(x))               # -> n, 84
        x = self.fc3(x)                       # -> n, 10
        return x

#### Training Loop

In [16]:
model = ConvNet().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images,labels) in enumerate(train_loader):
        # origin shape: [4, 3, 32, 32] = 4, 3, 1024
        # input_layer: 3 input channels, 6 output channels, 5 kernel size
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 2000 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')

Epoch [1/5], Step [2000/12500], Loss: 2.3267
Epoch [1/5], Step [4000/12500], Loss: 2.3159
Epoch [1/5], Step [6000/12500], Loss: 1.7092
Epoch [1/5], Step [8000/12500], Loss: 2.2804
Epoch [1/5], Step [10000/12500], Loss: 1.6572
Epoch [1/5], Step [12000/12500], Loss: 2.0640
Epoch [2/5], Step [2000/12500], Loss: 1.3956
Epoch [2/5], Step [4000/12500], Loss: 1.9504
Epoch [2/5], Step [6000/12500], Loss: 1.4494
Epoch [2/5], Step [8000/12500], Loss: 0.8449
Epoch [2/5], Step [10000/12500], Loss: 1.1278
Epoch [2/5], Step [12000/12500], Loss: 0.9263
Epoch [3/5], Step [2000/12500], Loss: 1.3213
Epoch [3/5], Step [4000/12500], Loss: 2.0114
Epoch [3/5], Step [6000/12500], Loss: 1.3584
Epoch [3/5], Step [8000/12500], Loss: 2.3253
Epoch [3/5], Step [10000/12500], Loss: 2.0933
Epoch [3/5], Step [12000/12500], Loss: 1.8916
Epoch [4/5], Step [2000/12500], Loss: 0.9613
Epoch [4/5], Step [4000/12500], Loss: 1.3311
Epoch [4/5], Step [6000/12500], Loss: 1.1497
Epoch [4/5], Step [8000/12500], Loss: 0.6193
Epoc

#### Testing Loop

In [17]:
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        
        for i in range(batch_size):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

    for i in range(10):
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        #print(f'Accuracy of {classes[i]}: {acc} %')

Accuracy of the network: 55.67 %
