In [5]:
from __future__ import print_function
from PIL import Image
import os
import os.path
import errno
import numpy as np
import sys
import zipfile
import scipy.io as sio
import torch.utils.data as data
import glob

class UCSD(data.Dataset):
    """UCSD pedestrian counting data."""

    def __init__(self, data_dir, annotation_dir, transform=None):

        self.file_list = []
        self.file_cnts = []
        
        files = glob.glob(
            os.path.join(
                annotation_dir, 
                '*count_roi_mainwalkway.mat'))
        
        for f in files:
            tmp = sio.loadmat(f)
            
            l_count = tmp['count'][0][0].ravel()
            r_count = tmp['count'][0][1].ravel()
            t_count = l_count + r_count
            
            [self.file_cnts.append(c) for c in t_count]
            
            file_parts = os.path.basename(f).split('_')
            seq_id = "_".join(file_parts[0:3])
            
            for i in np.arange(len(t_count)):
                
                self.file_list.append(
                    os.path.join(
                        data_dir,
                        seq_id + ".y",
                        "{}_f{:03d}.png".format(seq_id,i+1)))
            
            self.transform = transform

    def __len__(self):
        return len(self.file_list)
        
    def __getitem__(self, idx):
        img_name = os.path.join(self.file_list[idx])
        img = Image.open(img_name)
        img = img.resize((128,128))

        if self.transform:
            img = self.transform(img)
        return img, self.file_cnts[idx]

In [6]:
import torch
import random

class SubsetSampler(object):

    def __init__(self, subset):
        self.subset = subset

    def __iter__(self):
        return iter(self.subset)

    def __len__(self):
        return len(self.subset)


class RandomSubsetSampler(object):

    def __init__(self, data_source, train_share=0.8):

        # Generate a list of indizes reaching from 0 ... len(data_source)-1
        idxList = list(range(0,len(data_source)))

        # Ensure that list is sorted randomly
        random.shuffle(idxList)

        # Split dataset random shares of train and test data
        numberOfTrainSamples = int(len(data_source) / (1 / train_share))
        
        self.train_samples = idxList[:numberOfTrainSamples]
        self.test_samples = idxList[numberOfTrainSamples:]


    def trainSampler(self):
        return SubsetSampler(self.train_samples)

    def testSampler(self):
        return SubsetSampler(self.test_samples)

In [7]:
import numpy as np
import torch

# Converts the 1-channel image into a 3-channel mage
class ExpandTo3D(object):

    def __call__(self, image):

        width = image.size()[1]
        height = image.size()[2]

        return image.expand(3,width,height)

In [23]:
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torchvision.datasets
import torchvision.models as models


transformsData = transforms.Compose([transforms.Scale(250), transforms.ToTensor(), ExpandTo3D()] )

dataset = UCSD(data_dir='./data/ucsdpeds/vidf', annotation_dir='./data/vidf-cvpr', transform=transformsData)

sampler = RandomSubsetSampler(dataset)

trainloader = torch.utils.data.DataLoader(dataset, batch_size=20, sampler=sampler.trainSampler(), num_workers=2 )
testloader = torch.utils.data.DataLoader(dataset, batch_size=20, sampler=sampler.testSampler(), num_workers=2)

net = models.alexnet(num_classes=60)
net.cuda()

# Alternative: Mean Squared Error
# Alternative: Use ADAM
# Learning Rate 1/2 after every 50 epoch
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

print("Start training")
for epoch in range(300):
    
    running_loss = 0.0
    for data in trainloader:
        # get the inputs
        inputs, labels = data
        labels = labels.long()
        
        inputs = inputs.cuda()
        labels = labels.cuda()
  
        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()


    print( str(epoch) + ": " + str(loss.data[0]))

    correct = 0
    total = 0
    
    for data in testloader:
  
        inputs, labels = data
        labels = labels.long()
        
        inputs = inputs.cuda()
        labels = labels.cuda()
        
        outputs = net(Variable(inputs))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
        
    print('Accuracy of the network on the test images: %f %%' % (100 * correct / total))

print('Finished Training')


Start training
0: 3.7887134552001953
Accuracy of the network on the test images: 9.500000 %
1: 3.537203550338745
Accuracy of the network on the test images: 6.250000 %
2: 3.5200493335723877
Accuracy of the network on the test images: 7.500000 %
3: 3.5156090259552
Accuracy of the network on the test images: 7.000000 %
4: 3.549427032470703
Accuracy of the network on the test images: 6.000000 %
5: 3.5307552814483643
Accuracy of the network on the test images: 4.500000 %
6: 3.551217555999756
Accuracy of the network on the test images: 4.000000 %
7: 3.530918836593628
Accuracy of the network on the test images: 3.750000 %
8: 3.5493812561035156
Accuracy of the network on the test images: 3.750000 %
9: 3.528485059738159
Accuracy of the network on the test images: 3.750000 %
10: 3.521488666534424
Accuracy of the network on the test images: 3.750000 %
11: 3.501631498336792
Accuracy of the network on the test images: 3.750000 %
12: 3.471083402633667
Accuracy of the network on the test images: 4.0

104: 0.0712505355477333
Accuracy of the network on the test images: 82.750000 %
105: 0.0012737751239910722
Accuracy of the network on the test images: 85.500000 %
106: 0.005825900938361883
Accuracy of the network on the test images: 82.500000 %
107: 0.008779382333159447
Accuracy of the network on the test images: 85.000000 %
108: 0.013592338189482689
Accuracy of the network on the test images: 82.750000 %
109: 0.05955486372113228
Accuracy of the network on the test images: 82.000000 %
110: 0.05223972722887993
Accuracy of the network on the test images: 83.250000 %
111: 0.0003775596560444683
Accuracy of the network on the test images: 84.500000 %
112: 0.5499049425125122
Accuracy of the network on the test images: 82.000000 %
113: 0.0929202288389206
Accuracy of the network on the test images: 82.750000 %
114: 0.17140457034111023
Accuracy of the network on the test images: 83.500000 %
115: 0.0025904655922204256
Accuracy of the network on the test images: 84.500000 %
116: 0.004923820495605

204: 1.3828277587890625e-05
Accuracy of the network on the test images: 85.250000 %
205: 8.134842209983617e-05
Accuracy of the network on the test images: 85.500000 %
206: 1.3256072634248994e-05
Accuracy of the network on the test images: 84.000000 %
207: 0.00032939910306595266
Accuracy of the network on the test images: 85.000000 %
208: 1.2874603271484375e-05
Accuracy of the network on the test images: 86.000000 %
209: 3.013610876223538e-05
Accuracy of the network on the test images: 85.750000 %
210: 0.00015459061251021922
Accuracy of the network on the test images: 86.500000 %
211: 0.00040683746919967234
Accuracy of the network on the test images: 85.500000 %
212: 0.00045909881009720266
Accuracy of the network on the test images: 86.750000 %
213: 2.937316821771674e-05
Accuracy of the network on the test images: 86.500000 %
214: 0.00012602805509231985
Accuracy of the network on the test images: 84.250000 %
215: 0.0010105610126629472
Accuracy of the network on the test images: 83.50000