In [1]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn

# cuDNN tuning flags, set once before any model is built.
cudnn.benchmark = True  # Let cuDNN's built-in auto-tuner search for the fastest convolution algorithms;
                        # when False, built-in heuristics are used that might not always be the fastest.

cudnn.fastest = True  # Meant to mirror the Lua Torch `:fastest()` mode of the convolution modules:
                      # pick the fastest convolution algorithm rather than tuning for workspace size.
                      # NOTE(review): confirm that torch.backends.cudnn actually reads a `fastest`
                      # attribute; if it does not, this assignment is a silent no-op.

from torch.autograd import Variable
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import torch.optim as optim

import random, os, glob
import sys
# Set CUDA_VISIBLE_DEVICES so that only the GPU with index 1 is exposed to this process.
# NOTE(review): presumably this has to run before the first CUDA call to take effect - confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from models.alexnet import *
from utils import *

In [2]:
import torchvision
from torchvision import datasets, models, transforms

In [4]:
# Run-wide settings.
# NOTE(review): imSize is not referenced by any visible cell; the transform pipelines crop to 224.
imSize = 225
# Mini-batch size handed to both DataLoaders.
batchSize = 128
# NOTE(review): nb_epoch is not referenced by any visible cell; the training loops use range(20).
nb_epoch = 50

In [5]:
# Root folder of the image dataset (relative to the notebook's working directory);
# the 'train' and 'val' sub-folder names are joined onto it when the datasets are built.
data_dir = "../../data/GestureImages/"

In [6]:
# Three-value mean / standard deviation passed to transforms.Normalize in both pipelines.
# NOTE(review): presumably per-channel statistics measured on this dataset's images - confirm
# how they were computed and in which channel order.
mean = (0.39257858439139737, 0.35762259154897047, 0.31527230940662754)
std = (0.18832936651233589, 0.17874159347517907, 0.17543465933504346)

In [7]:
def _first_available(module, *names):
    """Return the first attribute of `module` that exists among `names`.

    torchvision renamed two of the transforms used here
    (RandomSizedCrop -> RandomResizedCrop, Scale -> Resize). Resolving them by
    name keeps this cell working on releases that ship only one spelling and
    prefers the current name when both are present.

    Raises:
        AttributeError: if `module` provides none of `names`.
    """
    for name in names:
        candidate = getattr(module, name, None)
        if candidate is not None:
            return candidate
    raise AttributeError("%s provides none of: %s" % (module.__name__, ", ".join(names)))


# Current names first, legacy names as fallback.
_RandomResizedCrop = _first_available(transforms, 'RandomResizedCrop', 'RandomSizedCrop')
_Resize = _first_available(transforms, 'Resize', 'Scale')

# Pre-processing pipelines keyed by split name.
data_transforms = {
    # Training: random crop resized to 224 plus random horizontal flip as augmentation.
    'train': transforms.Compose([
        _RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    # Validation: deterministic resize to 256 followed by a 224 centre crop.
    'val': transforms.Compose([
        _Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}

In [8]:
# One ImageFolder per split, reading <data_dir>/<split> through that split's transform pipeline.
dsets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'val')
}

# One DataLoader per split; both are shuffled and use 4 worker processes.
dset_loaders = {
    split: torch.utils.data.DataLoader(
        dsets[split],
        batch_size=batchSize,
        shuffle=True,
        num_workers=4,
    )
    for split in ('train', 'val')
}

In [9]:
# Number of samples in each split.
dset_sizes = dict((split, len(dsets[split])) for split in ('train', 'val'))
# `classes` attribute of the training dataset; its length sizes the network built further down.
dset_classes = dsets['train'].classes

In [10]:
# Pull the first batch from the training loader
# (an image batch and, presumably, the matching class indices).
inputs, classes = next(iter(dset_loaders['train']))

# Tile the batch into a single image grid.
# NOTE(review): `out` is neither displayed nor used by any visible cell.
out = torchvision.utils.make_grid(inputs)

In [11]:
# Define the criterion: the cross-entropy loss object passed to train_model in every training stage.
criterion = nn.CrossEntropyLoss()

In [20]:
# Define a new net sized for the number of classes in the dataset and put it in training mode.
# NOTE(review): `alexnet` and `copyFeaturesParametersAlexnet` are not defined in this notebook;
# presumably they come from the star imports at the top - confirm.
alexTunedClassifier = alexnet(len(dset_classes)).train()
# Load the pre-trained torchvision AlexNet and copy only its feature-extractor parameters
# (judging by the cell output, the helper logs one "copy ..." line per Conv2d layer).
copyFeaturesParametersAlexnet(alexTunedClassifier, models.alexnet(pretrained=True))
# Move the model to the GPU.
alexTunedClassifier = alexTunedClassifier.cuda()

copy Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
copy Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
copy Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
copy Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
copy Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [15]:
def train_model(model, criterion, optimizer):
    """Run one training epoch over ``dset_loaders['train']`` on the GPU.

    Args:
        model: network whose forward pass returns an indexable result; the
            element at index 1 is what gets fed to the loss.
            NOTE(review): what index 0 holds is not visible here - confirm.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to the parameters to update.

    Returns:
        The same ``model`` object that was passed in, updated in place.
    """
    model.train()
    for inputs, labels in dset_loaders['train']:
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        # Clear the gradients left over from the previous step before back-propagating.
        optimizer.zero_grad()
        outputs = model(inputs)[1]
        # The per-batch argmax the original computed here was never read,
        # so it has been dropped; only the loss is needed for training.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    return model

In [19]:
def test_model(model):
    """Evaluate ``model`` on ``dset_loaders['val']`` and print the accuracy.

    Puts the model in eval mode, takes the arg-max over dimension 1 of the
    element at index 1 of the model's output as the prediction, and prints one
    line with the number of correct predictions, the number of samples and the
    percentage (rounded to one decimal).

    Args:
        model: network to evaluate; it is left in eval mode on return.

    Returns:
        None. The result is only printed.
    """
    model.eval()
    corrects, total = 0.0, 0.0
    # NOTE(review): the forward passes below are not wrapped in any
    # gradient-disabling construct; consider adding the one matching the
    # installed PyTorch version to save memory during evaluation.
    for inputs, labels in dset_loaders['val']:
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        outputs = model(inputs)[1]
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        # int() turns the match count into a plain Python number whether
        # torch.sum hands back a number or a one-element tensor, so `corrects`
        # stays a float and the division/rounding below behave the same.
        corrects += int(torch.sum(predicted == labels.data))
    # Guard the percentage against an empty validation set.
    accuracy = corrects / total * 100 if total else 0.0
    # A single pre-formatted string prints identically under the Python 2 print
    # statement and the Python 3 print function. The double space before "on"
    # reproduces what the original comma-separated print statement emitted.
    print("test : #Correct %s  on %s (%s%%)" % (corrects, total, round(accuracy, 1)))




In [21]:
# Stage 1: train the classifier only. Both parameter groups are handed to SGD,
# but the `features` group overrides the learning rate with 0.0, so those
# weights are not moved by the optimizer steps.
optimizer = optim.SGD(
    [
        {'params': alexTunedClassifier.classifier.parameters()},
        {'params': alexTunedClassifier.features.parameters(), 'lr': 0.0},
    ],
    lr=0.01,
    momentum=0.9,
)
for epoch in range(20):
    # No trailing newline, so test_model's report lands on the same line.
    # sys.stdout.write works on both Python 2 and 3, unlike `print x,`.
    sys.stdout.write("epoch %d " % epoch)
    # train_model returns the object it was given; `model` is kept as an alias.
    model = train_model(alexTunedClassifier, criterion, optimizer)
    test_model(alexTunedClassifier)

epoch 0 test : #Correct 1660.0 on 3998.0 (41.5%)
epoch 1 test : #Correct 1910.0 on 3998.0 (47.8%)
epoch 2 test : #Correct 1817.0 on 3998.0 (45.4%)
epoch 3 test : #Correct 1760.0 on 3998.0 (44.0%)
epoch 4 test : #Correct 1832.0 on 3998.0 (45.8%)
epoch 5 test : #Correct 1935.0 on 3998.0 (48.4%)
epoch 6 test : #Correct 1892.0 on 3998.0 (47.3%)
epoch 7 test : #Correct 1934.0 on 3998.0 (48.4%)
epoch 8 test : #Correct 1928.0 on 3998.0 (48.2%)
epoch 9 test : #Correct 1893.0 on 3998.0 (47.3%)
epoch 10 test : #Correct 1811.0 on 3998.0 (45.3%)
epoch 11 test : #Correct 1892.0 on 3998.0 (47.3%)
epoch 12 test : #Correct 1935.0 on 3998.0 (48.4%)
epoch 13 test : #Correct 1947.0 on 3998.0 (48.7%)
epoch 14 test : #Correct 1891.0 on 3998.0 (47.3%)
epoch 15 test : #Correct 1854.0 on 3998.0 (46.4%)
epoch 16 test : #Correct 1823.0 on 3998.0 (45.6%)
epoch 17 test : #Correct 1887.0 on 3998.0 (47.2%)
epoch 18 test : #Correct 1873.0 on 3998.0 (46.8%)
epoch 19 test : #Correct 1810.0 on 3998.0 (45.3%)


In [22]:
# Save the net. torch.save is given the model object itself, not a state_dict.
# NOTE(review): the file name advertises a 50-epoch stage, while the visible
# training loop runs range(20) - confirm before relying on the name.
ckpt_path = "ckpt/gesture_multiBGImages_alexFineTuned-50epoch-lr-0.01.ckpt"
# Create the target folder if it is missing so a finished training run
# cannot fail at save time on a fresh checkout.
ckpt_dir = os.path.dirname(ckpt_path)
if ckpt_dir and not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)
torch.save(alexTunedClassifier, ckpt_path)

In [23]:
# Stage 2: same setup as the previous stage (the `features` group keeps a
# learning rate of 0.0), with the base learning rate lowered to 0.001.
optimizer = optim.SGD(
    [
        {'params': alexTunedClassifier.classifier.parameters()},
        {'params': alexTunedClassifier.features.parameters(), 'lr': 0.0},
    ],
    lr=0.001,
    momentum=0.9,
)
for epoch in range(20):
    # No trailing newline, so test_model's report lands on the same line.
    # sys.stdout.write works on both Python 2 and 3, unlike `print x,`.
    sys.stdout.write("epoch %d " % epoch)
    # train_model returns the object it was given; `model` is kept as an alias.
    model = train_model(alexTunedClassifier, criterion, optimizer)
    test_model(alexTunedClassifier)

epoch 0 test : #Correct 1844.0 on 3998.0 (46.1%)
epoch 1 test : #Correct 1861.0 on 3998.0 (46.5%)
epoch 2 test : #Correct 1879.0 on 3998.0 (47.0%)
epoch 3 test : #Correct 1836.0 on 3998.0 (45.9%)
epoch 4 test : #Correct 1884.0 on 3998.0 (47.1%)
epoch 5 test : #Correct 1879.0 on 3998.0 (47.0%)
epoch 6 test : #Correct 1871.0 on 3998.0 (46.8%)
epoch 7 test : #Correct 1865.0 on 3998.0 (46.6%)
epoch 8 test : #Correct 1877.0 on 3998.0 (46.9%)
epoch 9 test : #Correct 1882.0 on 3998.0 (47.1%)
epoch 10 test : #Correct 1887.0 on 3998.0 (47.2%)
epoch 11 test : #Correct 1894.0 on 3998.0 (47.4%)
epoch 12 test : #Correct 1867.0 on 3998.0 (46.7%)
epoch 13 test : #Correct 1869.0 on 3998.0 (46.7%)
epoch 14 test : #Correct 1878.0 on 3998.0 (47.0%)
epoch 15 test : #Correct 1877.0 on 3998.0 (46.9%)
epoch 16 test : #Correct 1882.0 on 3998.0 (47.1%)
epoch 17 test : #Correct 1869.0 on 3998.0 (46.7%)
epoch 18 test : #Correct 1883.0 on 3998.0 (47.1%)
epoch 19 test : #Correct 1860.0 on 3998.0 (46.5%)


In [24]:
# Save the net. torch.save is given the model object itself, not a state_dict.
# NOTE(review): the file name advertises 50-epoch stages, while the visible
# training loops run range(20) - confirm before relying on the name.
ckpt_path = "ckpt/gesture_multiBGImages_alexFineTuned-50epoch-lr-0.01-50epoch-lr-0.001.ckpt"
# Create the target folder if it is missing so a finished training run
# cannot fail at save time on a fresh checkout.
ckpt_dir = os.path.dirname(ckpt_path)
if ckpt_dir and not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)
torch.save(alexTunedClassifier, ckpt_path)

In [25]:
# Stage 3: fine-tune the whole network. A single parameter group covering all of
# the model's parameters is trained with lr 0.001, so `features` is updated too.
#
# Optional: resume from a saved checkpoint instead of the in-memory model.
# NOTE(review): the path below names a different experiment ("gesture_1classdivers")
# than the checkpoints written by this notebook - confirm before uncommenting.
#alexTunedClassifier = torch.load("ckpt/gesture_1classdivers_alexFineTuned-50epoch-lr-0.01-50epoch-lr-0.001.ckpt")
#alexTunedClassifier = alexTunedClassifier.cuda()

optimizer = optim.SGD(alexTunedClassifier.parameters(), lr=0.001, momentum=0.9)
for epoch in range(20):
    # No trailing newline, so test_model's report lands on the same line.
    # sys.stdout.write works on both Python 2 and 3, unlike `print x,`.
    sys.stdout.write("epoch %d " % epoch)
    # train_model returns the object it was given; `model` is kept as an alias.
    model = train_model(alexTunedClassifier, criterion, optimizer)
    test_model(alexTunedClassifier)

epoch 0 test : #Correct 2148.0 on 3998.0 (53.7%)
epoch 1 test : #Correct 2271.0 on 3998.0 (56.8%)
epoch 2 test : #Correct 2330.0 on 3998.0 (58.3%)
epoch 3 test : #Correct 2438.0 on 3998.0 (61.0%)
epoch 4 test : #Correct 2505.0 on 3998.0 (62.7%)
epoch 5 test : #Correct 2527.0 on 3998.0 (63.2%)
epoch 6 test : #Correct 2407.0 on 3998.0 (60.2%)
epoch 7 test : #Correct 2612.0 on 3998.0 (65.3%)
epoch 8 test : #Correct 2565.0 on 3998.0 (64.2%)
epoch 9 test : #Correct 2556.0 on 3998.0 (63.9%)
epoch 10 test : #Correct 2531.0 on 3998.0 (63.3%)
epoch 11 test : #Correct 2727.0 on 3998.0 (68.2%)
epoch 12 test : #Correct 2788.0 on 3998.0 (69.7%)
epoch 13 test : #Correct 2700.0 on 3998.0 (67.5%)
epoch 14 test : #Correct 2861.0 on 3998.0 (71.6%)
epoch 15 test : #Correct 2735.0 on 3998.0 (68.4%)
epoch 16 test : #Correct 2745.0 on 3998.0 (68.7%)
epoch 17 test : #Correct 2748.0 on 3998.0 (68.7%)
epoch 18 test : #Correct 2733.0 on 3998.0 (68.4%)
epoch 19 test : #Correct 2798.0 on 3998.0 (70.0%)


In [26]:
# Save the net. torch.save is given the model object itself, not a state_dict.
# NOTE(review): the file name advertises 50-epoch stages, while the visible
# training loops run range(20) - confirm before relying on the name.
ckpt_path = "ckpt/gesture_multiBGImages_alexFineTuned-50epoch-lr-0.01-50epoch-lr-0.001-50epoch-lr-0.001.ckpt"
# Create the target folder if it is missing so a finished training run
# cannot fail at save time on a fresh checkout.
ckpt_dir = os.path.dirname(ckpt_path)
if ckpt_dir and not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)
torch.save(alexTunedClassifier, ckpt_path)

In [27]:
# Stage 4: keep training all parameters of the in-memory model (nothing is
# loaded from disk here), with the learning rate lowered to 0.0001.
optimizer = optim.SGD(alexTunedClassifier.parameters(), lr=0.0001, momentum=0.9)
for epoch in range(20):
    # No trailing newline, so test_model's report lands on the same line.
    # sys.stdout.write works on both Python 2 and 3, unlike `print x,`.
    sys.stdout.write("epoch %d " % epoch)
    # train_model returns the object it was given; `model` is kept as an alias.
    model = train_model(alexTunedClassifier, criterion, optimizer)
    test_model(alexTunedClassifier)

epoch 0 test : #Correct 2827.0 on 3998.0 (70.7%)
epoch 1 test : #Correct 2840.0 on 3998.0 (71.0%)
epoch 2 test : #Correct 2842.0 on 3998.0 (71.1%)
epoch 3 test : #Correct 2843.0 on 3998.0 (71.1%)
epoch 4 test : #Correct 2825.0 on 3998.0 (70.7%)
epoch 5 test : #Correct 2834.0 on 3998.0 (70.9%)
epoch 6 test : #Correct 2863.0 on 3998.0 (71.6%)
epoch 7 test : #Correct 2844.0 on 3998.0 (71.1%)
epoch 8 test : #Correct 2830.0 on 3998.0 (70.8%)
epoch 9 test : #Correct 2847.0 on 3998.0 (71.2%)
epoch 10 test : #Correct 2843.0 on 3998.0 (71.1%)
epoch 11 test : #Correct 2862.0 on 3998.0 (71.6%)
epoch 12 test : #Correct 2841.0 on 3998.0 (71.1%)
epoch 13 test : #Correct 2845.0 on 3998.0 (71.2%)
epoch 14 test : #Correct 2861.0 on 3998.0 (71.6%)
epoch 15 test : #Correct 2852.0 on 3998.0 (71.3%)
epoch 16 test : #Correct 2820.0 on 3998.0 (70.5%)
epoch 17 test : #Correct 2862.0 on 3998.0 (71.6%)
epoch 18 test : #Correct 2858.0 on 3998.0 (71.5%)
epoch 19 test : #Correct 2859.0 on 3998.0 (71.5%)


In [28]:
# Save the net. torch.save is given the model object itself, not a state_dict.
# NOTE(review): the file name advertises 50-epoch stages, while the visible
# training loops run range(20) - confirm before relying on the name.
ckpt_path = "ckpt/gesture_multiBGImages_alexFineTuned-50epoch-lr-0.01-50epoch-lr-0.001-50epoch-lr-0.001-50epoch-lr-0.0001.ckpt"
# Create the target folder if it is missing so a finished training run
# cannot fail at save time on a fresh checkout.
ckpt_dir = os.path.dirname(ckpt_path)
if ckpt_dir and not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)
torch.save(alexTunedClassifier, ckpt_path)