Conclusion:

Accuracy hovers at around 50% using the VGG-16 pretrained model, and training all the layers. It might be better fine-tuning only the last layers. However, don't expect things to get drastically better. Also, the training is taking (very) very long, i.e. ~10 minutes for a single epoch. So abandoning this for now..

In [1]:
import numpy as np
import torch 
import torchvision
import torch.nn as nn
import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
from torch.utils.data import *
from torch.optim import Adam
from torch.optim import lr_scheduler
from __future__ import print_function, division
import time
import os
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

In [2]:
x_data = np.load('x_data.npz.npy')
y_data = np.load('y_data.npz.npy')

In [3]:
# Download and load pretrained resnet.
vgg16 = torchvision.models.vgg16(pretrained=True)

In [4]:
vgg16.classifier[6].out_features = 15

In [5]:
use_gpu = torch.cuda.is_available()
if use_gpu:
    print('cuda available')
    vgg16 = vgg16.cuda()

cuda available


In [6]:
criterion = nn.CrossEntropyLoss()

In [7]:
# Observe that only fc parameters are being optimized
optimizer = Adam(vgg16.parameters(), lr=0.001)

In [8]:
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [9]:
class CustomDataSet(Dataset):
    """Dataset wrapping data and target tensors.
    
    Each sample will be retrieved by indexing both tensors along the first
    dimension.

    Arguments:
        data_tensor (Tensor): contains sample data.
        target_tensor (Tensor): contains sample targets (labels).
    """

    def getCategoryLabel(self, y_vec):
        tot = y_vec.shape[0]
        vec = y_vec.shape[1]

        y_cat = np.zeros(tot)
        for i in range(tot):
            for j in range(vec):
                if (y_vec[i,j] == 1.0):
                    y_cat[i]=j
                    break                       
        return y_cat    
    
    def __init__(self, data_arr, target_arr):
        assert data_arr.shape[0] == target_arr.shape[0]
        self.data_arr = data_arr
        self.target_arr = torch.from_numpy(self.getCategoryLabel(target_arr).astype('uint8'))

        self.transform = transforms.Compose([
            transforms.Scale((224,224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])            
        ])
        
    def __getitem__(self, index):
        
        # this hack needed because transform will not act on images directly!
        # Have to first convert to img using Image.fromarray(..)
        # The use the transforms from above
        temp_img = Image.fromarray(self.data_arr[index].astype('uint8'))
        mod_img = self.transform(temp_img)

        label = self.target_arr[index]        
        return mod_img, label

    def __len__(self):
        return self.data_arr.shape[0]


In [10]:
# I need to be sure to randomize x_data and y_data, because the below only takes the last 3000 snapshots
# of the track, which isn't ideal way to validate...
train_data = CustomDataSet(x_data[:-3000], y_data[:-3000])
valid_data = CustomDataSet(x_data[-3000:], y_data[-3000:])

In [11]:
dataset_sizes={}
dataset_sizes['train'] = len(train_data)
dataset_sizes['valid'] = len(valid_data)

In [12]:
dataLoaders = {}
dataLoaders['train'] = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
dataLoaders['valid'] = DataLoader(valid_data, batch_size=32, shuffle=True, num_workers=4)

In [13]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in tqdm(dataLoaders[phase]):
                # get the inputs
                inputs, labels = data
                
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [14]:
train_model(vgg16, criterion, optimizer, exp_lr_scheduler)

  0%|          | 0/468 [00:00<?, ?it/s]

Epoch 0/24
----------


100%|██████████| 468/468 [10:19<00:00,  1.22s/it]
  0%|          | 0/94 [00:00<?, ?it/s]

train Loss: 0.0566 Acc: 0.4082


100%|██████████| 94/94 [00:33<00:00,  3.05it/s]
  0%|          | 0/468 [00:00<?, ?it/s]

valid Loss: 0.0434 Acc: 0.4723

Epoch 1/24
----------


100%|██████████| 468/468 [10:18<00:00,  1.22s/it]
  0%|          | 0/94 [00:00<?, ?it/s]

train Loss: 0.0442 Acc: 0.4627


100%|██████████| 94/94 [00:33<00:00,  3.04it/s]
  0%|          | 0/468 [00:00<?, ?it/s]

valid Loss: 0.0423 Acc: 0.4793

Epoch 2/24
----------


100%|██████████| 468/468 [10:19<00:00,  1.22s/it]
  0%|          | 0/94 [00:00<?, ?it/s]

train Loss: 0.0429 Acc: 0.4757


100%|██████████| 94/94 [00:33<00:00,  3.05it/s]
  0%|          | 0/468 [00:00<?, ?it/s]

valid Loss: 0.0416 Acc: 0.4907

Epoch 3/24
----------


100%|██████████| 468/468 [10:19<00:00,  1.22s/it]
  0%|          | 0/94 [00:00<?, ?it/s]

train Loss: 0.0421 Acc: 0.4835


100%|██████████| 94/94 [00:32<00:00,  3.05it/s]
  0%|          | 0/468 [00:00<?, ?it/s]

valid Loss: 0.0403 Acc: 0.5010

Epoch 4/24
----------


100%|██████████| 468/468 [10:18<00:00,  1.22s/it]
  0%|          | 0/94 [00:00<?, ?it/s]

train Loss: 0.0415 Acc: 0.4836


100%|██████████| 94/94 [00:32<00:00,  3.05it/s]
  0%|          | 0/468 [00:00<?, ?it/s]

valid Loss: 0.0412 Acc: 0.4820

Epoch 5/24
----------


 33%|███▎      | 156/468 [03:26<06:52,  1.32s/it]Process Process-43:
Process Process-44:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/pytorch/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
Process Process-41:
  File "/home/paperspace/anaconda3/envs/pytorch/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/home/paperspace/anaconda3/envs/pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process Process-42:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/paperspace/anaconda3/envs/pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/paperspace/anaconda3/envs/

KeyboardInterrupt: 