In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from PIL import Image

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from imutils import contours
import shutil

%matplotlib inline

TRAINING_PATH = 'pushes/'
BATCH_SIZE = 16
IMG_SHAPE = (128, 128)

In [2]:
class RandomShift(object):
    def __init__(self, shift):
        self.shift = shift
        
    @staticmethod
    def get_params(shift):
        """Get parameters for ``rotate`` for a random rotation.
        Returns:
            sequence: params to be passed to ``rotate`` for random rotation.
        """
        hshift, vshift = np.random.uniform(-shift, shift, size=2)

        return hshift, vshift 
    def __call__(self, img):
        hshift, vshift = self.get_params(self.shift)
        
        return img.transform(img.size, Image.AFFINE, (1,0,hshift,0,1,vshift), resample=Image.BICUBIC, fill=1)
    

In [3]:
# define basic image transforms for preprocessing
transform = transforms.Compose(
[
    transforms.ToPILImage(),
    transforms.RandomRotation(15),
    transforms.RandomHorizontalFlip(0.2),
    RandomShift(0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.5,), std = (0.5, ))
])

class Dataset(torch.utils.data.Dataset):
    '''
    Custom Dataset object for the CDiscount competition
        Parameters:
            root_dir - directory including category folders with images

        Example:
        images/
            1000001859/
                26_0.jpg
                26_1.jpg
                ...
            1000004141/
                ...
            ...
    '''
    
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.categories = sorted(os.listdir(root_dir))
        self.cat2idx = dict(zip(self.categories, range(len(self.categories))))
        self.idx2cat = dict(zip(self.cat2idx.values(), self.cat2idx.keys()))
        self.files = []
        cat_mapping = {}
        for (dirpath, dirnames, filenames) in os.walk(self.root_dir):
            for f in filenames:
                if f.endswith('.png'):
                    o = {}
                    o['img_path'] = dirpath + '/' + f
                    o['category'] = self.cat2idx[dirpath[dirpath.find('/')+1:]]
                    cat_mapping[o['category']] = dirpath.split('/')[-1]
                    self.files.append(o)
        self.transform = transform
        print(f'Categorie mapping: {cat_mapping}')
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        img_path = self.files[idx]['img_path']
        category = self.files[idx]['category']
        image = cv2.imread(img_path)
        image = cv2.resize(image, IMG_SHAPE)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image)
            
        return {'image': image, 'category': category}


# create instance of data class and pytorch dataloader
dataSet = Dataset(TRAINING_PATH, transform=transform)
dataloader = torch.utils.data.DataLoader(dataSet, batch_size=BATCH_SIZE, shuffle=True)

Categorie mapping: {0: 'down', 1: 'other', 2: 'up'}


In [4]:
from Net import Net


net = Net()
print('######### Network created #########')
print('Architecture:\n', net)

### Train
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

print('Started Training!')
net.train()
for epoch in range(10):
    running_loss = 0.0
    examples = 0
    for i, data in enumerate(dataloader, 0):
        # Get the inputs
        inputs, labels = data['image'], data['category']
        
        # Wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.data
        examples += BATCH_SIZE
    print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / examples))

print('Finished Training!')

######### Network created #########
Architecture:
 Net(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (dropout): Dropout(p=0.3)
  (fc1): Linear(in_features=46656, out_features=512, bias=True)
  (bnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (bnorm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (bnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc4): Linear(in_features=64, out_features=3, bias=True)
)
Started Training!
[1,    42] loss: 0.040
[2,    42] loss: 0.027


In [21]:
import re
'''
From: https://stackoverflow.com/questions/4623446/how-do-you-sort-files-numerically
'''
def tryint(s):
    try:
        return int(s)
    except:
        return s

def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.
        "z23a" -> ["z", 23, "a"]
    """
    return [ tryint(c) for c in re.split('([0-9]+)', s) ]

In [23]:
from IPython.display import clear_output
net.eval()

transform = transforms.Compose(
[
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.5,), std = (0.5, ))
])
PATH = 'classification/IMG_8266/'
classes = []
for frame in sorted(os.listdir(PATH), key = alphanum_key):
    image = cv2.imread(PATH + frame)
    image = cv2.resize(image, IMG_SHAPE)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    x = transform(image).reshape(1, 3, 128, 128)
    classes.append(np.argmax(net(x).detach().numpy()))

In [28]:
def moveCount(labels):
    k = 0
    t = 0
    for i in range(1, len(labels)):
        if labels[i-1] == 0 and labels[i] == 2:
            k+=1
    for i in range(2, len(labels)-1):
        if labels[i-2] == 0 and labels[i-1] == 1 and labels[i] == 2 and labels[i+1] == 2:
            t+=1
    return k + t

In [29]:
moveCount(classes)

25

In [30]:
# save torch model for further prediction
torch.save(net.state_dict(), 'model.pt')