In [1]:
import os
import pandas as pd
import numpy as np
import re
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


from warnings import filterwarnings
filterwarnings('ignore')
RND_STATE = 42
np.random.seed(RND_STATE)

PATH = ''
foldersTrain = {'flow': 'OptFlowTrain/',
           'frames': 'FramesTrain/'}

foldersValid = {'flow': 'OptFlowValid/',
           'frames': 'FramesValid/'}

batch_size = 32
n_epochs = 100
LEARNING_RATE = 1e-3

In [2]:
'''
From: https://stackoverflow.com/questions/4623446/how-do-you-sort-files-numerically
'''
def tryint(s):
    try:
        return int(s)
    except:
        return s

def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.
        "z23a" -> ["z", 23, "a"]
    """
    return [ tryint(c) for c in re.split('([0-9]+)', s) ]

In [3]:
framesTrain = [PATH + foldersTrain['frames'] + img for img in sorted(os.listdir(foldersTrain['frames']), key=alphanum_key)]
flowTrain =  [PATH + foldersTrain['flow'] + img for img in sorted(os.listdir(foldersTrain['flow']), key=alphanum_key)]
framesValid = [PATH + foldersValid['frames'] + img for img in sorted(os.listdir(foldersValid['frames']), key=alphanum_key)]
flowValid =  [PATH + foldersValid['flow'] + img for img in sorted(os.listdir(foldersValid['flow']), key=alphanum_key)]

In [4]:
labelsTrain = pd.read_csv(PATH + 'training_labels.csv')
labelsValid = pd.read_csv(PATH + 'valid_labels.csv')

In [5]:
# tmp
framesTrain = framesTrain[:labelsTrain.shape[0]]
flowTrain = flowTrain[:labelsTrain.shape[0]]

In [6]:
class DataLoader(Dataset):
    def __init__(self, img, flow, labels):
        self.img = img
        self.flow = flow
        self.size = len(self.img)
        self.y = labels

    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):
        Ximg = []
        Ximg.append(cv2.imread(self.img[idx]))
        Ximg = np.array(Ximg)
        
        Xflow = []
        Xflow.append(cv2.imread(self.flow[idx]))
        Xflow = np.array(Xflow)
        

        # normalize
        for i in range(Ximg[0].shape[-1]):
            if np.std(Ximg[0][:, :, i]) > 0:
                Ximg[0][:, :, i] = (Ximg[0][:, :, i] - np.mean(Ximg[0][:, :, i])) / np.std(Ximg[0][:, :, i])
                
        # normalize
        for i in range(Xflow[0].shape[-1]):
            if np.std(Xflow[0][:, :, i]) > 0:
                Xflow[0][:, :, i] = (Xflow[0][:, :, i] - np.mean(Xflow[0][:, :, i])) / np.std(Xflow[0][:, :, i])
        
        return Ximg[0], Xflow[0], self.y[idx]
    

In [17]:
train_dataset = DataLoader(framesTrain, flowTrain, pd.get_dummies(labelsTrain.iloc[:, 1]).values)
valid_dataset = DataLoader(framesValid, flowValid, pd.get_dummies(labelsValid.iloc[:, 1]).values)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                          batch_size=batch_size, shuffle=False, drop_last=True)

In [34]:
class Net(nn.Module):
    def __init__(self, n_classes = 3):
        super(Net, self).__init__()
        
        self.cnn_branch1 = nn.Sequential(
            nn.Conv2d(3, 64, 3),
            nn.Conv2d(64, 32, 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2)),
            nn.Conv2d(32, 16, 2),
            nn.Conv2d(16, 4, 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2))
        )
        
        self.cnn_branch2 = nn.Sequential(
            nn.Conv2d(3, 64, 3),
            nn.Conv2d(64, 32, 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2)),
            nn.Conv2d(32, 16, 2),
            nn.Conv2d(16, 4, 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2))
        )

        self.linear_branch = nn.Sequential(
            nn.Linear(30*30*4*2, 256),  # dimensionality error
            nn.ReLU(inplace=True),
            nn.Linear(256, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, n_classes),
            nn.Softmax()
        )

        # init layers with weights
        for m in self.cnn_branch1.children():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, np.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                
        # init layers with weights
        for m in self.cnn_branch2.children():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, np.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        for m in self.linear_branch.children():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform(m.weight)
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1.0)
                m.bias.data.zero_()

    def forward(self, x1, x2):
        x1, x2  = x1.permute(0,3,1,2), x2.permute(0,3,1,2)
        x1,x2 = self.cnn_branch1(x1), self.cnn_branch2(x2)
        
        # flatten
        x1, x2 = x1.reshape(batch_size, -1),x2.reshape(batch_size, -1)
        
        # concatenate along 1 axis
        x = torch.cat((x1, x2), dim = 1)
        out = self.linear_branch(x)
        return out

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [39]:
model = Net()

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

In [40]:
lr_sch = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=8, verbose=True)

if torch.cuda.is_available():
    model = model.to(torch.device("cuda"))
    criterion = criterion.to(torch.device("cuda"))


def train(epoch):
    model.train()
    dtype = torch.FloatTensor
    for batch_idx, (x1, x2, target) in enumerate(train_loader):
        x1, x2, target = Variable(x1).type(dtype), Variable(x2).type(dtype), Variable(target).type(dtype)
        if torch.cuda.is_available():
            x1, x2 = x1.to(torch.device("cuda")), x2.to(torch.device("cuda"))
            target = target.to(torch.device("cuda"))
        optimizer.zero_grad()
        output = model(x1, x2)

        target = torch.argmax(target, dim=1)

        loss = criterion(output, target)

        loss.backward()
        optimizer.step()
    lr_sch.step(loss)


def evaluate(data_loader):
    model.eval()
    loss = 0
    correct = 0
    preds = np.array([])
    targets = np.array([])
    
    dtype = torch.FloatTensor
    for batch_idx, (x1, x2, target) in enumerate(data_loader):
        x1, x2, target = Variable(x1).type(dtype), Variable(x2).type(dtype), Variable(target).type(dtype)
        if torch.cuda.is_available():
            x1, x2 = x1.to(torch.device("cuda")), x2.to(torch.device("cuda"))
            target = target.to(torch.device("cuda"))

        output = model(x1, x2)
        if len(preds) == 0:
            if torch.cuda.is_available():
                preds = output.cpu().detach().numpy()
                targets = target.cpu().detach().numpy()
            else:
                preds = output.detach().numpy()
                targets = target.detach().numpy()
        else:
            if torch.cuda.is_available():
                preds = np.vstack([preds, output.cpu().detach().numpy()])
                targets = np.vstack([targets, target.cpu().detach().numpy()])
            else:
                preds = np.vstack([preds, output.detach().numpy()])
                targets = np.vstack([targets, target.detach().numpy()])
        
        target = torch.argmax(target, dim=1)
        loss += F.cross_entropy(output, target).item()


    s = np.sum(targets, axis=1) > 0
    tot = np.sum(((preds > 0.5) + targets) > 0, axis=1)
    intersect = np.sum((preds > 0.5) * targets, axis=1)
    acc = intersect[s] / tot[s]
    out_acc = np.sum(acc == 1) / len(acc)

    loss /= len(data_loader.dataset)

    print('\nAverage loss: {:.4f}, Jaccard Accuracy: ({:.4f}%)\n'.format(
        loss, 100 * out_acc))
    return preds, target


print('[INFO] Training started...')
import time
t1 = time.time()
for epoch in range(n_epochs):
    train(epoch)
    preds, targets = evaluate(train_loader)
print('Total training time {:2f} seconds'.format(time.time() - t1))

print('[INFO] Training finished')

torch.save(model.state_dict(), 'model.pt')

print('[INFO] Model Saved!')

print('Predicted labels')
print(preds[:10])
print('\n')

print('Real labels')
print(targets[:10])
print('\n')


[INFO] Training started...

Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average loss: 0.0403, Jaccard Accuracy: (23.7500%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)


Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)

Epoch    10: reducing learning rate of group 0 to 5.0000e-05.

Average loss: 0.0403, Jaccard Accuracy: (23.7500%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)


Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average loss: 0.0403, Jaccard Accuracy: (23.7500%)


Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average loss: 0.0399, Jaccard Accuracy: (25.0000%)


Average loss: 0.0401, Jaccard Accuracy: (24.3750%)


Average l

KeyboardInterrupt: 