In [None]:
# Description: 

# 1. Use the pretrained ResNet18 as feature extractor
# 2. Save extracted features (data3d is just the combination of the train/valid/test ResNet18 features)
# 3. Train MLP for image classification and video classification


In [11]:
import math
import os
import timeit

import numpy as np
import scipy.io
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as T
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader,sampler,Dataset

# Per-clip labels for the training and validation splits, read from the
# MATLAB file shipped with the data ('trLb' / 'valLb' entries).
label_mat = scipy.io.loadmat('./data/q3_2_data.mat')
label_train, label_val = label_mat['trLb'], label_mat['valLb']

class ActionDataset(Dataset):
    """Frame-level dataset over a directory of action clips.

    Each clip lives in its own sub-folder named with a 1-based, zero-padded
    5-digit id (``00001``, ``00002``, ...) and contributes ``FRAMES_PER_CLIP``
    frames stored as ``1.jpg``, ``2.jpg``, ``3.jpg``.  Sample ``idx`` therefore
    maps to clip ``idx // 3`` and frame ``idx % 3``.
    """

    # Number of frames read from every clip folder.
    FRAMES_PER_CLIP = 3

    def __init__(self,  root_dir,labels=None, transform=None):
        """
        Args:
            root_dir (string): Directory with all the clip folders.
            labels (sequence, optional): Per-clip labels, indexed as
                ``labels[clip][0]``; stored labels are 1-based.  Omit (or pass
                an empty sequence) for unlabeled data.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform
        # Every directory entry is counted as one clip, so stray files in
        # root_dir would inflate the reported length.
        self.length = len(os.listdir(self.root_dir))
        # A fresh list per instance instead of a shared mutable default.
        self.labels = [] if labels is None else labels

    def __len__(self):
        return self.length * self.FRAMES_PER_CLIP

    def __getitem__(self, idx):
        """Return a dict with 'image', 'img_path' and, when labels exist, 'Label'.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``.  Raising
                IndexError (rather than a file error) lets plain ``for``
                iteration over the dataset terminate correctly.
        """
        if not 0 <= idx < len(self):
            raise IndexError('index %d is out of range for %d samples' % (idx, len(self)))

        clip_index, frame_index = divmod(idx, self.FRAMES_PER_CLIP)
        folder = format(clip_index + 1, '05d')       # folders are 1-based
        imgname = str(frame_index + 1) + '.jpg'      # frames are 1-based
        img_path = os.path.join(self.root_dir, folder, imgname)

        # Close the file handle deterministically and force three channels so
        # that grayscale/CMYK JPEGs still match the 3-channel normalisation.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)

        sample = {'image': image, 'img_path': img_path}
        if len(self.labels) != 0:
            # Convert the stored 1-based label to a 0-based class index.
            sample['Label'] = self.labels[clip_index][0] - 1
        return sample


# Preprocessing applied to every frame: resize to 224x224, convert to a
# tensor, then normalise each channel with the mean/std used by torchvision's
# ImageNet-pretrained models.
data_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One dataset per split; the test split has no labels.
image_dataset_train = ActionDataset(root_dir='./data/trainClips/',
                                    labels=label_train,
                                    transform=data_transform)
image_dataset_val = ActionDataset(root_dir='./data/valClips/',
                                  labels=label_val,
                                  transform=data_transform)
image_dataset_test = ActionDataset(root_dir='./data/testClips/',
                                   labels=[],
                                   transform=data_transform)

# shuffle=False keeps batches in dataset order, so the extracted feature rows
# follow the clip/frame order of the folders.
image_dataloader_train = DataLoader(image_dataset_train, batch_size=256,
                                    shuffle=False, num_workers=4)
image_dataloader_val = DataLoader(image_dataset_val, batch_size=256,
                                  shuffle=False, num_workers=4)
image_dataloader_test = DataLoader(image_dataset_test, batch_size=256,
                                   shuffle=False, num_workers=4)

In [12]:
import torchvision.models as models

# Load ResNet18 with pretrained weights, then move it to the GPU; it is only
# used as a fixed feature extractor below.
resnet18 = models.resnet18(pretrained=True)
resnet18 = resnet18.cuda()

In [13]:
def extract(x):
    """Return the 'avgpool' activations of the global ``resnet18`` for a batch.

    Args:
        x: 4-D image batch (the unpacking below enforces four dimensions),
           already on the same device as ``resnet18``.

    Returns:
        numpy array of shape (N, D) with one flattened feature row per image,
        where D is whatever the avgpool layer emits per image.
    """
    resnet18.eval()
    N, _, _, _ = x.size()
    captured = []

    def copy_data(m, i, o):
        # The avgpool output carries trailing singleton spatial dims; flatten
        # it explicitly instead of relying on copy_() tolerating a shape
        # mismatch, and clone so the snapshot is independent of the network.
        captured.append(o.data.view(N, -1).clone())

    layer = resnet18._modules.get('avgpool')
    h = layer.register_forward_hook(copy_data)
    try:
        resnet18(x)
    finally:
        # Always detach the hook, even if the forward pass raises; otherwise
        # stale hooks would pile up on the shared model.
        h.remove()
    return captured[0].cpu().numpy()
    
# Pick the split whose features should be extracted (one split per run).
dataloader = image_dataloader_test
# dataloader = image_dataloader_train
# dataloader = image_dataloader_val

# Collect per-batch feature arrays and join them once at the end; appending
# to a growing array inside the loop re-copies all earlier rows every batch.
batch_features = []
for t, sample in enumerate(dataloader):
    x_var = Variable(sample['image'].cuda())
    feature = extract(x_var)
    batch_features.append(feature)

# An empty loader yields an empty (0, 512) array instead of the uninitialised
# placeholder buffer that used to be saved in that case.
if batch_features:
    Train = np.concatenate(batch_features, axis=0)
else:
    Train = np.empty([0, 512])

  


In [14]:
# Store the extracted features as float32 in a compressed .mat file.
# NOTE: despite its name, `Train` holds the features of whichever split
# `dataloader` was pointed at in the extraction cell (the test split as written).
Train = np.float32(Train)
scipy.io.savemat('test-resnet18.mat', {'tstD':Train}, do_compression=True)
# Alternatives for the other splits. The keys must be 'trD' / 'valD' because
# the loading cell reads train_mat['trD'] and valid_mat['valD'].
# scipy.io.savemat('train-resnet18.mat', {'trD':Train}, do_compression=True)
# scipy.io.savemat('valid-resnet18.mat', {'valD':Train}, do_compression=True)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader,sampler,Dataset
import torchvision.datasets as dset
import torchvision.transforms as T
import timeit
from PIL import Image
import os
import numpy as np
import scipy.io
import math

# Load the frame-level labels and the pre-extracted ResNet18 features.
label_mat = scipy.io.loadmat('./label2d.mat')
train_mat = scipy.io.loadmat('./train-resnet18.mat')
valid_mat = scipy.io.loadmat('./valid-resnet18.mat')
test_mat = scipy.io.loadmat('./test-resnet18.mat')

# Feature matrices, one row per frame (keys 'trD' / 'valD' / 'tstD').
feature_train, feature_valid, feature_test = (
    train_mat['trD'],
    valid_mat['valD'],
    test_mat['tstD'],
)

# Labels are stored as column vectors; keep only the first column.
label_train = label_mat['trLb'][:,0]
label_valid = label_mat['valLb'][:,0]

# The test split takes its label column from a separate file
# (presumably placeholders -- TODO confirm).
empty_mat = scipy.io.loadmat('./empty2d.mat')
label_test = empty_mat['tstLb'][:,0]

# NOTE(review): the name `train` is rebound by `def train(...)` in a later
# cell, so re-running this cell afterwards shadows that function.
train = torch.utils.data.TensorDataset(torch.from_numpy(feature_train),
                                       torch.from_numpy(label_train))
valid = torch.utils.data.TensorDataset(torch.from_numpy(feature_valid),
                                       torch.from_numpy(label_valid))
test = torch.utils.data.TensorDataset(torch.from_numpy(feature_test),
                                      torch.from_numpy(label_test))

# Only the training loader shuffles; validation/test keep file order.
train_loader = DataLoader(train, batch_size=64, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid, batch_size=64, shuffle=False, num_workers=4)
test_loader = DataLoader(test, batch_size=64, shuffle=False, num_workers=4)

In [2]:
gpu_dtype = torch.cuda.FloatTensor

# Three-layer perceptron on top of the 512-d frame features:
# 512 -> 512 -> 512 -> 10 output scores, cast to CUDA float tensors.
mlp = nn.Sequential(
    nn.Linear(512, 512),
    nn.ReLU(inplace=True),
    nn.Linear(512, 512),
    nn.ReLU(inplace=True),
    nn.Linear(512, 10),
).type(gpu_dtype)
mlp.cuda()

def reset(m):
    """Re-initialise module ``m`` if it exposes ``reset_parameters``.

    Meant to be passed to ``model.apply`` so every sub-module that knows how
    to re-draw its weights does so; other modules are left untouched.
    """
    try:
        reinit = m.reset_parameters
    except AttributeError:
        return
    reinit()

In [3]:
def train(model, loss_fn, optimizer, dataloader, num_epochs = 1, print_every = 100):
    """Train ``model`` for ``num_epochs`` passes over ``dataloader``.

    Args:
        model: module to optimise; switched to training mode every epoch.
        loss_fn: callable ``loss_fn(scores, targets)`` returning a scalar loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        dataloader: iterable of ``(features, labels)`` batches.
        num_epochs: number of passes over the data.
        print_every: report the batch loss every this many iterations.
    """
    # Follow the device the model lives on instead of unconditionally calling
    # .cuda(), so the same loop also works for a CPU model.
    use_cuda = any(p.is_cuda for p in model.parameters())

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            inputs, targets = sample[0], sample[1].long()
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            x_var = Variable(inputs)
            y_var = Variable(targets)

            scores = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                # `loss.data[0]` raises on 0-dim losses (PyTorch >= 0.5);
                # prefer .item() and only fall back on very old releases.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print('t = %d, loss = %.4f' % (t + 1, loss_value))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Args:
        model: module producing one score per class for each input row.
        loader: iterable of ``(features, labels)`` batches.

    Returns:
        Fraction of correctly classified samples in ``[0, 1]``
        (``0.0`` when the loader yields no samples).
    """
    num_correct = 0
    num_samples = 0
    # Follow the model's device rather than always moving inputs to the GPU.
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval()
    for t, sample in enumerate(loader):
        inputs = sample[0].cuda() if use_cuda else sample[0]
        scores = model(Variable(inputs))
        _, preds = scores.data.cpu().max(1)
        # Labels are compared on the CPU; no GPU round trip is needed.
        num_correct += (preds.numpy() == sample[1].cpu().numpy()).sum()
        num_samples += preds.size(0)
    # Guard against an empty loader instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [4]:
# Multi-class margin loss and Adam (with a small weight decay) for the frame-level MLP.
loss_fn = nn.MultiMarginLoss().cuda()
optimizer = optim.Adam(mlp.parameters(), lr=1e-4, weight_decay=1e-5)
# Seed before re-initialising the weights so the run is repeatable.
torch.random.manual_seed(12345)

# Order matters here: reset the weights, train, then evaluate on both splits.
mlp.apply(reset) 
mlp.train() 
train(mlp, loss_fn, optimizer,train_loader, num_epochs=10) 
mlp.eval()
check_accuracy(mlp, train_loader)
check_accuracy(mlp, valid_loader)

Starting epoch 1 / 10
t = 100, loss = 0.0858
t = 200, loss = 0.0342
t = 300, loss = 0.0280
Starting epoch 2 / 10
t = 100, loss = 0.0151
t = 200, loss = 0.0093
t = 300, loss = 0.0103
Starting epoch 3 / 10
t = 100, loss = 0.0087
t = 200, loss = 0.0089
t = 300, loss = 0.0121
Starting epoch 4 / 10
t = 100, loss = 0.0136
t = 200, loss = 0.0019
t = 300, loss = 0.0081
Starting epoch 5 / 10
t = 100, loss = 0.0004
t = 200, loss = 0.0031
t = 300, loss = 0.0000
Starting epoch 6 / 10
t = 100, loss = 0.0007
t = 200, loss = 0.0013
t = 300, loss = 0.0000
Starting epoch 7 / 10
t = 100, loss = 0.0002
t = 200, loss = 0.0007
t = 300, loss = 0.0005
Starting epoch 8 / 10
t = 100, loss = 0.0003
t = 200, loss = 0.0009
t = 300, loss = 0.0000
Starting epoch 9 / 10
t = 100, loss = 0.0000
t = 200, loss = 0.0000
t = 300, loss = 0.0005
Starting epoch 10 / 10
t = 100, loss = 0.0010
t = 200, loss = 0.0000
t = 300, loss = 0.0001
Got 23295 / 23310 correct (99.94)
Got 5929 / 6690 correct (88.62)


In [5]:
def predict_on_test(model, loader, out_path='mlp2d.csv'):
    """Write the predicted class of every test sample to a CSV file.

    The file has a header row ``Id,Class`` followed by one
    ``<running index>,<predicted class>`` row per sample, in loader order.

    Args:
        model: module producing one score per class for each input row.
        loader: iterable of batches whose first element holds the features.
        out_path: destination CSV path.

    Returns:
        Number of prediction rows written.
    """
    # Follow the model's device rather than always moving inputs to the GPU.
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval()
    count = 0
    # The context manager closes the file even if inference fails midway.
    with open(out_path, 'w') as results:
        results.write('Id'+','+'Class'+'\n')
        for t, sample in enumerate(loader):
            inputs = sample[0].cuda() if use_cuda else sample[0]
            scores = model(Variable(inputs))
            _, preds = scores.data.max(1)
            for i in range(len(preds)):
                # int() keeps the CSV numeric: on recent PyTorch preds[i] is a
                # 0-dim tensor whose str() would be 'tensor(...)'.
                results.write(str(count)+','+str(int(preds[i]))+'\n')
                count += 1
    return count

# Write the frame-level predictions to disk; predict_on_test returns the
# number of rows it wrote, which is printed as a sanity check.
count=predict_on_test(mlp, test_loader)
print(count)

9810


In [None]:
# 84.82% on Kaggle test data



In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader,sampler,Dataset
import torchvision.datasets as dset
import torchvision.transforms as T
import timeit
from PIL import Image
import os
import numpy as np
import scipy.io
import math

# Load the clip-level labels and the combined train/valid/test feature file.
label_mat = scipy.io.loadmat('./label3d.mat')
data_mat = scipy.io.loadmat('./data3d.mat')

# Feature matrices for the three splits (keys 'trD' / 'valD' / 'tstD').
feature_train, feature_valid, feature_test = (
    data_mat['trD'],
    data_mat['valD'],
    data_mat['tstD'],
)

# Labels are stored as column vectors; keep only the first column.
label_train = label_mat['trLb'][:,0]
label_valid = label_mat['valLb'][:,0]

# The test split takes its label column from a separate file
# (presumably placeholders -- TODO confirm).
empty_mat = scipy.io.loadmat('./empty3d.mat')
label_test = empty_mat['tstLb'][:,0]

# NOTE(review): the name `train` is rebound by `def train(...)` in the next
# cell, so re-running this cell afterwards shadows that function.
train = torch.utils.data.TensorDataset(torch.from_numpy(feature_train),
                                       torch.from_numpy(label_train))
valid = torch.utils.data.TensorDataset(torch.from_numpy(feature_valid),
                                       torch.from_numpy(label_valid))
test = torch.utils.data.TensorDataset(torch.from_numpy(feature_test),
                                      torch.from_numpy(label_test))

# Only the training loader shuffles; validation/test keep file order.
train_loader = DataLoader(train, batch_size=32, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test, batch_size=32, shuffle=False, num_workers=4)

In [7]:
def train(model, loss_fn, optimizer, dataloader, num_epochs = 1, print_every = 32):
    """Train ``model`` for ``num_epochs`` passes over ``dataloader``.

    Args:
        model: module to optimise; switched to training mode every epoch.
        loss_fn: callable ``loss_fn(scores, targets)`` returning a scalar loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        dataloader: iterable of ``(features, labels)`` batches.
        num_epochs: number of passes over the data.
        print_every: report the batch loss every this many iterations.
    """
    # Follow the device the model lives on instead of unconditionally calling
    # .cuda(), so the same loop also works for a CPU model.
    use_cuda = any(p.is_cuda for p in model.parameters())

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            inputs, targets = sample[0], sample[1].long()
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            x_var = Variable(inputs)
            y_var = Variable(targets)

            scores = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                # `loss.data[0]` raises on 0-dim losses (PyTorch >= 0.5);
                # prefer .item() and only fall back on very old releases.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print('t = %d, loss = %.4f' % (t + 1, loss_value))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Args:
        model: module producing one score per class for each input row.
        loader: iterable of ``(features, labels)`` batches.

    Returns:
        Fraction of correctly classified samples in ``[0, 1]``
        (``0.0`` when the loader yields no samples).
    """
    num_correct = 0
    num_samples = 0
    # Follow the model's device rather than always moving inputs to the GPU.
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval()
    for t, sample in enumerate(loader):
        inputs = sample[0].cuda() if use_cuda else sample[0]
        scores = model(Variable(inputs))
        _, preds = scores.data.cpu().max(1)
        # Labels are compared on the CPU; no GPU round trip is needed.
        num_correct += (preds.numpy() == sample[1].cpu().numpy()).sum()
        num_samples += preds.size(0)
    # Guard against an empty loader instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [8]:
gpu_dtype = torch.cuda.FloatTensor

# Three-layer perceptron on the clip-level input of width 512*3:
# 1536 -> 1024 -> 512 -> 10 output scores, cast to CUDA float tensors.
mlp = nn.Sequential(
    nn.Linear(512*3, 512*2),
    nn.ReLU(inplace=True),
    nn.Linear(512*2, 512),
    nn.ReLU(inplace=True),
    nn.Linear(512, 10),
).type(gpu_dtype)
mlp.cuda()

def reset(m):
    """Call ``m.reset_parameters()`` when the module provides it; otherwise do nothing.

    Intended for ``model.apply(reset)`` to re-draw the weights of every
    sub-module that supports re-initialisation.
    """
    if not hasattr(m, 'reset_parameters'):
        return
    m.reset_parameters()

In [9]:
# Multi-class margin loss and Adam (no weight decay here) for the clip-level MLP.
loss_fn = nn.MultiMarginLoss().cuda()
optimizer = optim.Adam(mlp.parameters(), lr=1e-4)
# Seed before re-initialising the weights so the run is repeatable.
torch.random.manual_seed(12345)

# Order matters here: reset the weights, train, then evaluate on both splits.
mlp.apply(reset) 
mlp.train() 
train(mlp, loss_fn, optimizer,train_loader, num_epochs=5) 
mlp.eval()
check_accuracy(mlp, train_loader)
check_accuracy(mlp, valid_loader)

Starting epoch 1 / 5
t = 32, loss = 0.1689
t = 64, loss = 0.0448
t = 96, loss = 0.0367
t = 128, loss = 0.0365
t = 160, loss = 0.0386
t = 192, loss = 0.0353
t = 224, loss = 0.0072
Starting epoch 2 / 5
t = 32, loss = 0.0113
t = 64, loss = 0.0066
t = 96, loss = 0.0301
t = 128, loss = 0.0032
t = 160, loss = 0.0051
t = 192, loss = 0.0059
t = 224, loss = 0.0025
Starting epoch 3 / 5
t = 32, loss = 0.0018
t = 64, loss = 0.0053
t = 96, loss = 0.0020
t = 128, loss = 0.0030
t = 160, loss = 0.0016
t = 192, loss = 0.0049
t = 224, loss = 0.0022
Starting epoch 4 / 5
t = 32, loss = 0.0025
t = 64, loss = 0.0013
t = 96, loss = 0.0004
t = 128, loss = 0.0031
t = 160, loss = 0.0002
t = 192, loss = 0.0031
t = 224, loss = 0.0017
Starting epoch 5 / 5
t = 32, loss = 0.0000
t = 64, loss = 0.0019
t = 96, loss = 0.0000
t = 128, loss = 0.0000
t = 160, loss = 0.0048
t = 192, loss = 0.0000
t = 224, loss = 0.0056
Got 7764 / 7770 correct (99.92)
Got 2050 / 2230 correct (91.93)


In [10]:
def predict_on_test3d(model, loader, out_path='mlp3d.csv'):
    """Write the predicted class of every test clip to a CSV file.

    The file has a header row ``Id,Class`` followed by one
    ``<running index>,<predicted class>`` row per sample, in loader order.

    Args:
        model: module producing one score per class for each input row.
        loader: iterable of batches whose first element holds the features.
        out_path: destination CSV path.

    Returns:
        Number of prediction rows written.
    """
    # Follow the model's device rather than always moving inputs to the GPU.
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval()
    count = 0
    # The context manager closes the file even if inference fails midway.
    with open(out_path, 'w') as results:
        results.write('Id'+','+'Class'+'\n')
        for t, sample in enumerate(loader):
            inputs = sample[0].cuda() if use_cuda else sample[0]
            scores = model(Variable(inputs))
            _, preds = scores.data.max(1)
            for i in range(len(preds)):
                # int() keeps the CSV numeric: on recent PyTorch preds[i] is a
                # 0-dim tensor whose str() would be 'tensor(...)'.
                results.write(str(count)+','+str(int(preds[i]))+'\n')
                count += 1
    return count

# Write the clip-level predictions to disk; predict_on_test3d returns the
# number of rows it wrote, which is printed as a sanity check.
count=predict_on_test3d(mlp, test_loader)
print(count)

3270


In [None]:
# 83.88% on Kaggle test data

