# About

The main purpose of this notebook is to load our data into PyTorch so that we can start experimenting with neural networks.

This notebook follows the official PyTorch data loading tutorial:
https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

In [25]:
%matplotlib notebook

import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, sampler
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataset import random_split
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image
from scipy.stats import norm

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Set to True to run on CUDA when a GPU is actually available.
USE_GPU = False

# dtype used when creating weight tensors further down.
dtype = torch.float32

# Only pick CUDA when it was requested *and* torch can see a GPU.
gpu_ok = USE_GPU and torch.cuda.is_available()
device = torch.device('cuda' if gpu_ok else 'cpu')

# Constant to control how frequently we print train loss
print_every = 100
print('using device:', device)

using device: cpu


## Dataset Class

Let's load our CSV into our dataset class and run a quick sanity check to confirm that the data is correct.

In [39]:
class HandDataset(Dataset):
    """3D hand pose dataset indexed by a CSV file.

    Each CSV row is read as:
      * column 0                -> path of the image file,
      * the next 21*3 columns   -> 3D joint positions (reshaped to 21 x 3),
      * all remaining columns   -> 2D joint positions (reshaped to 21 x 2).
    """

    NUM_JOINTS = 21

    def __init__(self, csv_file, **kwargs):
        """
        Args:
            csv_file (string): Path to csv file
            transform (callable, optional): Optional transform to be applied
                to every sample dict.
            train (bool, optional): Flag stored on the instance (default True);
                it is not otherwise used by this class.
            heatmap_std (float, optional): When given, `__getitem__` also
                builds per-joint 2D Gaussian heatmaps with this standard
                deviation. Defaults to None, in which case the sample's
                'heatmap_2d' entry is None.
        """
        self.csv = pd.read_csv(csv_file)
        self.transform = kwargs.get('transform', None)
        self.train = kwargs.get('train', True)
        self.heatmap_std = kwargs.get('heatmap_std', None)

    def __len__(self):
        """Number of samples, i.e. rows in the CSV."""
        return len(self.csv)

    def _joints_3d(self, idx):
        """Return the 3D joint positions of row `idx` as a (21, 3) float array."""
        n = self.NUM_JOINTS
        # `.as_matrix()` was removed from pandas; `.to_numpy()` is the replacement.
        return self.csv.iloc[idx, 1:(n*3)+1].to_numpy().astype(float).reshape(n, 3)

    def _joints_2d(self, idx):
        """Return the 2D joint positions of row `idx` as a (21, 2) float array."""
        n = self.NUM_JOINTS
        return self.csv.iloc[idx, (n*3)+1:].to_numpy().astype(float).reshape(n, 2)

    def gaussian_2d(self, idx, std, shape=(480, 640)):
        """Build one Gaussian heatmap per joint for sample `idx`.

        Each joint gets a square window of half-width 4*std around its 2D
        position, clipped to the frame. Inside the window the value is the
        pdf of N(0, std) evaluated at the Euclidean pixel distance to the
        joint; everything else stays zero.

        Args:
            idx (int): Row index into the CSV.
            std (float): Standard deviation of the Gaussian, in pixels.
            shape (tuple): (rows, cols) of the heatmap. Defaults to (480, 640).

        Returns:
            np.ndarray of shape (21, rows, cols), indexed as [joint, y, x].
        """
        pos_2d = self._joints_2d(idx)
        height, width = shape
        heatmap = np.zeros((len(pos_2d), height, width))

        rv = norm(loc=0, scale=std)
        radius = std * 4

        # NOTE(review): joints are treated as (x, y) = (column, row), matching how
        # they are scattered on top of the image elsewhere in this notebook. The
        # previous code indexed the map as [x, y] -- confirm against the CSV.
        for i, (x, y) in enumerate(pos_2d):
            # Clamp the window to the frame (start >= 0, end <= size).
            x0, x1 = max(int(x - radius), 0), min(int(x + radius), width)
            y0, y1 = max(int(y - radius), 0), min(int(y + radius), height)
            if x0 >= x1 or y0 >= y1:
                continue  # joint window lies completely outside the frame

            # Distance of every pixel in the window to the joint, computed at once.
            xs = np.arange(x0, x1)[np.newaxis, :]
            ys = np.arange(y0, y1)[:, np.newaxis]
            heatmap[i, y0:y1, x0:x1] = rv.pdf(np.hypot(xs - x, ys - y))
        return heatmap

    def __getitem__(self, idx):
        """Load sample `idx`.

        Returns:
            dict with keys 'image' (array from `io.imread`), 'pos_3d' (21 x 3),
            'pos_2d' (21 x 2) and 'heatmap_2d' (None unless `heatmap_std` was
            given), passed through `self.transform` when one is set.
        """
        img_name = self.csv.iloc[idx, 0]
        image = io.imread(img_name)

        pos_3d = self._joints_3d(idx)
        pos_2d = self._joints_2d(idx)

        # Heatmaps are opt-in so the default sample layout stays unchanged.
        heatmap_2d = None
        if self.heatmap_std is not None:
            heatmap_2d = self.gaussian_2d(idx, self.heatmap_std,
                                          shape=image.shape[:2])

        sample = {'image': image,
                  'pos_3d': pos_3d,
                  'pos_2d': pos_2d,
                  'heatmap_2d': heatmap_2d}

        if self.transform:
            sample = self.transform(sample)
        return sample

class ToTensor(object):
    """Convert the numpy arrays of a sample dict into torch tensors."""

    def __call__(self, sample):
        """
        Args:
            sample (dict): Must contain 'image' (H x W x C array), 'pos_3d'
                and 'pos_2d'; may contain 'heatmap_2d' (array or None).

        Returns:
            dict of tensors with the image laid out as C x H x W. The
            'heatmap_2d' key is only present when the input heatmap is not None.
        """
        image = sample['image']
        pos_3d, pos_2d = sample['pos_3d'], sample['pos_2d']
        heatmap_2d = sample.get('heatmap_2d')

        # swap color axis: numpy image H x W x C -> torch image C x H x W
        image = image.transpose((2, 0, 1))

        tensors = {'image': torch.from_numpy(image),
                   'pos_3d': torch.from_numpy(pos_3d),
                   'pos_2d': torch.from_numpy(pos_2d)}

        # A None heatmap cannot be converted with from_numpy, and the default
        # DataLoader collate does not accept None entries, so the key is left out.
        if heatmap_2d is not None:
            tensors['heatmap_2d'] = torch.from_numpy(heatmap_2d)
        return tensors
    
class Scale(object):
    """Rescale a sample's image and joint coordinates by a constant factor."""

    def __init__(self, s):
        """
        Args:
            s (float): Scale factor applied to the image width and height and
                to the 2D and 3D joint positions.
        """
        self.s = s

    def __call__(self, sample):
        """
        Args:
            sample (dict): Must contain 'image' (H x W x 3 array), 'pos_3d'
                and 'pos_2d'; may contain 'heatmap_2d' (array or None).

        Returns:
            dict with the resized image, the scaled joint positions, and the
            heatmap resized to the new image size (None when none was given).
        """
        image = sample['image']
        pos_3d, pos_2d = sample['pos_3d'], sample['pos_2d']
        # Read the heatmap from the sample; it was previously an undefined name.
        heatmap_2d = sample.get('heatmap_2d')

        image = Image.fromarray(image)
        width, height = image.width, image.height
        s_width, s_height = int(width*self.s), int(height*self.s)

        # PIL takes (width, height); the numpy result is (height, width, 3).
        image = image.resize((s_width, s_height))
        image = np.array(image).reshape((s_height, s_width, 3))

        pos_3d = pos_3d*self.s
        pos_2d = pos_2d*self.s

        if heatmap_2d is not None:
            # Keep the heatmaps aligned with the resized image. Values are
            # interpolated by PIL (as float32), not re-normalised.
            heatmap_2d = np.stack([
                np.asarray(
                    Image.fromarray(np.asarray(channel, dtype=np.float32))
                    .resize((s_width, s_height)))
                for channel in heatmap_2d
            ])

        return {'image': image,
                'pos_3d': pos_3d,
                'pos_2d': pos_2d,
                'heatmap_2d': heatmap_2d}
        
        


In [None]:
# Sanity check: build the dataset at its original resolution and pull one random sample.
handDataset = HandDataset('train_dataset.csv', transform=Scale(1))
random_idx = np.random.randint(len(handDataset))
sample = handDataset[random_idx]

def show_joints(image, pos_2d, pos_3d, heatmap_2d):
    """Plot a sample: image with 2D joints on top, 3D joints below.

    Args:
        image: H x W x C image array.
        pos_2d: (n_joints, 2) array; column 0 is plotted as x, column 1 as y.
        pos_3d: (n_joints, 3) array of x, y, z joint positions.
        heatmap_2d: Optional iterable of per-joint 2D heatmaps, or None to
            skip the overlay.
    """
    fig = plt.figure(figsize=plt.figaspect(2.))

    # Top panel: image, optional heatmap overlay, and the 2D joints.
    ax = fig.add_subplot(2, 1, 1)
    height, width = image.shape[:2]
    ax.imshow(image)
    if heatmap_2d is not None and len(heatmap_2d) > 0:
        # Drawing each joint map opaquely would hide the image and all earlier
        # maps, so merge them and draw a single semi-transparent layer.
        combined = np.max(np.asarray(heatmap_2d), axis=0)
        ax.imshow(combined, alpha=0.5)
    ax.scatter(pos_2d[:, 0], pos_2d[:, 1], s=10, marker='.', c='r')

    # Bottom panel: 3D joints, viewed from the (-90, -90) angle.
    ax = fig.add_subplot(2, 1, 2, projection="3d")
    ax.set_xlim(-height/2, height/2)
    ax.set_ylim(-width/2, width/2)
    ax.view_init(-90, -90)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.scatter(pos_3d[:, 0], pos_3d[:, 1], pos_3d[:, 2], s=30)
    plt.show()


# Visualise the randomly drawn sample.
show_joints(
    image=sample['image'],
    pos_2d=sample['pos_2d'],
    pos_3d=sample['pos_3d'],
    heatmap_2d=sample['heatmap_2d'],
)

In [15]:
def flatten(x):
    """Collapse every dimension after the first into one.

    For an input of shape (N, C, H, W) the result is a view of shape
    (N, C*H*W).
    """
    batch_size = x.shape[0]
    return x.view(batch_size, -1)

def random_weight(shape):
    """
    Return a randomly initialised weight tensor of the given shape.

    Values are drawn from a standard normal and multiplied by the Kaiming
    factor sqrt(2 / fan_in). The tensor is created on the notebook-level
    `device` with the notebook-level `dtype`, and has requires_grad=True so
    that gradients are computed for it during the backward pass.
    """
    # FC weights (2 dims) use their first dimension as fan-in; otherwise the
    # shape is read as a conv weight [out_channel, in_channel, kH, kW].
    is_fc_weight = len(shape) == 2
    fan_in = shape[0] if is_fc_weight else np.prod(shape[1:])

    kaiming_scale = np.sqrt(2. / fan_in)
    w = torch.randn(shape, device=device, dtype=dtype) * kaiming_scale
    w.requires_grad = True
    return w

def zero_weight(shape):
    """Return an all-zero tensor of `shape` with requires_grad=True.

    The tensor is created on the notebook-level `device` with the
    notebook-level `dtype`.
    """
    zeros = torch.zeros(shape, device=device, dtype=dtype)
    zeros.requires_grad = True
    return zeros


class TwoLayerFC(nn.Module):
    """Two fully connected layers with a ReLU in between.

    Both layers' weights are initialised with Kaiming-normal initialisation.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Create and initialise the layers in order: fc1 first, then fc2.
        layer_specs = (
            ('fc1', input_size, hidden_size),
            ('fc2', hidden_size, num_classes),
        )
        for attr_name, n_in, n_out in layer_specs:
            layer = nn.Linear(n_in, n_out)
            nn.init.kaiming_normal_(layer.weight)
            setattr(self, attr_name, layer)

    def forward(self, x):
        """Flatten `x` per sample, then apply fc1 -> ReLU -> fc2."""
        hidden = F.relu(self.fc1(flatten(x)))
        return self.fc2(hidden)
    

    

In [24]:
# Per-sample preprocessing: scale by 0.5, then convert the arrays to tensors.
transform = transforms.Compose([Scale(0.5), ToTensor()])

# All three splits read the same CSV; the samplers decide which rows each one sees.
hand_train = HandDataset('dataset.csv', transform=transform, train=True)
N = len(hand_train)
train_end, val_end = int(N*0.8), int(N*0.9)

# Train: rows [0, 80%), drawn in random order.
loader_train = DataLoader(
    hand_train, batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(train_end)))

# Validation: rows [80%, 90%).
hand_val = HandDataset('dataset.csv', transform=transform, train=True)
loader_val = DataLoader(
    hand_val, batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(train_end, val_end)))

# Test: rows [90%, 100%).
hand_test = HandDataset('dataset.csv', transform=transform, train=False)
loader_test = DataLoader(
    hand_test, batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(val_end, N)))



In [13]:

# Smoke test: fetch only the first training batch and report its image tensor size.
for i_batch, sample_batched in enumerate(loader_train):
    image_batch_size = sample_batched['image'].size()
    print(i_batch, image_batch_size)
    break
        

0 torch.Size([64, 3, 240, 320])
