In [1]:
import cv2
import numpy as np
import pickle

def collect_video_framecount(action,subject,trial_num):
    
    action_dict = {'KT':'Knot_Tying','S':'Suturing','NP': 'Needle_Passing'}
    
    act = action_dict[action]

    filename1 = act+'/video/'+act+'_'+subject+'00'+str(trial_num)+'_capture1.avi'
    filename2 = act+'/video/'+act+'_'+subject+'00'+str(trial_num)+'_capture2.avi'
    
    print('reading '+filename1)
    
    vidcap1 = cv2.VideoCapture(filename1)
    vidcap2 = cv2.VideoCapture(filename2)
    
    count = 0
    success = True
    
    while success:
      success,image = vidcap1.read()
    
      '''
      success,image = vidcap2.read()
      '''  
      count += 1

    print('total frame count : %d' % count)
    
    return count-1

def collect_video_sample(action,subject,trial_num,num_frames):
    
    action_dict = {'KT':'Knot_Tying','S':'Suturing','NP': 'Needle_Passing'}
    
    act = action_dict[action]

    filename1 = act+'/video/'+act+'_'+subject+'00'+str(trial_num)+'_capture1.avi'
    filename2 = act+'/video/'+act+'_'+subject+'00'+str(trial_num)+'_capture2.avi'
    
    print('reading '+filename1)
    
    vidcap1 = cv2.VideoCapture(filename1)
    vidcap2 = cv2.VideoCapture(filename2)
    
    # collect kinematic data
    filepath = act + '/kinematics/AllGestures/'
    filename = filepath + act + '_' +subject + '00' + str(trial_num) + '.txt'
    data = np.loadtxt(filename)
    num_labels = data.shape[0]
    print('total labels loaded: %d' % num_labels)
    
    if (num_labels>num_frames):
          pass
    else:
          num_frames = num_labels
          
    count = 0
    success = True
    
    while success and count<num_frames:
      success,image = vidcap1.read()
      write_name = 'data/' + subject+'_'+str(trial_num)+'_1'+'_%d_'+ action + '.png'
      cv2.imwrite(write_name % count, image)     # save frame as png file
    
      '''
      success,image = vidcap2.read()
      write_name = 'data/' + subject+'_'+str(trial_num)+'_2'+'_%d_'+ action + '.png'
      cv2.imwrite(write_name % count, image)     # save frame as png file
      '''
      count += 1
      if count%100 == 0:
          print('capturing frame %d' % count)  
    
    print('total frame count : %d' % count)
    
    # only take cols 38-49 (slave left) and 57-68 (slave right)
    
    dataL = data[:count,38:49]
    dataR = data[:count,57:68]
    
    out = np.hstack((dataL,dataR))
    
    print('total labels saved: %d' % out.shape[0])
    
    return out



In [16]:
subject_list = {}
subject_list['KT'] = [['B','C','D','E','F','G'],[4,5,5,5,5,5]]
#subject_list['KT'] = [['B'],[4]]

y = np.empty((0,22))

task = 'KT'
subj = subject_list[task][0]
trial = subject_list[task][1]
for i in range(len(subj)):
    for j in range(trial[i]):
        num_frames = collect_video_framecount(task,subj[i],j+1)
        out = collect_video_sample(task,subj[i],j+1,num_frames)
        y = np.vstack((y,out))

import pickle

print(y.shape)
picklefile = open('kinematics', 'ab') 
pickle.dump(y,picklefile)
picklefile.close()

reading Knot_Tying/video/Knot_Tying_B001_capture1.avi
total frame count : 1750
reading Knot_Tying/video/Knot_Tying_B001_capture1.avi
total labels loaded: 1735
capturing frame 100
capturing frame 200
capturing frame 300
capturing frame 400
capturing frame 500
capturing frame 600
capturing frame 700
capturing frame 800
capturing frame 900
capturing frame 1000
capturing frame 1100
capturing frame 1200
capturing frame 1300
capturing frame 1400
capturing frame 1500
capturing frame 1600
capturing frame 1700
total frame count : 1735
total labels saved: 1735
reading Knot_Tying/video/Knot_Tying_B002_capture1.avi
total frame count : 1486
reading Knot_Tying/video/Knot_Tying_B002_capture1.avi
total labels loaded: 1480
capturing frame 100
capturing frame 200
capturing frame 300
capturing frame 400
capturing frame 500
capturing frame 600
capturing frame 700
capturing frame 800
capturing frame 900
capturing frame 1000
capturing frame 1100
capturing frame 1200
capturing frame 1300
capturing frame 1400

In [17]:
# functions to downsize the images and compile them into a dataset

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.utils.data import Dataset
import os

import torchvision.datasets as dset
import torchvision.transforms as T
import glob
from PIL import Image
import cv2
import numpy as np
import pickle

import pdb

class JIGSAWDataset(Dataset):

    def __init__(self, y, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.labels = y
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return self.y.shape[0]
        
    def __getitem__(self, idx): 
        file_list = glob.glob('data/*.png')
        img_name = file_list[idx]
        #print('opening image ' +  img_name)
        image = Image.open(img_name,'r')
        label = self.labels[idx,:]
        if self.transform:
            image = self.transform(image)
        sample =  (image,label)

        return sample

def load_dataset():
    
    data_path = 'data'
    picklefile = open("kinematics", "rb" )
    
    num_files = len(next(os.walk('data'))[2]) #dir is your directory path as string
    print(num_files)
    
    trans = T.Compose([
                T.Resize((60,80), interpolation=2),
                T.ToTensor()])
    transy = T.Compose([T.ToTensor()])

    y = pickle.load(picklefile)
    print(y.shape)
    picklefile.close()
    
    dataset = JIGSAWDataset(y,data_path,transform = trans)
    
    train_loader = DataLoader(
        dataset,
        batch_size=64,
        num_workers=0,
        shuffle=False,
        sampler=sampler.SubsetRandomSampler(range(num_files-1000))
    )
    
    val_loader = DataLoader(
        dataset,
        batch_size=64,
        num_workers=0,
        shuffle=False,
        sampler=sampler.SubsetRandomSampler(range(num_files-1000,num_files))
    )
    
    return train_loader,val_loader
    
loader_train,loader_val = load_dataset()


USE_GPU = True
dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)


46303
(46303, 22)
using device: cuda


In [21]:
def check_accuracy_part34(loader, model):

    print('Checking accuracy on validation set')

    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.float)
            scores = model(x)
            loss_fn = torch.nn.MSELoss(reduction='mean')
            loss = loss_fn(scores,y)

        print('MSE loss is: %d ' % loss)

def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.float)
            
            scores = model(x)
            loss_fn = torch.nn.MSELoss(reduction='sum')
            loss = loss_fn(scores,y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()
            
            if t % 5 == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                #check_accuracy_part34(loader_val, model)
                print()
            

In [22]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

model = None
optimizer = None

# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

channel_1 = 16
channel_2 = 32
channel_3 = 64
hidden_size = 1000
learning_rate = 1e-3
reg = 1e-5

# computing the output from the conv nets
pad1 = 2 
filt1 = 5
im_H = 60
im_W = 80
stride1 = 1
wout1 = (im_W + 2*pad1 - filt1 )/stride1 + 1
hout1 = (im_H + 2*pad1 - filt1 )/stride1 + 1
print('%d %d' %(wout1,hout1))
pad2 = 1
filt2 = 5
stride2 =2
wout2 = (wout1 + 2 *pad2 - filt2 )/stride2 + 1
hout2 = (hout1 + 2 *pad2 - filt2 )/stride2 + 1
print('%d %d' %(wout2,hout2))
pad3 = 1
filt3 = 2
stride3 = 2
wout3 = (wout2 + 2 *pad3 - filt3 )/stride3 + 1
hout3 = (hout2 + 2 *pad3 - filt3 )/stride3 + 1
print('%d %d' %(wout3,hout3))
linear_input_dim = int(channel_3*hout3*wout3)
print('linear input dim required: ' ,linear_input_dim)

'''

'''

model = nn.Sequential(
    nn.Conv2d(3,channel_1,filt1,padding=pad1,stride = stride1),
    nn.BatchNorm2d(channel_1),
    nn.ReLU(),
    nn.Conv2d(channel_1,channel_2,filt2,padding=pad2, stride = stride2),
    nn.BatchNorm2d(channel_2),
    nn.ReLU(),
    nn.Conv2d(channel_2,channel_3,filt3,padding=pad3,stride = stride3),
    nn.BatchNorm2d(channel_3),
    nn.ReLU(),
    Flatten(),
    nn.Linear(19200,hidden_size),
    nn.BatchNorm1d(hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size,22),
)

# you can use Nesterov momentum in optim.SGD
#optimizer = optim.SGD(model.parameters(), lr=learning_rate,momentum=0.9, nesterov=True)

optimizer = optim.Adam(model.parameters(),lr = learning_rate, weight_decay = reg)

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                                 END OF YOUR CODE                             
################################################################################

# You should get at least 70% accuracy
train_part34(model, optimizer, epochs=10)

80 60
39 29
20 15
linear input dim required:  20916
Iteration 0, loss = 375.2760

Iteration 5, loss = 1257.7787



KeyboardInterrupt: 

In [38]:
data_path = 'data'
picklefile = open("kinematics", "rb" )

num_files = len(next(os.walk('data'))[2]) #dir is your directory path as string
print(num_files)

trans = T.Compose([
            T.Resize((60,80), interpolation=2),
            T.ToTensor()])
transy = T.Compose([T.ToTensor()])

y = pickle.load(picklefile)
picklefile.close()

dataset = JIGSAWDataset(y,data_path,transform = trans)


6647
tensor([[[0.0275, 0.0510, 0.0627,  ..., 0.1176, 0.1176, 0.1176],
         [0.0275, 0.0510, 0.0627,  ..., 0.1176, 0.1176, 0.1176],
         [0.0275, 0.0471, 0.0510,  ..., 0.1059, 0.1176, 0.1176],
         ...,
         [0.0667, 0.1294, 0.1569,  ..., 0.3804, 0.3647, 0.3098],
         [0.0627, 0.1294, 0.1451,  ..., 0.3725, 0.3569, 0.3020],
         [0.0627, 0.1294, 0.1451,  ..., 0.3804, 0.3647, 0.3098]],

        [[0.0275, 0.0510, 0.0627,  ..., 0.1176, 0.1176, 0.1176],
         [0.0275, 0.0510, 0.0627,  ..., 0.1176, 0.1176, 0.1176],
         [0.0275, 0.0471, 0.0510,  ..., 0.1255, 0.1176, 0.1176],
         ...,
         [0.0627, 0.1294, 0.1686,  ..., 0.0902, 0.0824, 0.0941],
         [0.0588, 0.1255, 0.1569,  ..., 0.0784, 0.0863, 0.0980],
         [0.0588, 0.1255, 0.1569,  ..., 0.0863, 0.0941, 0.1059]],

        [[0.0275, 0.0510, 0.0627,  ..., 0.0980, 0.0980, 0.0980],
         [0.0275, 0.0510, 0.0627,  ..., 0.0980, 0.0980, 0.0980],
         [0.0275, 0.0471, 0.0510,  ..., 0.0863, 0.098