In [1]:
from skimage import io
from skimage.transform import resize
import numpy as np
import random
from sklearn import svm
from skimage.feature import hog
import os

In [15]:
class GestureRecognizer(object):

    """
        Hand / non-hand detector and gesture classifier.

        Two linear SVMs are trained on HOG descriptors:
          * clf_gesture : multiclass, trained on the annotated hand crops;
                          the label is the character code of the first letter
                          of the image file name.
          * clf_hnh     : binary, hand crops (1) versus randomly sampled
                          background windows (0).
    """

    # Number of background (non-hand) windows sampled from every image.
    NEGATIVES_PER_IMAGE = 2
    # A random window is used as background only if its IOU with the
    # annotated hand box is strictly below this value.
    MAX_NEGATIVE_IOU = 0.1
    # Upper bound on random draws per image, so that sampling terminates even
    # when (almost) no window satisfies the IOU constraint.
    MAX_SAMPLING_ATTEMPTS = 1000

    def __init__(self, data_directory, win_size=128):

        """
            data_directory : path like /home/sanket/mlproj/dataset/
            includes the dataset folder with '/'

            win_size : side length (pixels) of the square window that hand
                       crops are resized to and that background windows are
                       cut at. Defaults to 128.
        """
        self.base_dir = data_directory
        self.win_size = win_size
        # Classifiers are created by train(); None means "not trained yet".
        self.clf_gesture = None
        self.clf_hnh = None

    def IOU(self, A, B):

        """
            Intersection-over-union of two axis-aligned boxes.

            A, B : sequences (x1, y1, x2, y2) with x1 <= x2 and y1 <= y2.
                   Boxes are treated as half-open pixel ranges, the same
                   convention as the img[y1:y2, x1:x2] slices used for
                   cropping, so width is x2 - x1 and height is y2 - y1.

            returns : float in [0, 1]; 0.0 when both boxes are empty.
        """
        # Overlap along an axis is (smallest far edge) - (largest near edge),
        # clamped at zero for boxes that do not touch.
        x_overlap = max(0, min(A[2], B[2]) - max(A[0], B[0]))
        y_overlap = max(0, min(A[3], B[3]) - max(A[1], B[1]))
        inter = float(x_overlap * y_overlap)

        A_area = (A[2] - A[0]) * (A[3] - A[1])
        B_area = (B[2] - B[0]) * (B[3] - B[1])
        union = A_area + B_area - inter

        if union <= 0:
            # Two degenerate (zero-area) boxes: avoid dividing by zero.
            return 0.0
        return inter / union

    def _sample_negative_boxes(self, hand_box, width, height):

        """
            Draw random win_size x win_size windows that barely overlap the hand.

            hand_box : (x1, y1, x2, y2) of the annotated hand
            width, height : size of the image the windows must fit inside

            returns : list of at most NEGATIVES_PER_IMAGE boxes [x1, y1, x2, y2],
                      each with IOU(hand_box, box) < MAX_NEGATIVE_IOU. The list
                      is shorter (possibly empty) when the image is smaller
                      than the window or when MAX_SAMPLING_ATTEMPTS draws did
                      not yield enough valid windows.
        """
        max_x = width - self.win_size
        max_y = height - self.win_size
        if max_x < 0 or max_y < 0:
            return []

        boxes = []
        for _ in range(self.MAX_SAMPLING_ATTEMPTS):
            if len(boxes) >= self.NEGATIVES_PER_IMAGE:
                break
            # +1 so that a window flush with the right/bottom edge is allowed
            # (and an image exactly as large as the window has one candidate).
            x1_r = random.randrange(0, max_x + 1)
            y1_r = random.randrange(0, max_y + 1)
            candidate = [x1_r, y1_r, x1_r + self.win_size, y1_r + self.win_size]
            if self.IOU(hand_box, candidate) < self.MAX_NEGATIVE_IOU:
                boxes.append(candidate)
        return boxes

    def _load_samples(self, user_list):

        """
            Read the annotations of the given users and compute HOG features.

            For every user the file <base_dir><user>/<user>_loc.csv is read.
            Its first line is discarded; every following non-blank line must be
            'file_name,x1,y1,x2,y2', where file_name is relative to base_dir
            and contains a '/' before the image name.

            returns : (pos_features, labels, neg_features)
                pos_features : HOG vectors of the resized hand crops
                labels       : ord() of the first character of each image name
                neg_features : HOG vectors of sampled background windows
        """
        pos_features = []
        labels = []
        neg_features = []

        for user in user_list:
            csv_file = self.base_dir + user + '/' + user + '_loc.csv'
            with open(csv_file, 'r') as f:
                f.readline()  # first line is not data
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    data = line.split(',')
                    file_name = data[0]
                    x1, y1, x2, y2 = map(int, data[1:])

                    # NOTE(review): `as_grey` is the keyword spelling of the
                    # scikit-image release this notebook was written against;
                    # newer releases spell it `as_gray` - confirm before upgrading.
                    img = io.imread(self.base_dir + file_name, as_grey=True)
                    h, w = img.shape[:2]

                    hand = resize(img[y1:y2, x1:x2], (self.win_size, self.win_size))
                    pos_features.append(hog(hand))
                    labels.append(ord(file_name.split('/')[1][0]))

                    hand_box = [x1, y1, x2, y2]
                    for bx1, by1, bx2, by2 in self._sample_negative_boxes(hand_box, w, h):
                        neg_features.append(hog(img[by1:by2, bx1:bx2]))

        return pos_features, labels, neg_features

    def train(self, train_list):

        """
            train_list : list of users to use for training
            eg ["user_1", "user_2", "user_3"]

            Trains both classifiers (multiclass gesture and binary
            hand/non-hand) on the given users and prints the accuracy of
            each one on its own training data.
        """
        train_x_pos, train_y, train_x_neg = self._load_samples(train_list)

        self.clf_gesture = svm.LinearSVC()
        self.clf_gesture.fit(np.asarray(train_x_pos), np.asarray(train_y))
        score_gesture = self.clf_gesture.score(np.asarray(train_x_pos), np.asarray(train_y))
        print('Training accuracy for gesture classifier : %f' % score_gesture)

        self.clf_hnh = svm.LinearSVC()
        train_x_hnh = train_x_pos + train_x_neg
        train_y_hnh = [1] * len(train_x_pos) + [0] * len(train_x_neg)
        self.clf_hnh.fit(np.asarray(train_x_hnh), np.asarray(train_y_hnh))
        score_hnh = self.clf_hnh.score(np.asarray(train_x_hnh), np.asarray(train_y_hnh))
        print('Training accuracy for Hand/Non-hand classifier %f' % score_hnh)

    def test(self, test_list):

        """
            test_list : list of users to evaluate on
            eg ["user_16", "user_17"]

            Scores the two classifiers fitted by train() on the given users
            and prints both accuracies.

            raises : RuntimeError if train() has not been called yet
        """
        if getattr(self, 'clf_gesture', None) is None or getattr(self, 'clf_hnh', None) is None:
            raise RuntimeError('train() must be called before test()')

        test_x_pos, test_y, test_x_neg = self._load_samples(test_list)

        score_gesture = self.clf_gesture.score(np.asarray(test_x_pos), np.asarray(test_y))
        print('Testing accuracy for gesture classifier : %f' % score_gesture)

        test_x_hnh = test_x_pos + test_x_neg
        test_y_hnh = [1] * len(test_x_pos) + [0] * len(test_x_neg)

        score_hnh = self.clf_hnh.score(np.asarray(test_x_hnh), np.asarray(test_y_hnh))
        print('Testing accuracy for Hand/Non-hand classifier %f' % score_hnh)

    def recognize_gesture(self, image):

        """
            image : a 320x240 pixel RGB image in the form of a numpy array

            This function should locate the hand and classify the gesture.

            returns : (position, label)

            position : a tuple of (x1,y1,x2,y2) coordinates of bounding box
                       x1,y1 is top left corner, x2,y2 is bottom right

            label : a single character. eg 'A' or 'B'

            Not implemented yet: raises NotImplementedError.
        """
        raise NotImplementedError('recognize_gesture is not implemented yet')

    def translate_video(self, image_array):

        """
            image_array : a list of images as described above.
                          can be of arbitrary length

            This function classifies the video into a 5 character string

            returns : word (a string of 5 characters)
                    no two consecutive characters are identical

            Not implemented yet: raises NotImplementedError.
        """
        raise NotImplementedError('translate_video is not implemented yet')

In [16]:
# Point the recognizer at the `dataset/` folder inside the current working
# directory; the trailing '/' is required because paths are built by concatenation.
dataset_root = os.path.abspath('.') + '/dataset/'
G = GestureRecognizer(dataset_root)
print(G.base_dir)

/home/ayush/GPU_ML/ML/workspace/project/complete_data/dataset/


In [17]:
# Users whose images are used for training. `user` ends up as a real list of
# folder names ('user_<id>'), so it can be printed and iterated more than once.
train_user_ids = [3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15]
user = ['user_' + str(user_id) for user_id in train_user_ids]
print(user)

['user_3', 'user_4', 'user_5', 'user_6', 'user_7', 'user_9', 'user_10', 'user_11', 'user_12', 'user_13', 'user_14', 'user_15']


In [18]:
# Fit both classifiers on the training users; prints the two training accuracies.
G.train(train_list=user)

Training accuracy for gesture classifier : 1.000000
Training accuracy for Hand/Non-hand classifier 1.000000


In [19]:
# Held-out users used only for evaluation. `user_test` ends up as a real list
# of folder names ('user_<id>'), so it can be printed and iterated more than once.
test_user_ids = [16, 17, 18, 19]
user_test = ['user_' + str(user_id) for user_id in test_user_ids]
print(user_test)

['user_16', 'user_17', 'user_18', 'user_19']


In [20]:
# Score both trained classifiers on the held-out users; prints the two accuracies.
G.test(test_list=user_test)

Testing accuracy for gesture classifier : 0.764583
Training accuracy for Hand/Non-hand classifier 0.968056
