In [2]:
#Imports
import torch
import os
import pandas as pd
import numpy as np
import math

# Data preprocessing
This part of the code is to generate 20x20 matrices representing the distance between joints in all frames.
This resulted in two .npy files X_train.npy and Y_train.npy
X_train contains a list of all 20x20 matrices, and Y_train a list of 1x6 vectors representing the correct letter.

Note: We will have to re-run this when the additional data for the other letters is published.

In [6]:
#Loop through directories in dataset_v0
path_of_data_directory = "../dataset_v0/"

annotations_A = pd.read_csv(os.path.join(path_of_data_directory, "ASL_letter_A/annotations.csv"))
annotations_B = pd.read_csv(os.path.join(path_of_data_directory, "ASL_letter_B/annotations.csv"))
annotations_C = pd.read_csv(os.path.join(path_of_data_directory, "ASL_letter_C/annotations.csv"))
annotations_L = pd.read_csv(os.path.join(path_of_data_directory, "ASL_letter_L/annotations.csv"))
annotations_R = pd.read_csv(os.path.join(path_of_data_directory, "ASL_letter_R/annotations.csv"))
annotations_U = pd.read_csv(os.path.join(path_of_data_directory, "ASL_letter_U/annotations.csv"))

annotations = [annotations_A, annotations_B, annotations_C, annotations_C, annotations_L, annotations_R, annotations_U]

print(annotations_A.shape)
print(annotations_B.shape)
print(annotations_C.shape)
print(annotations_L.shape)
print(annotations_R.shape)
print(annotations_U.shape)

(147819, 9)
(150003, 9)
(146118, 9)
(152229, 9)
(154518, 9)
(156660, 9)


In [14]:
#Separate each frame into its own object and put them in a list
list_of_frames = []
for annotation in annotations:
    #Removing hand position rows from data (is always 0?)
    annotation = annotation[annotation.joint != 'hand_position']
    i = 0
    #Seperate each frame and add to a list
    while i < len(annotation):
        frame = annotation[i : i+20]
        list_of_frames.append(frame)
        i += 20


Number of unique frames: 50165
    ID  Unnamed: 0  frame       gesture     joint  person_idx  video_idx  \
1    1         NaN      0  ASL_letter_A      root      103860          0   
2    2         NaN      0  ASL_letter_A   thumb_1      103860          0   
3    3         NaN      0  ASL_letter_A   thumb_2      103860          0   
4    4         NaN      0  ASL_letter_A   thumb_3      103860          0   
5    5         NaN      0  ASL_letter_A   index_1      103860          0   
6    6         NaN      0  ASL_letter_A   index_2      103860          0   
7    7         NaN      0  ASL_letter_A   index_3      103860          0   
8    8         NaN      0  ASL_letter_A   index_4      103860          0   
9    9         NaN      0  ASL_letter_A  middle_1      103860          0   
10  10         NaN      0  ASL_letter_A  middle_2      103860          0   
11  11         NaN      0  ASL_letter_A  middle_3      103860          0   
12  12         NaN      0  ASL_letter_A  middle_4      10

In [33]:
print("Number of unique frames:", len(list_of_frames))
#print("Example of frame structure:", list_of_frames[20])
test_frame = list_of_frames[-1]
print(test_frame)

Number of unique frames: 50165
            ID  Unnamed: 0  frame       gesture     joint  person_idx  \
156640  156640         NaN    182  ASL_letter_U      root       79875   
156641  156641         NaN    182  ASL_letter_U   thumb_1       79875   
156642  156642         NaN    182  ASL_letter_U   thumb_2       79875   
156643  156643         NaN    182  ASL_letter_U   thumb_3       79875   
156644  156644         NaN    182  ASL_letter_U   index_1       79875   
156645  156645         NaN    182  ASL_letter_U   index_2       79875   
156646  156646         NaN    182  ASL_letter_U   index_3       79875   
156647  156647         NaN    182  ASL_letter_U   index_4       79875   
156648  156648         NaN    182  ASL_letter_U  middle_1       79875   
156649  156649         NaN    182  ASL_letter_U  middle_2       79875   
156650  156650         NaN    182  ASL_letter_U  middle_3       79875   
156651  156651         NaN    182  ASL_letter_U  middle_4       79875   
156652  156652      

In [15]:
#Function that returns the Y-vector for a given gestures i.e [1 0 0 0 0 0] for the letter A
gestures = ["ASL_letter_A", "ASL_letter_B", "ASL_letter_C", "ASL_letter_L", "ASL_letter_R", "ASL_letter_U"]
def getYVectorFromGesture(gesture):
    y = np.zeros(6)
    y[gestures.index(gesture)] = 1
    return y

print(getYVectorFromGesture("ASL_letter_U"))

[0. 0. 0. 0. 0. 1.]


In [26]:
#This function creates a 20x20 distance matrix for each of the 50k frames. Took me 2 hours /Jacob

# Turn each frame into a distance matrix
X_train = []
Y_train = []
count = 1
#For each frame in our dataset we want to create a distance matrix
for frame in list_of_frames:
    distance_matrix = np.zeros((20,20))

    #For each joint in the frame
    for i in range(len(frame)):
        #Check that the joint is visible
        if(frame.iloc[i].x != 0 and frame.iloc[i].y != 0):
            #For each other joint in the frame
            for j in range(len(frame)):
                #Check that the joint is visible
                if(frame.iloc[j].x != 0 and frame.iloc[j].y != 0):
                    #Calculate distance between joint i and all joints j and add to matrix
                    distance_matrix[i,j] = math.sqrt((frame.iloc[i].x - frame.iloc[j].x)**2 + (frame.iloc[i].y - frame.iloc[j].y)**2)

    #Append the distance matrix for each frame into X_train
    X_train.append(distance_matrix)

    #Get the correct letter from the frame and save it in the format:
    #  [A B C L R U] = [1 0 0 0 0 0] if A is correct
    #  [0 0 1 0 0 0] if C is correct etc. 
    Y_train.append(getYVectorFromGesture(frame.iloc[0].gesture))
    if (count % 250 == 0):
        print("Number of frames done:", count)

    count += 1

Number of frames done: 250
Number of frames done: 500
Number of frames done: 750
Number of frames done: 1000
Number of frames done: 1250
Number of frames done: 1500
Number of frames done: 1750
Number of frames done: 2000
Number of frames done: 2250
Number of frames done: 2500
Number of frames done: 2750
Number of frames done: 3000
Number of frames done: 3250
Number of frames done: 3500
Number of frames done: 3750
Number of frames done: 4000
Number of frames done: 4250
Number of frames done: 4500
Number of frames done: 4750
Number of frames done: 5000
Number of frames done: 5250
Number of frames done: 5500
Number of frames done: 5750
Number of frames done: 6000
Number of frames done: 6250
Number of frames done: 6500
Number of frames done: 6750
Number of frames done: 7000
Number of frames done: 7250
Number of frames done: 7500
Number of frames done: 7750
Number of frames done: 8000
Number of frames done: 8250
Number of frames done: 8500
Number of frames done: 8750
Number of frames done: 

In [27]:
#Saves the X_train and Y_train data to numpy file
np.save('X_train.npy', X_train, allow_pickle=True)
np.save('Y_train.npy', Y_train, allow_pickle=True)

# Usage of the preprocessed data
The following shows how to read the data from the files, which will be used later in the process.

In [12]:
#Read test from the .npy files containing data
X_train = np.load('./Data/X_train.npy', allow_pickle=True)
Y_train = np.load('./Data/Y_train.npy', allow_pickle=True)

print(Y_train[0])

[1. 0. 0. 0. 0. 0.]


In [28]:
#Shuffle both lists (X and Y), and then normalize X-vector
from sklearn.preprocessing import normalize
import random

def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

x,y = unison_shuffled_copies(X_train, Y_train)

x_array = np.array(x)
y_array = np.array(y)

print(np.mean(x_array))

min_max_x = (x_array - np.min(x_array))/(np.max(x_array) - np.min(x_array))
print("Max after normalization", np.max(min_max_x))
print("Min after normalization", np.min(min_max_x))

print(x_array.shape)
print(y_array.shape)
print(len(x_array))
print(len(y_array))


47.38305328514792
Max after normalization 1.0
Min after normalization 0.0
(50165, 20, 20)
(50165, 6)
50165
50165


In [29]:
#Split into training and testing data
assert len(x_array) == len(y_array)
train_size = round(0.8 * len(x_array))

X_train = x_array[0:train_size]
X_test = x_array[train_size:]

Y_train = y_array[0:train_size]
Y_test = y_array[train_size:]

print("X_train & Y_train shapes: ", X_train.shape, Y_train.shape)
print("X_test & Y_test shapes: ", X_test.shape, Y_test.shape)



X_train & Y_train shapes:  (40132, 20, 20) (40132, 6)
X_test & Y_test shapes:  (10033, 20, 20) (10033, 6)


# To get the data to your file, import? this file and call the function get_GR_data()

In [30]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

def get_GR_data():
    #Read data from files
    X_data = np.load('./Data/X_train.npy', allow_pickle=True)
    Y_data = np.load('./Data/Y_train.npy', allow_pickle=True)

    X_shuffled, Y_shuffled = unison_shuffled_copies(X_data, Y_data)

    #Make them numpy arrays
    X_array = np.array(X_shuffled)
    Y_array = np.array(Y_shuffled)

    #Normalize X-vectors
    X_array = (x_array - np.min(x_array))/(np.max(x_array) - np.min(x_array))

    #Split into training and test sets
    assert len(Y_array) == len(X_array)
    train_size = round(0.8 * len(x_array))

    X_train = x_array[0:train_size]
    X_test = x_array[train_size:]

    Y_train = y_array[0:train_size]
    Y_test = y_array[train_size:]
    
    return X_train, X_test, Y_train, Y_test

X_train, X_test, Y_train, Y_test = get_GR_data()
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

#%store X_train, X_test, Y_train, Y_test

(40132, 20, 20)
(10033, 20, 20)
(40132, 6)
(10033, 6)
