In [67]:
import numpy as np
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt
import numpy.random as r
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
%matplotlib inline 

In [306]:
df = np.array([])

for i in range(10):
    data = np.array(pd.read_csv(f'American Sign Language Digits Dataset/{i}/Output Images - Sign {i}.csv'))
    df = np.append(df,data)
df = df.reshape((5000,65))
paths = df[:,0]
X = df[:,1:64]
y = df[:,64]
X = X.astype(float)
y = y.astype(float)


#Scale x,y,z values individually
x_vals = [[row[i] for i in range(0,63,3)] for row in X]
y_vals = [[row[i] for i in range(1,63,3)] for row in X]
z_vals = [[row[i] for i in range(2,63,3)] for row in X]

X_scale = StandardScaler()
x_vals = X_scale.fit_transform(x_vals)
y_vals = X_scale.fit_transform(y_vals)
z_vals = X_scale.fit_transform(z_vals)

X_trans = []
for i in range(len(x_vals)):
    combined = []
    for j in range(len(x_vals[i])):
        combined.append(x_vals[i][j])
        combined.append(y_vals[i][j])
        combined.append(z_vals[i][j])
    X_trans.append(combined)
X_trans = np.array(X_trans)
X_trans = X_scale.fit_transform(X_trans)



X_train, X_test, y_train, y_test = train_test_split(X_trans, y, test_size=0.4)

In [290]:
#Modifies the labels for neural network
def change_labels(Y_train):
    ans = np.zeros((len(Y_train), 10))
    for i in range(len(Y_train)):
        ans[i, int(Y_train[i])] = 1
    return ans

In [338]:
def f(z):
    #return 1 / (1 + np.exp(-z))
    
    #RELU
    return z * (z > 0)

    #tanh
    #return (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))

def f_deriv(z):
    #return f(z) * (1 - f(z))
    
    #RELU
    return 1 * (z > 0)

    #tanh
    #return 1 - f(z)**2

In [482]:
def setup_and_init_weights(nn_structure):
    W = {} #creating a dictionary i.e. a set of key: value pairs
    b = {}
    for l in range(1, len(nn_structure)):
        b[l] = r.random_sample((nn_structure[l],))
        
        #Weight Initialization Technique from https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78"
        #Called "He Weight Initialization"
        W[l] = 2 * r.random_sample((nn_structure[l], nn_structure[l-1])) - 1
        W[l] *= np.sqrt(2.0/(nn_structure[l-1])) 
        
        print(nn_structure[l-1])
    return W, b

def init_tri_values(nn_structure):
    tri_W = {}
    tri_b = {}
    for l in range(1, len(nn_structure)):
        tri_W[l] = np.zeros((nn_structure[l], nn_structure[l-1]))
        tri_b[l] = np.zeros((nn_structure[l],))
    return tri_W, tri_b

def feed_forward(x, W, b):
    a = {1: x} # create a dictionary for holding the a values for all levels
    z = { } # create a dictionary for holding the z values for all the layers
    for l in range(1, len(W) + 1): # for each layer
        node_in = a[l]
        z[l+1] = W[l].dot(node_in) + b[l]  # z^(l+1) = W^(l)*a^(l) + b^(l)
        a[l+1] = f(z[l+1]) # a^(l+1) = f(z^(l+1))
    return a, z

def calculate_out_layer_delta(y, a_out, z_out):
    # delta^(nl) = -(y_i - a_i^(nl)) * f'(z_i^(nl))
    return -(y-a_out) * f_deriv(z_out) 

def calculate_hidden_delta(delta_plus_1, w_l, z_l):
    # delta^(l) = (transpose(W^(l)) * delta^(l+1)) * f'(z^(l))
    return np.dot(np.transpose(w_l), delta_plus_1) * f_deriv(z_l)

def train_nn(nn_structure, X, y, lamb, iter_num=3000, alpha=0.25):
    W, b = setup_and_init_weights(nn_structure)
    cnt = 0
    N = len(y)
    avg_cost_func = []
    print('Starting gradient descent for {} iterations'.format(iter_num))
    while cnt < iter_num:
        if cnt%50 == 0:
            print('Iteration {} of {}'.format(cnt, iter_num))
        tri_W, tri_b = init_tri_values(nn_structure)
        avg_cost = 0
        for i in range(N):
            delta = {}
            # perform the feed forward pass and return the stored a and z values, to be used in the
            # gradient descent step
            a, z = feed_forward(X[i, :], W, b)
            # loop from nl-1 to 1 backpropagating the errors
            for l in range(len(nn_structure), 0, -1):
                if l == len(nn_structure):
                    delta[l] = calculate_out_layer_delta(y[i,:], a[l], z[l])
                    avg_cost += np.linalg.norm((y[i,:]-a[l]))
                else:
                    if l > 1:
                        delta[l] = calculate_hidden_delta(delta[l+1], W[l], z[l])
                    # triW^(l) = triW^(l) + delta^(l+1) * transpose(a^(l))
                    tri_W[l] += np.dot(delta[l+1][:,np.newaxis], np.transpose(a[l][:,np.newaxis]))# np.newaxis increase the number of dimensions
                    # trib^(l) = trib^(l) + delta^(l+1)
                    tri_b[l] += delta[l+1]
        # perform the gradient descent step for the weights in each layer
        for l in range(len(nn_structure) - 1, 0, -1):
            W[l] += -alpha * (1.0/N * tri_W[l])
            
            #=====================================================adding regularization
            #W[l] += -alpha * lamb * W[l]
            b[l] += -alpha * (1.0/N * tri_b[l])
        # complete the average cost calculation
        avg_cost = 1.0/N * avg_cost
        avg_cost_func.append(avg_cost)
        cnt += 1
    return W, b, avg_cost_func


def predict_y(W, b, X, n_layers):
    N = X.shape[0]
    y = np.zeros((N,))
    for i in range(N):
        a, z = feed_forward(X[i, :], W, b)
        y[i] = np.argmax(a[n_layers])
    return y

In [483]:
nn_structure = [63, 30, 30, 10]
y_v_train = change_labels(y_train)
# train the NN
W, b, avg_cost_func = train_nn(nn_structure, X_train, y_v_train, 0, 50, 0.005)

63
30
30
Starting gradient descent for 50 iterations
Iteration 0 of 50


In [484]:
# get the prediction accuracy and print
y_pred = predict_y(W, b, X_test, 4)
print(y_pred[100:200])
print(y_test[100:200])
print('Prediction accuracy is {}%'.format(accuracy_score(y_test, y_pred) * 100))

[9. 4. 4. 7. 2. 9. 6. 2. 4. 6. 4. 3. 6. 9. 3. 2. 4. 3. 3. 7. 3. 9. 9. 3.
 4. 4. 4. 7. 2. 4. 5. 2. 9. 3. 4. 4. 3. 6. 6. 3. 2. 4. 4. 7. 3. 3. 9. 3.
 3. 9. 9. 6. 5. 3. 5. 6. 4. 2. 4. 4. 6. 9. 4. 5. 6. 9. 7. 6. 9. 6. 2. 3.
 6. 6. 6. 6. 3. 6. 3. 9. 2. 9. 2. 3. 4. 9. 3. 9. 4. 4. 2. 5. 7. 9. 6. 9.
 9. 2. 3. 4.]
[8. 7. 2. 2. 2. 6. 1. 3. 9. 6. 9. 7. 0. 8. 3. 6. 8. 0. 3. 4. 0. 7. 4. 0.
 4. 5. 8. 7. 6. 1. 6. 4. 4. 3. 8. 9. 5. 0. 1. 1. 1. 4. 8. 7. 0. 3. 9. 3.
 3. 1. 5. 1. 7. 5. 8. 0. 1. 3. 6. 1. 0. 7. 8. 5. 0. 5. 7. 1. 9. 2. 6. 0.
 2. 1. 1. 6. 3. 6. 2. 8. 2. 5. 2. 0. 5. 8. 0. 8. 8. 5. 2. 3. 7. 4. 2. 4.
 9. 2. 3. 9.]
Prediction accuracy is 28.1%


In [423]:
print(y_train)

[7. 4. 5. ... 0. 9. 0.]


In [293]:
#Returns a model given the training data
def svm_model_asl(c, X_train, Y_train, kernel='linear'):
    svm_model = svm.SVC(C=c, probability=False, kernel=kernel)
    svm_model.fit(X_train, Y_train)
    return svm_model

#Modifies the labels for 1 vs all classification
#e.g. if we are testing for "1", all the labels != 1 will be set to 0
def changea_labels(Y_train, num):
    ans = np.array([1 if elem == num else 0 for elem in Y_train])
    return ans

In [300]:
#1 vs all classification

#predict given all 10 "1 vs all models"
def prediction(X, models):
    confidences = [model.predict(X) for model in models]
    ans = []
    for i in range(X.shape[0]):
        mostconfident = -1
        confidence = -1
        for j in range(10):
            if confidences[j][i] > confidence:
                confidence = confidences[j][i]
                mostconfident = j
        ans.append(mostconfident)
    return ans

#accuracy
def accuracy(Yhat, Y):
    correct = [1 if Yhat[i] == Y[i] else 0 for i in range(len(Y))]
    return sum(correct)/len(correct)

def svm_a(c, X_train, Y_train, X_test, Y_test, kernel):
    models = []
    for i in range(10):
        Y_train_temp = changea_labels(Y_train,i)
        models.append(svm_model_asl(c,X_train, Y_train_temp, kernel))
    
    Yhat_train = prediction(X_train, models)
    acc_train = accuracy(Yhat_train, Y_train)
    
    Yhat_test = prediction(X_test, models)
    acc_test = accuracy(Yhat_test, Y_test)
    
    return (c, acc_train, acc_test, models)


In [307]:
models = svm_a(1, X_train, y_train, X_test, y_test, 'poly')

In [308]:
prediction(X_test[100:200],models[3])

[8,
 7,
 2,
 2,
 2,
 6,
 1,
 3,
 9,
 6,
 9,
 7,
 0,
 8,
 3,
 6,
 8,
 0,
 3,
 4,
 0,
 7,
 4,
 0,
 4,
 5,
 8,
 7,
 6,
 1,
 6,
 4,
 4,
 3,
 8,
 9,
 5,
 0,
 1,
 1,
 1,
 4,
 8,
 0,
 0,
 3,
 9,
 3,
 3,
 1,
 5,
 1,
 7,
 5,
 8,
 0,
 1,
 3,
 6,
 0,
 0,
 7,
 8,
 5,
 0,
 5,
 7,
 1,
 9,
 0,
 6,
 0,
 2,
 1,
 1,
 0,
 3,
 6,
 2,
 8,
 2,
 5,
 2,
 0,
 5,
 8,
 0,
 8,
 8,
 5,
 2,
 3,
 7,
 4,
 2,
 4,
 9,
 2,
 3,
 9]