In [336]:
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt 

# loadmat() reads a MATLAB workspace into a Python dictionary whose keys are the
# names of the saved variables. To list the variables that were loaded, uncomment
# the line below:
#print([key for key in in_data])
in_data = loadmat('face_emotion_data.mat')

# Pull the two arrays used by the rest of the notebook out of the workspace dict.
X, y = in_data['X'], in_data['y']

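<div style="background-color: #ccffcc; padding: 10px;">

Classifier a.) Truncated SVD

Known result from lecture, where $\sigma_i$, $u_i$, $v_i$ come from the SVD of the data matrix and $d$ is the label vector (`y` in the code):

$$w_{\min} = \sum_{i=1}^{p} \frac{1}{\sigma_i} \, v_i \left(u_i^T d\right)$$

Truncate the sum after the first $r$ terms as follows:

$$w_{\min,r} = \sum_{i=1}^{r} \frac{1}{\sigma_i} \, v_i \left(u_i^T d\right)$$

</div>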


In [285]:
# Cut the samples into 8 equally sized, contiguous slices
X_slices = np.split(X, 8)
y_slices = np.split(y, 8)

from itertools import combinations

# Every way of picking 6 of the 8 slices for training; for each choice, the 2
# slices that were left out (in increasing slice order) form the matching holdout.
train_combos = list(combinations(range(8), 6))
holdout_combos = [[k for k in range(8) if k not in chosen] for chosen in train_combos]

# Stack the selected slices row-wise; entry j of every array below belongs to
# the j-th combination, so the four arrays stay aligned with one another.
X_stacks = np.array([np.vstack([X_slices[k] for k in chosen]) for chosen in train_combos])
y_stacks = np.array([np.vstack([y_slices[k] for k in chosen]) for chosen in train_combos])
X_holdouts = np.array([np.vstack([X_slices[k] for k in left_out]) for left_out in holdout_combos])
y_holdouts = np.array([np.vstack([y_slices[k] for k in left_out]) for left_out in holdout_combos])


In [414]:
# this function computes a list of w_min's for all values of r from 1 to rank(input)

def w_min_rs(X_, y_):
    """Return the truncated-SVD least-squares weights for every truncation level.

    With the thin SVD ``X_ = U @ diag(s) @ V^T``, the weights for truncation
    level ``r`` are ``sum_{i=1..r} (1 / s_i) * v_i * (u_i^T @ y_)``.

    Parameters
    ----------
    X_ : 2-D array
        Data matrix; any shape is accepted (the size of Sigma is taken from the
        SVD rather than being fixed).
    y_ : array
        Targets with one entry (or row) per row of ``X_``.

    Returns
    -------
    list of ndarray
        ``np.linalg.matrix_rank(X_)`` weight arrays; element ``k`` uses the
        ``k + 1`` largest singular values.
    """
    U_, s_, VT_ = np.linalg.svd(X_, full_matrices=False)
    rank_ = np.linalg.matrix_rank(X_)

    # Only the leading `rank_` singular triplets are ever needed, so singular
    # values that are numerically zero are never inverted.
    V_r_ = VT_[:rank_].T           # columns are v_1 .. v_rank
    UTy_ = U_[:, :rank_].T @ y_    # u_i^T y for each kept component (loop-invariant)
    V_scaled_ = V_r_ / s_[:rank_]  # column i becomes v_i / sigma_i

    w_min_rs_ = []
    for r_ in range(1, rank_ + 1):
        # keep only the first r_ components of the expansion
        w_min_rs_.append(V_scaled_[:, :r_] @ UTy_[:r_])

    return w_min_rs_

# this function estimates the error rate of a regularized classifier on some test data
# it also returns the index of the best regularization parameter
def estimate_error(X_train, y_train, X_test, y_test, regularization_type):
    """Fit a family of regularized linear classifiers and score them on test data.

    Parameters
    ----------
    X_train, y_train : arrays
        Data used to compute the candidate weight vectors.
    X_test, y_test : arrays
        Data on which each candidate is scored.
    regularization_type : str
        ``"truncated_SVD"`` evaluates every weight vector returned by
        ``w_min_rs``; ``"ridge_regression"`` evaluates ``ridge_regression``
        for lambda in ``2**-1, 2**0, ..., 2**4``.

    Returns
    -------
    (best_r, best_error_rate)
        ``best_r`` is the zero-based index of the first candidate with the
        lowest error rate (so the truncation level is ``best_r + 1``), and
        ``best_error_rate`` is that candidate's misclassification rate.

    Raises
    ------
    ValueError
        If ``regularization_type`` is not one of the two supported names.

    Notes
    -----
    The candidate is selected on the same ``X_test``/``y_test`` that the
    returned error rate is measured on, so the returned rate is the minimum
    over candidates on that data, not an independent estimate.
    """
    # Build only the candidates that were asked for, and reject anything else
    # up front instead of failing later on an empty list of error rates.
    if regularization_type == "truncated_SVD":
        candidates_ = w_min_rs(X_train, y_train)
    elif regularization_type == "ridge_regression":
        lambdas_ = [2**i for i in range(-1, 5)]
        candidates_ = ridge_regression(X_train, y_train, lambdas_)
    else:
        raise ValueError(
            f"unknown regularization_type {regularization_type!r}; "
            "expected 'truncated_SVD' or 'ridge_regression'"
        )

    # this is a binary classifier, so compare the sign of each prediction with
    # the sign of the label and take the mean to get the proportion of errors
    true_signs_ = np.sign(y_test)
    error_rates = [np.mean(np.sign(X_test @ w_) != true_signs_) for w_ in candidates_]

    # find the (first) candidate that minimizes the error
    best_r = np.argmin(error_rates)

    # find the error rate corresponding to that candidate
    best_error_rate = error_rates[best_r]

    return best_r, best_error_rate


In [None]:
# for each X_stack (6-long), 28 total :
    # -select the best value of r
    # -use the w corresponding to the best value of r to predict the labels of holdout set 1
    # -compute the % error of these predicted labels
    # function estimate_error above does all of this
    # run estimate_error with each X_stack, y_stack, X_holdout 1, y_holdout 1
    
    # -use the w corresponding to the best value of r to predict the labels of holdout set 2
    # -compute the % error of these predicted labels
    # function estimate_error above does all of this
    # run estimate_error with each X_stack, y_stack, X_holdout 2, y_holdout 2

In [434]:
error_rates_holdout1_SVD = []
error_rates_holdout2_SVD = []

# for each X_stack (6-long, 28 total):
for i in range(len(X_stacks)):
    # each holdout block is two equally sized slices stacked on top of each other,
    # so its midpoint separates holdout slice 1 from holdout slice 2
    # (derived from the data instead of hard-coding the slice length)
    split_ = len(X_holdouts[i]) // 2

    # append the error rate obtained at the best truncation level r
    error_rates_holdout1_SVD.append(estimate_error(X_stacks[i], y_stacks[i], X_holdouts[i][:split_],
                                                   y_holdouts[i][:split_], "truncated_SVD")[1])
    error_rates_holdout2_SVD.append(estimate_error(X_stacks[i], y_stacks[i], X_holdouts[i][split_:],
                                                   y_holdouts[i][split_:], "truncated_SVD")[1])

error_rates_overall_SVD = np.concatenate([error_rates_holdout1_SVD, error_rates_holdout2_SVD])
print(f"Truncated SVD: \nError rate average over both holdout sets: {np.mean(error_rates_overall_SVD)}")

Truncated SVD: 
Error rate average over both holdout sets: 0.03459821428571429


In [426]:
####################################################################################################

<div style="background-color: #ccffcc; padding: 10px;">

Classifier b.) - Ridge Regression

Several of the functions above can be re-used, so I will call them as needed.

Also, apply the result from Lecture video 4.1:

$$w_{\min} = V \left(\Sigma^2 + \lambda I\right)^{-1} \Sigma \, U^T y$$

This result is derived by taking the expression

$$w_{\min} = \left(A^T A + \lambda I\right)^{-1} A^T y$$

and plugging in the SVD of $A$, $A = U \Sigma V^T$.

</div>

In [428]:
def ridge_regression(X_, y_, lambdas_):
    """Return the ridge-regression weights for each regularization strength.

    Uses the thin SVD ``X_ = U @ diag(s) @ V^T`` and the identity
    ``w = V @ (Sigma^2 + lambda * I)^-1 @ Sigma @ U^T @ y_``. Because every
    matrix between ``V`` and ``U^T`` is diagonal, the inverse reduces to the
    per-component factor ``s_i / (s_i**2 + lambda)``, so no matrix is inverted.

    Parameters
    ----------
    X_ : 2-D array
        Data matrix.
    y_ : array
        Targets with one entry (or row) per row of ``X_``.
    lambdas_ : iterable of numbers
        Regularization strengths to evaluate.

    Returns
    -------
    list of ndarray
        One weight array per entry of ``lambdas_``, in the same order.
    """
    U_, s_, VT_ = np.linalg.svd(X_, full_matrices=False)
    V_ = VT_.T
    UTy_ = U_.T @ y_  # does not depend on lambda, so compute it once
    w_min_lambdas_ = []

    for lambda_ in lambdas_:
        denom_ = s_**2 + lambda_
        # Components whose denominator is exactly zero (lambda == 0 together
        # with a zero singular value) contribute nothing instead of dividing
        # by zero.
        shrink_ = np.divide(s_, denom_, out=np.zeros_like(s_), where=denom_ != 0)
        # Scaling column i of V by shrink_[i] is the same as V @ diag(shrink_).
        w_min_lambdas_.append((V_ * shrink_) @ UTy_)

    return w_min_lambdas_

In [432]:
error_rates_holdout1_ridge = []
error_rates_holdout2_ridge = []

# for each X_stack (6-long, 28 total):
for i in range(len(X_stacks)):
    # each holdout block is two equally sized slices stacked on top of each other,
    # so its midpoint separates holdout slice 1 from holdout slice 2
    # (derived from the data instead of hard-coding the slice length)
    split_ = len(X_holdouts[i]) // 2

    # append the error rate obtained at the best value of lambda
    error_rates_holdout1_ridge.append(estimate_error(X_stacks[i], y_stacks[i], X_holdouts[i][:split_],
                                                     y_holdouts[i][:split_], "ridge_regression")[1])
    error_rates_holdout2_ridge.append(estimate_error(X_stacks[i], y_stacks[i], X_holdouts[i][split_:],
                                                     y_holdouts[i][split_:], "ridge_regression")[1])

error_rates_overall_ridge = np.concatenate([error_rates_holdout1_ridge, error_rates_holdout2_ridge])
print(f"Ridge Regression: \nError rate average over both holdout sets: {np.mean(error_rates_overall_ridge)}")

Ridge Regression: 
Error rate average over both holdout sets: 0.03571428571428571
