In [1]:
# Import packages
import os
import numpy as np 
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist, pdist, squareform

In [2]:
def load_data(path, name):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    data = np.loadtxt(os.path.join(path, name))
    X, Y = data[:, 1:], data[:, 0]

    return(X, Y)

In [4]:
def shuffle_data(X, Y):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters:
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output:
    Updated weights and biases
    --------------------
    '''
    # Data is currently unshuffled; we should shuffle
    # each X[i] with its corresponding y[i]
    perm = np.random.permutation(max(Y.shape))
    X = X[perm, :]
    Y = Y[perm]

    return(X, Y)

In [6]:
def split_data(X, Y, train_percent):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    # Calculate no. of training examples based on user specified percentage
    # Here we use 2/3, 1/3 by default as required by the assignment
    n_train = round(train_percent*max(Y.shape))
    
    # Filter the dataframe to get training and testing rows
    X_train = X[:n_train]
    Y_train = Y[:n_train]
    
    # Validation set
    X_val = X[n_train:]
    Y_val = Y[n_train:]
    
    # Return statement
    return(X_train, X_val, Y_train, Y_val)

In [8]:
def get_polynomial_kernel(X, X_, d):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    return(np.power(np.dot(X, X.T), d))

In [9]:
def get_gaussian_kernel(X, X_, c):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    # Compute pairwise distances
    K = np.einsum('ij,ij->i',X, X)[:,None] + np.einsum('ij,ij->i',X_,X_) - 2*np.dot(X,X_.T)
    
    # Then apply parameter c
    K = np.exp(K*c)
    
    # Return statement
    return(K)

In [10]:
def get_accuracy(target, pred):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    return np.sum(target==pred)/max(target.shape)

In [11]:
def get_results(history):
    '''
    --------------------
    Get results
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    # Store results
    best_epoch = np.array(history["dev_accuracies"]).argmax()
    best_training_accuracy = history['accuracies'][best_epoch]
    best_dev_accuracy = history['dev_accuracies'][best_epoch]
    
    # Display results
    print(f"best training accuracy: {history['accuracies'][best_epoch]}")
    print(f"best dev accuracy: {history['dev_accuracies'][best_epoch]}")
    print(f"best epoch: {best_epoch}")
    
    return(best_epoch, best_training_accuracy, best_dev_accuracy)

In [12]:
def train_kernel_perceptron(X_train, Y_train, X_dev, y_dev, epochs, lr, kernel, kernel_args, n_classes):
    '''
    --------------------
    Kernel perceptron algorithm
    --------------------
    Parameters: 
    X: Numpy array of training features (shape = 784 X n)
    y: Binary (1/-1) training label (shape = n X 1)
    --------------------
    Output: 
    w: trained weights
    b: trained biases
    y_preds: predictions 
    --------------------
    '''
    # Store a record of training and validation accuracies at each epoch
    history = {
        "train_accuracies": [],
        "val_accuracies": []
    }
    
    # Transform X according to the user specified kernel
    if kernel == 'polynomial':
        X_train = get_polynomial_kernel(X_train, X_train, **kernel_args)
    elif kernel == 'gaussian':
        X_train = get_gaussian_kernel(X_train, X_train, **kernel_args)
    
    # Initialize alpha weights
    A = np.zeros((n_classes, X_train.shape[0]))
    
    # Initialize the best accuracy to 0 and update this during training
    best_accuracy = 0
    
    # Run for a fixed number of epochs
    for epoch in range(epochs):
        
        # Shuffle data at start of each epoch
        X_train, Y_train = shuffle_data(X_train, Y_train)
        
        # Do this for each example in the dataset
        for i in range(X_train.shape[0]):

            # Compute the prediction with the current weights: add 1 to take zero indexing into account
            # dim(A.T) --> (10, 6199), dim(X_train[i, :]) ---> (6199, 1) ====> dim(y_hat) --> 10 X 1
            y_hat = (A @ X_train[i, :]
            
            # Check if the prediction is correct against the labels
            # If it is correct we don't need to make any updates: we just move to the next iteration
            # If it is not correct then we update the weights and biases in the direction of the label
            if y_hat != Y_train[i]: A[y_hat - 1, :] += 

    
    # Return statement
    return(history)

In [13]:
def run_kernel_perceptron_training(epochs, lr, data_path = 'data', name = 'zipcombo.dat', 
                                   kernel = 'polynomial', d = 2, n_classes=10):
    '''
    --------------------
    Run perceptron algorithm to get a base-line
    --------------------
    Parameters: 
    X: Numpy array of training features (shape = 784 X n)
    y: Binary (1/0) training label (shape = n X 1)
    --------------------
    Output: 
    w: trained weights
    y_preds: predictions
    --------------------
    '''
    # Set the random seed for random number generator to ensure reproducibility
    np.random.seed(132089)

    # Prepare data for the perceptron
    X, Y = load_data(data_path, name)
    
    # Shuffle the dataset before splitting it
    X, Y = shuffle_data(X, Y)
    
    # Split the data into training and validation set 
    X_train, Y_train, X_val, Y_val = split_data(X, Y, 0.66666)
    
    # Construct kernel arguments dictionary
    if kernel == 'polynomial': kernel_args = {'d': d}

    # Call the perceptron training with the given epochs
    history = train_kernel_perceptron(X_train, Y_train, X_val, Y_val, epochs, lr, kernel, kernel_args, n_classes)
    
    # Get results from history
    best_epoch, best_training_accuracy, best_dev_accuracy = get_results(history)
    
    # Return statement
    return(best_epoch, best_training_accuracy, best_dev_accuracy, history)

In [14]:
def main(epochs = 1000, lr = 1):
    '''
    --------------------
    Main training loop
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    # Call training function
    best_epoch, best_accuracy, best_loss, history = run_kernel_perceptron_training(epochs, lr)

In [15]:
main()

(6199, 6199)
(6199, 10)


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()