In [6]:
# Import packages
%reset -f
from helpers import load_all_data, vectorized_flatten, sigmoid, get_log_loss, get_accuracy, sigmoid_derivative, gradient_update, get_loss_plot, plot_loss
import numpy as np
import cv2

In [7]:
# Load and prepare data
def prep_data(data_path):
    '''
    --------------------
    Perceptron algorithm
    --------------------
    Parameters: 
    X: Numpy array of training features (shape = 784 X n)
    y: Binary (1/0) training label (shape = n X 1)
    --------------------
    Output: 
    
    w: trained weights
    y_hat: predictions
    --------------------
    '''
    # Get datasets
    X_train, y_train, X_dev, y_dev, X_test, y_test = load_all_data(data_path)
    
    # Flatten datasets
    X_train_flattened = vectorized_flatten(X_train)
    X_dev_flattened = vectorized_flatten(X_dev)
    X_test_flattened = vectorized_flatten(X_test)
    
    # Add extra column to Y_train
    y_train = y_train.reshape(1, -1)
    y_dev = y_dev.reshape(1, -1)
    y_test = y_test.reshape(1, -1)
    
    return(X_train_flattened, X_dev_flattened, X_test_flattened, y_train, y_dev, y_test)

In [8]:
def train_perceptron(X, Y, epochs, lr, tolerance):
    '''
    --------------------
    Perceptron algorithm
    --------------------
    Parameters: 
    X: Numpy array of training features (shape = 784 X n)
    y: Binary (1/-1) training label (shape = n X 1)
    --------------------
    Output: 
    w: trained weights
    b: trained biases
    y_preds: predictions 
    --------------------
    '''
    # Initialize weights and biases
    w = np.zeros(X.shape[0])
    b = 0
    
    # History goes here
    history = {
        "weights": [w],
        "losses": [], 
        "biases": [b],
        "accuracies": []
    }
    
    convergence_counter = 0
    best_accuracy = np.inf
    
    # Run for a fixed number of epochs
    for epoch in range(1, epochs + 1): 
        
        # Do this for each example in the dataset
        for i in range(X.shape[1]):
            
            # Store the sample data
            x_i = X[:, i]
            y_i = Y[0][i]
            
            # Compute the prediction with the current weights
            if (np.dot(w, x_i) + b > 0): y_hat = 1
            else: y_hat = -1
            
            # Check if the prediction is correct against the labels
            # If it is correct we don't need to make any updates: we just move to the next iteration
            # If it is not correct then we do the following: 
            # 1) Update the weights and biases in the direction of the label
            if y_hat != y_i:
                w += lr*(y_i - y_hat)*x_i
                b += lr*(y_i - y_hat)
            
            # Get predictions
            y_preds = np.array([int(np.dot(w, X[:, i]) + b  > 0) for i in range(X.shape[1])])
        
            # Training accuracy                       
            accuracy = get_accuracy(Y, y_preds)
                
            # Check convergence, keeps a counter of how many epochs it has been without an improvement
            # Counter resets whenever there's an improvent            
            if accuracy < best_accuracy - tolerance:
                best_loss = loss
                convergence_counter = 0
            else:
                convergence_counter += 1
                
                # Append results to history
                history["biases"].append(b)
                history["weights"].append(w)
                history["accuracies"].append(accuracy)
        
                # Get training accuracy
                print("Epoch {}/{}: Training_accuracy = {}".format(epoch, epochs, accuracy))
    
    # Return statement
    return(w, b)

In [None]:
def get_best_results(history):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    # Store results
    best_epoch = np.array(history["losses"]).argmin()
    best_accuracy = history['accuracies'][best_epoch]
    best_loss = history['losses'][best_epoch]
    
    # Display results
    print(f"best accuracy: {history['accuracies'][best_epoch]}")
    print(f"best loss: {history['losses'][best_epoch]}")
    print(f"best epoch: {best_epoch}")
    
    return(best_epoch, best_accuracy, best_loss)

In [None]:
def get_results(X_dev, y_dev, history, best_epoch, label="dev"):
    '''
    --------------------
    Prepare data
    --------------------
    Parameters: 
    weights: Current set of weights
    biases: Current set of biases
    gradients: Current set of gradients
    learning_rate: parameter to guide SGD step size
    --------------------
    Output: 
    Updated weights and biases
    --------------------
    '''
    w = history["weights"][best_epoch]
    b = history["biases"][best_epoch]
    activations = forward_pass(X_dev, w, b)

    y_dev_prob = activations[-1]
    y_dev_pred = np.where(y_dev_prob > 0.5, 1, 0)

    loss = get_log_loss(y_dev, y_dev_prob)
    accuracy = get_accuracy(y_dev, y_dev_pred)
    print(f"{label} set accuracy: {accuracy}")
    
    return(accuracy)

In [9]:
def get_perceptron_predictions(w, b, X, Y): 
    '''
    --------------------
    Run perceptron algorithm to get a base-line
    --------------------
    Parameters: 
    X: Numpy array of training features (shape = 784 X n)
    y: Binary (1/0) training label (shape = n X 1)
    --------------------
    Output: 
    w: trained weights
    y_preds: predictions
    --------------------
    '''
    # Get predictions
    y_preds = np.array([int(np.dot(w, X[:, i]) + b  > 0) for i in range(X.shape[1])])

    # Return statement
    return(y_preds, accuracy)

In [10]:
def get_perceptron_baseline(data_path, epochs):
    '''
    --------------------
    Run perceptron algorithm to get a base-line
    --------------------
    Parameters: 
    X: Numpy array of training features (shape = 784 X n)
    y: Binary (1/0) training label (shape = n X 1)
    --------------------
    Output: 
    w: trained weights
    y_preds: predictions
    --------------------
    '''
    # Set the random seed for np.random number generator
    # This will make sure results are reproducible
    np.random.seed(132089)
    
    # Prepare data for the perceptron
    X_train_flattened, X_dev_flattened, X_test_flattened, y_train, y_dev, y_test = prep_data(data_path)
    
    # Call the perceptron training with the given epochs
    w, b = train_perceptron(X_train_flattened, y_train, epochs)
    
    # Get train set performance
    train_preds, train_accuracy = get_perceptron_performance(w, b, X_train_flattened, y_train)
    
    # Get dev set performance
    dev_preds, dev_accuracy = get_perceptron_performance(w, b, X_dev_flattened, y_dev)
    
    # Return statement
    return(w, b, train_preds, dev_preds, train_accuracy, dev_accuracy)

In [None]:
# Set parameters
data_path = '../setup/data'
epochs = 100

# Call training function
w, b, train_preds, dev_preds, train_accuracy, dev_accuracy = get_perceptron_baseline(data_path, epochs)

Epoch 1/100: Training_accuracy = 0.5
Epoch 1/100: Training_accuracy = 0.68025
Epoch 2/100: Training_accuracy = 0.6936666666666667
Epoch 2/100: Training_accuracy = 0.7846666666666666
Epoch 3/100: Training_accuracy = 0.837
Epoch 3/100: Training_accuracy = 0.8585
Epoch 4/100: Training_accuracy = 0.5176666666666667
Epoch 4/100: Training_accuracy = 0.8005
Epoch 5/100: Training_accuracy = 0.64025
Epoch 5/100: Training_accuracy = 0.9178333333333333
Epoch 6/100: Training_accuracy = 0.7600833333333333
Epoch 6/100: Training_accuracy = 0.8029166666666666
Epoch 7/100: Training_accuracy = 0.7224166666666667
Epoch 7/100: Training_accuracy = 0.898
Epoch 8/100: Training_accuracy = 0.6161666666666666
Epoch 8/100: Training_accuracy = 0.8725
Epoch 9/100: Training_accuracy = 0.7235833333333334
Epoch 9/100: Training_accuracy = 0.6359166666666667
Epoch 10/100: Training_accuracy = 0.5351666666666667
Epoch 10/100: Training_accuracy = 0.8911666666666667
Epoch 11/100: Training_accuracy = 0.698
Epoch 11/100: Tra

In [None]:
# Show results
train_accuracy, dev_accuracy