In [1]:
%matplotlib inline

import sys
sys.path.append("..") # This allows for importing from other directories above
# Our imports
from models.adaboost_mh import AdaBoostMH
from models.weak_learner import stump_base as weak_clf
# Standard Imports
import numpy as np
import matplotlib.pyplot as plt


# Pendigits train/test splits, stored as .npy arrays relative to this notebook
X_train, y_train = (
    np.load('../data/pendigits/pendigits_train_data.npy'),
    np.load('../data/pendigits/pendigits_train_labels.npy'),
)
X_test, y_test = (
    np.load('../data/pendigits/pendigits_test_data.npy'),
    np.load('../data/pendigits/pendigits_test_labels.npy'),
)

# Build the AdaBoostMH model from the train and test splits
model = AdaBoostMH(
    X_train,
    y_train,
    X_test,
    y_test,
)

In [2]:
# Run configuration, passed positionally to model.run_factorized in later cells
T = 19          # first argument; presumably the number of boosting rounds -- TODO confirm
W_init = False  # third argument; its meaning is not visible in this notebook -- TODO confirm

In [3]:
# Smoke test: one factorized run at the configured T before the full sweep
err_tr, err_te, gammas, D_ts = model.run_factorized(T, weak_clf, W_init)
print(
    "The training error is {}.\nThe testing error is {}.".format(err_tr, err_te)
)

ValueError: not enough values to unpack (expected 6, got 4)

In [None]:
# Train the model once per round count t = 1..T. Note this should be the longest step.
# The loop variable t (not the fixed T) is passed to run_factorized, so entry t-1 of
# model_performance holds the result tuple for round count t. This lines up with the
# 1..T x-axis that the plotting helpers in this notebook build from the list length.
# NOTE(review): assumes run_factorized's first argument is the number of rounds and
# that t = 1 is a valid value -- confirm against AdaBoostMH.
model_performance = [
    model.run_factorized(t, weak_clf, W_init) for t in range(1, T + 1)
]

In [None]:
# Transpose the per-run result tuples into one list per metric.
# Only the first four fields of each tuple are read, in the order
# (training error, testing error, gammas, D_ts).
model_tuples = [
    [run[field] for run in model_performance]
    for field in range(4)
]
err_tr, err_te, gammas, D_ts = model_tuples

In [None]:
# Function for plotting training and testing error on same plot
def plot_error(err_tr, err_te, title):
    """Plot training and testing error curves on the current matplotlib figure.

    Parameters
    ----------
    err_tr : sequence
        Training errors; its length sets the number of x positions.
    err_te : sequence
        Testing errors, plotted against the same x positions as ``err_tr``.
    title : str
        Title placed on the plot.

    The x positions are 1, 2, ..., len(err_tr).
    """
    T = len(err_tr)
    ts = np.linspace(1, T, num=T)  # evenly spaced 1..T, one point per entry
    plt.plot(ts, err_tr, label='Training Error')
    plt.plot(ts, err_te, label='Testing Error')
    plt.legend(loc='best')
    plt.xlabel("T")
    plt.ylabel("Error")  # label the y-axis so the figure stands on its own
    plt.title(title)

In [None]:
# ecdf taken from https://stackoverflow.com/questions/15792552/numpy-scipy-equivalent-of-r-ecdfxx-function
# ecdf = Empirical Cumulative Distribution Function
def ecdf(x):
    """Return the empirical CDF of ``x`` as a pair ``(sorted values, cumulative fractions)``.

    The i-th sorted value (1-based) is paired with the fraction i / n, where n is
    the number of entries in ``x``.
    """
    ordered = np.sort(x)
    n = len(ordered)
    fractions = np.arange(1, n + 1, dtype=float) / n
    return ordered, fractions

# Function for plotting the gammas, either plots ecdf or gamma change over rounds
def plot_gammas(gammas, which_plot, title):
    """Plot the gammas either as an empirical CDF or against their position.

    Parameters
    ----------
    gammas : sequence
        Gamma values to plot.
    which_plot : str
        ``'cdf'`` draws the empirical CDF of ``gammas`` (via ``ecdf``). Any other
        value draws ``gammas`` against positions 1, 2, ..., len(gammas).
    title : str
        Title placed on the plot.
    """
    if which_plot == 'cdf':
        xs, ys = ecdf(gammas)
        plt.plot(xs, ys)
        # Label both axes here as well, so the two plot kinds are consistent
        plt.xlabel("gamma")
        plt.ylabel("Cumulative probability")
    else:
        T = len(gammas)
        ts = np.linspace(1, T, num=T)  # evenly spaced 1..T, one point per entry
        plt.plot(ts, gammas)
        plt.xlabel("T")
        plt.ylabel("gamma")
    plt.title(title)

In [None]:
# Training vs. testing error curves for the factorized model
title = "Error for Factorized Model on Penn Digits"
plot_error(err_tr, err_te, title=title)

In [None]:
# Gamma values plotted against their position 1..len(gammas).
# Any which_plot value other than 'cdf' selects this plot.
plot_gammas(
    gammas,
    which_plot='gamma_rate',
    title='Gamma over T rounds',
)

In [None]:
# Empirical CDF of the gamma values
plot_gammas(
    gammas,
    which_plot='cdf',
    title='CDF for Gammas over all rounds T',
)