In [None]:
%matplotlib inline

import sys
sys.path.append("..") # This allows for importing from other directories above

# from models.adaboost_mh import AdaBoostMH
import numpy as np
import matplotlib.pyplot as plt
from data.utils.fetch_data import get_Wts_and_vts

# Load data
## W_ts and vts for the pen digits ('pen') and isolet ('iso') datasets
rel_path = '../data/'
Wts_pen, vts_pen = get_Wts_and_vts(rel_path, 'pen')
Wts_iso, vts_iso = get_Wts_and_vts(rel_path, 'iso')

## Number of classes in each dataset
k_pen = 10
k_iso = 26

## Labels that go with the W_ts and vts loaded above
y_pen = np.load(rel_path + 'pendigits/pendigits_train_labels.npy')
# Careful: the isolet training labels are stored as floats and are NOT
# zero-indexed, so cast them to integers and shift them down by one.
y_iso = np.load(rel_path + 'isolet/isolet_train_labels.npy').astype('int') - 1

In [None]:
# One-hot encode the labels (+1 for the correct class, -1 for every other class)
def one_hot_labels(y, k):
    """
    Encode zero-indexed class labels as rows of +1 / -1.

    y: N by 1 (or length-N) numpy-array corresponding to the labels.
       Labels must be whole numbers in [0, k); float arrays are accepted
       as long as every entry is a whole number.
    k: int, corresponding to number of unique classes.
    returns: Y, an N by k float numpy-array with each row containing
             a 1 for the correct class and -1 otherwise.
    raises: ValueError if a label is not a whole number or falls outside
            [0, k), or if y holds more than one label per row.
    """
    labels = np.asarray(y)
    N = labels.shape[0]
    # Flatten N by 1 input to length N (fails loudly for N by m, m > 1).
    labels = labels.reshape(N)

    if not np.issubdtype(labels.dtype, np.integer):
        # Labels loaded from disk may be floats; only accept them when the
        # cast to int loses nothing.
        as_int = labels.astype(int)
        if not np.array_equal(as_int, labels):
            raise ValueError("labels must be whole numbers")
        labels = as_int

    # Negative indices would silently wrap around to the wrong column, so
    # reject anything outside [0, k) up front.
    if labels.size and (labels.min() < 0 or labels.max() >= k):
        raise ValueError(
            "labels must lie in [0, %d); got values in [%d, %d]"
            % (k, labels.min(), labels.max())
        )

    Y = np.full((N, k), -1.0)
    Y[np.arange(N), labels] = 1.0  # Make the correct class become +1.
    return Y

In [None]:
# Expand each label vector y of shape (n,) into its +1/-1 matrix Y of shape (n, k)
Y_pen = one_hot_labels(y=y_pen, k=k_pen)
Y_iso = one_hot_labels(y=y_iso, k=k_iso)

In [None]:
def calc_pseudo_ws(Wts, vts, Y, T=30):
    """
    Compute one pseudo weight per round.

    For round t the pseudo weight is
        sum_i | sum_l W[i, l] * (1[Y[i, l] * v[l] == +1] - 1[Y[i, l] * v[l] == -1]) |
    with W = Wts[t + 1] and v = vts[t].

    Wts: sequence of weight arrays. Wts[0] is skipped, so at least T + 1
         entries are needed. Each entry is multiplied element-wise with an
         N by k mask (presumably N by k itself -- confirm against
         get_Wts_and_vts).
    vts: sequence of vote arrays, at least T entries. vts[t] must broadcast
         against Y (presumably length k with entries in {-1, +1} -- TODO
         confirm).
    Y: N by k numpy-array of +1 / -1 labels.
    T: int, number of rounds to evaluate. Defaults to 30, the value that
       used to be hard-coded.
    returns: list of length T holding the pseudo weight of each round.
    """
    w_pseudo_ts = []
    for t in range(T):
        W_t = Wts[t + 1]  # skip W_0
        v_t = vts[t]
        # Get all the y_i,l * v_l in one matrix
        Y_mult_v_t = np.multiply(Y, v_t)
        # The above is either 1 or -1, to get an indicator
        # about whether their product is +1 or -1 you can
        # add one to the whole matrix and all the -1 -> 0,
        # or add negative one to the whole matrix and all
        # the +1 -> 0. You just need to normalize by \pm 1/2,
        # to get an indicator or a matrix mask.
        w_p_mask = ((Y_mult_v_t + 1) * 0.5).astype('int')
        w_n_mask = ((Y_mult_v_t - 1) * -0.5).astype('int')
        # Per-example weight mass where label and vote agree / disagree.
        w_p_t = np.sum(np.multiply(W_t, w_p_mask), axis=1)
        w_n_t = np.sum(np.multiply(W_t, w_n_mask), axis=1)
        w_pseudo_ts.append(np.sum(np.abs(w_p_t - w_n_t)))
    return w_pseudo_ts

In [None]:
# Per-round pseudo weights for each dataset
w_pseudo_pen = calc_pseudo_ws(Wts=Wts_pen, vts=vts_pen, Y=Y_pen)
w_pseudo_iso = calc_pseudo_ws(Wts=Wts_iso, vts=vts_iso, Y=Y_iso)

In [None]:
# Function for plotting Pseudo weights on same plot for two datasets
def plot_ws(ws_pen, ws_iso, title):
    """
    Plot the pen digits and isolet pseudo weights as two labelled curves
    on the same pyplot figure.

    ws_pen: sequence of pseudo weights for the pen digits data; its length
            sets how many rounds appear on the x-axis.
    ws_iso: sequence of pseudo weights for the isolet data, drawn against
            the same x values as ws_pen.
    title: string used as the plot title.
    """
    n_rounds = len(ws_pen)
    rounds = np.linspace(1, n_rounds, num=n_rounds)  # 1, 2, ..., n_rounds
    curves = (
        (ws_pen, 'Pen digits data'),
        (ws_iso, 'Isolets data'),
    )
    for weights, curve_label in curves:
        plt.plot(rounds, weights, label=curve_label)
    plt.title(title)
    plt.xlabel("T")
    plt.ylabel("Pseudo Weight Value")
    # The legend is built last so that it picks up both curves.
    plt.legend(loc='best')

In [None]:
# Draw both pseudo-weight curves on one figure
plot_ws(
    ws_pen=w_pseudo_pen,
    ws_iso=w_pseudo_iso,
    title="Pseudo Weights over 30 rounds",
)