In [216]:
# This code cell will not be shown in the HTML version of this notebook
# import custom library
# (the directory two levels up is appended to the import path first - presumably
# that is where mlrefined_libraries lives; verify against the repository layout)
import sys
sys.path.append('../../')
from mlrefined_libraries import superlearn_library as superlearn
from mlrefined_libraries import math_optimization_library as optlib
datapath = '../../mlrefined_datasets/superlearn_datasets/'

# short aliases for the demo, optimizer, scaling, and plotting utilities used in this notebook
classif_plotter = superlearn.lin_classification_demos
optimizers = optlib.optimizers
classification_plotter = superlearn.classification_static_plotter.Visualizer();
feature_scaling_tools = superlearn.feature_scaling_tools
static_plotter = optlib.static_plotter.Visualizer()
from mlrefined_libraries import unsupervised_library as unsuplib


# alias for the library's cost function module
cost_lib = superlearn.cost_functions

# import autograd's wrapped numpy so that functions are built properly for the optimizers
import autograd.numpy as np

# import timer
from datetime import datetime 

# this is needed to compensate for %matplotlib notebook's tendency to blow up images when plotted inline
%matplotlib notebook
from matplotlib import rcParams
rcParams['figure.autolayout'] = True

# reload imported modules automatically before each cell executes, so that edits
# to the library files are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


We will need one class for each of the following:

- input normalization / loading

- model

- cost function

- optimizer

- various plotting tools for visualization / debugging

Each needs to be designed so that it can take in the new models we will develop in the chapter to follow.  Let's start with input normalization.

In [215]:
class input_normalizer:
    '''
    A class that wraps up the input normalization schemes we have seen.

    Supported schemes (the `scheme` argument of create_functions)
    - 'standard': mean centering / std normalization
    - 'PCA-sphereing': standard normalization followed by PCA sphereing

    (ZCA sphereing is not implemented in this class.)

    Throughout, the input x is a 2d array whose rows are input features and
    whose columns are data points.

    For each scheme you put in input features, and the following is returned
    - normalizer: the normalization scheme of your choice, returned as a function that 
    you can then use for future test points
    - inverse_normalizer: inverse normalization function for reversing the chosen 
    normalization

    You can then normalize the input x of a dataset using the desired normalization scheme
    by 

    x_normalized = normalizer(x)

    and then return the data to its original form as

    x_orig = inverse_normalizer(x_normalized)
    '''

    def create_functions(self,x,scheme,**kwargs):
        '''
        Build the normalizer / inverse normalizer pair for the chosen scheme.

        Parameters
        - x: 2d array of input data (features along rows, points along columns)
        - scheme: 'standard' or 'PCA-sphereing'
        - kwargs: forwarded to the PCA-sphereing scheme (e.g., lam)

        Returns
        - normalizer, inverse_normalizer: functions mapping data to / from
        the normalized space

        Raises
        - ValueError: if scheme is not one of the supported names
        '''
        # standard normalization - for each feature subtract mean, divide by standard deviation 
        if scheme == 'standard':
            return self.standard_normalizer(x)

        # PCA-sphereing - use PCA to normalize input features
        if scheme == 'PCA-sphereing':
            return self.PCA_sphereing(x,**kwargs)

        # fail loudly rather than handing back non-callable placeholders
        raise ValueError("unknown normalization scheme %r - choose 'standard' or 'PCA-sphereing'" % (scheme,))

    # standard normalizer - subtract mean, divide by standard deviation - for each input feature
    def standard_normalizer(self,x):
        '''
        Create the standard normalizer (subtract mean, divide by standard
        deviation - per input feature) and its inverse from the data x.

        Returns
        - normalizer, inverse_normalizer: functions built from the feature-wise
        mean / std of x
        '''
        # compute mean / std of each input feature (kept as columns for broadcasting)
        x_means = np.mean(x,axis = 1)[:,np.newaxis]
        x_stds = np.std(x,axis = 1)[:,np.newaxis]

        # a constant feature has zero std - divide by 1 instead, so that the
        # feature is simply mean-centered rather than producing nan / inf
        x_stds = np.where(x_stds > 0,x_stds,1.0)

        # create normalizer function based on mean / std
        normalizer = lambda data: (data - x_means)/x_stds

        # create inverse normalizer function 
        inverse_normalizer = lambda data: data*x_stds + x_means

        return normalizer, inverse_normalizer

    ##### PCA-sphereing functions ####
    # PCA-sphereing - use PCA to normalize input features
    def PCA_sphereing(self,x,**kwargs):
        '''
        Create the PCA-sphereing normalizer and its inverse from the data x.

        The data is first standard normalized, then rotated onto the
        eigenvectors of its regularized covariance matrix and scaled by
        one over the square root of the corresponding eigenvalues.

        Parameters
        - x: 2d array of input data (features along rows, points along columns)
        - kwargs: forwarded to PCA (e.g., lam, the regularization parameter)

        Returns
        - normalizer, inverse_normalizer
        '''
        # standard normalize the input data
        standard_normalizer, inv_standard_normalizer = self.standard_normalizer(x)
        x_standard = standard_normalizer(x)

        # compute pca transform and inverse transform for sphereing
        D,V = self.PCA(x_standard,**kwargs)
        D_sqrt = np.sqrt(D)
        M = np.dot(np.diag(1/D_sqrt),V.T)
        M_inv = np.dot(V,np.diag(D_sqrt))

        # make normalizer: standard normalize first, then sphere
        normalizer = lambda data: np.dot(M,standard_normalizer(data))

        # make inverse normalizer: undo the steps in reverse order, i.e.,
        # un-sphere first and only then undo the standard normalization
        inverse_normalizer = lambda data: inv_standard_normalizer(np.dot(M_inv,data))

        return normalizer, inverse_normalizer

    # compute eigendecomposition of data covariance matrix
    def PCA(self,x,**kwargs):
        '''
        A function for producing the full PCA transformation on an input dataset.

        Parameters
        - x: 2d array of input data (features along rows, points along columns)
        - lam (keyword, optional): weight of the identity matrix added to the
        covariance matrix, default 10**(-7)

        Returns
        - D: eigenvalues of the regularized covariance matrix
        - V: corresponding eigenvectors, stored as columns
        '''
        lam = kwargs.get('lam',10**(-7))

        # create the regularized covariance matrix (x is not re-centered here)
        P = float(x.shape[1])
        Cov = 1/P*np.dot(x,x.T) + lam*np.eye(x.shape[0])

        # use numpy function to compute eigenvalues / vectors of the symmetric matrix
        D,V = np.linalg.eigh(Cov)
        return D,V

In [217]:
# create an instance of the input_normalizer class for trying out its methods
test = input_normalizer()

In [26]:
# load in dataset to perform PCA on
X_original = np.loadtxt(datapath + '2d_span_data.csv',delimiter=',')

# mean-center the data
# NOTE(review): the line below is a bare name, not a computation; x_sphered is not
# assigned in any visible cell, so this would raise NameError unless it is defined
# elsewhere - TODO confirm the intended centering / sphereing step
x_sphered

# compute the full PCA transformation of dataset
# NOTE(review): neither PCA_sphere nor X is defined in the visible cells (the data
# loaded above is stored as X_original) - TODO confirm which function / variable is meant
W,S = PCA_sphere(X)

# compute principal components
# NOTE(review): pcs is not defined in the visible cells, and the call below plots
# rather than computes - TODO confirm the arguments sphereing_visualizer expects
unsuplib.PCA_demos.sphereing_visualizer(X,W,S,pcs)