In [1]:
# import standard plotting and animation
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import clear_output
from matplotlib import gridspec
import autograd.numpy as np
import copy
import time
datapath = '../../mlrefined_datasets/nonlinear_superlearn_datasets/'

# This code cell will not be shown in the HTML version of this notebook
import sys
sys.path.append('../../')
from mlrefined_libraries import multilayer_perceptron_library as multi
from mlrefined_libraries import nonlinear_superlearn_library as nonlin

# this is needed to compensate for %matplotlib notebook's tendency to blow up images when plotted inline
%matplotlib notebook
from matplotlib import rcParams
rcParams['figure.autolayout'] = True


%load_ext autoreload
%autoreload 2

In [20]:
### plot the data in sin_function.csv next to the data in noisy_sin_sample.csv
# build the figure that holds both panels
fig = plt.figure(figsize = (9,4))
artist = fig

# two equal-width panels side by side: ax1 on the left, ax2 on the right
gs = gridspec.GridSpec(1, 2, width_ratios=[1,1])
ax1 = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])

# locations of the two datasets
csvname1 = datapath + 'sin_function.csv'
csvname2 = datapath + 'noisy_sin_sample.csv'

# load each file transposed, then use column 0 as input and column 1
# (reshaped into a single column) as output
data1 = np.loadtxt(csvname1,delimiter=',').T
x1 = data1[:,0]
y1 = data1[:,1]
y1.shape = (len(y1),1)

data2 = np.loadtxt(csvname2,delimiter=',').T
x2 = data2[:,0]
y2 = data2[:,1]
y2.shape = (len(y2),1)

# scatter each dataset in its own panel (left first, then right) with identical axis formatting
for panel,inputs,outputs in ((ax1,x1,y1),(ax2,x2,y2)):
    panel.scatter(inputs,outputs,c = 'k',edgecolor = 'w',s = 40,zorder = 1)
    panel.set_xlim([-0.1,1.1])
    panel.set_ylim([-1.5,1.5])
    panel.set_xlabel(r'$x$', fontsize = 14,labelpad = 10)
    panel.set_ylabel(r'$y$', rotation = 0,fontsize = 14,labelpad = 10)
    panel.set_xticks([0,1])
    panel.set_yticks([-1,0,1])

plt.show()

<IPython.core.display.Javascript object>

# AUTOENCODER 

In [58]:


# load the autoencoder dataset; rows 0 and 1 of X are scattered against each other below
X = np.loadtxt(datapath + 'universal_autoencoder_samples_0.csv',delimiter=',')

# alternative dataset, kept disabled:
# datapath = '../../mlrefined_datasets/unsuperlearn_datasets/'
# X = np.loadtxt(datapath + 'circle_data.csv',delimiter=',')

# one panel with equal aspect ratio
fig = plt.figure(figsize = (9,4))
gs = gridspec.GridSpec(1,1)
ax = plt.subplot(gs[0],aspect = 'equal')

# label both axes, then scatter the dataset
ax.set_xlabel(r'$x_1$',fontsize = 15)
ax.set_ylabel(r'$x_2$',fontsize = 15,rotation = 0)
ax.scatter(X[0,:],X[1,:],c = 'k',s = 60,linewidth = 0.75,edgecolor = 'w')
plt.show()

<IPython.core.display.Javascript object>

In [61]:
# This code cell will not be shown in the HTML version of this notebook
# Fit one autoencoder for every j = 1,...,max_units-1, where j is the middle entry of the
# encoder/decoder layer_sizes, and collect the fitted library objects in `models`.
max_units = 20
degrees = np.arange(1,max_units)
models = []
for j in degrees:
    print (j)

    # fresh library instance for this run, preprocessed with the 'standard' normalizer
    mylib = multi.basic_lib.unsuper_setup.Setup(X)
    mylib.preprocessing_steps(normalizer = 'standard')

    # the run with j == 1 creates the train/validation split; all other runs reuse it
    if j != 1:
        mylib.x_train = x_train
        mylib.x_val = x_val
    else:
        mylib.make_train_val_split(train_portion = 0.66)
        x_train = mylib.x_train
        x_val = mylib.x_val

    # encoder 2 -> j -> 1 and decoder 1 -> j -> 2, both with tanh activation
    mylib.choose_encoder(layer_sizes = [2,j,1],activation = 'tanh',scale = 0.1)
    mylib.choose_decoder(layer_sizes = [1,j,2],activation = 'tanh',scale = 0.1)

    # autoencoder cost
    mylib.choose_cost(name = 'autoencoder')

    # run the optimization and keep the fitted instance
    mylib.fit(max_its = 2000,alpha_choice = 10**(-1),verbose = False)
    models.append(mylib)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [63]:
# This code cell will not be shown in the HTML version of this notebook
# plot results: pass the dataset X and the list of fitted autoencoder runs (`models`,
# built in the loop above) to the cross-validation animator
multi.autoencoder_demos.animate_crossvals(X,models)

# trees

In [3]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'noisy_sin_sample.csv'
data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (boost_lib2 version of the setup class)
mylib = nonlin.boost_lib2.superlearn_setup.Setup(x,y)

# choose normalizer ('none' is passed here)
mylib.choose_normalizer(name = 'none')

# choose cost
# NOTE(review): the other cells that load noisy_sin_sample.csv use 'least_squares';
# confirm that 'softmax' is intended for this dataset
mylib.choose_cost(name = 'softmax')

# choose optimizer
mylib.choose_optimizer('newtons_method',max_its=1)

# run boosting
mylib.boost(5)

# plot history
mylib.plot_history()

<IPython.core.display.Javascript object>

# trees

In [33]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'noisy_sin_sample.csv'
# csvname = datapath + 'universal_regression_samples_0.csv'

data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (stump-based)
mylib2 = nonlin.boost_lib3.stump_booster.Setup(x,y)

# choose normalizer
mylib2.choose_normalizer(name = 'standard')

# split into training and validation sets (train_portion = 0.66);
# later cells (mylib7, mylib5) copy this split from mylib2
mylib2.make_train_valid_split(train_portion = 0.66)

# choose cost
mylib2.choose_cost(name = 'least_squares')

# choose optimizer
mylib2.choose_optimizer('newtons_method',max_its=1)

# run boosting
mylib2.boost(50)

# plot history
mylib2.plot_history()

<IPython.core.display.Javascript object>

In [34]:
# Animate the stump booster `mylib2` that the previous cell fit on noisy_sin_sample.csv
# (`csvname` still points at that file).
#
# The load / normalize / split / boost pipeline is deliberately NOT repeated here:
# re-running it would redo the train/validation split and the boosting run, so the
# animation would show a different fit from the one whose history is plotted above,
# and the later cells that copy mylib2's split would silently pick up the second split.
# NOTE(review): 51 frames for the 50 boosting rounds run above - presumably one frame
# per round plus the initial model; confirm against the animator.
frames = 51
anim = nonlin.boosting_regression_animators_v2.Visualizer(csvname)
anim.animate_trainval_boosting(mylib2,frames)

In [6]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'new_circle_data.csv'
data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (stump-based); note this rebinds the name `mylib` used by earlier cells
mylib = nonlin.boost_lib3.stump_booster.Setup(x,y)

# choose normalizer
mylib.choose_normalizer(name = 'standard')

# split into training and validation sets (train_portion = 0.66)
mylib.make_train_valid_split(train_portion = 0.66)

# choose cost
mylib.choose_cost(name = 'softmax')

# choose optimizer
mylib.choose_optimizer('newtons_method',max_its=5)

# run boosting
mylib.boost(5,max_check = 10)

In [7]:
# plot history of the stump booster `mylib` fit on new_circle_data.csv above
mylib.plot_history()

<IPython.core.display.Javascript object>

In [8]:
# plot misclassification history of the stump booster `mylib` fit above
mylib.plot_misclass_history()

<IPython.core.display.Javascript object>

In [5]:
# animate comparisons for the stump booster `mylib`, using the v2 classification animator
# built from the dataset file in `csvname`
# NOTE(review): frames = 15 although the fit above calls mylib.boost(5, ...) - confirm the
# animator accepts a frame count larger than that
frames = 15
anim = nonlin.boosting_classification_animator_v2.Visualizer(csvname)
anim.animate_comparisons(mylib,frames)

In [20]:
# train/validation boosting animation for the stump booster `mylib`, using the v3
# classification animator built from the dataset file in `csvname`
# NOTE(review): frames = 15 although the fit above calls mylib.boost(5, ...) - confirm the
# animator accepts a frame count larger than that
frames = 15
anim = nonlin.boosting_classification_animator_v3.Visualizer(csvname)
anim.animate_trainval_boosting(mylib,frames)

# Monomials

In [57]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'noisy_sin_sample.csv'
# csvname = datapath + 'universal_regression_samples_0.csv'

data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (kernel-based)
mylib7 = nonlin.boost_lib3.kernel_booster.Setup(x,y)

# choose normalizer
mylib7.choose_normalizer(name = 'standard')

# instead of making a new split, copy the train/validation split (data and indices)
# from the stump booster mylib2, which loads the same dataset file
# mylib7.make_train_valid_split(train_portion = 0.66)
mylib7.x_train = mylib2.x_train
mylib7.y_train = mylib2.y_train
mylib7.x_valid = mylib2.x_valid
mylib7.y_valid = mylib2.y_valid

mylib7.train_inds = mylib2.train_inds
mylib7.valid_inds = mylib2.valid_inds

# choose cost
mylib7.choose_cost(name = 'least_squares')

# choose optimizer
mylib7.choose_optimizer('newtons_method',max_its=1)

# run boosting
mylib7.boost(num_rounds=50,D=15)

In [58]:
# plot history of the kernel booster mylib7 fit above
mylib7.plot_history()

<IPython.core.display.Javascript object>

In [59]:
# train/validation boosting animation for the kernel booster mylib7, using the regression
# animator built from the dataset file in `csvname` (10 frames requested)
frames = 10
anim = nonlin.boosting_regression_animators_v2.Visualizer(csvname)
anim.animate_trainval_boosting(mylib7,frames)

In [111]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'new_circle_data.csv'

data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (kernel-based)
mylib9 = nonlin.boost_lib3.kernel_booster.Setup(x,y)

# choose normalizer
mylib9.choose_normalizer(name = 'standard')

# Copy the train/validation split (data and indices) from `mylib`, the stump booster that
# was fit on new_circle_data.csv earlier in the notebook and created its split with
# make_train_valid_split. The split must not be read from mylib6: that object is only
# created in a later cell, so a top-to-bottom run would fail here with a NameError.
# mylib9.make_train_valid_split(train_portion = 0.66)
mylib9.x_train = mylib.x_train
mylib9.y_train = mylib.y_train
mylib9.x_valid = mylib.x_valid
mylib9.y_valid = mylib.y_valid

mylib9.train_inds = mylib.train_inds
mylib9.valid_inds = mylib.valid_inds

# choose cost
mylib9.choose_cost(name = 'softmax')

# choose optimizer
mylib9.choose_optimizer('newtons_method',max_its=5)

# run boosting
mylib9.boost(num_rounds=15,D=10)

In [113]:
# plot misclassification history of the kernel booster mylib9 fit above
mylib9.plot_misclass_history()

<IPython.core.display.Javascript object>

In [114]:
# train/validation boosting animation for the kernel booster mylib9, using the v3
# classification animator built from the dataset file in `csvname` (16 frames requested)
frames = 16
anim = nonlin.boosting_classification_animator_v3.Visualizer(csvname)
anim.animate_trainval_boosting(mylib9,frames)

# Networks

In [49]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'noisy_sin_sample.csv'
# csvname = datapath + 'universal_regression_samples_0.csv'

data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (network-based)
mylib5 = nonlin.boost_lib3.net_booster.Setup(x,y)

# choose normalizer
mylib5.choose_normalizer(name = 'standard')

# instead of making a new split, copy the train/validation split (data and indices)
# from the stump booster mylib2, which loads the same dataset file
# mylib5.make_train_valid_split(train_portion = 0.66)
mylib5.x_train = mylib2.x_train
mylib5.y_train = mylib2.y_train
mylib5.x_valid = mylib2.x_valid
mylib5.y_valid = mylib2.y_valid

mylib5.train_inds = mylib2.train_inds
mylib5.valid_inds = mylib2.valid_inds

# choose cost
mylib5.choose_cost(name = 'least_squares')

# choose optimizer (gradient descent, 5000 iterations, alpha_choice 10**(-1))
mylib5.choose_optimizer('gradient_descent',max_its=5000,alpha_choice=10**(-1))

# run boosting
mylib5.boost(num_rounds=30,activation = 'relu')

In [50]:
# plot history of the network booster mylib5 fit above
mylib5.plot_history()

<IPython.core.display.Javascript object>

In [51]:
# train/validation boosting animation for the network booster mylib5, using the regression
# animator built from the dataset file in `csvname` (31 frames for the 30 rounds run above)
frames = 31
anim = nonlin.boosting_regression_animators_v2.Visualizer(csvname)
anim.animate_trainval_boosting(mylib5,frames)

In [108]:
# import data: every row of the file except the last goes to x, the last row goes to y
csvname = datapath + 'new_circle_data.csv'
data = np.loadtxt(csvname,delimiter = ',')
x = copy.deepcopy(data[:-1,:])
y = copy.deepcopy(data[-1:,:] )

# import booster (network-based)
mylib6 = nonlin.boost_lib3.net_booster.Setup(x,y)

# choose normalizer
mylib6.choose_normalizer(name = 'standard')

# Copy the train/validation split (data and indices) from `mylib`, the stump booster that
# was fit on new_circle_data.csv earlier in the notebook and created its split with
# make_train_valid_split. Reading it from that original source (rather than from another
# booster that itself only borrows a split) keeps this cell runnable in a top-to-bottom
# run without any circular dependency between the classification cells.
# mylib6.make_train_valid_split(train_portion = 0.66)
mylib6.x_train = mylib.x_train
mylib6.y_train = mylib.y_train
mylib6.x_valid = mylib.x_valid
mylib6.y_valid = mylib.y_valid

mylib6.train_inds = mylib.train_inds
mylib6.valid_inds = mylib.valid_inds

# choose cost
mylib6.choose_cost(name = 'softmax')

# choose optimizer (gradient descent, 1000 iterations, alpha_choice 10**(0))
mylib6.choose_optimizer('gradient_descent',max_its=1000,alpha_choice=10**(0))

# run boosting
mylib6.boost(num_rounds=20)

In [109]:
# plot history of the network booster mylib6 fit above
mylib6.plot_history()

<IPython.core.display.Javascript object>

In [79]:
# plot misclassification history of the network booster mylib6 fit above
mylib6.plot_misclass_history()

<IPython.core.display.Javascript object>

In [110]:
# train/validation boosting animation for the network booster mylib6, using the v3
# classification animator built from the dataset file in `csvname` (16 frames requested)
frames = 16
anim = nonlin.boosting_classification_animator_v3.Visualizer(csvname)
anim.animate_trainval_boosting(mylib6,frames)