In [None]:
%load_ext autoreload
%load_ext line_profiler
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import expit

# 2D classification

In [None]:
from project2_tools import get_2Ddata, to_onehot, from_onehot, get_data_sigmoid
from neuralnet import NeuralNet
from plotting import PlotWrap
plotter = PlotWrap()

In [None]:
# Fetch the 2D inputs and their targets from the project helper.
# NOTE(review): t_crit = 2.3 is a magic number and onehot = True presumably
# requests one-hot encoded targets -- confirm both against get_2Ddata.
data_input, data_targets = get_2Ddata(t_crit = 2.3, onehot = True)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing.  The shuffle is seeded so that a
# Restart & Run All reproduces the same train/test partition (and therefore
# comparable accuracies) on every run.
temp = train_test_split(data_input, data_targets, test_size = 0.33, random_state = 0)
input_train, input_test, target_train, target_test = temp

## Setup network and run. Warning, this takes time

Runs minibatch stochastic gradient descent on a network with one hidden layer. Multiple parameters can be set; see the docstring of `run_minibatch_sgd_one_hidden`.

In [None]:
from result_functions import run_minibatch_sgd_one_hidden as sgd

# Hyper-parameter grid: eight hidden-layer sizes and nine learning rates
# spaced logarithmically between 1e-2 and 1e2.
n_hidden_values = [8, 10, 16, 20, 32, 64, 96, 128]
eta_values = np.logspace(-2, 2, 9)

sgd(
    input_train,
    target_train,
    input_test,
    target_test,
    eta_values=eta_values,
    n_hidden_values=n_hidden_values,
)

In [None]:
# NOTE(review): this cell repeats the import and the sgd(...) call of the
# preceding cell with identical arguments, so running both cells performs the
# (slow) grid run twice -- confirm whether the second run is intentional.
from result_functions import run_minibatch_sgd_one_hidden as sgd
sgd(input_train,target_train, input_test, target_test, eta_values = eta_values, 
    n_hidden_values = n_hidden_values)


# Load data

In [None]:
import glob
# Collect every pickle under output/ whose file name starts with 'mb_sgd'.
# NOTE(review): glob.glob returns matches in arbitrary order, yet later cells
# select files by position in this list (e.g. files[3]) -- confirm the intended
# order, or select files by name instead.
files = glob.glob('output/mb_sgd*.pickle')
files

# Example of how data looks

In [None]:
# Load a single result file as an example and add log10(eta) as 'eta_val'.
# NOTE(review): files[3] is a hard-coded position in the glob result.
df = pd.read_pickle(files[3])
df['eta_val'] = np.log10(df['eta'])
df

In [None]:
# Keep the five runs with the highest peak accuracy.  sort_values sorts in
# ascending order by default, so descending order is requested explicitly;
# with the default, the first five rows are the five *worst* runs.
df_best = df.sort_values(by='max_accuracy', ascending=False)
df_best = df_best.iloc[:5]

In [None]:
plt.figure(figsize = [7,4])
# Line styles used to tell learning rates apart within one hidden-layer size.
markers = ['--','-.','-',':']
# One colour per hidden-layer size.
colors = plt.cm.viridis(np.linspace(0,1,len(n_hidden_values)))


for (nhidden, item), c in zip(df_best.groupby('nhidden'),colors):

    print(nhidden)

    # Cycle through the line styles rather than zipping with them, so a group
    # with more learning rates than styles is still drawn in full.
    for k, (eta, eta_item) in enumerate(item.groupby('eta')):
        m = markers[k % len(markers)]
        # Only the first run of each (nhidden, eta) group is plotted.
        y = eta_item['accuracy'].values[0]
        eta_val = eta_item['eta_val'].iloc[0]
        plt.plot(y, label = 'eta = $10^{{{:6}}}$, nhidden = {}'.format(eta_val, nhidden), c= c, linestyle = m)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid()
plt.legend()

### Push sigmoid  data into nicer format

In [None]:
def eta_nhidden_df(df):
    """Pivot a results frame into a table of max_accuracy values.

    The returned frame has one row per nhidden and one column per eta, with
    the columns relabelled as log10(eta).
    """
    wanted = ['nhidden', 'eta', 'max_accuracy']
    table = df[wanted].set_index(wanted[:2], drop=True).unstack()
    # unstack leaves a two-level column index ('max_accuracy', eta): drop the
    # outer level, then replace each eta by its base-10 logarithm.
    eta_labels = table.columns.droplevel(level=0)
    table.columns = np.log10(eta_labels)
    return table

In [None]:

def _snap_eta_vals(frame, grid, offset=0):
    """Replace the i-th distinct eta_val of `frame` by grid[i + offset], in place.

    The distinct values are visited in ascending order (np.unique) and every
    (old, new) pair is printed.  Rows are matched against a snapshot of the
    column taken before any replacement, so a value written in one step can
    never be matched again by a later step.
    """
    before = frame['eta_val'].copy()
    for i, old in enumerate(np.unique(before)):
        new = grid[i + offset]
        print(old, new)
        frame.loc[before == old, 'eta_val'] = new


# Read every result file whose path contains 'sigmoid'.
dfs_sigmoid = [pd.read_pickle(f) for f in files if 'sigmoid' in f]
# Pivoted (nhidden x log10(eta)) tables, built from the frames as loaded,
# i.e. before the filtering and eta corrections below.
data_sigmoid = [eta_nhidden_df(d) for d in dfs_sigmoid]
# dfs_sigmoid[2] = dfs_sigmoid[2][np.log10(dfs_sigmoid[2]['eta']) != -0.5]
# Drop the rows with log10(eta) == -0.5 from the second frame.  The .copy()
# makes the filtered frame independent of the original, so the column
# assignments below do not trigger SettingWithCopyWarning.
dfs_sigmoid[1] = dfs_sigmoid[1][np.log10(dfs_sigmoid[1]['eta']) != -0.5].copy()

# NOTE(review): unpacking requires exactly three sigmoid files, and which file
# ends up as d1/d2/d3 depends on the order of `files`.
d1,d2,d3 = dfs_sigmoid
eta_vals = np.linspace(-2, 2, 9)
for d in [d1,d2,d3]:
    d['eta_val'] = np.log10(d.eta)

# d1's distinct values map onto the grid from its start, d2's from position 5.
_snap_eta_vals(d1, eta_vals)
_snap_eta_vals(d2, eta_vals, offset=5)

# Recompute eta from the corrected exponents.
d1['eta'] = 10**d1['eta_val']
d2['eta'] = 10**d2['eta_val']
dfs_sigmoid = [d1,d2,d3]

In [None]:
# Names used to build the tanh result file names, one per line.
# NOTE(review): absolute, user-specific path -- the notebook only runs on a
# machine where this file exists; consider keeping it next to the notebook.
# The with-block closes the file handle, which the bare open() call leaked.
with open('/home/halvard/pcer.txt') as pcer_file:
    pcer = [line.rstrip() for line in pcer_file]
tanh_files = ['mb_sgd{}_0.pickle'.format(pc) for pc in pcer]
# tanh runs are picked by exact file name, relu runs by 'relu' in the path.
df_tanh = pd.concat([pd.read_pickle(f) for f in files if f.split('/')[-1] in tanh_files], ignore_index = True)
df_relu = pd.concat([pd.read_pickle(f) for f in files if 'relu' in f], ignore_index = True)
df_sigmoid = pd.concat(dfs_sigmoid,ignore_index = True)

# Pivoted (nhidden x log10(eta)) tables of max_accuracy.
data_relu = eta_nhidden_df(df_relu) 
data_tanh = eta_nhidden_df(df_tanh) 

for df in [df_relu, df_sigmoid, df_tanh]:
    df['eta_val'] = np.log10(df['eta'])
    # 1-based position of the highest entry in each run's accuracy sequence.
    df['optimal_epoch'] = df['accuracy'].apply(np.argmax)+1

# Tag every row with the hidden-layer activation it belongs to.
df_relu['hidden_act'] = 'relu'
df_tanh['hidden_act'] = 'tanh'
df_sigmoid['hidden_act'] = 'sigmoid'



In [None]:
frames = [df_sigmoid, df_tanh, df_relu]
# One row of axes per distinct learning rate.  The grid is sized for the frame
# with the most learning rates so that no column runs out of rows.
num_rows = max(np.unique(frame.eta_val).size for frame in frames)

# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(num_rows,3, figsize = [16,32], squeeze = False)
# Transpose so that each entry is one column of axes (one per activation).
axes_T = axes.T

for df, ax_col in zip(frames, axes_T):

    # NOTE(review): this overwrites the n_hidden_values list defined in the
    # grid-search cell with the sizes found in the current frame.
    n_hidden_values = np.unique(df.nhidden)
    colors = plt.cm.viridis(np.linspace(0,1,len(n_hidden_values)))

    for i,(eta, eta_group) in enumerate(df.groupby('eta_val')):
        ax = ax_col[i]

        print(eta)
        ax.set_title(eta)
        for (nhidden, item), c in zip(eta_group.groupby('nhidden'),colors):
            # 1 - accuracy of the first run in the group, plotted against the
            # 1-based epoch number.
            y =  1-np.array(item['accuracy'].values[0])
            ax.semilogy(np.arange(y.size) + 1  , y, label = 'nhidden = {}'.format(nhidden), c= c, linestyle = '-')

        ax.grid()
        ax.legend()

fig.tight_layout()

In [None]:
# plt.imshow(data)

# `nhidden_values` is not defined in any visible cell of this notebook; the
# hidden-layer sizes are stored in `n_hidden_values`.
x = n_hidden_values
y = eta_values

# NOTE(review): data_sigmoid is a *list* of pivoted frames and y holds raw eta
# values, whereas the tanh and relu cells pass a single 2-D array
# (data.values.T) with x = data.index and y = data.columns (log10 eta) --
# confirm what plot_mispred expects for the sigmoid case.
plotter.plot_mispred(data_sigmoid, x, y, title = 'Sigmoid', filename = 'mispred_sigmoid')


In [None]:
# data_tanh has nhidden as index and log10(eta) as columns; the transposed
# values therefore have one row per learning rate.
x = data_tanh.index
y = data_tanh.columns
plotter.plot_mispred(data_tanh.values.T, x, y, title = 'tanh', filename = 'mispred_tanh')

In [None]:
# Same call as for tanh: nhidden index as x, log10(eta) columns as y, and the
# transposed max_accuracy table as data.
x = data_relu.index
y = data_relu.columns
plotter.plot_mispred(data_relu.values.T, x, y, title = 'relu', filename = 'mispred_relu')

In [None]:
# Stack the three per-activation frames (rows are told apart by 'hidden_act')
# and hand them to the project plotter for eta_val = -1.5.
# NOTE(review): eta_val is log10(eta) in these frames, so -1.5 presumably
# selects eta = 10**-1.5 -- confirm in plot_eta_compare.
super_df = pd.concat([df_sigmoid, df_relu, df_tanh], ignore_index= True)
plotter.plot_eta_compare(super_df, eta_val = -1.5)

# Collect best parameters

In [None]:
from collections import defaultdict
# Maps activation name -> list of rows, one per nhidden, each holding the run
# with the highest max_accuracy for that hidden-layer size.
values = {}


for name, df in zip(['relu','sigmoid','tanh'], [df_relu, df_sigmoid, df_tanh]):
    temp = []
    
    for nhidden, item in df.groupby('nhidden'):
        # idxmax has to be *called* to obtain the row label.  Without the
        # parentheses indx is the bound method itself, and .loc receives a
        # callable instead of a label.
        indx = item.max_accuracy.idxmax()
        best = item.loc[indx]
        
        temp.append(best)
    values[name] = temp

In [None]:
# One frame per activation, each row being the best run for one nhidden.
dfs = [pd.DataFrame(val) for val in values.values()]
best_df = pd.concat(dfs, ignore_index = True).set_index(['hidden_act', 'nhidden'])#.stac
best_df = best_df[['eta_val','max_accuracy','optimal_epoch']]
# Rows become nhidden; columns become (quantity, activation).
best_df = best_df.swaplevel(axis = 0).unstack()
best_df.columns = best_df.columns.rename("", level=1)
best_df.index = best_df.index.rename("$N_h$")
# Render log10(eta) as a LaTeX power of ten.
best_df['eta_val'] = best_df['eta_val'].apply(lambda x:x.apply('10^{{{}}}'.format))
# axis is passed by keyword: pandas >= 2.0 no longer accepts it positionally.
best_df= best_df.swaplevel(axis=1).sort_index(axis=1)
# Raw string, so the backslash in \eta is not parsed as an (invalid) escape.
best_df = best_df.rename(columns={'eta_val':r'$\eta$','max_accuracy':'Accuracy', 'optimal_epoch':'Epoch',
                       'sigmoid':'sig'})

best_df

In [None]:
# Emit the table as LaTeX source; escape = False keeps the $...$ markup in the
# labels and cells intact.
print(best_df.to_latex(float_format = '%.3f', escape = False))

In [None]:
# Project plotting helper called with the three per-activation frames.
# NOTE(review): what exactly is drawn is defined in PlotWrap.plot_best.
plotter.plot_best([df_relu, df_tanh, df_sigmoid])

In [None]:
# Best run (highest max_accuracy) of each activation, one row per activation.
# idxmax returns an index *label*, so the row is fetched with .loc; .iloc only
# gives the same row while the frame has a default 0..n-1 index.
best_three = pd.concat([df.loc[df.max_accuracy.idxmax()] for df in [df_relu, df_tanh, df_sigmoid]],axis = 1).T
best_three = best_three.set_index('hidden_act', drop = True)

for hact, item in best_three.T.items():
    y = 1-np.array(item['accuracy'])
    # Epochs are counted from 1, matching 'optimal_epoch' and the per-eta
    # error plots; a zero on the logarithmic x axis could not be drawn.
    epoch = np.arange(len(y)) + 1
    plt.loglog(epoch, y, label = hact)
    
# plt.xlabel() without a label raises TypeError.
plt.xlabel('Epoch')
plt.ylabel('Misclassification rate')
plt.grid()
plt.legend()

In [None]:
# NOTE(review): sort_values is ascending by default, so iloc[0] is the run
# with the *lowest* max_accuracy -- confirm that the worst run is intended.
df_sigmoid.sort_values(by = 'max_accuracy').iloc[0]

In [None]:
# NOTE(review): sort_values is ascending by default, so iloc[0] is the run
# with the *lowest* max_accuracy -- confirm that the worst run is intended.
df_tanh.sort_values(by = 'max_accuracy').iloc[0]

In [None]:
plt.plot(x)
plt.plot(x[:9])

In [None]:
np.unique(y).size

In [None]:
np.unique(x.round(decimals=4))

In [None]:
# NOTE(review): scratch cell -- `data` is not defined in any visible cell, and
# the statement overwrites its own input, so it cannot be re-run safely.
data = data[indx]

In [None]:
np.block([[data[2].T],[data[0].T,data[1].T]])

In [None]:
# NOTE(review): scratch cell -- `mesh` is not defined in any visible cell, and
# passing both mesh[0] and mesh itself looks unfinished; confirm or remove.
plt.pcolormesh(mesh[0], mesh)

In [None]:
# Column-wise concatenation of the second and third entries of `dataframes`.
# NOTE(review): `dataframes` is not defined in any visible cell.
df_tot = pd.concat(dataframes[1:3],axis = 1)

In [None]:
df_tot.columns

In [None]:
np.linspace(-2,2.5,13)

In [None]:
# NOTE(review): `dataframes` is not defined in any visible cell.  The list
# comprehension on the next line is evaluated and discarded (it is not the last
# expression of the cell, so nothing is displayed).
[d.columns for d in dataframes]
# Scatter the (column, index) grid points of every frame after the first, to
# see which combinations each frame covers; this overwrites x and y.
for i,d in enumerate(dataframes[1:]):
    x,y = np.meshgrid(d.columns, d.index)
    plt.scatter(x.ravel(), y.ravel(), label = i)
plt.legend()

In [None]:

old_eta_vals

In [None]:
# Row-wise concatenation of every entry of `dataframes` after the first.
# NOTE(review): `dataframes` is not defined in any visible cell.
df_tot = pd.concat(dataframes[1:])
df_tot

In [None]:
# Outer-join three result tables on their index.
# NOTE(review): `dataframes` is not defined in any visible cell.
df_tot = pd.merge(dataframes[1], dataframes[2], left_index=True, right_index=True, how = 'outer')
df_tot = pd.merge(df_tot, dataframes[3], left_index=True, right_index=True, how = 'outer')
# merge appends _x/_y to clashing *column* labels, and the branch below reads
# and drops '0.01_x'/'0.01_y' as columns, so the membership test has to look
# at the columns rather than the index.
if '0.01_x' in df_tot.columns:
    # Average the two duplicated eta = 0.01 columns, ignoring missing values.
    df_tot[0.01] = np.nanmean(df_tot[['0.01_y','0.01_x']],axis = 1)
    df_tot = df_tot.drop(['0.01_x', '0.01_y'], axis = 1)
df_tot# .T.sort_values(by = 'eta').T

In [None]:
# NOTE(review): `df` is whatever frame the most recently executed cell left in
# that name, so the result depends on execution order.
eta_n_df = eta_nhidden_df(df)

In [None]:
ax = plt.gca()

# NOTE(review): `dataframes` is not defined in any visible cell.
a = dataframes[3].T # eta_n_df.T#[eta_n_df.T.index > 0.001]
m = ax.matshow(a)

x = a.columns
y = a.index

ax.set_xticks(np.arange(len(x)))
ax.set_xticklabels(x)
ax.set_xlabel('Number of hidden nodes')

ax.set_yticks(np.arange(len(y)))
# NOTE(review): the five tick labels are hard-coded and only match a frame
# with exactly five rows.
ax.set_yticklabels(['$10^{{{}}}$'.format(f) for f in np.linspace(-2,-0.5,5)])
ax.set_ylabel('Eta')

# Write each cell's value as a percentage at the centre of the cell.
# i walks the columns (x axis) and j the rows (y axis); the array is indexed
# [row, column], so the value drawn at (x=i, y=j) is a.values[j, i].
for i in range(len(x)):
    for j in range(len(y)):
        c = "${0:.1f}\\%$".format( 100*a.values[j,i])  
        ax.text(i, j, c, va='center', ha='center')

plt.colorbar(m)

In [None]:
np.linspace(-2, -0.5, 5)

In [None]:
10**(-0.5)

In [None]:
# Heat map of `temp` with its first two columns dropped.
# NOTE(review): at this point `temp` is whatever an earlier cell left in that
# name; the tick positions below are hard-coded for five rows and two columns.
ax = plt.gca()
temp2 = temp.iloc[:,2:]
m = ax.imshow(temp2)

ax.set_yticks([0,1,2,3,4])
ax.set_yticklabels(temp2.index)
ax.set_ylabel('Number of hidden nodes')

ax.set_xticks([0,1])
ax.set_xticklabels(temp2.columns)
ax.set_xlabel('Eta')

plt.colorbar(m)

In [None]:
temp.plot.bar(rot = 0)

In [None]:

# Stand-alone bar-plot example on toy data, unrelated to the network results.
# NOTE(review): this overwrites the notebook-level name `df`.
speed = [0.1, 17.5, 40, 48, 52, 69, 88]
lifespan = [2, 8, 70, 1.5, 25, 12, 28]
index = ['snail', 'pig', 'elephant',
         'rabbit', 'giraffe', 'coyote', 'horse']
df = pd.DataFrame({'speed': speed,
                   'lifespan': lifespan}, index=index)
ax = df.plot.bar(rot=0)


In [None]:
df

In [None]:
plt.plot(accuracy)

plt.grid()

In [None]:
print(np.abs(accuracy[-1] - accuracy[-2])/accuracy[-2])

In [None]:
# Mini-batch training loop with a live progress and time-left readout.
# NOTE(review): `batches`, `net` and `test_net` are not defined or imported in
# any visible cell of this notebook.
from IPython.display import clear_output
i = 0  # not used in this cell

eta = 1e-3       # learning rate handed to net.update_batch
mse = []         # one entry per finished epoch, measured on the test set
n_epochs = 100
n_batches = 10000

# Total number of batch updates, and a 1-based counter of the current update.
tot = n_epochs * n_batches
current = 1

import time
start = time.time()

for j in range(n_epochs):
    b = batches(input_train, target_train, n_batches = n_batches)
    for k, batch in enumerate(b):
        # Replace the previous progress print-out instead of appending to it.
        clear_output(wait = True)
        print('batch, epoch, MSE')
        # Shows 0 as the MSE until the first epoch has been evaluated.
        print('{:5}/{}  {:5}/{}  {:.2f}'.format(k, n_batches, j, n_epochs, mse[-1] if len(mse) else 0))#, 'o', markersize = 9)
        now = time.time() 
        # Average time per update so far, multiplied by the updates remaining.
        print('Time estimate: {:.0f} seconds left'.format((now - start)/current * (tot-current)))
        net.update_batch(batch, eta)
    
        current += 1

    # Evaluate on the test set after every epoch.
    mse.append(np.average(test_net(input_test, target_test, net)))
    # Stop as soon as the error or any weight array contains NaN.
    if np.isnan(mse[-1]) or np.any([np.any(np.isnan(w)) for w in net.weights]):
        print('NAN!!! Break! Abort mission!')
        break

In [None]:
def f(x, y, z):
    """Return x + y + z, added left to right."""
    partial = x + y
    return partial + z

# Unpack a three-element list into the three positional arguments.
a = [1, 2, 3]

f(*a)

$\in$

In [None]:
# NOTE(review): softmax is imported but not used in this cell, and `net` is
# not created until a later cell of the notebook.
from project2_tools import softmax

# Time one forward pass on a single training sample.
%timeit net.feed_forward(input_train[0])


In [None]:
net.backpropagate(input_train[0], target_train[0])

In [None]:
plt.plot(mse)

In [None]:
# Run backprop of a second network implementation on one (x, y) sample
# promoted to 2-D arrays.
# NOTE(review): `layer_sizes` is not defined in any visible cell, and x, y are
# whatever an earlier cell left in those names.
from nielsen_network import Network as nNetwork

net2 = nNetwork(layer_sizes)
net2.backprop(np.atleast_2d(x),np.atleast_2d(y))
# x,y = batch[0]

In [None]:
np.atleast_2d(x).shape

In [None]:
y

find last delta:
$$ \delta_j^L = f'(z_j^L) \frac{\partial \mathcal{C}}{\partial a_j^L} $$

propagate deltas for each layer
$$ \delta_j^l = \sum_k \delta_k^{l+1} w_{kj}^{l+1} f'(z_j^l)$$

update weights and bias
$$w_{jk}^l \leftarrow w_{jk}^l - \eta \delta_j^l a_k^{l-1} $$
$$b_{j}^l \leftarrow b_{j}^l - \eta \delta_j^l  $$



we have

$$ \frac{\partial \mathcal{C}}{\partial a_j^L} = a_j^L - t_j, $$
and
$$ f'(z_j^l) = f(z_j^l) \left(1 - f(z_j^l)\right) $$
for sigmoid

In [None]:
# Take only the first batch: the loop is left immediately, so `batch` holds the
# first item yielded and i == 0.
# NOTE(review): `batches` is not defined or imported in any visible cell.
for i, batch in enumerate(batches(input_train, target_train, n_batches = 100)):
    break
    

In [None]:
x,y = batch[0]
x,y

In [None]:
# Train a fresh network for 20 passes over the training data (100 batches per
# pass) and plot the averaged test_net result after each pass.
# NOTE(review): `layer_sizes`, `batches` and `test_net` are not defined in any
# visible cell.
net = NeuralNet(layer_sizes, act_func = ['sigmoid','identity'])

for k in range(20):
    for i, batch in enumerate(batches(input_train, target_train, n_batches = 100)):
        net.update_batch(batch, eta = 1e-2)

    # One marker per pass, at x = pass number.
    plt.plot(k, np.average(test_net(input_test, target_test, net)), 'o', markersize = 10)

In [None]:
# Scatter the network output against the target for every test input.
# ax2 is created but only referenced by the commented-out histograms below.
fig,[ax1,ax2] = plt.subplots(1,2, figsize = [8,4])
i = 43  # not used in this cell
# NOTE(review): [1][-1] presumably selects the output-layer activation from the
# value returned by feed_forward -- confirm against NeuralNet.feed_forward.
test_out = np.array([net.feed_forward(inp)[1][-1] for inp in input_test]).squeeze()
ax1.scatter(test_out, target_test)
ax1.set_xlabel('predicted')
ax1.set_ylabel('target')

# Diagonal on which predicted == target.
ax1.plot([-12,12],[-12,12])
ax1.axis('equal')


#ax2.hist(test_out,  alpha = 0.5)
#ax2.hist(target_test,  alpha = 0.5);