# Fitting in Neural Network

This tutorial is adapted from [visual proof of fitting](http://neuralnetworksanddeeplearning.com/chap4.html).

In this tutorial, we will see how a neural network can fit a continuous function.


In [None]:
%matplotlib inline

from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np

plt.style.reload_library()
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this installation
# provides (preferring the new one). If neither exists the default style is
# kept instead of raising an OSError.
_whitegrid_styles = [name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
                     if name in plt.style.available]
plt.style.use(_whitegrid_styles[:1])

import sys
sys.path.append('scripts')
from draw_neural_net import draw_neural_net

## Sigmoid function

Let us use the sigmoid function as the activation function in the following experiments.

$$\sigma(z) = \frac{1}{1 + \exp{(-z)}}$$

In [None]:
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so inputs
    with a large magnitude (such as the ``1000 * x - 600`` pre-activations used
    for the step functions in this notebook) do not overflow ``np.exp`` and
    do not emit a ``RuntimeWarning``.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Sigmoid of ``z``, with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], so it cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    stable = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of any other rank unchanged.
    return stable[()]

# Sample the sigmoid at 100 evenly spaced points on [-10, 10] and draw the
# resulting curve as a solid line in a 5x4 figure.
z = np.linspace(-10, 10, 100)
plt.figure(figsize=(5, 4))
plt.plot(z, sigmoid(z), '-')
plt.show()

## Weights and biases in neural network

Consider a neural network with one hidden layer of two hidden neurons, in which the sigmoid activation function is applied only at the output layer, i.e.,

$$h_1^{(1)} = w_1^{(1)}x + b_1^{(1)}$$
$$h_2^{(1)} = w_2^{(1)}x + b_2^{(1)} $$
$$y = \sigma\left(w_1^{(2)}h_1^{(1)}  + w_2^{(2)}h_2^{(1)}+ b^{(2)}\right )$$

where $h$ denotes a hidden neuron, $w$ and $b$ denote a weight and a bias, the superscript $(\cdot)$ is the index of the layer, $y$ is the output, and $\sigma$ is the activation function.

In [None]:
# Draw the 1-2-1 architecture (layer sizes [1, 2, 1]) on a figure whose axis
# frame is hidden. draw_neural_net is the helper imported from the
# draw_neural_net module; the four numbers are presumably the left/right/
# bottom/top extents of the drawing -- confirm against that helper.
nn_121 = plt.figure(figsize=(9, 7))
ax = nn_121.gca()
ax.axis('off')
draw_neural_net(ax, .1, .8, .1, .8, [1, 2, 1])

In [None]:
def plot_nn2d(input, weight, bias, activate=sigmoid,
              unactivated=False, activated_output=True, ylim=(0, 1.1)):
    """Run a forward pass through a small dense network and plot the result.

    The left subplot shows the network output against ``input``; the right
    subplot holds the network diagram produced by ``draw_neural_net``.

    Parameters
    ----------
    input : array
        Sample points; ``input.shape[1]`` is used as the input-layer size.
    weight, bias : sequences
        Per-layer parameters; layer ``i`` computes
        ``h.dot(weight[i]) + bias[i]``.
    activate : callable
        Activation function.
    unactivated : bool
        When true, no activation is applied between layers.
    activated_output : bool
        When true, ``activate`` is applied to the last layer's result.
    ylim : tuple
        Arguments unpacked into ``plt.ylim`` for the left subplot.

    Returns
    -------
    tuple
        ``(y, ax_plt, ax_nn)``: the plotted output and the two axes.
    """
    signal = input
    for depth, w in enumerate(weight):
        # Every layer after the first receives the activated output of its
        # predecessor, unless activations between layers are switched off.
        if depth > 0 and not unactivated:
            signal = activate(signal)
        b = bias[depth]
        signal = signal.dot(w) + b

    y = activate(signal) if activated_output else signal

    plt.figure(figsize=(16, 8))

    # Left panel: network output over the input samples.
    ax_plt = plt.subplot(121)
    ax_plt.set_title('')
    plt.plot(input, y, '-')
    plt.ylim(*ylim)

    # Right panel: network diagram; the layer sizes are read off the input
    # width and the column count of each weight matrix.
    ax_nn = plt.subplot(122)
    ax_nn.axis('off')
    layer_sizes = [input.shape[1]] + [w.shape[1] for w in weight]
    draw_neural_net(ax_nn, .1, .8, .1, .8, layer_sizes, weight, bias)

    return (y, ax_plt, ax_nn)


Drag the sliders of $w^{(1)}_1$ and $b^{(1)}_1$, and see how the curve changes.

In [None]:
def plot_nn121_simple(w=19, b=17):
    """Plot a 1-2-1 network in which only the first hidden neuron is wired up.

    The first hidden neuron gets weight ``w`` and bias ``b`` and feeds the
    output with weight 1; the second hidden neuron has all-zero parameters.
    ``plot_nn2d`` is called with ``unactivated=True``. When ``w`` is non-zero
    the plot title shows ``s = -b/w``.
    """
    samples = np.linspace(0, 1, 50)[:, np.newaxis]
    layer_weights = [np.array([[w, 0]]), np.array([[1], [0]])]
    layer_biases = [[b, 0], [0]]
    _, curve_ax, _ = plot_nn2d(samples, layer_weights, layer_biases, unactivated=True)
    if w == 0:
        # -b/w is undefined for w == 0, so the title is left empty.
        return
    curve_ax.set_title('$s=-b/w={:.2f}$'.format(-b/w))
    

# Wrap plot_nn121_simple in ipywidgets controls; w and b each get the
# (min, max, step) abbreviation (-1000, 1000, 1).
interactive_plot = interactive(plot_nn121_simple, w=(-1000, 1000, 1), b=(-1000, 1000, 1))
# The widget's last child is taken to be the plot output area; its height is
# fixed at 550px (presumably to keep the cell from resizing on every redraw).
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot


We can see that the value $s = -b/w$ controls the position of the step.

From now on, the activation function in the following neural networks is applied at _every_ hidden layer, _except for_ the output layer.

In [None]:
def plot_nn121_step(s11=.4, s12=.6, w21=.6, w22=1.2):
    """Plot a 1-2-1 network built from two very steep hidden neurons.

    Both hidden neurons use input weight 1000 and biases ``-1000 * s11`` and
    ``-1000 * s12``. Their outputs are combined with weights ``w21`` and
    ``w22`` and a zero output bias, and ``plot_nn2d`` is asked not to
    activate the output.
    """
    samples = np.linspace(0, 1, 100)[:, np.newaxis]
    steepness = 1000
    hidden_weights = steepness * np.array([[1, 1]])
    hidden_biases = steepness * np.array([-s11, -s12])
    output_weights = np.array([[w21], [w22]])
    plot_nn2d(samples,
              [hidden_weights, output_weights],
              [hidden_biases, [0]],
              activated_output=False, ylim=(-1, 2))

# Interactive controls for plot_nn121_step: s11 and s12 range over [-2, 2],
# w21 and w22 over [-5, 5], all with step 0.1.
interactive_plot = interactive(plot_nn121_step, 
                               s11=(-2., 2., .1), s12=(-2., 2., .1),
                               w21=(-5., 5., .1), w22=(-5., 5., .1))
# The widget's last child is taken to be the plot output area; fix its height.
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot

In [None]:
def plot_nn121_bump(w2=.6):
    """Plot a 1-2-1 network whose two hidden neurons have opposite output weights.

    Both hidden neurons use input weight 1000, with biases -400 and -600.
    The output weights are ``+w2`` and ``-w2`` with a zero output bias, and
    ``plot_nn2d`` is asked not to activate the output.
    """
    samples = np.linspace(0, 1, 100)[:, np.newaxis]
    hidden_weights = 1000 * np.array([[1, 1]])
    output_weights = np.array([[w2], [-w2]])
    layer_biases = [[-400, -600], [0]]
    plot_nn2d(samples, [hidden_weights, output_weights], layer_biases,
              activated_output=False, ylim=(-1, 2))


# Interactive control for plot_nn121_bump: w2 ranges over [-3, 3], step 0.1.
interactive_plot = interactive(plot_nn121_bump, w2=(-3., 3., .1))
# The widget's last child is taken to be the plot output area; fix its height.
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot

In [None]:
def plot_nn141_bump2(s1=.4, s2=.6, s3=.7, s4=.9, h1=-.8, h2=.9):
    """Plot a 1-4-1 network made of two pairs of steep hidden neurons.

    All four hidden neurons use input weight 1000 and biases ``-1000 * s``
    for ``s1`` .. ``s4``. The first pair feeds the output with weights
    ``+h1`` / ``-h1`` and the second pair with ``+h2`` / ``-h2``; the output
    bias is zero and the output is not activated.
    """
    samples = np.linspace(0, 1, 100)[:, np.newaxis]
    hidden_weights = 1000 * np.array([[1, 1, 1, 1]])
    hidden_biases = 1000 * np.array([-s1, -s2, -s3, -s4])
    output_weights = np.array([[h1], [-h1], [h2], [-h2]])
    plot_nn2d(samples,
              [hidden_weights, output_weights],
              [hidden_biases, [0]],
              activated_output=False, ylim=(-1, 2))

# Interactive controls for plot_nn141_bump2: s1..s4 range over [-3, 3] and
# h1, h2 over [-5, 5], all with step 0.1.
interactive_plot = interactive(plot_nn141_bump2, 
                               s1=(-3., 3., .1), s2=(-3., 3., .1),
                               s3=(-3., 3., .1), s4=(-3., 3., .1),
                               h1=(-5., 5., .1), h2=(-5., 5., .1))
# The widget's last child is taken to be the plot output area; fix its height.
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot

## Fitting a continuous function

Given a toy function
$$f(x) = 0.2 + 0.4x^2 + 0.3x \sin(15x) + 0.05\cos(50x)$$, use a neural network to fit it.


In [None]:
def f(x):
    """Toy target: ``0.2 + 0.4 x^2 + 0.3 x sin(15 x) + 0.05 cos(50 x)``."""
    quadratic = .4 * x ** 2
    slow_wave = .3 * x * np.sin(15*x)
    fast_wave = .05 * np.cos(50*x)
    return .2 + quadratic + slow_wave + fast_wave

# Evaluate the toy function at 100 points on [0, 1] and plot it. The global
# `x` defined here is reused by the later cell that plots logit(f(x)).
x = np.linspace(0, 1, 100)
plt.plot(x, f(x), '-')
plt.show()

The output of a general $l$-layer neural network can be written as

$$y = \sigma\left(\sum_j{w_j^{(l)} a_j^{(l-1)}} + b^{(l)}\right)$$

, where $\sigma$ is the activation function, $a$ denotes an activated hidden neuron.

Fitting the function $f$ means that $y$ needs to be as close as possible to $f(x)$ for every given $x$, i.e.,

$$ y \simeq f(x)$$

$$\sum_j{w_j^{(l)} a_j^{(l-1)}} + b^{(l)} \simeq \sigma^{-1}\left(f(x)\right) $$

**Notice**: $\sigma^{-1}$ is only defined on $(0, 1)$, so this step requires $0 < f(x) < 1$ for every $x$; for an arbitrary $f$ it may not be defined everywhere.

### Logit function
The inverse function of sigmoid is given by
$$\sigma^{-1}(y) = \ln{(y)} - \ln{(1-y)}$$

In [None]:
def logit(y):
    """Inverse of the sigmoid: ``ln(y) - ln(1 - y)``."""
    log_y = np.log(y)
    log_complement = np.log(1 - y)
    return log_y - log_complement

# Plot the logit on [0.001, 0.999]; the end points stay strictly inside
# (0, 1), where both logarithms in logit are finite.
y = np.linspace(1e-3, .999, 100)
plt.plot(y, logit(y))
plt.show()

Now, we can plot $\sigma^{-1}(f)$ (the _yellow_ one) as follows since the range of $f$ (the blue one) is in $(0, 1)$.

In [None]:
# Overlay f (plotted first) and logit(f) (plotted second) on the same axes,
# using the global `x` defined in the cell that plots f.
plt.plot(x, f(x))
plt.plot(x, logit(f(x)))
plt.show()

### Error function

We use the following function to quantify the deviation between the output and the real values:

$$L = \frac{1}{n}\sum_{i=1}^{n}\left|y_i-y'_i\right|$$

In [None]:
def error(y, y_):
    """Mean absolute deviation between ``y`` and ``y_``."""
    deviations = np.abs(y - y_)
    return deviations.mean()

**Exercise**: Try to get the average deviation below 0.4!

In [None]:
def plot_nn1x1_bump5(h1=.4, h2=-.3, h3=-.2, h4=-.4, h5=-.5):
    """Plot a five-bump network against ``logit(f(x))`` and report the error.

    Ten hidden neurons with input weight 1000 and biases ``-1000 * s`` for
    ``s`` in 0, .2, .2, .4, .4, .6, .6, .8, .8, 1 are combined in pairs with
    output weights ``+h`` / ``-h`` for ``h1`` .. ``h5``. The mean absolute
    deviation from ``logit(f(x))`` is written into the plot title, followed by
    ``Good Job!`` when it is below 0.4.
    """
    x = np.linspace(0, 1, 100)[:, np.newaxis]

    step_positions = np.array([0, .2, .2, .4, .4, .6, .6, .8, .8, 1.])
    hidden_weights = 1000 * np.array([[1] * 10])
    hidden_biases = -1000 * step_positions
    # Each height enters once with a plus sign and once with a minus sign.
    output_weights = np.array([[signed]
                               for height in (h1, h2, h3, h4, h5)
                               for signed in (height, -height)])

    y, ax_plt, _ = plot_nn2d(x,
                             [hidden_weights, output_weights],
                             [hidden_biases, [0]],
                             activated_output=False, ylim=(-3, 3))

    target = logit(f(x))
    ax_plt.plot(x, target)

    deviation = error(y, target)
    praise = ' Good Job!' if deviation < 0.4 else ''
    ax_plt.set_title(('$error = {:.3f}$' + praise).format(deviation))


# Exercise: pass your own h1..h5 values here (e.g. plot_nn1x1_bump5(h1=..., h2=...))
# instead of relying on the defaults.
plot_nn1x1_bump5()

In [None]:
# Draw the 2-1 architecture (layer sizes [2, 1]) on a figure whose axis
# frame is hidden.
nn_21 = plt.figure(figsize=(10, 10))
ax = nn_21.gca()
ax.axis('off')
draw_neural_net(ax, .1, .8, .1, .8, [2, 1])

In [None]:
from mpl_toolkits.mplot3d import Axes3D

def plot_nn21(w1=8, b=-4):
    """Plot the output surface of a two-input sigmoid neuron driven by x1 only.

    The surface is ``y = sigmoid(w1 * x1 + b)`` on a 100 x 100 grid over the
    unit square; the title shows the step position ``-b / w1``.

    Parameters
    ----------
    w1 : number
        Weight on the first input.
    b : number
        Bias.

    Returns
    -------
    The 3-D axes holding the surface.
    """
    grid = np.linspace(0, 1, 100)
    x1, x2 = np.meshgrid(grid, grid)
    # meshgrid already returns x1 varying along the plot's first axis.
    # Transposing it (as before) yields x2 on this square grid, which drew
    # the step along the x2 axis even though it is controlled by w1.
    y = sigmoid(x1 * w1 + b)

    fig = plt.figure(figsize=(9, 9))
    # Figure.gca() no longer accepts keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6); add_subplot works on old and new versions.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(x1, x2, y)
    ax.set_xlabel('$x_1$')
    ax.set_ylabel('$x_2$')
    # The 1e-6 offset keeps the title computable when the slider sits at w1 == 0.
    ax.set_title('$s_x=-b/w_1={:.2f}$'.format(-b/(w1+1e-6)))
    return ax

# Interactive controls for plot_nn21: w1 and b each range over
# [-1000, 1000] with step 0.1.
interactive_plot = interactive(plot_nn21, w1=(-1000., 1000., .1), b=(-1000., 1000., .1))
# The widget's last child is taken to be the plot output area; fix its height.
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot

In [None]:
# Draw the 2-4-1 architecture together with a concrete set of parameters.
nn_241 = plt.figure(figsize=(16, 12))
ax = nn_241.gca()
ax.axis('off')

# First layer (2 x 4): hidden neurons 1-2 read only the first input and
# neurons 3-4 only the second, all with weight 1000.
# Second layer (4 x 1): output weights alternate +.3 / -.3.
weights = [np.array([[1000, 1000, 0, 0], 
                      [0, 0, 1000, 1000]]),
           np.array([[.3, -.3, .3, -.3]]).T]

# Hidden biases -400, -600, -300, -700; zero output bias.
biases = [[-400, -600, -300, -700], [0]]
draw_neural_net(ax, .1, .8, .1, .8, [2, 4, 1], weights, biases)

In [None]:
def plot_nn241(w=1, b=0):
    """Plot the output surface of a 2-4-1 network with one bump per input.

    Two steep sigmoids on x1 (biases -400 and -600) and two on x2 (biases
    -300 and -700), all with input weight 1000, are combined with output
    weights ``+w`` / ``-w`` and output bias ``b``. The output is not
    activated.

    Parameters
    ----------
    w : number
        Magnitude of the output weights.
    b : number
        Output bias.

    Returns
    -------
    The 3-D axes holding the surface.
    """
    grid = np.linspace(0, 1, 100)
    x1, x2 = np.meshgrid(grid, grid)

    # meshgrid already returns x1 / x2 varying along the plot's first /
    # second axis. Transposing them (as before) swaps the two inputs, so the
    # x1 bump was drawn along the x2 axis and vice versa.
    y = sigmoid(x1 * 1000 - 400) * w + sigmoid(x1 * 1000 - 600) * -w + \
        sigmoid(x2 * 1000 - 300) * w + sigmoid(x2 * 1000 - 700) * -w + b

    fig = plt.figure(figsize=(9, 9))
    # Figure.gca() no longer accepts keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6); add_subplot works on old and new versions.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(x1, x2, y)
    ax.set_xlabel('$x_1$')
    ax.set_ylabel('$x_2$')
    ax.set_zlim(-3, 3)
    ax.set_title('$w={}, b={}$'.format(w, b))
    return ax

# Interactive controls for plot_nn241: integer ranges -10..10 (step 1) for
# both w and b.
interactive_plot = interactive(plot_nn241, w=(-10, 10, 1), b=(-10, 10, 1))
# The widget's last child is taken to be the plot output area; fix its height.
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot

In [None]:
# Draw the 2-8-2-1 architecture together with a concrete set of parameters.
nn_2821 = plt.figure(figsize=(16, 12))
ax = nn_2821.gca()
ax.axis('off')

# Layer 1 (2 x 8): hidden neurons 1-2 and 5-6 read only the first input,
# neurons 3-4 and 7-8 only the second, all with weight 1000.
# Layer 2 (8 x 2): the first four neurons feed the first unit with +/-.8,
# the last four feed the second unit with +/-.5.
# Layer 3 (2 x 1): output weights .7 and .5.
weights = [np.array([[1000, 1000, 0, 0, 1000, 1000, 0, 0], 
                      [0, 0, 1000, 1000, 0, 0, 1000, 1000]]),
           np.array([[.8, 0], [-.8, 0], [.8, 0], [-.8, 0],
                    [0, .5], [0, -.5], [0, .5,], [0, -.5]]),
           np.array([[.7, .5]]).T]

# One bias list per layer: eight hidden biases, then zeros for the two
# second-layer units and for the output.
biases = [
    [-600, -400, 400, -500, -800, -700, 300, -900],
    [0, 0],
    [0],
]

draw_neural_net(ax, .1, .8, .1, .8, [2, 8, 2, 1], weights, biases)

In [None]:
def plot_nn2821(s11=.6, s12=.4, s23=-.4, s24=.5, s15=.8, s16=.7, s27=-.3, s28=.9, 
                w2_1=.8, w2_2=.5, w31=.7, w32=.5, b=0):
    """Plot the output surface of a 2-8-2-1 network built from steep sigmoids.

    Eight hidden neurons of the form ``sigmoid(1000 * x - 1000 * s)`` are
    grouped into two units of four. In each unit the neurons are weighted
    ``+w, -w, +w, -w`` (``w2_1`` for the first unit, ``w2_2`` for the second)
    and ``b`` is added. The two units are then mixed with ``w31`` and ``w32``.
    No activation is applied after the first hidden layer.

    Parameters
    ----------
    s11, s12, s15, s16 : number
        Step positions of the neurons that read x1.
    s23, s24, s27, s28 : number
        Step positions of the neurons that read x2.
    w2_1, w2_2 : number
        Weight magnitude inside the first / second unit.
    w31, w32 : number
        Output weights of the first / second unit.
    b : number
        Bias added to both units.

    Returns
    -------
    The 3-D axes holding the surface.
    """
    grid = np.linspace(0, 1, 100)
    x1, x2 = np.meshgrid(grid, grid)

    # meshgrid already returns x1 / x2 varying along the plot's first /
    # second axis. Transposing them (as before) swaps the two inputs, so the
    # x1 steps were drawn along the x2 axis and vice versa.
    y = (
            sigmoid(x1 * 1000 - 1000*s11) * w2_1 + sigmoid(x1 * 1000 - 1000*s12) * -w2_1 + \
            sigmoid(x2 * 1000 - 1000*s23) * w2_1 + sigmoid(x2 * 1000 - 1000*s24) * -w2_1 + b
        ) * w31 + \
        (
            sigmoid(x1 * 1000 - 1000*s15) * w2_2 + sigmoid(x1 * 1000 - 1000*s16) * -w2_2 + \
            sigmoid(x2 * 1000 - 1000*s27) * w2_2 + sigmoid(x2 * 1000 - 1000*s28) * -w2_2 + b
        ) * w32

    fig = plt.figure(figsize=(9, 9))
    # Figure.gca() no longer accepts keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6); add_subplot works on old and new versions.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(x1, x2, y)
    ax.set_xlabel('$x_1$')
    ax.set_ylabel('$x_2$')
    ax.set_zlim(-3, 3)
    return ax

# Interactive controls for plot_nn2821. interactive() matches keyword names
# against the function's parameter names and silently ignores the rest, so
# the unit weights must be spelled w2_1 / w2_2 (not w21 / w22) for their
# ranges to take effect. They use a float step so that the function's
# defaults (.8 and .5) can be shown on the sliders.
interactive_plot = interactive(plot_nn2821,
                               s11=(-1., 1., .1), s12=(-1., 1., .1),
                               s23=(-1., 1., .1), s24=(-1., 1., .1),
                               s15=(-1., 1., .1), s16=(-1., 1., .1),
                               s27=(-1., 1., .1), s28=(-1., 1., .1),
                               w2_1=(-10., 10., .1), w2_2=(-10., 10., .1),
                               w31=(0., 1., .1), w32=(0., 1., .1),
                               b=(-10, 10, 1))
# The widget's last child is taken to be the plot output area; fix its height.
output = interactive_plot.children[-1]
output.layout.height = '550px'
# Bare expression as the last line of the cell so the notebook displays it.
interactive_plot

**Homework**: Try changing the structure of the neural network, and even the activation function (tanh, ReLU, etc.).