# Plotting a Surface

In [5]:
%matplotlib notebook
# %matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

"""https://matplotlib.org/mpl_toolkits/mplot3d/tutorial.html"""

from matplotlib import cm # cm stands for color map

Consider the surface given by 
$$E(w_1, w_2) = w_1^4 + w_2^4 - 16 w_1 w_2.$$

In [6]:
# plt.subplots() returns a (figure, axes) tuple; the "projection" entry in
# subplot_kw requests a 3-D axes object.
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})

# Evaluate the surface on a 100 x 100 grid covering [-3, 3] x [-3, 3].
x = y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)
Z = X**4 + Y**4 - 16*X*Y

surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm)
# Alternative colour map:
#surf = ax.plot_surface(X,Y,Z, cmap=cm.PiYG)

ax.set_zlim(0, 200)
fig.colorbar(surf, shrink=0.5, aspect=10)

# Label every axis and title the figure so it can be read on its own.
ax.set_title(r'$E = X^4 + Y^4 - 16XY$')
ax.set_xlabel('X Axis')
ax.set_ylabel('Y Axis')
ax.set_zlabel('E')

plt.show()

<IPython.core.display.Javascript object>

## Gradient Descent

The gradient of $E$ is
$$ \nabla E = [ 4 w_1^3-16 w_2, 4 w_2^3 - 16 w_1] . $$

The formula for gradient descent is
$$ \mathbf w_{k+1} = \mathbf w_k - \eta \nabla E ( \mathbf w_k) . $$

In [3]:
def E(u, v):
    """Return the value of E(u, v) = u^4 + v^4 - 16 u v."""
    quartic_part = u**4 + v**4
    coupling_part = 16*u*v
    return quartic_part - coupling_part


# Step size and starting point read by the gradient-descent loop in the next cell.
eta = 0.01
x = 1.2
y = 1.2
print(0, '\t', 'x=', x, '\t', 'y=', y, '\t', 'E=', E(x, y))


0 	 x= 1.2 	 y= 1.2 	 E= -18.8928


In [4]:
# Apply 30 gradient-descent updates to the current (x, y), printing each iterate.
# '\t' is the escape sequence for tab.
for i in range(30):
    # Both partial derivatives are evaluated at the current point,
    # before either coordinate is moved.
    grad_x = 4*x**3 - 16*y
    grad_y = 4*y**3 - 16*x
    x, y = x - eta*grad_x, y - eta*grad_y
    print(i+1, '\t', 'x=', round(x, 3), '\t', 'y=', round(y, 3), '\t', 'E=', round(E(x, y), 3))

1 	 x= 1.323 	 y= 1.323 	 E= -21.875
2 	 x= 1.442 	 y= 1.442 	 E= -24.621
3 	 x= 1.553 	 y= 1.553 	 E= -26.95
4 	 x= 1.651 	 y= 1.651 	 E= -28.76
5 	 x= 1.735 	 y= 1.735 	 E= -30.048
6 	 x= 1.804 	 y= 1.804 	 E= -30.889
7 	 x= 1.858 	 y= 1.858 	 E= -31.399
8 	 x= 1.899 	 y= 1.899 	 E= -31.688
9 	 x= 1.929 	 y= 1.929 	 E= -31.843
10 	 x= 1.95 	 y= 1.95 	 E= -31.923
11 	 x= 1.966 	 y= 1.966 	 E= -31.963
12 	 x= 1.976 	 y= 1.976 	 E= -31.982
13 	 x= 1.984 	 y= 1.984 	 E= -31.992
14 	 x= 1.989 	 y= 1.989 	 E= -31.996
15 	 x= 1.992 	 y= 1.992 	 E= -31.998
16 	 x= 1.995 	 y= 1.995 	 E= -31.999
17 	 x= 1.996 	 y= 1.996 	 E= -32.0
18 	 x= 1.998 	 y= 1.998 	 E= -32.0
19 	 x= 1.998 	 y= 1.998 	 E= -32.0
20 	 x= 1.999 	 y= 1.999 	 E= -32.0
21 	 x= 1.999 	 y= 1.999 	 E= -32.0
22 	 x= 1.999 	 y= 1.999 	 E= -32.0
23 	 x= 2.0 	 y= 2.0 	 E= -32.0
24 	 x= 2.0 	 y= 2.0 	 E= -32.0
25 	 x= 2.0 	 y= 2.0 	 E= -32.0
26 	 x= 2.0 	 y= 2.0 	 E= -32.0
27 	 x= 2.0 	 y= 2.0 	 E= -32.0
28 	 x= 2.0 	 y= 2.0 	 E= -32

## Linear Regression Revisited

We will redo the multivariate linear regression example, this time fitting the coefficients with gradient descent.

In [5]:
# Alternative path (disabled):
#data = np.genfromtxt('../../data/multivar_simulated/data.csv',skip_header=1,delimiter=',')

# Read the comma-separated file into a NumPy array, skipping the one header row.
data = np.genfromtxt(
    '../../data/multivar_simulated.csv',
    delimiter=',',
    skip_header=1,
)

In [6]:
# Preview the first three rows of the loaded array.
data[:3,:]

array([[ 0.        ,  9.22345149,  1.62488669, -0.33204394],
       [ 1.        , -6.51121434,  0.25676938,  4.16345627],
       [ 2.        , -3.83393779,  1.78444569,  3.73164655]])

In [7]:
# Column 0 of `data` is not used; column 1 becomes the target vector and
# the remaining columns become the predictor matrix.
X1 = data[:, 2:]
Y = data[:, 1]

In [8]:
# NOTE(review): X has not been rebuilt for the regression yet. At this point it
# is still the meshgrid array created in the surface-plot cell; the design
# matrix X is only assembled in the next cell.
Y.shape, X1.shape, X.shape

((75,), (75, 2), (100, 100))

In [9]:
# Append a column of ones to the predictors, giving one extra coefficient
# that multiplies a constant 1 for every row.
O = np.ones((len(X1), 1))
X = np.concatenate((X1, O), axis=1)
X.shape

(75, 3)

The error function is given by
$$ E = \sum_{j=1}^{N} (y_j-\sum_{s=1}^{k+1} x_{js}m_{s})^2 .$$

Write a function for $E$.

In [65]:
#def Er(M):
#    formula here
#    return the result

The gradient of $E$ is given by
$$ \nabla E  = -2 X^{\intercal}Y + 2
X^{\intercal}XM. $$

Write a function for $\nabla E$.

In [67]:
#def GE(M):
#    return formula here

Choose initial values.

In [70]:
#eta=
#iter_num=
#M=np.array([?,?,?])    

Calculate the initial error.

In [13]:
# Evaluate the error at the initial M (needs Er and M from the exercise cells above).
Er(M)

6199.674559434617

Run a loop for gradient descent and print the values of M and Er(M).

In [1]:
#Write a loop here
#
#print M and Er(M)

Compare the result with the previous result from Linear Regression which was 

[ 1.78777492, -3.47899986,  6.0608333 ]



## Newton's Method

$$ E=w_1^4+ w_2^4 - 16 w_1 w_2$$

$$ \nabla E = [ 4 w_1^3-16 w_2, 4 w_2^3 - 16 w_1]  $$

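Here $\mathbf H E$ denotes the Hessian matrix of $E$. Multiplying its inverse by the gradient gives

$$(\mathbf H E)^{-1} \nabla E = \frac 1 {9w_1^2 w_2^2 -16} \begin{bmatrix} 3 w_1^3 w_2^2 - 8 w_2^3 -16w_1 \\ 3 w_1^2 w_2^3 -8 w_1^3  -16w_2  \end{bmatrix}$$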

$$\boxed{ \mathbf w_{k+1}= \mathbf w_k - \eta \mathbf H E (\mathbf w_{k})^{-1} \nabla E(\mathbf w_k)}$$

In [15]:
def E(u, v):
    """Evaluate E(u, v) = u^4 + v^4 - 16 u v."""
    return (u**4 + v**4) - 16*u*v

# Newton iteration with a full step (eta = 1), started from (1.2, 1.2).
eta = 1
x, y = 1.2, 1.2
print(0, '\t', 'x=', x, '\t', 'y=', y, '\t', 'E=', E(x, y))
for i in range(10):
    # d is the shared denominator of the closed-form Newton direction.
    d = 9*x**2*y**2 - 16
    step_x = (3*x**3*y**2 - 8*y**3 - 16*x) / d
    step_y = (3*x**2*y**3 - 8*x**3 - 16*y) / d
    # Both components use the old (x, y); update them together.
    x, y = x - eta*step_x, y - eta*step_y
    print(i+1, '\t', 'x=', round(x, 3), '\t', 'y=', round(y, 3), '\t', 'E=', round(E(x, y), 3))


0 	 x= 1.2 	 y= 1.2 	 E= -18.8928
1 	 x= 10.8 	 y= 10.8 	 E= 25343.539
2 	 x= 7.283 	 y= 7.283 	 E= 4778.985
3 	 x= 4.981 	 y= 4.981 	 E= 833.891
4 	 x= 3.509 	 y= 3.509 	 E= 106.231
5 	 x= 2.623 	 y= 2.623 	 E= -15.382
6 	 x= 2.169 	 y= 2.169 	 E= -31.005
7 	 x= 2.018 	 y= 2.018 	 E= -31.99
8 	 x= 2.0 	 y= 2.0 	 E= -32.0
9 	 x= 2.0 	 y= 2.0 	 E= -32.0
10 	 x= 2.0 	 y= 2.0 	 E= -32.0


### Haberman's Survival Data Set

https://archive.ics.uci.edu/ml/datasets/Haberman%27s+Survival

In [16]:
import numpy as np
from sklearn.model_selection import train_test_split

In [17]:
# Load the comma-separated Haberman file into a NumPy array.
data = np.genfromtxt(
    '../data/haberman.data',
    delimiter=',',
)

In [18]:
# Preview the first ten rows of the loaded array.
data[:10,:]

array([[30., 64.,  1.,  1.],
       [30., 62.,  3.,  1.],
       [30., 65.,  0.,  1.],
       [31., 59.,  2.,  1.],
       [31., 65.,  4.,  1.],
       [33., 58., 10.,  1.],
       [33., 60.,  0.,  1.],
       [34., 59.,  0.,  2.],
       [34., 66.,  9.,  2.],
       [34., 58., 30.,  1.]])

In [19]:
# Separate the first three columns (inputs) from the last column (class label).
X_raw = data[:, :3]
t_raw = data[:, -1]

In [20]:
# First ten raw class labels.
t_raw[:10]

array([1., 1., 1., 1., 1., 1., 1., 2., 2., 1.])

In [21]:
# First ten rows of the raw input columns.
X_raw[:10,:]

array([[30., 64.,  1.],
       [30., 62.,  3.],
       [30., 65.,  0.],
       [31., 59.,  2.],
       [31., 65.,  4.],
       [33., 58., 10.],
       [33., 60.,  0.],
       [34., 59.,  0.],
       [34., 66.,  9.],
       [34., 58., 30.]])

In [22]:
# Recode the raw labels by taking them modulo 2 (0: death; 1: survival).
t = t_raw % 2
# Add a trailing column of ones so the last weight multiplies a constant 1.
O = np.ones((len(X_raw), 1))
X = np.concatenate((X_raw, O), axis=1)
X.shape

(306, 4)

### Splitting the data

We split the data set into two parts: one for training and the other for testing.

In [23]:
# Hold out 30% of the rows for testing.  A fixed random_state makes the shuffle,
# and therefore every number computed from this split, reproducible across runs.
X_train, X_test, t_train, t_test = train_test_split(
    X, t, test_size=0.3, random_state=0
)
n_train = X_train.shape[0]
n_test = X_test.shape[0]
print(X_train.shape, t_train.shape)
print(X_test.shape, t_test.shape)

(214, 4) (214,)
(92, 4) (92,)


In [24]:
# First ten rows of the training inputs (the last column is the appended ones column).
X_train[:10,:]

array([[63., 63.,  0.,  1.],
       [53., 58.,  4.,  1.],
       [47., 63.,  6.,  1.],
       [47., 63., 23.,  1.],
       [61., 65.,  0.,  1.],
       [60., 67.,  2.,  1.],
       [47., 66.,  0.,  1.],
       [54., 62.,  0.,  1.],
       [55., 69., 22.,  1.],
       [53., 65.,  1.,  1.]])

Define the function $\sigma(x) = \dfrac {e^x}{e^x+1}= \dfrac 1 {1+e^{-x}}$.

In [91]:
#def sigmoid(x):    
#    return the function

Define the error function
$$ E (\mathbf{w}) = - \frac 1 N \sum_{n=1}^N \{ t_n \ln y_n + (1-t_n) \ln (1-y_n)\},  $$
where $y_n=\sigma(w_1 x_{n1}+ w_2 x_{n2} + \cdots + w_k x_{nk}+w_{k+1} )$. 

This error function will be derived when we study Logistic Regression.

In [86]:
#def Er(w):
#    yn=??
#    return ???

The gradient of $E$ is given by

$$\nabla E= \left [ \frac 1 N \sum_{n=1}^N (y_n-t_n)x_{nj} \right ] = \frac 1 N X^\top (\mathbf y - \mathbf t).$$


In [85]:
#def gradE(w):
#    yn=??
#    return the function

Set the initial values.

In [None]:
#w=np.array([?,?,?,?])
#eta=
#iter_num=

Run a loop for gradient descent.

In [1]:
#for i in range(iter_num):
#

In [30]:
# Show the weight vector w (it is set by the exercise cells above).
print(w)

[-0.02089043  0.03877857 -0.09532264  0.00733235]


We compute the accuracy of the trained model on the held-out test set.

In [31]:
# Turn the predicted probabilities for the test inputs into 0/1 labels by rounding.
t_pred = sigmoid(X_test @ w).round()
# The score is computed on the held-out test split (X_test, t_test, n_test),
# so it is reported as test accuracy rather than train accuracy.
print("Test Accuracy:", sum(t_test==t_pred)*100/n_test, "%")

Train Accuracy: 78.26086956521739 %
