In [1]:
from sklearn.datasets import make_regression

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/

### Generate data

In [5]:
rng = np.random.RandomState(0)

In [6]:
n_samples, n_features = 1000, 20

In [7]:
X, y = make_regression(n_samples, n_features, random_state=rng)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

### Baseline

In [9]:
from sklearn.linear_model import Lasso, Ridge

In [10]:
# reg = Lasso()
reg = Ridge(alpha=.5)

In [11]:
reg.fit(X_train, y_train)

Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [12]:
print(reg.score(X_test, y_test))

0.999999399117411


In [13]:
y_pred=reg.predict(X_test)

In [14]:
def r2_score(y_test,y_pred):
    u = ((y_test - y_pred) ** 2).sum()
    v = ((y_test - y_test.mean()) ** 2).sum()
    score = (1 - u/v)
    return score

In [15]:
r2_score(y_test,y_pred)

0.9999993991174111

### MLP

In [16]:
mu =  X_train.mean(0)
std = X_train.std(0)
X_train = np.divide(X_train - mu, std)
X_test=np.divide(X_test - mu, std)

In [17]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [18]:
def tangent(z):
    return (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))

In [19]:
def leaky_rectified_linear(z):
    return z if z > 0 else 0.01*z

In [20]:
def rectified_linear(z):
    return np.maximum(0,z)

In [21]:
def MLP_4layers(X, y, epochs = 1000, lambda_ =0.01, learning_rate=1e-3, units_l2=5, units_l3=5, units_l4=1):
    
    units_l1 = X.shape[1]
    
    W1=np.random.rand(units_l2,units_l1)*0.5
    b1=np.zeros((units_l2,1))
      
    W2=np.random.rand(units_l3, units_l2)*0.5
    b2=np.zeros((units_l3,1))
    
    
    W3=np.random.rand(units_l4, units_l3)*0.5
    b3=np.zeros((units_l4,1))
    
    m=len(X)
      
    for epoch in range(epochs):
        
        
        J0=0
        
        delW1=np.zeros((units_l2,units_l1))
        delb1=np.zeros((units_l2,1))
        delW2=np.zeros((units_l3, units_l2))
        delb2=np.zeros((units_l3,1))
        delW3=np.zeros((units_l4, units_l3))
        delb3=np.zeros((units_l4,1))
        
        for i in range(m):
            
            # forward
            z2=np.dot(W1, X[i].reshape(-1,1))+b1

            a2=tangent(z2)
            z3=np.dot(W2,a2)+b2 
            a3=tangent(z3)
            z4=np.dot(W3,a3)+b3 
#             a4=leaky_rectified_linear(z4)
            a4=z4
            
#             print(a4)
            J0+=0.5*(a4-y[i])**2
            
            # backward
            delta4=-(y[i]-a4)
            delta3=np.dot(W3.T,delta4)*(1-a3**2)
            delta2=np.dot(W2.T,delta3)*(1-a2**2)
            
            
            derivW3=np.dot(delta4, a3.T)
            derivb3=delta4
            
            derivW2=np.dot(delta3, a2.T)
            derivb2=delta3
            
            derivW1=np.dot(delta2, X[i].reshape(-1,1).T)
            derivb1=delta2
            
            delW3+=derivW3
            delb3+=derivb3
            
            delW2+=derivW2
            delb2+=derivb2

            delW1+=derivW1
            delb1+=derivb1

        W3=W3-learning_rate*(1/m*delW3+lambda_*W3)     
        b3=b3-learning_rate*(1/m*delb3)  
        
        W2=W2-learning_rate*(1/m*delW2+lambda_*W2)     
        b2=b2-learning_rate*(1/m*delb2)

        W1=W1-learning_rate*(1/m*delW1+lambda_*W1)     
        b1=b1-learning_rate*(1/m*delb1)
#         print(W3)
#         print(b3)
#         print(W2)
#         print(b2)
#         print(W1)
#         print(b1)
#         print()
        J=(1/m)*J0+(lambda_/2)*((W1**2).sum(axis=1).sum(axis=0)
                                +(W2**2).sum(axis=1).sum(axis=0)
                                +(W3**2).sum(axis=1).sum(axis=0))
#         print(epoch)
        
        if epoch%50==0:
            print("loss after {} epochs: ".format(epoch+1), J)
        
        
        
    return W1, b1, W2, b2, W3, b3

In [22]:
W1, b1, W2, b2, W3, b3 = MLP_4layers(X_train, y_train, epochs=5001, lambda_ =0.01, learning_rate=1e-3)

loss after 1 epochs:  [[22339.95406826]]
loss after 51 epochs:  [[17120.3963288]]
loss after 101 epochs:  [[13374.19944355]]
loss after 151 epochs:  [[10871.08963165]]
loss after 201 epochs:  [[9164.76689516]]
loss after 251 epochs:  [[7842.45730918]]
loss after 301 epochs:  [[6821.48887259]]
loss after 351 epochs:  [[6019.91502494]]
loss after 401 epochs:  [[5316.23699656]]
loss after 451 epochs:  [[4778.38910399]]
loss after 501 epochs:  [[4342.61613045]]
loss after 551 epochs:  [[3989.72042338]]
loss after 601 epochs:  [[3692.37607522]]
loss after 651 epochs:  [[3440.648783]]
loss after 701 epochs:  [[3218.93771693]]
loss after 751 epochs:  [[3014.15359856]]
loss after 801 epochs:  [[2834.77244009]]
loss after 851 epochs:  [[2651.49858387]]
loss after 901 epochs:  [[2489.20989689]]
loss after 951 epochs:  [[2350.8218529]]
loss after 1001 epochs:  [[2228.71265007]]
loss after 1051 epochs:  [[2112.93954773]]
loss after 1101 epochs:  [[2024.56788638]]
loss after 1151 epochs:  [[1931.23

In [28]:
def predict_4layer(X, W1, b1, W2, b2, W3, b3 ):
    ans=[]
    for i in range(len(X)):
        z2=np.dot(W1, X[i].reshape(-1,1))+b1

        a2=tangent(z2)
        z3=np.dot(W2,a2)+b2 
        a3=tangent(z3)
        z4=np.dot(W3,a3)+b3 
#             a4=leaky_rectified_linear(z4)
        a4=z4
        ans.append(a4)
    
    return ans

In [29]:
y_pred=predict_4layer(X_test, W1, b1, W2, b2, W3, b3 )

In [30]:
r2_score(y_test,y_pred)

-493.2682224216431

In [31]:
y_pred

[array([[-260.42501788]]),
 array([[-32.0493289]]),
 array([[-3.10988851]]),
 array([[-58.1625875]]),
 array([[196.70691402]]),
 array([[-53.85254345]]),
 array([[-50.83240306]]),
 array([[-68.27526683]]),
 array([[181.96790488]]),
 array([[-40.76720421]]),
 array([[215.6884183]]),
 array([[80.46700755]]),
 array([[390.17171291]]),
 array([[13.02603207]]),
 array([[-222.21858443]]),
 array([[-31.83406254]]),
 array([[-225.13040681]]),
 array([[185.93190273]]),
 array([[-189.25511548]]),
 array([[305.67665162]]),
 array([[-80.36286018]]),
 array([[161.76828939]]),
 array([[-254.88979709]]),
 array([[-80.84925614]]),
 array([[-81.15148725]]),
 array([[-68.21243253]]),
 array([[-55.61147167]]),
 array([[80.4538324]]),
 array([[389.52424692]]),
 array([[79.96408952]]),
 array([[-79.85092882]]),
 array([[-390.85797861]]),
 array([[-85.43987925]]),
 array([[138.24205585]]),
 array([[-79.32122629]]),
 array([[-391.32122827]]),
 array([[-391.30746698]]),
 array([[-222.5811869]]),
 array([[-33.

In [32]:
y_test

array([-253.05985268,   18.97523909,   37.81213684,  -14.88623286,
        206.18505098,   -1.36268679,   -4.16039394,  -38.10569868,
        191.26480967,   18.24059751,  215.37973139,   96.69451153,
        578.34731328,   53.72826949, -146.05187323,   13.47618899,
       -153.38589681,  219.75062031, -118.58664411,  316.44531338,
        -88.76081405,  170.38205654, -249.97854404,  -86.58175428,
        -95.35435497,  -24.4516394 ,    1.12659645,   91.73337799,
        413.69894639,   89.5235367 ,  -61.47186557, -352.32932266,
        -90.47198422,  161.47548016,  -50.25507961, -394.50346997,
       -383.98753584, -160.21886979,   -4.85207633,  139.93263003,
       -138.45155543, -208.6001872 ,  154.31458275,  -83.0083049 ,
        105.7179178 ,  346.42303848, -143.94560538,  -44.10121947,
       -366.45896839,  149.36646353,  201.63963933, -186.77605718,
       -286.45715458,  -12.05426261,  -35.41070325,  -63.70189217,
         83.71082528,  357.25842752,   38.74520473,  -78.13046