In [1]:
from sklearn.datasets import make_regression

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/

### Generate data

In [5]:
rng = np.random.RandomState(0)

In [6]:
n_samples, n_features = 1000, 20

In [7]:
X, y = make_regression(n_samples, n_features, random_state=rng)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

### Baseline

In [9]:
from sklearn.linear_model import Lasso, Ridge

In [10]:
# reg = Lasso()
reg = Ridge(alpha=.5)

In [11]:
reg.fit(X_train, y_train)

Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [12]:
print(reg.score(X_test, y_test))

0.999999399117411


In [13]:
y_pred=reg.predict(X_test)

In [14]:
def r2_score(y_test,y_pred):
    u = ((y_test - y_pred) ** 2).sum()
    v = ((y_test - y_test.mean()) ** 2).sum()
    score = (1 - u/v)
    return score

In [15]:
r2_score(y_test,y_pred)

0.9999993991174111

### MLP

In [68]:
mu =  X_train.mean(0)
std = X_train.std(0)
X_train = np.divide(X_train - mu, std)
X_test=np.divide(X_test - mu, std)

In [69]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [70]:
def tangent(z):
    return (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))

In [71]:
def leaky_rectified_linear(z):
    return z if z > 0 else 0.01*z

In [72]:
def MLP_4layers(X, y, epochs = 1000, lambda_ =0.01, learning_rate=1e-3, units_l2=5, units_l3=5, units_l4=1):
    
    units_l1 = X.shape[1]
    
    W1=np.random.rand(units_l2,units_l1)*0.5
    b1=np.zeros((units_l2,1))
      
    W2=np.random.rand(units_l3, units_l2)*0.5
    b2=np.zeros((units_l3,1))
    
    
    W3=np.random.rand(units_l4, units_l3)*0.5
    b3=np.zeros((units_l4,1))
    
    m=len(X)
      
    for epoch in range(epochs):
        
        
        J0=0
        JW=0
        
        delW1=np.zeros((units_l2,units_l1))
        delb1=np.zeros((units_l2,1))
        delW2=np.zeros((units_l3, units_l2))
        delb2=np.zeros((units_l3,1))
        delW3=np.zeros((units_l4, units_l3))
        delb3=np.zeros((units_l4,1))
        
        for i in range(m):
            
            # forward
            z2=np.dot(W1, X[i].reshape(-1,1))+b1

            a2=tangent(z2)
            z3=np.dot(W2,a2)+b2 
            a3=tangent(z3)
            z4=np.dot(W3,a3)+b3 
#             a4=leaky_rectified_linear(z4)
            a4=z4
    
            J0+=(a4-y[i])**2
            JW+=W1.sum(axis=1).sum(axis=0)**2+W2.sum(axis=1).sum(axis=0)**2+W3.sum(axis=1).sum(axis=0)
            
            # backward
            delta4=-(y[i]-a4)*a4*(1-a4)
            delta3=np.dot(W3.T,delta4)*a3*(1-a3)
            delta2=np.dot(W2.T,delta3)*a2*(1-a2)
            
            
            derivW3=np.dot(delta4,a3.T)
            derivb3=delta4
            
            derivW2=np.dot(delta3,a2.T)
            derivb2=delta3
            
            derivW1=np.dot(delta2,X[i].reshape(-1,1).T)
            derivb1=delta2
            
            delW3=delW3+derivW3
            delb3=delb3+derivb3
            
            delW2=delW2+derivW2
            delb2=delb2+derivb2

            delW1=delW1+derivW1
            delb1=delb1+derivb1

        W3=W3-learning_rate*(1/m*delW3+lambda_*W3)     
        b3=b3-learning_rate*(1/m*delb3)  
        
        W2=W2-learning_rate*(1/m*delW2+lambda_*W2)     
        b2=b2-learning_rate*(1/m*delb2)

        W1=W1-learning_rate*(1/m*delW1+lambda_*W1)     
        b1=b1-learning_rate*(1/m*delb1)
        
        J=(1/m)*J0+(lambda_/2)*JW
#         print(epoch)
        
        if epoch%50==0:
            print("loss after {} epochs: ".format(epoch+1), J)
        
        
        
    return W1, b1, W2, b2, W3, b3

In [73]:
W1, b1, W2, b2, W3, b3 = MLP_4layers(X_train, y_train, epochs=1001, lambda_ =0.01, learning_rate=1e-4)

loss after 1 epochs:  [[47364.18984497]]
loss after 51 epochs:  [[46880.6705673]]
loss after 101 epochs:  [[46741.41101371]]
loss after 151 epochs:  [[46654.57492423]]
loss after 201 epochs:  [[46590.56837975]]
loss after 251 epochs:  [[46541.20381796]]
loss after 301 epochs:  [[46502.11185738]]
loss after 351 epochs:  [[46470.23471085]]
loss after 401 epochs:  [[46443.36447072]]
loss after 451 epochs:  [[46419.9384404]]
loss after 501 epochs:  [[46398.87093376]]
loss after 551 epochs:  [[46379.41607057]]
loss after 601 epochs:  [[46361.06457669]]
loss after 651 epochs:  [[46343.47000265]]
loss after 701 epochs:  [[46326.39733704]]
loss after 751 epochs:  [[46309.68766318]]
loss after 801 epochs:  [[46293.233987]]
loss after 851 epochs:  [[46276.96474383]]
loss after 901 epochs:  [[46260.83255424]]
loss after 951 epochs:  [[46244.80655985]]
loss after 1001 epochs:  [[46228.86719914]]


In [74]:
def predict_4layer(X, W1, b1, W2, b2, W3, b3 ):
    ans=[]
    for i in range(len(X)):
        z2=np.dot(W1, X[i].reshape(-1,1))+b1

        a2=tangent(z2)
        z3=np.dot(W2,a2)+b2 
        a3=tangent(z3)
        z4=np.dot(W3,a3)+b3 
#             a4=leaky_rectified_linear(z4)
        a4=z4
        ans.append(a4)
    
    return ans

In [75]:
y_pred=predict_4layer(X_test, W1, b1, W2, b2, W3, b3 )

In [76]:
r2_score(y_test,y_pred)

-249.3073398984007

In [77]:
y_pred

[array([[0.74998576]]),
 array([[-0.02164332]]),
 array([[1.02363011]]),
 array([[0.82261119]]),
 array([[1.03822589]]),
 array([[0.76155433]]),
 array([[0.11802498]]),
 array([[0.40600811]]),
 array([[1.01027174]]),
 array([[0.57107044]]),
 array([[0.67417695]]),
 array([[0.16419189]]),
 array([[1.05259261]]),
 array([[0.50443318]]),
 array([[-0.00051052]]),
 array([[0.52660541]]),
 array([[0.63602196]]),
 array([[1.05409934]]),
 array([[0.56736764]]),
 array([[0.1937617]]),
 array([[0.93269558]]),
 array([[0.99273354]]),
 array([[0.17619117]]),
 array([[1.02259744]]),
 array([[1.03625876]]),
 array([[-0.07338674]]),
 array([[0.19435837]]),
 array([[0.91324946]]),
 array([[1.04577826]]),
 array([[-0.05110444]]),
 array([[1.05435802]]),
 array([[0.32825367]]),
 array([[0.50808015]]),
 array([[0.82408166]]),
 array([[0.73526562]]),
 array([[-0.07831741]]),
 array([[0.011312]]),
 array([[0.98041834]]),
 array([[0.28478198]]),
 array([[0.81033677]]),
 array([[0.04744151]]),
 array([[-0.03

In [78]:
y_test

array([-253.05985268,   18.97523909,   37.81213684,  -14.88623286,
        206.18505098,   -1.36268679,   -4.16039394,  -38.10569868,
        191.26480967,   18.24059751,  215.37973139,   96.69451153,
        578.34731328,   53.72826949, -146.05187323,   13.47618899,
       -153.38589681,  219.75062031, -118.58664411,  316.44531338,
        -88.76081405,  170.38205654, -249.97854404,  -86.58175428,
        -95.35435497,  -24.4516394 ,    1.12659645,   91.73337799,
        413.69894639,   89.5235367 ,  -61.47186557, -352.32932266,
        -90.47198422,  161.47548016,  -50.25507961, -394.50346997,
       -383.98753584, -160.21886979,   -4.85207633,  139.93263003,
       -138.45155543, -208.6001872 ,  154.31458275,  -83.0083049 ,
        105.7179178 ,  346.42303848, -143.94560538,  -44.10121947,
       -366.45896839,  149.36646353,  201.63963933, -186.77605718,
       -286.45715458,  -12.05426261,  -35.41070325,  -63.70189217,
         83.71082528,  357.25842752,   38.74520473,  -78.13046