In [1]:
from sklearn.datasets import make_regression

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/

### Generate data

In [5]:
rng = np.random.RandomState(0)

In [6]:
n_samples, n_features = 1000, 20

In [7]:
X, y = make_regression(n_samples, n_features, random_state=rng)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

### Baseline

In [9]:
from sklearn.linear_model import Lasso

In [10]:
reg = Lasso()

In [11]:
reg.fit(X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [12]:
print(reg.score(X_test, y_test))

0.9997358820015075


In [13]:
y_pred=reg.predict(X_test)

In [14]:
def r2_score(y_test,y_pred):
    u = ((y_test - y_pred) ** 2).sum()
    v = ((y_test - y_test.mean()) ** 2).sum()
    score = (1 - u/v)
    return score

In [15]:
r2_score(y_test,y_pred)

0.9997358820015074

### MLP

In [16]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [17]:
def tangent(z):
    return (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))

In [222]:
def leaky_rectified_linear(z):
    return z if z > 0 else 0.01*z

In [263]:
def MLP_3layers(X, y, epochs = 1000, lambda_ = 5e-7, learning_rate=1e-3, units_l2=5, units_l3=1):
    
    
    W1=np.random.rand(X.shape[1],units_l2)* 0.1
    b1=np.zeros((units_l2,1))
      
    W2=np.random.rand(W1.shape[1], X[0].reshape(-1,1).shape[1])* 0.1
    b2=np.zeros((units_l3,1))
    
    m=len(X)
      
    for epoch in range(epochs):
        
        
        J0=0
        JW=0
        delW1=np.zeros((X.shape[1],units_l2))
        delb1=np.zeros((units_l2,1))
        delW2=np.zeros((W1.shape[1], X[0].reshape(-1,1).shape[1]))
        delb2=np.zeros((units_l3,1))
        
        for i in range(m):
            
            # forward
            z2=np.dot(W1.T, X[i].reshape(-1,1))+b1

            a2=tangent(z2)
            z3=np.dot(W2.T,a2)+b2 

            a3=leaky_rectified_linear(z3)
#             a3=z3
    
            J0+=(a3-y[i])**2
            JW+=W1.sum(axis=1).sum(axis=0)**2+W2.sum(axis=1).sum(axis=0)**2
            
            # backward
            delta3=-(y[i]-a3)*a3*(1-a3)
            delta2=np.dot(W2,delta3)*a2*(1-a2)
            
            derivW2=np.dot(delta3,a2.T)
            derivb2=a3
            
            derivW1=np.dot(delta2,X[i].reshape(-1,1).T)
            derivb1=a2
            
            delW2=delW2+derivW2.T
            delb2=delb2+derivb2

            delW1=delW1+derivW1.T
            delb1=delb1+derivb1

            
        W2=W2-learning_rate*(1/m*delW2+lambda_*W2)     
        b2=b2-learning_rate*(1/m*delb2)

        W1=W1-learning_rate*(1/m*delW1+lambda_*W1)     
        b1=b1-learning_rate*(1/m*delb1)
        
        J=(1/m)*J0+(lambda_/2)*JW
#         print(epoch)
        
        if epoch%50==0:
            print("loss after {} epochs: ".format(epoch+1), J)
        
        
        
    return W1, b1, W2, b2

In [264]:
W1, b1, W2, b2= MLP_3layers(X_train, y_train, epochs=1000)

loss after 1 epochs:  [[44858.76230319]]
loss after 51 epochs:  [[44835.56760142]]
loss after 101 epochs:  [[44785.10196028]]
loss after 151 epochs:  [[44763.43369937]]
loss after 201 epochs:  [[44757.91098124]]
loss after 251 epochs:  [[44754.35172879]]
loss after 301 epochs:  [[44751.58317465]]
loss after 351 epochs:  [[44748.89623994]]
loss after 401 epochs:  [[44746.11701713]]
loss after 451 epochs:  [[44743.11196892]]
loss after 501 epochs:  [[44739.88927657]]
loss after 551 epochs:  [[44736.53614766]]
loss after 601 epochs:  [[44733.14461467]]
loss after 651 epochs:  [[44729.96025862]]
loss after 701 epochs:  [[44727.13874249]]
loss after 751 epochs:  [[44724.67218502]]
loss after 801 epochs:  [[44722.46303927]]
loss after 851 epochs:  [[44720.44923949]]
loss after 901 epochs:  [[44718.602591]]
loss after 951 epochs:  [[44716.89338499]]


In [265]:
def predict_3layer(X, W1, b1, W2, b2):
    ans=[]
    for i in range(len(X)):
        z2=np.dot(W1.T, X[i].reshape(-1,1))+b1
        a2=tangent(z2)
        z3=np.dot(W2.T,a2)+b2 
        a3=leaky_rectified_linear(z3)
        ans.append(a3)
    
    return ans

In [266]:
y_pred=predict_3layer(X, W1, b1, W2, b2)

In [267]:
r2_score(y_test,y_pred)

-1000.3069766131155