In [1]:
from sklearn.datasets import make_regression

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Reference: UFLDL tutorial on multi-layer neural networks
# http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/

### Generate data

In [5]:
# Seeded NumPy generator; it is handed to make_regression and train_test_split.
rng = np.random.RandomState(seed=0)

In [6]:
# Size of the synthetic regression problem.
n_samples = 1000
n_features = 20

In [7]:
# Draw the synthetic regression data set from the seeded generator.
X, y = make_regression(
    n_samples=n_samples,
    n_features=n_features,
    random_state=rng,
)

In [8]:
# Hold out part of the data for evaluation (scikit-learn's default split size).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=rng,
)

### Baseline

In [9]:
from sklearn.linear_model import Lasso, Ridge

In [10]:
# Baseline linear model: ridge regression (swap in Lasso() to compare).
reg = Ridge(alpha=0.5)

In [11]:
# Fit the baseline on the training split; the fitted estimator is the cell output.
reg.fit(X=X_train, y=y_train)

Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [12]:
# R^2 of the baseline on the held-out split.
baseline_r2 = reg.score(X_test, y_test)
print(baseline_r2)

0.999999399117411


In [13]:
# Baseline predictions for the held-out split.
y_pred = reg.predict(X=X_test)

In [14]:
def r2_score(y_test, y_pred):
    """Return the coefficient of determination (R^2) of a set of predictions.

    The score is ``1 - u / v`` where ``u`` is the residual sum of squares
    between ``y_test`` and ``y_pred`` and ``v`` is the total sum of squares of
    ``y_test`` around its mean.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, holding as many elements as ``y_test``.

    Returns
    -------
    numpy.float64
        1.0 for a perfect fit, 0.0 for a predictor that always outputs the
        mean of ``y_test``; negative for anything worse.  When ``y_test`` is
        constant ``v`` is zero and the result is ``nan`` or ``-inf``.

    Raises
    ------
    ValueError
        If the two inputs hold incompatible numbers of elements.
    """
    # Flatten both inputs so that, e.g., (n,) targets and (n, 1) predictions are
    # compared element-wise instead of being broadcast to an (n, n) matrix,
    # which would silently yield a wrong score.
    y_true = np.asarray(y_test, dtype=float).ravel()
    y_hat = np.asarray(y_pred, dtype=float).ravel()

    u = ((y_true - y_hat) ** 2).sum()
    v = ((y_true - y_true.mean()) ** 2).sum()
    return 1 - u / v

In [15]:
# Cross-check the hand-written R^2 against the estimator's own score.
r2_score(y_test=y_test, y_pred=y_pred)

0.9999993991174111

In [18]:
from sklearn.metrics import mean_squared_error

In [19]:
# Mean squared error of the baseline on the held-out split.
mean_squared_error(y_true=y_test, y_pred=y_pred)

0.027717268116372908

### MLP

In [20]:
# Optional feature standardisation using training-set statistics (currently disabled).
# mu =  X_train.mean(0)
# std = X_train.std(0)
# X_train = np.divide(X_train - mu, std)
# X_test=np.divide(X_test - mu, std)

In [21]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [22]:
def tangent(z):
    """Hyperbolic tangent activation, evaluated element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``tanh(z)`` with the same shape as ``z``.
    """
    # np.tanh saturates cleanly at +/-1.  The explicit
    # (e^z - e^-z) / (e^z + e^-z) form overflows to inf / inf = nan once |z|
    # exceeds roughly 710, and evaluates exp four times per call.
    return np.tanh(z)

In [23]:
def leaky_rectified_linear(z, negative_slope=0.01):
    """Leaky ReLU activation, evaluated element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).
    negative_slope : float, optional
        Factor applied to non-positive entries (default 0.01).

    Returns
    -------
    numpy scalar or numpy.ndarray
        ``z`` where ``z > 0`` and ``negative_slope * z`` elsewhere; arrays keep
        their shape, scalar input yields a scalar.
    """
    values = np.asarray(z)
    # np.where selects per element; a plain ``z if z > 0 else ...`` raises
    # "truth value of an array is ambiguous" for arrays with several entries.
    activated = np.where(values > 0, values, negative_slope * values)
    # Unwrap 0-d results so scalar input still produces a scalar.
    return activated if activated.ndim else activated[()]

In [24]:
def rectified_linear(z):
    """ReLU activation: element-wise maximum of zero and ``z``."""
    floor = 0
    return np.maximum(floor, z)

In [25]:
def _mlp_forward(X, W1, b1, W2, b2, W3, b3):
    """Run the tanh-tanh-linear network on a batch (rows of X are samples).

    Returns the activations ``(a2, a3, a4)`` with one sample per column.
    """
    a2 = np.tanh(np.dot(W1, X.T) + b1)
    a3 = np.tanh(np.dot(W2, a2) + b2)
    a4 = np.dot(W3, a3) + b3  # linear output layer
    return a2, a3, a4


def _mlp_r2(y_true, y_hat):
    """Coefficient of determination on flattened targets and predictions."""
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_hat = np.asarray(y_hat, dtype=float).ravel()
    residual = ((y_true - y_hat) ** 2).sum()
    total = ((y_true - y_true.mean()) ** 2).sum()
    return 1 - residual / total


def MLP_4layers(X, y, epochs=1000, lambda_=0.01, learning_rate=1e-3,
                units_l2=5, units_l3=5, units_l4=1,
                X_val=None, y_val=None, random_state=None):
    """Train a 4-layer MLP regressor with full-batch gradient descent.

    Architecture: input -> tanh(units_l2) -> tanh(units_l3) -> linear(units_l4).
    The objective is the mean of ``0.5 * (prediction - target) ** 2`` plus an
    L2 penalty ``lambda_ / 2 * sum(W ** 2)`` on the weight matrices (biases are
    not penalised).

    Parameters
    ----------
    X : array-like, shape (m, n_features)
        Training inputs, one sample per row.
    y : array-like, m elements
        Training targets.
    epochs : int
        Number of full-batch gradient steps.
    lambda_ : float
        L2 regularisation strength.
    learning_rate : float
        Gradient-descent step size.
    units_l2, units_l3, units_l4 : int
        Widths of the two hidden layers and of the output layer.
    X_val, y_val : array-like, optional
        Data used for the R^2 score printed every 50 epochs.  When both are
        omitted the function falls back to notebook-level ``X_test`` /
        ``y_test`` if they exist; otherwise no score is printed.
    random_state : None, int or numpy.random.RandomState, optional
        Source of the initial weights.  ``None`` draws from NumPy's global
        random state.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)`` - weights of shape ``(units_out, units_in)``
        and biases of shape ``(units_out, 1)`` for the three layers.

    Raises
    ------
    ValueError
        If ``X`` holds no samples or ``y`` does not hold one target per sample.
    """
    X = np.asarray(X, dtype=float)
    y_row = np.asarray(y, dtype=float).reshape(1, -1)
    m, units_l1 = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y_row.shape[1] != m:
        raise ValueError("X and y must contain the same number of samples")

    if X_val is None and y_val is None:
        # Fall back to notebook-level X_test / y_test when present so that
        # existing calls keep printing the score; passing X_val / y_val
        # explicitly is preferred.
        X_val = globals().get("X_test")
        y_val = globals().get("y_test")
    report_score = X_val is not None and y_val is not None
    if report_score:
        X_val = np.asarray(X_val, dtype=float)

    if random_state is None:
        rng = np.random  # NumPy's global random state
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Small uniform initial weights in [0, 0.01); biases start at zero.
    W1 = rng.rand(units_l2, units_l1) * 0.01
    b1 = np.zeros((units_l2, 1))
    W2 = rng.rand(units_l3, units_l2) * 0.01
    b2 = np.zeros((units_l3, 1))
    W3 = rng.rand(units_l4, units_l3) * 0.01
    b3 = np.zeros((units_l4, 1))

    for epoch in range(epochs):
        # Forward pass over the whole batch (one sample per column).
        a2, a3, a4 = _mlp_forward(X, W1, b1, W2, b2, W3, b3)

        # Backward pass; d/dz tanh(z) = 1 - tanh(z) ** 2.
        delta4 = a4 - y_row
        delta3 = np.dot(W3.T, delta4) * (1 - a3 ** 2)
        delta2 = np.dot(W2.T, delta3) * (1 - a2 ** 2)
        data_loss = 0.5 * (delta4 ** 2).sum() / m

        # A matrix product sums the per-sample outer products in one call.
        grad_W3 = np.dot(delta4, a3.T) / m
        grad_b3 = delta4.sum(axis=1, keepdims=True) / m
        grad_W2 = np.dot(delta3, a2.T) / m
        grad_b2 = delta3.sum(axis=1, keepdims=True) / m
        grad_W1 = np.dot(delta2, X) / m
        grad_b1 = delta2.sum(axis=1, keepdims=True) / m

        W3 = W3 - learning_rate * (grad_W3 + lambda_ * W3)
        b3 = b3 - learning_rate * grad_b3
        W2 = W2 - learning_rate * (grad_W2 + lambda_ * W2)
        b2 = b2 - learning_rate * grad_b2
        W1 = W1 - learning_rate * (grad_W1 + lambda_ * W1)
        b1 = b1 - learning_rate * grad_b1

        if epoch % 50 == 0:
            # The penalty term is evaluated on the freshly updated weights.
            penalty = (lambda_ / 2) * ((W1 ** 2).sum() + (W2 ** 2).sum() + (W3 ** 2).sum())
            J = data_loss + penalty
            print("loss after {} epochs: ".format(epoch+1), J)
            if report_score:
                val_pred = _mlp_forward(X_val, W1, b1, W2, b2, W3, b3)[2]
                print("score: ", _mlp_r2(y_val, val_pred))
            print()

    return W1, b1, W2, b2, W3, b3

In [30]:
# Train the network; the call returns the weights and biases of all three layers.
W1, b1, W2, b2, W3, b3 = MLP_4layers(
    X_train,
    y_train,
    epochs=1001,
    lambda_=0.005,
    learning_rate=0.001,
)

loss after 1 epochs:  [[22435.02036164]]
score:  -0.0014189433435227716

loss after 51 epochs:  [[22434.12995878]]
score:  -0.001345932054463539

loss after 101 epochs:  [[22433.31812677]]
score:  -0.0012779641004168063

loss after 151 epochs:  [[22432.54279579]]
score:  -0.0012129808944785214

loss after 201 epochs:  [[22430.36105656]]
score:  -0.001080030754541994

loss after 251 epochs:  [[19192.04742395]]
score:  0.1455105185963136

loss after 301 epochs:  [[14684.36168644]]
score:  0.34188314345427884

loss after 351 epochs:  [[11781.39043941]]
score:  0.4691311847786055

loss after 401 epochs:  [[9949.10669714]]
score:  0.5532807057360294

loss after 451 epochs:  [[8475.19292712]]
score:  0.6138365185794872

loss after 501 epochs:  [[7347.40399822]]
score:  0.6581083311982243

loss after 551 epochs:  [[6446.44594391]]
score:  0.6973453624620352

loss after 601 epochs:  [[5723.85827635]]
score:  0.730044307693495

loss after 651 epochs:  [[5216.43503618]]
score:  0.752921358413751

In [31]:
def predict_4layer(X, W1, b1, W2, b2, W3, b3 ):
    """Predict with a trained tanh-tanh-linear network.

    Parameters
    ----------
    X : array-like, shape (m, n_features)
        Inputs, one sample per row.  A 1-D ``X`` is treated as ``m`` samples
        with a single feature each.
    W1, b1, W2, b2, W3, b3 : numpy.ndarray
        Layer weights of shape ``(units_out, units_in)`` and biases of shape
        ``(units_out, 1)``.

    Returns
    -------
    list of numpy.ndarray
        One ``(units_out_last, 1)`` array per sample; an empty list when ``X``
        holds no samples.
    """
    if len(X) == 0:
        return []

    samples = np.asarray(X, dtype=float).reshape(len(X), -1)

    # Whole-batch forward pass (one sample per column).  np.tanh saturates at
    # +/-1 instead of overflowing to nan for large pre-activations.
    a2 = np.tanh(np.dot(W1, samples.T) + b1)
    a3 = np.tanh(np.dot(W2, a2) + b2)
    a4 = np.dot(W3, a3) + b3  # linear output layer

    # Keep the historical return type: a list with one column vector per sample.
    return [column.reshape(-1, 1) for column in a4.T]

In [32]:
# Predictions of the trained network on the held-out split.
mlp_params = (W1, b1, W2, b2, W3, b3)
y_pred = predict_4layer(X_test, *mlp_params)

In [33]:
# Flatten the per-sample column vectors into one 1-D array before scoring.
r2_score(y_test, np.stack(y_pred, axis=0).ravel())

0.8555519191703171

In [34]:
# Mean squared error of the network on the held-out split (predictions flattened to 1-D).
mean_squared_error(y_test, np.stack(y_pred, axis=0).ravel())

6663.042429302522