In [153]:
import numpy as np
import matplotlib.pyplot as plt

from scipy import optimize

class NeuralNetwork(object):
    def __init__(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        
        self.W1 = np.random.rand(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.rand(self.hiddenLayerSize, self.outputLayerSize)
        
    def forward(self, X):
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        
        return yHat
        
    def sigmoid(self, x):
        return 1/(1+np.exp(-x))
    
    def sigmoidPrime(self, x):
        return np.exp(-x)/(1+np.exp(-x))*(1+np.exp(-x))
    
    def costFunction(self, X, y):
        self.yHat = self.forward(X)
        J = 0.5*sum((y-self.yHat)**2)
        return J
    
    def costFunctionPrime(self, X, y):
        self.yHat = self.forward(X)
        
        delta3 = np.multiply(-(y-self.yHat), self.sigmoidPrime(self.z3))
        
        print(delta3)
        print(self.W2)
        dJdW2 = np.dot(self.a2.T, delta3)
        
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)
        
        return dJdW1, dJdW2
    
    def setParams(self, params):
        #Set W1 and W2 using single paramater vector.
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize , self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))
        
    def computeGradients(self, X, y):
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))
    
    def getParams(self):
        #Get W1 and W2 unrolled into vector:
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params

    
class Trainer(object):
    def __init__(self, N):
        self.N = N
        
    def costFunctionWrapper(self, params, X, y):
        self.N.setParams(params)
        cost = self.N.costFunction(X, y)
        grad = self.N.computeGradients(X, y)
        
        return cost, grad
    
    def callbackF(self, params):
        self.N.setParams(params)
        self.J.append(self.N.costFunction(self.X, self.y))
        
    def train(self, X, y):
        params0 = self.N.getParams()
        
        self.X = X
        self.y = y
        
        self.J = []
        
        options = {'maxiter': 200, 'disp': True}
        _res = optimize.minimize(self.costFunctionWrapper, params0, jac=True, method='BFGS', args=(X, y),\
                                 options = options, callback = self.callbackF)
        self.N.setParams(_res.x)
        self.optimizationResults = _res
        

In [146]:
X = np.array([[3,5], [5,1], [10, 2]])
y = np.array([[75],[82],[93]])

x_val = np.array([[8,5]])

In [147]:
NN = NeuralNetwork()

NN.forward(X)

array([[ 0.80857272],
       [ 0.79979466],
       [ 0.81169467]])

In [148]:
dJdW1, dJdW2 = NN.costFunctionPrime(X, y)

cost = NN.costFunction(X, y)

[[-17.56460886]
 [-20.32611122]
 [-21.38679718]]
[[ 0.15796288]
 [ 0.57771859]
 [ 0.76947742]]


In [149]:
T = Trainer(NN)

In [150]:
T.train(X, y)

[[-17.56460886]
 [-20.32611122]
 [-21.38679718]]
[[ 0.15796288]
 [ 0.57771859]
 [ 0.76947742]]
[[-17.56460886]
 [-20.32611122]
 [-21.38679718]]
[[ 0.15796288]
 [ 0.57771859]
 [ 0.76947742]]
[[-3.37219175]
 [-3.97664219]
 [-3.91184004]]
[[ 0.61615715]
 [ 1.16901622]
 [ 1.3944478 ]]
[[ -4.9588976 ]
 [-12.53293604]
 [-14.62349524]]
[[ 2.78902406]
 [ 0.9907829 ]
 [ 0.84989793]]
[[-2.57353928]
 [-5.11780844]
 [-6.55215148]]
[[ 1.55123934]
 [ 1.09231442]
 [ 1.16010354]]
[[-2.77354505]
 [-3.83889389]
 [-3.5519352 ]]
[[ 1.0705868 ]
 [ 1.13174081]
 [ 1.28056158]]
[[-3.06649765]
 [-3.78507801]
 [-3.56650634]]
[[ 0.8114892 ]
 [ 1.15299376]
 [ 1.34549495]]
[[-2.61815962]
 [-3.33881248]
 [-3.06575432]]
[[ 1.1366489 ]
 [ 1.09438665]
 [ 1.22150934]]
[[-1.38130795]
 [-1.79370054]
 [-1.67287978]]
[[ 2.43728771]
 [ 0.85995819]
 [ 0.72556692]]
[[-0.54840129]
 [-0.70982279]
 [-0.79758485]]
[[ 4.70719119]
 [ 0.45856327]
 [-0.06700921]]
[[-0.93276374]
 [-1.21777281]
 [-1.116741  ]]
[[ 3.34105479]
 [ 0.70014

In [151]:
T.J

[array([ 10261.26543767]),
 array([ 10260.49170813]),
 array([ 10255.25330153]),
 array([ 10253.72398432]),
 array([ 10252.10013494]),
 array([ 10251.43406033]),
 array([ 10250.94643297]),
 array([ 10250.72828088]),
 array([ 10250.61211768]),
 array([ 10250.55487023]),
 array([ 10250.52611557]),
 array([ 10250.51211712]),
 array([ 10250.5055747]),
 array([ 10250.50264909]),
 array([ 10250.5013058]),
 array([ 10250.50064557]),
 array([ 10250.50031111]),
 array([ 10250.50014378]),
 array([ 10250.50006262]),
 array([ 10250.50002495]),
 array([ 10250.50000886]),
 array([ 10250.50000289]),
 array([ 10250.50000094]),
 array([ 10250.50000032]),
 array([ 10250.50000012])]

In [154]:
plot(T.J)
grid(1)
xlabel('Iterations')
ylabel('Cost')

TypeError: 'module' object is not callable