In [32]:
import numpy as np
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()

# Features as an M-by-N matrix (one row per pattern) and targets as a column
# matrix.  np.matrix is used so that `*` between two of them is a matrix
# product rather than an elementwise one.
X = np.matrix(data.data)
y = np.matrix(data.target).T

# M = number of patterns (rows), N = number of input features (columns)
M, N = X.shape

# Normalize each input feature

def normalize(X):
    """Standardize every column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : 2-D ndarray or np.matrix
        One row per pattern, one column per feature.

    Returns
    -------
    Array of the same shape as X (an np.matrix when X is one) whose columns
    have had their mean subtracted and been divided by their standard
    deviation.

    A constant column has zero standard deviation; it is only centered, so it
    comes back as all zeros instead of the NaNs that 0/0 would produce.
    """
    # Broadcasting subtracts the per-column mean from every row.
    centered = X - np.mean(X, 0)
    spread = np.std(centered, 0)
    # Dividing by 1 leaves an (already all-zero) constant column untouched.
    safe_spread = np.where(spread == 0, 1.0, spread)
    return np.divide(centered, safe_spread)

# Standardize the input features once, before training
XX = normalize(X)

# Let's start with a 1-layer network with sigmoid activation function.
# Index 0 of W and b is an unused placeholder so that the parameters of
# layer l live at W[l] and b[l]; L is the number of layers.

W = [[], np.random.normal(loc=0, scale=0.1, size=(N, 1))]
b = [[], np.random.normal(loc=0, scale=0.1, size=(1, 1))]
L = len(W) - 1

def act(z):
    """Logistic sigmoid activation, 1 / (1 + exp(-z)), applied elementwise.

    Parameters
    ----------
    z : scalar, ndarray or np.matrix
        Pre-activation value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape (and array subclass) as z.

    The sigmoid is evaluated as exp(-log(1 + exp(-z))).  np.logaddexp
    computes log(exp(0) + exp(-z)) without ever forming exp(-z) itself, so
    strongly negative inputs no longer trigger an overflow warning.
    """
    return np.exp(-np.logaddexp(0, -z))

def actder(z):
    """Derivative of the activation `act` at z, computed elementwise.

    Uses the identity act'(z) = act(z) * (1 - act(z)), so the activation is
    evaluated only once.
    """
    sigma = act(z)
    return np.multiply(sigma, 1 - sigma)

def ff(x, W, b):
    """Feed x forward through every layer and return the output activation.

    Parameters
    ----------
    x : column vector (ndarray or np.matrix)
        Input pattern.
    W, b : lists indexed by layer
        W[l] and b[l] hold the weights and bias of layer l for l >= 1;
        index 0 is never read.

    Returns
    -------
    The activation of the last layer, or x itself when W describes no layers.
    """
    a = x
    for layer in range(1, len(W)):
        # `@` is a matrix product for ndarray and np.matrix alike, whereas
        # `*` would silently broadcast elementwise on plain ndarrays.
        z = W[layer].T @ a + b[layer]
        a = act(z)
    return a

def loss(y, yhat):
    """Binary cross-entropy between target y and predicted probability yhat.

    Computes -((1 - y) * log(1 - yhat) + y * log(yhat)) elementwise.

    Parameters
    ----------
    y : scalar or array
        Target(s), 0 or 1.
    yhat : scalar or array
        Predicted probability (or probabilities) of class 1.

    Returns
    -------
    The loss, with the broadcast shape of y and yhat.

    yhat is clipped to [eps, 1 - eps] (eps = float machine epsilon) before
    taking logarithms, so a saturated prediction of exactly 0 or 1 yields a
    large finite loss instead of inf, or NaN from 0 * log(0).
    """
    tiny = np.finfo(float).eps
    p = np.clip(yhat, tiny, 1 - tiny)
    # np.multiply keeps the products elementwise even for np.matrix inputs.
    return -(np.multiply(1 - y, np.log(1 - p)) + np.multiply(y, np.log(p)))
    
# Use mini-batch size 1

alpha = 0.01            # learning rate
max_iter = 500          # number of passes over the whole training set
check_gradient = False  # set to True to print a finite-difference check of dW

# `epoch` rather than `iter`, so the builtin iter() is not shadowed.
for epoch in range(max_iter):
    loss_this_iter = 0
    # Visit the patterns in a fresh random order on every pass
    order = np.random.permutation(M)
    for pattern in order:

        # Grab the pattern as a column vector together with its target

        x_this = XX[pattern, :].T
        y_this = y[pattern, 0]

        # Feed forward step, keeping every layer's pre-activation z[l] and
        # activation a[l] (index 0 holds the input / an unused placeholder)

        a = [x_this]
        z = [[]]
        for layer in range(1, L + 1):
            z.append(W[layer].T @ a[layer - 1] + b[layer])
            a.append(act(z[layer]))

        # One placeholder slot per layer, filled in by the backprop step
        delta = [[] for _ in range(L + 1)]
        dW = [[] for _ in range(L + 1)]
        db = [[] for _ in range(L + 1)]

        loss_this_pattern = loss(y_this, a[L][0, 0])
        loss_this_iter = loss_this_iter + loss_this_pattern

        # Backprop step.  Only the output layer L is handled here.  For a
        # sigmoid output with cross-entropy loss, the gradient with respect
        # to z[L] is simply (prediction - target).

        delta[L] = a[L] - y_this
        db[L] = delta[L].copy()
        dW[L] = a[L - 1] @ delta[L]

        # Optional check of the analytic gradient against a finite difference

        if check_gradient:
            print('Target: %f' % y_this)
            print('y_hat: %f' % a[L][0, 0])
            print(db)
            y_pred = ff(x_this, W, b)
            diff = 1e-3
            original_weight = W[1][10, 0]
            W[1][10, 0] = original_weight + diff
            y_pred_dW = ff(x_this, W, b)
            # Undo the perturbation so the check does not alter training
            W[1][10, 0] = original_weight
            L1 = loss(y_this, y_pred)
            L2 = loss(y_this, y_pred_dW)
            dW_finite_difference = (L2 - L1) / diff
            print('Original out %f, perturbed out %f' %
                  (y_pred[0, 0], y_pred_dW[0, 0]))
            print('Theoretical dW %f, finite-difference dW %f' %
                  (dW[1][10, 0], dW_finite_difference[0, 0]))

        # Stochastic gradient descent update from this single pattern

        W[L] = W[L] - alpha * dW[L]
        b[L] = b[L] - alpha * db[L]

    print('Iteration %d loss %f' % (epoch, loss_this_iter))


Iteration 0 loss 113.453196
Iteration 1 loss 62.949267
Iteration 2 loss 54.054635
Iteration 3 loss 49.285947
Iteration 4 loss 46.270994
Iteration 5 loss 44.164128
Iteration 6 loss 42.535371
Iteration 7 loss 41.278778
Iteration 8 loss 40.255512
Iteration 9 loss 39.354582
Iteration 10 loss 38.562164
Iteration 11 loss 37.907338
Iteration 12 loss 37.404441
Iteration 13 loss 36.869295
Iteration 14 loss 36.378136
Iteration 15 loss 35.955143
Iteration 16 loss 35.633535
Iteration 17 loss 35.274350
Iteration 18 loss 34.929887
Iteration 19 loss 34.642657
Iteration 20 loss 34.361045
Iteration 21 loss 34.114935
Iteration 22 loss 33.886097
Iteration 23 loss 33.649737
Iteration 24 loss 33.410080
Iteration 25 loss 33.264315
Iteration 26 loss 33.056749
Iteration 27 loss 32.889636
Iteration 28 loss 32.727215
Iteration 29 loss 32.482428
Iteration 30 loss 32.379631
Iteration 31 loss 32.270100
Iteration 32 loss 32.144203
Iteration 33 loss 31.993190
Iteration 34 loss 31.872404
Iteration 35 loss 31.725339
I

Iteration 292 loss 25.257319
Iteration 293 loss 25.238840
Iteration 294 loss 25.244575
Iteration 295 loss 25.251989
Iteration 296 loss 25.230017
Iteration 297 loss 25.217188
Iteration 298 loss 25.210241
Iteration 299 loss 25.191471
Iteration 300 loss 25.193058
Iteration 301 loss 25.150786
Iteration 302 loss 25.182610
Iteration 303 loss 25.139394
Iteration 304 loss 25.148586
Iteration 305 loss 25.140783
Iteration 306 loss 25.094792
Iteration 307 loss 25.125881
Iteration 308 loss 25.091888
Iteration 309 loss 25.080812
Iteration 310 loss 25.061782
Iteration 311 loss 25.098462
Iteration 312 loss 25.097278
Iteration 313 loss 25.058711
Iteration 314 loss 25.030671
Iteration 315 loss 25.043802
Iteration 316 loss 24.981454
Iteration 317 loss 25.006872
Iteration 318 loss 25.043896
Iteration 319 loss 25.019481
Iteration 320 loss 24.992162
Iteration 321 loss 24.949441
Iteration 322 loss 24.986290
Iteration 323 loss 24.984058
Iteration 324 loss 24.972800
Iteration 325 loss 24.962665
Iteration 326 