In [2]:
import numpy as np
import pandas as pd
import math
import random
import sys
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [3]:
data=pd.read_csv('diabetes.csv')

In [4]:
data_x,data_y=data.drop('Outcome',axis=1).values, data['Outcome'].values

In [7]:
data_x_scaled = preprocessing.scale(data_x)

In [187]:
print(data_x.shape)
print(data_x.T.shape)

(768, 8)
(8, 768)


In [6]:
X_train=data_x_scaled.T
y_train=data_y.reshape(1,768)

### Cross Entropy loss

In [190]:
def compute_loss(Y, Y_hat):

    m = Y.shape[1]
    L = -(1./m) * ( np.sum( np.multiply(np.log(Y_hat),Y) ) + np.sum( np.multiply(np.log(1-Y_hat),(1-Y)) ) )

    return L

In [341]:
X = X_train
Y = y_train

n_x = X.shape[0]
n_h = 2*n_x
learning_rate = 0.2


In [388]:
def neuralnet(activation,derivative,epochs):
    np.random.seed(101)
    W1 = np.random.randn(n_h, n_x)*np.sqrt(2/n_x)
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(1, n_h)*np.sqrt(2/n_h)
    b2 = np.zeros((1, 1))
    act=activation
    der=derivative

    for i in range(epochs):

        Z1 = np.matmul(W1, X) + b1
        A1 = act(Z1)
        Z2 = np.matmul(W2, A1) + b2
        A2 = sigmoid(Z2)

        cost = compute_loss(Y, A2)

        dZ2 = A2-Y
        dW2 = (1./m) * np.matmul(dZ2, A1.T)
        db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

        dA1 = np.matmul(W2.T, dZ2)
        dZ1 = dA1* der(Z1)
        dW1 = (1./m) * np.matmul(dZ1, X.T)
        db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)

        W2 = W2 - learning_rate * dW2
        b2 = b2 - learning_rate * db2
        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1

        if i % 1000 == 0:
            print("Epoch", i, "cost: ", cost)

    print("Final cost:", cost)
    
    Z1 = np.matmul(W1, X_train) + b1
    A1 = act(Z1)
    Z2 = np.matmul(W2, A1) + b2
    A2 = sigmoid(Z2)

    predictions = (A2>.5)[0,:]
    labels = (y_train == 1)[0,:]
    print(confusion_matrix(predictions, labels))
    print(classification_report(predictions, labels))
    print('Accuracy =' , accuracy_score(labels, predictions))

In [374]:
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s
def der_sigmoid(z):
    s= sigmoid(z) * (1 - sigmoid(z))
    return s

In [391]:
%%time
neuralnet(sigmoid,der_sigmoid,20000)


Epoch 0 cost:  0.8868684619934128
Epoch 1000 cost:  0.4551600128917266
Epoch 2000 cost:  0.4431393976503132
Epoch 3000 cost:  0.43379838536746596
Epoch 4000 cost:  0.4230327577937586
Epoch 5000 cost:  0.4126611329780337
Epoch 6000 cost:  0.40258432490284823
Epoch 7000 cost:  0.39402331804922147
Epoch 8000 cost:  0.3868843960357709
Epoch 9000 cost:  0.3807793727494434
Epoch 10000 cost:  0.3751514105273869
Epoch 11000 cost:  0.3695615070263004
Epoch 12000 cost:  0.3631341400157991
Epoch 13000 cost:  0.355139142501943
Epoch 14000 cost:  0.34733460144953404
Epoch 15000 cost:  0.3393775976685996
Epoch 16000 cost:  0.3310747968737648
Epoch 17000 cost:  0.32299151124682945
Epoch 18000 cost:  0.3164564818463449
Epoch 19000 cost:  0.3108649551218648
Final cost: 0.3057873769621363
[[456  61]
 [ 44 207]]
             precision    recall  f1-score   support

      False       0.91      0.88      0.90       517
       True       0.77      0.82      0.80       251

avg / total       0.87      0.86  

### Piecewise linear function (k=4)

In [345]:
from numpy import vectorize
def piecewise_linear_4(x):
    if x<=-2:
        y=0
    elif ((x>-2)&(x<0)):
        y=(x/4)-2
    elif ((x>0) & (x<2)):
        y=(x/4)+1/2
    elif x>=2:
        y=1
    return y
pwl_4 = vectorize(piecewise_linear_4)

#derivative definition
def piecewise_linear_4_der(x):
    if x<=-2:
        y=0
    elif ((x>-2)&(x<0)):
        y=1/4
    elif ((x>0) & (x<2)):
        y=1/4
    elif x>=2:
        y=0
    return y
pwl_4_der= vectorize(piecewise_linear_4_der)

In [392]:
%%time
neuralnet(pwl_4,pwl_4_der,20000)

Epoch 0 cost:  1.0596049100656253
Epoch 1000 cost:  0.48093041738419684
Epoch 2000 cost:  0.47360109819481067
Epoch 3000 cost:  0.4750396982184158
Epoch 4000 cost:  0.47869013062753557
Epoch 5000 cost:  0.47808341813847643
Epoch 6000 cost:  0.47947331028856877
Epoch 7000 cost:  0.47858176370035865
Epoch 8000 cost:  0.48037103662268466
Epoch 9000 cost:  0.4801357434682837
Epoch 10000 cost:  0.47821915780084756
Epoch 11000 cost:  0.4753717204268402
Epoch 12000 cost:  0.47508101683690285
Epoch 13000 cost:  0.4742212189970463
Epoch 14000 cost:  0.4741272734710815
Epoch 15000 cost:  0.4769135703477567
Epoch 16000 cost:  0.4759321649501502
Epoch 17000 cost:  0.4731783132726015
Epoch 18000 cost:  0.47471363757723595
Epoch 19000 cost:  0.47982315624374855
Final cost: 0.4795374635658467
[[437 103]
 [ 63 165]]
             precision    recall  f1-score   support

      False       0.87      0.81      0.84       540
       True       0.62      0.72      0.67       228

avg / total       0.80     

### Piecewise linear function (k=6)

In [377]:
def piecewise_linear_6(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-2)):
        y=0.05*x+0.2
    elif ((x>-2) & (x<=0)):
        y=0.20*x+0.5
    elif ((x>0) & (x<=2)):
        y=0.20*x+0.5
    elif ((x>2) & (x<=4)):
        y=0.05*x+0.8
    elif x>4:
        y=1
    return y
pwl_6 = vectorize(piecewise_linear_6)
def piecewise_linear_6_der(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-2)):
        y=0.05
    elif ((x>-2) & (x<=0)):
        y=0.20
    elif ((x>0) & (x<=2)):
        y=0.20
    elif ((x>2) & (x<=4)):
        y=0.05
    elif x>4:
        y=0
    return y
pwl_6_der= vectorize(piecewise_linear_6_der)

In [395]:
%%time
neuralnet(pwl_6,pwl_6_der,20000)

Epoch 0 cost:  0.8845800900977527
Epoch 1000 cost:  0.45668748796353487
Epoch 2000 cost:  0.4472940349761423
Epoch 3000 cost:  0.4404678840499182
Epoch 4000 cost:  0.43131146801038267
Epoch 5000 cost:  0.4202072601093601
Epoch 6000 cost:  0.4127197498529249
Epoch 7000 cost:  0.4064536825779002
Epoch 8000 cost:  0.40076633994874694
Epoch 9000 cost:  0.39675079577064404
Epoch 10000 cost:  0.39378217360506074
Epoch 11000 cost:  0.39140454235925937
Epoch 12000 cost:  0.3896018433524664
Epoch 13000 cost:  0.3882110799127636
Epoch 14000 cost:  0.38652334504557695
Epoch 15000 cost:  0.38507816425153757
Epoch 16000 cost:  0.3832563283439386
Epoch 17000 cost:  0.38097974817047975
Epoch 18000 cost:  0.37817352736390186
Epoch 19000 cost:  0.3758050315160476
Final cost: 0.37420467615110076
[[445  80]
 [ 55 188]]
             precision    recall  f1-score   support

      False       0.89      0.85      0.87       525
       True       0.70      0.77      0.74       243

avg / total       0.83     

### Piecewise linear function (k=8)

In [379]:
def piecewise_linear_8(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-2.6)):
        y=0.05*x+0.2
    elif ((x>-2.6) & (x<=-1.3)):
        y=0.1*x+0.35
    elif ((x>-1.3) & (x<=0)):
        y=0.22*x+0.5
    elif ((x>0) & (x<=1.3)):
        y=0.22*x+0.5
    elif ((x>1.3) & (x<=2.6)):
        y=0.1*x+0.7
    elif ((x>2.6) & (x<=4)):
        y=0.06*x+0.7
    elif x>4:
        y=1
    return y
pwl_8 = vectorize(piecewise_linear_8)

def piecewise_linear_8_der(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-2.6)):
        y=0.05
    elif ((x>-2.6) & (x<=-1.3)):
        y=0.1
    elif ((x>-1.3) & (x<=0)):
        y=0.22
    elif ((x>0) & (x<=1.3)):
        y=0.22
    elif ((x>1.3) & (x<=2.6)):
        y=0.1
    elif ((x>2.6) & (x<=4)):
        y=0.06
    elif x>4:
        y=0
    return y
pwl_8_der= vectorize(piecewise_linear_8_der)

In [396]:
%%time
neuralnet(pwl_8,pwl_8_der,20000)

Epoch 0 cost:  0.885528880531426
Epoch 1000 cost:  0.4577393012712097
Epoch 2000 cost:  0.44590035553928165
Epoch 3000 cost:  0.43600020959032554
Epoch 4000 cost:  0.4272419684688217
Epoch 5000 cost:  0.41861171536510666
Epoch 6000 cost:  0.41179963882483006
Epoch 7000 cost:  0.4056843564232922
Epoch 8000 cost:  0.3995863378263771
Epoch 9000 cost:  0.3937643426443278
Epoch 10000 cost:  0.3877496132620789
Epoch 11000 cost:  0.3849123071664967
Epoch 12000 cost:  0.3832507410670068
Epoch 13000 cost:  0.3798992077859069
Epoch 14000 cost:  0.37728004575979157
Epoch 15000 cost:  0.37390513436699835
Epoch 16000 cost:  0.3675005252092878
Epoch 17000 cost:  0.3636962789294785
Epoch 18000 cost:  0.3602305731935407
Epoch 19000 cost:  0.35802023689807744
Final cost: 0.35687949558350684
[[448  70]
 [ 52 198]]
             precision    recall  f1-score   support

      False       0.90      0.86      0.88       518
       True       0.74      0.79      0.76       250

avg / total       0.84      0.8

### Piecewise linear function (k=10)

In [381]:
def piecewise_linear_10(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-3)):
        y= 0.0470*x + 0.18
    elif ((x>-3) & (x<=-2)):
        y= 0.0720*x + 0.26
    elif ((x>-2) & (x<=-1)):
        y= 0.1490*x + 0.41
    elif ((x>-1) & (x<=0)):
        y=0.2320*x + 0.5
    elif ((x>0) & (x<=1)):
        y= 0.2300*x + 0.5
    elif ((x>1) & (x<=2)):
        y= 0.1500*x + 0.58
    elif ((x>2) & (x<=3)):
        y=0.0700*x + 0.74
    elif ((x>3) & (x<=4)):
        y=0.050*x + 0.8
    elif x>4:
        y=1
    return y
pwl_10 = vectorize(piecewise_linear_10)

def piecewise_linear_10_der(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-3)):
        y= 0.0470
    elif ((x>-3) & (x<=-2)):
        y= 0.0720
    elif ((x>-2) & (x<=-1)):
        y= 0.1490
    elif ((x>-1) & (x<=0)):
        y=0.2320
    elif ((x>0) & (x<=1)):
        y= 0.2300
    elif ((x>1) & (x<=2)):
        y= 0.1500
    elif ((x>2) & (x<=3)):
        y=0.0700
    elif ((x>3) & (x<=4)):
        y=0.050
    elif x>4:
        y=0
    return y
pwl_10_der= vectorize(piecewise_linear_10_der)

In [397]:
%%time
neuralnet(pwl_10,pwl_10_der,20000)

Epoch 0 cost:  0.8858123004631412
Epoch 1000 cost:  0.45605690205512406
Epoch 2000 cost:  0.4445590631573435
Epoch 3000 cost:  0.4337201536802002
Epoch 4000 cost:  0.4230871808324803
Epoch 5000 cost:  0.41587594796870225
Epoch 6000 cost:  0.40988587814111593
Epoch 7000 cost:  0.40528206115550686
Epoch 8000 cost:  0.40126895059329715
Epoch 9000 cost:  0.39715026310669505
Epoch 10000 cost:  0.39294127883835844
Epoch 11000 cost:  0.38943064738912875
Epoch 12000 cost:  0.38538741192259307
Epoch 13000 cost:  0.3819462800740605
Epoch 14000 cost:  0.37830605505415793
Epoch 15000 cost:  0.37464583574960786
Epoch 16000 cost:  0.36998047484500085
Epoch 17000 cost:  0.3636173443827898
Epoch 18000 cost:  0.35770375716542924
Epoch 19000 cost:  0.3516327188058102
Final cost: 0.34580556002841367
[[446  74]
 [ 54 194]]
             precision    recall  f1-score   support

      False       0.89      0.86      0.87       520
       True       0.72      0.78      0.75       248

avg / total       0.84  

### Piecewise linear function (k=12)

In [383]:
def piecewise_linear_12(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-3.2)):
        y= 0.0500*x + 0.2
    elif ((x>-3.2) & (x<=-2.4)):
        y=0.0500*x + 0.2
    elif ((x>-2.4) & (x<=-1.6)):
        y=0.1*x + 0.32
    elif ((x>-1.6) & (x<=-0.8)):
        y=0.175*x + 0.44
    elif ((x>-0.8) & (x<=0)):
        y=0.22*x + 0.5
    elif ((x>0) & (x<=0.8)):
        y=0.23*x + 0.5
    elif ((x>0.8) & (x<=1.6)):
        y=0.1250*x + 0.6
    elif ((x>1.6) & (x<=2.4)):
        y= 0.1000*x + 0.67
    elif ((x>2.4) & (x<=3.2)):
        y= 0.0625*x + 0.76
    elif ((x>3.2) & (x<=4)):
        y=0.05000*x + 0.8
    elif x>4:
        y=1
    return y
pwl_12 = vectorize(piecewise_linear_12)

def piecewise_linear_12_der(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-3.2)):
        y= 0.0500
    elif ((x>-3.2) & (x<=-2.4)):
        y=0.0500
    elif ((x>-2.4) & (x<=-1.6)):
        y=0.1
    elif ((x>-1.6) & (x<=-0.8)):
        y=0.175
    elif ((x>-0.8) & (x<=0)):
        y=0.22
    elif ((x>0) & (x<=0.8)):
        y=0.23
    elif ((x>0.8) & (x<=1.6)):
        y=0.1250
    elif ((x>1.6) & (x<=2.4)):
        y= 0.1000
    elif ((x>2.4) & (x<=3.2)):
        y= 0.0625
    elif ((x>3.2) & (x<=4)):
        y=0.05000
    elif x>4:
        y=0
    return y
pwl_12_der= vectorize(piecewise_linear_12_der)

In [398]:
%%time
neuralnet(pwl_12,pwl_12_der,20000)

Epoch 0 cost:  0.8853227618385362
Epoch 1000 cost:  0.4578301327718933
Epoch 2000 cost:  0.4466649404017021
Epoch 3000 cost:  0.43750692110803346
Epoch 4000 cost:  0.4288830796774297
Epoch 5000 cost:  0.42056331987016526
Epoch 6000 cost:  0.4121170259154292
Epoch 7000 cost:  0.405692214997865
Epoch 8000 cost:  0.3985543392393636
Epoch 9000 cost:  0.39140146326363806
Epoch 10000 cost:  0.386552223116022
Epoch 11000 cost:  0.3834075073049397
Epoch 12000 cost:  0.3805250341993179
Epoch 13000 cost:  0.37807657085207114
Epoch 14000 cost:  0.37609876184433794
Epoch 15000 cost:  0.37457657217177254
Epoch 16000 cost:  0.3727741334225094
Epoch 17000 cost:  0.3700109458545325
Epoch 18000 cost:  0.3680779861216945
Epoch 19000 cost:  0.3661408775073516
Final cost: 0.3638665612245389
[[449  81]
 [ 51 187]]
             precision    recall  f1-score   support

      False       0.90      0.85      0.87       530
       True       0.70      0.79      0.74       238

avg / total       0.84      0.83  

### Piecewise linear function (k=14)

In [385]:
def piecewise_linear_14(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-3.3)):
        y= 0.05*x + 0.2
    elif ((x>-3.3) & (x<=-2.64)):
        y= 0.04697*x + 0.19
    elif ((x>-2.64) & (x<=-1.98)):
        y= 0.08333*x + 0.28
    elif ((x>-1.98) & (x<=-1.32)):
        y= 0.1348*x + 0.38
    elif ((x>-1.32) & (x<=-0.66)):
        y=0.1970*x + 0.47
    elif ((x>-0.66) & (x<=0)):
        y=0.2424*x + 0.5
    elif ((x>0) & (x<=0.66)):
        y=0.2273*x + 0.5
    elif ((x>0.66) & (x<=1.32)):
        y= 0.1970*x + 0.5
    elif ((x>1.32) & (x<=1.98)):
        y=0.1364*x + 0.6
    elif ((x>1.98) & (x<=2.64)):
        y=0.09091*x + 0.69
    elif ((x>2.64) & (x<=3.3)):
        y= 0.04545*x + 0.8
    elif ((x>3.3) & (x<=4)):
        y=0.05714*x + 0.77
    elif x>4:
        y=1
    return y
pwl_14 = vectorize(piecewise_linear_14)

def piecewise_linear_14_der(x):
    if x<=-4:
        y=0
    elif ((x>-4)&(x<=-3.3)):
        y= 0.05
    elif ((x>-3.3) & (x<=-2.64)):
        y= 0.04697
    elif ((x>-2.64) & (x<=-1.98)):
        y= 0.08333
    elif ((x>-1.98) & (x<=-1.32)):
        y= 0.1348
    elif ((x>-1.32) & (x<=-0.66)):
        y=0.1970
    elif ((x>-0.66) & (x<=0)):
        y=0.2424
    elif ((x>0) & (x<=0.66)):
        y=0.2273
    elif ((x>0.66) & (x<=1.32)):
        y= 0.1970
    elif ((x>1.32) & (x<=1.98)):
        y=0.1364
    elif ((x>1.98) & (x<=2.64)):
        y=0.09091
    elif ((x>2.64) & (x<=3.3)):
        y= 0.04545
    elif ((x>3.3) & (x<=4)):
        y=0.05714
    elif x>4:
        y=0
    return y
pwl_14_der= vectorize(piecewise_linear_14_der)

In [399]:
%%time
neuralnet(pwl_14,pwl_14_der,20000)

Epoch 0 cost:  0.8827934639783532
Epoch 1000 cost:  0.4558364304298477
Epoch 2000 cost:  0.4453638610000218
Epoch 3000 cost:  0.4361698046572098
Epoch 4000 cost:  0.42631015509825604
Epoch 5000 cost:  0.4170864096260227
Epoch 6000 cost:  0.4088598596413058
Epoch 7000 cost:  0.40306864332237113
Epoch 8000 cost:  0.3981013672689292
Epoch 9000 cost:  0.3929929482852894
Epoch 10000 cost:  0.3884021990992073
Epoch 11000 cost:  0.38418157729846253
Epoch 12000 cost:  0.38095774576871283
Epoch 13000 cost:  0.3774859505408159
Epoch 14000 cost:  0.37446981952514946
Epoch 15000 cost:  0.3717662552614881
Epoch 16000 cost:  0.3688984227237718
Epoch 17000 cost:  0.3652597168303922
Epoch 18000 cost:  0.3613616381484808
Epoch 19000 cost:  0.3564310378748857
Final cost: 0.3519905505345262
[[452  71]
 [ 48 197]]
             precision    recall  f1-score   support

      False       0.90      0.86      0.88       523
       True       0.74      0.80      0.77       245

avg / total       0.85      0.85 

## Results on training set only
This is an implementation of a single hidden layer neural network d-2d-1 with d inputs neurons, 2d hidden layer neurons and 1 output neuron. 
I have trained the model on PIMA Indians Diabetes dataset(Link:-https://www.kaggle.com/uciml/pima-indians-diabetes-database) from UCI Machine learning repository. The accuracy and metrics reported are training set accuracy only.

After 20000 epochs, the following results(accuracy) were obtained:-
1. Sigmoid activation- 0.86328125
2. Piecewise Linear Fucntion activation (k=4) - 0.7838
3. Piecewise Linear Fucntion activation (k=6) - 0.82421875
4. Piecewise Linear Fucntion activation (k=8) - 0.8411
5. Piecewise Linear Fucntion activation (k=10) - 0.8333
6. Piecewise Linear Fucntion activation (k=12) - 0.828125
7. Piecewise Linear Fucntion activation (k=14) - 0.8450

Cost after 20,000 epochs is the following:- 
1. Sigmoid activation- 0.305
2. Piecewise Linear Fucntion activation (k=4) - 0.479
3. Piecewise Linear Fucntion activation (k=6) - 0.374204
4. Piecewise Linear Fucntion activation (k=8) - 0.3568
5. Piecewise Linear Fucntion activation (k=10) - 0.3458
6. Piecewise Linear Fucntion activation (k=12) - 0.3638
7. Piecewise Linear Fucntion activation (k=14) - 0.3519

Time taken for 20000 epochs training 
1. Sigmoid activation- 10.7 s
2. Piecewise Linear Fucntion activation (k=4) - 4min 20s
3. Piecewise Linear Fucntion activation (k=6) - 5min 32s
4. Piecewise Linear Fucntion activation (k=8) - 5min 33s
5. Piecewise Linear Fucntion activation (k=10) - 7min 22s
6. Piecewise Linear Fucntion activation (k=12) - 6min 56s 
7. Piecewise Linear Fucntion activation (k=14) - 8min 3s

# With Test set

In [403]:
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(data_x_scaled, data_y, test_size=0.20, random_state=42)
X_train_1= X_train_1.T
y_train_1= y_train_1.reshape(1,614)
X_test_1 =X_test_1.T
y_test_1 =y_test_1.reshape(1,154)


In [404]:
def neuralnet_1(activation,derivative,epochs):
    X = X_train_1
    Y = y_train_1
    np.random.seed(101)
    W1 = np.random.randn(n_h, n_x)*np.sqrt(2/n_x)
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(1, n_h)*np.sqrt(2/n_h)
    b2 = np.zeros((1, 1))
    act=activation
    der=derivative

    for i in range(epochs):

        Z1 = np.matmul(W1, X) + b1
        A1 = act(Z1)
        Z2 = np.matmul(W2, A1) + b2
        A2 = sigmoid(Z2)

        cost = compute_loss(Y, A2)

        dZ2 = A2-Y
        dW2 = (1./m) * np.matmul(dZ2, A1.T)
        db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

        dA1 = np.matmul(W2.T, dZ2)
        dZ1 = dA1* der(Z1)
        dW1 = (1./m) * np.matmul(dZ1, X.T)
        db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)

        W2 = W2 - learning_rate * dW2
        b2 = b2 - learning_rate * db2
        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1

        if i % 1000 == 0:
            print("Epoch", i, "cost: ", cost)

    print("Final cost:", cost)
    
    Z1 = np.matmul(W1, X_test_1) + b1
    A1 = act(Z1)
    Z2 = np.matmul(W2, A1) + b2
    A2 = sigmoid(Z2)

    predictions = (A2>.5)[0,:]
    labels = (y_test_1 == 1)[0,:]
    print(confusion_matrix(predictions, labels))
    print(classification_report(predictions, labels))
    print('Accuracy =' , accuracy_score(labels, predictions))

### Sigmoid hidden layer

In [412]:
%%time
neuralnet_1(sigmoid,der_sigmoid,20000)

Epoch 0 cost:  0.8826177564744794
Epoch 1000 cost:  0.45170029298052283
Epoch 2000 cost:  0.4385031317000155
Epoch 3000 cost:  0.42970420312527746
Epoch 4000 cost:  0.4193740501574361
Epoch 5000 cost:  0.409145913610257
Epoch 6000 cost:  0.4000822375231552
Epoch 7000 cost:  0.3921565649483339
Epoch 8000 cost:  0.38529754319909293
Epoch 9000 cost:  0.37923638037672813
Epoch 10000 cost:  0.3738248510785002
Epoch 11000 cost:  0.3690010746886034
Epoch 12000 cost:  0.36468415355047584
Epoch 13000 cost:  0.3605253038592752
Epoch 14000 cost:  0.3557673360323362
Epoch 15000 cost:  0.3495705478068931
Epoch 16000 cost:  0.34210663100888056
Epoch 17000 cost:  0.33360668381259523
Epoch 18000 cost:  0.3249612049049551
Epoch 19000 cost:  0.31668150971719067
Final cost: 0.30871103609999945
[[78 24]
 [21 31]]
             precision    recall  f1-score   support

      False       0.79      0.76      0.78       102
       True       0.56      0.60      0.58        52

avg / total       0.71      0.71  

### Piecewise linear function (k=4)

In [413]:
%%time
neuralnet_1(pwl_4,pwl_4_der,20000)

Epoch 0 cost:  0.8818873746538853
Epoch 1000 cost:  0.5096882101814671
Epoch 2000 cost:  0.5096881562640755
Epoch 3000 cost:  0.5096881561844885
Epoch 4000 cost:  0.509688156184371
Epoch 5000 cost:  0.5096881561843708
Epoch 6000 cost:  0.5096881561843708
Epoch 7000 cost:  0.5096881561843708
Epoch 8000 cost:  0.5096881561843708
Epoch 9000 cost:  0.5096881561843708
Epoch 10000 cost:  0.5096881561843708
Epoch 11000 cost:  0.5096881561843708
Epoch 12000 cost:  0.5096881561843708
Epoch 13000 cost:  0.5096881561843708
Epoch 14000 cost:  0.5096881561843708
Epoch 15000 cost:  0.5096881561843708
Epoch 16000 cost:  0.5096881561843708
Epoch 17000 cost:  0.5096881561843708
Epoch 18000 cost:  0.5096881561843708
Epoch 19000 cost:  0.5096881561843708
Final cost: 0.5096881561843708
[[71 23]
 [28 32]]
             precision    recall  f1-score   support

      False       0.72      0.76      0.74        94
       True       0.58      0.53      0.56        60

avg / total       0.66      0.67      0.67 

### Piecewise linear function (k=6)

In [414]:
%%time
neuralnet_1(pwl_6,pwl_6_der,20000)

Epoch 0 cost:  0.8805200707831126
Epoch 1000 cost:  0.45205530532456817
Epoch 2000 cost:  0.441360584862105
Epoch 3000 cost:  0.43400240477653873
Epoch 4000 cost:  0.4272213577895457
Epoch 5000 cost:  0.41921008514503216
Epoch 6000 cost:  0.4107145709930113
Epoch 7000 cost:  0.40203857847085617
Epoch 8000 cost:  0.3932722540553957
Epoch 9000 cost:  0.38600584709985003
Epoch 10000 cost:  0.38036655240216477
Epoch 11000 cost:  0.3760641168854932
Epoch 12000 cost:  0.37245558127917394
Epoch 13000 cost:  0.3696482375747014
Epoch 14000 cost:  0.3663774690733062
Epoch 15000 cost:  0.36395092358440573
Epoch 16000 cost:  0.36219071463497976
Epoch 17000 cost:  0.3604252934497303
Epoch 18000 cost:  0.35902947223199083
Epoch 19000 cost:  0.35772572399515473
Final cost: 0.3562666164489959
[[79 20]
 [20 35]]
             precision    recall  f1-score   support

      False       0.80      0.80      0.80        99
       True       0.64      0.64      0.64        55

avg / total       0.74      0.74

### Piecewise linear function (k=8)

In [415]:
%%time
neuralnet_1(pwl_8,pwl_8_der,20000)

Epoch 0 cost:  0.8814912052405025
Epoch 1000 cost:  0.4546171818773781
Epoch 2000 cost:  0.4399101995246463
Epoch 3000 cost:  0.4313272956257116
Epoch 4000 cost:  0.4204200115279583
Epoch 5000 cost:  0.41062982712781537
Epoch 6000 cost:  0.40168202038228784
Epoch 7000 cost:  0.3949943873818688
Epoch 8000 cost:  0.3901877602786974
Epoch 9000 cost:  0.38465107136801735
Epoch 10000 cost:  0.3797864857167854
Epoch 11000 cost:  0.6563289977481872
Epoch 12000 cost:  0.5960015112786757
Epoch 13000 cost:  0.586713604643444
Epoch 14000 cost:  0.5825164155643267
Epoch 15000 cost:  0.5807423264372055
Epoch 16000 cost:  0.5800128807131464
Epoch 17000 cost:  0.5797007152311695
Epoch 18000 cost:  0.5795580990363449
Epoch 19000 cost:  0.5794875081016301
Final cost: 0.5794488022547432
[[72 21]
 [27 34]]
             precision    recall  f1-score   support

      False       0.73      0.77      0.75        93
       True       0.62      0.56      0.59        61

avg / total       0.68      0.69      0.

### Piecewise linear function (k=10)

In [416]:
%%time
neuralnet_1(pwl_10,pwl_10_der,20000)

Epoch 0 cost:  0.8815936010343659
Epoch 1000 cost:  0.4525293510224265
Epoch 2000 cost:  0.43937176820616497
Epoch 3000 cost:  0.43041317385226546
Epoch 4000 cost:  0.4205590734562757
Epoch 5000 cost:  0.6248654724606356
Epoch 6000 cost:  0.6121170938649125
Epoch 7000 cost:  0.6077005203980033
Epoch 8000 cost:  0.6055430816274949
Epoch 9000 cost:  0.6044051922882407
Epoch 10000 cost:  0.6037455462549146
Epoch 11000 cost:  0.6033198915740974
Epoch 12000 cost:  0.6030200097013609
Epoch 13000 cost:  0.6027953832413481
Epoch 14000 cost:  0.6026200930600106
Epoch 15000 cost:  0.6024794183862809
Epoch 16000 cost:  0.6023642231045625
Epoch 17000 cost:  0.6022684284949416
Epoch 18000 cost:  0.6021877741513265
Epoch 19000 cost:  0.6021191569414158
Final cost: 0.6020603052773791
[[28  3]
 [71 52]]
             precision    recall  f1-score   support

      False       0.28      0.90      0.43        31
       True       0.95      0.42      0.58       123

avg / total       0.81      0.52      0.

### Piecewise linear function (k=12)

In [417]:
%%time
neuralnet_1(pwl_12,pwl_12_der,20000)

Epoch 0 cost:  0.8811576177900142
Epoch 1000 cost:  0.4541869243068809
Epoch 2000 cost:  0.4409038565480759
Epoch 3000 cost:  0.4331581045766795
Epoch 4000 cost:  0.42487246710201687
Epoch 5000 cost:  0.416248694995148
Epoch 6000 cost:  0.4093766562273735
Epoch 7000 cost:  0.4023850311445431
Epoch 8000 cost:  0.39741628194357326
Epoch 9000 cost:  0.3921043213540893
Epoch 10000 cost:  0.3872863033188964
Epoch 11000 cost:  0.38383943766158835
Epoch 12000 cost:  0.38024700192491295
Epoch 13000 cost:  0.3758601939898355
Epoch 14000 cost:  0.37189778300892706
Epoch 15000 cost:  0.3685503787635239
Epoch 16000 cost:  0.3656577105454712
Epoch 17000 cost:  0.36316801419728384
Epoch 18000 cost:  0.35763463640194976
Epoch 19000 cost:  0.352755244390978
Final cost: 0.3492045974961631
[[77 19]
 [22 36]]
             precision    recall  f1-score   support

      False       0.78      0.80      0.79        96
       True       0.65      0.62      0.64        58

avg / total       0.73      0.73     

### Piecewise linear function (k=14)

In [418]:
%%time
neuralnet_1(pwl_14,pwl_14_der,20000)

Epoch 0 cost:  0.8785852067282991
Epoch 1000 cost:  0.4526077923927907
Epoch 2000 cost:  0.4403286311070453
Epoch 3000 cost:  0.4318828588635498
Epoch 4000 cost:  0.4224250485865676
Epoch 5000 cost:  0.4124234578029452
Epoch 6000 cost:  0.4050502529469068
Epoch 7000 cost:  0.39841341039899936
Epoch 8000 cost:  0.39172178853051226
Epoch 9000 cost:  0.3863757827098945
Epoch 10000 cost:  0.3804623480727383
Epoch 11000 cost:  0.3750809461901533
Epoch 12000 cost:  0.3714253340080512
Epoch 13000 cost:  0.3668899089941519
Epoch 14000 cost:  0.36324616314279035
Epoch 15000 cost:  0.3591284116917878
Epoch 16000 cost:  0.3558077463873029
Epoch 17000 cost:  0.6182622816557671
Epoch 18000 cost:  0.5928721070989912
Epoch 19000 cost:  0.5862600044838332
Final cost: 0.5832806632194709
[[82 33]
 [17 22]]
             precision    recall  f1-score   support

      False       0.83      0.71      0.77       115
       True       0.40      0.56      0.47        39

avg / total       0.72      0.68      0

# Conclusion on test set (After 20000 epochs)
After 20000 epochs, the following results(accuracy) were obtained: -
1. Sigmoid activation- 0.70
2. Piecewise Linear Function activation (k=4) - 0.66
3. Piecewise Linear Function activation (k=6) - 0.74
4. Piecewise Linear Function activation (k=8) - 0.68
5. Piecewise Linear Function activation (k=10) - 0.51
6. Piecewise Linear Function activation (k=12) - 0.73
7. Piecewise Linear Function activation (k=14) - 0.67

Cost after 20000 epochs is the following: - 
1. Sigmoid activation- 0.308
2. Piecewise Linear Function activation (k=4) - 0.50
3. Piecewise Linear Function activation (k=6) - 0.35
4. Piecewise Linear Function activation (k=8) - 0.579
5. Piecewise Linear Function activation (k=10) - 0.60
6. Piecewise Linear Function activation (k=12) - 0.34
7. Piecewise Linear Function activation (k=14) - 0.58

Time taken for 20000 epochs training 
1. Sigmoid activation- 8.87 s
2. Piecewise Linear Function activation (k=4) - 3min 36s
3. Piecewise Linear Function activation (k=6) - 4min 16s
4. Piecewise Linear Function activation (k=8) - 4min 41s
5. Piecewise Linear Function activation (k=10) - 6min 4s
6. Piecewise Linear Function activation (k=12) - 5min 44s
7. Piecewise Linear Function activation (k=14) - 6min 38s



# Conclusion
After significant training effort (20,000 epochs) we can see that training accuracy increases as we try to fit the piecewise linear function by large number of segments in this case 14. As the accuracy increases so does the computation effort which can be seen in terms of time taken by the model to complete training. In piecewise linear function the training accuracy first increases, decreases and then increases, whereas the test accuracy goes completely opposite trend. This proves that if we try to overfit the model i.e perform well on training data it might perform bad on test data or unseen data. 
Sigmoid function performs better in terms of computation and performance on test set. 
