In [1]:
import numpy as np

In [2]:
mean = 0
variance = 0
def normalizeData(data):
    n_test = data.shape[1]
    global mean, variance
    mean = (1/n_test) * np.sum(data, dtype = float, axis = 1, keepdims=1)
    variance = (1/n_test) * np.sum(np.square(data),axis =1, keepdims = 1)
    variance = np.sqrt(variance)
    data-=mean
    data/=variance
    return data

In [3]:
import csv
def loadTrainingData():
    file = open("framingham.csv")
    data = csv.reader(file)
    features=[]
    output=[]
    trainingData=[]
    trainingOutput=[]
    testData=[]
    testOutput=[]
    for row in data:
        features.append(row[:-1])
        output.append(row[-1:])
    output = output[1:]
    features = features[1:]
    output = np.array(output, dtype="float")
    features = np.array(features, dtype="float")
    output=output.T
    features=features.T
    features = normalizeData(features)
    trainingData = features
    trainingOutput = output
    dataArray=[trainingData, trainingOutput]
    return dataArray



In [4]:
def loadTestData():
    file = open("framinghamTest.csv")
    data = csv.reader(file)
    features=[]
    output=[]
    trainingData=[]
    trainingOutput=[]
    testData=[]
    testOutput=[]
    for row in data:
        features.append(row[:-1])
        output.append(row[-1:])
    output = output[1:]
    features = features[1:]
    output = np.array(output, dtype="float")
    features = np.array(features, dtype="float")
    output=output.T
    features=features.T
    global mean,variance
    features = (features-mean)/variance
    testData = features
    testOutput = output
    dataArray=[testData, testOutput]
    return dataArray

In [5]:
def paramsDefine(nx,nh,ny):
    params = dict()
    params["W1"] = np.random.randn(nh, nx) 
    params["b1"] = np.zeros((nh,1))
    params["W2"] = np.random.randn(ny, nh) 
    params["b2"] = np.zeros((ny,1))
    return params

In [6]:
def activationFunction(Z,activation = "relu"):
    if activation == "relu":
        A = np.maximum(0,Z)
    elif activation == "sigmoid":
        A = 1/(1+np.exp(-Z))
    return A

In [7]:
def forwardProp(params, Data):
    # print(Data)
    Z1 = np.dot(params["W1"], Data) + params["b1"]
    A1 = activationFunction(Z1, activation = "relu")
    Z2 = np.dot(params["W2"], A1) + params["b2"]
    A2 = activationFunction(Z2, activation = "sigmoid")
    outputArray = {"Z1": Z1, "Z2":Z2, "A1":A1, "A2":A2}
    return outputArray

In [8]:
def findCost(A2, output):
    n_test = A2.shape[1]
    cost = np.sum(np.multiply(output,np.log(A2)) + np.multiply((1-output),np.log(1-A2)), axis=1)*(-1/n_test)
    cost = np.squeeze(cost)
    return cost

In [9]:
def backwardProps(outputArray, output, params, data):
    # print(outputArray["A2"].shape == output.shape)
    dZ2 = np.subtract(outputArray["A2"], output)
    dA1 = np.dot(params["W2"].T, dZ2)
    dZ1 = np.array(dA1, copy=True)
    dZ1[outputArray["Z1"] <= 0] = 0
    dZ1 = np.multiply(dA1, dZ1)
    n_test = output.shape[1]
    dW2 = np.dot(dZ2, outputArray["A1"].T)*(1/n_test)
    db2 = np.sum(dZ2, axis=1, keepdims = 1)* (1/n_test)
    dW1 = np.dot(dZ1, data.T)*(1/n_test)
    db1 = np.sum(dZ1, axis=1, keepdims = 1) * (1/n_test)
    grads = {"dW1": dW1, "db1": db1, "dW2":dW2, "db2":db2}
    return grads

In [10]:
def updateParams(params, grads, learningRate):
    params["W1"] = params["W1"] - learningRate * grads["dW1"]
    params["b1"] = params["b1"] - learningRate * grads["db1"]
    params["W2"] = params["W2"] - learningRate * grads["dW2"]
    params["b2"] = params["b2"] - learningRate * grads["db2"]
    return params

In [20]:

def trainNetwork(learningRate):
    dataArray = loadTrainingData()
    trainingData = np.array(dataArray[0])
    trainingOutput = np.array(dataArray[1])
    nx = trainingData.shape[0]
    nh = 4
    ny = trainingOutput.shape[0]
    params = paramsDefine(nx, nh, ny)
    costs = []
    # print(trainingOutput.shape)
    for i in range(10000):
        outputArray = forwardProp(params, trainingData)
        cost = findCost(outputArray["A2"], trainingOutput)
        costs.append(cost)
        grads = backwardProps(outputArray, trainingOutput, params, trainingData)
        params = updateParams(params, grads, learningRate)
        if(i%100 == 0):
            print("The cost at "+ str(i) + "th interation is " + str(cost))
        if(i%1500==0):
            learningRate/=1.1
    return params
        

In [21]:
def predict(params):
    dataArray = loadTestData()
    data = np.array(dataArray[0])
    output = np.array(dataArray[1])
    predictions = forwardProp(params, data)
    predictions = predictions["A2"]
    predictions = predictions > 0.5
    print ('Accuracy: %f' % float((np.dot(output,predictions.T) + np.dot(1-output,1-predictions.T))/float(output.size)*100) + '%')

In [22]:
params = trainNetwork(0.01)

The cost at 0th interation is 1.2552067633521868
The cost at 100th interation is 0.7789070471111881
The cost at 200th interation is 0.6256645824684304
The cost at 300th interation is 0.5687513071822307
The cost at 400th interation is 0.5354177354819976
The cost at 500th interation is 0.510184900258135
The cost at 600th interation is 0.4900385712787102
The cost at 700th interation is 0.47359774223201184
The cost at 800th interation is 0.46006546312437196
The cost at 900th interation is 0.44898768107992076
The cost at 1000th interation is 0.4401088113504651
The cost at 1100th interation is 0.4332156152394692
The cost at 1200th interation is 0.42799766539147704
The cost at 1300th interation is 0.42405861636629244
The cost at 1400th interation is 0.42103974170936703
The cost at 1500th interation is 0.41868355241795885
The cost at 1600th interation is 0.41696963533690684
The cost at 1700th interation is 0.415574071532884
The cost at 1800th interation is 0.4144298188628197
The cost at 1900th

In [23]:
predict(params)

[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.
  1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1.
  0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0.
  0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 