In [22]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [6]:
def error_check(y, h):
    m = y.shape[1]
    cost = -(1/m) * (np.sum((y * np.log(h)) + (1 - y) * np.log(1 - h)))
    return cost

In [63]:
def sigmoid(z):
    s = 1/(1 + np.exp(-z))
    return s
def sigmoid_back(dA):
    return dA * (1 - dA)
def relu(z):
    return np.where(z>=0, z, 0)
def relu_back(dA):
    return np.where(dA>=0, 1, 0)

In [84]:
def for_prop(x, y, params):
    A_prev = x
    caches = []
    for i in range(len(params)-1):
        w, b = params[i]
        z = np.dot(w, A_prev) + b
        cache = [A_prev, w, b, z]
        A_prev = relu(z)
        caches.append(cache)
    w, b = params[-1]
    z = np.dot(w, A_prev) + b
    cache = [A_prev, w, b, z]
    A_prev = sigmoid(z)
    caches.append(cache)
    
    cost = error_check(y, A_prev)
    
    return A_prev, caches, cost

In [77]:
def back_prop(A, y, caches):
    m = y.shape[1]
    gradients = []
    dA = -(np.divide(y, np.log(A)) + np.divide((1 - y), np.log(1 - A)))
    
    A_prev, w, b, z = caches[-1]
    dz = np.multiply(sigmoid_back(dA), dA)
    dw = np.dot(A_prev, dz.T)/m
    db = np.sum(dz, axis = 1, keepdims = True)/m
    
    gradients.append([dw, db])
    
    dA = np.dot(w.T, dz)/m
    
    for i in range(len(caches) - 2, -1, -1):
        A_prev, w, b, z = caches[i]
        dz = np.multiply(relu_back(dA), dA)
        dw = np.dot(dz, A_prev.T)/m
        db = np.sum(dz, axis = 1, keepdims = True)/m
        assert(w.shape == dw.shape)
        assert(b.shape == db.shape)
        
        gradients.append([dw, db])
        
        dA = np.dot(w.T, dz)/m
        
    return gradients

In [83]:
def update(x, y, params, learning_rate, epochs):
    
    costs = []
    for j in range(epochs+1):
        A, caches, cost = for_prop(x, y, params)
        gradients = back_prop(A, y, caches)
        grads = []

        for i in reversed(gradients):
            grads.append(i)

        for i in range(len(params)):
            params[i][0] = params[i][0] - (learning_rate * grads[i][0])
            params[i][1] = params[i][1] - (learning_rate * grads[i][1])
        
        if j%10==0:
            print("cost at iteration ", i, ": ", cost)
        costs.append(cost)
    return params, costs

In [14]:
def initialize(dim):
    params = []
    for i in range(1, len(dim)):
        w = np.full((dim[i], dim[i - 1]), 0.1)
        b = np.full((dim[i], 1), 0.1)
        params.append([w, b])
    return params

In [16]:
def predict(x, y, params):
    A, _, _ = for_prop(x, y, params)
    y_hat = []
    for i in range(A.shape[1]):
        if A[0, i] >= 0.5:
            y_hat.append(1)
        else:
            y_hat.append(0)
    y_hat = np.array(y_hat).reshape(1, -1)
    acc = 100 - (np.mean(np.abs(y - y_hat)) * 100)
    return acc

In [18]:
def model(trainx, trainy, testx, testy, dim, learning_rate, epochs):
    params = initialize(dim)
    
    params, costs = update(trainx, trainy, params, learning_rate, epochs)
    
    train_acc = predict(trainx, trainy, params)
    test_acc = predict(testx, testy, params)
    
    pf = {'params': params,
          'costs': costs,
         'train_acc': train_acc,
         'test_acc': test_acc}
    return pf

In [59]:
with open('2d_dataset_8.csv', 'r') as file:
    df = pd.read_csv(file)
a = df.alpha.values.reshape(1, -1)
b = df.beta.values.reshape(1, -1)
min_a = np.min(a)
max_a = np.max(a)
min_b = np.min(b)
max_b = np.max(b)
a = (a - min_a)/(max_a - min_a)
b = (b - min_b)/(max_b - min_b)
y = df.y.values.reshape(1, -1)
x1 = []
y1 = []
x2 = []
y2 = []
for i in range(y.shape[1]):
    if y[0, i]==1:
        x1.append(a[0, i])
        y1.append(b[0, i])
    else:
        x2.append(a[0, i])
        y2.append(b[0, i])
x = np.array([df.alpha.values, df.beta.values], dtype = float).T
y = df.y.values.reshape(-1, 1)
trainx, testx, trainy, testy = train_test_split(x, y, test_size = 0.1, random_state = 1)
trainx = trainx.T
testx = testx.T
trainy = trainy.reshape(1, -1)
testy = testy.reshape(1, -1)

In [85]:
pf = model(trainx, trainy, testx, testy, [trainx.shape[0], 3, 1], 1, 100)

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
cost at iteration  1 :  nan
