In [165]:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
import matplotlib.pyplot as plt

def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    ``z`` may be a scalar or a NumPy array; the result follows NumPy
    broadcasting of the expression above.
    """
    denominator = 1 + np.exp(-z)
    return 1. / denominator

def Relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``."""
    return np.maximum(0, z)

def initialize_parameter(layer_dims):
    """Create the weight and bias arrays for a fully connected network.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units in every layer, input layer first.

    Returns
    -------
    dict
        ``'W<l>'`` with shape ``(layer_dims[l], layer_dims[l-1])`` and
        ``'b<l>'`` with shape ``(layer_dims[l], 1)`` (all zeros) for
        ``l = 1 .. len(layer_dims) - 1``.
    """
    parameters = {}
    for l in range(1, len(layer_dims)):
        fan_in = layer_dims[l - 1]
        # He initialisation: zero-mean weights scaled by sqrt(2 / fan_in).
        # Uniform [0, 1) weights are all positive and unscaled, which drives
        # the pre-activations far into the saturated region of the sigmoid
        # output layer and stalls training.
        scale = np.sqrt(2. / max(fan_in, 1))
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], fan_in) * scale
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters

def linear_forward(W,X,b):
    """Affine step of a layer: returns ``np.dot(W, X) + b``."""
    return np.dot(W, X) + b

def linear_forward_activation(parameters, X, L, activation='Relu'):
    """Run a forward pass and return every intermediate value.

    Layers ``1 .. L-2`` use ``activation``; the last layer ``L-1`` always
    uses a sigmoid.

    Parameters
    ----------
    parameters : dict
        ``'W<l>'`` / ``'b<l>'`` entries for ``l = 1 .. L-1``.
    X : numpy.ndarray
        Network input; stored in the cache as ``'A0'``.
    L : int
        Number of layers including the input layer (must be >= 2).
    activation : str
        ``'Relu'`` or ``'sigmoid'``; any other value leaves the hidden
        layers linear (identity activation).

    Returns
    -------
    dict
        ``'Z<l>'`` and ``'A<l>'`` for ``l = 0 .. L-1``. ``'Z0'`` is a zero
        array shaped like ``X``.

    Raises
    ------
    ValueError
        If ``L`` is smaller than 2 (there is no output layer to compute).
    """
    if L < 2:
        raise ValueError("L must be at least 2 (input layer plus output layer)")

    cache = {'A0': X, 'Z0': np.zeros(X.shape)}
    for l in range(1, L - 1):
        Z = linear_forward(parameters['W' + str(l)], cache['A' + str(l - 1)], parameters['b' + str(l)])
        if activation == 'Relu':
            A = Relu(Z)
        elif activation == 'sigmoid':
            # Matches the backward pass, which applies the sigmoid derivative
            # for this activation name.
            A = sigmoid(Z)
        else:
            A = Z
        cache['Z' + str(l)] = Z
        cache['A' + str(l)] = A

    out = L - 1
    # Index the previous layer explicitly instead of reusing the loop
    # variable, which is unbound when there are no hidden layers (L == 2).
    cache['Z' + str(out)] = linear_forward(parameters['W' + str(out)], cache['A' + str(out - 1)], parameters['b' + str(out)])
    cache['A' + str(out)] = sigmoid(cache['Z' + str(out)])
    return cache


def relu_backward(dA, cache):
    """Back-propagate a gradient through a ReLU.

    ``cache`` is the pre-activation ``Z`` of the layer. Returns a copy of
    ``dA`` in which every position with ``Z <= 0`` is set to 0; ``dA``
    itself is left untouched.
    """
    pre_activation = cache
    grad = np.copy(dA)

    # The ReLU derivative is 0 wherever the unit was inactive.
    inactive = pre_activation <= 0
    grad[inactive] = 0

    assert (grad.shape == pre_activation.shape)
    return grad

def sigmoid_backward(dA, cache):
    """Back-propagate a gradient through a sigmoid.

    ``cache`` is the pre-activation ``Z``. Returns ``dA * s * (1 - s)``
    where ``s = 1 / (1 + exp(-Z))``.
    """
    pre_activation = cache
    sig = 1/(1+np.exp(-pre_activation))
    grad = dA * sig * (1-sig)

    assert (grad.shape == pre_activation.shape)
    return grad




def compute_cost(A,Y):
    """Mean binary cross-entropy between predictions ``A`` and labels ``Y``.

    Parameters
    ----------
    A : numpy.ndarray
        Predicted probabilities.
    Y : numpy.ndarray
        Labels; ``Y.shape[1]`` is used as the number of examples.

    Returns
    -------
    float
        ``sum(-Y*log(A) - (1-Y)*log(1-A)) / Y.shape[1]``.

    Notes
    -----
    A later cell of this notebook defines a TensorFlow function with the same
    name but a different argument order ``(Y, Z2)``; once that cell runs it
    replaces this one.
    """
    # Keep predictions strictly inside (0, 1): an exact 0 or 1 would produce
    # 0 * log(0) = NaN (or an infinite cost), even for a perfect prediction.
    eps = 1e-15
    A = np.clip(A, eps, 1 - eps)
    total = np.sum((-Y*np.log(A)) - ((1-Y) * np.log(1-A)))
    return 1./float(Y.shape[1]) * total

def linear_backward_propagation(parameters, cache, Y, L, activation = 'Relu'):
    """Back-propagate through the network and collect parameter gradients.

    The output-layer error is taken as ``A<L-1> - Y``. Gradients are averaged
    over the ``Y.shape[1]`` examples so that they correspond to a mean
    (per-example) cost rather than a summed one.

    Parameters
    ----------
    parameters : dict
        ``'W<l>'`` entries for ``l = 1 .. L-1``.
    cache : dict
        Forward-pass values ``'A<l>'`` and ``'Z<l>'``. It is modified in
        place: ``'dZ<l>'`` and ``'dA<l>'`` entries are added.
    Y : numpy.ndarray
        Labels, same shape as ``cache['A<L-1>']``.
    L : int
        Number of layers including the input layer.
    activation : str
        ``'Relu'`` or ``'sigmoid'``; any other value (including ``'linear'``)
        is treated as an identity activation.

    Returns
    -------
    (dict, dict)
        The same ``cache`` object and a ``gradients`` dict with ``'dw<l>'``
        and ``'db<l>'`` for ``l = 1 .. L-1``.
    """
    m = float(Y.shape[1])
    cache['dZ' + str(L-1)] = cache['A' + str(L-1)] - Y
    gradients = {}
    for l in reversed(range(1, L)):
        dZ = cache['dZ' + str(l)]
        gradients['dw' + str(l)] = np.dot(dZ, cache['A' + str(l-1)].T) / m
        gradients['db' + str(l)] = np.sum(dZ, axis = 1, keepdims = True) / m

        dA_prev = np.dot(parameters['W' + str(l)].T, dZ)
        cache['dA' + str(l-1)] = dA_prev

        Z_prev = cache['Z' + str(l-1)]
        if activation == 'Relu':
            dZ_prev = relu_backward(dA_prev, Z_prev)
        elif activation == 'sigmoid':
            dZ_prev = sigmoid_backward(dA_prev, Z_prev)
        else:
            # Identity activation. Also the fallback for unrecognised names,
            # so 'dZ<l-1>' is always available to the next iteration.
            dZ_prev = np.array(dA_prev, copy=True)
        cache['dZ' + str(l-1)] = dZ_prev
    return cache,gradients

def update_parameters(parameters, L, gradient, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    For ``l = 1 .. L-1`` the entries ``'W<l>'`` and ``'b<l>'`` of
    ``parameters`` are replaced by ``value - learning_rate * gradient`` using
    ``gradient['dw<l>']`` and ``gradient['db<l>']``. The (updated) input
    dictionary is returned.
    """
    for layer in range(1, L):
        for param_prefix, grad_prefix in (('W', 'dw'), ('b', 'db')):
            key = param_prefix + str(layer)
            parameters[key] = parameters[key] - learning_rate * gradient[grad_prefix + str(layer)]
    return parameters

def L_model_network(layer_dims, X, Y, learning_rate, no_of_iterations, activation = 'Relu'):
    """Train the NumPy network with plain gradient descent.

    Parameters are created with ``initialize_parameter(layer_dims)``; then,
    ``no_of_iterations`` times, a forward pass, a backward pass and a
    parameter update are performed on the full ``X`` / ``Y``. The final
    parameter dictionary is returned.
    """
    n_layers = len(layer_dims)
    params = initialize_parameter(layer_dims)
    for _ in range(no_of_iterations):
        forward_cache = linear_forward_activation(params, X, n_layers, activation)
        # The backward pass also returns the augmented cache; only the
        # gradients are needed for the update.
        _, grads = linear_backward_propagation(params, forward_cache, Y, n_layers, activation)
        params = update_parameters(params, n_layers, grads, learning_rate)
    return params
        

In [166]:
def forward_propagation(X,parameters):
    """Two-layer forward pass built from TensorFlow ops.

    Computes ``W2 . relu(W1 . X + b1) + b2`` from the ``'W1'``, ``'W2'``,
    ``'b1'`` and ``'b2'`` entries of ``parameters`` and returns that value.
    No activation is applied to the output layer here.
    """
    W1, W2, b1, b2 = (parameters[key] for key in ('W1', 'W2', 'b1', 'b2'))

    hidden_pre = tf.add(tf.matmul(W1, X), b1)
    hidden = tf.nn.relu(hidden_pre)
    return tf.add(tf.matmul(W2, hidden), b2)

def compute_cost(Y, Z2):
    """Mean cross-entropy between labels ``Y`` and output logits ``Z2``.

    Both tensors are transposed before the loss is computed, so the last
    axis of the transposed logits indexes the output units. When there is
    exactly one output unit the sigmoid cross-entropy is used; otherwise the
    softmax cross-entropy is used.

    A softmax over a single logit is always 1, which makes the softmax
    cross-entropy identically zero with zero gradient, so a one-unit
    (binary) output would never train with it.

    Note: this function has the same name as the NumPy ``compute_cost(A, Y)``
    defined in an earlier cell and replaces it once this cell has run.

    Parameters
    ----------
    Y : tf.Tensor
        Labels, laid out like ``Z2``.
    Z2 : tf.Tensor
        Output-layer logits (no activation applied).

    Returns
    -------
    tf.Tensor
        Scalar mean loss.
    """
    logits = tf.transpose(Z2)
    labels = tf.transpose(Y)

    static_shape = logits.get_shape()
    n_outputs = static_shape.as_list()[-1] if static_shape.ndims is not None else None

    if n_outputs == 1:
        per_example = tf.nn.sigmoid_cross_entropy_with_logits(logits = logits, labels = labels)
    else:
        per_example = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = labels)
    return tf.reduce_mean(per_example)

def initializer_parameters(n_x=8, n_h=4, n_y=1):
    """Create the TensorFlow variables of the two-layer network.

    Parameters
    ----------
    n_x : int
        Number of input features (default 8).
    n_h : int
        Number of hidden units (default 4).
    n_y : int
        Number of output units (default 1).

    Returns
    -------
    dict
        ``'W1'`` of shape ``[n_h, n_x]`` and ``'W2'`` of shape ``[n_y, n_h]``
        (Xavier-initialised with seed 1), plus ``'b1'`` of shape ``[n_h, 1]``
        and ``'b2'`` of shape ``[n_y, 1]`` (zeros). Called without arguments
        the shapes are the previously hard-coded ones.
    """
    W1 = tf.get_variable("W1", [n_h, n_x], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", [n_h, 1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable("W2", [n_y, n_h], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b2 = tf.get_variable("b2", [n_y, 1], initializer = tf.zeros_initializer())

    parameters = {"W1" : W1,
                 "W2": W2,
                 "b1": b1,
                 "b2": b2}
    return parameters


In [238]:
def model(train_X, train_Y, learning_rate= 0.001, n_epochs = 2000):
    """Train the two-layer TensorFlow network with full-batch gradient descent.

    The default graph is reset, the network is built, and ``n_epochs``
    gradient-descent steps are run on the whole training set. The cost after
    every step is plotted, the training accuracy is printed, and the learned
    parameters are returned.

    Parameters
    ----------
    train_X : array-like
        Inputs fed to a float32 placeholder of shape ``(8, None)``.
    train_Y : array-like
        Labels fed to a float32 placeholder of shape ``(1, None)``.
    learning_rate : float
        Step size for ``tf.train.GradientDescentOptimizer``.
    n_epochs : int
        Number of optimisation steps.

    Returns
    -------
    dict
        The evaluated values of ``'W1'``, ``'W2'``, ``'b1'`` and ``'b2'``.
    """
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape =(8,None))
    Y = tf.placeholder(tf.float32, shape=(1,None))
    parameters = initializer_parameters()

    Z2 = forward_propagation(X,parameters)

    cost = compute_cost(Y,Z2)
    optimizer =tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Turn the sigmoid probabilities into hard 0/1 predictions before
    # comparing with the labels; comparing the raw probabilities for equality
    # would almost never match and report a meaningless accuracy.
    predictions = tf.cast(tf.greater(tf.nn.sigmoid(Z2), 0.5), tf.float32)
    correct_prediction = tf.equal(predictions, Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    final_cost = []
    init = tf.global_variables_initializer()
    feed = {X: train_X, Y: train_Y}
    with tf.Session() as sess:
        sess.run(init)
        for _ in range(n_epochs):
            sess.run(optimizer, feed_dict=feed)
            # Recorded after the update, i.e. the cost of the new parameters.
            final_cost.append(cost.eval(feed))
        plt.plot(range(n_epochs),final_cost)
        plt.xlabel("epoch")
        plt.ylabel("cost")
        plt.show()
        parameters = sess.run(parameters)

        print ("Train Accuracy:", accuracy.eval(feed))

        return parameters