<b> Data Mining Homework 4 </b>
 

<i>:: importing dependency libraries</i>

In [61]:
import time
import numpy as np
import matplotlib.pyplot as plt


In [62]:
import cloudpickle as pickle
# Load the pickled MNIST (digits 2 vs 3) dataset object; it exposes `.data` and `.target`.
# The context manager guarantees the file handle is closed once unpickling finishes.
# NOTE(review): unpickling can execute arbitrary code -- only load dataset files from a trusted source.
with open("./datasets/mnist23.data", "rb") as dataset_file:
    mnist23 = pickle.load(dataset_file)
from sklearn.decomposition import PCA

<i>:: initializing training and testing data</i>

In [63]:
# Split point of the dataset: the first `training_samples` rows are used for training,
# and every row after that is used for validation.
training_samples = 10000


In [91]:
def loadTrainingData():
    """Return the training split of the global `mnist23` dataset.

    Returns:
        (features, labels): `features` is the first `training_samples` rows of
        `mnist23.data`; `labels` is the matching slice of `mnist23.target`
        wrapped in an outer list so the resulting array gains a leading axis
        of length 1.
    """
    split = slice(None, training_samples)
    features = mnist23.data[split]
    labels = np.array([mnist23.target[split]])
    return features, labels

In [92]:
def loadValidationData():
    """Return the validation split of the global `mnist23` dataset.

    Returns:
        (features, labels): `features` is every row of `mnist23.data` from
        index `training_samples` onward; `labels` is the matching slice of
        `mnist23.target` wrapped in an outer list so the resulting array gains
        a leading axis of length 1.
    """
    split = slice(training_samples, None)
    features = mnist23.data[split]
    labels = np.array([mnist23.target[split]])
    return features, labels

<i> :: normalizing the data </i>

In [93]:
def normalize(X,y):
    """Centre the features and shift the labels.

    Args:
        X: feature array; the mean is taken along axis 0 (per column).
        y: label array.

    Returns:
        (norm_x, y): `norm_x` is `X` with its own column means subtracted and
        the result divided by 255; `y` is the labels minus 2 (so labels 2 and 3
        become 0 and 1). Neither input is modified in place.
    """
    # The mean is recomputed from whatever X is passed in on each call.
    centered = X - X.mean(axis=0)
    return centered / 255, y - 2

<i> :: define forward activation functions </i>

In [67]:
def sigmoid(Z):
    """Element-wise logistic sigmoid.

    Args:
        Z: pre-activation values.

    Returns:
        (A, cache): `A = 1 / (1 + exp(-Z))`, and `cache` is the very same `Z`
        object, handed back so the backward pass can reuse it.
    """
    activated = 1/(1+np.exp(-Z))
    return activated, Z

def relu(Z):
    """Element-wise rectified linear unit.

    Args:
        Z: pre-activation array (must expose `.shape`).

    Returns:
        (A, cache): `A = max(0, Z)` element-wise, and `cache` is the very same
        `Z` object, handed back so the backward pass can reuse it.
    """
    activated = np.maximum(0,Z)

    # Sanity check: the activation must not change the shape of its input.
    assert(activated.shape == Z.shape)

    return activated, Z

<i> :: define backward activation functions</i>

In [68]:
def relu_backward(dA, cache):
    """Backward pass through a ReLU unit.

    Args:
        dA: gradient of the cost with respect to the ReLU output.
        cache: the pre-activation `Z` stored by the forward pass.

    Returns:
        dZ: a copy of `dA` with every entry zeroed where `Z <= 0`.
        `dA` itself is left untouched.
    """
    grad = np.array(dA, copy=True)

    # The gradient only flows through units whose pre-activation was positive.
    inactive = cache <= 0
    grad[inactive] = 0

    return grad

def sigmoid_backward(dA, cache):
    """Backward pass through a sigmoid unit.

    Args:
        dA: gradient of the cost with respect to the sigmoid output.
        cache: the pre-activation `Z` stored by the forward pass.

    Returns:
        dZ: `dA * s * (1 - s)` where `s = sigmoid(Z)`.
    """
    # The sigmoid is recomputed from the cached pre-activation rather than stored.
    sig = 1/(1+np.exp(-cache))
    return dA * sig * (1-sig)

<i> :: initialize network parameters -- weights and bias</i>

In [69]:
def initialize_parameters_deep(layers_dims):
    """Create the initial weights and biases for every layer.

    Args:
        layers_dims: sequence of layer sizes, input layer first.

    Returns:
        dict with keys "W1", "b1", ..., "WL", "bL". `Wl` has shape
        (layers_dims[l], layers_dims[l-1]) and is drawn from a standard normal
        scaled by sqrt(2 / layers_dims[l-1]); `bl` is a zero column of shape
        (layers_dims[l], 1).

    Side effect: reseeds NumPy's global random generator with 3, so the
    returned weights are the same on every call.
    """
    np.random.seed(3)
    parameters = {}

    # Walk over consecutive (fan_in, fan_out) pairs; layers are numbered from 1.
    size_pairs = zip(layers_dims[:-1], layers_dims[1:])
    for layer, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        parameters['W' + str(layer)] = np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in)
        parameters['b' + str(layer)] = np.zeros((fan_out, 1))

    return parameters

<i> :: forward propagation based on computation graph </i>

In [70]:
def linear_forward(A, W, b):
    """Linear part of one layer's forward step.

    Args:
        A: activations coming from the previous layer (or the input data).
        W: weight matrix of the layer.
        b: bias of the layer, added to the product by broadcasting.

    Returns:
        (Z, cache): `Z = W.dot(A) + b`, and `cache = (A, W, b)` holding the
        same input objects for the backward pass.
    """
    inputs = (A, W, b)
    pre_activation = W.dot(A) + b
    return pre_activation, inputs

def linear_activation_forward(A_prev, W, b, activation):
    """Forward step of one layer: linear transform followed by an activation.

    Args:
        A_prev: activations from the previous layer (or the input data).
        W: weight matrix of the layer.
        b: bias of the layer.
        activation: either "sigmoid" or "relu".

    Returns:
        (A, cache): `A` is the layer output and `cache` is
        `(linear_cache, activation_cache)` as produced by `linear_forward`
        and the chosen activation function.

    Raises:
        ValueError: if `activation` is not one of the supported names.
    """
    # Resolve the activation first so that an unsupported name fails with a
    # clear message instead of an UnboundLocalError further down.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            "Unsupported activation %r; expected 'sigmoid' or 'relu'" % (activation,)
        )

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    cache = (linear_cache, activation_cache)

    return A, cache

def L_model_forward(X, parameters):
    """Run the full forward pass through the network.

    Layers 1 .. L-1 use a ReLU activation and the final layer L uses a
    sigmoid, where L is half the number of entries in `parameters`.

    Args:
        X: input data fed to the first layer.
        parameters: dict holding "W1", "b1", ..., "WL", "bL".

    Returns:
        (AL, caches): `AL` is the output of the last layer and `caches` is the
        list of per-layer caches in layer order (L entries).
    """
    depth = len(parameters) // 2
    caches = []
    activations = X

    # Hidden layers: linear -> relu.
    for layer in range(1, depth):
        weights = parameters['W' + str(layer)]
        bias = parameters['b' + str(layer)]
        activations, layer_cache = linear_activation_forward(activations, weights, bias, activation = "relu")
        caches.append(layer_cache)

    # Output layer: linear -> sigmoid.
    AL, output_cache = linear_activation_forward(
        activations,
        parameters['W' + str(depth)],
        parameters['b' + str(depth)],
        activation = "sigmoid",
    )
    caches.append(output_cache)

    return AL, caches

<i> :: backward propagation based on computation graph </i>

In [71]:
def linear_backward(dZ, cache,lambd=0.65):
    """Backward step through the linear part of one layer.

    Args:
        dZ: gradient of the cost with respect to the layer's pre-activation.
        cache: `(A_prev, W, b)` as stored by `linear_forward`.
        lambd: coefficient of the weight-decay term added to `dW`.

    Returns:
        (dA_prev, dW, db) where, with m = A_prev.shape[1]:
            dW      = (1/m) * dZ . A_prev^T + (lambd * W) / m
            db      = (1/m) * sum of dZ over axis 1 (dimensions kept)
            dA_prev = W^T . dZ
    """
    prev_activations, weights, _ = cache
    sample_count = prev_activations.shape[1]

    grad_prev = np.dot(weights.T,dZ)
    grad_bias = 1./sample_count * np.sum(dZ, axis = 1, keepdims = True)
    # Data term plus the weight-decay term scaled by lambd.
    grad_weights = 1./sample_count * np.dot(dZ,prev_activations.T)  + (lambd * weights) / sample_count

    return grad_prev, grad_weights, grad_bias

def linear_activation_backward(dA, cache, activation):
    """Backward step of one layer: activation gradient, then linear gradient.

    Args:
        dA: gradient of the cost with respect to this layer's output.
        cache: `(linear_cache, activation_cache)` stored by the forward step.
        activation: either "relu" or "sigmoid"; must match the forward step.

    Returns:
        (dA_prev, dW, db) as computed by `linear_backward` (called with its
        default `lambd`).

    Raises:
        ValueError: if `activation` is not one of the supported names.
    """
    linear_cache, activation_cache = cache

    # Resolve the activation derivative first so that an unsupported name fails
    # with a clear message instead of an UnboundLocalError at the return.
    if activation == "relu":
        backward_fn = relu_backward
    elif activation == "sigmoid":
        backward_fn = sigmoid_backward
    else:
        raise ValueError(
            "Unsupported activation %r; expected 'sigmoid' or 'relu'" % (activation,)
        )

    dZ = backward_fn(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

def L_model_backward(AL, Y, caches):
    """Run the full backward pass and collect the gradients of every layer.

    Args:
        AL: output of the forward pass (sigmoid activations of the last layer).
        Y: true labels; reshaped to the shape of `AL`.
        caches: list of per-layer caches returned by `L_model_forward`.

    Returns:
        dict `grads` with, for every layer l in 1..L, the entries
        "dW<l>" and "db<l>" (parameter gradients of layer l) and "dA<l>".
        Note the indexing: "dA<l>" holds the `dA_prev` value returned while
        processing layer l (i.e. the gradient with respect to that layer's
        input); the loop below reads "dA<l+1>" when back-propagating through
        layer l.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the binary cross-entropy with respect to AL.
    # NOTE(review): this divides by AL and (1 - AL); if the sigmoid output is
    # exactly 0 or 1 the result contains inf/nan.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid).
    current_cache = caches[L-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid")

    # Hidden layers (relu), walking from layer L-1 down to layer 1.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, activation = "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

<i>:: Computing cost for the network</i>

In [72]:
def compute_cost(AL, Y):
    """Average binary cross-entropy between predictions and labels.

    Args:
        AL: predicted probabilities; the code treats it as a row of shape
            (1, m) when forming the dot products.
        Y: true labels with the same layout; m is read from `Y.shape[1]`.

    Returns:
        The cost with all length-1 axes squeezed away.

    The probabilities are clipped to [1e-15, 1 - 1e-15] before taking the
    logarithm. Without this, an output that saturates to exactly 0 or 1 makes
    the cost NaN (0 * log(0)) or infinite. Values inside that interval are
    unaffected; as a consequence the per-sample loss is capped at
    -log(1e-15), roughly 34.5.
    """
    m = Y.shape[1]
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    cost = (1./m) * (-np.dot(Y,np.log(AL_safe).T) - np.dot(1-Y, np.log(1-AL_safe).T))
    cost = np.squeeze(cost)
    return cost


<i> :: updating weights after each iteration </i>

In [73]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer.

    Args:
        parameters: dict holding "W1", "b1", ..., "WL", "bL".
        grads: dict holding the matching "dW<l>" and "db<l>" entries.
        learning_rate: step size.

    Returns:
        The same `parameters` dict, whose entries have been rebound to the
        updated arrays (`value - learning_rate * gradient`).
    """
    layer_count = len(parameters) // 2
    for layer in range(1, layer_count + 1):
        suffix = str(layer)
        for name in ("W", "b"):
            key = name + suffix
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters

<i>:: L layer neural network model function</i>

In [74]:
def L_layer_model(X, Y, layers_dims, learning_rate=0.085, num_iterations=5000, print_cost=False):
    """Train the L-layer network with batch gradient descent.

    Args:
        X: training inputs passed to `L_model_forward`.
        Y: training labels.
        layers_dims: layer sizes passed to `initialize_parameters_deep`.
        learning_rate: step size of each parameter update.
        num_iterations: number of full forward/backward passes.
        print_cost: when true, print the cost every 100 iterations.

    Returns:
        The trained `parameters` dict.
    """
    np.random.seed(1)
    # Collected only while print_cost is on; not part of the return value.
    cost_history = []

    parameters = initialize_parameters_deep(layers_dims)

    for step in range(num_iterations):
        # forward pass -> cost -> backward pass -> parameter update
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        if not print_cost or step % 100 != 0:
            continue
        print ("Cost after iteration %i: %f" % (step, cost))
        cost_history.append(cost)

    return parameters

<i>:: given features and true labels, predict y' and return the predictions together with their accuracy </i>

In [90]:
def predict(X, y, parameters):
    """Classify the samples in `X` and score the result against `y`.

    Args:
        X: inputs passed to `L_model_forward`; the sample count is `X.shape[1]`.
        y: true labels in the 0/1 encoding, compared element-wise with the
            thresholded predictions.
        parameters: trained parameters of the network.

    Returns:
        (p, accuracy): `p` is a (1, m) integer array of predicted labels
        shifted by +2 (so 0/1 become 2/3), and `accuracy` is the fraction of
        0/1 predictions equal to `y`.
    """
    sample_count = X.shape[1]

    # Forward propagation
    probas, _ = L_model_forward(X, parameters)

    # Threshold the first output row at 0.5: above -> 1, otherwise -> 0.
    labels = np.zeros((1, sample_count), dtype=int)
    labels[0, :] = probas[0, :] > 0.5

    accuracy = np.sum(labels == y)/float(sample_count)

    return labels + 2, accuracy

<h2><b>:: Training section with prediction </b></h2>

In [98]:
# Training pipeline: load -> normalize -> PCA -> train -> score on the training set.
# The globals `pca` and `parameters` created here are reused by the later
# validation and test cells.
train_x, train_y = loadTrainingData()

# Centre/scale the pixels and map labels 2/3 to 0/1.
train_x, train_y = normalize(train_x,train_y)

# Reduce each sample to 250 principal components; the projection is fitted on
# the training data only.
pca = PCA(n_components=250)

pca.fit(train_x)

# applying pca (dimensionality reduction) to data
train_x = pca.transform(train_x)

# The network code treats columns as samples (it reads the sample count from
# shape[1]), so transpose the PCA output.
train_x = train_x.T

# define number of hidden layers and number of activation units for each layer
# (input size, then hidden layers of 20, 7 and 3 units, then a single output unit)
layers_dims = [train_x.shape[0], 20, 7, 3, 1]

parameters = L_layer_model(train_x,train_y , layers_dims, num_iterations=300, print_cost=True)

# predict on training data.... should be close to 100%
# predict returns (predictions, accuracy); only the accuracy is reported here.
training_prediction = predict(train_x, train_y, parameters)

training_accuracy = training_prediction[1]

print("Training data accuracy :", str(training_accuracy))

Cost after iteration 0: 0.829173
Cost after iteration 100: 0.101265
Cost after iteration 200: 0.067833
Accuracy: 0.983
Training data accuracy : 0.983


<h2><b> Validating the given parameters </b></h2>


In [82]:
# Validation pipeline: reuses the `pca` projection and the trained `parameters`
# from the training cell, so that cell must have been run first.
validation_x, validation_y = loadValidationData()

# NOTE(review): normalize() subtracts the column means of the array it is given,
# so the validation data is centred on its own mean rather than on the training
# mean -- confirm this is intended.
validation_x, validation_y = normalize(validation_x,validation_y)

# applying pca (dimensionality reduction) to data
validation_x = pca.transform(validation_x)

# Columns are samples for the network code, hence the transpose.
validation_x = validation_x.T

# predict returns (predictions, accuracy); only the accuracy is reported here.
validation_prediction = predict(validation_x, validation_y, parameters)

validation_accuracy = validation_prediction[1]

print("Validation data accuracy :", str(validation_accuracy))

Accuracy: 0.9824727617243013


<h2> <b> Predicting on Test data  </b> </h2>

<i> :: Loading test data </i>

In [83]:
def loadTestData():
    """Load the test dataset from disk.

    Returns:
        (test_x, test_y): `test_x` is the `.data` attribute of the unpickled
        object and `test_y` is its `.target` wrapped in an outer list so the
        resulting array gains a leading axis of length 1.

    The features and labels are taken from the object loaded here rather than
    from the module-level `mnist23` global, so pointing the path at a separate
    test file takes effect.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load dataset
    # files from a trusted source.
    # The context manager closes the file handle once unpickling finishes.
    with open("./datasets/mnist23.data", "rb") as dataset_file:
        mnist23_test = pickle.load(dataset_file)
    test_x = mnist23_test.data
    test_y = np.array([mnist23_test.target])
    return test_x,test_y

<i> :: predicting result for the test data </i>

In [97]:
# Test pipeline: reuses the `pca` projection and the trained `parameters`
# from the training cell, so that cell must have been run first.
# NOTE(review): loadTestData draws on the same ./datasets/mnist23.data dataset
# that the training and validation splits come from, so this is not an
# independent test set -- confirm the intended test file.
test_x, test_y = loadTestData()

# NOTE(review): normalize() centres the data on the mean of the array it is
# given, not on the training mean -- confirm this is intended.
test_x, test_y = normalize(test_x,test_y)

# applying pca (dimensionality reduction) to data
test_x = pca.transform(test_x)

# Columns are samples for the network code, hence the transpose.
test_x = test_x.T

# predict returns (predictions, accuracy); only the accuracy is reported here.
test_prediction = predict(test_x, test_y, parameters)

test_accuracy = test_prediction[1]

print("Test data accuracy :", str(test_accuracy))

Accuracy: 0.9829081000743126
Training data accuracy : [[2 2 3 ... 3 2 3]]
