In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, auc, roc_curve
from sklearn.tree import DecisionTreeClassifier, export_graphviz

In [None]:
df = pd.read_csv('data/mushroom/mushrooms.csv')
# Replace the values of every column (the 'class' label included) with integer
# codes. One encoder is refit column by column, so once this cell has run it
# only remembers the categories of the last column.
labelencoder = LabelEncoder()
df = df.assign(**{column: labelencoder.fit_transform(df[column])
                  for column in df.columns})

In [None]:
# Build the train and test sets.
# The features and the 'class' label are split together so their rows stay
# aligned, and a fixed seed makes the 80/20 split identical on every run.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = (
    part.to_numpy()  # the network code below works on plain NumPy arrays
    for part in train_test_split(df.drop(columns='class'), df['class'],
                                 train_size=0.8, random_state=SPLIT_SEED)
)

In [None]:
# Calculate the sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    The exponential is always taken of a non-positive number, so large
    negative inputs no longer overflow (and emit a RuntimeWarning) the way
    the direct formula does.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for 0-dimensional input, otherwise an ndarray with the same
    shape as ``z``.
    """
    z_arr = np.asarray(z)
    # exp(-|z|) lies in (0, 1] for every finite z, so it can never overflow.
    e = np.exp(-np.abs(z_arr))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    s = np.where(z_arr >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return s if np.ndim(z_arr) else s[()]

In [None]:
#Initialize weights
def initialize(n_x, n_h1, n_h2, n_h3, n_y, rng=None):
    """Draw the four weight matrices uniformly from [-0.05, 0.05).

    Every layer except the output gets one extra row/column (the ``+ 1`` in
    the shapes below), matching the bias entry the training loop appends to
    each input vector.

    Parameters
    ----------
    n_x : int
        Number of input features (without the bias entry).
    n_h1, n_h2, n_h3 : int
        Sizes of the three hidden layers (without the extra unit).
    n_y : int
        Number of output units.
    rng : object with a ``uniform(low, high, size)`` method, optional
        For example ``np.random.default_rng(seed)``. When omitted, the global
        ``np.random`` state is used, exactly as before.

    Returns
    -------
    dict
        ``{"W1", "W2", "W3", "W4"}`` mapped to the weight arrays, drawn in
        that order.
    """
    draw = np.random.uniform if rng is None else rng.uniform
    shapes = (
        ("W1", (n_h1 + 1, n_x + 1)),
        ("W2", (n_h2 + 1, n_h1 + 1)),
        ("W3", (n_h3 + 1, n_h2 + 1)),
        ("W4", (n_y, n_h3 + 1)),
    )
    return {name: draw(-0.05, 0.05, shape) for name, shape in shapes}

In [None]:
#Return layer sizes
def layer_sizes(X, Y, n_h1, n_h2, n_h3):
    """Arrange the given layer widths in network order.

    X     -- value returned as the input layer size
    Y     -- value returned as the output layer size
    n_h1, n_h2, n_h3 -- sizes of the three hidden layers

    All five values are passed through unchanged; only their order differs
    from the argument order.
    """
    return (X, n_h1, n_h2, n_h3, Y)

In [None]:
#Forward propagation
def forward_propagation(X, parameters):
    """Run one input through the network.

    X          -- input passed to the first layer (the training loop appends
                  a trailing 1 as the bias entry before calling this)
    parameters -- dict holding the weight matrices "W1" .. "W4"

    Layers 1-3 apply tanh to ``W @ previous activation``; layer 4 applies the
    sigmoid. Returns ``(A4, cache)`` where ``cache`` stores every
    pre-activation ``Z1..Z4`` and activation ``A1..A4``.
    """
    cache = {}
    signal = X
    # The three hidden layers share the same recipe: linear map, then tanh.
    for layer in (1, 2, 3):
        pre_activation = np.dot(parameters["W%d" % layer], signal)
        signal = np.tanh(pre_activation)
        cache["Z%d" % layer] = pre_activation
        cache["A%d" % layer] = signal

    # Output layer: linear map followed by the sigmoid.
    cache["Z4"] = np.dot(parameters["W4"], signal)
    cache["A4"] = sigmoid(cache["Z4"])
    return cache["A4"], cache

In [None]:
# Back propagation
def backward_propagation(parameters, cache, X, Y, learning_rate, alpha, prev_dWkj, prev_dWji, prev_dWjj2, prev_dWjj3, n_h1, n_h2, n_h3, num_features):
    """Compute the weight changes for a single training example.

    parameters    -- dict with the weight matrices ("W2", "W3", "W4" are read)
    cache         -- dict with the activations "A1" .. "A4" of the forward pass
    X             -- the input vector that produced ``cache``
    Y             -- label of the example; 1 is trained towards 0.9, any
                     other value towards 0.1
    learning_rate -- step size applied to each gradient term
    alpha         -- momentum factor applied to the previous weight changes
    prev_dW*      -- weight changes returned for the previous example
    n_h1..n_h3    -- hidden layer sizes, used to reshape the error terms
    num_features  -- accepted but not used by this function

    The output error is reshaped to (1, 1), so a single output unit is
    assumed. Returns a dict with "dWkj" (for W4), "dWjj3" (for W3),
    "dWjj2" (for W2) and "dWji" (for W1); the values are meant to be ADDED
    to the weights.
    """
    W2, W3, W4 = (parameters[name] for name in ("W2", "W3", "W4"))

    hidden1 = cache["A1"]
    hidden2 = cache["A2"]
    hidden3 = np.reshape(cache["A3"], (n_h3 + 1, 1))
    output = cache["A4"]

    # Softened targets (0.9 / 0.1) instead of hard 1 / 0.
    soft_target = 0.9 if Y == 1 else 0.1

    # Error term of the sigmoid output unit.
    delta_out = np.reshape(output * (1 - output) * (soft_target - output), (1, 1))

    # Error terms of the tanh layers, propagated from the output backwards.
    delta_h3 = np.reshape((1 - hidden3 ** 2) * np.dot(W4.T, delta_out), (n_h3 + 1, 1))
    delta_h2 = np.reshape((1 - hidden2 ** 2) * np.dot(W3.T, delta_h3).T, (n_h2 + 1, 1))
    delta_h1 = np.reshape((1 - hidden1 ** 2) * np.dot(W2.T, delta_h2).T, (n_h1 + 1, 1))

    # Each weight change = learning-rate step + momentum on the previous change.
    # Column-vector error times 1-D activation broadcasts to an outer product.
    return {
        "dWkj": learning_rate * delta_out * hidden3.T + alpha * prev_dWkj,
        "dWjj3": learning_rate * delta_h3 * hidden2.T + alpha * prev_dWjj3,
        "dWjj2": learning_rate * delta_h2 * hidden1.T + alpha * prev_dWjj2,
        "dWji": learning_rate * delta_h1 * X + alpha * (prev_dWji),
    }

In [None]:
# Update weight
def update_parameters(parameters, grads):
    """Apply one set of weight changes and return the new weights.

    parameters -- dict with the current weights "W1" .. "W4"
    grads      -- dict with the changes "dWji", "dWjj2", "dWjj3", "dWkj"

    The changes are added to the weights. New arrays are created; the
    arrays stored in ``parameters`` are left untouched.
    """
    # Which change belongs to which weight matrix (input side first).
    weight_to_change = (
        ("W1", "dWji"),    # input -> hidden 1
        ("W2", "dWjj2"),   # hidden 1 -> hidden 2
        ("W3", "dWjj3"),   # hidden 2 -> hidden 3
        ("W4", "dWkj"),    # hidden 3 -> output
    )
    return {weight: parameters[weight] + grads[change]
            for weight, change in weight_to_change}

In [None]:
# Check whether the network output, thresholded at 0.5, matches the label
def predict(parameters, X, Y):
    """Tell whether the network classifies one example correctly.

    parameters -- dict with the weight matrices used by forward_propagation
    X          -- input vector handed to forward_propagation as-is
    Y          -- expected label (0 or 1)

    The network output is thresholded at 0.5 to obtain the predicted label.
    Returns 1 when that label equals ``Y`` and 0 otherwise.
    """
    output, _ = forward_propagation(X, parameters)
    label = int(bool(output >= 0.5))
    return int(bool(label == Y))

In [None]:
# Training the model
def model(train_x, train_y, test_x, test_y, n_h1, n_h2, n_h3, num_iterations, num_features,
          learning_rate=0.1, alpha=0):
    """Train the network with per-example updates and score it after every epoch.

    Parameters
    ----------
    train_x, test_x : 2-D arrays, one example per row
    train_y, test_y : label sequences aligned with the rows (0 or 1)
    n_h1, n_h2, n_h3 : int
        Sizes of the three hidden layers.
    num_iterations : int
        Number of epochs (full passes over ``train_x``).
    num_features : int
        Forwarded to ``backward_propagation``. The input width itself is
        taken from ``train_x.shape[1]``, which is also what now sizes the
        momentum buffer for W1, so both always agree.
    learning_rate : float, optional
        Step size for the weight updates (default 0.1, the value that was
        previously hard-coded).
    alpha : float, optional
        Momentum factor (default 0, the value that was previously hard-coded).

    Returns
    -------
    accuracy_train, accuracy_test : list of float
        Accuracy in percent, one entry per epoch. Training accuracy is
        measured on the fly, i.e. before the update for each example.
    parameters : dict
        Final weights "W1" .. "W4".
    confusion_matrix_train, confusion_matrix_test : list
        One confusion matrix per epoch.
    """
    n_x, n_h1, n_h2, n_h3, n_y = layer_sizes(train_x.shape[1], 1, n_h1, n_h2, n_h3)

    # Initialize weights
    parameters = initialize(n_x, n_h1, n_h2, n_h3, n_y)

    # Previous weight changes (for the momentum term); shapes mirror W4 .. W1.
    prev_dWkj = np.zeros((n_y, n_h3 + 1))
    prev_dWjj3 = np.zeros((n_h3 + 1, n_h2 + 1))
    prev_dWjj2 = np.zeros((n_h2 + 1, n_h1 + 1))
    prev_dWji = np.zeros((n_h1 + 1, n_x + 1))

    accuracy_train = []
    accuracy_test = []
    confusion_matrix_train = []
    confusion_matrix_test = []

    # Loop through the epochs
    for _ in range(num_iterations):
        # ---- training pass: predict, then update on every example ----
        actual_list_train = []
        pred_list_train = []
        correct_train = 0
        for j in range(len(train_x)):
            label = train_y[j]
            # Add bias node
            X = np.append(train_x[j], 1)

            A4, cache = forward_propagation(X, parameters)
            # Reuse this forward pass for the prediction instead of running
            # a second, identical one.
            predicted = 1 if A4 >= 0.5 else 0

            actual_list_train.append(label)
            pred_list_train.append(predicted)
            if predicted == label:
                correct_train += 1

            grads = backward_propagation(parameters, cache, X, label, learning_rate, alpha,
                                         prev_dWkj, prev_dWji, prev_dWjj2, prev_dWjj3,
                                         n_h1, n_h2, n_h3, num_features)
            prev_dWkj = grads["dWkj"]
            prev_dWji = grads["dWji"]
            prev_dWjj2 = grads["dWjj2"]
            prev_dWjj3 = grads["dWjj3"]

            parameters = update_parameters(parameters, grads)

        accuracy_train.append(float(correct_train) / len(train_y) * 100)
        confusion_matrix_train.append(confusion_matrix(actual_list_train, pred_list_train))

        # ---- evaluation pass on the test set (no weight updates) ----
        actual_list_test = []
        pred_list_test = []
        correct_test = 0
        for j in range(len(test_x)):
            label = test_y[j]
            # Add bias node
            X = np.append(test_x[j], 1)

            A4, _cache = forward_propagation(X, parameters)
            predicted = 1 if A4 >= 0.5 else 0

            actual_list_test.append(label)
            pred_list_test.append(predicted)
            if predicted == label:
                correct_test += 1

        accuracy_test.append(float(correct_test) / len(test_y) * 100)
        confusion_matrix_test.append(confusion_matrix(actual_list_test, pred_list_test))

    return accuracy_train, accuracy_test, parameters, confusion_matrix_train, confusion_matrix_test

In [None]:
# model() draws its initial weights from NumPy's global random state (via
# initialize), so seed it here to make the training run repeatable.
np.random.seed(0)
# Three hidden layers of 3 units each, trained for 50 epochs. The feature
# count is read from the data instead of being hard-coded.
acc_train, acc_test, parameters, cm_train, cm_test = model(
    X_train, y_train, X_test, y_test, 3, 3, 3, 50, X_train.shape[1])

In [None]:
# Accuracy (in percent) per epoch for both data sets
plt.plot(acc_train, label='train')
plt.plot(acc_test, label='test')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend()
# Call show(); the bare name `plt.show` only referenced the function
plt.show()

# One confusion matrix per epoch for the test set
for epoch_cm in cm_test:
    print(epoch_cm)