In [1]:
# import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the e-commerce dataset.
# Forward slashes are used on purpose: the previous "\D" and "\e" sequences
# are invalid string escapes (a SyntaxWarning on recent Python versions) and a
# backslash-separated path only resolves on Windows. "/" works on every OS.
df = pd.read_csv("./Datasets/ecommerce_data.csv", encoding="UTF-8")

In [3]:
# Standardise the two continuous columns to zero mean and unit standard
# deviation (z-score). Mean and std are both taken from the column as it is
# before the assignment on that line.
for column in ("n_products_viewed", "visit_duration"):
    centered = df[column] - df[column].mean()
    df[column] = centered / df[column].std()

In [4]:
# Split the frame: every column except the last is an input feature (X),
# the last column is the target (Y).
last = df.shape[1] - 1
X = df.iloc[:, :last].to_numpy()
Y = df.iloc[:, last].to_numpy()

In [5]:
# One-hot encode the last column of X ("time_of_day") and append the
# indicator columns in its place.
time_codes = X[:, -1].astype(int)                 # category index per row
n_slots = (X[:, -1].max() + 1).astype(np.int32)   # one column per category value

V = np.zeros((X.shape[0], n_slots))
V[np.arange(X.shape[0]), time_codes] = 1          # set one indicator per row

# Drop the raw categorical column and attach its one-hot expansion.
X = np.hstack((X[:, :-1], V))

In [6]:
# One-hot encode the target Y into the indicator matrix T:
# one row per sample, one column per distinct label value.
n_samples = Y.shape[0]
T = np.zeros((n_samples, np.unique(Y).shape[0]))
T[np.arange(n_samples), Y] = 1

In [21]:
def oneHotEncode(y):
    """Build a one-hot indicator matrix from integer class labels.

    Parameters
    ----------
    y : array-like
        Non-negative integer class labels, one per sample. Integer-valued
        floats (e.g. ``2.0``) are cast to integers. A column vector is
        flattened.

    Returns
    -------
    numpy.ndarray of shape (N, K)
        ``T[i, k] == 1`` when sample ``i`` carries label ``k`` and 0
        elsewhere, with ``K = max(y) + 1``. An empty input gives a
        ``(0, 0)`` array.

    Raises
    ------
    ValueError
        If any label is negative (it would otherwise wrap around and mark
        the wrong column).
    """
    labels = np.asarray(y).reshape(-1)
    if labels.size == 0:
        return np.zeros((0, 0))

    labels = labels.astype(np.int64)
    if labels.min() < 0:
        raise ValueError("class labels must be non-negative integers")

    # Size the matrix by the largest label rather than by the number of
    # distinct labels: with gaps in the label set (e.g. [0, 2]) the distinct
    # count is too small and indexing column 2 would fail.
    T = np.zeros((labels.shape[0], int(labels.max()) + 1))
    T[np.arange(labels.shape[0]), labels] = 1

    return T

In [22]:
def nonlinearDT(z, func):
    """Derivative of a hidden-layer activation, written in terms of its output.

    Parameters
    ----------
    z : float or numpy.ndarray
        The activation *output* ``a = f(x)`` (not the pre-activation ``x``);
        both formulas below are expressed through ``a``.
    func : str
        Name of the activation: ``"sigmoid"`` or ``"tanh"``.

    Returns
    -------
    Same shape as ``z``: ``a * (1 - a)`` for sigmoid, ``1 - a**2`` for tanh.

    Raises
    ------
    ValueError
        If ``func`` is not a supported activation name. Previously the
        function fell through and returned ``None``, which only surfaced
        later as an obscure arithmetic error in the caller.
    """
    if func == "sigmoid":
        return z*(1-z)
    if func == "tanh":
        return (1-z**2)
    raise ValueError(
        "unsupported activation %r; expected 'sigmoid' or 'tanh'" % (func,)
    )

In [7]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise."""
    decay = np.exp(-z)
    return 1 / (decay + 1)

In [15]:
def tanh(z):
    """Hyperbolic tangent, applied element-wise.

    Delegates to ``np.tanh`` instead of evaluating
    ``(e^z - e^-z) / (e^z + e^-z)`` directly: the explicit quotient overflows
    to ``inf / inf = nan`` once ``|z|`` exceeds roughly 710, whereas
    ``np.tanh`` saturates cleanly at +/-1.

    The function keeps the name ``tanh`` because backProp looks up the
    derivative through ``activation_func.__name__``.
    """
    return np.tanh(z)

In [14]:
def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    Parameters
    ----------
    z : numpy.ndarray of shape (N, K)
        One row of logits per sample.

    Returns
    -------
    numpy.ndarray of shape (N, K)
        Each row is non-negative and sums to 1.

    Notes
    -----
    The row maximum is subtracted before exponentiating. Softmax is invariant
    to adding a constant per row, so the result is mathematically unchanged,
    but the largest exponent becomes ``exp(0) = 1`` and large logits no
    longer overflow to ``inf`` (which used to produce ``nan`` rows).
    """
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)  # computed once and reused for the normaliser
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [16]:
def cost(t,y):
    """Cross-entropy between one-hot targets and predicted probabilities.

    Parameters
    ----------
    t : array-like of shape (N, K)
        Target indicator matrix.
    y : array-like of shape (N, K)
        Predicted class probabilities.

    Returns
    -------
    float
        ``-mean(t * log(y))`` taken over all ``N * K`` entries (so the value
        is the per-sample cross-entropy divided by ``K``).

    Notes
    -----
    Only entries with a non-zero target contribute. This applies the usual
    convention ``0 * log(0) = 0``: a probability that underflows to exactly 0
    for a *non-target* class previously produced ``0 * -inf = nan`` and
    poisoned the whole mean. A zero probability on the *target* class still
    yields an infinite cost, as it should.
    """
    t = np.asarray(t, dtype=float)
    y = np.asarray(y, dtype=float)
    # log(0) and 0 * -inf are evaluated and then discarded by np.where, so
    # their floating-point warnings are suppressed here.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(t != 0, t * np.log(y), 0.0)
    return -np.mean(terms)

In [12]:
def predict(y):
    """Return, for each row of ``y``, the column index holding the largest value."""
    scores = np.asarray(y)
    return scores.argmax(axis=1)

In [13]:
def accuracy(y, yhat):
    """Fraction of positions where ``y`` and ``yhat`` compare equal."""
    matches = (y == yhat)
    return np.mean(matches)

In [17]:
def forwardProp(X,T,h_layers,lr,activation_func,W,B,A,n,random_state):
    """Run one forward pass, initialising the parameters on the first call.

    Parameters
    ----------
    X : numpy.ndarray of shape (N, D)
        Input features.
    T : numpy.ndarray of shape (N, K)
        One-hot targets; only ``T.shape[1]`` (the number of output units) is
        used here.
    h_layers : sequence of int
        Width of each hidden layer. May be empty, in which case the network
        is a single softmax layer.
    lr : float
        Unused in the forward pass; kept so the signature mirrors backProp.
    activation_func : callable
        Element-wise activation applied to every hidden layer.
    W, B, A : dict
        Per-layer weights, biases and activations keyed by layer index
        ``0..L`` (``L = len(h_layers)`` is the output layer). Updated in place.
    n : int
        Iteration counter. Parameters are (re)drawn only when ``n == 0``.
    random_state : int or None
        Seed for the initial parameter draw.

    Returns
    -------
    (W, B, A) : tuple of dict
        The same dictionaries, with ``A[L]`` holding the softmax output.
    """
    L = len(h_layers) # N.Hidden Layers
    # Unit counts at every layer boundary: input, each hidden layer, output.
    sizes = [X.shape[1]] + list(h_layers) + [T.shape[1]]

    if n == 0:
        # Seed only when parameters are actually drawn. Reseeding on every
        # call would keep resetting NumPy's global RNG for any other code
        # that uses it between iterations.
        np.random.seed(random_state)

    for i in range(L+1): # including output layer
        if n == 0:
            # Same draw order as before (W then B, layer by layer), so a given
            # seed still produces the same initial parameters.
            W[i] = np.random.randn(sizes[i], sizes[i+1])
            B[i] = np.random.randn(sizes[i+1])

        layer_input = X if i == 0 else A[i-1]
        Z = np.dot(layer_input, W[i]) + B[i]

        # Output layer is softmax (multiclass classification); hidden layers
        # use the caller-supplied activation.
        A[i] = softmax(Z) if i == L else activation_func(Z)

    return W,B,A

In [18]:
def backProp(X,T,h_layers,lr,activation_func,W,B,A):
    """Apply one gradient step to every layer using the stored activations.

    Parameters
    ----------
    X : numpy.ndarray of shape (N, D)
        Input features (the input to layer 0).
    T : numpy.ndarray of shape (N, K)
        One-hot targets.
    h_layers : sequence of int
        Hidden-layer widths; only its length is used. May be empty.
    lr : float
        Learning rate.
    activation_func : callable
        Hidden activation. Its ``__name__`` is passed to ``nonlinearDT`` to
        select the derivative, so it is only read when hidden layers exist.
    W, B : dict
        Per-layer weights and biases keyed ``0..L``; updated in place.
    A : dict
        Per-layer activations from the matching forward pass; ``A[L]`` is the
        softmax output.

    Returns
    -------
    (W, B, Y) : tuple
        Updated parameters and ``Y = A[L]``, the predictions that were made
        *before* this update.
    """
    L = len(h_layers) # N.Hidden Layers
    Y = A[L] # Predicted Output (explicit key; does not rely on dict order)

    # Softmax output with cross-entropy: the output-layer delta is T - Y.
    # With this sign convention the updates below are additions.
    delta = T - Y

    for i in range(L,-1,-1):
        layer_input = X if i == 0 else A[i-1]

        if i > 0:
            # Propagate the error to the layer below BEFORE touching W[i]:
            # the gradient has to be taken at the weights that produced the
            # forward pass, not at the freshly updated ones.
            error = np.dot(delta, W[i].T)
            delta_below = error*nonlinearDT(A[i-1], activation_func.__name__)

        W[i] += lr*np.dot(layer_input.T, delta)
        B[i] += lr*np.sum(delta, axis=0)

        if i > 0:
            delta = delta_below

    return W, B, Y

In [19]:
def fit(X, Y, h_layers=[2], lr=0.01, activation_func=sigmoid, iterations=100, random_state=1):
    """Train the network with full-batch gradient steps.

    Parameters
    ----------
    X : numpy.ndarray of shape (N, D)
        Input features.
    Y : numpy.ndarray of shape (N,)
        Integer class labels.
    h_layers : sequence of int
        Hidden-layer widths. The default list is never mutated.
    lr : float
        Learning rate.
    activation_func : callable
        Hidden activation; must be named ``sigmoid`` or ``tanh`` because the
        backward pass selects the derivative from its ``__name__``.
    iterations : int
        Number of forward/backward passes.
    random_state : int or None
        Seed for the initial weights.

    Returns
    -------
    (c, Yhat) : tuple
        ``c`` is the list of costs, one per iteration, each measured on the
        predictions made before that iteration's update. ``Yhat`` holds the
        predicted class index per training sample under the final weights.
    """
    T = oneHotEncode(Y) # Apply One hot encoding
    W, B, A = {}, {}, {} # weights, biases and outputs at each layers
    c = [] # Cost

    for n in range(iterations):
        W, B, A = forwardProp(X,T,h_layers,lr,activation_func,W,B,A,n,random_state)
        # Keep the probabilities in their own name instead of overwriting the
        # label argument Y.
        W, B, probs = backProp(X,T,h_layers,lr,activation_func,W,B,A)
        c.append(cost(T,probs))

    # One more forward pass so the predictions reflect the weights AFTER the
    # last update (the loop's activations predate it). Passing n=iterations
    # skips re-initialisation when training ran, and initialises the weights
    # when iterations == 0, so that case no longer fails in predict().
    W, B, A = forwardProp(X,T,h_layers,lr,activation_func,W,B,A,iterations,random_state)
    Yhat = predict(A[len(h_layers)]) # Final Prediction for Training Data

    return c,Yhat

In [23]:
# Train DNN: one hidden layer of 5 tanh units, 10000 full-batch iterations.
train_config = dict(
    h_layers=[5],
    lr=0.001,
    activation_func=tanh,
    iterations=10000,
    random_state=1,
)
C, Yhat = fit(X, Y, **train_config)

In [24]:
# Training accuracy: share of samples whose predicted class equals the label,
# reported as a percentage.
results = accuracy(Y, Yhat)
percent_text = str(results*100)
print("Accuracy: ", percent_text)

Accuracy:  99.8
