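### Course project: e-commerce user-action prediction (data preprocessing and a feed-forward pass with random weights)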

In [1]:
import numpy as np
import pandas as pd

In [7]:
# Quick look at the raw data set; pandas abbreviates long frames when it displays them.
pd.read_csv('ecommerce_data.csv')

Unnamed: 0,is_mobile,n_products_viewed,visit_duration,is_returning_visitor,time_of_day,user_action
0,1,0,0.657510,0,3,0
1,1,1,0.568571,0,2,1
2,1,0,0.042246,1,1,0
3,1,1,1.659793,1,1,2
4,0,1,2.014745,1,1,2
...,...,...,...,...,...,...
495,1,3,0.731594,0,0,2
496,0,0,6.368775,1,3,0
497,0,0,0.172853,1,3,0
498,1,0,0.209964,0,3,0


In [19]:
# load data from csv
def get_data(path='ecommerce_data.csv'):
    """Load the e-commerce CSV and build the feature matrix and target column.

    The last CSV column is treated as the target; all other columns are
    features. Feature columns 1 and 2 (number of products viewed, visit
    duration) are standardized to zero mean and unit variance. The last
    feature column (time of day, an integer code in 0..3) is replaced by a
    4-column one-hot encoding.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to 'ecommerce_data.csv' in the
        current working directory, so existing ``get_data()`` calls keep
        working.

    Returns
    -------
    X2 : np.ndarray, shape (N, D + 3)
        The first D - 1 feature columns (two of them standardized) followed
        by the 4 one-hot time-of-day columns.
    Y : np.ndarray, shape (N, 1)
        The target column, kept two-dimensional. Use ``Y[:, -1]`` to get a
        flat (N,) vector.
    """
    df = pd.read_csv(path)
    # Force a float matrix: the standardization below writes back in place and
    # would silently truncate to integers if every column were parsed as int.
    data = df.values.astype(np.float64)

    X = data[:, :-1]  # all but the last column
    Y = data[:, -1:]  # last column as (N, 1); data[-1:] would be the last ROW

    # standardize numerical variables:
    # column 1 = n products viewed, column 2 = visit duration
    for col in (1, 2):
        X[:, col] = (X[:, col] - X[:, col].mean()) / X[:, col].std()

    # one-hot encode time of day (last feature column)
    N, D = X.shape  # N = number of samples, D = number of raw feature columns
    n_slots = 4     # there are 4 times of day, so 1 column becomes 4 (net +3)
    X2 = np.zeros((N, D - 1 + n_slots))
    X2[:, :D - 1] = X[:, :D - 1]  # copy everything except the time-of-day column

    # Vectorized equivalent of looping over the rows and setting
    # X2[i, D - 1 + time_of_day_i] = 1 one at a time.
    slot = X[:, D - 1].astype(int)
    X2[np.arange(N), (D - 1) + slot] = 1

    return X2, Y

array([[ 1.        , -0.81697841, -0.40827769,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ],
       [ 1.        ,  0.13967078, -0.4994283 ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        ],
       [ 1.        , -0.81697841, -1.03884323,  1.        ,  0.        ,
         1.        ,  0.        ,  0.        ],
       [ 1.        ,  0.13967078,  0.6189325 ,  1.        ,  0.        ,
         1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.13967078,  0.9827116 ,  1.        ,  0.        ,
         1.        ,  0.        ,  0.        ]])

In [39]:
# function to generate binary data (original has multiple classes, select only rows with 0 and 1 target value)
def get_binary_data():
    """Return only the samples whose target class is 0 or 1.

    Returns
    -------
    X2 : np.ndarray, shape (n, n_features)
        Feature rows whose target value is <= 1.
    Y2 : np.ndarray, shape (n,)
        The matching target values as a flat vector.
    """
    X, Y = get_data()
    # get_data() returns Y as an (N, 1) column. Using `Y <= 1` directly as a
    # mask on the (N, n_features) matrix X raises an IndexError because the
    # mask's second dimension does not match. Flatten first so the mask
    # selects whole rows.
    labels = np.ravel(Y)
    keep = labels <= 1
    X2 = X[keep]
    Y2 = labels[keep]
    return X2, Y2

In [52]:
def softmax(a):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    a : array-like, shape (N, K)
        Activations; each row holds the K class scores of one sample.

    Returns
    -------
    np.ndarray, shape (N, K)
        Non-negative values where every row sums to 1.
    """
    a = np.asarray(a)
    # Subtracting the row maximum leaves the result mathematically unchanged
    # (the factor cancels in the ratio) but keeps np.exp from overflowing to
    # inf, which would otherwise produce NaN for large scores.
    shifted = a - np.max(a, axis=1, keepdims=True)
    expA = np.exp(shifted)
    return expA / expA.sum(axis=1, keepdims=True)

In [48]:
def forward(X, W1, b1, W2, b2):
    """Run one forward pass through a single-hidden-layer network.

    The hidden layer applies tanh to ``X.dot(W1) + b1``; the output layer
    applies softmax to ``hidden.dot(W2) + b2``.

    Returns the softmax output for every row of X.
    """
    hidden_input = X.dot(W1) + b1
    hidden = np.tanh(hidden_input)
    output_scores = hidden.dot(W2) + b2
    return softmax(output_scores)

In [58]:
def classification_rate_old(Y,P):
    """Fraction of positions at which Y and P agree (explicit-loop version).

    Compares ``Y[i]`` with ``P[i]`` for every index of Y and divides the
    number of matches by ``len(Y)``.
    """
    total = len(Y)
    hits = sum(1 for idx in range(total) if Y[idx] == P[idx])
    return float(hits) / total

In [59]:
def classification_rate(Y,P):
    """Fraction of predictions that equal their target (vectorized).

    Parameters
    ----------
    Y : array-like
        Target labels, either flat (N,) or a column (N, 1).
    P : array-like
        Predicted labels with the same number of elements as Y.

    Returns
    -------
    float
        Mean of the element-wise equality between targets and predictions.

    Raises
    ------
    ValueError
        If Y and P do not contain the same number of elements.
    """
    # Flatten both sides first: comparing an (N, 1) column with an (N,) vector
    # would otherwise broadcast to an (N, N) matrix and silently report the
    # wrong rate.
    targets = np.ravel(Y)
    preds = np.ravel(P)
    if targets.shape != preds.shape:
        raise ValueError(
            "Y and P must have the same number of elements, got %d and %d"
            % (targets.size, preds.size)
        )
    return np.mean(targets == preds)

In [73]:
X, Y = get_data()
targets = Y[:,-1] # Y[:,-1] -> change the shape from (500,1) to (500,)

# set random weights
M=5 # number of hidden units
D=X.shape[1] # number of features
K=len(np.unique(Y)) # number of unique values in Y (number of target classes)
# weights and biases
# Draw from a seeded generator so re-running the notebook reproduces the same
# numbers; a local generator leaves numpy's global random state untouched.
rng = np.random.RandomState(0)
W1 = rng.randn(D, M)
b1 = np.zeros(M)
W2 = rng.randn(M, K)
b2 = np.zeros(K)

# do predictions with random weights
p_Y_given_X = forward(X, W1, b1, W2, b2)
predictions = np.argmax(p_Y_given_X, axis=1) # most probable class per row
c_rate_old = classification_rate_old(targets,predictions)
c_rate = classification_rate(targets,predictions)

# just to test number of correct predictions
correct = (targets==predictions)

# output
c_rate_old, c_rate, correct.sum()

(0.18, 0.18, 90)