In [0]:
from __future__ import print_function
import collections
import mxnet as mx
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from mxnet import nd, autograd
from matplotlib import pyplot as plt
from keras.models import model_from_json, load_model 
import math

In [0]:
config = {
    "num_hidden_layers": 2,
    "num_hidden_units": 400,
    "batch_size": 128,
    "epochs": 10,
    "learning_rate": 0.001,
    "num_samples": 1,
    "pi": 0.25,
    "sigma_p": 1.0,
    "sigma_p1": 0.75,
    "sigma_p2": 0.1,
}

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

root_path = 'gdrive/My Drive/BNN_RecSys/'

df_use = pd.read_csv(root_path+"engineered_data_100.csv")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [6]:
# load embeddings model
json_file = open(root_path+'NN_embed_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(root_path+"NN_embed_model_weights.h5")
print("Loaded model from disk")

# Exctracted embeddings from pr-trained model
embeddings_prior = loaded_model.layers[2].get_weights()[0]
embeddings_user = loaded_model.layers[3].get_weights()[0]

Instructions for updating:
Colocations handled automatically by placer.
Loaded model from disk


In [7]:
# Number of product IDs available
N_products = df_use['product_id'].nunique()
N_shoppers = df_use['user_id'].nunique()
print('Unique Products:', N_products)
print('Unique_Users: ', N_shoppers)

Unique Products: 6018
Unique_Users:  100


In [0]:
def val2idx(df, cols):
    """
    Helper to index values of embedding columns
    """
    val_types = dict()
    for c in cols:
        val_types[c] = df[c].unique()

    val_to_idx = dict()

    for k, v in val_types.items():
        val_to_idx[k] = {o: i for i, o in enumerate(val_types[k])}

    for k, v in val_to_idx.items():
        df[k] = df[k].apply(lambda x: v[x]+1)

    unique_vals = dict()
    for c in cols:
        unique_vals[c] = df[c].nunique()

    return df, unique_vals, val_to_idx

In [0]:
EMBEDDING_COLUMNS = ["user_id", "product_id"]
df_deep, values, mappings = val2idx(df_use, EMBEDDING_COLUMNS)

df_deep.drop(['product_name','department', 'Unnamed: 0', 'index'], axis=1, inplace=True)

CATEGORICAL_COLUMNS = ["order_dow", "order_hour_of_day","aisle_id","department_id"]
CONTINUOUS_COLUMNS = ["days_since_prior_order","order_number","add_to_cart_order","reordered_total","product_id_orders","user_distinct_products","user_period",\
                     "user_orders","average_cart_position"]

#One-hot encoding categorical columns
df_deep = pd.get_dummies(df_deep, columns=[x for x in CATEGORICAL_COLUMNS])

#Normalising the feature columns
df_deep[CONTINUOUS_COLUMNS] = MinMaxScaler().fit_transform(df_deep[CONTINUOUS_COLUMNS].values)

y = df_deep.reordered.values
df_deep.drop(['reordered'], axis=1, inplace = True)

In [16]:
df_deep.shape[1]

198

In [0]:
X_train, X_test, y_train, y_test = train_test_split(df_deep, y, test_size=0.20, random_state=42, stratify=y)

In [0]:
num_inputs = df_deep.shape[1]
num_outputs = 1
batch_size = config['batch_size']                           

In [0]:
def relu(X):
    return nd.maximum(X, nd.zeros_like(X))

In [0]:
# Neural network modeling
num_layers = config['num_hidden_layers']

# define function for evaluating MLP
def net(X, layer_params):
    layer_input = X
    for i in range(len(layer_params) // 2 - 2):
        h_linear = nd.dot(layer_input, layer_params[2*i]) + layer_params[2*i + 1]
        layer_input = relu(h_linear)
    # last layer without ReLU
    output = nd.dot(layer_input, layer_params[-2]) + layer_params[-1]
    return output

# define network weight shapes
layer_param_shapes = []
num_hidden = config['num_hidden_units']
for i in range(num_layers + 1):
    if i == 0: # input layer
        W_shape = (num_inputs, num_hidden)
        b_shape = (num_hidden,)
    elif i == num_layers: # last layer
        W_shape = (num_hidden, num_outputs)
        b_shape = (num_outputs,)
    else: # hidden layers
        W_shape = (num_hidden, num_hidden)
        b_shape = (num_hidden,)
    layer_param_shapes.extend([W_shape, b_shape])

In [0]:
# Likelihood
def log_softmax_likelihood(yhat_linear, y):
    return nd.nansum(y * nd.log_softmax(yhat_linear), axis=0, exclude=True)

In [0]:
# Defining Gaussian priors

LOG2PI = np.log(2.0 * np.pi)

def log_gaussian(x, mu, sigma):
    return -0.5 * LOG2PI - nd.log(sigma) - (x - mu) ** 2 / (2 * sigma ** 2)

def gaussian_prior(x):
    sigma_p = nd.array([config['sigma_p']], ctx=ctx)

    return nd.sum(log_gaussian(x, 0., sigma_p))
  
## Alternate prior: scale mixture prior

def gaussian(x, mu, sigma):
    scaling = 1.0 / nd.sqrt(2.0 * np.pi * (sigma ** 2))
    bell = nd.exp(- (x - mu) ** 2 / (2.0 * sigma ** 2))

    return scaling * bell

def scale_mixture_prior(x):
    sigma_p1 = nd.array([config['sigma_p1']], ctx=ctx)
    sigma_p2 = nd.array([config['sigma_p2']], ctx=ctx)
    pi = config['pi']

    first_gaussian = pi * gaussian(x, 0., sigma_p1)
    second_gaussian = (1 - pi) * gaussian(x, 0., sigma_p2)

    return nd.log(first_gaussian + second_gaussian)

In [0]:
# Construct loss function as ELBO
def combined_loss(output, label_one_hot, params, mus, sigmas, log_prior, log_likelihood):

    # Calculate data likelihood
    log_likelihood_sum = nd.sum(log_likelihood(output, label_one_hot))

    # Calculate prior
    log_prior_sum = sum([nd.sum(log_prior(param)) for param in params])

    # Calculate variational posterior
    log_var_posterior_sum = sum([nd.sum(log_gaussian(params[i], mus[i], sigmas[i])) for i in range(len(params))])

    # Calculate total loss
    return 1.0 / num_batches * (log_var_posterior_sum - log_prior_sum) - log_likelihood_sum

In [0]:
# Optimizer
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad

In [0]:
# Evaluation metric
def evaluate_accuracy(data_iterator, net, layer_params):
    numerator = 0.
    denominator = 0.
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx).reshape((-1, 784))
        label = label.as_in_context(ctx)
        output = net(data, layer_params)
        predictions = nd.argmax(output, axis=1)
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()

In [0]:
# Parameter initialization
weight_scale = .1
rho_offset = -3

# initialize variational parameters; mean and variance for each weight
mus = []
rhos = []

for shape in layer_param_shapes:
    mu = nd.random_normal(shape=shape, scale=weight_scale)
    rho = rho_offset + nd.zeros(shape=shape)
    mus.append(mu)
    rhos.append(rho)

variational_params = mus + rhos

In [0]:
for param in variational_params:
    param.attach_grad()

In [0]:
# Main training loop
def sample_epsilons(param_shapes):
    epsilons = [nd.random_normal(shape=shape, loc=0., scale=1.0) for shape in param_shapes]
    return epsilons
  
def softplus(x):
    return nd.log(1. + nd.exp(x))

def transform_rhos(rhos):
    return [softplus(rho) for rho in rhos]
  
def transform_gaussian_samples(mus, sigmas, epsilons):
    samples = []
    for j in range(len(mus)):
        samples.append(mus[j] + sigmas[j] * epsilons[j])
    return samples

In [0]:
X_train.reset_index(inplace=True)


In [56]:
X_train.drop(['index'],axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [57]:
# Complete training loop

epochs = config['epochs']
learning_rate = config['learning_rate']
smoothing_constant = .01
train_acc = []
test_acc = []

for e in range(epochs):
  for i in range(int(X_train.shape[0]/32)):
    i+=1
    data = np.ndarray(X_train.loc[(i-1)*32 : i*32])
    label = y_train[(i-1)*32, i*32]
    #label_one_hot = nd.one_hot(label, 2)

    with autograd.record():
      # sample epsilons from standard normal
      epsilons = sample_epsilons(layer_param_shapes)

      # compute softplus for variance
      sigmas = transform_rhos(rhos)

      # obtain a sample from q(w|theta) by transforming the epsilons
      layer_params = transform_gaussian_samples(mus, sigmas, epsilons)

      # forward-propagate the batch
      output = net(data, layer_params)

      # calculate the loss
      loss = combined_loss(output, label, layer_params, mus, sigmas, gaussian_prior, log_softmax_likelihood)

      # backpropagate for gradient calculation
      loss.backward()

      # apply stochastic gradient descent to variational parameters
      SGD(variational_params, learning_rate)

      # calculate moving loss for monitoring convergence
      curr_loss = nd.mean(loss).asscalar()
      moving_loss = (curr_loss if ((i == 0) and (e == 0)) else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)


      test_accuracy = evaluate_accuracy(X_test, net, mus)
      train_accuracy = evaluate_accuracy(X_train, net, mus)
      train_acc.append(np.asscalar(train_accuracy))
      test_acc.append(np.asscalar(test_accuracy))
      print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
            (e, moving_loss, train_accuracy, test_accuracy))

plt.plot(train_acc)
plt.plot(test_acc)
plt.show()

ValueError: ignored

In [52]:
X_train.index

Int64Index([2004262,  585478, 2180168,  960588, 2032816, 2354607,  163371,
            1264868, 1359153, 1585117,
            ...
              88733,  504370, 1604522, 1752943,  382450, 2041371, 1531510,
              68213,   17057, 1811363],
           dtype='int64', length=1920119)