In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import sklearn
import theano
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [3]:
# Location of the prepared dataset. The default is the original local path;
# set the DATA_FINAL_CSV environment variable to load the file from elsewhere
# without editing the notebook.
DATA_PATH = os.environ.get(
    'DATA_FINAL_CSV',
    '/Users/BharathiSrinivasan/Documents/GitHub/Thesis/data_final.csv',
)
df = pd.read_csv(DATA_PATH)

In [4]:
# Remove the 'prd_reorder_freq' column from the frame (mutates df in place).
df.drop(columns=['prd_reorder_freq'], inplace=True)

In [5]:
# Total number of missing cells across the whole frame.
print(df.isna().values.sum())

0


In [6]:
# Columns that are one-hot encoded further down.
CATEGORICAL_COLUMNS = [
    "order_dow",
    "order_hour_of_day",
    "aisle_id",
    "department_id",
]

# Columns that are min-max scaled further down.
CONTINUOUS_COLUMNS = [
    "days_since_prior_order",
    "order_number",
    "add_to_cart_order",
    "user_period",
    "user_distinct_products",
    "user_average_basket",
    "product_id_orders",
]

In [8]:
# One-hot encode every categorical column; each listed column is replaced by
# its indicator columns.
df = pd.get_dummies(df, columns=list(CATEGORICAL_COLUMNS))

In [10]:
# Rescale the continuous feature columns with a min-max scaler.
# NOTE(review): the scaler is fit on the full frame, i.e. before the
# train/test split performed later in the notebook — confirm this is intended.
scaler = preprocessing.MinMaxScaler()
df[CONTINUOUS_COLUMNS] = scaler.fit_transform(df[CONTINUOUS_COLUMNS].values)

In [12]:
# Drop the name/identifier columns so they are not fed to the model
# (mutates df in place).
df.drop(
    columns=['product_name', 'department', 'product_id', 'user_id'],
    inplace=True,
)

In [13]:
# Target vector: the 'reordered' column as a NumPy array.
y = df['reordered'].values

In [14]:
# The target has been extracted into y, so remove it from the feature frame
# (mutates df in place).
df.drop(columns=['reordered'], inplace=True)

In [17]:
# Hold out 20% of the rows for testing; the split is stratified on y and
# seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    y,
    test_size=0.20,
    random_state=42,
    stratify=y,
)

In [18]:
# Convert every split to a NumPy array in Theano's configured float dtype.
X_train, y_train, X_test, y_test = (
    np.array(part, dtype=theano.config.floatX)
    for part in (X_train, y_train, X_test, y_test)
)

In [20]:
def construct_nn(ann_input, ann_output):
    """Build a Bayesian neural network with three tanh hidden layers.

    Every weight matrix gets an independent Normal(0, 1) prior and a random
    standard-normal starting value. The output unit is squashed with a
    sigmoid and used as the success probability of a Bernoulli likelihood,
    matching the binary target.

    Besides its arguments, the function reads two module-level names:
    ``df`` (its column count fixes the width of the input layer) and
    ``y_train`` (its length is passed as ``total_size``).

    Parameters
    ----------
    ann_input
        Tensor / shared variable holding the feature rows. It is multiplied
        with a ``(df.shape[1], n_hidden)`` weight matrix, so it needs
        ``df.shape[1]`` columns.
    ann_output
        Tensor / shared variable holding the observed targets; used as the
        ``observed`` data of the likelihood.

    Returns
    -------
    pm.Model
        The constructed model; the likelihood variable is named ``'out'``.
    """
    n_hidden = 100
    n_features = df.shape[1]
    floatX = theano.config.floatX

    # Initialize random weights between each layer
    init_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    init_3 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    init_out = np.random.randn(n_hidden).astype(floatX)

    with pm.Model() as neural_network:
        # Weights from input to hidden layer
        weights_in_1 = pm.Normal('w_in_1', 0, sd=1,
                                 shape=(n_features, n_hidden),
                                 testval=init_1)

        # Weights from 1st to 2nd layer
        weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
                                shape=(n_hidden, n_hidden),
                                testval=init_2)

        # Weights from 2nd to 3rd layer
        weights_2_3 = pm.Normal('w_2_3', 0, sd=1,
                                shape=(n_hidden, n_hidden),
                                testval=init_3)

        # Weights from hidden layer to output
        weights_3_out = pm.Normal('w_3_out', 0, sd=1,
                                  shape=(n_hidden,),
                                  testval=init_out)

        # Build neural-network using tanh activation function
        act_1 = pm.math.tanh(pm.math.dot(ann_input, weights_in_1))
        act_2 = pm.math.tanh(pm.math.dot(act_1, weights_1_2))
        act_3 = pm.math.tanh(pm.math.dot(act_2, weights_2_3))

        # The sigmoid maps the linear output into (0, 1) so it is a valid
        # Bernoulli probability.
        act_out = pm.math.sigmoid(pm.math.dot(act_3, weights_3_out))

        # Binary classification -> Bernoulli likelihood
        out = pm.Bernoulli('out',
                           act_out,
                           observed=ann_output,
                           total_size=y_train.shape[0]  # IMPORTANT for minibatches
                           )
    return neural_network

  
# Trick: wrap the inputs and outputs in Theano shared variables.
# The model sees the same data, but the values held by a shared variable can
# be replaced later (e.g. to swap in the test data) and pymc3 will simply use
# the new values. Think of it as a pointer that can be redirected.
# For more info, see: http://deeplearning.net/software/theano/library/compile/shared.html
ann_input = theano.shared(X_train)
ann_output = theano.shared(y_train)
neural_network = construct_nn(ann_input=ann_input, ann_output=ann_output)

# Usually a constant bias term b would be added to the inputs; it is omitted
# here to keep the code cleaner.

In [21]:
from pymc3.theanof import MRG_RandomStreams, set_tt_rng

# Seed the Theano random stream used by pymc3 with a fixed value.
set_tt_rng(MRG_RandomStreams(seed=42))

In [23]:
%%time

with neural_network:
    # ADVI inference object, created inside the model context.
    inference = pm.ADVI()
    # Fit the approximation; both `inference` and `approx` are used by later cells.
    # NOTE(review): n=10 is a very small iteration count — presumably a
    # smoke-test value; confirm before relying on the results.
    approx = pm.fit(n=10, method=inference)

  0%|          | 0/10 [00:00<?, ?it/s]
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [None]:
# Draw 1000 samples from the fitted approximation.
trace = approx.sample(draws=1000)

# Plot the negated optimisation history per iteration, labelled as the ELBO.
elbo_history = -inference.hist
plt.plot(elbo_history)
plt.xlabel('iteration')
plt.ylabel('ELBO');

In [None]:
# Replace arrays our NN references with the test data
ann_input.set_value(X_test)
ann_output.set_value(y_test)

with neural_network:
    ppc = pm.sample_ppc(trace, samples=200, progressbar=False)

# Posterior-predictive mean of 'out' for every test row (mean over axis 0 of
# the sampled array). Kept as a float score so existing uses of `pred`
# continue to work.
pred = ppc['out'].mean(axis=0)

# Use a score of > 0.5 to assume prediction of class 1
pred_class = pred > 0.5