In [1]:
%matplotlib inline
import theano
import pymc3 as pm
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')
sns.set_style('white')
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.cross_validation import train_test_split

  from ._conv import register_converters as _register_converters


In [2]:
#Importing Instacart data
df_full = pd.read_csv('/Users/BharathiSrinivasan/Documents/GitHub/Thesis/merged_data.csv', index_col = False)

In [3]:
#Sampling a fraction for data for initial training
df_small = df_full.sample(frac = 0.01)
print(df_small.shape)

(338191, 14)


In [4]:
df_small.head()

Unnamed: 0.1,Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,user_id,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_name,aisle_id,department_id,department
16539013,16539013,1744653,39877,2,1,73422,82,0,11,3.0,Organic Granny Smith Apple,24,4,produce
16523878,16523878,1743127,11513,13,1,148689,11,1,17,30.0,1 Apple + 1 Pear Fruit Bar,3,19,snacks
26374855,26374855,2781301,34530,10,0,190467,10,2,18,29.0,Organic Chicken Thighs,7,12,meat seafood
7315877,7315877,772290,11776,2,1,124606,11,2,14,5.0,Baked Sea Salt & Vinegar Potato Chips,107,19,snacks
9419736,9419736,994375,7076,5,1,134421,27,0,9,8.0,Grain Free Chicken Formula Cat Food,41,8,pets


In [5]:
#Preparation of training data

CATEGORICAL_COLUMNS = ["order_dow", "order_hour_of_day"]
CONTINUOUS_COLUMNS = ["days_since_prior_order","order_number","add_to_cart_order"]
EMBEDDING_COLUMNS = ["user_id", "product_id","aisle_id","department_id"]

#One-hot encoding categorical columns
df_small = pd.get_dummies(df_small, columns=[x for x in CATEGORICAL_COLUMNS])

#Normalising the feature columns
df_small[CONTINUOUS_COLUMNS] = MinMaxScaler().fit_transform(df_small[CONTINUOUS_COLUMNS].values)

#Drop product names and department names for the time being! Incorporate as embedded vectors later!
df_small.drop(['product_name','department'],axis=1, inplace = True)
df_small.drop(['Unnamed: 0'], axis = 1, inplace = True)


In [6]:
#Target variable
y = df_small['reordered'].values
df_small.drop(['reordered'], axis=1, inplace = True)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(df_small, y, test_size=0.25, random_state=42, stratify=y)

In [16]:
def create_BNN_model(ann_input, ann_output, n_hidden):
    
    #Initialise random weights for the layers
    init_1 = np.random.randn(X_train.shape[1], n_hidden).astype(float)
    init_2 = np.random.randn(n_hidden, n_hidden).astype(float)
    init_out = np.random.randn(n_hidden).astype(float)
    
    with pm.Model() as BNN:
        #Weights from input to layer 1
        weights1 = pm.Normal('weights1',0, sd =1, shape=(X_train.shape[1], n_hidden), testval=init_1)
        
        #Weights from layer 1 to layer 2
        weights2 = pm.Normal('weights2',0, sd =1, shape=(n_hidden, n_hidden), testval=init_2)
        
        #Weights from layer 2 to output
        weights3 = pm.Normal('weights3',0, sd =1, shape=(n_hidden,), testval=init_out) 
        
        #Building neural network with tanh activation
        act1 = pm.math.tanh(pm.math.dot(ann_input, weights1))
        act2 = pm.math.tanh(pm.math.dot(act1, weights2))
        act_out = pm.math.sigmoid(pm.math.dot(act2, weights3))
        
        #Binary classification --> Bernoulli likelihood
        out = pm.Bernoulli('out', act_out, observed = ann_output, total_size = y_train.shape[0])
        
        return BNN
    

In [17]:
ann_input = theano.shared(X_train)
ann_output = theano.shared(y_train)
neural_network = create_BNN_model(ann_input, ann_output, 100)

AsTensorError: ('Variable type field must be a TensorType.', <Generic>, <theano.gof.type.Generic object at 0x112e5e5c0>)