#### Modeling and Analysis 

In [7]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import random

In [8]:
# reading aisles dataset
aisles = pd.read_csv('data/aisles.csv')

In [9]:
# reading department dataset
department = pd.read_csv('data/departments.csv')

In [22]:
p = 0.05
all_orders = pd.read_csv('data/all_orders.csv', header = 0, skiprows = lambda i: i>0 and random.random() > p)

In [23]:
all_orders.head(2)

Unnamed: 0,product_id,product_name,aisle_id,department_id,order_id,add_to_cart_order,reordered,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,quantity,aisle,department
0,19123,Mini Lemon Pie Coco-Roons,61,19,2111270,15,1,55877,train,21,0,15,26.0,1.0,cookies cakes,snacks
1,25177,Sweet Toffee Milano Slices,61,19,2131981,34,0,95547,train,26,5,10,7.0,1.0,cookies cakes,snacks


In [24]:
# create basket of products per order_id
basket = (all_orders
        .groupby(['order_id', 'product_name'])['quantity']
        .sum().unstack().reset_index().fillna(0)
        .set_index('order_id'))

In [25]:
basket.shape

(6059, 3685)

In [30]:
# show a subset of columns
basket.iloc[:, [0, 1, 2, 3,4, 5, 6, 7]].head()

product_name,0% Fat Organic Greek Vanilla Yogurt,0% Fat Strawberry Greek Yogurt,0% Greek Strained Yogurt,0% Greek Yogurt Black Cherry on the Bottom,1 Apple + 1 Mango Fruit Bar,1 Apple + 1 Pear Fruit Bar,1% Low Fat Milk,1% Lowfat Milk
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
456,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
775,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

In [32]:
basket_sets = basket.applymap(encode_units)

In [33]:
basket_sets.head()

product_name,0% Fat Organic Greek Vanilla Yogurt,0% Fat Strawberry Greek Yogurt,0% Greek Strained Yogurt,0% Greek Yogurt Black Cherry on the Bottom,1 Apple + 1 Mango Fruit Bar,1 Apple + 1 Pear Fruit Bar,1% Low Fat Milk,1% Lowfat Milk,1% Milkfat Low Fat Buttermilk,1/3 Less Fat Cream Cheese,...,Zero Go-Go Mixed Berry Vitamin Water,Zero Ion4 Grape Sports Drink,Zucchini Noodles,Zucchini Soufflé,Zucchini Squash,"\""Mokaccino\"" Milk + Blue Bottle Coffee Chocolate",gel hand wash sea minerals,smartwater® Electrolyte Enhanced Water,with Olive Oil Mayonnaise Dressing,with a Splash of Mango Coconut Water
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
275,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
456,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
775,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1143,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
frequent_itemsets = apriori(basket_sets, min_support = 0.01, use_colnames = True)

In [35]:
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.013369,(Bag of Organic Bananas)
1,0.015514,(Banana)


In [40]:
# Create the rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.01)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


In [38]:
rules[ (rules['lift'] >= 6) &
       (rules['confidence'] >= 0.8) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
