#### Modeling and Analysis 

In [9]:
import pandas as pd
import numpy as np
from fpgrowth_py import fpgrowth
import pyfpgrowth
import random
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

In [62]:
# reading aisles dataset
aisles = pd.read_csv('data/aisles.csv')

In [63]:
# reading department dataset
department = pd.read_csv('data/departments.csv')

In [115]:
p = 0.10
all_orders = pd.read_csv('data/orders_train_prior.csv', header = 0, skiprows = lambda i: i>0 and random.random() > p)

In [114]:
all_orders_full = pd.read_csv('data/orders_train_prior.csv')

In [97]:
morning_orders = pd.read_csv('data/morning_orders.csv')

In [182]:
daytime_orders = pd.read_csv('data/day_orders.csv')

In [183]:
evening_orders = pd.read_csv('data/evening_orders.csv')

**Morning Orders**

In [167]:
# group products by order_id into a df
df_morning = morning_orders.groupby('order_id')['product_name'].agg([('product_name', ','.join)]).reset_index()

In [168]:
# change order_id column into an index
df_morning.set_index('order_id', inplace = True)

In [169]:
df_morning_split = df_morning.product_name.apply(lambda x: x.split(','))

In [172]:
df_morning_split.head()

order_id
174              [organic garlic, island vanilla cereal]
217                     [organic short grain brown rice]
521                                       [spring water]
598                                [breaded fish sticks]
750    [multi-seeds cracker, wheat gluten free waffle...
Name: product_name, dtype: object

In [173]:
df_morning_orders = pd.DataFrame(df_morning_split)

In [174]:
df_morning_orders.head()

Unnamed: 0_level_0,product_name
order_id,Unnamed: 1_level_1
174,"[organic garlic, island vanilla cereal]"
217,[organic short grain brown rice]
521,[spring water]
598,[breaded fish sticks]
750,"[multi-seeds cracker, wheat gluten free waffle..."


In [175]:
df_morning.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16460 entries, 174 to 3421049
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   product_name  16460 non-null  object
dtypes: object(1)
memory usage: 257.2+ KB


In [176]:
df_morning_orders.shape

(16460, 1)

In [177]:
patterns = pyfpgrowth.find_frequent_patterns(df_morning_orders['product_name'], 100)

In [178]:
patterns

{('clementines',): 101,
 ('honeycrisp apple',): 103,
 ('organic lemon',): 107,
 ('organic gala apples',): 110,
 ('organic zucchini',): 111,
 ('organic garlic',): 113,
 ('organic yellow onion',): 119,
 ('organic cucumber',): 123,
 ('seedless red grapes',): 124,
 ('organic blueberries',): 126,
 ('limes',): 150,
 ('organic whole milk',): 153,
 ('organic raspberries',): 165,
 ('strawberries',): 167,
 ('large lemon',): 184,
 ('organic avocado',): 188,
 ('organic hass avocado',): 211,
 ('organic baby spinach',): 261,
 ('organic strawberries',): 286,
 ('bag of organic bananas',): 498,
 ('banana',): 541}

In [179]:
rules = pyfpgrowth.generate_association_rules(patterns, 0.001)

In [180]:
rules

{}

**Daytime orders**

In [184]:
# group products by order_id into a df
df_daytime = daytime_orders.groupby('order_id')['product_name'].agg([('product_name', ','.join)]).reset_index()

In [185]:
# change order_id column into an index
df_daytime.set_index('order_id', inplace = True)

In [186]:
df_daytime_split = df_daytime.product_name.apply(lambda x: x.split(','))

In [188]:
df_daytime_split.head()

order_id
254                               [organic strawberries]
360                                  [shiitake mushroom]
470                         [sparkling water grapefruit]
729             [special k chocolatey strawberry cereal]
762    [organic strawberries, celery hearts, organic ...
Name: product_name, dtype: object

In [189]:
df_daytime_orders = pd.DataFrame(df_daytime_split)

In [190]:
df_daytime_orders.shape

(29208, 1)

In [197]:
patterns_daytime = pyfpgrowth.find_frequent_patterns(df_daytime_orders['product_name'], 100)

In [198]:
patterns_daytime

{('uncured genoa salami',): 101,
 ('spring water',): 102,
 ('unsweetened almondmilk',): 102,
 ('bunched cilantro',): 103,
 ('roma tomato',): 103,
 ('jalapeno peppers',): 106,
 (' bunch',): 106,
 ('organic navel orange',): 107,
 ('red vine tomato',): 108,
 ('organic italian parsley bunch',): 108,
 ('blueberries',): 108,
 ('organic tomato cluster',): 112,
 ('organic unsweetened almond milk',): 113,
 ('hass avocados',): 113,
 ('lime sparkling water',): 114,
 ('organic red bell pepper',): 115,
 ('organic granny smith apple',): 116,
 ('boneless skinless chicken breasts',): 124,
 ('organic garnet sweet potato (yam)',): 125,
 ('organic peeled whole baby carrots',): 127,
 ('original hummus',): 127,
 ('raspberries',): 129,
 ('half & half',): 133,
 ('100% whole wheat bread',): 134,
 ('clementines',): 135,
 ('michigan organic kale',): 135,
 ('organic blackberries',): 138,
 ('green bell pepper',): 139,
 ('broccoli crown',): 141,
 ('fresh cauliflower',): 142,
 ('organic gala apples',): 145,
 ('red 

In [202]:
rules = pyfpgrowth.generate_association_rules(patterns_daytime, 0.001)

In [203]:
rules

{}

**Evening Orders**

In [205]:
# group products by order_id into a df
df_evening = evening_orders.groupby('order_id')['product_name'].agg([('product_name', ','.join)]).reset_index()

In [206]:
# change order_id column into an index
df_evening.set_index('order_id', inplace = True)

In [207]:
df_evening_split = df_evening.product_name.apply(lambda x: x.split(','))

In [208]:
df_evening_split.head()

order_id
527     [organic jalapeno pepper, flat fillets anchovies]
1700    [organic classic rich crackers, strawberry fla...
2032               [raspberry blackberry sparkling water]
2458    [frozen organic wild blueberries, organic lemo...
2468    [original/ranch/bbq/chile picante con limon va...
Name: product_name, dtype: object

In [209]:
df_evening_orders = pd.DataFrame(df_evening_split)

In [210]:
df_evening_orders.shape

(7704, 1)

In [211]:
patterns_evening = pyfpgrowth.find_frequent_patterns(df_evening_orders['product_name'], 100)

In [212]:
patterns_daytime

{('uncured genoa salami',): 101,
 ('spring water',): 102,
 ('unsweetened almondmilk',): 102,
 ('bunched cilantro',): 103,
 ('roma tomato',): 103,
 ('jalapeno peppers',): 106,
 (' bunch',): 106,
 ('organic navel orange',): 107,
 ('red vine tomato',): 108,
 ('organic italian parsley bunch',): 108,
 ('blueberries',): 108,
 ('organic tomato cluster',): 112,
 ('organic unsweetened almond milk',): 113,
 ('hass avocados',): 113,
 ('lime sparkling water',): 114,
 ('organic red bell pepper',): 115,
 ('organic granny smith apple',): 116,
 ('boneless skinless chicken breasts',): 124,
 ('organic garnet sweet potato (yam)',): 125,
 ('organic peeled whole baby carrots',): 127,
 ('original hummus',): 127,
 ('raspberries',): 129,
 ('half & half',): 133,
 ('100% whole wheat bread',): 134,
 ('clementines',): 135,
 ('michigan organic kale',): 135,
 ('organic blackberries',): 138,
 ('green bell pepper',): 139,
 ('broccoli crown',): 141,
 ('fresh cauliflower',): 142,
 ('organic gala apples',): 145,
 ('red 

In [213]:
rules = pyfpgrowth.generate_association_rules(patterns_evening, 0.001)

In [214]:
rules

{}

**All Orders**

In [151]:
# group products by order_id into a df
df_all_orders = all_orders_full.groupby('order_id')['product_name'].agg([('product_name', ','.join)]).reset_index()

In [154]:
# reset order_id as index
df_all_orders.set_index('order_id', inplace=True)

In [155]:
df_orders_split = df_all_orders.product_name.apply(lambda x: x.split(','))

In [156]:
df_orders = pd.DataFrame(df_orders_split)

In [201]:
df_orders.head(2)

Unnamed: 0_level_0,product_name
order_id,Unnamed: 1_level_1
1,"[organic celery hearts, organic 4% milk fat wh..."
4,"[tiny twists pretzels, chewy 25% low sugar cho..."


In [158]:
df_orders.shape

(1305846, 1)

In [224]:
patterns_all = pyfpgrowth.find_frequent_patterns(df_orders['product_name'], 8_000)

In [217]:
patterns_all

{('organic blueberries',): 10014,
 ('organic yellow onion',): 10097,
 ('organic whole milk',): 11818,
 ('organic raspberries',): 12473,
 ('limes',): 12947,
 ('strawberries',): 13526,
 ('large lemon',): 15634,
 ('organic avocado',): 16188,
 ('organic hass avocado',): 17740,
 ('organic baby spinach',): 21851,
 ('organic strawberries',): 23968,
 ('bag of organic bananas',): 34462,
 ('banana',): 42736}

In [222]:
rules = pyfpgrowth.generate_association_rules(patterns_all, 0.01)

In [223]:
rules

{}