# Baking Strategy

In [1]:
## Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import datetime

from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [2]:
# Read in and examine the data set

transactions = pd.read_csv(r'C:\Users\jbean\Dropbox\Other\Python\Bakery\transaction_data.csv')

transactions.head()

In [4]:
transactions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21293 entries, 0 to 21292
Data columns (total 4 columns):
Date           21293 non-null object
Time           21293 non-null object
Transaction    21293 non-null int64
Item           21293 non-null object
dtypes: int64(1), object(3)
memory usage: 665.5+ KB


# Data Cleaning and Manipulation

In [5]:
# Convert date to datetime object
transactions['Date'] = pd.to_datetime(transactions['Date'], format='%m/%d/%Y')

In [6]:
# Convert time column to datetime; add hour and minute column; drop Time column

transactions['Time'] = pd.to_datetime(transactions['Time'])

transactions['hour'] = transactions.Time.dt.hour
transactions['minute'] = transactions.Time.dt.minute

transactions = transactions.drop(['Time'], axis=1)

In [7]:
# Filter out missing values

transactions = transactions[transactions.Item != 'NONE']

In [9]:
def time_of_day(hour):
    """
    Determines whether a purchase was made in the morning, afternoon, or evening.
    """
    
    if hour < 12:
        
        return 'morning'
    
    elif hour < 17:
        
        return 'afternoon'
    
    else:
        
        return 'evening'

In [10]:
# Apply time of day function to 'hour' column

transactions['time_of_day'] = transactions.hour.apply(time_of_day)

In [97]:
# Add a day of week variable and a second classifier of weekday or weekend

from datetime import date
import calendar

transactions['day_of_week'] = [calendar.day_name[i.weekday()] for i in transactions['Date']]

transactions['day_type'] = ['weekend' if (i == 'Sunday') | (i == 'Saturday') else 'weekday' for i in 
                                                                            transactions['day_of_week']]

In [14]:
# Add an item count variable equal to one for future groupby operations

transactions['item_count'] = 1

# Data Exploration

In [8]:
# Look at the average number of items purchased...

avg_items = transactions.groupby(by='Transaction')['Item'].count().mean()

print('The average number of items bought by a customer is: %.2f items' %avg_items)

The average number of items bought by a customer is: 2.17 items


In [110]:
# ...and how that varies by time of day

single_transaction = pd.DataFrame(transactions.groupby(by='Transaction')['item_count'].sum())

single_transaction = single_transaction.merge(transactions[['hour','minute','time_of_day','Date', 'Transaction']]\
                                              .drop_duplicates('Transaction'), left_on=single_transaction.index, 
                                              right_on='Transaction', how='left', copy=False)

print('The average number of items by time of day: \n{0}'.format(
    single_transaction.groupby(by='time_of_day')['item_count'].mean()))

The average number of items by time of day: 
time_of_day
afternoon    2.273335
evening      1.956044
morning      2.048257
Name: item_count, dtype: float64


In [118]:
# Find the frequency of each item purchased and by time of day

item_frequencies_all = transactions.Item.value_counts()
item_frequencies_morning = transactions[transactions.time_of_day == 'morning'].Item.value_counts()
item_frequencies_afternoon = transactions[transactions.time_of_day == 'afternoon'].Item.value_counts()
item_frequencies_evening = transactions[transactions.time_of_day == 'evening'].Item.value_counts()

In [125]:
# Plot the frequency of purchase for each item

import plotly.plotly as py
import plotly.graph_objs as go

button_layer_1_height = 1.12
button_layer_2_height = 1.065

item_threshold_all = 100
item_threshold_other = 50

trace_all = go.Bar(
            x=item_frequencies_all[item_frequencies_all > item_threshold_all].index,
            y=item_frequencies_all[item_frequencies_all > item_threshold_all].values,
            visible=True,
            name='All'
    )

trace_morning = go.Bar(
            x=item_frequencies_morning[item_frequencies_morning > item_threshold_other].index,
            y=item_frequencies_morning[item_frequencies_morning > item_threshold_other].values,
            visible=False,
            name='Morning'
    )

trace_afternoon = go.Bar(
            x=item_frequencies_afternoon[item_frequencies_afternoon > item_threshold_other].index,
            y=item_frequencies_afternoon[item_frequencies_afternoon > item_threshold_other].values,
            visible=False,
            name='Afternoon'
    )

trace_evening = go.Bar(
            x=item_frequencies_evening[item_frequencies_evening > 10].index,
            y=item_frequencies_evening[item_frequencies_evening > 10].values,
            visible=False,
            name='Evening'
    )

data = [trace_all, trace_morning, trace_afternoon, trace_evening]

updatemenus = list([
    
    dict(
         buttons=list([   
            dict(label = 'All',
                 method = 'update',
                 args = [{'visible': [True, False, False, False]},
                         {'title': 'All Day'}]),
             
            dict(label = 'Morning',
                 method = 'update',
                 args = [{'visible': [False, True, False, False]},
                         {'title': 'Morning'}]),
             
            dict(label = 'Afternoon',
                 method = 'update',
                 args = [{'visible': [False, False, True, False]},
                         {'title': 'Afternoon'}]),
             
            dict(label = 'Evening',
                 method = 'update',
                 args = [{'visible': [False, False, False, True]},
                         {'title': 'Evening'}])
        ]),
        type='buttons',
        direction = 'right',
        pad = {'r': 10, 't': 10},
        showactive = True,
        x = 0.3,
        xanchor = 'left',
        y = button_layer_1_height,
        yanchor = 'top' )
])
                
layout = dict(title='Bakery Transactions by Time of Day', showlegend=False,
              updatemenus=updatemenus)
                
fig = dict(data=data, layout=layout)

py.iplot(fig, filename='frequencies-bar')

In [130]:
# Sales trend by date

grouped_by_date = pd.DataFrame(transactions.groupby(by='Date')['item_count'].sum())

In [136]:
grouped_by_date.head()

Unnamed: 0_level_0,item_count
Date,Unnamed: 1_level_1
2016-10-30,170
2016-10-31,199
2016-11-01,150
2016-11-02,164
2016-11-03,189


In [141]:
from plotly.grid_objs import Grid, Column
import plotly.figure_factory as ff

# Create a plot.ly-friendly table for minutes per day

table = ff.create_table(grouped_by_date)
py.iplot(table, filename='sales_per_day_bakery')

In [142]:
# Formatting for the plot.ly chart based on code adapted from an example in their gallery

number_of_items_sold = list(grouped_by_date['item_count'])

my_columns = []

for k in range(len(grouped_by_date.index) - 1):
    
    my_columns.append(Column(grouped_by_date.index[:k + 1], 'x{}'.format(k + 1)))   
    my_columns.append(Column(number_of_items_sold[:k + 1], 'y{}'.format(k + 1)))
    
grid = Grid(my_columns)
py.grid_ops.upload(grid, 'items_sold', auto_open=False)

'https://plot.ly/~jbean01/146/'

In [143]:
data=[dict(type='scatter',
           xsrc=grid.get_column_reference('x1'),
           ysrc= grid.get_column_reference('y1'),
           name='Bakery',
           mode='lines',
           line=dict(color= 'rgb(0, 153, 255)'),
           fill='tozeroy',
           fillcolor='rgba(153, 204, 255, 0.5)')]

axis=dict(ticklen=4,
          mirror=True,
          zeroline=False,
          showline=True,
          autorange=False,
          showgrid=False)

layout = dict(title='Sales per Day',
              font=dict(family='Balto'),
              showlegend=False,
              autosize=False,
              width=800,
              height=400,
              xaxis=dict(axis, **{'nticks':12, 'tickangle':-45,
                                  'range': [min(grouped_by_date.index),
                                            max(grouped_by_date.index)]}),
              yaxis=dict(axis, **{'title': 'Items Sold', 'range':[0,max(grouped_by_date.item_count)+5]}),
              updatemenus=[dict(type='buttons',
                                showactive=False,
                                y=1,
                                x=1.1,
                                xanchor='right',
                                yanchor='top',
                                pad=dict(t=0, r=10),
                                buttons=[dict(label='Play',
                                              method='animate',
                                              args=[None, dict(frame=dict(duration=50, redraw=False), 
                                                               transition=dict(duration=0),
                                                               fromcurrent=True,
                                                               mode='immediate')])])])

frames=[{'data':[{'xsrc': grid.get_column_reference('x{}'.format(k + 1)),
                  'ysrc': grid.get_column_reference('y{}'.format(k + 1))}],
         'traces': [0]
        } for k in range(len(grouped_by_date.index) - 1)]

fig=dict(data=data, layout=layout, frames=frames)
py.icreate_animations(fig, 'Sales-per-Day-Bakery')

In [147]:
weekday_sales = pd.DataFrame(transactions[transactions.day_type == 'weekday'].groupby(by='Date')['item_count'].sum())
weekend_sales = pd.DataFrame(transactions[transactions.day_type == 'weekend'].groupby(by='Date')['item_count'].sum())

In [157]:
weekday_sales.item_count.min()

47

In [162]:
# Histogram of items sold by type of day

trace_weekday = go.Histogram(
    x=weekday_sales[weekday_sales.item_count > 50].item_count,
    opacity=0.75,
    name = 'weekday',
)

trace_weekend = go.Histogram(
    x=weekend_sales[weekend_sales.item_count > 1].item_count,
    opacity=0.75,
    name = 'weekend',
    nbinsx = 12
)

data = [trace_weekday, trace_weekend]
layout = go.Layout(title = 'Distribution of Sales by Weekday / Weekend',
                   xaxis=dict(title='Number of Items Sold'),
                   yaxis=dict(title='Frequency of Sales'),
                   barmode='overlay')

fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='day-type-histogram')

In [165]:
# Find the number of minutes played per day of week

merged_sales_per_day = grouped_by_date.merge(transactions[['day_of_week', 'Date']], left_index=True, right_on='Date',
                                                        how='left', validate='1:m')

merged_sales_per_day = merged_sales_per_day[~merged_sales_per_day.index.duplicated(keep='first')]

days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

sales_per_day = []

for day in days:
    
    df_days = merged_sales_per_day[merged_sales_per_day.day_of_week == day]
    
    sales_per_day.append(list(df_days['item_count']))

In [170]:
traces = []

for i, j in enumerate(days):
    
    trace = go.Box(
    y=sales_per_day[i],
    name = j
    )
    traces.append(trace)

data = traces

layout = go.Layout(title='Sales per Day',
                  xaxis = dict(title = 'Day of Week'),
                  yaxis = dict(title = 'Number of Items Sold'))

fig = go.Figure(data = data, layout = layout)

py.iplot(fig, filename='Sales-per-day-boxplot')

In [208]:
group_by_day_and_time = transactions.groupby(by=['day_of_week','time_of_day'])['item_count'].sum()

In [213]:
group_by_day_and_time = pd.DataFrame(group_by_day_and_time).reset_index()

In [217]:
group_by_day_and_time

Unnamed: 0,day_of_week,time_of_day,item_count
0,Friday,afternoon,1705
1,Friday,evening,70
2,Friday,morning,1349
3,Monday,afternoon,1365
4,Monday,evening,44
5,Monday,morning,915
6,Saturday,afternoon,2464
7,Saturday,evening,153
8,Saturday,morning,1988
9,Sunday,afternoon,1832


In [219]:
group_by_day_and_time[group_by_day_and_time.time_of_day == 'morning'].item_count

2     1349
5      915
8     1988
11    1242
14    1079
17     923
20     908
Name: item_count, dtype: int64

In [223]:
trace_morning = go.Bar(
    y = group_by_day_and_time[group_by_day_and_time.time_of_day == 'morning'].item_count,
    x = group_by_day_and_time[group_by_day_and_time.time_of_day == 'morning'].day_of_week,
    name = "morning",
    opacity=0.6
)

trace_afternoon = go.Bar(
    y = group_by_day_and_time[group_by_day_and_time.time_of_day == 'afternoon'].item_count,
    x = group_by_day_and_time[group_by_day_and_time.time_of_day == 'afternoon'].day_of_week,
    name = "afternoon",
    opacity=0.6
)

trace_evening = go.Bar(
   y = group_by_day_and_time[group_by_day_and_time.time_of_day == 'evening'].item_count,
    x = group_by_day_and_time[group_by_day_and_time.time_of_day == 'evening'].day_of_week,
    name = "evening",
    opacity=0.6
)
    
data = [trace_morning, trace_afternoon, trace_evening]

py.iplot(data, filename='sales-by-day-time')

In [231]:
sales_by_hour = pd.DataFrame(
                transactions[(transactions.hour > 1) & (transactions.hour < 21)].groupby(by='hour')['item_count'].sum())

In [232]:
sales_by_hour

Unnamed: 0_level_0,item_count
hour,Unnamed: 1_level_1
7,24
8,645
9,1966
10,2666
11,3102
12,2854
13,2617
14,2640
15,2115
16,1343


In [254]:
trace = go.Scatter(x=list(sales_by_hour.index),
                   y=list(sales_by_hour.item_count),
                   fill = 'tozeroy',
                   fillcolor = 'lightblue')

data = [trace]

layout = dict(
    title='Total volume of sales by hour',
    yaxis = dict( title = 'Volume of Item Sales'),
    xaxis=dict( title = 'Hour',
        rangeselector=dict(
            buttons=list([
                dict(count = 12,
                     label='Morning',
                     step='hour',
                     stepmode='backward'),
                dict(count = 8,
                     label='Afternoon',
                     step='hour',
                     stepmode='backward'),
                dict(count = 4,
                    label='Evening',
                    step='hour',
                    stepmode='backward'),
                dict(step='all')
            ]),
            visible = True
        ),
        rangeslider=dict(
            visible = True
        )
    )
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = 'sales-by-hour')

# Apriori Algorithm, Method 1

In [19]:
# Create a list of lists that is conducive to the format expected from apyori

list_of_lists = []

for transaction in list(set(transactions.Transaction)):
    
    df = transactions[transactions.Transaction == transaction]
    values = df.Item.values
    
    list_of_lists.append(list(values))

In [20]:
# Create a list of association rules

from apyori import apriori

association_rules = apriori(list_of_lists, min_confidence = 0, min_support = 0.005, min_length=2)  
association_results = list(association_rules)

In [200]:
def apriori_results(min_confidence=0, min_support=0.005, sort_by_value = 'lift', min_length =None,
                    time_of_day=None, day_of_week = None, df=transactions):
    
    """
    Takes in a data frame, filters it by time of day, calculates the results of the Apriori algorithm,
    and returns a data frame sorted by lift.
    """
    
    from apyori import apriori
    
    if time_of_day != None:
        
        df = df[df.time_of_day == time_of_day]
        
    if day_of_week:
        
        df = df[df.day_type == day_of_week]
        
    list_of_lists = []

    for transaction in list(set(df.Transaction)):

        df_t = df[df.Transaction == transaction]
        values = df_t.Item.values

        list_of_lists.append(list(values))

    if min_length is not None:
        
        list_of_lists = [i for i in list_of_lists if len(i) >= min_length]
        
    association_rules = apriori(list_of_lists, min_confidence = min_confidence, min_support = min_support)  
    association_results = list(association_rules)

    item_1 = []
    item_2 = []
    support = []
    confidence = []
    lift = []

    for item in association_results:

        pair = item[0] 
        items = [x for x in pair]
        
        if min_length is not None:
            
            if len(items) < min_length:

                continue

            else:

                item_1.append(items[0])

            #try:

                item_2.append(items[1])

            #except:

            #    item_2.append('None')
            
        else:
            
            item_1.append(items[0])

            try:

                item_2.append(items[1])

            except:

                item_2.append('None')

        support.append(item[1])

        confidence.append(item[2][0][2])
        lift.append(item[2][0][3])

    df_apriori = pd.DataFrame({'item_1':item_1, 'item_2':item_2, 'support':support,'confidence':confidence,'lift':lift})
    
    df_apriori = df_apriori.sort_values(by=sort_by_value, ascending=False)
    
    return df_apriori

In [174]:
all_day_results = apriori_results()

In [256]:
print(all_day_results)

                item_1          item_2   support  confidence      lift
87            Sandwich            Coke  0.005177    0.266304  3.706722
89               Juice         Cookies  0.006128    0.112621  2.920442
99            Sandwich            Soup  0.005494    0.076471  2.220227
111             Coffee   Hot chocolate  0.006867    0.125483  2.151618
93            Sandwich           Juice  0.005811    0.150685  2.097401
88       Hot chocolate         Cookies  0.006022    0.110680  1.897794
60       Hot chocolate            Cake  0.011410    0.109868  1.883874
102                Tea            Soup  0.009086    0.263804  1.849557
61               Juice            Cake  0.007079    0.068159  1.767458
95              Pastry       Medialuna  0.009192    0.148718  1.727135
101                Tea           Scone  0.008347    0.241590  1.693816
73    Keeping It Local          Coffee  0.005388    0.011263  1.692169
64                 Tea            Cake  0.023772    0.228891  1.604781
85    

In [175]:
morning_results = apriori_results(time_of_day='morning')

In [257]:
print(morning_results)

              item_1          item_2   support  confidence      lift
67             Juice         Cookies  0.006093    0.128205  4.015463
77          Sandwich             Tea  0.005362    0.255814  2.380056
48     Hot chocolate            Cake  0.006337    0.100386  1.898084
50               Tea            Cake  0.012186    0.193050  1.796111
72               Tea           Juice  0.006093    0.190840  1.775545
88            Pastry          Coffee  0.006093    0.213675  1.532709
75               Tea          Muffin  0.005849    0.163265  1.518997
56  Keeping It Local          Coffee  0.009262    0.017984  1.505877
78               Tea           Toast  0.008043    0.074830  1.505035
87               Tea          Coffee  0.005362    0.161765  1.505035
66            Coffee           Toast  0.035827    0.069569  1.399230
69     Hot chocolate       Medialuna  0.006824    0.129032  1.393209
63            Coffee  Spanish Brunch  0.008287    0.016091  1.375434
85            Coffee           Toa

In [176]:
afternoon_results = apriori_results(time_of_day='afternoon')

In [258]:
print(afternoon_results)

                   item_1          item_2   support  confidence      lift
97               Sandwich            Coke  0.009236    0.335714  2.895678
106         Mineral water        Sandwich  0.005699    0.266055  2.294838
99                  Juice         Cookies  0.005699    0.096667  2.225958
124                Coffee   Hot chocolate  0.009236    0.128415  2.087877
111              Sandwich        Truffles  0.006681    0.057627  1.942148
120                Pastry          Coffee  0.007860    0.087336  1.907529
103              Sandwich           Juice  0.009432    0.217195  1.873395
98          Hot chocolate         Cookies  0.006681    0.113333  1.842662
67          Hot chocolate            Cake  0.014934    0.109195  1.775385
127              Sandwich          Coffee  0.006485    0.104101  1.714465
112                   Tea           Scone  0.011790    0.280374  1.651415
114                   Tea  Spanish Brunch  0.006485    0.266129  1.567512
113                   Tea            S

In [177]:
evening_results = apriori_results(time_of_day='evening')

In [202]:
weekday_results = apriori_results(day_of_week = 'weekday')

In [203]:
weekend_results = apriori_results(day_of_week = 'weekend')

In [204]:
weekday_morning_results = apriori_results(day_of_week = 'weekday', time_of_day = 'morning')

In [205]:
weekday_morning_results

Unnamed: 0,item_1,item_2,support,confidence,lift
59,Juice,Cookies,0.007175,0.133803,4.320852
69,Sandwich,Tea,0.007175,0.316667,2.636897
80,Tea,Coffee,0.005287,0.126126,2.24149
58,Hot chocolate,Cookies,0.005287,0.098592,2.193869
44,Hot chocolate,Cake,0.005287,0.092715,2.063109
45,Tea,Cake,0.012462,0.218543,1.819818
64,Tea,Juice,0.006042,0.195122,1.624789
67,Tea,Muffin,0.005665,0.180723,1.504887
70,Tea,Toast,0.009819,0.081761,1.453041
51,Keeping It Local,Coffee,0.010574,0.019971,1.391696


In [274]:
weekday_afternoon_results = apriori_results(day_of_week = 'weekday', time_of_day = 'afternoon')

In [276]:
weekday_afternoon_results[:20]

Unnamed: 0,item_1,item_2,support,confidence,lift
97,Mineral water,Sandwich,0.006917,0.333333,2.777778
88,Sandwich,Coke,0.009925,0.326733,2.722772
90,Juice,Cookies,0.006617,0.1,2.427007
113,Coffee,Hot chocolate,0.00812,0.122727,2.229881
89,Hot chocolate,Cookies,0.006917,0.104545,1.899528
94,Sandwich,Juice,0.009323,0.226277,1.885645
105,Tea,Spanish Brunch,0.005414,0.315789,1.868327
102,Sandwich,Truffles,0.006617,0.055138,1.851852
62,Hot chocolate,Cake,0.012632,0.100719,1.830011
109,Pastry,Coffee,0.007218,0.086643,1.823333


# Apriori Algorithm, Method 2

In [195]:
def apriori_results_method_2(df = transactions, min_support = 0.005, time_of_day = None, day_of_week = None,
                            min_threshold = 1, metric = 'lift'):
    
    """
    Takes in a data frame and returns the results of the Apriori algorithm using the mlxtend Python package.
    """
    
    from mlxtend.frequent_patterns import apriori
    from mlxtend.frequent_patterns import association_rules

    if time_of_day:
        
        df = df[df.time_of_day == time_of_day]
        
    if day_of_week:
        
        df = df[df.day_type == day_of_week]
        
    dummies_df = df.groupby(['Transaction', 'Item'])['Item'].count().unstack().\
                 reset_index().fillna(0).set_index('Transaction')
    
    def encode_units(x):
    
        if x <= 0:
            return 0
        if x >= 1:
            return 1
    
    dummies_df = dummies_df.applymap(encode_units)
    
    frequent_itemsets = apriori(dummies_df, min_support= min_support, use_colnames=True)

    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)
    
    rules.drop_duplicates(subset='leverage', keep='first', inplace=True)
    rules = rules.sort_values(by='lift', ascending=False)
    
    return rules

In [189]:
all_day_method_2 = apriori_results_method_2()

In [190]:
morning_method_2 = apriori_results_method_2(time_of_day = 'morning')

In [259]:
morning_method_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
34,(Juice),(Cookies),0.031928,0.047526,0.006093,0.19084,4.015463,0.004576,1.177114
46,(Sandwich),(Tea),0.02096,0.107482,0.005362,0.255814,2.380056,0.003109,1.199321
64,"(Tea, Coffee)",(Cake),0.040946,0.063125,0.005362,0.130952,2.074508,0.002777,1.078048
8,(Hot chocolate),(Cake),0.052888,0.063125,0.006337,0.119816,1.898084,0.002998,1.064408
10,(Tea),(Cake),0.107482,0.063125,0.012186,0.113379,1.796111,0.005401,1.056681
40,(Tea),(Juice),0.107482,0.031928,0.006093,0.056689,1.775545,0.002661,1.02625
70,"(Coffee, Hot chocolate)",(Pastry),0.028516,0.13941,0.006093,0.213675,1.532709,0.002118,1.094446
44,(Tea),(Muffin),0.107482,0.035827,0.005849,0.054422,1.518997,0.001999,1.019665
18,(Keeping It Local),(Coffee),0.011942,0.514989,0.009262,0.77551,1.505877,0.003111,2.160503
48,(Tea),(Toast),0.107482,0.04972,0.008043,0.07483,1.505035,0.002699,1.027141


In [191]:
afternoon_method_2 = apriori_results_method_2(time_of_day = 'afternoon')

In [260]:
print(afternoon_method_2)

                 antecedents       consequents  antecedent support  \
58                (Sandwich)            (Coke)            0.115936   
72                (Sandwich)   (Mineral water)            0.115936   
114  (Coffee, Hot chocolate)            (Cake)            0.030261   
62                   (Juice)         (Cookies)            0.043427   
134            (Tea, Coffee)           (Scone)            0.057182   
115           (Coffee, Cake)   (Hot chocolate)            0.071920   
128           (Coffee, Soup)        (Sandwich)            0.027707   
80                (Sandwich)        (Truffles)            0.115936   
102          (Coffee, Bread)          (Pastry)            0.089998   
66                (Sandwich)           (Juice)            0.115936   
60           (Hot chocolate)         (Cookies)            0.061505   
18           (Hot chocolate)            (Cake)            0.061505   
122            (Tea, Coffee)            (Cake)            0.057182   
126       (Sandwich,

In [192]:
evening_method_2 = apriori_results_method_2(time_of_day = 'evening')

In [196]:
weekend_method_2 = apriori_results_method_2(day_of_week = 'weekend')

In [271]:
weekend_method_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
90,(Juice),(Spanish Brunch),0.041566,0.030422,0.00512,0.123188,4.049361,0.003856,1.1058
28,(Jammie Dodgers),(Cake),0.018675,0.118675,0.005422,0.290323,2.446373,0.003205,1.241867
135,"(Coffee, Cake)",(Hot chocolate),0.059337,0.071084,0.009036,0.152284,2.142304,0.004818,1.095787
156,"(Pastry, Coffee)",(Medialuna),0.043675,0.078614,0.007229,0.165517,2.10543,0.003795,1.10414
20,(Hot chocolate),(Brownie),0.071084,0.051807,0.00753,0.105932,2.044738,0.003847,1.060538
134,"(Coffee, Hot chocolate)",(Cake),0.037349,0.118675,0.009036,0.241935,2.038644,0.004604,1.162599
88,(Sandwich),(Juice),0.065361,0.041566,0.005422,0.082949,1.995592,0.002705,1.045126
152,"(Tea, Coffee)",(Cake),0.052711,0.118675,0.011747,0.222857,1.877883,0.005492,1.134058
104,(Tea),(Soup),0.13012,0.021084,0.00512,0.039352,1.866402,0.002377,1.019016
158,"(Coffee, Medialuna)",(Pastry),0.047289,0.083434,0.007229,0.152866,1.832187,0.003283,1.081962


In [262]:
weekday_method_2 = apriori_results_method_2(day_of_week = 'weekday')

In [270]:
weekday_method_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
42,(Sandwich),(Coke),0.075346,0.020504,0.005696,0.075594,3.686705,0.004151,1.059594
46,(Juice),(Cookies),0.036941,0.062002,0.00716,0.193833,3.12625,0.00487,1.163528
76,"(Coffee, Hot chocolate)",(Cake),0.025386,0.09585,0.005696,0.224359,2.340723,0.003262,1.165681
52,(Sandwich),(Juice),0.075346,0.036941,0.006021,0.079914,2.1633,0.003238,1.046705
77,"(Coffee, Cake)",(Hot chocolate),0.052238,0.051424,0.005696,0.109034,2.120302,0.003009,1.064661
12,(Juice),(Cake),0.036941,0.09585,0.007323,0.198238,2.068203,0.003782,1.127703
44,(Hot chocolate),(Cookies),0.051424,0.062002,0.006509,0.126582,2.041596,0.003321,1.07394
60,(Sandwich),(Soup),0.075346,0.04166,0.006184,0.082073,1.970083,0.003045,1.044027
82,"(Tea, Coffee)",(Cake),0.048332,0.09585,0.009113,0.188552,1.967153,0.00448,1.114243
10,(Hot chocolate),(Cake),0.051424,0.09585,0.009601,0.186709,1.947922,0.004672,1.111717


In [198]:
weekend_morning_method_2 = apriori_results_method_2(day_of_week = 'weekend', time_of_day = 'morning')

In [269]:
weekend_morning_method_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
91,"(Coffee, Brownie)",(Hot chocolate),0.026117,0.067354,0.006186,0.236842,3.51638,0.004426,1.222088
90,"(Coffee, Hot chocolate)",(Brownie),0.041924,0.042612,0.006186,0.147541,3.462454,0.004399,1.12309
6,(Hot chocolate),(Brownie),0.067354,0.042612,0.009622,0.142857,3.352535,0.006752,1.116953
110,"(Coffee, Farm House)",(Medialuna),0.014433,0.125773,0.005498,0.380952,3.028884,0.003683,1.412213
97,"(Coffee, Cake)",(Hot chocolate),0.032302,0.067354,0.006186,0.191489,2.843031,0.00401,1.153536
80,"(Coffee, Bread)",(Spanish Brunch),0.102405,0.023368,0.005498,0.053691,2.297671,0.003105,1.032044
62,(Tea),(Juice),0.084536,0.033677,0.006186,0.073171,2.172723,0.003339,1.042612
107,"(Coffee, Cake)",(Tea),0.032302,0.084536,0.005498,0.170213,2.013492,0.002768,1.103251
96,"(Coffee, Hot chocolate)",(Cake),0.041924,0.074227,0.006186,0.147541,1.987705,0.003074,1.086003
122,"(Pastry, Coffee)",(Hot chocolate),0.068041,0.067354,0.008935,0.131313,1.949598,0.004352,1.073627


In [265]:
weekday_morning_method_2 = apriori_results_method_2(day_of_week = 'weekday', time_of_day = 'morning')

In [268]:
weekday_morning_method_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
26,(Juice),(Cookies),0.030967,0.053625,0.007175,0.231707,4.320852,0.005515,1.231789
38,(Sandwich),(Tea),0.022659,0.120091,0.007175,0.316667,2.636897,0.004454,1.287672
61,"(Tea, Coffee)",(Toast),0.041918,0.056269,0.005287,0.126126,2.24149,0.002928,1.07994
50,"(Tea, Coffee)",(Cake),0.041918,0.057024,0.005287,0.126126,2.211801,0.002897,1.079075
24,(Hot chocolate),(Cookies),0.04494,0.053625,0.005287,0.117647,2.193869,0.002877,1.072558
6,(Hot chocolate),(Cake),0.04494,0.057024,0.005287,0.117647,2.063109,0.002724,1.068706
8,(Tea),(Cake),0.120091,0.057024,0.012462,0.103774,1.819818,0.005614,1.052163
32,(Tea),(Juice),0.120091,0.030967,0.006042,0.050314,1.624789,0.002323,1.020373
36,(Tea),(Muffin),0.120091,0.031344,0.005665,0.04717,1.504887,0.0019,1.016609
40,(Tea),(Toast),0.120091,0.056269,0.009819,0.081761,1.453041,0.003061,1.027762


In [272]:
weekday_afternoon_method_2 = apriori_results_method_2(day_of_week = 'weekday', time_of_day = 'afternoon')

In [273]:
weekday_afternoon_method_2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
66,(Mineral water),(Sandwich),0.020752,0.12,0.006917,0.333333,2.777778,0.004427,1.32
50,(Sandwich),(Coke),0.12,0.030376,0.009925,0.082707,2.722772,0.00628,1.057049
54,(Juice),(Cookies),0.041203,0.066165,0.006617,0.160584,2.427007,0.00389,1.112481
100,"(Coffee, Hot chocolate)",(Cake),0.02797,0.125414,0.00812,0.290323,2.314922,0.004612,1.232372
101,"(Coffee, Cake)",(Hot chocolate),0.066165,0.055038,0.00812,0.122727,2.229881,0.004479,1.077159
52,(Hot chocolate),(Cookies),0.055038,0.066165,0.006917,0.125683,1.899528,0.003276,1.068073
60,(Sandwich),(Juice),0.12,0.041203,0.009323,0.077694,1.885645,0.004379,1.039565
120,"(Coffee, Soup)",(Sandwich),0.030677,0.12,0.006917,0.22549,1.879085,0.003236,1.136203
78,(Tea),(Spanish Brunch),0.169023,0.017143,0.005414,0.032028,1.868327,0.002516,1.015378
72,(Sandwich),(Truffles),0.12,0.029774,0.006617,0.055138,1.851852,0.003044,1.026844


In [279]:
weekday_afternoon_method_2.groupby(by='antecedents')['lift'].sum().sort_values(ascending=False)

antecedents
(Tea)                      16.837318
(Coffee)                   12.533971
(Sandwich)                 10.925505
(Tea, Coffee)               6.229868
(Coffee, Cake)              4.515262
(Juice)                     4.231257
(Coffee, Bread)             4.002817
(Hot chocolate)             3.729539
(Mineral water)             2.777778
(Coffee, Cookies)           2.611594
(Sandwich, Coffee)          2.552629
(Coffee, Hot chocolate)     2.314922
(Pastry)                    2.263813
(Coffee, Soup)              1.879085
(Sandwich, Cake)            1.455461
(Salad)                     1.419074
(Hot chocolate, Cake)       1.403480
(Tea, Bread)                1.393448
(Sandwich, Soup)            1.321405
(Cake, Cookies)             1.267659
(Bread, Cake)               1.252878
(Cookies)                   1.123556
(Bread)                     1.096957
(Hearty & Seasonal)         1.055209
(Pastry, Bread)             1.007627
Name: lift, dtype: float64