In [8]:
!pip install apyori
!pip install mlxtend

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [9]:
%pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
# import necessary library

import pandas as pd
import numpy as np
from apyori import apriori
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns.fpgrowth import fpgrowth

In [11]:
# Load the bakery transaction log. Rows that cannot be parsed are skipped
# (on_bad_lines='skip') instead of aborting the read.
bakery_csv_path = "/content/Bakery.csv"
df = pd.read_csv(bakery_csv_path, on_bad_lines='skip')

# Preview the first rows.
df.head()

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend


In [12]:
# Drop every row that contains a missing value. DataFrame.dropna() returns a
# new frame and leaves `df` untouched, so the result has to be assigned back
# for the cleaning to affect the cells below.
df = df.dropna()
df

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend
...,...,...,...,...,...
20502,9682,Coffee,2017-09-04 14:32:58,Afternoon,Weekend
20503,9682,Tea,2017-09-04 14:32:58,Afternoon,Weekend
20504,9683,Coffee,2017-09-04 14:57:06,Afternoon,Weekend
20505,9683,Pastry,2017-09-04 14:57:06,Afternoon,Weekend


In [13]:
# Disabled: uncomment the next line to restrict the analysis to the first 200 rows.
#df = df.head(200)

In [14]:
# Show the column labels of the loaded DataFrame.
df.columns

Index(['TransactionNo', 'Items', 'DateTime', 'Daypart', 'DayType'], dtype='object')

In [15]:
# Reshape the data into the form apriori expects: one list of items per
# transaction.

# Rows that share a TransactionNo belong to the same basket, so gather each
# group's Items values into a single Python list.
items_per_transaction = df.groupby(by=['TransactionNo'])['Items'].apply(list)
df_grouped = items_per_transaction.reset_index(name='Items')

In [16]:
# Spread each basket's list across columns: one column per item position,
# with shorter baskets padded by NaN.
# NOTE(review): this rebinds `df_grouped` to a frame without an 'Items'
# column, so the cell cannot be re-run without re-running the groupby cell.
basket_lists = df_grouped['Items']
df_grouped = basket_lists.apply(pd.Series)

In [17]:
# Transform the wide frame into a list of lists (one inner list per
# transaction), as required by apriori. Every cell is cast to str first, so
# the NaN padding becomes the literal string 'nan'.
baskets_as_text = df_grouped.astype(str)
data = baskets_as_text.to_numpy().tolist()

In [18]:
# Remove the null placeholders from each row: after the str cast above they
# appear as the literal string 'nan'.
# NOTE(review): a real item literally named 'nan' would be removed as well.
baskets_without_padding = []
for basket in data:
    baskets_without_padding.append([item for item in basket if item != 'nan'])
data = baskets_without_padding

In [19]:
# Apply the apriori algorithm, keeping itemsets whose support is at least 0.5%.
# The result is consumed with list() below; it is deliberately NOT stored under
# the name `association_rules`, because this notebook also imports mlxtend's
# `association_rules` function under that name — reusing it for this object
# leads to "'generator' object is not callable" when the function is called.
apriori_rule_generator = apriori(transactions=data, min_support=0.005)

# Materialise the results and tabulate them (columns: items, support,
# ordered_statistics).
association_results = list(apriori_rule_generator)
apriori_result = pd.DataFrame(association_results)

In [20]:
# Preview the first rows of the apriori result table.
apriori_result.head()

Unnamed: 0,items,support,ordered_statistics
0,(Alfajores),0.036344,"[((), (Alfajores), 0.036344426835710514, 1.0)]"
1,(Baguette),0.016059,"[((), (Baguette), 0.016059165346011622, 1.0)]"
2,(Bakewell),0.005071,"[((), (Bakewell), 0.005071315372424723, 1.0)]"
3,(Bread),0.327205,"[((), (Bread), 0.32720549392498677, 1.0)]"
4,(Brownie),0.040042,"[((), (Brownie), 0.04004226096143687, 1.0)]"


In [21]:
# Inspect the ordered_statistics entry of the first result row (row label 0).
apriori_result.loc[0, 'ordered_statistics']

[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'Alfajores'}), confidence=0.036344426835710514, lift=1.0)]

# FP Growth

In [22]:
# One-hot encode the baskets so mlxtend can consume them.

# Shallow-copy the basket list so the encoder is handed its own list object.
transactions = data.copy()

# Learn the item vocabulary, then encode every basket as a row of booleans.
# fit() returns the encoder itself, so the two calls can be chained.
te = TransactionEncoder()
trans_encoded = te.fit(transactions).transform(transactions)

# Wrap the encoded array in a DataFrame whose columns are the item names the
# encoder discovered.
df_encoded = pd.DataFrame(data=trans_encoded, columns=te.columns_)
df_encoded

Unnamed: 0,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,The BART,The Nomad,Tiffin,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9460,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9461,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
9462,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9463,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [23]:
# The support threshold is an absolute count of 7 transactions. It is passed
# to mlxtend as a fraction of all transactions, so divide the count by the
# number of baskets.
n_transactions = len(transactions)
min_support = 7 / n_transactions

# Mine the frequent itemsets with FP-Growth; use_colnames=True reports item
# names instead of column indices.
frequent_itemsets = fpgrowth(
    df_encoded,
    min_support=min_support,
    use_colnames=True,
)

# Display the frequent itemsets.
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.327205,(Bread)
1,0.029054,(Scandinavian)
2,0.058320,(Hot chocolate)
3,0.054411,(Cookies)
4,0.015003,(Jam)
...,...,...
654,0.002958,"(Coffee, Baguette)"
655,0.000951,"(Baguette, Cake)"
656,0.000740,"(Baguette, Cookies)"
657,0.000951,"(Scandinavian, Baguette)"


In [24]:
# Derive association rules from the frequent itemsets.
# The import is kept local to this cell so that `association_rules` is
# guaranteed to refer to mlxtend's function here, regardless of any earlier
# binding of that name.
from mlxtend.frequent_patterns import association_rules

# Keep only rules whose confidence is at least 0.5, then display them.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Hot chocolate),(Coffee),0.058320,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553
1,"(Medialuna, Hot chocolate)",(Coffee),0.004754,0.478394,0.003064,0.644444,1.347100,0.000789,1.467017
2,"(Hot chocolate, Cake)",(Coffee),0.011410,0.478394,0.006867,0.601852,1.258067,0.001409,1.310080
3,"(Bread, Hot chocolate, Cake)",(Coffee),0.001796,0.478394,0.000951,0.529412,1.106644,0.000092,1.108413
4,"(Pastry, Hot chocolate)",(Coffee),0.005705,0.478394,0.003803,0.666667,1.393551,0.001074,1.564818
...,...,...,...,...,...,...,...,...,...
129,"(Spanish Brunch, Soup)",(Coffee),0.001057,0.478394,0.000740,0.700000,1.463229,0.000234,1.738686
130,(Extra Salami or Feta),(Coffee),0.004015,0.478394,0.003275,0.815789,1.705267,0.001355,2.831575
131,"(Extra Salami or Feta, Bread)",(Coffee),0.001057,0.478394,0.000740,0.700000,1.463229,0.000234,1.738686
132,"(Salad, Extra Salami or Feta)",(Coffee),0.001690,0.478394,0.001479,0.875000,1.829036,0.000670,4.172847
