#### Import the necessary libraries:


In [159]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules


#### Load the Bakery data into a pandas DataFrame:


In [160]:
# Read the bakery transactions workbook into a DataFrame. Later cells use its
# 'Items', 'TransactionNo', 'daypart' and 'DayType' columns.
df = pd.read_excel(io='bakery_dataset.xlsx')


##### Convert the transaction data into a one-hot encoded format using the `pd.get_dummies()` function:



In [161]:
# One indicator column per distinct value of 'Items', with the transaction
# number attached as an extra column so rows can be grouped per transaction.
onehot = pd.get_dummies(df['Items']).assign(TransactionNo=df['TransactionNo'])

#### Encode the categorical variables as numeric values:

In [162]:
# Overwrite each categorical column of `df` in place with its integer
# category code. This runs after `onehot` was built from the original
# 'Items' values, so the one-hot columns keep the item names.
for col in ('Items', 'daypart', 'DayType'):
    df[col] = df[col].astype('category').cat.codes

##### Group the one-hot encoded data by transaction number, sum the values to count each item per transaction, then threshold the counts to get a binary matrix indicating the presence or absence of each item in each transaction:


In [163]:
# Count how many times each item appears in each transaction.
grouped = onehot.groupby('TransactionNo').sum()

# Reduce the counts to presence/absence. The vectorised comparison replaces
# the element-wise `applymap(lambda ...)` call (`DataFrame.applymap` is
# deprecated since pandas 2.1) and produces a boolean DataFrame, which is the
# input type mlxtend's `apriori` recommends; 0/1 integer input triggers a
# deprecation warning there. Support values are unchanged.
basket_sets = grouped >= 1


#### Generate frequent itemsets using the apriori algorithm:


In [164]:
# Mine itemsets with support of at least 0.02 (i.e. present in >= 2% of
# transactions); `use_colnames=True` reports items by column label rather
# than by column index.
frequent_itemsets = apriori(
    basket_sets,
    use_colnames=True,
    min_support=0.02,
)




#### Generate association rules from the frequent itemsets using the association_rules function:


In [165]:
# Derive association rules from the frequent itemsets, filtering on the
# "lift" metric with a minimum threshold of 1.
rules = association_rules(
    frequent_itemsets,
    min_threshold=1,
    metric="lift",
)


## Analyze the results to answer the questions

#### 1) What items are frequently purchased together?


In [189]:
# Frequent itemsets ordered from highest to lowest support.
df_2 = frequent_itemsets.sort_values('support', ascending=False)
df_2

Unnamed: 0,support,itemsets
4,0.478394,(Coffee)
1,0.327205,(Bread)
16,0.142631,(Tea)
3,0.103856,(Cake)
20,0.090016,"(Coffee, Bread)"
...,...,...
24,0.023772,"(Cake, Tea)"
32,0.023666,"(Toast, Coffee)"
19,0.023349,"(Bread, Cake)"
27,0.020602,"(Juice, Coffee)"


#### 2) How often do customers buy certain items together?


In [188]:
# Rules ordered by support, then confidence, both descending.
df_3 = rules.sort_values(
    ['support', 'confidence'],
    ascending=[False, False],
)
df_3

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
3,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664,0.102840
2,(Coffee),(Cake),0.478394,0.103856,0.054728,0.114399,1.101515,0.005044,1.011905,0.176684
15,(Pastry),(Coffee),0.086107,0.478394,0.047544,0.552147,1.154168,0.006351,1.164682,0.146161
14,(Coffee),(Pastry),0.478394,0.086107,0.047544,0.099382,1.154168,0.006351,1.014740,0.256084
17,(Sandwich),(Coffee),0.071844,0.478394,0.038246,0.532353,1.112792,0.003877,1.115384,0.109205
...,...,...,...,...,...,...,...,...,...,...
5,(Tea),(Cake),0.142631,0.103856,0.023772,0.166667,1.604781,0.008959,1.075372,0.439556
18,(Toast),(Coffee),0.033597,0.478394,0.023666,0.704403,1.472431,0.007593,1.764582,0.332006
19,(Coffee),(Toast),0.478394,0.033597,0.023666,0.049470,1.472431,0.007593,1.016699,0.615122
10,(Juice),(Coffee),0.038563,0.478394,0.020602,0.534247,1.116750,0.002154,1.119919,0.108738


#### 3) Can we upsell or cross-sell certain items based on customers' buying patterns?


In [192]:
# Rules ordered from highest to lowest lift.
df_4 = rules.sort_values('lift', ascending=False)
df_4

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
5,(Tea),(Cake),0.142631,0.103856,0.023772,0.166667,1.604781,0.008959,1.075372,0.439556
4,(Cake),(Tea),0.103856,0.142631,0.023772,0.228891,1.604781,0.008959,1.111865,0.420538
19,(Coffee),(Toast),0.478394,0.033597,0.023666,0.049470,1.472431,0.007593,1.016699,0.615122
18,(Toast),(Coffee),0.033597,0.478394,0.023666,0.704403,1.472431,0.007593,1.764582,0.332006
13,(Medialuna),(Coffee),0.061807,0.478394,0.035182,0.569231,1.189878,0.005614,1.210871,0.170091
...,...,...,...,...,...,...,...,...,...,...
6,(Coffee),(Cookies),0.478394,0.054411,0.028209,0.058966,1.083723,0.002179,1.004841,0.148110
9,(Hot chocolate),(Coffee),0.058320,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553,0.060403
8,(Coffee),(Hot chocolate),0.478394,0.058320,0.029583,0.061837,1.060311,0.001683,1.003749,0.109048
0,(Bread),(Pastry),0.327205,0.086107,0.029160,0.089119,1.034977,0.000985,1.003306,0.050231


### Export the data to save the results of the question analysis

In [193]:
# Write each result table to its own sheet of a single workbook, omitting the
# DataFrame index.
# NOTE(review): the first sheet name starts with a space - presumably a typo.
# It is kept unchanged here so the generated workbook stays identical;
# confirm with consumers of the file before renaming the sheet.
with pd.ExcelWriter('python_output.xlsx') as writer:
    for sheet_title, table in (
        (' items_purchased_together', df_2),
        ('customers_buy_certain_items', df_3),
        ('buying patterns', df_4),
    ):
        table.to_excel(writer, sheet_name=sheet_title, index=False)