**Import libraries**

In [None]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from mlxtend.frequent_patterns import association_rules, apriori
warnings.filterwarnings('ignore')

## **Data Consolidation**

**Import raw dataset from a text file into a dataframe**

In [None]:
# Build a one-hot "basket" table from the raw transaction file: each CSV line
# is one transaction and each comma-separated field is one purchased item.
# The file is parsed once and kept in memory so it is not read a second time.
# newline="" is what the csv module documentation recommends for file objects.
with open("/content/Q1.txt", newline="", encoding="utf-8") as f:
    transactions = list(csv.reader(f, delimiter=","))

# Every distinct item seen in any transaction.
grocery_items = {item for basket in transactions for item in basket}

# Iterating a set of strings yields a different order from one interpreter run
# to the next, so fix the column order explicitly to keep the table reproducible.
item_columns = sorted(grocery_items)

# One dict per transaction: 1 if the item is in the basket, otherwise 0.
# Each basket is converted to a set so the membership test is O(1).
output_list = [
    {item: int(item in basket) for item in item_columns}
    for basket in map(set, transactions)
]
df = pd.DataFrame(output_list, columns=item_columns)
df

Unnamed: 0,fruit/vegetable juice,soda,soups,baby food,hamburger meat,salt,canned fruit,bathroom cleaner,misc. beverages,Instant food products,...,berries,domestic eggs,cocoa drinks,beverages,seasonal products,specialty chocolate,dog food,liqueur,pip fruit,candy
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9830,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9831,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9832,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
9833,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## **Data Cleaning**

**Check duplicates**

In [None]:
# Count the rows that exactly repeat an earlier row; the first occurrence of
# each distinct row is not counted.
df.duplicated(keep="first").sum()

2824

**Drop duplicates**



In [None]:
# Keep a single copy of every distinct basket row and renumber the index from 0.
# NOTE(review): identical rows may be genuine repeat purchases by different
# customers; removing them changes every support value computed later. Confirm
# that de-duplication is intended for this analysis.
df = df.drop_duplicates(ignore_index=True)
df

Unnamed: 0,fruit/vegetable juice,soda,soups,baby food,hamburger meat,salt,canned fruit,bathroom cleaner,misc. beverages,Instant food products,...,berries,domestic eggs,cocoa drinks,beverages,seasonal products,specialty chocolate,dog food,liqueur,pip fruit,candy
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7006,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
7007,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7008,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
7009,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## **Association Rules Mining**

In [None]:
# Mine frequent itemsets with the Apriori algorithm at min_support=0.02:
# an itemset is kept only if it appears in at least 2% of the rows of df.
# With the 7,010 de-duplicated baskets shown above, that means at least 141 baskets.
# use_colnames=True reports itemsets by item name rather than by column index.
frequent_items = apriori(df, min_support = 0.02,use_colnames = True)
frequent_items

Unnamed: 0,support,itemsets
0,0.095136,(fruit/vegetable juice)
1,0.195692,(soda)
2,0.044929,(hamburger meat)
3,0.034089,(misc. beverages)
4,0.075738,(butter)
...,...,...
186,0.031094,"(whole milk, yogurt, other vegetables)"
187,0.023962,"(whole milk, other vegetables, tropical fruit)"
188,0.021823,"(whole milk, rolls/buns, yogurt)"
189,0.020397,"(whole milk, root vegetables, yogurt)"


In [None]:
# Derive association rules from the frequent itemsets found above, keeping only
# rules whose lift is at least 1.0, then order them from highest to lowest lift.
rules = association_rules(frequent_items, metric="lift", min_threshold=1.0)
rules = rules.sort_values(by="lift", ascending=False)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
162,(root vegetables),(beef),0.146627,0.068606,0.023820,0.162451,2.367872,0.013760,1.112047
163,(beef),(root vegetables),0.068606,0.146627,0.023820,0.347193,2.367872,0.013760,1.307237
222,(root vegetables),"(whole milk, other vegetables)",0.146627,0.103552,0.032520,0.221790,2.141830,0.017337,1.151936
219,"(whole milk, other vegetables)",(root vegetables),0.103552,0.146627,0.032520,0.314050,2.141830,0.017337,1.244074
216,(whipped/sour cream),"(whole milk, other vegetables)",0.094138,0.103552,0.020397,0.216667,2.092355,0.010648,1.144402
...,...,...,...,...,...,...,...,...,...
118,(whipped/sour cream),(rolls/buns),0.094138,0.211525,0.020397,0.216667,1.024309,0.000484,1.006564
37,(other vegetables),(bottled water),0.251890,0.134646,0.034660,0.137599,1.021936,0.000744,1.003425
36,(bottled water),(other vegetables),0.134646,0.251890,0.034660,0.257415,1.021936,0.000744,1.007441
77,(other vegetables),(shopping bags),0.251890,0.127086,0.032520,0.129105,1.015889,0.000509,1.002319


In [None]:
# Display the rules ranked by confidence, strongest first.
# This returns a sorted copy; `rules` itself keeps its lift ordering.
rules.sort_values(by="confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
244,"(root vegetables, yogurt)",(whole milk),0.036086,0.316931,0.020397,0.565217,1.783411,0.008960,1.571060
226,"(yogurt, other vegetables)",(whole milk),0.059763,0.316931,0.031094,0.520286,1.641642,0.012153,1.423911
250,"(yogurt, tropical fruit)",(whole milk),0.040793,0.316931,0.021110,0.517483,1.632795,0.008181,1.415637
214,"(whipped/sour cream, other vegetables)",(whole milk),0.039795,0.316931,0.020397,0.512545,1.617215,0.007784,1.401297
33,(butter),(whole milk),0.075738,0.316931,0.038368,0.506591,1.598430,0.014365,1.384389
...,...,...,...,...,...,...,...,...,...
251,(whole milk),"(yogurt, tropical fruit)",0.316931,0.040793,0.021110,0.066607,1.632795,0.008181,1.027656
84,(whole milk),(sugar),0.316931,0.045785,0.021110,0.066607,1.454764,0.006599,1.022307
26,(whole milk),(hamburger meat),0.316931,0.044929,0.020682,0.065257,1.452424,0.006442,1.021746
245,(whole milk),"(root vegetables, yogurt)",0.316931,0.036086,0.020397,0.064356,1.783411,0.008960,1.030215


**Question 1: What are the most insightful results?**

Suggested answers:
*   Whole milk is the best selling item.
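*   Confidence is strongly asymmetric: many item combinations predict whole milk (e.g., {root vegetables, yogurt} → whole milk has a confidence of about 0.57), but whole milk does not predict them (whole milk → {root vegetables, yogurt} has a confidence of only about 0.06).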

**Question 2: If you were the supermarket manager, what would be your sales / marketing strategies?**

Suggested answers:
*   Promotional discounts on some items (e.g., yogurt) can entice customers to buy whole milk.
*   Placing these items close to the whole milk ordering counter can be a good strategy to tempt customers into buying them.