In [1]:
import pandas as pd
import warnings
import csv
from mlxtend.frequent_patterns import apriori, association_rules
warnings.filterwarnings("ignore") 

# 1. Data Preparation

In [2]:
grocery_items = set()
with open("grocery.csv") as f:
    reader = csv.reader(f, delimiter=",")
    for i, line in enumerate(reader):
        grocery_items.update(line)

output_list = list()
with open("grocery.csv") as f:
    reader = csv.reader(f, delimiter=",")
    for i, line in enumerate(reader):
        row_val = {item:0 for item in grocery_items}
        row_val.update({item:1 for item in line})
        output_list.append(row_val)
grocery = pd.DataFrame(output_list)

grocery.head()

Unnamed: 0,beef,liquor,mayonnaise,cling film/bags,honey,frozen meals,rum,chocolate,bottled beer,vinegar,...,organic products,tea,canned vegetables,semi-finished bread,preservation products,white bread,ready soups,nuts/prunes,rolls/buns,baby cosmetics
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# 2. Data Exploration

In [3]:
df_total_sold = pd.DataFrame(grocery.sum()).reset_index()
df_total_sold.columns = ['item','total_sold']
df_total_sold.sort_values('total_sold',ascending=False,inplace=True)

In [4]:
df_total_sold

Unnamed: 0,item,total_sold
42,whole milk,2513
11,other vegetables,1903
167,rolls/buns,1809
120,soda,1715
107,yogurt,1372
...,...,...
43,kitchen utensil,4
114,bags,4
163,preservation products,2
27,sound storage medium,1


# 3. Modeling

In [9]:
frequent_itemsets = apriori(grocery, min_support=0.047, use_colnames=True)
frequent_itemsets.head(10)

Unnamed: 0,support,itemsets
0,0.052466,(beef)
1,0.049619,(chocolate)
2,0.080529,(bottled beer)
3,0.193493,(other vegetables)
4,0.088968,(pastry)
5,0.058973,(frankfurter)
6,0.063447,(domestic eggs)
7,0.053279,(curd)
8,0.071683,(whipped/sour cream)
9,0.058058,(coffee)


In [6]:
my_rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

In [7]:
my_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013
2,(root vegetables),(other vegetables),0.108998,0.193493,0.047382,0.434701,2.246605,0.026291,1.426693
3,(other vegetables),(root vegetables),0.193493,0.108998,0.047382,0.244877,2.246605,0.026291,1.179941
4,(whole milk),(root vegetables),0.255516,0.108998,0.048907,0.191405,1.756031,0.021056,1.101913
5,(root vegetables),(whole milk),0.108998,0.255516,0.048907,0.448694,1.756031,0.021056,1.350401
6,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157
7,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132
8,(whole milk),(rolls/buns),0.255516,0.183935,0.056634,0.221647,1.205032,0.009636,1.048452
9,(rolls/buns),(whole milk),0.183935,0.255516,0.056634,0.307905,1.205032,0.009636,1.075696


## Reference
1. https://github.com/satishrath185/Market-Basket-Analysis