In [1]:
import pandas as pd
import requests
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# Download the groceries dataset as text: one transaction per line, with the
# items of a transaction separated by commas.
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/groceries.csv"
# The timeout keeps the cell from hanging indefinitely on a stalled connection;
# raise_for_status() stops an HTTP error page from being parsed as transactions.
response = requests.get(url, timeout=30)
response.raise_for_status()
txt = response.text
lines = txt.splitlines()
lines[:10]

['citrus fruit,semi-finished bread,margarine,ready soups',
 'tropical fruit,yogurt,coffee',
 'whole milk',
 'pip fruit,yogurt,cream cheese ,meat spreads',
 'other vegetables,whole milk,condensed milk,long life bakery product',
 'whole milk,butter,yogurt,rice,abrasive cleaner',
 'rolls/buns',
 'other vegetables,UHT-milk,rolls/buns,bottled beer,liquor (appetizer)',
 'pot plants',
 'whole milk,cereals']

In [3]:
# Split every raw line into its list of item names.
# Names are stripped because the raw file contains padded entries such as
# 'cream cheese ', which would otherwise keep the padding in the item label.
# Empty fields are dropped so a blank line yields an empty transaction rather
# than a bogus '' item; the number of transactions stays equal to len(lines).
transactions = [
    [item.strip() for item in transaction.split(',') if item.strip()]
    for transaction in lines
]

transactions[:10]

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product'],
 ['whole milk', 'butter', 'yogurt', 'rice', 'abrasive cleaner'],
 ['rolls/buns'],
 ['other vegetables',
  'UHT-milk',
  'rolls/buns',
  'bottled beer',
  'liquor (appetizer)'],
 ['pot plants'],
 ['whole milk', 'cereals']]

In [4]:
# One-hot encode the transactions: the encoder is fitted on the transactions
# and then used to transform the same transactions into a boolean array.
encoder = TransactionEncoder()
encoder.fit(transactions)
transactions_array = encoder.transform(transactions)
# Preview rows 1 through 9 (the slice starts at index 1, so row 0 is not shown).
transactions_array[1:10]

array([[False, False, False, ..., False,  True, False],
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False,  True, False],
       ...,
       [False,  True, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ...,  True, False, False]])

In [5]:
# Wrap the encoded array in a DataFrame, labelling the columns with the
# item names recorded by the encoder.
df = pd.DataFrame(
    data=transactions_array,
    columns=encoder.columns_,
)

In [6]:
# Show the first five encoded transactions as a sanity check.
df.head(n=5)

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


In [7]:
# Mine frequent itemsets with Apriori using a minimum support of 0.01;
# use_colnames=True labels the itemsets with item names.
frequent_itemsets = apriori(
    df,
    min_support=0.01,
    use_colnames=True,
)

In [8]:
# Preview the first 20 frequent itemsets.
frequent_itemsets.head(20)

Unnamed: 0,support,itemsets
0,0.033452,(UHT-milk)
1,0.017692,(baking powder)
2,0.052466,(beef)
3,0.033249,(berries)
4,0.026029,(beverages)
5,0.080529,(bottled beer)
6,0.110524,(bottled water)
7,0.06487,(brown bread)
8,0.055414,(butter)
9,0.027961,(butter milk)


In [9]:
# Derive association rules from the frequent itemsets, keeping every rule
# whose confidence is at least 0.01.
# num_itemsets is documented by mlxtend as the number of transactions in the
# original data, so pass the real row count of the encoded frame instead of
# a placeholder value.
df_association_rules = association_rules(
    frequent_itemsets,
    num_itemsets=len(df),
    metric="confidence",
    min_threshold=0.01,
)
df_association_rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(other vegetables),(beef),0.193493,0.052466,0.019725,0.101944,1.943066,1.0,0.009574,1.055095,0.601792,0.087191,0.052218,0.238957
1,(beef),(other vegetables),0.052466,0.193493,0.019725,0.375969,1.943066,1.0,0.009574,1.292416,0.512224,0.087191,0.226255,0.238957
2,(rolls/buns),(beef),0.183935,0.052466,0.013625,0.074074,1.411858,1.0,0.003975,1.023337,0.357463,0.061159,0.022805,0.166882
3,(beef),(rolls/buns),0.052466,0.183935,0.013625,0.25969,1.411858,1.0,0.003975,1.102329,0.307866,0.061159,0.09283,0.166882
4,(root vegetables),(beef),0.108998,0.052466,0.017387,0.159515,3.040367,1.0,0.011668,1.127366,0.753189,0.120677,0.112977,0.245455
5,(beef),(root vegetables),0.052466,0.108998,0.017387,0.331395,3.040367,1.0,0.011668,1.332628,0.708251,0.120677,0.249603,0.245455
6,(whole milk),(beef),0.255516,0.052466,0.021251,0.083168,1.58518,1.0,0.007845,1.033487,0.495856,0.074113,0.032402,0.244103
7,(beef),(whole milk),0.052466,0.255516,0.021251,0.405039,1.58518,1.0,0.007845,1.251315,0.389597,0.074113,0.200841,0.244103
8,(yogurt),(beef),0.139502,0.052466,0.011693,0.083819,1.597601,1.0,0.004374,1.034222,0.434703,0.064862,0.03309,0.153344
9,(beef),(yogurt),0.052466,0.139502,0.011693,0.222868,1.597601,1.0,0.004374,1.107275,0.394774,0.064862,0.096882,0.153344


In [10]:
# Rank the rules from highest to lowest confidence.
df_association_rules.sort_values(by="confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
440,"(citrus fruit, root vegetables)",(other vegetables),0.017692,0.193493,0.010371,0.586207,3.029608,1.0,0.006948,1.949059,0.681990,0.051646,0.486932,0.319903
512,"(tropical fruit, root vegetables)",(other vegetables),0.021047,0.193493,0.012303,0.584541,3.020999,1.0,0.008231,1.941244,0.683367,0.060835,0.484867,0.324062
456,"(yogurt, curd)",(whole milk),0.017285,0.255516,0.010066,0.582353,2.279125,1.0,0.005649,1.782567,0.571107,0.038313,0.439011,0.310874
432,"(other vegetables, butter)",(whole milk),0.020031,0.255516,0.011490,0.573604,2.244885,1.0,0.006371,1.745992,0.565878,0.043512,0.427260,0.309285
588,"(tropical fruit, root vegetables)",(whole milk),0.021047,0.255516,0.011998,0.570048,2.230969,1.0,0.006620,1.731553,0.563627,0.045350,0.422484,0.308502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,(whole milk),"(yogurt, citrus fruit)",0.255516,0.021657,0.010269,0.040191,1.855768,1.0,0.004736,1.019310,0.619408,0.038476,0.018944,0.257185
533,(whole milk),"(other vegetables, sausage)",0.255516,0.026945,0.010168,0.039793,1.476849,1.0,0.003283,1.013381,0.433700,0.037341,0.013204,0.208576
491,(whole milk),"(other vegetables, pork)",0.255516,0.021657,0.010168,0.039793,1.837394,1.0,0.004634,1.018887,0.612170,0.038081,0.018537,0.254638
223,(whole milk),(hard cheese),0.255516,0.024504,0.010066,0.039395,1.607682,1.0,0.003805,1.015502,0.507716,0.037288,0.015265,0.225092
