In [28]:
import pandas as pd
import requests
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [78]:
# Fetch the raw basket file: one transaction per line, items separated by commas.
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/groceries.csv"
# A timeout keeps "Run All" from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop on an HTTP error; otherwise the error page body would be split into bogus transactions.
response.raise_for_status()
txt = response.text
lines = txt.splitlines()
lines[:10]

['citrus fruit,semi-finished bread,margarine,ready soups',
 'tropical fruit,yogurt,coffee',
 'whole milk',
 'pip fruit,yogurt,cream cheese ,meat spreads',
 'other vegetables,whole milk,condensed milk,long life bakery product',
 'whole milk,butter,yogurt,rice,abrasive cleaner',
 'rolls/buns',
 'other vegetables,UHT-milk,rolls/buns,bottled beer,liquor (appetizer)',
 'pot plants',
 'whole milk,cereals']

In [79]:
# Turn each CSV line into a list of item names (one list per transaction).
# Names are stripped because the raw file carries stray whitespace
# (e.g. 'cream cheese '), and blank lines/fields are dropped so they cannot
# surface as a spurious '' item once the baskets are encoded.
transactions = []
for transaction in lines:
    items = [item.strip() for item in transaction.split(',') if item.strip()]
    if items:
        transactions.append(items)

transactions[:10]

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product'],
 ['whole milk', 'butter', 'yogurt', 'rice', 'abrasive cleaner'],
 ['rolls/buns'],
 ['other vegetables',
  'UHT-milk',
  'rolls/buns',
  'bottled beer',
  'liquor (appetizer)'],
 ['pot plants'],
 ['whole milk', 'cereals']]

In [81]:
# One-hot encode the baskets: fit on the transactions, then transform them
# into the boolean matrix previewed below.
encoder = TransactionEncoder()
encoder.fit(transactions)
transactions_array = encoder.transform(transactions)
transactions_array[1:10]

array([[False, False, False, ..., False,  True, False],
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False,  True, False],
       ...,
       [False,  True, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ...,  True, False, False]])

In [34]:
# Wrap the encoded matrix in a DataFrame whose columns are the encoder's item names.
df = pd.DataFrame(data=transactions_array, columns=encoder.columns_)

In [82]:
# Preview the first five encoded transactions.
df.head(n=5)

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


In [71]:
# Mine itemsets with support of at least 0.01, reported by item name
# (use_colnames=True) rather than by column position.
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.01,
)

In [83]:
# Show the first 20 frequent itemsets.
frequent_itemsets.head(20)

Unnamed: 0,support,itemsets
0,0.033452,(UHT-milk)
1,0.017692,(baking powder)
2,0.052466,(beef)
3,0.033249,(berries)
4,0.026029,(beverages)
5,0.080529,(bottled beer)
6,0.110524,(bottled water)
7,0.06487,(brown bread)
8,0.055414,(butter)
9,0.027961,(butter milk)


In [84]:
# Derive association rules from the frequent itemsets, keeping every rule
# whose confidence is at least 0.01.
# mlxtend documents num_itemsets as the number of transactions in the original
# input data, so pass the real row count of the encoded frame instead of a
# placeholder 1.
df_association_rules = association_rules(
    frequent_itemsets,
    num_itemsets=len(df),
    metric="confidence",
    min_threshold=0.01,
)
df_association_rules[:10]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(beef),(other vegetables),0.052466,0.193493,0.019725,0.375969,1.943066,1.0,0.009574,1.292416,0.512224,0.087191,0.226255,0.238957
1,(other vegetables),(beef),0.193493,0.052466,0.019725,0.101944,1.943066,1.0,0.009574,1.055095,0.601792,0.087191,0.052218,0.238957
2,(beef),(rolls/buns),0.052466,0.183935,0.013625,0.25969,1.411858,1.0,0.003975,1.102329,0.307866,0.061159,0.09283,0.166882
3,(rolls/buns),(beef),0.183935,0.052466,0.013625,0.074074,1.411858,1.0,0.003975,1.023337,0.357463,0.061159,0.022805,0.166882
4,(root vegetables),(beef),0.108998,0.052466,0.017387,0.159515,3.040367,1.0,0.011668,1.127366,0.753189,0.120677,0.112977,0.245455
5,(beef),(root vegetables),0.052466,0.108998,0.017387,0.331395,3.040367,1.0,0.011668,1.332628,0.708251,0.120677,0.249603,0.245455
6,(beef),(whole milk),0.052466,0.255516,0.021251,0.405039,1.58518,1.0,0.007845,1.251315,0.389597,0.074113,0.200841,0.244103
7,(whole milk),(beef),0.255516,0.052466,0.021251,0.083168,1.58518,1.0,0.007845,1.033487,0.495856,0.074113,0.032402,0.244103
8,(beef),(yogurt),0.052466,0.139502,0.011693,0.222868,1.597601,1.0,0.004374,1.107275,0.394774,0.064862,0.096882,0.153344
9,(yogurt),(beef),0.139502,0.052466,0.011693,0.083819,1.597601,1.0,0.004374,1.034222,0.434703,0.064862,0.03309,0.153344


In [70]:
# Rank the rules from most to least confident (display only; the result is not stored).
df_association_rules.sort_values(by="confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
5,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,1.0,0.020379,1.244132,0.422732,0.165267,0.196226,0.310432
0,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,1.0,0.025394,1.214013,0.42075,0.2,0.176286,0.339817
2,(rolls/buns),(whole milk),0.183935,0.255516,0.056634,0.307905,1.205032,1.0,0.009636,1.075696,0.208496,0.147942,0.070369,0.264776
1,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,1.0,0.025394,1.140548,0.455803,0.2,0.123228,0.339817
3,(whole milk),(rolls/buns),0.255516,0.183935,0.056634,0.221647,1.205032,1.0,0.009636,1.048452,0.228543,0.147942,0.046213,0.264776
4,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,1.0,0.020379,1.102157,0.488608,0.165267,0.092688,0.310432
