# Chapter 6 - Other Popular Machine Learning Methods
## Segment 1 - Association Rule Mining Using Apriori Algorithm

# Import the required libraries

In [1]:
! pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.17.3-py2.py3-none-any.whl (1.3 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.17.3


In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Data Format

In [3]:
address = 'C:/Users/Alina Sahoo/Documents/Ex_Files_Python_Data_Science_EssT_Pt2/Data/groceries.csv'
data = pd.read_csv(address)

In [4]:
data.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,
1,tropical fruit,yogurt,coffee,,,,,,
2,whole milk,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,


# Data Coversion

In [5]:
basket_sets = pd.get_dummies(data)

In [6]:
basket_sets.head()

Unnamed: 0,1_Instant food products,1_UHT-milk,1_artif. sweetener,1_baby cosmetics,1_bags,1_baking powder,1_bathroom cleaner,1_beef,1_berries,1_beverages,...,9_sweet spreads,9_tea,9_vinegar,9_waffles,9_whipped/sour cream,9_white bread,9_white wine,9_whole milk,9_yogurt,9_zwieback
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Support Calculation

In [7]:
apriori(basket_sets, min_support=0.02)

Unnamed: 0,support,itemsets
0,0.030421,(7)
1,0.034951,(17)
2,0.029126,(23)
3,0.049191,(26)
4,0.064401,(47)
5,0.04466,(83)
6,0.024272,(90)
7,0.040453,(92)
8,0.038835,(99)
9,0.033981,(100)


In [8]:
apriori(basket_sets, min_support=0.02, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.030421,(1_beef)
1,0.034951,(1_canned beer)
2,0.029126,(1_chicken)
3,0.049191,(1_citrus fruit)
4,0.064401,(1_frankfurter)
5,0.04466,(1_other vegetables)
6,0.024272,(1_pip fruit)
7,0.040453,(1_pork)
8,0.038835,(1_rolls/buns)
9,0.033981,(1_root vegetables)


In [9]:
df = basket_sets

frequent_itemsets = apriori(basket_sets, min_support=0.002, use_colnames=True)

frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.006472,(1_UHT-milk),1
1,0.030421,(1_beef),1
2,0.011974,(1_berries),1
3,0.008414,(1_beverages),1
4,0.014887,(1_bottled beer),1
...,...,...,...
844,0.002265,"(5_other vegetables, 3_pip fruit, 6_whole milk)",3
845,0.002589,"(5_whole milk, 3_root vegetables, 4_other vege...",3
846,0.002913,"(3_whole milk, 4_curd, 5_yogurt)",3
847,0.003236,"(5_other vegetables, 6_whole milk, 4_root vege...",3


In [10]:
frequent_itemsets[frequent_itemsets['length'] >= 3]

Unnamed: 0,support,itemsets,length
820,0.002589,"(2_root vegetables, 1_beef, 3_other vegetables)",3
821,0.002589,"(2_other vegetables, 3_whole milk, 1_chicken)",3
822,0.002589,"(2_other vegetables, 3_whole milk, 1_citrus fr...",3
823,0.003236,"(3_pip fruit, 2_tropical fruit, 1_citrus fruit)",3
824,0.002589,"(3_other vegetables, 4_whole milk, 1_citrus fr...",3
825,0.002265,"(5_other vegetables, 1_frankfurter, 6_whole milk)",3
826,0.002265,"(1_pork, 4_whole milk, 3_other vegetables)",3
827,0.00356,"(2_other vegetables, 3_whole milk, 1_root vege...",3
828,0.002589,"(2_rolls/buns, 1_sausage, 3_soda)",3
829,0.002265,"(1_sausage, 4_whole milk, 3_other vegetables)",3


# Association Rules

## Confidence

In [11]:
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2_sausage),(1_frankfurter),0.011327,0.064401,0.011327,1.0,15.527638,0.010597,inf
1,(7_pastry),(1_frankfurter),0.005178,0.064401,0.002589,0.5,7.763819,0.002256,1.871197
2,(2_ham),(1_sausage),0.00712,0.076052,0.004531,0.636364,8.367505,0.003989,2.540858
3,(2_meat),(1_sausage),0.006796,0.076052,0.004854,0.714286,9.392097,0.004338,3.233819
4,(3_beef),(1_sausage),0.004854,0.076052,0.002589,0.533333,7.012766,0.00222,1.979889


## Lift

In [12]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2_citrus fruit),(1_beef),0.028803,0.030421,0.005502,0.191011,6.278986,0.004625,1.198508
1,(1_beef),(2_citrus fruit),0.030421,0.028803,0.005502,0.180851,6.278986,0.004625,1.185618
2,(2_other vegetables),(1_beef),0.0589,0.030421,0.003236,0.054945,1.806173,0.001444,1.02595
3,(1_beef),(2_other vegetables),0.030421,0.0589,0.003236,0.106383,1.806173,0.001444,1.053136
4,(2_root vegetables),(1_beef),0.036893,0.030421,0.005502,0.149123,4.902016,0.004379,1.139506


## Lift and Confidence

In [13]:
rules[(rules['lift'] >= 5) & (rules['confidence']>= 0.5)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
92,(2_sausage),(1_frankfurter),0.011327,0.064401,0.011327,1.000000,15.527638,0.010597,inf
136,(7_pastry),(1_frankfurter),0.005178,0.064401,0.002589,0.500000,7.763819,0.002256,1.871197
239,(2_ham),(1_sausage),0.007120,0.076052,0.004531,0.636364,8.367505,0.003989,2.540858
242,(2_meat),(1_sausage),0.006796,0.076052,0.004854,0.714286,9.392097,0.004338,3.233819
259,(3_beef),(1_sausage),0.004854,0.076052,0.002589,0.533333,7.012766,0.002220,1.979889
...,...,...,...,...,...,...,...,...,...
959,"(5_other vegetables, 4_root vegetables)",(6_whole milk),0.005178,0.009385,0.003236,0.625000,66.594828,0.003188,2.641640
960,"(6_whole milk, 4_root vegetables)",(5_other vegetables),0.003883,0.012621,0.003236,0.833333,66.025641,0.003187,5.924272
964,"(5_other vegetables, 7_butter)",(6_whole milk),0.002589,0.009385,0.002265,0.875000,93.232759,0.002241,7.924919
966,"(7_butter, 6_whole milk)",(5_other vegetables),0.002913,0.012621,0.002265,0.777778,61.623932,0.002229,4.443204
