##### Association Rules - Apriori

Install the libraries

In [1]:
# mlxtend supplies TransactionEncoder, apriori and association_rules used in this notebook.
# NOTE(review): the version is not pinned — consider pinning it so re-runs stay reproducible.
%pip install mlxtend

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

In [3]:
# Emoji rendering of the toy baskets (each inner list is one transaction).
# The next cell reassigns `dataset` with text labels, and that text version is
# what the encoder and all outputs further down are computed from.
dataset = [['🍎', '🍺', '🍚', '🍗'],
           ['🍎', '🍺', '🍚'],
           ['🍎', '🍺'],
           ['🍎', '🍐'],
           ['🥛', '🍺', '🍚', '🍗'],
           ['🥛', '🍺', '🍚'],
           ['🥛', '🍺'],
           ['🥛', '🍐']]

In [4]:
# Eight toy shopping baskets; every inner list is a single transaction.
# NOTE(review): 'Bear' lines up with the 🍺 item in the emoji cell above, so
# 'Beer' is probably intended — renaming it also means updating the
# {'Apple', 'Bear'} lookups further down.
dataset = [
    ['Apple', 'Bear', 'Rice', 'Chicken'],
    ['Apple', 'Bear', 'Rice'],
    ['Apple', 'Bear'],
    ['Apple', 'Pear'],
    ['Milk', 'Bear', 'Rice', 'Chicken'],
    ['Milk', 'Bear', 'Rice'],
    ['Milk', 'Bear'],
    ['Milk', 'Pear'],
]

dataset

[['Apple', 'Bear', 'Rice', 'Chicken'],
 ['Apple', 'Bear', 'Rice'],
 ['Apple', 'Bear'],
 ['Apple', 'Pear'],
 ['Milk', 'Bear', 'Rice', 'Chicken'],
 ['Milk', 'Bear', 'Rice'],
 ['Milk', 'Bear'],
 ['Milk', 'Pear']]

In [5]:
from mlxtend.preprocessing import TransactionEncoder

In [6]:
# Encoder used below to turn the list-of-lists transactions into a boolean item matrix.
te = TransactionEncoder()

In [7]:
# Learn the item vocabulary from the baskets, then one-hot encode each basket
# (one row per transaction, one boolean column per distinct item).
data_new = te.fit(dataset).transform(dataset)
data_new

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [False, False, False,  True,  True, False]])

In [8]:
# Item labels learned by the encoder; they are used as the column names for data_new below.
te.columns_

['Apple', 'Bear', 'Chicken', 'Milk', 'Pear', 'Rice']

In [9]:
# Wrap the boolean matrix in a DataFrame labelled with the item names;
# this frame is what apriori() receives in the next section.
df = pd.DataFrame(data_new, columns=te.columns_)
df

Unnamed: 0,Apple,Bear,Chicken,Milk,Pear,Rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,False,False,False,True,True,False


##### 1. Generate the frequent itemsets

In [10]:
from mlxtend.frequent_patterns import apriori

In [11]:
# Keep every itemset whose support is at least 0.25 (2 of the 8 baskets);
# use_colnames=True reports item names instead of column positions.
freq_itemset = apriori(
    df,
    use_colnames=True,
    min_support=0.25,
)
freq_itemset

Unnamed: 0,support,itemsets
0,0.5,(Apple)
1,0.75,(Bear)
2,0.25,(Chicken)
3,0.5,(Milk)
4,0.25,(Pear)
5,0.5,(Rice)
6,0.375,"(Apple, Bear)"
7,0.25,"(Rice, Apple)"
8,0.25,"(Chicken, Bear)"
9,0.375,"(Milk, Bear)"


##### 2. Generate the association rules

In [12]:
from mlxtend.frequent_patterns import association_rules

In [13]:
# Derive rules with confidence >= 0.5 from the frequent itemsets.
# num_itemsets is documented as the number of transactions in the original
# input data, so take it from `df` (one row per transaction) instead of
# hard-coding 1.
rules = association_rules(
    freq_itemset,
    metric='confidence',
    min_threshold=0.5,
    num_itemsets=len(df),
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Apple),(Bear),0.5,0.75,0.375,0.75,1.0,1.0,0.0,1.0,0.0,0.428571,0.0,0.625
1,(Bear),(Apple),0.75,0.5,0.375,0.5,1.0,1.0,0.0,1.0,0.0,0.428571,0.0,0.625
2,(Rice),(Apple),0.5,0.5,0.25,0.5,1.0,1.0,0.0,1.0,0.0,0.333333,0.0,0.5
3,(Apple),(Rice),0.5,0.5,0.25,0.5,1.0,1.0,0.0,1.0,0.0,0.333333,0.0,0.5
4,(Chicken),(Bear),0.25,0.75,0.25,1.0,1.333333,1.0,0.0625,inf,0.333333,0.333333,1.0,0.666667
5,(Milk),(Bear),0.5,0.75,0.375,0.75,1.0,1.0,0.0,1.0,0.0,0.428571,0.0,0.625
6,(Bear),(Milk),0.75,0.5,0.375,0.5,1.0,1.0,0.0,1.0,0.0,0.428571,0.0,0.625
7,(Rice),(Bear),0.5,0.75,0.5,1.0,1.333333,1.0,0.125,inf,0.5,0.666667,1.0,0.833333
8,(Bear),(Rice),0.75,0.5,0.5,0.666667,1.333333,1.0,0.125,1.5,1.0,0.666667,0.333333,0.833333
9,(Rice),(Chicken),0.5,0.25,0.25,0.5,2.0,1.0,0.125,1.5,1.0,0.5,0.333333,0.75


In [14]:
# Keep only the columns used by the sorting, filtering and lookup cells below.
# Selecting by label instead of position keeps this correct if mlxtend adds or
# reorders metric columns, and makes the cell safe to re-run: the positional
# version raised IndexError on a second run because `rules` then had 4 columns.
rules = rules[['antecedents', 'consequents', 'support', 'confidence']]
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Apple),(Bear),0.375,0.75
1,(Bear),(Apple),0.375,0.5
2,(Rice),(Apple),0.25,0.5
3,(Apple),(Rice),0.25,0.5
4,(Chicken),(Bear),0.25,1.0
5,(Milk),(Bear),0.375,0.75
6,(Bear),(Milk),0.375,0.5
7,(Rice),(Bear),0.5,1.0
8,(Bear),(Rice),0.5,0.666667
9,(Rice),(Chicken),0.25,0.5


##### Sort Rules

In [15]:
# Rank all rules from highest to lowest confidence.
rules.sort_values('confidence', ascending=False)

Unnamed: 0,antecedents,consequents,support,confidence
22,(Chicken),"(Rice, Bear)",0.25,1.0
20,"(Chicken, Bear)",(Rice),0.25,1.0
4,(Chicken),(Bear),0.25,1.0
13,"(Rice, Apple)",(Bear),0.25,1.0
7,(Rice),(Bear),0.5,1.0
18,"(Rice, Chicken)",(Bear),0.25,1.0
23,"(Rice, Milk)",(Bear),0.25,1.0
10,(Chicken),(Rice),0.25,1.0
0,(Apple),(Bear),0.375,0.75
5,(Milk),(Bear),0.375,0.75


In [16]:
# Five highest-confidence rules.
rules.sort_values('confidence', ascending=False).iloc[:5]

Unnamed: 0,antecedents,consequents,support,confidence
22,(Chicken),"(Rice, Bear)",0.25,1.0
20,"(Chicken, Bear)",(Rice),0.25,1.0
4,(Chicken),(Bear),0.25,1.0
13,"(Rice, Apple)",(Bear),0.25,1.0
7,(Rice),(Bear),0.5,1.0


In [17]:
# Rules whose confidence is strictly above 0.6.
rules.loc[rules['confidence'].gt(0.6)]

Unnamed: 0,antecedents,consequents,support,confidence
0,(Apple),(Bear),0.375,0.75
4,(Chicken),(Bear),0.25,1.0
5,(Milk),(Bear),0.375,0.75
7,(Rice),(Bear),0.5,1.0
8,(Bear),(Rice),0.5,0.666667
10,(Chicken),(Rice),0.25,1.0
13,"(Rice, Apple)",(Bear),0.25,1.0
15,"(Apple, Bear)",(Rice),0.25,0.666667
18,"(Rice, Chicken)",(Bear),0.25,1.0
20,"(Chicken, Bear)",(Rice),0.25,1.0


In [18]:
# Rules with confidence strictly above 0.6, strongest first.
rules.loc[rules['confidence'].gt(0.6)].sort_values('confidence', ascending=False)

Unnamed: 0,antecedents,consequents,support,confidence
4,(Chicken),(Bear),0.25,1.0
7,(Rice),(Bear),0.5,1.0
10,(Chicken),(Rice),0.25,1.0
13,"(Rice, Apple)",(Bear),0.25,1.0
18,"(Rice, Chicken)",(Bear),0.25,1.0
20,"(Chicken, Bear)",(Rice),0.25,1.0
22,(Chicken),"(Rice, Bear)",0.25,1.0
23,"(Rice, Milk)",(Bear),0.25,1.0
0,(Apple),(Bear),0.375,0.75
5,(Milk),(Bear),0.375,0.75


##### Recommendations

In [19]:
# All rules whose antecedent is exactly {Apple}; the frozenset stored in each
# row compares equal to a plain set with the same members.
rules.loc[rules['antecedents'] == {'Apple'}]

Unnamed: 0,antecedents,consequents,support,confidence
0,(Apple),(Bear),0.375,0.75
3,(Apple),(Rice),0.25,0.5
17,(Apple),"(Rice, Bear)",0.25,0.5


In [20]:
# Consequents recommended for a basket containing exactly Apple and Bear.
# A single .loc[rows, column] lookup replaces the chained [mask][column] indexing.
rules.loc[rules['antecedents'] == {'Apple', 'Bear'}, 'consequents']

15    (Rice)
Name: consequents, dtype: object

In [21]:
# Same lookup with the items written in the other order: sets are unordered,
# so the result is identical to the previous cell.
rules.loc[rules['antecedents'] == {'Bear', 'Apple'}, 'consequents']

15    (Rice)
Name: consequents, dtype: object

In [22]:
# Pull out the first matching consequent as a bare frozenset.
# Raises IndexError if no rule has this antecedent.
rules.loc[rules['antecedents'] == {'Bear', 'Apple'}, 'consequents'].iloc[0]

frozenset({'Rice'})

##### Export the rules

In [23]:
# antecedents/consequents hold frozensets, which to_csv would write as
# "frozenset({'Rice'})" reprs that cannot be parsed back without eval.
# Export them as sorted, comma-separated item strings instead; `rules` itself
# is left untouched so the lookup cells above keep working on re-run.
def _items_to_text(items):
    """Render one itemset as a deterministic 'a, b, c' string."""
    return ', '.join(sorted(map(str, items)))


rules_export = rules.assign(
    antecedents=rules['antecedents'].map(_items_to_text),
    consequents=rules['consequents'].map(_items_to_text),
)
rules_export.to_csv('rules.csv', index=False)