# Association Rules Mining

## Create a list of transactions with Items in each

In [1]:
# Toy market-basket data: each inner list holds the items of one transaction.
dataset = [
    ["Milk", "Eggs", "Bread"],
    ["Milk", "Eggs"],
    ["Milk", "Bread"],
    ["Eggs", "Apple"],
]

In [2]:
# Echo the raw transactions so the input can be checked by eye.
dataset

[['Milk', 'Eggs', 'Bread'],
 ['Milk', 'Eggs'],
 ['Milk', 'Bread'],
 ['Eggs', 'Apple']]

# Import libraries for creating a boolean dataframe
The dataframe will contain boolean values depending on whether an item is present in a transaction (invoice) or not. Each row of the dataframe is a transaction.

In [3]:
# pip install mlxtend
# import libraries from mlxtend for apriori algorithm implementation
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori,fpgrowth,association_rules
import pandas as pd

In [4]:
# One-hot encode the transactions: fit_transform learns the distinct item
# labels and returns the encoded array (one row per transaction); wrapping it
# in a DataFrame attaches the learned labels (te.columns_) as column names.
te = TransactionEncoder()
te_df = pd.DataFrame(te.fit_transform(dataset), columns=te.columns_)
te_df


Unnamed: 0,Apple,Bread,Eggs,Milk
0,False,True,True,True
1,False,False,True,True
2,False,True,False,True
3,True,False,True,False


# Apriori Algorithm

## Create Frequent Itemsets

Set a minimum threshold of support to filter the frequent itemsets from all the itemsets.

In [5]:
# Mine frequent itemsets with Apriori. With only four transactions the
# smallest non-zero support is 0.25, so a 0.01 floor effectively keeps every
# itemset that occurs at least once.
freq_itemsets_ap = apriori(
    te_df,
    min_support=0.01,
    use_colnames=True,
)
freq_itemsets_ap

Unnamed: 0,support,itemsets
0,0.25,(Apple)
1,0.5,(Bread)
2,0.75,(Eggs)
3,0.75,(Milk)
4,0.25,"(Apple, Eggs)"
5,0.25,"(Bread, Eggs)"
6,0.5,"(Bread, Milk)"
7,0.5,"(Milk, Eggs)"
8,0.25,"(Bread, Milk, Eggs)"


# FP-Growth Algorithm

In [6]:
# Mine frequent itemsets with FP-Growth (fpgrowth is already imported in the
# imports cell), using the same 0.01 support floor as the Apriori run.
freq_itemsets_fp = fpgrowth(
    te_df,
    min_support=0.01,
    use_colnames=True,
)
freq_itemsets_fp

Unnamed: 0,support,itemsets
0,0.75,(Milk)
1,0.75,(Eggs)
2,0.5,(Bread)
3,0.25,(Apple)
4,0.5,"(Milk, Eggs)"
5,0.5,"(Bread, Milk)"
6,0.25,"(Bread, Eggs)"
7,0.25,"(Bread, Milk, Eggs)"
8,0.25,"(Apple, Eggs)"


# Association Rules

In [9]:
# Derive association rules from the Apriori itemsets, filtering on lift with
# a minimum threshold of 1.1.
rules_ap = association_rules(
    freq_itemsets_ap,
    metric="lift",
    min_threshold=1.1,
)
rules_ap

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Apple),(Eggs),0.25,0.75,0.25,1.0,1.333333,0.0625,inf
1,(Eggs),(Apple),0.75,0.25,0.25,0.333333,1.333333,0.0625,1.125
2,(Bread),(Milk),0.5,0.75,0.5,1.0,1.333333,0.125,inf
3,(Milk),(Bread),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
4,"(Bread, Eggs)",(Milk),0.25,0.75,0.25,1.0,1.333333,0.0625,inf
5,(Milk),"(Bread, Eggs)",0.75,0.25,0.25,0.333333,1.333333,0.0625,1.125


In [10]:
# Derive association rules from the FP-Growth itemsets, filtering on lift
# with a minimum threshold of 1.1.
rules_fp = association_rules(
    freq_itemsets_fp,
    metric="lift",
    min_threshold=1.1,
)
rules_fp

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Bread),(Milk),0.5,0.75,0.5,1.0,1.333333,0.125,inf
1,(Milk),(Bread),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
2,"(Bread, Eggs)",(Milk),0.25,0.75,0.25,1.0,1.333333,0.0625,inf
3,(Milk),"(Bread, Eggs)",0.75,0.25,0.25,0.333333,1.333333,0.0625,1.125
4,(Apple),(Eggs),0.25,0.75,0.25,1.0,1.333333,0.0625,inf
5,(Eggs),(Apple),0.75,0.25,0.25,0.333333,1.333333,0.0625,1.125


In [13]:
# Keep only the FP-Growth rules with confidence above 0.7. The mask must be
# built from rules_fp itself: rules_ap lists the same rules in a different row
# order, and pandas aligns a boolean Series by index label, so a mask taken
# from rules_ap would silently pick rows based on the other table's values.
rules_fp[rules_fp["confidence"] > 0.7]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Bread),(Milk),0.5,0.75,0.5,1.0,1.333333,0.125,inf
2,"(Bread, Eggs)",(Milk),0.25,0.75,0.25,1.0,1.333333,0.0625,inf
4,(Apple),(Eggs),0.25,0.75,0.25,1.0,1.333333,0.0625,inf


In [11]:
# Print the docstring of association_rules for reference (signature,
# supported metrics and how each one is computed).
help(association_rules)

Help on function association_rules in module mlxtend.frequent_patterns.association_rules:

association_rules(df, metric='confidence', min_threshold=0.8, support_only=False)
    Generates a DataFrame of association rules including the
    metrics 'score', 'confidence', and 'lift'
    
    Parameters
    -----------
    df : pandas DataFrame
      pandas DataFrame of frequent itemsets
      with columns ['support', 'itemsets']
    
    metric : string (default: 'confidence')
      Metric to evaluate if a rule is of interest.
      **Automatically set to 'support' if `support_only=True`.**
      Otherwise, supported metrics are 'support', 'confidence', 'lift',
      'leverage', and 'conviction'
      These metrics are computed as follows:
    
      - support(A->C) = support(A+C) [aka 'support'], range: [0, 1]
    
      - confidence(A->C) = support(A+C) / support(A), range: [0, 1]
    
      - lift(A->C) = confidence(A->C) / support(C), range: [0, inf]
    
      - leverage(A->C) = suppo