### Association Rule Mining: Interestingness Measures

References:

1. **Association Rule Interestingness Measures: Experimental and Theoretical Studies**.
    Authors: Philippe Lenca, Benoit Vaillant, Patrick Meyer, Stephane Lallich.
    DOI: https://doi.org/10.1007/978-3-540-44918-8_3

2. **Association Rule Mining via Apriori Algorithm in Python**. Link: https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/

3. **Mining Association Rules PPT** (Support, Confidence, Lift, Conviction, Leverage, Coverage). Link: https://paginas.fe.up.pt/~ec/files_0506/slides/04_AssociationRules.pdf

Dataset source: https://drive.google.com/file/d/1y5DYn0dGoSbC22xowBq2d4po6h1JxcTQ/view?usp=sharing

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from apyori import apriori

In [2]:
# Load the market-basket CSV. The file has no header row (header=None), so pandas numbers the columns 0..N-1 and
# pads rows that have fewer items with NaN.
dataset = pd.read_csv(filepath_or_buffer='../../../datasets/data/market_basket.csv', header=None)
dataset.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [3]:
# Turn every CSV row into one transaction: drop the NaN padding and keep the remaining items as a set
transactions = [set(row.dropna().tolist()) for _, row in dataset.iterrows()]

In [4]:
# Apply Apriori Algorithm on the transaction database
MIN_SUPPORT = 0.0025    # minimum fraction of transactions that must contain the whole itemset
MIN_CONFIDENCE = 0.7    # minimum confidence of a reported rule
MIN_LENGTH = 2          # minimum number of items (antecedent + consequent) in a reported rule

# NOTE(review): apyori documents min_support, min_confidence, min_lift and max_length as keyword arguments;
# min_length may be silently ignored - TODO confirm. The length filter in the loop below enforces it either way.
_association_rules = apriori(transactions, min_support=MIN_SUPPORT, min_confidence=MIN_CONFIDENCE, min_lift=0, min_length=MIN_LENGTH)

# Collected (antecedent, consequent) pairs, both as sets of items, for use by later cells
association_rules = []

# Print the association rules (the header is derived from the thresholds above so the two cannot drift apart)
print("Association Rules: Min Support = {:g}%, Min Confidence = {:g}%".format(MIN_SUPPORT*100, MIN_CONFIDENCE*100), end='\n\n')
for relation_record in _association_rules:

    # A relation record describes one frequent itemset: (items, support, ordered_statistics)
    if len(relation_record.items) < MIN_LENGTH:
        continue

    support = relation_record.support

    # One itemset can yield several qualifying rules (one per ordered statistic); report every one of them
    # instead of only the first.
    for ordered_statistic in relation_record.ordered_statistics:

        antecedent = set(ordered_statistic.items_base)
        consequent = set(ordered_statistic.items_add)

        confidence = ordered_statistic.confidence
        lift = ordered_statistic.lift

        association_rules.append((antecedent, consequent))

        print("{} => {}".format(antecedent, consequent))
        print("Support = {}, Confidence = {}, Lift = {}".format(support, confidence, lift), end='\n\n')

Association Rules: Min Support = 0.25%, Min Confidence = 70%

{'mushroom cream sauce', 'pasta'} => {'escalope'}
Support = 0.0025329956005865884, Confidence = 0.95, Lift = 11.976386554621849

{'olive oil', 'chocolate', 'frozen vegetables'} => {'mineral water'}
Support = 0.0027996267164378083, Confidence = 0.7, Lift = 2.9366331096196867

{'spaghetti', 'cooking oil', 'eggs'} => {'mineral water'}
Support = 0.0029329422743634183, Confidence = 0.7096774193548387, Lift = 2.9772317240383925

{'olive oil', 'eggs', 'milk'} => {'mineral water'}
Support = 0.0026663111585121984, Confidence = 0.7142857142857143, Lift = 2.996564397571109

{'frozen vegetables', 'soup', 'milk'} => {'mineral water'}
Support = 0.0030662578322890282, Confidence = 0.7666666666666666, Lift = 3.21631245339299



In [5]:
class ARMInterestMeasures:
    ''' Interestingness measures of a single association rule A -> B over a transaction database.

    All transaction counts are gathered once, in the constructor; every compute* method derives its measure from
    those counts.

    A measure that is undefined for the given data (a 0/0 form: empty transaction database, antecedent or consequent
    that never occurs, ...) is returned as float('nan') instead of raising ZeroDivisionError. The conviction of a
    rule that is never violated is returned as float('inf').
    '''

    def __init__(self, transactions, antecedent, consequent):
        ''' Compute necessary parameters involving antecedent, consequent from transactions

        Parameters:
            transactions: sized collection of transactions; each transaction must support the subset test
                `antecedent <= transaction` (e.g. a set of items)
            antecedent: set of items A (left-hand side of the rule)
            consequent: set of items B (right-hand side of the rule)
        '''

        self.transactions = transactions
        self.antecedent = antecedent
        self.consequent = consequent

        self.n_transactions = len(transactions) # Number of transactions in the database

        self.n_antecedent_present_trans = 0 # Number of transactions that contain antecedent
        self.n_consequent_present_trans = 0 # Number of transactions that contain consequent
        self.n_consequent_absent_trans = 0 # Number of transactions that do not contain consequent
        self.n_support_trans = 0 # Number of transactions that support the rule (A ^ B)
        self.n_oppose_trans = 0 # Number of transactions that oppose the rule (A ^ !B)

        for transaction in transactions:

            antecedent_present = self.antecedent <= transaction # Check if antecedent is subset of transaction
            consequent_present = self.consequent <= transaction # Check if consequent is subset of transaction

            if antecedent_present:
                self.n_antecedent_present_trans += 1

            if consequent_present:
                self.n_consequent_present_trans += 1
            else:
                self.n_consequent_absent_trans += 1

            if antecedent_present and consequent_present:
                self.n_support_trans += 1

            if antecedent_present and not consequent_present:
                self.n_oppose_trans += 1


    def _support(self, n_matching_trans):
        ''' Convert a transaction count into a support, i.e. a fraction of all transactions (nan if there are none) '''

        if self.n_transactions == 0:
            return float('nan')

        return n_matching_trans/self.n_transactions

    def computeSupport(self):
        ''' Compute the Support of an association rule A -> B

        Formula: n{A U B}/n
        Range: [0, 1]
        Interpretation: Measure of popularity of the itemset, as measured by the proportion of transactions in which
        the itemset {AUB} appears.
        Returns: the support, or nan for an empty transaction database.
        '''

        return self._support(self.n_support_trans)

    def computeConfidence(self):
        ''' Compute the confidence of an association rule A->B

        Formula: n{A U B}/n{A}
        Range: [0, 1]
        Interpretation: How likely B is purchased, when A is purchased, as measured by the proportion of transactions
        with items A in which items B also appear.
        Returns: the confidence, or nan when no transaction contains A.
        '''

        if self.n_antecedent_present_trans == 0:
            return float('nan')

        return self.n_support_trans/self.n_antecedent_present_trans

    def computeLift(self):
        ''' Compute the lift (or interest) of an association rule A->B

        Formula: support{A U B}/(support{A}.support{B})
        Range: [0, inf)
        Interpretation: How likely item B is purchased, when item A is purchased, while controlling for how popular B
        already is.

            Lift = 1 => No Association b/w items
            Lift > 1 => Item B is likely to be bought when item A is bought
            Lift < 1 => Item B is unlikely to be bought when item A is bought

        Drawbacks:
            - Rules that hold 100% of time, may not have highest possible lift.
            - Lift is symmetric, i.e., Lift(A->B) = Lift(B->A)

        Returns: the lift, or nan when A or B never occurs (or the database is empty).
        '''

        # Support of {A U B} that would be expected if A and B were independent
        expected_support = self._support(self.n_antecedent_present_trans)*self._support(self.n_consequent_present_trans)
        if expected_support == 0:
            return float('nan')

        return self._support(self.n_support_trans)/expected_support

    def computeConviction(self):
        ''' Compute the conviction of the rule A->B or !(A & !B)

        Formula: ( support(A) . support(!B) )/support(A and !B)
        Range: [0, inf]
        Interpretation: Measure of Implication
        Returns: the conviction; inf when the rule is never violated (no transaction contains A without B);
        nan when the ratio is a 0/0 form (e.g. A never occurs, or B is in every transaction).
        '''

        numerator = self._support(self.n_antecedent_present_trans)*self._support(self.n_consequent_absent_trans)
        oppose_support = self._support(self.n_oppose_trans)

        if oppose_support == 0:
            # No counter-example to the rule: infinite conviction, unless the numerator vanishes as well
            return float('inf') if numerator > 0 else float('nan')

        return numerator/oppose_support

    def computeLeverage(self):
        ''' Compute the leverage (or Piatetsky-Shapiro) of the rule A->B

        Formula: Support(A,B) - Support(A).Support(B)
        Interpretation: Is the 'proportion of additional elements' covered by both the premise and consequence 'above
        the expected' if independent.
        Returns: the leverage, or nan for an empty transaction database.
        '''

        return self._support(self.n_support_trans) - self._support(self.n_antecedent_present_trans)*self._support(self.n_consequent_present_trans)

    def computeCoverage(self):
        ''' Compute the coverage of the rule  A->B

        Formula: support(A)
        Range: [0, 1]
        Returns: the coverage, or nan for an empty transaction database.
        '''

        return self._support(self.n_antecedent_present_trans)

    def computeCosineSimilarity(self):
        ''' Compute the cosine similarity of the rule A->B

        Formula: Support(A,B)/sqrt(Support(A).Support(B))
        Range: [0, 1]
        Returns: the cosine similarity, or nan when A or B never occurs (or the database is empty).
        '''

        support_product = self._support(self.n_antecedent_present_trans)*self._support(self.n_consequent_present_trans)
        if support_product == 0:
            return float('nan')

        return self._support(self.n_support_trans)/np.sqrt(support_product)

In [6]:
# Recompute every interestingness measure for each mined rule and print one report block per rule
for rule_antecedent, rule_consequent in association_rules:

    print("{} => {}".format(rule_antecedent, rule_consequent))

    rule_measures = ARMInterestMeasures(transactions, rule_antecedent, rule_consequent)

    # (output template, measure) pairs, in the order they are printed
    report_lines = (
        ("Support = {}", rule_measures.computeSupport),
        ("Confidence = {}", rule_measures.computeConfidence),
        ("Lift = {}", rule_measures.computeLift),
        ("Conviction = {}", rule_measures.computeConviction),
        ("Leverage = {}", rule_measures.computeLeverage),
        ("Coverage = {}", rule_measures.computeCoverage),
        ("Cosine Similarity = {}", rule_measures.computeCosineSimilarity),
    )
    for template, compute_measure in report_lines:
        print(template.format(compute_measure()))

    print()

{'mushroom cream sauce', 'pasta'} => {'escalope'}
Support = 0.0025329956005865884
Confidence = 0.95
Lift = 11.976386554621849
Conviction = 18.413544860685242
Leverage = 0.0023214964485648902
Coverage = 0.0026663111585121984
Cosine Similarity = 0.17417271443536014

{'olive oil', 'chocolate', 'frozen vegetables'} => {'mineral water'}
Support = 0.0027996267164378083
Confidence = 0.7
Lift = 2.9366331096196867
Conviction = 2.5387726080966986
Leverage = 0.001846280958921515
Coverage = 0.003999466737768298
Cosine Similarity = 0.09067235802639696

{'spaghetti', 'cooking oil', 'eggs'} => {'mineral water'}
Support = 0.0029329422743634183
Confidence = 0.7096774193548387
Lift = 2.9772317240383925
Conviction = 2.6233983616999215
Leverage = 0.0019478183249299153
Coverage = 0.0041327822956939075
Cosine Similarity = 0.09344543211954282

{'olive oil', 'eggs', 'milk'} => {'mineral water'}
Support = 0.0026663111585121984
Confidence = 0.7142857142857143
Lift = 2.996564397571109
Conviction = 2.665711238501