In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, fpmax
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
def algorithm(df, min_sup=0.2, min_conf=0.7, min_lift=1.3, top_n=10):
    """Mine association rules with FP-Growth and return the strongest ones by lift.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction data handed straight to ``fpgrowth``; in this notebook it is
        the one-hot frame produced by ``TransactionEncoder``.
    min_sup : float, default 0.2
        Minimum support an itemset needs to count as frequent.
    min_conf : float, default 0.7
        Minimum confidence a rule needs to be generated.
    min_lift : float, default 1.3
        Rules must have a lift strictly greater than this value.
    top_n : int, default 10
        Maximum number of rules to return.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rules sorted by descending lift, re-indexed from 0.

    The defaults are the parameter settings chosen for this notebook, so
    ``algorithm(df)`` behaves exactly as it did before they became arguments.
    """
    #generate itemsets
    frequent_itemsets = fpgrowth(df, min_support=min_sup, use_colnames=True)

    #generate rules
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_conf)

    #prune misleading rules: a rule whose confidence does not beat the
    #consequent's own support predicts no better than the baseline
    beats_baseline = rules['confidence'] > rules['consequent support']

    #only positively correlated rules
    strong_lift = rules['lift'] > min_lift

    ruleSet = (
        rules[beats_baseline & strong_lift]
        .sort_values(by=['lift'], ascending=False)
        .reset_index(drop=True)  # renumber 0..k-1 after filtering and sorting
        .head(top_n)
    )

    return ruleSet

### Preprocessing

In [2]:
#transactions vary in length (the longest has 49 items), so the file is read
#into a fixed 49-column frame and shorter rows end up padded with NaN
rawDf = pd.read_csv("supermarket.csv", header=None, names=list(range(49)))

#strip the NaN padding from each row, leaving one list of items per transaction
dataset = [row.dropna().tolist() for _, row in rawDf.iterrows()]

#one-hot encode the transactions into the item matrix used for mining
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

### FP Tree and FP Growth

In [None]:
#timing: benchmark the whole rule-mining pipeline on the encoded transactions.
#-n 100 executes algorithm(df) 100 times per timing run, -r 10 repeats that run 10 times.
%timeit -n 100 -r 10 algorithm(df)

In [71]:
#run the mining pipeline once outside the benchmark and keep the result for inspection
rules = algorithm(df)
#a bare expression on the last line makes the notebook render the rule table
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(total = high, fruit)","(bread and cake, vegetables)",0.268641,0.49665,0.202723,0.754626,1.519432,0.069303,2.051357
1,"(total = high, baking needs)",(tissues-paper prd),0.276637,0.485628,0.201643,0.728906,1.500956,0.0673,1.897396
2,"(total = high, vegetables)","(bread and cake, fruit)",0.274476,0.502485,0.202723,0.738583,1.469859,0.064803,1.903143
3,"(bread and cake, total = high)",(tissues-paper prd),0.305381,0.485628,0.216987,0.710545,1.463147,0.068686,1.777036
4,"(biscuits, vegetables, frozen foods)","(bread and cake, fruit)",0.278798,0.502485,0.200778,0.720155,1.433186,0.060686,1.777822
5,"(biscuits, fruit, frozen foods)","(bread and cake, vegetables)",0.282905,0.49665,0.200778,0.709702,1.428978,0.060273,1.733908
6,"(total = high, frozen foods)",(biscuits),0.275124,0.563,0.217419,0.790259,1.403658,0.062525,2.083525
7,"(biscuits, total = high)",(frozen foods),0.265399,0.587206,0.217419,0.819218,1.395113,0.061576,2.283386
8,"(bread and cake, party snack foods, frozen foods)",(biscuits),0.277285,0.563,0.215042,0.775526,1.377489,0.05893,1.946775
9,"(total = high, baking needs)",(biscuits),0.276637,0.563,0.213097,0.770312,1.368229,0.05735,1.902586
