In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, fpmax
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [89]:
def algorithm(df, min_sup=0.2, min_conf=0.7, max_rules=10, min_lift=None):
    """Mine association rules from an encoded transaction table with FP-Growth.

    Parameters
    ----------
    df : pandas.DataFrame
        Item-indicator table handed unchanged to ``fpgrowth`` (for example the
        ``TransactionEncoder`` output wrapped in a DataFrame).
    min_sup : float, default 0.2
        Minimum support an itemset needs to count as frequent.
    min_conf : float, default 0.7
        Minimum confidence a rule needs to be generated.
    max_rules : int, default 10
        Maximum number of rules to return.
    min_lift : float or None, default None
        When given, additionally keep only rules whose lift is strictly
        greater than this value. ``None`` applies no extra lift filter.

    Returns
    -------
    pandas.DataFrame
        The first ``max_rules`` rules that survive pruning, in the order
        produced by ``association_rules`` and with their original index labels.
    """
    # Generate the frequent itemsets.
    frequent_itemsets = fpgrowth(df, min_support=min_sup, use_colnames=True)

    # Derive candidate rules from the itemsets.
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_conf)

    # Prune misleading rules: a rule is only informative when its confidence
    # beats the consequent's baseline support (i.e. its lift exceeds 1).
    rules = rules[rules['confidence'] > rules['consequent support']]

    # Optional stricter cut-off on positive correlation.
    if min_lift is not None:
        rules = rules[rules['lift'] > min_lift]

    # Select positionally. A label slice such as .loc[0:9] silently returns
    # fewer rows whenever one of the rules labelled 0..9 was pruned above.
    return rules.head(max_rules)

### Preprocessing

In [87]:
# Transactions differ in length (the longest holds 49 items), so 49 integer-named
# columns are declared up front; shorter rows end up padded with NaN.
rawDf = pd.read_csv("supermarket.csv", header=None, names=list(range(0, 49)))

# Strip the NaN padding from each row, leaving one list of items per transaction.
dataset = [transaction.dropna().tolist() for _, transaction in rawDf.iterrows()]

# Encode the transactions into the item-indicator layout the mining functions take.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

### FP Tree and FP Growth

In [74]:
# Benchmark the full mining pipeline: 10 runs of 10 loops each (-r 10, -n 10).
%timeit -n 10 -r 10 algorithm(df)

600 ms ± 182 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [90]:
# Mine the rules from the encoded transactions, then display the resulting table.
rules = algorithm(df)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(fruit),(bread and cake),0.640156,0.719689,0.502485,0.784943,1.09067,0.041773,1.303425
1,(vegetables),(bread and cake),0.639939,0.719689,0.49665,0.776089,1.078368,0.036093,1.251888
2,(fruit),(vegetables),0.640156,0.639939,0.476983,0.745105,1.164336,0.067322,1.412582
3,(vegetables),(fruit),0.639939,0.640156,0.476983,0.745356,1.164336,0.067322,1.413129
4,"(bread and cake, fruit)",(vegetables),0.502485,0.639939,0.387076,0.770323,1.203743,0.065516,1.567679
5,"(bread and cake, vegetables)",(fruit),0.49665,0.640156,0.387076,0.779373,1.217475,0.069143,1.631011
6,"(fruit, vegetables)",(bread and cake),0.476983,0.719689,0.387076,0.811509,1.127583,0.043797,1.487132
7,(bread and cake),(milk-cream),0.719689,0.635185,0.505079,0.701802,1.104878,0.047944,1.223398
8,(milk-cream),(bread and cake),0.635185,0.719689,0.505079,0.795168,1.104878,0.047944,1.368496
9,"(bread and cake, milk-cream)",(fruit),0.505079,0.640156,0.363951,0.720582,1.125636,0.040622,1.287835
