In [1]:
import time
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth
from mlxtend.frequent_patterns import association_rules


In [14]:
with open("supermarket.csv") as f:
    content = f.readlines()

transaction_list = []

for line in content: 
    # print(line)
    items_purchased = [i.strip() for i in line.split(',')]
    # print(items_purchased)
    transaction_list.append(items_purchased)

# print(transaction_list[:5])  # Sanity check of the result

In [81]:
te = TransactionEncoder()
te_transactions = te.fit(transaction_list).transform(transaction_list)
df = pd.DataFrame(te_transactions, columns=te.columns_)
print(len(te.columns_))
# print(df)

124


In [82]:
def timer(fun):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        res = fun(*args, **kwargs)
        print("--- %s seconds ---" % (round(time.time() - start_time, 3)))
        return res
    return wrapper

@timer
def search_fp_growth(data_frame, itemsets_needed, treshold=0.01):
    hi = 1
    lo = 0
    itemsets = []
    while((hi-lo)>treshold):
        mid = (lo+hi)/2
        itemsets = fpgrowth(df=data_frame, min_support=mid, use_colnames=True)
        if len(itemsets)<itemsets_needed:
            hi=mid
        else:
            lo=mid
    print("Final min support=",mid)
    return itemsets

In [104]:
wanted_itemset_length = 50
print(wanted_itemset_length)
final_itemsets = search_fp_growth(df, itemsets_needed=wanted_itemset_length)
final_itemsets

50
Final min support= 0.3671875
--- 1.576 seconds ---


Unnamed: 0,support,itemsets
0,0.719689,(bread and cake)
1,0.640156,(fruit)
2,0.639939,(vegetables)
3,0.635185,(milk-cream)
4,0.604063,(baking needs)
5,0.587206,(frozen foods)
6,0.563,(biscuits)
7,0.53231,(juice-sat-cord-ms)
8,0.503566,(party snack foods)
9,0.494489,(margarine)


In [117]:
@timer
def search_association_rules(frequent_itemsets, rules_needed, arguments, treshold=0.01):
    hi = 100
    lo = arguments['min_threshold']
    rules = []
    final_min_tresh = hi
    while((hi-lo)>treshold):
        mid = (lo+hi)/2
        arguments['min_threshold'] = mid
        rules = association_rules(frequent_itemsets, **arguments)
        if len(rules)<rules_needed:
            hi=mid
        else:
            print(final_min_tresh)
            final_min_tresh=min(final_min_tresh, mid)
            lo=mid
    print("Final min ",arguments['metric'],"=",final_min_tresh)
    print(len(rules))
    arguments['min_threshold'] =final_min_tresh
    rules = association_rules(frequent_itemsets, **arguments)
    return rules

In [119]:
association_rules_arguments = dict(
    metric="lift", 
    min_threshold=1
)
final_rules = search_association_rules(final_itemsets, 40, association_rules_arguments)
print(final_rules.sort_values('lift', ascending=False)[['antecedents', 'consequents', 'lift', 'confidence' ]])

100
1.04833984375
Final min  lift = 1.04833984375
38
--- 0.052 seconds ---
                     antecedents                   consequents      lift  \
7   (vegetables, bread and cake)                       (fruit)  1.217475   
10                       (fruit)  (vegetables, bread and cake)  1.217475   
8        (fruit, bread and cake)                  (vegetables)  1.203743   
9                   (vegetables)       (fruit, bread and cake)  1.203743   
40                (frozen foods)                    (biscuits)  1.183261   
41                    (biscuits)                (frozen foods)  1.183261   
4                   (vegetables)                       (fruit)  1.164336   
5                        (fruit)                  (vegetables)  1.164336   
6            (vegetables, fruit)              (bread and cake)  1.127583   
11              (bread and cake)           (vegetables, fruit)  1.127583   
45                    (biscuits)                (baking needs)  1.121008   
44           

In [79]:
@timer
def start_association_rules(frequent_itemsets, arguments):
    rules = association_rules(frequent_itemsets, **arguments)
    return rules

In [80]:
association_rules_arguments = dict(
    metric="lift", 
    min_threshold=1
)
rules = start_association_rules(final_itemsets, association_rules_arguments)
print(rules.sort_values('lift', ascending=False)[:20][['antecedents', 'consequents', 'lift', 'confidence' ]])

--- 0.52 seconds ---
                     antecedents                   consequents      lift  \
3   (vegetables, bread and cake)                       (fruit)  1.217475   
6                        (fruit)  (vegetables, bread and cake)  1.217475   
4        (fruit, bread and cake)                  (vegetables)  1.203743   
5                   (vegetables)       (fruit, bread and cake)  1.203743   
35                (baking needs)                   (margarine)  1.190221   
34                   (margarine)                (baking needs)  1.190221   
29                    (biscuits)                (frozen foods)  1.183261   
28                (frozen foods)                    (biscuits)  1.183261   
1                        (fruit)                  (vegetables)  1.164336   
0                   (vegetables)                       (fruit)  1.164336   
10           (milk-cream, fruit)              (bread and cake)  1.148136   
15              (bread and cake)           (milk-cream, fruit)  1.1