In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [8]:
# Load the preprocessed line items and one-hot encode them into a basket
# matrix: one row per invoice, one column per product, True where the invoice
# contains that product.
data = pd.read_excel('data/dataPreprocessed.xlsx')

# Column order of the matrix: each StockCode in order of first appearance.
unique_products = data['StockCode'].unique().tolist()

# Vectorised encoding (replaces a per-row `iterrows` loop that filled Python
# lists): count every (Invoice, StockCode) pair, turn the counts into
# presence flags, then pivot the products into columns.
# NOTE(review): assumes 'Invoice' and 'StockCode' contain no missing values
# (groupby silently drops rows whose key is NaN) — confirm against the
# preprocessing step.
transaction_df = (
    data.groupby(['Invoice', 'StockCode'])
    .size()
    .gt(0)                       # pair occurs at least once -> True
    .unstack(fill_value=False)   # product absent from an invoice -> False
    .reindex(columns=unique_products, fill_value=False)  # first-seen column order
    .rename_axis(columns=None)   # plain, unnamed column index
    .reset_index(drop=True)      # positional row index; rows stay in sorted-invoice order
    .astype(bool)                # bool is the dtype mlxtend's apriori expects
)

In [10]:
# Mine frequent itemsets and association rules, then show the strongest rules
# by support.
MIN_ITEMSET_SUPPORT = 0.02  # an itemset must occur in at least 2% of invoices
MIN_RULE_SUPPORT = 0.01     # rule-level support threshold
TOP_N_RULES = 5             # how many rules to display

# apriori warns (DeprecationWarning) and runs slower on non-bool 0/1 frames,
# so make sure it receives booleans; this does not change the computed supports.
frequent_itemsets = apriori(
    transaction_df.astype(bool),
    min_support=MIN_ITEMSET_SUPPORT,
    use_colnames=True,
)

# A rule's support is the support of its full itemset, which is already
# >= MIN_ITEMSET_SUPPORT, so with the current values this threshold keeps
# every candidate rule.
rules = association_rules(
    frequent_itemsets,
    metric='support',
    min_threshold=MIN_RULE_SUPPORT,
)

# Each itemset pair yields two rules (A -> B and B -> A) with identical
# support, so the top rows come in mirrored pairs.
top_rules = rules.sort_values(by='support', ascending=False).head(TOP_N_RULES)
print(top_rules)



   antecedents consequents  antecedent support  consequent support   support  \
10     (21733)    (85123A)            0.053127            0.161464  0.038589   
11    (85123A)     (21733)            0.161464            0.053127  0.038589   
0      (21232)     (21231)            0.071352            0.043185  0.033244   
1      (21231)     (21232)            0.043185            0.071352  0.033244   
21    (82494L)     (82482)            0.053928            0.043720  0.029610   

    confidence       lift  leverage  conviction  zhangs_metric  
10    0.726358   4.498564  0.030011    3.064354       0.821342  
11    0.238994   4.498564  0.030011    1.244239       0.927458  
0     0.465918  10.788760  0.030163    1.791511       0.977024  
1     0.769802  10.788760  0.030163    4.034126       0.948262  
21    0.549058  12.558538  0.027252    2.120630       0.972836  
