In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpmax, fpgrowth

In [2]:
# Read only the two columns this analysis uses: the order key and the item label.
df1 = pd.read_csv("Superstore.csv", usecols=["Order ID", "Sub-Category"])

# Collapse to one row per order, gathering that order's 'Sub-Category' values into a list.
grouped = (
    df1.groupby("Order ID")["Sub-Category"]
    .apply(list)
    .reset_index()
)

# Plain list of lists (one inner list per order), the input passed to the encoder below.
dataset = grouped["Sub-Category"].to_list()

# Encode the transactions as a table with one column per entry in te.columns_.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine frequent itemsets with FP-Growth using a 0.01 minimum support;
# use_colnames=True reports itemsets by column name rather than column index.
frequent_itemsets = fpgrowth(df, min_support=0.01, use_colnames=True)


In [3]:
# Display the frequent itemsets returned by fpgrowth (one row per itemset with its support).
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.163145,(Phones)
1,0.262292,(Binders)
2,0.061357,(Tables)
3,0.236286,(Paper)
4,0.043072,(Fasteners)
...,...,...
62,0.016050,"(Appliances, Furnishings)"
63,0.011987,"(Appliances, Accessories)"
64,0.011987,"(Art, Appliances)"
65,0.010971,"(Appliances, Storage)"


In [4]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from the mined itemsets, using lift as the selection metric.
# NOTE(review): a lift threshold of 0.1 is well below 1.0, so rules with lift < 1
# are kept as well; narrowing happens later by filtering `rules` — confirm this is intended.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=0.1,
)

In [5]:
# Display the generated association rules together with their metric columns.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Paper),(Phones),0.236286,0.163145,0.034336,0.145314,0.890703,-0.004213,0.979137,-0.138431
1,(Phones),(Paper),0.163145,0.236286,0.034336,0.210461,0.890703,-0.004213,0.967291,-0.127879
2,(Furnishings),(Phones),0.173710,0.163145,0.029256,0.168421,1.032339,0.000916,1.006345,0.037912
3,(Phones),(Furnishings),0.163145,0.173710,0.029256,0.179328,1.032339,0.000916,1.006845,0.037433
4,(Binders),(Phones),0.262292,0.163145,0.039618,0.151046,0.925837,-0.003174,0.985748,-0.097949
...,...,...,...,...,...,...,...,...,...,...
103,(Appliances),(Art),0.090207,0.146282,0.011987,0.132883,0.908402,-0.001209,0.984548,-0.099774
104,(Appliances),(Storage),0.090207,0.155221,0.010971,0.121622,0.783536,-0.003031,0.961748,-0.232927
105,(Storage),(Appliances),0.155221,0.090207,0.010971,0.070681,0.783536,-0.003031,0.978988,-0.246436
106,(Bookcases),(Binders),0.045104,0.262292,0.011174,0.247748,0.944550,-0.000656,0.980666,-0.057917


In [6]:
# Store the number of items in each rule's antecedent.
# This adds a column to `rules` in place, so the frame gains `antecedent_len` from here on.
rules["antecedent_len"] = rules["antecedents"].apply(len)

# Show the rules whose confidence is above 0.25.
# Additional conditions that are currently switched off:
#   rules["antecedent_len"] >= 2
#   rules["lift"] > 1.25
rules.loc[rules["confidence"] > 0.25]


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
7,"(Paper, Phones)",(Binders),0.034336,0.262292,0.010768,0.313609,1.195651,0.001762,1.074765,0.169454,2
8,"(Binders, Phones)",(Paper),0.039618,0.236286,0.010768,0.271795,1.150279,0.001407,1.048762,0.136035,2
21,(Fasteners),(Paper),0.043072,0.236286,0.011581,0.268868,1.137892,0.001403,1.044564,0.126636,1
22,(Fasteners),(Binders),0.043072,0.262292,0.012393,0.287736,1.097007,0.001096,1.035723,0.092409,1
80,(Storage),(Binders),0.155221,0.262292,0.039618,0.255236,0.973098,-0.001095,0.990526,-0.031688,1
86,"(Paper, Storage)",(Binders),0.035351,0.262292,0.010565,0.298851,1.139382,0.001292,1.052141,0.126815,2
88,"(Storage, Binders)",(Paper),0.039618,0.236286,0.010565,0.266667,1.128576,0.001204,1.041428,0.118627,2
92,(Appliances),(Binders),0.090207,0.262292,0.025599,0.283784,1.081939,0.001939,1.030008,0.083243,1
