In [89]:
#pip install mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### 1. Data Preparation

In [90]:
# Read the raw transaction lines from disk into a DataFrame.
df = pd.read_csv(filepath_or_buffer="transactions.csv")

# Preview the first five rows as a sanity check on the load.
df.head(5)

Unnamed: 0,OrderID,ProductName
0,SO51176,Road-250 Black
1,SO51176,Road Bottle Cage
2,SO51177,Touring-2000 Blue
3,SO51177,Sport-100 Helmet
4,SO51178,Mountain-200 Silver


### 2. Basket Creation

In [91]:
# Build the basket matrix: one row per OrderID, one column per ProductName.
# Count the rows for every (OrderID, ProductName) pair, then pivot the
# product level into columns; pairs that never occur are filled with 0.
basket = (
    df
    .groupby(['OrderID', 'ProductName'])['ProductName']
    .count()
    .unstack(fill_value=0)
)

# One-hot encode straight to booleans (apriori expects boolean input).
# A vectorised comparison replaces the per-cell Python lambda and the
# separate astype(bool) pass, produces the same True/False matrix, and
# does not depend on DataFrame.map, which only exists in pandas >= 2.1.
basket = basket > 0

basket.head()

ProductName,AWC Logo Cap,All-Purpose Bike Stand,Bike Wash - Dissolver,Classic Vest,Fender Set - Mountain,HL Mountain Tire,HL Road Tire,Half-Finger Gloves,Hitch Rack - 4-Bike,Hydration Pack - 70 oz.,...,Sport-100 Helmet,Touring Tire,Touring Tire Tube,Touring-1000 Blue,Touring-1000 Yellow,Touring-2000 Blue,Touring-3000 Blue,Touring-3000 Yellow,Water Bottle - 30 oz.,Women's Mountain Shorts
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SO51176,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
SO51177,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,True,False,False,False,False
SO51178,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
SO51179,False,True,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
SO51180,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,True,False


### 3. Apriori

In [92]:
# Mine frequent itemsets from the boolean basket matrix.
#   min_support=0.02  -> minimum support threshold passed to apriori
#   use_colnames=True -> itemsets are reported with the basket's column
#                        names (product names) rather than column indices
frequent_itemsets = apriori(basket, min_support=0.02, use_colnames=True)

### 4. Association Rules

In [93]:
# Generate candidate rules from the frequent itemsets, using lift as the
# selection metric with a minimum threshold of 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1
)


def format_itemset(items):
    """Return the items of an itemset as one comma-separated string.

    The antecedents/consequents produced by association_rules are unordered
    sets, so their iteration order is not guaranteed to be the same from one
    kernel session to the next. Sorting the names first makes the rendered
    label (and any file exported from it) reproducible.
    """
    return ", ".join(sorted(map(str, items)))


# Filter Rules: keep only rules meeting the confidence, lift and support
# cut-offs, and restrict the table to the columns we report.
rules_filtered = rules.loc[
    (rules["confidence"] >= 0.5) &
    (rules["lift"] >= 1.5) &
    (rules["support"] >= 0.01),
    [
        "antecedents",
        "consequents",
        "support",
        "confidence",
        "lift"
    ]
].copy()

# Format antecedents and consequents as strings (deterministic item order)
rules_filtered["antecedents"] = rules_filtered["antecedents"].apply(format_itemset)
rules_filtered["consequents"] = rules_filtered["consequents"].apply(format_itemset)

# Sort the filtered rules: highest confidence first, ties broken by lift
rules_filtered = rules_filtered.sort_values(
    by=["confidence", "lift"],
    ascending=False
)

rules_filtered

Unnamed: 0,antecedents,consequents,support,confidence,lift
65,"Road-750 Black, Water Bottle - 30 oz.",Road Bottle Cage,0.026958,1.0,10.551906
70,"Sport-100 Helmet, Road Bottle Cage",Water Bottle - 30 oz.,0.021066,1.0,4.584862
52,Touring Tire,Touring Tire Tube,0.044911,0.940629,12.370504
40,Road Bottle Cage,Water Bottle - 30 oz.,0.084542,0.892082,4.090074
55,"Sport-100 Helmet, HL Mountain Tire",Mountain Tire Tube,0.0224,0.891593,5.420969
28,Mountain Bottle Cage,Water Bottle - 30 oz.,0.094047,0.868583,3.982334
64,"Road-750 Black, Road Bottle Cage",Water Bottle - 30 oz.,0.026958,0.867621,3.977922
60,"Sport-100 Helmet, Mountain Bottle Cage",Water Bottle - 30 oz.,0.026235,0.838366,3.843792
12,HL Road Tire,Road Tire Tube,0.033128,0.787318,6.20984
22,ML Mountain Tire,Mountain Tire Tube,0.043133,0.771372,4.690013


In [94]:
# Optional export: uncomment the line below to write the filtered rules
# to "market_basket_rules.csv" (index=False omits the DataFrame index).
# rules_filtered.to_csv("market_basket_rules.csv", index=False)