In [45]:
#pip install mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### 1. Data Prepration

In [46]:
df = pd.read_csv('MarketBasketAnalysis.csv')
df.head()

Unnamed: 0,OrderID,ProductName
0,SO51176,Road-250 Black
1,SO51176,Road Bottle Cage
2,SO51177,Touring-2000 Blue
3,SO51177,Sport-100 Helmet
4,SO51178,Mountain-200 Silver


In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50730 entries, 0 to 50729
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   OrderID      50730 non-null  object
 1   ProductName  50730 non-null  object
dtypes: object(2)
memory usage: 792.8+ KB


### 2. Basket Ceation

In [48]:
basket = (
    df
    .groupby(['OrderID', 'ProductName'])['ProductName']
    .count()
    .unstack()
    .fillna(0)
)


In [49]:
basket = basket.map(lambda x: 1 if x > 0 else 0)
basket.head()

ProductName,AWC Logo Cap,All-Purpose Bike Stand,Bike Wash - Dissolver,Classic Vest,Fender Set - Mountain,HL Mountain Tire,HL Road Tire,Half-Finger Gloves,Hitch Rack - 4-Bike,Hydration Pack - 70 oz.,...,Sport-100 Helmet,Touring Tire,Touring Tire Tube,Touring-1000 Blue,Touring-1000 Yellow,Touring-2000 Blue,Touring-3000 Blue,Touring-3000 Yellow,Water Bottle - 30 oz.,Women's Mountain Shorts
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SO51176,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SO51177,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
SO51178,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
SO51179,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SO51180,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0


### 3. Aprori

In [50]:
frequent_itemsets = apriori(
    basket,
    min_support=0.02,
    use_colnames=True
)


frequent_itemsets.sort_values("support", ascending=False).head(10)



Unnamed: 0,support,itemsets
27,0.347952,(Sport-100 Helmet)
32,0.218109,(Water Bottle - 30 oz.)
18,0.16964,(Patch Kit/8 Patches)
14,0.164471,(Mountain Tire Tube)
21,0.126786,(Road Tire Tube)
0,0.111778,(AWC Logo Cap)
3,0.108943,(Fender Set - Mountain)
13,0.108276,(Mountain Bottle Cage)
20,0.09477,(Road Bottle Cage)
52,0.094047,"(Mountain Bottle Cage, Water Bottle - 30 oz.)"


In [51]:
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1
)


In [52]:
# Filter Rules
rules_filtered = rules[
    (rules["confidence"] >= 0.5) &
    (rules["lift"] >= 1.5) &
    (rules["support"] >= 0.01)
].copy()

rules_filtered = rules_filtered[[
    "antecedents",
    "consequents",
    "support",
    "confidence",
    "lift"
]]

In [53]:
rules_filtered["antecedents"] = rules_filtered["antecedents"].apply(lambda x: ", ".join(map(str, x)))
rules_filtered["consequents"] = rules_filtered["consequents"].apply(lambda x: ", ".join(map(str, x)))

In [55]:
rules_filtered.sort_values(
    by=["confidence", "lift"],
    ascending=False
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
65,"Road-750 Black, Water Bottle - 30 oz.",Road Bottle Cage,0.026958,1.0,10.551906
70,"Sport-100 Helmet, Road Bottle Cage",Water Bottle - 30 oz.,0.021066,1.0,4.584862
52,Touring Tire,Touring Tire Tube,0.044911,0.940629,12.370504
40,Road Bottle Cage,Water Bottle - 30 oz.,0.084542,0.892082,4.090074
55,"Sport-100 Helmet, HL Mountain Tire",Mountain Tire Tube,0.0224,0.891593,5.420969
28,Mountain Bottle Cage,Water Bottle - 30 oz.,0.094047,0.868583,3.982334
64,"Road-750 Black, Road Bottle Cage",Water Bottle - 30 oz.,0.026958,0.867621,3.977922
60,"Sport-100 Helmet, Mountain Bottle Cage",Water Bottle - 30 oz.,0.026235,0.838366,3.843792
12,HL Road Tire,Road Tire Tube,0.033128,0.787318,6.20984
22,ML Mountain Tire,Mountain Tire Tube,0.043133,0.771372,4.690013
