In [1]:
import kagglehub
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
path = kagglehub.dataset_download("ruchi798/shopping-cart-database")

In [3]:
customers = pd.read_csv(path + "/customers.csv")
orders = pd.read_csv(path + "/orders.csv")
products = pd.read_csv(path + "/products.csv")
sales = pd.read_csv(path + "/sales.csv")

In [4]:
basket = pd.merge(sales, products[['product_ID', 'product_name']], 
                 left_on='product_id', 
                 right_on='product_ID')

In [5]:
basket.head()

Unnamed: 0,sales_id,order_id,product_id,price_per_unit,quantity,total_price,product_ID,product_name
0,0,1,218,106,2,212,218,Chambray
1,1,1,481,118,1,118,481,Puffer
2,2,1,2,96,3,288,2,Oxford Cloth
3,3,1,1002,106,2,212,1002,Wool
4,4,1,691,113,3,339,691,Parka


In [6]:
basket[basket["order_id"] == 1]

Unnamed: 0,sales_id,order_id,product_id,price_per_unit,quantity,total_price,product_ID,product_name
0,0,1,218,106,2,212,218,Chambray
1,1,1,481,118,1,118,481,Puffer
2,2,1,2,96,3,288,2,Oxford Cloth
3,3,1,1002,106,2,212,1002,Wool
4,4,1,691,113,3,339,691,Parka
5,5,1,981,106,3,318,981,Wool


In [7]:
basket_sets = basket.groupby(['order_id', 'product_name'])['quantity'].sum().unstack().fillna(0).map(lambda x: 1 if x > 0 else 0)

In [8]:
basket_sets.head()

product_name,Bomber,Camp Collared,Cardigan,Cargo Pants,Casual Slim Fit,Chambray,Chinos,Coach,Cords,Cropped,...,Polo,Puffer,Pullover,Relaxed Leg,Shearling,Slim-Fit,Tracksuit Bottoms,Trench Coat,Windbreaker,Wool
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
5,0,0,0,0,0,0,0,1,0,0,...,0,1,0,0,1,0,0,0,0,0


In [9]:
frequent_itemsets = apriori(basket_sets, min_support=0.01, use_colnames=True)



In [10]:
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.145015,(Bomber)
1,0.136959,(Camp Collared)
2,0.129909,(Cardigan)
3,0.128902,(Cargo Pants)
4,0.148036,(Casual Slim Fit)


In [11]:
rules = association_rules(frequent_itemsets, 10, metric="lift", min_threshold=1)

In [12]:
frequent_itemsets.sort_values('support', ascending=False).head(10)

Unnamed: 0,support,itemsets
11,0.238671,(Denim)
17,0.151057,(Joggers)
4,0.148036,(Casual Slim Fit)
30,0.146022,(Slim-Fit)
0,0.145015,(Bomber)
15,0.144008,(Henley)
29,0.13998,(Shearling)
28,0.13998,(Relaxed Leg)
12,0.138973,(Drawstring)
21,0.136959,(Oxford Cloth)


In [13]:
rules.sort_values('lift', ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
594,(Cropped),"(Denim, Oxford Cloth)",0.124874,0.038268,0.01007,0.080645,2.107385,1.0,0.005292,1.046095,0.60046,0.065789,0.044064,0.171902
591,"(Denim, Oxford Cloth)",(Cropped),0.038268,0.124874,0.01007,0.263158,2.107385,1.0,0.005292,1.187671,0.546387,0.065789,0.158016,0.171902
595,(Oxford Cloth),"(Denim, Cropped)",0.136959,0.038268,0.01007,0.073529,1.92144,1.0,0.004829,1.03806,0.555659,0.060976,0.036665,0.168344
590,"(Denim, Cropped)",(Oxford Cloth),0.038268,0.136959,0.01007,0.263158,1.92144,1.0,0.004829,1.17127,0.498639,0.060976,0.146226,0.168344
593,(Denim),"(Oxford Cloth, Cropped)",0.238671,0.022155,0.01007,0.042194,1.904488,1.0,0.004783,1.020922,0.62381,0.040161,0.020493,0.24837
592,"(Oxford Cloth, Cropped)",(Denim),0.022155,0.238671,0.01007,0.454545,1.904488,1.0,0.004783,1.39577,0.485685,0.040161,0.28355,0.24837
261,(Cords),(Mandarin Collar),0.119839,0.116818,0.025176,0.210084,1.798392,1.0,0.011177,1.118071,0.504394,0.119048,0.105603,0.212801
260,(Mandarin Collar),(Cords),0.116818,0.119839,0.025176,0.215517,1.798392,1.0,0.011177,1.121964,0.502668,0.119048,0.108706,0.212801
563,(Wool),(Puffer),0.118832,0.130916,0.025176,0.211864,1.618318,1.0,0.009619,1.102708,0.4336,0.112108,0.093142,0.202086
562,(Puffer),(Wool),0.130916,0.118832,0.025176,0.192308,1.618318,1.0,0.009619,1.09097,0.439629,0.112108,0.083385,0.202086


In [14]:
denim_rules = rules[
    rules['antecedents'].apply(lambda x: 'Denim' in str(x)) |
    rules['consequents'].apply(lambda x: 'Denim' in str(x))
]

In [15]:
denim_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
38,(Camp Collared),(Denim),0.136959,0.238671,0.040282,0.294118,1.232316,1.0,0.007594,1.07855,0.218436,0.12012,0.072829,0.231447
39,(Denim),(Camp Collared),0.238671,0.136959,0.040282,0.168776,1.232316,1.0,0.007594,1.038278,0.247619,0.12012,0.036867,0.231447
74,(Denim),(Cardigan),0.238671,0.129909,0.032226,0.135021,1.039348,1.0,0.00122,1.00591,0.049727,0.095808,0.005875,0.191542
75,(Cardigan),(Denim),0.129909,0.238671,0.032226,0.248062,1.039348,1.0,0.00122,1.012489,0.043511,0.095808,0.012335,0.191542
222,(Coach),(Denim),0.128902,0.238671,0.033233,0.257812,1.080202,1.0,0.002467,1.025791,0.085234,0.099398,0.025143,0.198527


In [16]:
denim_rules.sort_values('lift', ascending=False).head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
594,(Cropped),"(Denim, Oxford Cloth)",0.124874,0.038268,0.01007,0.080645,2.107385,1.0,0.005292,1.046095,0.60046,0.065789,0.044064,0.171902
591,"(Denim, Oxford Cloth)",(Cropped),0.038268,0.124874,0.01007,0.263158,2.107385,1.0,0.005292,1.187671,0.546387,0.065789,0.158016,0.171902
595,(Oxford Cloth),"(Denim, Cropped)",0.136959,0.038268,0.01007,0.073529,1.92144,1.0,0.004829,1.03806,0.555659,0.060976,0.036665,0.168344
590,"(Denim, Cropped)",(Oxford Cloth),0.038268,0.136959,0.01007,0.263158,1.92144,1.0,0.004829,1.17127,0.498639,0.060976,0.146226,0.168344
592,"(Oxford Cloth, Cropped)",(Denim),0.022155,0.238671,0.01007,0.454545,1.904488,1.0,0.004783,1.39577,0.485685,0.040161,0.28355,0.24837


In [17]:
print("If customer buys Denim, they also buy:")
denim_if = rules[rules['antecedents'].apply(lambda x: 'Denim' in str(x))]
print(denim_if.sort_values('lift', ascending=False)[['consequents', 'support', 'confidence', 'lift']])


If customer buys Denim, they also buy:
                 consequents   support  confidence      lift
591                (Cropped)  0.010070    0.263158  2.107385
590           (Oxford Cloth)  0.010070    0.263158  1.921440
593  (Oxford Cloth, Cropped)  0.010070    0.042194  1.904488
335                 (Puffer)  0.042296    0.177215  1.353651
282                (Cropped)  0.038268    0.160338  1.283993
339            (Relaxed Leg)  0.042296    0.177215  1.266005
39           (Camp Collared)  0.040282    0.168776  1.232316
323             (Drawstring)  0.040282    0.168776  1.214456
326           (High-Waisted)  0.037261    0.156118  1.192502
330           (Oxford Cloth)  0.038268    0.160338  1.170700
324                (Flannel)  0.037261    0.156118  1.148336
332                (Peacoat)  0.033233    0.139241  1.142693
252                  (Cords)  0.031219    0.130802  1.091480
223                  (Coach)  0.033233    0.139241  1.080202
343            (Trench Coat)  0.034240    0.1

In [18]:
print("Products that lead to buying Denim:")
denim_then = rules[rules['consequents'].apply(lambda x: 'Denim' in str(x))]
print(denim_then.sort_values('lift', ascending=False)[['antecedents', 'support', 'confidence', 'lift']])


Products that lead to buying Denim:
                 antecedents   support  confidence      lift
594                (Cropped)  0.010070    0.080645  2.107385
595           (Oxford Cloth)  0.010070    0.073529  1.921440
592  (Oxford Cloth, Cropped)  0.010070    0.454545  1.904488
334                 (Puffer)  0.042296    0.323077  1.353651
283                (Cropped)  0.038268    0.306452  1.283993
338            (Relaxed Leg)  0.042296    0.302158  1.266005
38           (Camp Collared)  0.040282    0.294118  1.232316
322             (Drawstring)  0.040282    0.289855  1.214456
327           (High-Waisted)  0.037261    0.284615  1.192502
331           (Oxford Cloth)  0.038268    0.279412  1.170700
325                (Flannel)  0.037261    0.274074  1.148336
333                (Peacoat)  0.033233    0.272727  1.142693
253                  (Cords)  0.031219    0.260504  1.091480
222                  (Coach)  0.033233    0.257812  1.080202
342            (Trench Coat)  0.034240    0.2556