# Frequent itemsets

In [49]:
import sqlite3
import warnings
from pathlib import Path

import pandas as pd
from loguru import logger # pip install loguru
from mlxtend.frequent_patterns import apriori, association_rules
warnings.simplefilter('ignore')

In [50]:
# SQLite connection
# sqlite3.connect() silently creates a new, empty database when the path does
# not exist, so a wrong working directory would only surface later as
# "no such table" errors. Check for the file up front and fail with a clear
# message instead.
db_path = Path('../data/processed/United_Outdoors.sqlite3')
logger.info('Establishing connection with SQLite databases...')
if not db_path.is_file():
    raise FileNotFoundError(f'SQLite database not found: {db_path.resolve()}')
conn = sqlite3.connect(str(db_path))
logger.success('Connections established!')

# Cursor on the connection for executing raw SQL statements
cursor = conn.cursor()

[32m2024-05-26 14:10:02.402[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mEstablishing connection with SQLite databases...[0m
[32m2024-05-26 14:10:02.404[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [32m[1mConnections established![0m


In [51]:
# Load every order line together with its product name, one query per
# product-ID column (ProductID_AW / ProductID_NW).
# - Column aliases give both frames the same schema (OrderID, Name, Quantity)
#   straight from SQL, so no rename step is needed afterwards.
# - The table name is a double-quoted identifier; a single-quoted 'product' is
#   a string literal that SQLite only tolerates as an identifier for
#   backwards compatibility.
AW_ordered_products = pd.read_sql_query('''
    SELECT o.SalesOrderID AS OrderID, p.Name, o.OrderQty AS Quantity
    FROM Order_Details o
    INNER JOIN "product" p ON p.ProductID_AW = o.ProductID_AW
''', conn)

NW_ordered_products = pd.read_sql_query('''
    SELECT o.OrderID, p.Name, o.Quantity
    FROM Order_Details o
    INNER JOIN "product" p ON p.ProductID_NW = o.ProductID_NW
''', conn)

# Stack both result sets and drop rows that are exact duplicates across
# OrderID, Name and Quantity.
ordered_products = (
    pd.concat([AW_ordered_products, NW_ordered_products], ignore_index=True)
    .drop_duplicates()
)


In [52]:
# Thresholds for the rule mining:
#   minsup  - minimum support handed to apriori()
#   minconf - minimum confidence a rule needs to be kept
minsup, minconf = 0.025, 0.5

In [53]:
# Build the order x product basket matrix that apriori takes as input:
# one row per OrderID, one column per product Name, each cell the number of
# order lines (with a non-null Quantity) for that pair, 0 where the product
# is not part of the order.
# unstack() already leaves OrderID as the index, so no reset_index()/set_index()
# round trip is needed.
basket = (ordered_products
          .groupby(['OrderID', 'Name'])['Quantity']
          .count()
          .unstack()
          .fillna(0))
basket.head()

Name,AWC Logo Cap,Alice Mutton,All-Purpose Bike Stand,Aniseed Syrup,Bike Wash - Dissolver,Boston Crab Meat,Cable Lock,Camembert Pierrot,Carnarvon Tigers,Chai,...,Vegie-spread,Water Bottle - 30 oz.,Wimmers gute Semmelknödel,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Tights, L","Women's Tights, M","Women's Tights, S",Zaanse koeken
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10248.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10249.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10252.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
# hot encode the basket
def hot_encode(x):
    """Map a basket cell count to a 0/1 presence flag.

    Args:
        x: Numeric cell value (number of order lines for an order/product pair).

    Returns:
        1 if ``x`` is at least 1, otherwise 0. Values that compare false
        against ``>= 1`` (zero, negatives, fractions below 1, NaN) all map to
        0, so the function never falls through and returns ``None``.
    """
    return 1 if x >= 1 else 0
    
# Vectorised presence flag: 1 where the product occurs in the order, else 0.
# For the counts held in `basket` this gives the same integers as applying
# hot_encode cell by cell, but without a Python call per cell and without
# DataFrame.applymap, which is deprecated in recent pandas releases.
basket_sets = basket.ge(1).astype(int)
basket_sets

Name,AWC Logo Cap,Alice Mutton,All-Purpose Bike Stand,Aniseed Syrup,Bike Wash - Dissolver,Boston Crab Meat,Cable Lock,Camembert Pierrot,Carnarvon Tigers,Chai,...,Vegie-spread,Water Bottle - 30 oz.,Wimmers gute Semmelknödel,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Tights, L","Women's Tights, M","Women's Tights, S",Zaanse koeken
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10248.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10249.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10250.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10251.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10252.0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75119.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75120.0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75121.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75122.0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [55]:
# Generate frequent itemsets
# mlxtend's apriori expects a boolean one-hot frame; passing 0/1 integers still
# works but triggers a deprecation warning (hidden here by the global warnings
# filter) and is slower. The cast does not change which itemsets are found.
frequent_itemsets = apriori(basket_sets.astype(bool), min_support=minsup, use_colnames=True)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.104722,(AWC Logo Cap)
1,0.04109,(Bike Wash - Dissolver)
2,0.065676,(Fender Set - Mountain)
3,0.043227,(HL Mountain Tire)
4,0.026568,(HL Road Tire)
5,0.033627,"(Half-Finger Gloves, M)"
6,0.027342,"(Half-Finger Gloves, S)"
7,0.033256,(Hydration Pack - 70 oz.)
8,0.026691,(LL Mountain Tire)
9,0.032327,(LL Road Tire)


In [56]:
# Derive association rules from the frequent itemsets: keep rules with
# lift >= 1, order them by confidence and then lift (both descending), and
# finally drop everything below the minimum confidence.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=[False, False])
    .loc[lambda candidate_rules: candidate_rules['confidence'] >= minconf]
)

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
27,"(Long-Sleeve Logo Jersey, L, Sport-100 Helmet,...",(AWC Logo Cap),0.027249,0.104722,0.02536,0.930682,8.887158,0.022506,12.915485,0.912338
33,"(Long-Sleeve Logo Jersey, L, Sport-100 Helmet,...",(AWC Logo Cap),0.028023,0.104722,0.025701,0.917127,8.757723,0.022766,10.80302,0.911354
21,(Road Bottle Cage),(Water Bottle - 30 oz.),0.053011,0.145162,0.047097,0.888435,6.120306,0.039402,7.662215,0.883442
25,(Touring Tire),(Touring Tire Tube),0.028952,0.046075,0.025019,0.864171,18.755649,0.023685,7.022989,0.974908
18,(Mountain Bottle Cage),(Water Bottle - 30 oz.),0.062703,0.145162,0.052392,0.835556,5.75603,0.04329,5.19834,0.881545
28,"(AWC Logo Cap, Sport-100 Helmet, Black)","(Long-Sleeve Logo Jersey, L)",0.030593,0.050627,0.02536,0.828947,16.373612,0.023811,5.55018,0.968557
34,"(AWC Logo Cap, Sport-100 Helmet, Blue)","(Long-Sleeve Logo Jersey, L)",0.031305,0.050627,0.025701,0.820969,16.216027,0.024116,5.302851,0.968657
0,"(Long-Sleeve Logo Jersey, L)",(AWC Logo Cap),0.050627,0.104722,0.03629,0.71682,6.84497,0.030989,3.161511,0.899443
32,"(Long-Sleeve Logo Jersey, L, AWC Logo Cap)","(Sport-100 Helmet, Blue)",0.03629,0.09568,0.025701,0.708191,7.401629,0.022228,3.099013,0.897464
26,"(Long-Sleeve Logo Jersey, L, AWC Logo Cap)","(Sport-100 Helmet, Black)",0.03629,0.09311,0.02536,0.698805,7.505129,0.021981,3.010976,0.899397
