# Practicumopdrachten Week 9.2 (eerste kans)

In [148]:
from loguru import logger # pip install loguru
import warnings
import pandas as pd
import sqlite3
from mlxtend.frequent_patterns import apriori, association_rules
warnings.simplefilter('ignore')

In [149]:
# SQLite Connecties
logger.info('Establishing connection with SQLite databases...')
conn = sqlite3.connect('../data/processed/United_Outdoors.sqlite3')
logger.success('Connections established!')

# Connect to the SQLite database
cursor = conn.cursor()

[32m2024-05-26 12:52:52.775[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mEstablishing connection with SQLite databases...[0m
[32m2024-05-26 12:52:52.775[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [32m[1mConnections established![0m


In [150]:
AW_ordered_products = pd.read_sql_query('''
    SELECT o.SalesOrderID, p.Name, o.OrderQty
    FROM Order_Details o
    INNER JOIN 'product' p ON p.ProductID_AW = o.ProductID_AW                             
''', conn)

NW_ordered_products = pd.read_sql_query('''
    SELECT o.OrderID, p.Name, o.Quantity
    FROM Order_Details o
    INNER JOIN 'product' p ON p.ProductID_NW = o.ProductID_NW                             
''', conn)

AW_ordered_products = AW_ordered_products.rename(columns={'SalesOrderID': 'OrderID', 'OrderQty': 'Quantity'})

ordered_products = pd.concat([AW_ordered_products, NW_ordered_products], ignore_index=True)

ordered_products = ordered_products.drop_duplicates()


In [151]:
minsup = 0.01
minconf = 0.01

In [152]:
# implement the apriori algorithm
basket = (ordered_products
          .groupby(['OrderID', 'Name'])['Quantity']
          .count().unstack().reset_index().fillna(0)
          .set_index('OrderID'))
basket.head()

Name,AWC Logo Cap,Alice Mutton,All-Purpose Bike Stand,Aniseed Syrup,Bike Wash - Dissolver,Boston Crab Meat,Cable Lock,Camembert Pierrot,Carnarvon Tigers,Chai,...,Vegie-spread,Water Bottle - 30 oz.,Wimmers gute Semmelknödel,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Tights, L","Women's Tights, M","Women's Tights, S",Zaanse koeken
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10248.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10249.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10252.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [153]:
# hot encode the basket
def hot_encode(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1
    
basket_sets = basket.applymap(hot_encode)
basket_sets

Name,AWC Logo Cap,Alice Mutton,All-Purpose Bike Stand,Aniseed Syrup,Bike Wash - Dissolver,Boston Crab Meat,Cable Lock,Camembert Pierrot,Carnarvon Tigers,Chai,...,Vegie-spread,Water Bottle - 30 oz.,Wimmers gute Semmelknödel,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Tights, L","Women's Tights, M","Women's Tights, S",Zaanse koeken
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10248.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10249.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10250.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10251.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10252.0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75119.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75120.0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75121.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75122.0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [154]:
# Generate frequent itemsets
frequent_itemsets = apriori(basket_sets, min_support=minsup, use_colnames=True)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.104722,(AWC Logo Cap)
1,0.041090,(Bike Wash - Dissolver)
2,0.017185,"(Classic Vest, M)"
3,0.021118,"(Classic Vest, S)"
4,0.065676,(Fender Set - Mountain)
...,...,...
1853,0.010094,"(Long-Sleeve Logo Jersey, L, Water Bottle - 30..."
1854,0.010063,"(Long-Sleeve Logo Jersey, L, Water Bottle - 30..."
1855,0.010033,"(Long-Sleeve Logo Jersey, L, Water Bottle - 30..."
1856,0.010094,"(Long-Sleeve Logo Jersey, L, Water Bottle - 30..."


In [155]:
# generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])
rules = rules[rules['confidence'] >= minconf]

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
9773,"(Classic Vest, M, Classic Vest, S, AWC Logo Cap)","(Short-Sleeve Classic Jersey, XL, Short-Sleeve...",0.010063,0.011736,0.010063,1.000000,85.211082,0.009945,inf,0.998311
13701,"(Bike Wash - Dissolver, Classic Vest, M, Class...","(Short-Sleeve Classic Jersey, XL, Short-Sleeve...",0.010094,0.011736,0.010094,1.000000,85.211082,0.009976,inf,0.998342
14993,"(Sport-100 Helmet, Black, Bike Wash - Dissolve...","(Short-Sleeve Classic Jersey, XL, Short-Sleeve...",0.010559,0.011736,0.010559,1.000000,85.211082,0.010435,inf,0.998811
15021,"(Bike Wash - Dissolver, Classic Vest, S, Sport...","(Short-Sleeve Classic Jersey, XL, Short-Sleeve...",0.010373,0.011736,0.010373,1.000000,85.211082,0.010251,inf,0.998623
16341,"(Classic Vest, M, Classic Vest, S, Hitch Rack ...","(Short-Sleeve Classic Jersey, XL, Short-Sleeve...",0.010002,0.011736,0.010002,1.000000,85.211082,0.009884,inf,0.998248
...,...,...,...,...,...,...,...,...,...,...
9881,(Water Bottle - 30 oz.),"(AWC Logo Cap, Short-Sleeve Classic Jersey, XL...",0.145162,0.010125,0.010002,0.068899,6.804598,0.008532,1.063123,0.997897
16422,(Water Bottle - 30 oz.),"(Long-Sleeve Logo Jersey, L, Classic Vest, M, ...",0.145162,0.010125,0.010002,0.068899,6.804598,0.008532,1.063123,0.997897
16452,(Water Bottle - 30 oz.),"(Long-Sleeve Logo Jersey, L, Short-Sleeve Clas...",0.145162,0.010125,0.010002,0.068899,6.804598,0.008532,1.063123,0.997897
33617,(Water Bottle - 30 oz.),"(Long-Sleeve Logo Jersey, L, Classic Vest, M, ...",0.145162,0.010125,0.010002,0.068899,6.804598,0.008532,1.063123,0.997897
