# Practicumopdrachten Week 9.2 (eerste kans)

In [117]:
from loguru import logger # pip install loguru
import warnings
import pandas as pd
import sqlite3
from mlxtend.frequent_patterns import apriori, association_rules
warnings.simplefilter('ignore')

In [118]:
# SQLite connection
logger.info('Establishing connection with SQLite databases...')
conn = sqlite3.connect('../data/processed/United_Outdoors.sqlite3')  # relative path: resolved against the current working directory
logger.success('Connections established!')

# Cursor on the same connection. The pandas queries visible in this notebook
# pass `conn` directly and do not use it.
cursor = conn.cursor()

2024-05-26 08:44:11.852 | INFO     | __main__:<module>:2 - Establishing connection with SQLite databases...
2024-05-26 08:44:11.852 | SUCCESS  | __main__:<module>:4 - Connections established!


In [119]:
# Order lines: row id, both order-id columns, both quantity columns, the
# modification timestamp and both product-id columns.
_order_lines_sql = '''
    SELECT id, SalesOrderID, OrderID, Quantity, OrderQty, ModifiedDate, ProductID_AW, ProductID_NW
    FROM Order_Details
'''
orders = pd.read_sql_query(sql=_order_lines_sql, con=conn)

# Product names together with the two product-id columns that are used
# further down as the join key against the order lines.
_product_names_sql = '''
    SELECT Name, ProductID_AW, ProductID_NW
    FROM Product p 
'''
products = pd.read_sql_query(sql=_product_names_sql, con=conn)

In [120]:
# Mining thresholds. Both are so close to zero that they filter out next to
# nothing; raise them to prune rare itemsets / weak rules.
minsup = 1e-11   # minimum support handed to apriori
minconf = 1e-6   # minimum confidence a rule must reach to be kept

In [122]:
# Parse ModifiedDate into real timestamps so the sort below is chronological.
orders['ModifiedDate'] = pd.to_datetime(orders['ModifiedDate'])

# Keep only the most recently modified row per (SalesOrderID, OrderID) pair.
# NOTE(review): this leaves exactly one row per order pair. If Order_Details
# stores one row per product line, every other line of the order is discarded
# here and each order ends up with a single product - confirm this is intended.
orders = (
    orders
    .sort_values(by='ModifiedDate')
    .drop_duplicates(subset=['SalesOrderID', 'OrderID'], keep='last')
)

# Single quantity column: Quantity where it is present, otherwise OrderQty.
orders['Quantity'] = orders['Quantity'].combine_first(orders['OrderQty'])

# Attach product names via both product-id columns (inner join, so order rows
# without a matching product are dropped) and keep only the columns needed
# for the basket matrix.
ordered_products = orders.merge(
    products,
    how='inner',
    on=['ProductID_AW', 'ProductID_NW'],
)[['id', 'Name', 'Quantity']]

ordered_products

Unnamed: 0,id,Name,Quantity
0,82758,"Road-350-W Yellow, 44",4.0
1,24185,"Mountain-200 Black, 38",1.0
2,24186,"Road-250 Red, 58",1.0
3,24187,"Road-650 Black, 52",1.0
4,24188,"Road-550-W Yellow, 40",1.0
...,...,...,...
32309,110264,Mountain Tire Tube,1.0
32310,110266,Water Bottle - 30 oz.,1.0
32311,110268,Road Tire Tube,1.0
32312,110301,"Mountain-200 Black, 46",1.0


In [123]:
# Build the basket matrix that apriori consumes: one row per `id`, one column
# per product name, each cell holding the number of non-null Quantity entries
# for that (id, Name) pair; combinations that never occur become 0.
# NOTE(review): baskets are keyed on `id`. If `id` is unique per Order_Details
# row rather than per order, every basket holds a single product and no
# multi-item itemsets can be found - confirm which identifier was intended.
_pair_counts = ordered_products.groupby(['id', 'Name'])['Quantity'].count()
basket = _pair_counts.unstack(level='Name').fillna(0)
basket.head()

Name,AWC Logo Cap,Alice Mutton,All-Purpose Bike Stand,Aniseed Syrup,Baseball Cap,Bike Wash - Dissolver,Boston Crab Meat,Cable Lock,Camembert Pierrot,Carnarvon Tigers,...,Visor,Water Bottle - 30 oz.,Wimmers gute Semmelknödel,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Tights, L","Women's Tights, M","Women's Tights, S",Zaanse koeken
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [124]:
# hot encode the basket
def hot_encode(x):
    """Map a basket cell to a 0/1 presence flag.

    Args:
        x: Numeric cell value, e.g. how often a product occurs in a basket.

    Returns:
        1 if ``x`` is at least 1, otherwise 0. Values strictly between 0 and 1
        and NaN (for which the comparison is false) also yield 0, so the
        result is always an int and never ``None``.
    """
    return 1 if x >= 1 else 0
    
# Vectorised one-hot encoding: a cell becomes 1 when its count is at least 1
# and 0 otherwise. For the non-negative count matrix in `basket` this yields
# the same 0/1 integer table as calling hot_encode on every cell, but without
# DataFrame.applymap, which pandas deprecated in 2.1.
basket_sets = basket.ge(1).astype('int64')
basket_sets

Name,AWC Logo Cap,Alice Mutton,All-Purpose Bike Stand,Aniseed Syrup,Baseball Cap,Bike Wash - Dissolver,Boston Crab Meat,Cable Lock,Camembert Pierrot,Carnarvon Tigers,...,Visor,Water Bottle - 30 oz.,Wimmers gute Semmelknödel,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Tights, L","Women's Tights, M","Women's Tights, S",Zaanse koeken
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
52,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124089,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124091,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124093,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124095,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [125]:
# Mine every itemset whose support reaches `minsup`; use_colnames=True makes
# the itemsets carry product names instead of column positions.
frequent_itemsets = apriori(
    basket_sets,
    min_support=minsup,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.019352,(AWC Logo Cap)
1,0.000774,(Alice Mutton)
2,0.001610,(All-Purpose Bike Stand)
3,0.000248,(Aniseed Syrup)
4,0.006471,(Bike Wash - Dissolver)
...,...,...
333,0.007834,"(Women's Mountain Shorts, S)"
334,0.000774,"(Women's Tights, L)"
335,0.000186,"(Women's Tights, M)"
336,0.000867,"(Women's Tights, S)"


In [126]:
# Derive association rules with lift >= 1 from the frequent itemsets, rank
# them by confidence and then lift (both descending), and keep only the rules
# whose confidence reaches `minconf`.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
    .loc[lambda ranked: ranked['confidence'] >= minconf]
)

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
