In [1]:
import sys
import os

# Resolve the directory one level above the current working directory and
# keep it in `project_root`; a later cell builds the data path from it.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Register that directory on the import path. The membership check keeps a
# re-run of this cell from appending a duplicate entry.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)


In [2]:
from src.data_cleaning import load_and_clean_data
from src.transaction_encoder import create_basket, one_hot_encode
from src.apriori_analysis import run_apriori
from src.association_rules import generate_rules


In [3]:
# CSV location: <project_root>/data/Online_Retail.csv
data_path = os.path.join(project_root, "data", "Online_Retail.csv")

# Run the project helpers in order: load/clean the CSV, then build the basket
# structure from the cleaned frame.
df = load_and_clean_data(data_path)
basket = create_basket(df)

# Keep only the basket entries whose length is greater than 1.
# NOTE(review): presumably each entry is one transaction's item collection,
# so this drops single-item transactions — confirm against create_basket.
has_multiple_items = basket.apply(len) > 1
basket = basket[has_multiple_items]

# One-hot encode the remaining baskets and mine frequent itemsets with a
# minimum support of 0.02.
encoded_df = one_hot_encode(basket)
frequent_itemsets = run_apriori(encoded_df, min_support=0.02)


In [4]:
# Derive rules from the frequent itemsets, passing "lift" as the metric and
# 1.2 as its minimum threshold to the project's generate_rules helper.
rules = generate_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

# Preview the first rows of the resulting rule table.
rules.head()


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(wooden star christmas scandinavian),(wooden heart christmas scandinavian),0.027844,0.029316,0.021142,0.759295,25.900727,1.0,0.020326,4.032681,0.988927,0.586989,0.752026,0.740243
1,(wooden heart christmas scandinavian),(wooden star christmas scandinavian),0.029316,0.027844,0.021142,0.72119,25.900727,1.0,0.020326,3.486798,0.990426,0.586989,0.713204,0.740243
2,"(roses regency teacup and saucer, green regenc...",(pink regency teacup and saucer),0.041848,0.041467,0.029534,0.705729,17.019109,1.0,0.027798,3.257316,0.982352,0.549139,0.692999,0.708975
3,(pink regency teacup and saucer),"(roses regency teacup and saucer, green regenc...",0.041467,0.041848,0.029534,0.712221,17.019109,1.0,0.027798,3.329468,0.981961,0.549139,0.699652,0.708975
4,"(pink regency teacup and saucer, roses regency...",(green regency teacup and saucer),0.032639,0.055307,0.029534,0.904841,16.360246,1.0,0.027728,9.92756,0.970555,0.505597,0.89927,0.719416


In [5]:
# Narrow the rule table to the rule itself plus its three headline metrics,
# then show the first ten rows.
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules[summary_columns].head(10)


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(wooden star christmas scandinavian),(wooden heart christmas scandinavian),0.021142,0.759295,25.900727
1,(wooden heart christmas scandinavian),(wooden star christmas scandinavian),0.021142,0.72119,25.900727
2,"(roses regency teacup and saucer, green regenc...",(pink regency teacup and saucer),0.029534,0.705729,17.019109
3,(pink regency teacup and saucer),"(roses regency teacup and saucer, green regenc...",0.029534,0.712221,17.019109
4,"(pink regency teacup and saucer, roses regency...",(green regency teacup and saucer),0.029534,0.904841,16.360246
5,(green regency teacup and saucer),"(pink regency teacup and saucer, roses regency...",0.029534,0.53399,16.360246
6,(pink regency teacup and saucer),(green regency teacup and saucer),0.034492,0.8318,15.039604
7,(green regency teacup and saucer),(pink regency teacup and saucer),0.034492,0.623645,15.039604
8,"(pink regency teacup and saucer, green regency...",(roses regency teacup and saucer),0.029534,0.85624,14.782426
9,(roses regency teacup and saucer),"(pink regency teacup and saucer, green regency...",0.029534,0.509878,14.782426


In [6]:
# Keep only rules that satisfy both cut-offs: confidence of at least 0.6
# and lift of at least 1.5.
high_conf_rules = rules[rules['confidence'].ge(0.6) & rules['lift'].ge(1.5)]

# Preview the first rows of the filtered rule table.
high_conf_rules.head()


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(wooden star christmas scandinavian),(wooden heart christmas scandinavian),0.027844,0.029316,0.021142,0.759295,25.900727,1.0,0.020326,4.032681,0.988927,0.586989,0.752026,0.740243
1,(wooden heart christmas scandinavian),(wooden star christmas scandinavian),0.029316,0.027844,0.021142,0.72119,25.900727,1.0,0.020326,3.486798,0.990426,0.586989,0.713204,0.740243
2,"(roses regency teacup and saucer, green regenc...",(pink regency teacup and saucer),0.041848,0.041467,0.029534,0.705729,17.019109,1.0,0.027798,3.257316,0.982352,0.549139,0.692999,0.708975
3,(pink regency teacup and saucer),"(roses regency teacup and saucer, green regenc...",0.041467,0.041848,0.029534,0.712221,17.019109,1.0,0.027798,3.329468,0.981961,0.549139,0.699652,0.708975
4,"(pink regency teacup and saucer, roses regency...",(green regency teacup and saucer),0.032639,0.055307,0.029534,0.904841,16.360246,1.0,0.027728,9.92756,0.970555,0.505597,0.89927,0.719416
