# Compare Apriori vs FP-Growth (parameter sensitivity)

In [None]:
# --- Notebook parameters ---------------------------------------------------

# Input: parquet file containing the basket table that both miners consume.
basket_path = "data/processed/basket_bool.parquet"
# Output: CSV that receives one metrics row per (algorithm, min_support) run.
output_metrics_path = "data/processed/compare_metrics.csv"

# Minimum-support thresholds to sweep, listed from largest to smallest.
support_grid = [0.03, 0.02, 0.015, 0.01, 0.0075, 0.005]

# Rule thresholds passed unchanged to every miner run.
min_confidence = 0.3
min_lift = 1.0

In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from src.apriori_library import AssociationRulesMiner, FPGrowthMiner

In [None]:
# Load the basket table and create one miner instance per algorithm.
basket_bool = pd.read_parquet(basket_path)
ap = AssociationRulesMiner()
fp = FPGrowthMiner()

# Sweep the support grid. For each threshold the miners run back to back
# (Apriori first, then FP-Growth) and each run contributes one metrics row.
rows = []
for ms in support_grid:
    for algo_name, miner in (("apriori", ap), ("fpgrowth", fp)):
        started = time.perf_counter()
        itemsets, rules = miner.run(basket_bool, ms, min_confidence, min_lift)
        elapsed = time.perf_counter() - started

        # The mean itemset length is undefined when nothing was mined.
        if len(itemsets):
            avg_len = float(itemsets["length"].mean())
        else:
            avg_len = np.nan

        rows.append(
            {
                "algo": algo_name,
                "min_support": ms,
                "time_sec": elapsed,
                "n_itemsets": len(itemsets),
                "n_rules": len(rules),
                "avg_len": avg_len,
            }
        )

metrics = pd.DataFrame(rows)
metrics


In [None]:
# Persist the collected metrics before plotting them.
metrics.to_csv(output_metrics_path, index=False)

# One chart per metric, one line per algorithm. Each entry is
# (metrics column, figure title, y-axis label).
chart_specs = (
    ("time_sec", "Runtime vs min_support", "time (sec)"),
    ("n_rules", "Number of rules vs min_support", "#rules"),
)

for column, title, y_label in chart_specs:
    fig, ax = plt.subplots(figsize=(6, 4))
    for algo, group in metrics.groupby("algo"):
        ax.plot(group["min_support"], group[column], marker="o", label=algo)
    # Flip the x-axis so min_support decreases from left to right.
    ax.invert_xaxis()
    ax.set_title(title)
    ax.set_xlabel("min_support")
    ax.set_ylabel(y_label)
    ax.legend()
    plt.show()
