## Imports

In [None]:
from time import perf_counter
from time import time

import numpy as np

from apriori import A_Priori

In [None]:
# Load the baskets: every line of sale_data.dat is one basket, given as
# whitespace-separated integer item ids.

with open('sale_data.dat') as f:
    # Iterating the handle yields the same lines as readlines(); each line
    # is tokenised and turned into a set of ints.
    data: list[set[int]] = [set(map(int, line.split())) for line in f]

# Total number of baskets read from the file.
n_baskets = len(data)
print(f'The number of baskets is: {n_baskets}')

# Quick look at the first four baskets.
print(data[:4])

## Find frequent itemsets

In [None]:
# Mining parameters, shared by the cells below.
s = 500  # support threshold as an absolute basket count (divided by n_baskets for mlxtend)
c = 0.5  # confidence threshold used when mining rules
k = 3    # largest itemset size to search for

# Time the search with perf_counter(): it is monotonic and high-resolution,
# whereas time() follows the wall clock and can jump if the system clock is
# adjusted while the cell is running.
start = perf_counter()
frequent_itemsets = A_Priori.get_frequent_itemsets(data, k, s)
delta_t = perf_counter() - start

print(f'The itemsets until k={k} have been found in {delta_t} seconds')

## Mine frequent rules

In [None]:
# Derive rules from the frequent itemsets, keeping those that meet the
# confidence threshold c. Timed with perf_counter() because it is monotonic,
# unlike the wall-clock time(), so the elapsed value cannot be skewed by
# system clock adjustments.
start = perf_counter()
rules = A_Priori.mine_frequent_rules(frequent_itemsets, c)
delta_t = perf_counter() - start

print(f'The rules have been found in {delta_t} seconds')

## Check the results against an existing library

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# One-hot encode the baskets into a boolean DataFrame (one column per item),
# which is the input format expected by mlxtend's frequent-pattern functions.
te = TransactionEncoder()
te_ary = te.fit(data).transform(data)
df = pd.DataFrame(te_ary, columns=te.columns_)  # type: ignore

# mlxtend takes the support as a fraction of baskets, so the absolute
# threshold s is divided by the number of baskets; max_len caps itemset size at k.
ml_fp = fpgrowth(df, min_support=s/n_baskets, use_colnames=True, max_len=k)
ml_rules = association_rules(ml_fp, metric="confidence", min_threshold=c)

# Use the builtin sum so the totals are always ints: np.sum over an empty
# list returns the float 0.0, which would be printed as "0.0".
n_itemsets = sum(len(v) for v in frequent_itemsets.values())
n_rules = sum(len(rules[x]) for x in rules)

print(f'The number of frequent itemsets found by the mlxtend library is: {len(ml_fp)}')
print(f'The number of frequent itemsets found by the apriori algorithm is: {n_itemsets}\n')

print(f'The number of rules found by the mlxtend library is: {len(ml_rules)}')
print(f'The number of rules found by the apriori algorithm is: {n_rules} \n')

## Some plots

In [None]:
# Hand the parsed baskets to the project's plotting helper.
# NOTE(review): which plots are produced is defined in apriori.py, not visible here.
A_Priori.some_plots(data)