In [1]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import pandas as pd

In [2]:
# Sample dataset: each transaction id mapped to the basket of items it contains
baskets = {
    'T1': ['A', 'B', 'D'],
    'T2': ['B', 'C'],
    'T3': ['A', 'B', 'C', 'E'],
    'T4': ['A', 'C', 'E'],
    'T5': ['A', 'B', 'D'],
}

# Column-oriented view of the same data (dicts preserve insertion order,
# so ids and baskets stay aligned row by row)
data = {
    'Transaction': list(baskets.keys()),
    'Items': list(baskets.values()),
}

# One row per transaction; the 'Items' column holds Python lists
df = pd.DataFrame(data)

In [3]:
# Transform the transaction data into a one-hot encoded DataFrame.
# Each item list is joined with '|' and split back on the same separator by
# str.get_dummies, giving one 0/1 column per distinct item (so item labels
# must not themselves contain '|'). Columns are prefixed with 'Item_'.
# The result is cast to bool because mlxtend's apriori expects a boolean
# one-hot frame and raises a DeprecationWarning for integer 0/1 input.
onehot = (
    df['Items']
    .str.join('|')
    .str.get_dummies(sep='|')
    .add_prefix('Item_')
    .astype(bool)
)

In [4]:
# Run Apriori on the one-hot frame: keep itemsets with support of at least 0.4
# and report them by column name (use_colnames=True).
frequent_itemsets = apriori(
    onehot,
    min_support=0.4,
    use_colnames=True,
)

In [5]:
# Derive association rules from the frequent itemsets, selecting rules by
# the "lift" metric with a minimum threshold of 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

In [6]:
# Print each result table under its heading; the second heading starts with
# a newline so a blank line separates the two sections.
for heading, table in (
    ("Frequent Itemsets:", frequent_itemsets),
    ("\nAssociation Rules:", rules),
):
    print(heading)
    print(table)

Frequent Itemsets:
    support                  itemsets
0       0.8                  (Item_A)
1       0.8                  (Item_B)
2       0.6                  (Item_C)
3       0.4                  (Item_D)
4       0.4                  (Item_E)
5       0.6          (Item_B, Item_A)
6       0.4          (Item_C, Item_A)
7       0.4          (Item_D, Item_A)
8       0.4          (Item_E, Item_A)
9       0.4          (Item_B, Item_C)
10      0.4          (Item_B, Item_D)
11      0.4          (Item_E, Item_C)
12      0.4  (Item_B, Item_D, Item_A)
13      0.4  (Item_E, Item_C, Item_A)

Association Rules:
         antecedents       consequents  antecedent support  \
0           (Item_D)          (Item_A)                 0.4   
1           (Item_A)          (Item_D)                 0.8   
2           (Item_E)          (Item_A)                 0.4   
3           (Item_A)          (Item_E)                 0.8   
4           (Item_B)          (Item_D)                 0.8   
5           (Item_D