 # **Association rule mining**

In [1]:
# Import the packages
import numpy as np

In [2]:
#load the transactions dataset
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Loading the data
def load_dataset(path_to_data):
    """Read a comma-separated transactions file into a list of item lists.

    Every line of the file is treated as one transaction whose items are
    separated by commas. Whitespace around each item is stripped, and empty
    items (produced by blank lines, trailing commas or doubled commas) are
    discarded so they cannot surface later as a spurious '' item.

    Args:
        path_to_data: Path of the text file to read.

    Returns:
        A list of transactions in file order; each transaction is a list of
        non-empty item-name strings. Lines that contain no items are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    transactions = []
    with open(path_to_data, 'r') as fid:
        for line in fid:
            # Strip per item (not just per line) so "a, b" and "a,b" agree.
            items = [item.strip() for item in line.split(',')]
            items = [item for item in items if item]
            # A blank line would otherwise become the bogus transaction [''].
            if items:
                transactions.append(items)
    return transactions

# Location of the transactions file (one comma-separated transaction per line)
path_to_data = "transactions_data.txt"

# Read every transaction, then echo them under a heading
dataset = load_dataset(path_to_data)
print("Loaded Transactions:", dataset, sep="\n")

Loaded Transactions:
[['Lime', 'Queso', 'Salsa', 'Salt', 'Tortilla chips'], ['Ranch dip', 'Salsa', 'Tortilla chips'], ['Queso', 'Tortilla chips'], ['Potato chips', 'Ranch dip'], ['Salsa', 'Tortilla chips'], ['Queso', 'Salsa', 'Tortilla chips'], ['Pita chips', 'Ranch dip'], ['Guacamole', 'Tortilla chips'], ['Guacamole', 'Queso', 'Salsa', 'Tortilla chips'], ['Pita chips', 'Salsa']]


In [3]:
# One-hot encode the transactions: one boolean column per distinct item
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
print("\nDataFrame after Transaction Encoding:", df.head(), sep="\n")

# Mine itemsets whose support is at least 0.2
frequent_itemsets = apriori(
    df,
    min_support=0.2,
    use_colnames=True,
)
print("\nFrequent Itemsets:", frequent_itemsets, sep="\n")

# Derive rules from those itemsets, keeping confidence >= 0.5
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)
print("\nAssociation Rules:", rules, sep="\n")


DataFrame after Transaction Encoding:
   Guacamole   Lime  Pita chips  Potato chips  Queso  Ranch dip  Salsa   Salt  \
0      False   True       False         False   True      False   True   True   
1      False  False       False         False  False       True   True  False   
2      False  False       False         False   True      False  False  False   
3      False  False       False          True  False       True  False  False   
4      False  False       False         False  False      False   True  False   

   Tortilla chips  
0            True  
1            True  
2            True  
3           False  
4            True  

Frequent Itemsets:
    support                        itemsets
0       0.2                     (Guacamole)
1       0.2                    (Pita chips)
2       0.4                         (Queso)
3       0.3                     (Ranch dip)
4       0.6                         (Salsa)
5       0.7                (Tortilla chips)
6       0.2     (Tortilla 

  and should_run_async(code)
