In [7]:
import pandas as pd
from itertools import combinations

# Step 1: Create a sample transaction dataset (this represents purchases made in each transaction)
data = {
    'Milk': [1, 1, 1, 0, 1],
    'Bread': [1, 1, 0, 1, 1],
    'Butter': [0, 1, 1, 1, 1],
    'Cheese': [0, 0, 1, 1, 1],
    'Eggs': [1, 0, 1, 1, 1]
}

# Convert the data dictionary to a pandas DataFrame
df = pd.DataFrame(data)

# Step 2: Convert the dataset to a list of transactions
transactions = []
for _, row in df.iterrows():
    transactions.append([item for item in df.columns if row[item] == 1])

# Helper function to calculate support
def calculate_support(itemset, transactions):
    count = 0
    for transaction in transactions:
        if set(itemset).issubset(transaction):
            count += 1
    return count / len(transactions)

# Step 3: Generate frequent itemsets using the Apriori algorithm
def apriori(transactions, min_support):
    itemsets = []
    # Start with single itemsets
    items = list(df.columns)
    single_itemsets = [[item] for item in items]
    
    # Check support for single itemsets
    frequent_itemsets = []
    for itemset in single_itemsets:
        support = calculate_support(itemset, transactions)
        if support >= min_support:
            frequent_itemsets.append((itemset, support))
    
    itemsets.append(frequent_itemsets)
    
    # Generate higher-order itemsets
    k = 2
    while True:
        candidate_itemsets = []
        # Generate candidate itemsets of length k by combining frequent itemsets of length k-1
        for i in range(len(frequent_itemsets)):
            for j in range(i + 1, len(frequent_itemsets)):
                candidate = list(set(frequent_itemsets[i][0]) | set(frequent_itemsets[j][0]))
                if len(candidate) == k and candidate not in candidate_itemsets:
                    candidate_itemsets.append(candidate)
        
        # Check support for candidate itemsets
        new_frequent_itemsets = []
        for itemset in candidate_itemsets:
            support = calculate_support(itemset, transactions)
            if support >= min_support:
                new_frequent_itemsets.append((itemset, support))
        
        if not new_frequent_itemsets:
            break
        
        itemsets.append(new_frequent_itemsets)
        frequent_itemsets = new_frequent_itemsets
        k += 1

    return itemsets

# Task 1: Apply the Apriori algorithm with min_support=0.07 (7%)
min_support_7 = 0.07
frequent_itemsets_7 = apriori(transactions, min_support_7)

print("Frequent Itemsets with min_support=0.07 (7%):")
for level, itemsets in enumerate(frequent_itemsets_7):
    print(f"Level {level + 1}:")
    for itemset, support in itemsets:
        print(f"Itemset: {itemset}, Support: {support:.4f}")

# Task 2: Apply the Apriori algorithm with min_support=0.05 (5%)
min_support_5 = 0.05
frequent_itemsets_5 = apriori(transactions, min_support_5)

print("\nFrequent Itemsets with min_support=0.05 (5%):")
for level, itemsets in enumerate(frequent_itemsets_5):
    print(f"Level {level + 1}:")
    for itemset, support in itemsets:
        print(f"Itemset: {itemset}, Support: {support:.4f}")


Frequent Itemsets with min_support=0.07 (7%):
Level 1:
Itemset: ['Milk'], Support: 0.8000
Itemset: ['Bread'], Support: 0.8000
Itemset: ['Butter'], Support: 0.8000
Itemset: ['Cheese'], Support: 0.6000
Itemset: ['Eggs'], Support: 0.8000
Level 2:
Itemset: ['Bread', 'Milk'], Support: 0.6000
Itemset: ['Butter', 'Milk'], Support: 0.6000
Itemset: ['Cheese', 'Milk'], Support: 0.4000
Itemset: ['Eggs', 'Milk'], Support: 0.6000
Itemset: ['Bread', 'Butter'], Support: 0.6000
Itemset: ['Cheese', 'Bread'], Support: 0.4000
Itemset: ['Eggs', 'Bread'], Support: 0.6000
Itemset: ['Cheese', 'Butter'], Support: 0.6000
Itemset: ['Eggs', 'Butter'], Support: 0.6000
Itemset: ['Cheese', 'Eggs'], Support: 0.6000
Level 3:
Itemset: ['Bread', 'Butter', 'Milk'], Support: 0.4000
Itemset: ['Cheese', 'Bread', 'Milk'], Support: 0.2000
Itemset: ['Eggs', 'Bread', 'Milk'], Support: 0.4000
Itemset: ['Cheese', 'Butter', 'Milk'], Support: 0.4000
Itemset: ['Eggs', 'Butter', 'Milk'], Support: 0.4000
Itemset: ['Cheese', 'Eggs', '