# In [3]:
def generate_candidates(freq_itemsets, k):
    """Build the size-``k`` candidate itemsets from frequent (k-1)-itemsets.

    Two frequent itemsets are joined when their union holds exactly ``k``
    items. This test does not depend on the iteration order of the sets,
    which is arbitrary for ``frozenset`` and therefore cannot be used to
    compare "prefixes". A joined candidate is kept only if every one of its
    (k-1)-item subsets is itself frequent (the Apriori prune step), because
    an itemset with an infrequent subset can never be frequent.

    Args:
        freq_itemsets: Iterable of frequent itemsets (each an iterable of
            hashable items). Only itemsets holding exactly ``k - 1`` items
            take part in the join; others are ignored.
        k: Size of the candidate itemsets to generate.

    Returns:
        A set of ``frozenset`` candidates, each containing ``k`` items.
    """
    from itertools import combinations

    # Normalise to frozensets, drop duplicates and anything of the wrong size.
    frequent = {
        itemset
        for itemset in map(frozenset, freq_itemsets)
        if len(itemset) == k - 1
    }

    candidates = set()
    for first, second in combinations(frequent, 2):
        candidate = first | second

        # Distinct (k-1)-itemsets give a k-itemset only when they share
        # exactly k-2 items; any larger union is not a valid join.
        if len(candidate) != k or candidate in candidates:
            continue

        # Prune: all (k-1)-subsets must already be known to be frequent.
        if all(candidate - {item} in frequent for item in candidate):
            candidates.add(candidate)

    return candidates

def scan_dataset(dataset, candidates):
    """Count how many transactions contain each candidate itemset.

    Args:
        dataset: Iterable of transactions; each transaction is an iterable
            of items.
        candidates: Collection of candidate itemsets (objects providing
            ``issubset``, e.g. ``frozenset``).

    Returns:
        A dict mapping every candidate to the number of transactions it is
        a subset of. Candidates that never occur are present with count 0.
    """
    # Every candidate starts at zero so unseen itemsets are still reported.
    support_counts = dict.fromkeys(candidates, 0)

    for basket in dataset:
        for itemset in candidates:
            if not itemset.issubset(basket):
                continue
            support_counts[itemset] += 1

    return support_counts

def filter_itemsets(itemset_counts, min_support, total_transactions):
    """Keep only the itemsets whose relative support reaches ``min_support``.

    Args:
        itemset_counts: Dict mapping itemset -> occurrence count.
        min_support: Minimum fraction of transactions an itemset must
            appear in to be kept.
        total_transactions: Number of transactions the counts were taken
            over; used as the divisor when computing support.

    Returns:
        A new dict with the surviving itemsets and their original counts.
    """
    frequent = {}
    for itemset, occurrences in itemset_counts.items():
        support = occurrences / total_transactions
        if support >= min_support:
            frequent[itemset] = occurrences
    return frequent

def apriori(dataset, min_support):
    """Find all frequent itemsets in ``dataset`` with the Apriori algorithm.

    The search proceeds level by level: frequent single items first, then
    candidates of size 2 built from them, and so on until a level yields no
    candidates or no frequent itemsets.

    Args:
        dataset: Sized collection of transactions, each an iterable of
            hashable items.
        min_support: Minimum fraction of transactions an itemset must
            appear in to count as frequent.

    Returns:
        A dict mapping each frequent itemset (``frozenset``) to the number
        of transactions that contain it.
    """
    # Level 1: every distinct item forms its own one-element itemset.
    singletons = set()
    for transaction in dataset:
        for item in transaction:
            singletons.add(frozenset([item]))

    singleton_counts = scan_dataset(dataset, singletons)
    n_transactions = len(dataset)

    current_level = filter_itemsets(singleton_counts, min_support, n_transactions)
    frequent = dict(current_level)

    # Grow the itemset size until a level produces nothing new.
    size = 2
    while current_level:
        candidates = generate_candidates(list(current_level), size)
        if not candidates:
            break

        candidate_counts = scan_dataset(dataset, candidates)
        current_level = filter_itemsets(candidate_counts, min_support, n_transactions)
        frequent.update(current_level)

        size += 1

    return frequent

def main():
    """Run Apriori on a small grocery-style sample and print the result."""
    # Each transaction is the set of items bought together.
    transactions = [
        {'milk', 'bread', 'butter'},
        {'milk', 'bread'},
        {'milk', 'butter'},
        {'bread', 'butter'},
        {'milk', 'bread', 'butter', 'eggs'},
        {'bread', 'eggs'}
    ]

    # An itemset must appear in at least half of the transactions.
    support_threshold = 0.5

    frequent = apriori(transactions, support_threshold)

    # Report each frequent itemset together with its transaction count.
    print("Frequent Itemsets:")
    for itemset in frequent:
        count = frequent[itemset]
        print(f"{set(itemset)}: {count}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# Output:
# Frequent Itemsets:
# {'milk'}: 4
# {'bread'}: 5
# {'butter'}: 4
# {'milk', 'bread'}: 3
# {'butter', 'bread'}: 3
# {'milk', 'butter'}: 3
