In [2]:
import itertools

# Sample dataset
transactions = [
    ["Star Wars", "Marvel", "DC Comics"],
    ["Star Wars", "Star Trek"],
    ["Marvel", "DC Comics"],
    ["Star Wars", "Marvel"],
    ["Star Trek", "DC Comics"],
    ["Star Wars", "DC Comics"],
    ["Star Trek", "Star Wars"],
    ["DC Comics", "Marvel"],
    ["Star Wars", "Star Trek", "Marvel"],
    ["DC Comics"]
]


# Function to generate candidate itemsets of length k
def generate_candidates(itemsets, length):
    return set(itertools.combinations(itemsets, length))

# Function to filter out itemsets that meet the minimum support threshold
def filter_frequent_itemsets(transactions, candidates, min_support):
    # Step 1: Create a dictionary to count occurrences of each candidate itemset
    itemset_count = {}
    
    # Step 2: Loop through each transaction in the transactions list
    for transaction in transactions:
        # Step 3: Check each candidate itemset
        for candidate in candidates:
            # Step 4: If the candidate is part of the transaction, increase its count
            if set(candidate).issubset(transaction):
                if candidate in itemset_count:
                    itemset_count[candidate] += 1
                else:
                    itemset_count[candidate] = 1

    # Step 5: Calculate the total number of transactions
    num_transactions = len(transactions)
    
    # Step 6: Create a dictionary for itemsets that meet the minimum support
    frequent_itemsets = {}
    for itemset, count in itemset_count.items():
        support = count / num_transactions
        if support >= min_support:
            frequent_itemsets[itemset] = support
    
    # Step 7: Return the frequent itemsets
    return frequent_itemsets


# Function to generate association rules from frequent itemsets
def generate_association_rules(frequent_itemsets, min_confidence):
    rules = []
    for itemset in frequent_itemsets:
        for i in range(1, len(itemset)):
            for subset in itertools.combinations(itemset, i):
                antecedent = set(subset)
                consequent = set(itemset) - antecedent
                if consequent:
                    support_antecedent = frequent_itemsets.get(tuple(sorted(antecedent)), 0)
                    support_itemset = frequent_itemsets[itemset]
                    confidence = support_itemset / support_antecedent if support_antecedent > 0 else 0
                    if confidence >= min_confidence:
                        lift = confidence / (frequent_itemsets.get(tuple(sorted(consequent)), 0) / len(transactions))
                        rules.append((antecedent, consequent, confidence, lift))
    return rules

# Step 1: Generate all itemsets
itemsets = set(item for transaction in transactions for item in transaction)

# Step 2: Initialize
min_support = 0.3
min_confidence = 0.6
frequent_itemsets = {}
k = 1

# Step 3: Generate frequent itemsets
while True:
    candidates = generate_candidates(itemsets, k)
    frequent_itemsets_k = filter_frequent_itemsets(transactions, candidates, min_support)
    if not frequent_itemsets_k:
        break
    frequent_itemsets.update(frequent_itemsets_k)
    k += 1

# Step 4: Generate association rules
rules = generate_association_rules(frequent_itemsets, min_confidence)

# Output results
print("Frequent Itemsets:")
for itemset, support in frequent_itemsets.items():
    print(f"Itemset: {itemset}, Support: {support:.2f}")

print("\nAssociation Rules:")
for antecedent, consequent, confidence, lift in rules:
    print(f"Rule: {antecedent} -> {consequent}, Confidence: {confidence:.2f}, Lift: {lift:.2f}")


Frequent Itemsets:
Itemset: ('Star Wars',), Support: 0.60
Itemset: ('DC Comics',), Support: 0.60
Itemset: ('Marvel',), Support: 0.50
Itemset: ('Star Trek',), Support: 0.40
Itemset: ('Marvel', 'DC Comics'), Support: 0.30
Itemset: ('Marvel', 'Star Wars'), Support: 0.30
Itemset: ('Star Wars', 'Star Trek'), Support: 0.30

Association Rules:
Rule: {'Marvel'} -> {'DC Comics'}, Confidence: 0.60, Lift: 10.00
Rule: {'Marvel'} -> {'Star Wars'}, Confidence: 0.60, Lift: 10.00
Rule: {'Star Trek'} -> {'Star Wars'}, Confidence: 0.75, Lift: 12.50
