<a href="https://colab.research.google.com/github/jeeswan/Data-Warehousing-and-Data-Mining-/blob/main/DW_Lab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlxtend



In [None]:
# Question 1a: Find frequently occurring itemsets using Apriori algorithm

from itertools import combinations

def load_data(filename):
    """Load transactions from file"""
    with open(filename, 'r') as f:
        lines = f.readlines()[1:]  # Skip header
        return [line.strip().split(',')[1:] for line in lines if line.strip()]

def count_support(itemset, transactions):
    """Count how many transactions contain the itemset"""
    count = 0
    for transaction in transactions:
        if all(item in transaction for item in itemset):
            count += 1
    return count

def apriori_frequent_itemsets(transactions, min_support=0.1):
    """Find frequent itemsets using Apriori"""
    total_transactions = len(transactions)
    min_count = min_support * total_transactions
    frequent_itemsets = []

    # Get all unique items
    all_items = set()
    for transaction in transactions:
        all_items.update(transaction)

    # Find frequent 1-itemsets
    frequent_1 = []
    for item in all_items:
        count = count_support([item], transactions)
        if count >= min_count:
            frequent_1.append([item])
            frequent_itemsets.append(([item], count, count/total_transactions))

    # Generate larger itemsets
    k = 2
    current_frequent = frequent_1

    while current_frequent:
        # Generate candidates
        candidates = []
        for i in range(len(current_frequent)):
            for j in range(i+1, len(current_frequent)):
                # Combine itemsets
                combined = list(set(current_frequent[i] + current_frequent[j]))
                if len(combined) == k:
                    candidates.append(sorted(combined))

        # Remove duplicates
        candidates = [list(x) for x in set(tuple(x) for x in candidates)]

        # Check support for candidates
        next_frequent = []
        for candidate in candidates:
            count = count_support(candidate, transactions)
            if count >= min_count:
                next_frequent.append(candidate)
                frequent_itemsets.append((candidate, count, count/total_transactions))

        current_frequent = next_frequent
        k += 1

    return frequent_itemsets

# Test with both files
print("=== APRIORI FREQUENT ITEMSETS ===\n")

for filename in ['sports.txt', 'space.txt']:
    print(f"File: {filename}")
    print("-" * 30)

    try:
        transactions = load_data(filename)
        frequent_items = apriori_frequent_itemsets(transactions)

        print(f"Total transactions: {len(transactions)}")
        print(f"Frequent itemsets found: {len(frequent_items)}\n")

        for itemset, count, support in frequent_items:
            print(f"  {itemset}")
            print(f"    Count: {count}, Support: {support:.3f}")

    except FileNotFoundError:
        print(f"  File {filename} not found!")

    print("\n" + "="*50 + "\n")

=== APRIORI FREQUENT ITEMSETS ===

File: sports.txt
------------------------------
Total transactions: 50
Frequent itemsets found: 20

  ['water bottle']
    Count: 14, Support: 0.280
  ['gloves']
    Count: 18, Support: 0.360
  ['cricket bat']
    Count: 20, Support: 0.400
  ['ice cream']
    Count: 13, Support: 0.260
  ['football']
    Count: 22, Support: 0.440
  ['juice']
    Count: 21, Support: 0.420
  ['cricket ball']
    Count: 18, Support: 0.360
  ['gloves', 'juice']
    Count: 5, Support: 0.100
  ['cricket ball', 'cricket bat']
    Count: 7, Support: 0.140
  ['cricket ball', 'gloves']
    Count: 7, Support: 0.140
  ['cricket ball', 'juice']
    Count: 6, Support: 0.120
  ['cricket ball', 'football']
    Count: 5, Support: 0.100
  ['football', 'gloves']
    Count: 7, Support: 0.140
  ['football', 'juice']
    Count: 7, Support: 0.140
  ['football', 'ice cream']
    Count: 5, Support: 0.100
  ['cricket bat', 'gloves']
    Count: 6, Support: 0.120
  ['cricket bat', 'juice']
    Co

In [None]:
# Question 1b: Compute the support of frequent itemsets

def load_data(filename):
    """Load transactions from file"""
    with open(filename, 'r') as f:
        lines = f.readlines()[1:]  # Skip header
        return [line.strip().split(',')[1:] for line in lines if line.strip()]

def calculate_support(itemset, transactions):
    """Calculate support for an itemset"""
    # Count transactions containing the itemset
    count = 0
    for transaction in transactions:
        if all(item in transaction for item in itemset):
            count += 1

    # Support = count / total transactions
    total = len(transactions)
    support = count / total if total > 0 else 0

    return count, support

def demo_support_calculation(transactions):
    """Demonstrate support calculation with examples"""
    print("Support Calculation Examples:")
    print("-" * 40)

    # Example itemsets to calculate support for
    example_itemsets = [
        ['Swimming'],
        ['Baseball', 'Football'],
        ['Tennis', 'Basketball', 'Baseball']
    ]

    for itemset in example_itemsets:
        count, support = calculate_support(itemset, transactions)
        print(f"Itemset: {itemset}")
        print(f"  Appears in {count} out of {len(transactions)} transactions")
        print(f"  Support = {count}/{len(transactions)} = {support:.3f}\n")

# Test with both files
print("=== SUPPORT CALCULATION ===\n")

for filename in ['sports.txt', 'space.txt']:
    print(f"File: {filename}")
    print("-" * 30)

    try:
        transactions = load_data(filename)
        print(f"Total transactions: {len(transactions)}\n")

        demo_support_calculation(transactions)

        # Show a few actual transactions for context
        print("Sample transactions:")
        for i, trans in enumerate(transactions[:3]):
            print(f"  Transaction {i+1}: {trans}")

    except FileNotFoundError:
        print(f"  File {filename} not found!")

    print("\n" + "="*50 + "\n")

=== SUPPORT CALCULATION ===

File: sports.txt
------------------------------
Total transactions: 50

Support Calculation Examples:
----------------------------------------
Itemset: ['Swimming']
  Appears in 0 out of 50 transactions
  Support = 0/50 = 0.000

Itemset: ['Baseball', 'Football']
  Appears in 0 out of 50 transactions
  Support = 0/50 = 0.000

Itemset: ['Tennis', 'Basketball', 'Baseball']
  Appears in 0 out of 50 transactions
  Support = 0/50 = 0.000

Sample transactions:
  Transaction 1: ['football', 'cricket ball', 'gloves']
  Transaction 2: ['cricket bat', 'cricket ball', 'juice']
  Transaction 3: ['football', 'water bottle', 'juice']


File: space.txt
------------------------------
Total transactions: 50

Support Calculation Examples:
----------------------------------------
Itemset: ['Swimming']
  Appears in 0 out of 50 transactions
  Support = 0/50 = 0.000

Itemset: ['Baseball', 'Football']
  Appears in 0 out of 50 transactions
  Support = 0/50 = 0.000

Itemset: ['Tenni

In [None]:
# Question 1c: Compute confidence and lift of association rules

from itertools import combinations

def load_data(filename):
    """Load transactions from file"""
    with open(filename, 'r') as f:
        lines = f.readlines()[1:]  # Skip header
        return [line.strip().split(',')[1:] for line in lines if line.strip()]

def calculate_support(itemset, transactions):
    """Calculate support for an itemset"""
    count = sum(1 for trans in transactions if all(item in trans for item in itemset))
    return count / len(transactions)

def generate_association_rules(itemset, transactions, min_confidence=0.3):
    """Generate association rules from a frequent itemset"""
    rules = []

    # Generate all possible splits of the itemset
    for i in range(1, len(itemset)):
        for antecedent in combinations(itemset, i):
            consequent = [item for item in itemset if item not in antecedent]

            # Calculate metrics
            support_itemset = calculate_support(itemset, transactions)
            support_antecedent = calculate_support(list(antecedent), transactions)
            support_consequent = calculate_support(consequent, transactions)

            # Confidence = support(A ∪ B) / support(A)
            confidence = support_itemset / support_antecedent if support_antecedent > 0 else 0

            # Lift = confidence / support(B)
            lift = confidence / support_consequent if support_consequent > 0 else 0

            if confidence >= min_confidence:
                rules.append({
                    'antecedent': list(antecedent),
                    'consequent': consequent,
                    'support': support_itemset,
                    'confidence': confidence,
                    'lift': lift
                })

    return rules

def find_frequent_itemsets_simple(transactions, min_support=0.1):
    """Simple version to find frequent itemsets"""
    all_items = set()
    for trans in transactions:
        all_items.update(trans)

    frequent = []

    # Check 2-itemsets and 3-itemsets
    for size in [2, 3]:
        for itemset in combinations(all_items, size):
            support = calculate_support(list(itemset), transactions)
            if support >= min_support:
                frequent.append(list(itemset))

    return frequent

# Test with both files
print("=== ASSOCIATION RULES (CONFIDENCE & LIFT) ===\n")

for filename in ['sports.txt', 'space.txt']:
    print(f"File: {filename}")
    print("-" * 40)

    try:
        transactions = load_data(filename)
        frequent_itemsets = find_frequent_itemsets_simple(transactions)

        print(f"Total transactions: {len(transactions)}")
        print(f"Frequent itemsets: {len(frequent_itemsets)}\n")

        all_rules = []
        for itemset in frequent_itemsets:
            rules = generate_association_rules(itemset, transactions)  # min_confidence already set
            all_rules.extend(rules)

        print("Association Rules:")
        print("-" * 30)

        for i, rule in enumerate(all_rules[:5], 1):  # Show top 5 rules
            print(f"Rule {i}: {rule['antecedent']} → {rule['consequent']}")
            print(f"  Support: {rule['support']:.3f}")
            print(f"  Confidence: {rule['confidence']:.3f}")
            print(f"  Lift: {rule['lift']:.3f}")

            # Explanation
            if rule['lift'] > 1:
                print(f"  → Items are positively associated (lift > 1)")
            elif rule['lift'] < 1:
                print(f"  → Items are negatively associated (lift < 1)")
            else:
                print(f"  → Items are independent (lift = 1)")
            print()

        if not all_rules:
            print("  No rules found with minimum confidence threshold")

    except FileNotFoundError:
        print(f"  File {filename} not found!")

    print("\n" + "="*60 + "\n")

=== ASSOCIATION RULES (CONFIDENCE & LIFT) ===

File: sports.txt
----------------------------------------
Total transactions: 50
Frequent itemsets: 13

Association Rules:
------------------------------
Rule 1: ['water bottle'] → ['football']
  Support: 0.100
  Confidence: 0.357
  Lift: 0.812
  → Items are negatively associated (lift < 1)

Rule 2: ['gloves'] → ['cricket bat']
  Support: 0.120
  Confidence: 0.333
  Lift: 0.833
  → Items are negatively associated (lift < 1)

Rule 3: ['cricket bat'] → ['gloves']
  Support: 0.120
  Confidence: 0.300
  Lift: 0.833
  → Items are negatively associated (lift < 1)

Rule 4: ['gloves'] → ['football']
  Support: 0.140
  Confidence: 0.389
  Lift: 0.884
  → Items are negatively associated (lift < 1)

Rule 5: ['football'] → ['gloves']
  Support: 0.140
  Confidence: 0.318
  Lift: 0.884
  → Items are negatively associated (lift < 1)



File: space.txt
----------------------------------------
Total transactions: 50
Frequent itemsets: 7

Association Rules:

In [None]:
# Question 2a: Find frequently occurring itemsets using FP-Growth algorithm

from collections import defaultdict, Counter

class FPNode:
    """Node in FP-Tree"""
    def __init__(self, item, count=0, parent=None):
        self.item = item
        self.count = count
        self.parent = parent
        self.children = {}
        self.next_node = None

def load_data(filename):
    """Load transactions from file"""
    with open(filename, 'r') as f:
        lines = f.readlines()[1:]
        return [line.strip().split(',')[1:] for line in lines if line.strip()]

def build_fp_tree(transactions, min_support=0.1):
    """Build FP-Tree from transactions"""
    min_count = len(transactions) * min_support

    # Count item frequencies
    item_counts = Counter()
    for transaction in transactions:
        for item in transaction:
            item_counts[item] += 1

    # Keep only frequent items
    frequent_items = {item: count for item, count in item_counts.items()
                     if count >= min_count}

    # Create root node
    root = FPNode("root")
    header_table = defaultdict(list)

    # Insert each transaction into tree
    for transaction in transactions:
        # Filter and sort transaction by frequency (descending)
        filtered_trans = [item for item in transaction if item in frequent_items]
        filtered_trans.sort(key=lambda x: frequent_items[x], reverse=True)

        if filtered_trans:
            insert_transaction(filtered_trans, root, header_table)

    return root, header_table, frequent_items

def insert_transaction(transaction, node, header_table):
    """Insert a transaction into the FP-Tree"""
    if not transaction:
        return

    item = transaction[0]

    # If child exists, increment count
    if item in node.children:
        node.children[item].count += 1
    else:
        # Create new child node
        new_node = FPNode(item, 1, node)
        node.children[item] = new_node
        header_table[item].append(new_node)

    # Recursively insert remaining items
    insert_transaction(transaction[1:], node.children[item], header_table)

def mine_fp_tree(header_table, min_support, transactions_count, prefix=[]):
    """Mine frequent itemsets from FP-Tree"""
    frequent_itemsets = []
    min_count = transactions_count * min_support

    # Sort items by frequency (ascending)
    items = [(item, len(nodes)) for item, nodes in header_table.items()]
    items.sort(key=lambda x: x[1])

    for item, _ in items:
        # Create new frequent itemset
        new_itemset = prefix + [item]

        # Calculate support
        total_count = sum(node.count for node in header_table[item])
        support = total_count / transactions_count

        if total_count >= min_count:
            frequent_itemsets.append((new_itemset, total_count, support))

            # Generate conditional pattern base
            conditional_patterns = []
            for node in header_table[item]:
                path = []
                current = node.parent
                while current.item != "root":
                    path.append(current.item)
                    current = current.parent

                if path:
                    # Add pattern multiple times based on node count
                    for _ in range(node.count):
                        conditional_patterns.append(path[::-1])  # Reverse path

            # Recursively mine conditional patterns
            if conditional_patterns:
                cond_root, cond_header, _ = build_fp_tree(conditional_patterns, min_support)
                if cond_header:
                    sub_itemsets = mine_fp_tree(cond_header, min_support,
                                               len(conditional_patterns), new_itemset)
                    frequent_itemsets.extend(sub_itemsets)

    return frequent_itemsets

def fp_growth(transactions, min_support=0.1):
    """FP-Growth algorithm main function"""
    root, header_table, frequent_items = build_fp_tree(transactions, min_support)
    return mine_fp_tree(header_table, min_support, len(transactions))

# Test with both files
print("=== FP-GROWTH FREQUENT ITEMSETS ===\n")

for filename in ['sports.txt', 'space.txt']:
    print(f"File: {filename}")
    print("-" * 30)

    try:
        transactions = load_data(filename)
        frequent_itemsets = fp_growth(transactions)

        print(f"Total transactions: {len(transactions)}")
        print(f"Frequent itemsets found: {len(frequent_itemsets)}\n")

        for itemset, count, support in frequent_itemsets:
            print(f"  {itemset}")
            print(f"    Count: {count}, Support: {support:.3f}")

    except FileNotFoundError:
        print(f"  File {filename} not found!")

    print("\n" + "="*50 + "\n")

=== FP-GROWTH FREQUENT ITEMSETS ===

File: sports.txt
------------------------------
Total transactions: 50
Frequent itemsets found: 49

  ['football']
    Count: 22, Support: 0.440
  ['juice']
    Count: 21, Support: 0.420
  ['juice', 'football']
    Count: 7, Support: 1.000
  ['cricket bat']
    Count: 20, Support: 0.400
  ['cricket bat', 'juice']
    Count: 7, Support: 0.636
  ['cricket bat', 'football']
    Count: 6, Support: 0.545
  ['cricket bat', 'football', 'juice']
    Count: 2, Support: 1.000
  ['cricket ball']
    Count: 18, Support: 0.360
  ['cricket ball', 'cricket bat']
    Count: 7, Support: 0.467
  ['cricket ball', 'football']
    Count: 5, Support: 0.333
  ['cricket ball', 'football', 'cricket bat']
    Count: 1, Support: 1.000
  ['cricket ball', 'juice']
    Count: 6, Support: 0.400
  ['cricket ball', 'juice', 'cricket bat']
    Count: 2, Support: 1.000
  ['gloves']
    Count: 18, Support: 0.360
  ['gloves', 'football']
    Count: 7, Support: 0.412
  ['gloves', 'crick

In [None]:
# Question 2b: Compute the support of frequent itemsets (FP-Growth)

from collections import Counter

def load_data(filename):
    """Load transactions from file"""
    with open(filename, 'r') as f:
        lines = f.readlines()[1:]  # Skip header
        return [line.strip().split(',')[1:] for line in lines if line.strip()]

def calculate_support_fpgrowth(itemset, transactions):
    """Calculate support for an itemset using FP-Growth approach"""
    # Count transactions containing the itemset
    count = 0
    for transaction in transactions:
        if all(item in transaction for item in itemset):
            count += 1

    # Support = count / total transactions
    total = len(transactions)
    support = count / total if total > 0 else 0

    return count, support

def demonstrate_support_calculation(transactions):
    """Show how support is calculated in FP-Growth"""
    print("FP-Growth Support Calculation:")
    print("-" * 40)

    # First, show item frequencies (like FP-Growth does)
    item_counts = Counter()
    for transaction in transactions:
        for item in transaction:
            item_counts[item] += 1

    print("Individual item frequencies:")
    for item, count in item_counts.most_common(5):
        support = count / len(transactions)
        print(f"  {item}: {count} times, Support = {support:.3f}")

    print("\nCombined itemset support examples:")

    # Example itemsets
    if len(item_counts) >= 2:
        top_items = list(item_counts.most_common(3))

        # 2-itemset example
        if len(top_items) >= 2:
            itemset = [top_items[0][0], top_items[1][0]]
            count, support = calculate_support_fpgrowth(itemset, transactions)
            print(f"  {itemset}: {count} times, Support = {support:.3f}")

        # 3-itemset example if possible
        if len(top_items) >= 3:
            itemset = [top_items[0][0], top_items[1][0], top_items[2][0]]
            count, support = calculate_support_fpgrowth(itemset, transactions)
            print(f"  {itemset}: {count} times, Support = {support:.3f}")

def show_fp_growth_process(transactions, min_support=0.1):
    """Show the FP-Growth process step by step"""
    print("\nFP-Growth Process:")
    print("-" * 30)

    # Step 1: Count frequencies
    item_counts = Counter()
    for transaction in transactions:
        for item in transaction:
            item_counts[item] += 1

    min_count = len(transactions) * min_support
    print(f"Minimum support count: {min_count:.1f}")

    # Step 2: Filter frequent items
    frequent_items = {item: count for item, count in item_counts.items()
                     if count >= min_count}

    print(f"Frequent items: {len(frequent_items)}")
    for item, count in sorted(frequent_items.items(), key=lambda x: x[1], reverse=True):
        support = count / len(transactions)
        print(f"  {item}: count={count}, support={support:.3f}")

    # Step 3: Show ordered transactions
    print(f"\nSample ordered transactions (top 3):")
    for i, transaction in enumerate(transactions[:3]):
        # Filter and sort by frequency
        filtered = [item for item in transaction if item in frequent_items]
        filtered.sort(key=lambda x: frequent_items[x], reverse=True)
        print(f"  Transaction {i+1}: {filtered}")

# Test with both files
print("=== FP-GROWTH SUPPORT CALCULATION ===\n")

for filename in ['sports.txt', 'space.txt']:
    print(f"File: {filename}")
    print("-" * 40)

    try:
        transactions = load_data(filename)
        print(f"Total transactions: {len(transactions)}\n")

        demonstrate_support_calculation(transactions)
        show_fp_growth_process(transactions)

    except FileNotFoundError:
        print(f"  File {filename} not found!")

    print("\n" + "="*60 + "\n")

=== FP-GROWTH SUPPORT CALCULATION ===

File: sports.txt
----------------------------------------
Total transactions: 50

FP-Growth Support Calculation:
----------------------------------------
Individual item frequencies:
  football: 22 times, Support = 0.440
  juice: 21 times, Support = 0.420
  cricket bat: 20 times, Support = 0.400
  cricket ball: 18 times, Support = 0.360
  gloves: 18 times, Support = 0.360

Combined itemset support examples:
  ['football', 'juice']: 7 times, Support = 0.140
  ['football', 'juice', 'cricket bat']: 2 times, Support = 0.040

FP-Growth Process:
------------------------------
Minimum support count: 5.0
Frequent items: 7
  football: count=22, support=0.440
  juice: count=21, support=0.420
  cricket bat: count=20, support=0.400
  cricket ball: count=18, support=0.360
  gloves: count=18, support=0.360
  water bottle: count=14, support=0.280
  ice cream: count=13, support=0.260

Sample ordered transactions (top 3):
  Transaction 1: ['football', 'cricket bal

In [None]:
# Question 2c: Compute confidence and lift of association rules (FP-Growth)

from collections import defaultdict, Counter
from itertools import combinations

def load_data(filename):
    """Load transactions from file"""
    with open(filename, 'r') as f:
        lines = f.readlines()[1:]  # Skip header
        return [line.strip().split(',')[1:] for line in lines if line.strip()]

def calculate_support(itemset, transactions):
    """Calculate support for an itemset"""
    count = sum(1 for trans in transactions if all(item in trans for item in itemset))
    return count / len(transactions)

def simple_fpgrowth_frequent(transactions, min_support=0.1):
    """Simplified FP-Growth to find frequent itemsets"""
    # Count item frequencies
    item_counts = Counter()
    for transaction in transactions:
        for item in transaction:
            item_counts[item] += 1

    min_count = len(transactions) * min_support
    frequent_items = [item for item, count in item_counts.items() if count >= min_count]

    frequent_itemsets = []

    # Add single items
    for item in frequent_items:
        support = calculate_support([item], transactions)
        frequent_itemsets.append(([item], support))

    # Add 2-itemsets
    for itemset in combinations(frequent_items, 2):
        support = calculate_support(list(itemset), transactions)
        if support >= min_support:
            frequent_itemsets.append((list(itemset), support))

    # Add 3-itemsets
    for itemset in combinations(frequent_items, 3):
        support = calculate_support(list(itemset), transactions)
        if support >= min_support:
            frequent_itemsets.append((list(itemset), support))

    return frequent_itemsets

def generate_association_rules_fpgrowth(frequent_itemsets, transactions, min_confidence=0.3):
    """Generate association rules from frequent itemsets found by FP-Growth"""
    rules = []

    # Only consider itemsets with 2+ items
    for itemset, itemset_support in frequent_itemsets:
        if len(itemset) < 2:
            continue

        # Generate all possible antecedent -> consequent combinations
        for i in range(1, len(itemset)):
            for antecedent in combinations(itemset, i):
                consequent = [item for item in itemset if item not in antecedent]

                # Calculate support for antecedent
                antecedent_support = calculate_support(list(antecedent), transactions)
                consequent_support = calculate_support(consequent, transactions)

                if antecedent_support > 0 and consequent_support > 0:
                    # Confidence = support(itemset) / support(antecedent)
                    confidence = itemset_support / antecedent_support

                    # Lift = confidence / support(consequent)
                    lift = confidence / consequent_support

                    if confidence >= min_confidence:
                        rules.append({
                            'antecedent': list(antecedent),
                            'consequent': consequent,
                            'support': itemset_support,
                            'confidence': confidence,
                            'lift': lift
                        })

    return rules

def explain_metrics():
    """Explain what confidence and lift mean"""
    print("Association Rule Metrics Explanation:")
    print("-" * 40)
    print("• Support: How often the itemset appears")
    print("• Confidence: How often consequent appears when antecedent appears")
    print("• Lift: How much more likely consequent is when we have antecedent")
    print("  - Lift > 1: Positive association")
    print("  - Lift < 1: Negative association")
    print("  - Lift = 1: No association")
    print()

# Test with both files
print("=== FP-GROWTH ASSOCIATION RULES ===\n")

for filename in ['sports.txt', 'space.txt']:
    print(f"File: {filename}")
    print("-" * 40)

    try:
        transactions = load_data(filename)
        print(f"Total transactions: {len(transactions)}\n")

        # Find frequent itemsets using simplified FP-Growth approach
        frequent_itemsets = simple_fpgrowth_frequent(transactions)
        print(f"Frequent itemsets found: {len(frequent_itemsets)}")

        # Show some frequent itemsets
        print("\nFrequent itemsets:")
        for itemset, support in frequent_itemsets[:5]:
            print(f"  {itemset} (support: {support:.3f})")

        # Generate association rules
        rules = generate_association_rules_fpgrowth(frequent_itemsets, transactions)

        print(f"\nAssociation Rules (min_confidence = 0.5):")
        print("-" * 50)

        if rules:
            explain_metrics()

            for i, rule in enumerate(rules[:5], 1):
                print(f"Rule {i}: {rule['antecedent']} → {rule['consequent']}")
                print(f"  Support: {rule['support']:.3f}")
                print(f"  Confidence: {rule['confidence']:.3f}")
                print(f"  Lift: {rule['lift']:.3f}")

                # Interpretation
                if rule['lift'] > 1.1:
                    print(f"  → Strong positive association!")
                elif rule['lift'] > 1:
                    print(f"  → Positive association")
                elif rule['lift'] < 0.9:
                    print(f"  → Negative association")
                else:
                    print(f"  → Weak/no association")
                print()
        else:
            print("  No rules found meeting minimum confidence threshold")

    except FileNotFoundError:
        print(f"  File {filename} not found!")

    print("\n" + "="*60 + "\n")

=== FP-GROWTH ASSOCIATION RULES ===

File: sports.txt
----------------------------------------
Total transactions: 50

Frequent itemsets found: 20

Frequent itemsets:
  ['football'] (support: 0.440)
  ['cricket ball'] (support: 0.360)
  ['gloves'] (support: 0.360)
  ['cricket bat'] (support: 0.400)
  ['juice'] (support: 0.420)

Association Rules (min_confidence = 0.5):
--------------------------------------------------
Association Rule Metrics Explanation:
----------------------------------------
• Support: How often the itemset appears
• Confidence: How often consequent appears when antecedent appears
• Lift: How much more likely consequent is when we have antecedent
  - Lift > 1: Positive association
  - Lift < 1: Negative association
  - Lift = 1: No association

Rule 1: ['football'] → ['gloves']
  Support: 0.140
  Confidence: 0.318
  Lift: 0.884
  → Negative association

Rule 2: ['gloves'] → ['football']
  Support: 0.140
  Confidence: 0.389
  Lift: 0.884
  → Negative association

R