# In [None]:
"""
Apriori Algorithm - Simple Basic Implementation
===============================================
Line by line explanation in English
"""

# ============================================================================
# STEP 1: Create Simple Transaction Data
# ============================================================================
print("=" * 60)
print("APRIORI ALGORITHM - SIMPLE IMPLEMENTATION")
print("=" * 60)

# Toy grocery-store data set: every inner list is the basket of one customer.
transactions = [
    ['Bread', 'Milk', 'Beer'],            # basket 1
    ['Bread', 'Butter', 'Beer'],          # basket 2
    ['Milk', 'Butter', 'Coke'],           # basket 3
    ['Bread', 'Milk', 'Butter', 'Beer'],  # basket 4
    ['Bread', 'Milk', 'Coke'],            # basket 5
    ['Milk', 'Butter', 'Beer'],           # basket 6
    ['Bread', 'Butter'],                  # basket 7
    ['Bread', 'Milk', 'Butter', 'Coke'],  # basket 8
    ['Bread', 'Butter', 'Beer'],          # basket 9
    ['Milk', 'Butter'],                   # basket 10
]

print(f"\nTotal Transactions: {len(transactions)}")
print("\nSample Transactions:")
# Preview only the first three baskets.
for i, basket in enumerate(transactions[:3]):
    print(f"Transaction {i+1}: {basket}")

# ============================================================================
# STEP 2: Function to Calculate Support
# ============================================================================
print("\n" + "=" * 60)
print("SUPPORT CALCULATION")
print("=" * 60)

def calculate_support(transactions, itemset):
    """Return the support of ``itemset`` within ``transactions``.

    Support = number of transactions containing every item of the itemset
              / total number of transactions

    Example:
        If 'Bread' appears in 7 out of 10 transactions,
        support = 7/10 = 0.7 = 70%.

    Args:
        transactions: Sized collection of transactions; each transaction is
            a container that supports the ``in`` operator (e.g. a list).
        itemset: Iterable of items that must all be present in a transaction
            for that transaction to be counted.

    Returns:
        The support as a float between 0.0 and 1.0. An empty ``itemset`` is
        contained in every transaction, so its support is 1.0. When there are
        no transactions at all the support is defined as 0.0 instead of
        raising ``ZeroDivisionError``.
    """
    # Guard: without transactions the ratio below would divide by zero.
    if not transactions:
        return 0.0

    # Count the transactions that contain every item of the itemset.
    count = sum(
        1
        for transaction in transactions
        if all(item in transaction for item in itemset)
    )
    return count / len(transactions)

# Demonstrate the helper on two single-item itemsets: Bread, then Milk.
bread_support, milk_support = (
    calculate_support(transactions, [product]) for product in ('Bread', 'Milk')
)

print(f"\nExample: Support of 'Bread' = {bread_support:.2f} ({bread_support*100:.0f}%)")
print(f"Example: Support of 'Milk' = {milk_support:.2f} ({milk_support*100:.0f}%)")

# ============================================================================
# STEP 3: Find Frequent 1-itemsets (Single Items)
# ============================================================================
print("\n" + "="*60)
print("STEP 3: FINDING FREQUENT 1-ITEMSETS")
print("="*60)

# Minimum support threshold: an itemset counts as "frequent" when its support
# is at least this fraction of all transactions.
min_support = 0.2  # 20% means 2 out of 10 transactions
print(f"Minimum Support Threshold: {min_support*100:.0f}%")

# Collect every distinct item that occurs in any basket.
all_items = {item for transaction in transactions for item in transaction}

print(f"\nAll Unique Items: {sorted(all_items)}")

# Calculate support for each item
print("\nCalculating support for each item...")
frequent_1_itemsets = {}  # maps frozenset({item}) -> support

# Walk the items in sorted order. Iterating the set directly yields an order
# that can differ between interpreter runs (string hashes are randomized), and
# that order would leak into the printed report and into the insertion order
# of frequent_1_itemsets.
for item in sorted(all_items):
    support = calculate_support(transactions, [item])
    print(f"  {item:10s} -> Support: {support:.2f}")

    # Keep only the items that reach the threshold.
    if support >= min_support:
        frequent_1_itemsets[frozenset([item])] = support

print(f"\nFrequent 1-itemsets: {len(frequent_1_itemsets)}")
for itemset, support in frequent_1_itemsets.items():
    (item,) = itemset  # each key holds exactly one item
    print(f"  {{{item}}} -> Support: {support:.2f}")

# ============================================================================
# STEP 4: Find Frequent 2-itemsets (Pairs)
# ============================================================================
print("\n" + "="*60)
print("STEP 4: FINDING FREQUENT 2-ITEMSETS (PAIRS)")
print("="*60)

# Flatten the frequent 1-itemsets (single-element frozensets) to plain item
# names. Sorting makes the candidate order, and therefore the printed report,
# independent of how the dictionary happened to be filled.
frequent_items_list = sorted(next(iter(itemset)) for itemset in frequent_1_itemsets)

print(f"Creating pairs from: {frequent_items_list}")

# Generate every unordered pair exactly once: each item is combined only with
# the items that come after it in the list.
candidate_2_itemsets = [
    [first, second]
    for position, first in enumerate(frequent_items_list)
    for second in frequent_items_list[position + 1:]
]

print(f"\nGenerated {len(candidate_2_itemsets)} candidate pairs")

# Calculate support for each pair
print("\nCalculating support for each pair...")
frequent_2_itemsets = {}  # maps frozenset({a, b}) -> support

for itemset in candidate_2_itemsets:
    support = calculate_support(transactions, itemset)
    itemset_str = f"{{{itemset[0]}, {itemset[1]}}}"
    print(f"  {itemset_str:25s} -> Support: {support:.2f}")

    # Keep only the pairs that reach the threshold.
    if support >= min_support:
        frequent_2_itemsets[frozenset(itemset)] = support

print(f"\nFrequent 2-itemsets: {len(frequent_2_itemsets)}")
for itemset, support in frequent_2_itemsets.items():
    first, second = sorted(itemset)
    print(f"  {{{first}, {second}}} -> Support: {support:.2f}")

# ============================================================================
# STEP 5: Find Frequent 3-itemsets (Triplets)
# ============================================================================
print("\n" + "="*60)
print("STEP 5: FINDING FREQUENT 3-ITEMSETS (TRIPLETS)")
print("="*60)

# Create triplets from frequent 2-itemsets
frequent_2_list = [sorted(itemset) for itemset in frequent_2_itemsets]

# Join step: merge two frequent pairs whenever their union has exactly three
# items. A set removes the duplicates that arise because the same triplet can
# be produced by several different pairs of pairs.
unique_triplets = set()
for position, left in enumerate(frequent_2_list):
    for right in frequent_2_list[position + 1:]:
        merged = frozenset(left) | frozenset(right)
        if len(merged) != 3:
            continue
        # Prune step (Apriori property): a triplet can only be frequent if
        # every pair inside it is frequent. Removing one item at a time
        # yields exactly the three contained pairs.
        if all(merged - {item} in frequent_2_itemsets for item in merged):
            unique_triplets.add(merged)

# Sorted lists of sorted items give a reproducible report order.
candidate_3_itemsets = sorted(sorted(triplet) for triplet in unique_triplets)

print(f"Generated {len(candidate_3_itemsets)} candidate triplets")

# Calculate support for each triplet
print("\nCalculating support for each triplet...")
frequent_3_itemsets = {}  # maps frozenset({a, b, c}) -> support

for itemset in candidate_3_itemsets:
    support = calculate_support(transactions, itemset)
    itemset_str = f"{{{itemset[0]}, {itemset[1]}, {itemset[2]}}}"
    print(f"  {itemset_str:35s} -> Support: {support:.2f}")

    # Keep only the triplets that reach the threshold.
    if support >= min_support:
        frequent_3_itemsets[frozenset(itemset)] = support

print(f"\nFrequent 3-itemsets: {len(frequent_3_itemsets)}")
if frequent_3_itemsets:
    for itemset, support in frequent_3_itemsets.items():
        items = sorted(itemset)
        print(f"  {{{items[0]}, {items[1]}, {items[2]}}} -> Support: {support:.2f}")
else:
    print("  No frequent 3-itemsets found!")

# ============================================================================
# STEP 6: Generate Association Rules
# ============================================================================
print("\n" + "="*60)
print("STEP 6: GENERATING ASSOCIATION RULES")
print("="*60)

min_confidence = 0.5  # 50% confidence threshold
print(f"Minimum Confidence: {min_confidence*100:.0f}%")

# Generate rules from 2-itemsets
print("\nRules from 2-itemsets:")
print("-" * 60)

# Visit the pairs in a fixed (alphabetical) order so the report is the same on
# every run; frozenset iteration order alone does not guarantee that.
for itemset, support_AB in sorted(
    frequent_2_itemsets.items(), key=lambda entry: sorted(entry[0])
):
    first, second = sorted(itemset)

    # Every pair yields two candidate rules: first -> second and the reverse.
    for antecedent, consequent in ((first, second), (second, first)):
        support_A = frequent_1_itemsets[frozenset([antecedent])]
        support_B = frequent_1_itemsets[frozenset([consequent])]

        # Confidence: share of antecedent baskets that also hold the consequent.
        confidence = support_AB / support_A
        if confidence < min_confidence:
            continue

        # Lift: observed co-occurrence relative to what independence predicts.
        lift = support_AB / (support_A * support_B)

        print(f"\nRule: {antecedent} → {consequent}")
        print(f"  Support:    {support_AB:.2f}")
        print(f"  Confidence: {confidence:.2f} ({confidence*100:.0f}%)")
        print(f"  Lift:       {lift:.2f}")
        print(f"  Meaning: {confidence*100:.0f}% of people who buy {antecedent} also buy {consequent}")

# ============================================================================
# STEP 7: Summary & Visualization (Text-based)
# ============================================================================
print("\n" + "="*60)
print("STEP 7: SUMMARY & VISUALIZATION")
print("="*60)

print("\n📊 FREQUENT ITEMSETS SUMMARY:")
print("-" * 60)
print(f"1-itemsets (Single Items): {len(frequent_1_itemsets)}")
print(f"2-itemsets (Pairs):        {len(frequent_2_itemsets)}")
print(f"3-itemsets (Triplets):     {len(frequent_3_itemsets)}")

print("\n🔥 TOP FREQUENT ITEMSETS:")
print("-" * 60)

# Combine all frequent itemsets (keys never collide: sizes differ per level)
all_frequent = {}
all_frequent.update(frequent_1_itemsets)
all_frequent.update(frequent_2_itemsets)
all_frequent.update(frequent_3_itemsets)

# Sort by support, highest first. Ties are broken by itemset size and then
# alphabetically so the top-5 list is identical on every run.
sorted_itemsets = sorted(
    all_frequent.items(),
    key=lambda entry: (-entry[1], len(entry[0]), sorted(entry[0])),
)

for itemset, support in sorted_itemsets[:5]:
    items_str = ', '.join(sorted(itemset))
    # Pad the braced label as a whole so the closing brace stays attached to
    # the items (same layout as the pair/triplet listings); total width is 32.
    label = f"{{{items_str}}}"
    bar = '█' * int(support * 50)  # Visual bar: one block per 2% support
    print(f"{label:32s} Support: {support:.2f} {bar}")

# ============================================================================
# STEP 8: Key Concepts Explanation
# ============================================================================
print("\n" + "="*60)
print("STEP 8: KEY CONCEPTS - SIMPLE EXPLANATION")
print("="*60)

# The text below is printed verbatim; the symbols are real Unicode characters
# (the earlier mis-decoded byte sequences have been repaired).
print("""
🔑 IMPORTANT CONCEPTS:

1️⃣ SUPPORT:
   Definition: How frequently an itemset appears
   Formula: Count(Itemset) / Total Transactions
   Example: If 'Bread' appears in 7 out of 10 transactions
            Support = 7/10 = 0.7 = 70%

2️⃣ CONFIDENCE:
   Definition: How reliable is the rule
   Formula: Support(A+B) / Support(A)
   Example: {Bread} → {Butter}
            If 5 people buy Bread
            And 4 of them also buy Butter
            Confidence = 4/5 = 0.8 = 80%

3️⃣ LIFT:
   Definition: How strong is the association between A and B
   Formula: Support(A+B) / (Support(A) × Support(B))
   Lift = 1: No association (random)
   Lift > 1: Positive association (bought together)
   Lift < 1: Negative association (substitutes)
   
   Example: Lift = 2 means "A and B occur together 2x more than random"

💡 BUSINESS USE:
- Lift > 1.5: Create bundle offers
- Lift > 2.0: Place products near each other
- Lift > 3.0: Strong recommendation

📝 ALGORITHM STEPS:
Step 1: Calculate support for all single items
Step 2: Create pairs from frequent items
Step 3: Create triplets from frequent pairs
Step 4: Continue until no more frequent itemsets
Step 5: Generate association rules

✅ WHEN TO USE:
• Understanding customer buying patterns
• Product recommendations
• Store layout design
• Creating bundle offers
• Market basket analysis

❌ WHEN NOT TO USE:
• Small datasets (< 100 transactions)
• Continuous numerical data
• Real-time requirements
• When interpretability is not important
""")

print("\n" + "="*60)
print("APRIORI ALGORITHM COMPLETE! 🎉")
print("="*60)