## Mini Assignment: Association Rule Mining with Simulated Data

### Main Tasks
- Simulating transactions
- Performing Apriori
- Generating and interpreting association rules
- Submitting on GitHub

##### Libraries

In [None]:
# !pip install mlxtend

In [7]:
# Import Libraries
import random
import pandas as pd
# Apriori
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

#### Step 1: Simulate Transaction Data

In [4]:
# Pool of items
items = ['Bread', 'Milk', 'Eggs', 'Butter', 'Cheese', 'Cereal', 'Juice', 'Apples']

# Generate 10 fake transactions (each with 2–5 items)
transactions = []
for _ in range(10):
    transaction = random.sample(items, random.randint(2, 5))
    transactions.append(transaction)

# Show transactions
for i, t in enumerate(transactions, 1):
    print(f"Transaction {i}: {t}")


Transaction 1: ['Bread', 'Apples']
Transaction 2: ['Butter', 'Apples']
Transaction 3: ['Butter', 'Juice', 'Milk', 'Cereal', 'Apples']
Transaction 4: ['Milk', 'Juice', 'Butter']
Transaction 5: ['Milk', 'Butter', 'Cheese', 'Cereal']
Transaction 6: ['Cheese', 'Milk', 'Butter']
Transaction 7: ['Cheese', 'Bread']
Transaction 8: ['Juice', 'Butter', 'Bread']
Transaction 9: ['Cereal', 'Milk']
Transaction 10: ['Apples', 'Juice', 'Eggs']


### Step 2: Analyze with Apriori

In [5]:
# One-hot encode
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Apply Apriori with min support = 0.3
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)
print(frequent_itemsets)


   support         itemsets
0      0.4         (Apples)
1      0.3          (Bread)
2      0.6         (Butter)
3      0.3         (Cereal)
4      0.3         (Cheese)
5      0.4          (Juice)
6      0.5           (Milk)
7      0.3  (Butter, Juice)
8      0.4   (Milk, Butter)
9      0.3   (Cereal, Milk)


#### Step 3: Generate Association Rules

In [8]:
# Generate rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

# Show 2 rules
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(2))

# Explain one rule
example_rule = rules.iloc[0]
print(f"\nRule explained:")
print(f"If someone buys {list(example_rule['antecedents'])}, they are likely to also buy {list(example_rule['consequents'])}.")


  antecedents consequents  support  confidence      lift
0     (Juice)    (Butter)      0.3        0.75  1.250000
1      (Milk)    (Butter)      0.4        0.80  1.333333

Rule explained:
If someone buys ['Juice'], they are likely to also buy ['Butter'].
