In [22]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import warnings

# Filter out DeprecationWarnings specifically from jupyter_client
# warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client")

In [23]:
 #1: Define the dataset
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]
print("Q1. Dataset Transactions:\n")
for i, t in enumerate(dataset, start=1):
    print(f"Transaction {i}: {t}")

Q1. Dataset Transactions:

Transaction 1: ['Coffee', 'Donut', 'Sandwich']
Transaction 2: ['Coffee', 'Donut']
Transaction 3: ['Coffee', 'Sandwich']
Transaction 4: ['Coffee', 'Muffin']
Transaction 5: ['Donut', 'Muffin']


In [14]:
#Q2: One-Hot Encode the dataset into DataFrame
te = TransactionEncoder()
te_array = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_array, columns=te.columns_)

print("\n\nQ2. One-Hot Encoded DataFrame:\n")
print(df)
print("\n(Each row = transaction; Each column = item; True/False shows presence)")



Q2. One-Hot Encoded DataFrame:

   Coffee  Donut  Muffin  Sandwich
0    True   True   False      True
1    True   True   False     False
2    True  False   False      True
3    True  False    True     False
4   False   True    True     False

(Each row = transaction; Each column = item; True/False shows presence)


In [15]:
# Q3: Find frequent itemsets using Apriori (min_support = 0.4)
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
frequent_itemsets['support'] = frequent_itemsets['support'].round(2)

print("\n\nQ3. Frequent Itemsets (min_support = 0.4):\n")
print(frequent_itemsets)



Q3. Frequent Itemsets (min_support = 0.4):

   support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Coffee, Donut)
5      0.4  (Coffee, Sandwich)


In [16]:
# Q4: Generate all association rules from frequent itemsets
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.0)
rules['support'] = rules['support'].round(2)
rules['confidence'] = rules['confidence'].round(2)
rules['lift'] = rules['lift'].round(2)

print("\n\nQ4. All Association Rules:\n")
print(rules[['antecedents','consequents','support','confidence','lift']])



Q4. All Association Rules:

  antecedents consequents  support  confidence  lift
0    (Coffee)     (Donut)      0.4        0.50  0.83
1     (Donut)    (Coffee)      0.4        0.67  0.83
2    (Coffee)  (Sandwich)      0.4        0.50  1.25
3  (Sandwich)    (Coffee)      0.4        1.00  1.25


In [17]:
# Q5: Filter rules with support >= 0.4 and confidence >= 0.6
strong_rules = rules[(rules['support']>=0.4) & (rules['confidence']>=0.6)]
print("\n\nQ5. Strong Rules (support >= 0.4 and confidence >= 0.6):\n")
print(strong_rules[['antecedents','consequents','support','confidence','lift']])



Q5. Strong Rules (support >= 0.4 and confidence >= 0.6):

  antecedents consequents  support  confidence  lift
1     (Donut)    (Coffee)      0.4        0.67  0.83
3  (Sandwich)    (Coffee)      0.4        1.00  1.25


In [18]:
 #Q6: Interpret one strong rule in words
if not strong_rules.empty:
    rule = strong_rules.iloc[0]
    ant = list(rule['antecedents'])
    cons = list(rule['consequents'])
    print(f"\n\nQ6. Interpretation of one strong rule:\n")
    print(f"If a customer buys {ant}, they are likely to buy {cons} "
          f"with confidence {rule['confidence']} and lift {rule['lift']}.")
else:
    print("\n\nQ6. No strong rules found with given thresholds.")



Q6. Interpretation of one strong rule:

If a customer buys ['Donut'], they are likely to buy ['Coffee'] with confidence 0.67 and lift 0.83.


In [20]:
# Q7: Effect of changing min_support and min_confidence
print("\n\nQ7. Effect of changing thresholds:")
for sup in [0.2, 0.4, 0.6]:
    fi = apriori(df, min_support=sup, use_colnames=True)
    rules_temp = association_rules(fi, metric="confidence", min_threshold=0.5)
    print(f"min_support={sup}: {len(fi)} frequent itemsets, {len(rules_temp)} rules")

print("As min_support increases, fewer itemsets pass the threshold → fewer rules.")
print("As min_confidence increases, fewer rules are considered strong.")



Q7. Effect of changing thresholds:
min_support=0.2: 10 frequent itemsets, 11 rules
min_support=0.4: 6 frequent itemsets, 4 rules
min_support=0.6: 2 frequent itemsets, 0 rules
As min_support increases, fewer itemsets pass the threshold → fewer rules.
As min_confidence increases, fewer rules are considered strong.


In [21]:
# Q8: Why Lift > 1 indicates good association
print("\n\nQ8. Why Lift > 1 means a good rule:")
print("Lift = P(A and B) / (P(A)*P(B)).")
print("If Lift > 1, A and B occur together more often than expected by chance,")
print("which means a positive association between them.")



Q8. Why Lift > 1 means a good rule:
Lift = P(A and B) / (P(A)*P(B)).
If Lift > 1, A and B occur together more often than expected by chance,
which means a positive association between them.
