<a href="https://colab.research.google.com/github/chandanams-stack/TNSIF_AIML_DBIT/blob/main/Aprio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
!pip install mlxtend pandas
import warnings

# Filter out deprecation warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)



In [25]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import warnings

# Filter out deprecation warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [14]:
# Step 1: Define the dataset
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]


In [15]:
# Step 2: Convert dataset into a DataFrame (one-hot encoded)
df = pd.DataFrame(dataset, columns=['Item1','Item2','Item3'])
print("Original Transactions:")
print(pd.DataFrame(dataset))


Original Transactions:
        0         1         2
0  Coffee     Donut  Sandwich
1  Coffee     Donut      None
2  Coffee  Sandwich      None
3  Coffee    Muffin      None
4   Donut    Muffin      None


In [17]:
# One-hot encoding
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_data = te.fit(dataset).transform(dataset)
df_encoded = pd.DataFrame(te_data, columns=te.columns_)
print("\nOne-Hot Encoded DataFrame:")
print(df_encoded)


One-Hot Encoded DataFrame:
   Coffee  Donut  Muffin  Sandwich
0    True   True   False      True
1    True   True   False     False
2    True  False   False      True
3    True  False    True     False
4   False   True    True     False


In [18]:
# One-hot encoding
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_data = te.fit(dataset).transform(dataset)
df_encoded = pd.DataFrame(te_data, columns=te.columns_)
print("\nOne-Hot Encoded DataFrame:")
print(df_encoded)


One-Hot Encoded DataFrame:
   Coffee  Donut  Muffin  Sandwich
0    True   True   False      True
1    True   True   False     False
2    True  False   False      True
3    True  False    True     False
4   False   True    True     False


In [19]:
# Step 3: Generate frequent itemsets with min_support = 0.4
frequent_itemsets = apriori(df_encoded, min_support=0.4, use_colnames=True)
print("\nFrequent Itemsets (support >= 0.4):")
print(frequent_itemsets)



Frequent Itemsets (support >= 0.4):
   support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Donut, Coffee)
5      0.4  (Sandwich, Coffee)


In [20]:
# Step 4: Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
print("\nAll Association Rules:")
print(rules[['antecedents','consequents','support','confidence','lift']])


All Association Rules:
  antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
1    (Coffee)     (Donut)      0.4    0.500000  0.833333
2  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
3    (Coffee)  (Sandwich)      0.4    0.500000  1.250000


In [24]:
# Step 5: Filter rules with support >= 0.4 and confidence >= 0.6
strong_rules = rules[(rules['support'] >= 0.4) & (rules['confidence'] >= 0.6)]
print("\nStrong Rules (support >= 0.4, confidence >= 0.6):")
print(strong_rules[['antecedents','consequents','support','confidence','lift']])



Strong Rules (support >= 0.4, confidence >= 0.6):
  antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
2  (Sandwich)    (Coffee)      0.4    1.000000  1.250000


In [23]:
# Step 6: Interpretation Example
if not strong_rules.empty:
    first_rule = strong_rules.iloc[0]
    print(f"\nInterpretation Example: If a customer buys {list(first_rule['antecedents'])}, "
          f"they are likely to also buy {list(first_rule['consequents'])} "
          f"(Confidence: {first_rule['confidence']:.2f}, Lift: {first_rule['lift']:.2f}).")


Interpretation Example: If a customer buys ['Donut'], they are likely to also buy ['Coffee'] (Confidence: 0.67, Lift: 0.83).


In [22]:
# One-hot encoding
te = TransactionEncoder()
te_data = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_data, columns=te.columns_)

# Q7: Effect of changing min_support and min_confidence
print("\n--- Q7: Effect of changing min_support and min_confidence ---")
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
rules1 = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
print("\nRules with support=0.4, confidence=0.6:\n", rules1[['antecedents','consequents','support','confidence','lift']])

frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
rules2 = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4)
print("\nRules with support=0.2, confidence=0.4:\n", rules2[['antecedents','consequents','support','confidence','lift']])


--- Q7: Effect of changing min_support and min_confidence ---

Rules with support=0.4, confidence=0.6:
   antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
1  (Sandwich)    (Coffee)      0.4    1.000000  1.250000

Rules with support=0.2, confidence=0.4:
            antecedents      consequents  support  confidence      lift
0              (Donut)         (Coffee)      0.4    0.666667  0.833333
1             (Coffee)          (Donut)      0.4    0.500000  0.833333
2             (Muffin)         (Coffee)      0.2    0.500000  0.625000
3           (Sandwich)         (Coffee)      0.4    1.000000  1.250000
4             (Coffee)       (Sandwich)      0.4    0.500000  1.250000
5             (Muffin)          (Donut)      0.2    0.500000  0.833333
6           (Sandwich)          (Donut)      0.2    0.500000  0.833333
7      (Donut, Coffee)       (Sandwich)      0.2    0.500000  1.250000
8    (Sandwich, Donut)         (Coffee)    

In [21]:
# Q8: Show rules and highlight lift > 1
print("\n--- Q8: Why Lift > 1 is good ---")
rules_all = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
print(rules_all[['antecedents','consequents','support','confidence','lift']])

good_rules = rules_all[rules_all['lift'] > 1]
print("\nRules with Lift > 1:\n", good_rules[['antecedents','consequents','support','confidence','lift']])



--- Q8: Why Lift > 1 is good ---
  antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
1    (Coffee)     (Donut)      0.4    0.500000  0.833333
2  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
3    (Coffee)  (Sandwich)      0.4    0.500000  1.250000

Rules with Lift > 1:
   antecedents consequents  support  confidence  lift
2  (Sandwich)    (Coffee)      0.4         1.0  1.25
3    (Coffee)  (Sandwich)      0.4         0.5  1.25
