<a href="https://colab.research.google.com/github/abdgazalaa241992-cmyk/lab/blob/main/IUG_Practical_Assignment_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Given the dataset below, perform the required task for each question and display the output.



In [1]:
# Toy market-basket dataset: five transactions (T1..T5) over the items 'A' and 'B'.
transactions = [
    ['A', 'B'],  # T1: both items
    ['A'],       # T2: A only
    ['B'],       # T3: B only
    ['A', 'B'],  # T4: both items
    [],          # T5: empty basket (neither A nor B)
]
transactions

[['A', 'B'], ['A'], ['B'], ['A', 'B'], []]

# Q1- Perform the Apriori algorithm to get the frequent itemsets, given a minimum support count of 3 and a minimum confidence of 80%.

In [3]:
# Apriori implementation using mlxtend

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder # converts a list of transactions (list of item strings) into a binary matrix (one-hot encoding).
from mlxtend.frequent_patterns import apriori, association_rules

# Thresholds exactly as stated in the question.
MIN_SUPPORT_COUNT = 3    # an itemset must appear in at least 3 transactions
MIN_CONFIDENCE = 0.80    # min conf 80%

# One-hot encode: one row per transaction, one column per distinct item.
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# apriori() takes support as a fraction of all transactions, so derive it
# from the count instead of hard-coding it (3/5 = 0.6 for this dataset).
min_support_fraction = MIN_SUPPORT_COUNT / len(transactions)

frequent_itemsets = apriori(
    df,
    min_support=min_support_fraction,
    use_colnames=True   # report item names instead of column indices
)

# Generate association rules that reach the confidence threshold
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE
)

print("Frequent Itemsets:")
print(frequent_itemsets)

print("\nAssociation Rules:")
print(rules[['antecedents','consequents','support','confidence','lift']])

Frequent Itemsets:
   support itemsets
0      0.6      (A)
1      0.6      (B)

Association Rules:
Empty DataFrame
Columns: [antecedents, consequents, support, confidence, lift]
Index: []


# Q2- Perform FP-Growth algorithm to get the frequent itemsets given min support = 3 and min conf 80%.

In [None]:
# FP-Growth / FP-Tree using mlxtend

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Uses the `transactions` list defined in the first cell (T1..T5; T5 is an empty basket).
# Note: in market-basket mining an item is counted once per transaction,
# even if it were listed more than once inside that transaction.

# One-hot encode transactions
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# Given: min support count = 3
# We have 5 transactions => min_support fraction = 3/5 = 0.6
min_support = 3 / len(transactions)  # 0.6

# FP-Growth to get frequent itemsets
freq_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)

# Order by support (descending), breaking ties by a readable item label.
# The `itemsets` column holds frozensets, whose `<` is the subset relation
# (not a total order), so sorting on that column directly gives an
# ill-defined tie-break. A sorted, joined string label is deterministic.
itemset_labels = freq_itemsets["itemsets"].map(
    lambda items: ", ".join(sorted(map(str, items)))
)
freq_itemsets = (
    freq_itemsets
    .assign(_label=itemset_labels)
    .sort_values(["support", "_label"], ascending=[False, True])
    .drop(columns="_label")
    .reset_index(drop=True)
)

print("=== Frequent Itemsets (FP-Growth) ===")
print(freq_itemsets)

#  generate association rules
min_conf = 0.8  #min conf =80%
rules = association_rules(freq_itemsets, metric="confidence", min_threshold=min_conf)
rules = rules.sort_values(["confidence", "support", "lift"], ascending=[False, False, False]).reset_index(drop=True)

print(f"\n=== Association Rules (confidence >= {min_conf}) ===")
print(rules[["antecedents", "consequents", "support", "confidence", "lift"]])


# Q3- Perform ECLAT algorithm to get the frequent itemsets given min support = 3 and min conf 80%.

In [4]:
from pyECLAT import ECLAT

# Put the raw transactions into a pandas DataFrame: one row per transaction,
# one column per item position (shorter transactions are padded with missing values).
df = pd.DataFrame(data=transactions)
df

Unnamed: 0,0,1
0,A,B
1,A,
2,B,
3,A,B
4,,


In [5]:
# Build the ECLAT miner from the transaction DataFrame.
# verbose=True shows the loading bars while the data is ingested.
eclat_instance = ECLAT(
    data=df,
    verbose=True,
)
eclat_instance

100%|██████████| 3/3 [00:00<00:00, 1896.44it/s]
100%|██████████| 3/3 [00:00<?, ?it/s]
100%|██████████| 3/3 [00:00<00:00, 2973.98it/s]


<pyECLAT.pyECLAT.ECLAT at 0x1cf6034d040>

In [8]:
# Mine item combinations of size 1 to 3 whose support is at least 0.6
# (the required count of 3 out of the 5 transactions).
# fit() returns two objects, kept here as the indexes and the supports.
(get_ECLAT_indexes, get_ECLAT_supports) = eclat_instance.fit(
    min_support=0.6,
    min_combination=1,
    max_combination=3,
    separator=' & ',
    verbose=True,
)

Combination 1 by 1


2it [00:00, 168.35it/s]


Combination 2 by 2


1it [00:00, 170.78it/s]


Combination 3 by 3


0it [00:00, ?it/s]


In [9]:
# Display the supports returned by ECLAT's fit() for the itemsets that met min_support.
get_ECLAT_supports

{'B': 0.6, 'A': 0.6}

# Q4- Perform the Apriori algorithm to get the frequent itemsets, given a minimum support count of 3 and a minimum confidence of 80%.

In [None]:
# Q4 repeats Q1: see the Apriori cell under Q1 for the code and results.

# Q5- Write a function that computes the association metrics for two items X and Y and returns a dict with:
      - counts
      - support
      - confidence (X->Y, Y->X)
      - lift (X->Y)
      - interest (same as lift via alternative formula)
      - contingency table counts (a,b,c,d)
      - h-confidence

In [10]:
def association_metrics(transactions, X, Y):
    """Compute pairwise association metrics for two single items X and Y.

    Parameters
    ----------
    transactions : sequence of iterables
        One entry per transaction; each entry is a collection of hashable
        items. Each transaction is converted to a set, so an item repeated
        inside one transaction is counted once.
    X, Y : hashable
        The two items whose association is measured.

    Returns
    -------
    dict
        "counts"            -- number of transactions containing X, Y, and both
        "support"           -- those counts divided by the number of transactions
        "confidence"        -- confidence of X->Y and of Y->X
        "lift (X->Y)"       -- support(X,Y) / (support(X) * support(Y))
        "interest"          -- confidence(X->Y) / support(Y) (alternative lift formula)
        "contingency_table" -- the 2x2 counts a, b, c, d
        "h-confidence"      -- min(confidence(X->Y), confidence(Y->X))

    Any ratio whose denominator is zero is reported as 0. This includes every
    support when `transactions` is empty, instead of raising ZeroDivisionError.
    """
    n = len(transactions)

    # Contingency table counts
    a = b = c = d = 0
    for t in transactions:
        items = set(t)
        has_x = X in items
        has_y = Y in items
        if has_x and has_y:
            a += 1      # both X and Y
        elif has_x:
            b += 1      # X without Y
        elif has_y:
            c += 1      # Y without X
        else:
            d += 1      # neither

    # Counts
    count_X = a + b
    count_Y = a + c
    count_XY = a

    # Supports (guarded so an empty transaction list yields 0 rather than crashing)
    support_X = count_X / n if n else 0.0
    support_Y = count_Y / n if n else 0.0
    support_XY = count_XY / n if n else 0.0

    # Confidence
    conf_X_to_Y = support_XY / support_X if support_X != 0 else 0
    conf_Y_to_X = support_XY / support_Y if support_Y != 0 else 0

    # Lift
    lift_XY = support_XY / (support_X * support_Y) if support_X * support_Y != 0 else 0

    # Interest (alternative lift formula); may differ from lift in the last
    # floating-point digit because the operations are applied in another order.
    interest = conf_X_to_Y / support_Y if support_Y != 0 else 0

    # h-confidence
    h_confidence = min(conf_X_to_Y, conf_Y_to_X)

    return {
        "counts": {
            "X": count_X,
            "Y": count_Y,
            "X_and_Y": count_XY
        },
        "support": {
            "X": support_X,
            "Y": support_Y,
            "X_and_Y": support_XY
        },
        "confidence": {
            "X->Y": conf_X_to_Y,
            "Y->X": conf_Y_to_X
        },
        "lift (X->Y)": lift_XY,
        "interest": interest,
        "contingency_table": {
            "a (X,Y)": a,
            "b (X,¬Y)": b,
            "c (¬X,Y)": c,
            "d (¬X,¬Y)": d
        },
        "h-confidence": h_confidence
    }

# Evaluate the metrics for the item pair (A, B) and print one "name : value" line per metric.
result = association_metrics(transactions, 'A', 'B')

for metric_name, metric_value in result.items():
    print(metric_name, ":", metric_value)


counts : {'X': 3, 'Y': 3, 'X_and_Y': 2}
support : {'X': 0.6, 'Y': 0.6, 'X_and_Y': 0.4}
confidence : {'X->Y': 0.6666666666666667, 'Y->X': 0.6666666666666667}
lift (X->Y) : 1.1111111111111112
interest : 1.1111111111111114
contingency_table : {'a (X,Y)': 2, 'b (X,¬Y)': 1, 'c (¬X,Y)': 1, 'd (¬X,¬Y)': 1}
h-confidence : 0.6666666666666667
