In [1]:
import pandas as pd
from itertools import combinations

# Step 1: Load Dataset

In [2]:
# Load the raw baskets. Each row of the 'Items' column is split on commas into
# a list of item names.
df = pd.read_csv('market.csv')
transactions = (
    df['Items']
    .dropna()  # a missing cell has no .split and would raise AttributeError
    .apply(lambda x: [item.strip() for item in str(x).split(',') if item.strip()])
    # Strip whitespace so "Milk, Bread" and "Milk,Bread" yield the same items,
    # and drop empty tokens produced by trailing or doubled commas.
    .reset_index(drop=True)  # keep a contiguous 0..n-1 index after dropna
)

In [3]:
# Preview the first five parsed transactions, numbered from 1.
print("🧾 Sample Transactions:")
preview = transactions[:5]
for number, basket in enumerate(preview, start=1):
    print(f"Transaction {number}: {basket}")

🧾 Sample Transactions:
Transaction 1: ['Milk', 'Bread', 'Butter']
Transaction 2: ['Milk', 'Bread', 'Jam']
Transaction 3: ['Bread', 'Butter', 'Cheese']
Transaction 4: ['Milk', 'Bread', 'Butter', 'Eggs']
Transaction 5: ['Bread', 'Butter', 'Cheese', 'Eggs']


# Step 2: Define Minimum Support and Confidence

In [4]:
# Mining thresholds: an itemset is kept when its support is >= min_support,
# and a rule is kept when its confidence is >= min_confidence.
min_support, min_confidence = 0.3, 0.6

# Step 3: Collect All Unique Items

In [5]:
# Union every transaction into one set of distinct items, then sort it so the
# item order is stable.
all_items = sorted(set().union(*transactions))


# Step 4: Function to Calculate Support

In [6]:
def calculate_support(itemset, transaction_list=None):
    """Return the fraction of transactions that contain every item in ``itemset``.

    Parameters
    ----------
    itemset : iterable
        Items that must all be present (set, frozenset, tuple, list, ...).
    transaction_list : sequence of iterables, optional
        Transactions to scan. When omitted, the notebook-level
        ``transactions`` variable is used, so existing one-argument calls
        behave as before.

    Returns
    -------
    float
        Support in the range [0, 1]; ``0.0`` when there are no transactions.
    """
    if transaction_list is None:
        transaction_list = transactions
    total = len(transaction_list)
    if total == 0:
        # Avoid ZeroDivisionError on an empty dataset.
        return 0.0
    wanted = set(itemset)  # accept any iterable, not only set-like objects
    hits = sum(1 for trans in transaction_list if wanted.issubset(trans))
    return hits / total


# Step 5: Generate Frequent Itemsets

In [7]:
def apriori(transactions, min_support):
    """Find all frequent itemsets in ``transactions``, level by level.

    The search starts from single items and repeatedly joins the frequent
    k-itemsets into (k+1)-item candidates until no candidate survives.
    Only the ``transactions`` argument is used; no notebook-level variable
    is read.

    Parameters
    ----------
    transactions : iterable of iterables
        Each element is one transaction (a collection of hashable items).
    min_support : float
        Minimum fraction of transactions an itemset must appear in.

    Returns
    -------
    list of (frozenset, float)
        Frequent itemsets with their support, ordered by itemset size and,
        within a size, by order of generation.

    Each frequent itemset is also printed as it is found.
    """
    # Convert each transaction to a set once so subset checks are cheap.
    baskets = [set(trans) for trans in transactions]
    total = len(baskets)
    freq_itemsets = []
    if total == 0:
        # Nothing to mine, and support would divide by zero.
        return freq_itemsets

    def support_of(itemset):
        """Fraction of baskets that contain every item of ``itemset``."""
        return sum(1 for basket in baskets if itemset <= basket) / total

    k = 1
    # Sorted so the level-1 candidates come out in a stable order.
    current_itemsets = [frozenset([item]) for item in sorted(set().union(*baskets))]

    while current_itemsets:
        valid_itemsets = []
        for itemset in current_itemsets:
            support = support_of(itemset)
            if support >= min_support:
                valid_itemsets.append((itemset, support))
                print(f"Frequent Itemset: {set(itemset)}, Support: {round(support,2)}")
        freq_itemsets.extend(valid_itemsets)

        # Generate next-level candidates by joining pairs of frequent k-itemsets.
        frequent_k = {itemset for itemset, _ in valid_itemsets}
        next_items = []
        seen = set()  # O(1) duplicate check; next_items keeps generation order
        for (left, _), (right, _) in combinations(valid_itemsets, 2):
            union = left | right
            if len(union) != k + 1 or union in seen:
                continue
            seen.add(union)
            # Apriori prune: a candidate can only be frequent if every one of
            # its k-item subsets is frequent, so skip the support scan otherwise.
            if all(union - {item} in frequent_k for item in union):
                next_items.append(union)
        current_itemsets = next_items
        k += 1

    return freq_itemsets

# Step 6: Run Apriori

In [8]:
# Show the configured threshold in the header so it stays correct when
# min_support is changed.
print(f"\nFrequent Itemsets (Support ≥ {min_support}):")
frequent_itemsets = apriori(transactions, min_support)


Frequent Itemsets (Support ≥ 0.3):
Frequent Itemset: {'Bread'}, Support: 0.95
Frequent Itemset: {'Butter'}, Support: 0.8
Frequent Itemset: {'Cheese'}, Support: 0.3
Frequent Itemset: {'Eggs'}, Support: 0.4
Frequent Itemset: {'Jam'}, Support: 0.3
Frequent Itemset: {'Milk'}, Support: 0.65
Frequent Itemset: {'Butter', 'Bread'}, Support: 0.8
Frequent Itemset: {'Bread', 'Cheese'}, Support: 0.3
Frequent Itemset: {'Eggs', 'Bread'}, Support: 0.35
Frequent Itemset: {'Jam', 'Bread'}, Support: 0.3
Frequent Itemset: {'Bread', 'Milk'}, Support: 0.6
Frequent Itemset: {'Butter', 'Cheese'}, Support: 0.3
Frequent Itemset: {'Butter', 'Milk'}, Support: 0.45
Frequent Itemset: {'Butter', 'Bread', 'Cheese'}, Support: 0.3
Frequent Itemset: {'Butter', 'Bread', 'Milk'}, Support: 0.45



# Step 7: Generate Association Rules

In [9]:
# Build rules X → Y from every frequent itemset with at least two items.
# Each kept rule is stored as (antecedent, consequent, support, confidence, lift).
print(f"\n Association Rules (Confidence ≥ {min_confidence}):")
rules = []
for itemset, support in frequent_itemsets:
    # Single items cannot be split into a rule. A zero-support itemset (only
    # possible when min_support <= 0) would divide by zero below.
    if len(itemset) < 2 or support == 0:
        continue
    # The itemset's support was computed during mining and is the same for
    # every antecedent, so reuse it instead of rescanning the transactions.
    support_XY = support
    for i in range(1, len(itemset)):
        for antecedent in combinations(itemset, i):
            antecedent = set(antecedent)
            # i < len(itemset), so the consequent is never empty.
            consequent = itemset - antecedent
            confidence = support_XY / calculate_support(antecedent)
            if confidence < min_confidence:
                continue
            # Lift is only needed for rules that pass the confidence filter.
            lift = confidence / calculate_support(consequent)
            rules.append((antecedent, consequent, support_XY, confidence, lift))
            print(f"Rule: {antecedent} → {consequent} | "
                  f"Support: {round(support_XY,2)} | "
                  f"Confidence: {round(confidence,2)} | "
                  f"Lift: {round(lift,2)}")


 Association Rules (Confidence ≥ 0.6):
Rule: {'Butter'} → frozenset({'Bread'}) | Support: 0.8 | Confidence: 1.0 | Lift: 1.05
Rule: {'Bread'} → frozenset({'Butter'}) | Support: 0.8 | Confidence: 0.84 | Lift: 1.05
Rule: {'Cheese'} → frozenset({'Bread'}) | Support: 0.3 | Confidence: 1.0 | Lift: 1.05
Rule: {'Eggs'} → frozenset({'Bread'}) | Support: 0.35 | Confidence: 0.87 | Lift: 0.92
Rule: {'Jam'} → frozenset({'Bread'}) | Support: 0.3 | Confidence: 1.0 | Lift: 1.05
Rule: {'Bread'} → frozenset({'Milk'}) | Support: 0.6 | Confidence: 0.63 | Lift: 0.97
Rule: {'Milk'} → frozenset({'Bread'}) | Support: 0.6 | Confidence: 0.92 | Lift: 0.97
Rule: {'Cheese'} → frozenset({'Butter'}) | Support: 0.3 | Confidence: 1.0 | Lift: 1.25
Rule: {'Milk'} → frozenset({'Butter'}) | Support: 0.45 | Confidence: 0.69 | Lift: 0.87
Rule: {'Cheese'} → frozenset({'Bread', 'Butter'}) | Support: 0.3 | Confidence: 1.0 | Lift: 1.25
Rule: {'Butter', 'Cheese'} → frozenset({'Bread'}) | Support: 0.3 | Confidence: 1.0 | Lift: 1

# Step 8: Identify the Strongest Rule and State the Conclusion

In [12]:
# Pick the rule with the highest lift and summarise it in plain language.
# Each rule is (antecedent, consequent, support, confidence, lift).
if rules:
    strongest_rule = max(rules, key=lambda x: x[4])  # highest lift
    print("\nStrongest Association Rule:")
    print(f"{strongest_rule[0]} → {strongest_rule[1]}")
    print(f"Support: {round(strongest_rule[2], 2)}, "
          f"Confidence: {round(strongest_rule[3], 2)}, "
          f"Lift: {round(strongest_rule[4], 2)}")

    # Name every item on each side of the rule. Taking only the first element
    # of a set would report an arbitrary single item for multi-item rules.
    antecedent_text = ", ".join(sorted(strongest_rule[0]))
    consequent_text = ", ".join(sorted(strongest_rule[1]))

    print("\nConclusion:")
    print(f"The pair {strongest_rule[0]} → {strongest_rule[1]} is the strongest association rule.")
    print("It indicates that when customers buy", antecedent_text,
          "they are highly likely to buy", consequent_text, "together.")
else:
    print("\nNo strong rules found with the given thresholds.")



Strongest Association Rule:
{'Cheese'} → frozenset({'Butter'})
Support: 0.3, Confidence: 1.0, Lift: 1.25

Conclusion:
The pair {'Cheese'} → frozenset({'Butter'}) is the strongest association rule.
It indicates that when customers buy Cheese they are highly likely to buy Butter together.
