In [1]:
import pandas as pd
from itertools import combinations

# Step 1: Load the dataset
# NOTE: absolute, machine-specific path; adjust it when running elsewhere.
file_path = r'C:\Users\User\Downloads\Association Rules\Association Rules\Online retail.xlsx'
# header=None: the first row is read as data (assumes the sheet has no header row).
data = pd.read_excel(file_path, header=None)

# Step 2: Data Preprocessing
# Column 0 holds one comma-separated transaction per row; empty rows are dropped.
# Items are whitespace-stripped so that 'milk' and ' milk' count as the same
# product, empty tokens (e.g. from a trailing comma) are discarded, and str()
# keeps a non-text cell from crashing on .split.
transactions = data[0].dropna().apply(
    lambda row: [item.strip() for item in str(row).split(',') if item.strip()]
)

# Step 3: Generate frequent itemsets
def get_itemsets(transactions, length):
    """Count how often each itemset of size ``length`` occurs.

    Every transaction is de-duplicated and sorted first, so one itemset is
    always represented by the same tuple and is counted at most once per
    transaction.

    Args:
        transactions: Iterable of transactions, each an iterable of items.
        length: Number of items in each generated itemset.

    Returns:
        The ``value_counts()`` of all generated item tuples: a Series whose
        index entries are the itemsets and whose values are the number of
        transactions containing them.
    """
    candidates = [
        combo
        for basket in transactions
        for combo in combinations(sorted(set(basket)), length)
    ]
    occurrences = pd.Series(candidates)
    return occurrences.value_counts()

def apriori(transactions, min_support):
    """Find all frequent itemsets level by level (sizes 1, 2, 3, ...).

    At each level the k-itemsets are counted with ``get_itemsets`` and those
    whose support (count / number of transactions) is at least
    ``min_support`` are kept. The search stops at the first level that yields
    no frequent itemset.

    Args:
        transactions: The transactions to mine; a pandas Series (or any
            object with ``.tolist()``) or a plain iterable of item lists.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to be reported.

    Returns:
        DataFrame with columns ``itemset`` (list of items) and ``support``
        (fraction of transactions containing the itemset).
    """
    columns = ['itemset', 'support']

    # Accept a Series as before, but also any other iterable of transactions.
    if hasattr(transactions, 'tolist'):
        baskets = transactions.tolist()
    else:
        baskets = list(transactions)

    total = len(baskets)
    if total == 0:
        # Nothing to mine; avoids dividing by a zero transaction count.
        return pd.DataFrame([], columns=columns)

    frequent_itemsets = []
    k = 1
    while True:
        # Count every k-itemset present in the (pruned) transactions.
        counts = get_itemsets(baskets, k)

        # Keep only the itemsets that reach the support threshold.
        frequent = counts[counts / total >= min_support]

        # Stop if no itemsets meet the minimum support.
        if frequent.empty:
            break

        frequent_itemsets.extend(
            (list(itemset), count / total) for itemset, count in frequent.items()
        )

        # Apriori pruning: every subset of a frequent itemset is itself
        # frequent, so an item that is in no frequent k-itemset cannot be in
        # any frequent (k+1)-itemset. Dropping such items leaves the counts of
        # all remaining candidates untouched while avoiding the enumeration of
        # combinations that could never qualify.
        surviving = {item for itemset, _ in frequent.items() for item in itemset}
        baskets = [
            [item for item in basket if item in surviving] for basket in baskets
        ]
        k += 1

    return pd.DataFrame(frequent_itemsets, columns=columns)

# Minimum share of transactions an itemset must appear in to be kept
min_support = 0.01
frequent_itemsets = apriori(transactions=transactions, min_support=min_support)

# Show the mined itemsets under a heading
print("Frequent Itemsets:", frequent_itemsets, sep="\n")

# Step 4: Generate association rules
def generate_rules(frequent_itemsets, min_confidence):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    Every itemset with at least two items is split in all possible ways into
    a non-empty antecedent and a non-empty consequent. A rule is kept when
    ``confidence = support(itemset) / support(antecedent)`` is at least
    ``min_confidence``.

    Args:
        frequent_itemsets: DataFrame with an ``itemset`` column (list of
            items) and a ``support`` column, as returned by ``apriori``. It
            must also contain the antecedent and consequent of every rule.
        min_confidence: Minimum confidence for a rule to be reported.

    Returns:
        DataFrame with columns ``antecedent`` (tuple), ``consequent`` (list,
        in the item order of the source itemset), ``support`` (support of the
        full itemset), ``confidence`` and ``lift``
        (``confidence / support(consequent)``). The columns are present even
        when no rule qualifies.

    Raises:
        IndexError: If the support of a needed antecedent or consequent is
            missing from ``frequent_itemsets``.
    """
    columns = ['antecedent', 'consequent', 'support', 'confidence', 'lift']
    entries = list(zip(frequent_itemsets['itemset'], frequent_itemsets['support']))

    # Index the supports once instead of scanning the whole frame per rule.
    # setdefault keeps the first row when the same itemset appears twice.
    support_by_itemset = {}
    for items, support in entries:
        support_by_itemset.setdefault(frozenset(items), support)

    def support_of(items):
        """Return the recorded support of ``items`` regardless of item order."""
        try:
            return support_by_itemset[frozenset(items)]
        except KeyError:
            # IndexError is the exception the previous positional lookup raised.
            raise IndexError(
                f"support for itemset {list(items)!r} not found in frequent_itemsets"
            ) from None

    rules = []
    for itemset, itemset_support in entries:
        if len(itemset) < 2:
            continue  # a single item cannot be split into two sides

        for size in range(1, len(itemset)):
            for antecedent in combinations(itemset, size):
                chosen = set(antecedent)
                # Preserve the itemset's own order so the result does not
                # depend on set iteration order (which varies between runs).
                consequent = [item for item in itemset if item not in chosen]
                if not consequent:
                    continue

                confidence = itemset_support / support_of(antecedent)
                if confidence >= min_confidence:
                    rules.append({
                        'antecedent': antecedent,
                        'consequent': consequent,
                        'support': itemset_support,
                        'confidence': confidence,
                        'lift': confidence / support_of(consequent),
                    })

    return pd.DataFrame(rules, columns=columns)

# Rules below this confidence are discarded
min_confidence = 0.5
rules = generate_rules(frequent_itemsets, min_confidence=min_confidence)

# Show the rules under a heading, preceded by a blank line
print("\nAssociation Rules:", rules, sep="\n")

# Step 5: Analysis
# Persist both result tables as CSV (row index omitted) in the working directory
frequent_itemsets.to_csv(path_or_buf='frequent_itemsets_manual.csv', index=False)
rules.to_csv(path_or_buf='association_rules_manual.csv', index=False)

print("\nAnalysis complete. Results saved as 'frequent_itemsets_manual.csv' and 'association_rules_manual.csv'.")


Frequent Itemsets:
                                      itemset   support
0                             [mineral water]  0.238368
1                                      [eggs]  0.179709
2                                 [spaghetti]  0.174110
3                              [french fries]  0.170911
4                                 [chocolate]  0.163845
..                                        ...       ...
252              [chocolate, milk, spaghetti]  0.010932
253              [chocolate, eggs, spaghetti]  0.010532
254     [mineral water, olive oil, spaghetti]  0.010265
255        [eggs, ground beef, mineral water]  0.010132
256  [french fries, mineral water, spaghetti]  0.010132

[257 rows x 2 columns]

Association Rules:
            antecedent       consequent   support  confidence      lift
0  (ground beef, milk)  [mineral water]  0.011065    0.503030  2.110308
1  (eggs, ground beef)  [mineral water]  0.010132    0.506667  2.125563

Analysis complete. Results saved as 'frequent_it