In [27]:
import pandas as pd
import itertools
from collections import defaultdict
import time
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth

datasets = {
    1: 'amazon_items.csv',
    2: 'bestbuy_items.csv',
    3: 'kmart_items.csv',
    4: 'target_items.csv',
    5: 'walmart_items.csv'
}

print("Available Datasets:")
for key, value in datasets.items():
    print(f"{key}: {value}")

dataset_choice = int(input("Enter the dataset number you want to load (1-5): "))

if dataset_choice in datasets:
    dataset_path = datasets[dataset_choice]
    df = pd.read_csv(dataset_path)
    print(f"\nLoaded dataset from: {dataset_path}")
    print(df.head())

    transactions = df['Items'].apply(lambda x: x.split(', ')).tolist()
    all_items = sorted(set(item for transaction in transactions for item in transaction))

    print("Transactions:", transactions)
    print("Number of Transactions:", len(transactions))
    print("All Unique Items:", all_items)
    print("Number of Unique Items:", len(all_items))
else:
    print("Invalid choice. Please select a number between 1 and 5.")

min_support = float(input("Enter the minimum support value: "))
min_confidence = float(input("Enter the minimum confidence value: "))

transaction_df = pd.DataFrame([{item: (item in transaction) for item in all_items} for transaction in transactions])


  and should_run_async(code)


Available Datasets:
1: /content/amazon_items - Sheet1.csv
2: /content/bestbuy_items - Sheet1.csv
3: /content/kmart_items - Sheet1 (1).csv
4: /content/target_items - Sheet1 (1).csv
5: /content/walmart_items - Sheet1.csv
Enter the dataset number you want to load (1-5): 1

Loaded dataset from: /content/amazon_items - Sheet1.csv
   TransactionID                                      Items
0              1  Kindle, Cable, Stand, Smartwatch, Headset
1              2       Fire, Kindle, Echo, Charger, Speaker
2              3    Smartwatch, Stand, Cable, Charger, Fire
3              4      Echo, Kindle, Fire, Smartwatch, Cable
4              5       Laptop, Charger, Smartwatch, Headset
Transactions: [['Kindle', 'Cable', 'Stand', 'Smartwatch', 'Headset'], ['Fire', 'Kindle', 'Echo', 'Charger', 'Speaker'], ['Smartwatch', 'Stand', 'Cable', 'Charger', 'Fire'], ['Echo', 'Kindle', 'Fire', 'Smartwatch', 'Cable'], ['Laptop', 'Charger', 'Smartwatch', 'Headset'], ['Kindle', 'Cable', 'Speaker', 'Echo', 'S

In [28]:
def generate_candidates(itemset, length):
    """Generate all combinations of itemsets of a given length."""
    return list(itertools.combinations(itemset, length))

def calculate_support(itemset, transactions):
    """Calculate support of an itemset."""
    count = 0
    for transaction in transactions:
        if all(item in transaction for item in itemset):
            count += 1
    return count / len(transactions)

def generate_frequent_itemsets(transactions, all_items, min_support):

    frequent_itemsets = []
    k = 1
    while True:
        candidate_itemsets = generate_candidates(all_items, k)
        current_frequent_itemsets = []


        for itemset in candidate_itemsets:
            support = calculate_support(itemset, transactions)
            if support >= min_support:
                current_frequent_itemsets.append((itemset, support))

        if not current_frequent_itemsets:
            break


        frequent_itemsets.extend(current_frequent_itemsets)
        k += 1

    return frequent_itemsets


start_time = time.time()
frequent_itemsets = generate_frequent_itemsets(transactions, all_items, min_support)
end_time = time.time()
bf_time = end_time - start_time

print("\nFrequent Itemsets:")
for itemset, support in frequent_itemsets:
    print(f"Itemset: {itemset}, Support: {support}")



Frequent Itemsets:
Itemset: ('Cable',), Support: 0.52
Itemset: ('Charger',), Support: 0.44
Itemset: ('Echo',), Support: 0.48
Itemset: ('Fire',), Support: 0.48
Itemset: ('Headset',), Support: 0.32
Itemset: ('Kindle',), Support: 0.48
Itemset: ('Laptop',), Support: 0.2
Itemset: ('Smartwatch',), Support: 0.64
Itemset: ('Speaker',), Support: 0.4
Itemset: ('Stand',), Support: 0.32
Itemset: ('Cable', 'Charger'), Support: 0.2
Itemset: ('Cable', 'Echo'), Support: 0.2
Itemset: ('Cable', 'Fire'), Support: 0.28
Itemset: ('Cable', 'Headset'), Support: 0.2
Itemset: ('Cable', 'Kindle'), Support: 0.28
Itemset: ('Cable', 'Smartwatch'), Support: 0.32
Itemset: ('Charger', 'Fire'), Support: 0.28
Itemset: ('Charger', 'Smartwatch'), Support: 0.28
Itemset: ('Echo', 'Fire'), Support: 0.2
Itemset: ('Echo', 'Kindle'), Support: 0.28
Itemset: ('Echo', 'Smartwatch'), Support: 0.32
Itemset: ('Echo', 'Speaker'), Support: 0.24
Itemset: ('Fire', 'Smartwatch'), Support: 0.32
Itemset: ('Fire', 'Speaker'), Support: 0.2


  and should_run_async(code)


In [29]:
def generate_association_rules(frequent_itemsets, min_confidence):
    rules = []
    for itemset, support in frequent_itemsets:
        if len(itemset) < 2:
            continue
        for i in range(1, len(itemset)):
            antecedents = list(itertools.combinations(itemset, i))
            for antecedent in antecedents:
                consequent = tuple(item for item in itemset if item not in antecedent)
                antecedent_support = calculate_support(antecedent, transactions)
                if antecedent_support > 0:
                    confidence = support / antecedent_support
                    if confidence >= min_confidence:
                        rules.append((antecedent, consequent, support, confidence))
    return rules

association_rules_bf = generate_association_rules(frequent_itemsets, min_confidence)
print("\nAssociation Rules (Brute Force):")
for antecedent, consequent, support, confidence in association_rules_bf:
    print(f"Rule: {antecedent} -> {consequent}, Support: {support}, Confidence: {confidence}")





Association Rules (Brute Force):
Rule: ('Cable',) -> ('Fire',), Support: 0.28, Confidence: 0.5384615384615385
Rule: ('Fire',) -> ('Cable',), Support: 0.28, Confidence: 0.5833333333333334
Rule: ('Headset',) -> ('Cable',), Support: 0.2, Confidence: 0.625
Rule: ('Cable',) -> ('Kindle',), Support: 0.28, Confidence: 0.5384615384615385
Rule: ('Kindle',) -> ('Cable',), Support: 0.28, Confidence: 0.5833333333333334
Rule: ('Cable',) -> ('Smartwatch',), Support: 0.32, Confidence: 0.6153846153846154
Rule: ('Smartwatch',) -> ('Cable',), Support: 0.32, Confidence: 0.5
Rule: ('Charger',) -> ('Fire',), Support: 0.28, Confidence: 0.6363636363636365
Rule: ('Fire',) -> ('Charger',), Support: 0.28, Confidence: 0.5833333333333334
Rule: ('Charger',) -> ('Smartwatch',), Support: 0.28, Confidence: 0.6363636363636365
Rule: ('Echo',) -> ('Kindle',), Support: 0.28, Confidence: 0.5833333333333334
Rule: ('Kindle',) -> ('Echo',), Support: 0.28, Confidence: 0.5833333333333334
Rule: ('Echo',) -> ('Smartwatch',), Su

  and should_run_async(code)


In [30]:
# Apriori
start_time = time.time()
frequent_itemsets_apriori = apriori(transaction_df, min_support=min_support, use_colnames=True)
end_time = time.time()
apriori_time = end_time - start_time

association_rules_apriori = association_rules(frequent_itemsets_apriori, metric="confidence", min_threshold=min_confidence)

print("\nFrequent Itemsets (Apriori):")
print(frequent_itemsets_apriori)
print("\nAssociation Rules (Apriori):")
for _, row in association_rules_apriori.iterrows():
    print(f"Rule: {tuple(row['antecedents'])} -> {tuple(row['consequents'])}, Support: {row['support']:.4f}, Confidence: {row['confidence']:.4f}")



Frequent Itemsets (Apriori):
    support                     itemsets
0      0.52                      (Cable)
1      0.44                    (Charger)
2      0.48                       (Echo)
3      0.48                       (Fire)
4      0.32                    (Headset)
5      0.48                     (Kindle)
6      0.20                     (Laptop)
7      0.64                 (Smartwatch)
8      0.40                    (Speaker)
9      0.32                      (Stand)
10     0.20             (Charger, Cable)
11     0.20                (Echo, Cable)
12     0.28                (Cable, Fire)
13     0.20             (Headset, Cable)
14     0.28              (Kindle, Cable)
15     0.32          (Smartwatch, Cable)
16     0.28              (Charger, Fire)
17     0.28        (Charger, Smartwatch)
18     0.20                 (Echo, Fire)
19     0.28               (Kindle, Echo)
20     0.32           (Smartwatch, Echo)
21     0.24              (Speaker, Echo)
22     0.32           (Smar

  and should_run_async(code)


In [31]:
# FP-Growth
start_time = time.time()
frequent_itemsets_fpgrowth = fpgrowth(transaction_df, min_support=min_support, use_colnames=True)
end_time = time.time()
fpgrowth_time = end_time - start_time

association_rules_fpgrowth = association_rules(frequent_itemsets_fpgrowth, metric="confidence", min_threshold=min_confidence)

print("\nFrequent Itemsets (FP-Growth):")
print(frequent_itemsets_fpgrowth)
print("\nAssociation Rules (FP-Growth):")
for _, row in association_rules_fpgrowth.iterrows():
    print(f"Rule: {tuple(row['antecedents'])} -> {tuple(row['consequents'])}, Support: {row['support']:.4f}, Confidence: {row['confidence']:.4f}")




Frequent Itemsets (FP-Growth):
    support                     itemsets
0      0.64                 (Smartwatch)
1      0.52                      (Cable)
2      0.48                     (Kindle)
3      0.32                      (Stand)
4      0.32                    (Headset)
5      0.48                       (Fire)
6      0.48                       (Echo)
7      0.44                    (Charger)
8      0.40                    (Speaker)
9      0.20                     (Laptop)
10     0.32          (Smartwatch, Cable)
11     0.28              (Kindle, Cable)
12     0.24         (Smartwatch, Kindle)
13     0.24          (Smartwatch, Stand)
14     0.28            (Kindle, Headset)
15     0.20             (Headset, Cable)
16     0.20     (Kindle, Headset, Cable)
17     0.32           (Smartwatch, Fire)
18     0.28                (Cable, Fire)
19     0.20    (Smartwatch, Cable, Fire)
20     0.28               (Kindle, Echo)
21     0.20                 (Echo, Fire)
22     0.32           (Sm

  and should_run_async(code)


In [32]:
# Performance Comparison
print("\nPerformance Comparison:")
print(f"Brute Force Time: {bf_time:.4f} seconds")
print(f"Apriori Time: {apriori_time:.4f} seconds")
print(f"FP-Growth Time: {fpgrowth_time:.4f} seconds")


fastest_algorithm = min([('Brute Force', bf_time), ('Apriori', apriori_time), ('FP-Growth', fpgrowth_time)], key=lambda x: x[1])
print(f"The fastest algorithm is {fastest_algorithm[0]} with a time of {fastest_algorithm[1]:.4f} seconds.")



Performance Comparison:
Brute Force Time: 0.0082 seconds
Apriori Time: 0.0133 seconds
FP-Growth Time: 0.0063 seconds
The fastest algorithm is FP-Growth with a time of 0.0063 seconds.


  and should_run_async(code)


In [33]:
# Compare the number of frequent itemsets generated by each algorithm
print("\nNumber of Frequent Itemsets Generated:")
print(f"Brute Force: {len(frequent_itemsets)}")
print(f"Apriori: {len(frequent_itemsets_apriori)}")
print(f"FP-Growth: {len(frequent_itemsets_fpgrowth)}")

# Algorithm with Most Itemsets
most_itemsets = max([('Brute Force', len(frequent_itemsets)), ('Apriori', len(frequent_itemsets_apriori)), ('FP-Growth', len(frequent_itemsets_fpgrowth))], key=lambda x: x[1])
print(f"The algorithm that generated the most frequent itemsets is {most_itemsets[0]} with {most_itemsets[1]} itemsets.")



Number of Frequent Itemsets Generated:
Brute Force: 31
Apriori: 31
FP-Growth: 31
The algorithm that generated the most frequent itemsets is Brute Force with 31 itemsets.


  and should_run_async(code)
