In [4]:
import pandas as pd
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder
import time

In [6]:
import os
import csv
import random

# Define the items available in the supermarket
items = ['Diapers', 'Detergent', 'Shampoo', 'Toothpaste', 'Cereal', 'Milk', 'Bread', 'Eggs', 'Coffee', 'Soap']

# Function to create a dataset of 20 transactions
def create_transactions(num_transactions=20, num_items=10):
    transactions = []
    for _ in range(num_transactions):
        # Each transaction contains a random subset of the items, but with some logic to make it non-random
        num_items_in_transaction = random.randint(2, 5)  # Each transaction will have between 2 and 5 items
        transaction = random.sample(items, num_items_in_transaction)
        transactions.append(transaction)
    return transactions

# Save transactions to CSV file in the 'Downloads' folder
def save_to_csv(transactions, filename):
    # Path to Downloads folder
    downloads_folder = os.path.join(os.path.expanduser('~'), 'Downloads')
    
    # Ensure the file is saved in the Downloads folder
    with open(os.path.join(downloads_folder, filename), mode='w', newline='') as file:
        writer = csv.writer(file)
        for transaction in transactions:
            writer.writerow(transaction)

# Generate and save 5 different datasets, each with 20 transactions
for i in range(5):
    transactions = create_transactions(num_transactions=20)
    save_to_csv(transactions, f'dataset_{i+1}.csv')

print("5 datasets created and saved in the Downloads folder.")


5 datasets created and saved in the Downloads folder.


In [None]:
import os
import csv
import pandas as pd
import time
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder

file_paths = {
    "Amazon": r"C:\Users\DELL\Downloads\Dobariya_Aesha_midtermproj\amazon.csv",
    "BestBuy": r"C:\Users\DELL\Downloads\Dobariya_Aesha_midtermproj\BestBuy.csv",
    "KMart": r"C:\Users\DELL\Downloads\Dobariya_Aesha_midtermproj\Kmart.csv",
    "Nike": r"C:\Users\DELL\Downloads\Dobariya_Aesha_midtermproj\Nike.csv"
}

# Extract transactions from CSV file
def load_transactions(file_path):
    with open(file_path, newline='') as csvfile:
        reader = csv.reader(csvfile)
        transactions = [list(filter(None, row)) for row in reader]  # Filter out empty items in rows
    return transactions

# Applying Brute force method to generate frequent items
def generate_frequent_itemsets(transactions, support_threshold):
    item_count = {}
    for transaction in transactions:
        for item in transaction:
            item_count[item] = item_count.get(item, 0) + 1

    frequent_itemsets = {1: {item: count for item, count in item_count.items() if count / len(transactions) >= support_threshold}}

    k = 2
    while True:
        prev_itemsets = list(frequent_itemsets[k - 1].keys())
        new_itemsets = list(combinations(prev_itemsets, k))
        item_count = {}
        for transaction in transactions:
            transaction_set = set(transaction)
            for itemset in new_itemsets:
                if set(itemset).issubset(transaction_set):
                    item_count[itemset] = item_count.get(itemset, 0) + 1

        frequent_itemsets[k] = {itemset: count for itemset, count in item_count.items() if count / len(transactions) >= support_threshold}
        if not frequent_itemsets[k]:
            del frequent_itemsets[k]
            break
        k += 1
    return frequent_itemsets

# Applying Apriori Algorithm
def apriori_algorithm(transactions, support_threshold, confidence_threshold):
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = apriori(df, min_support=support_threshold, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_threshold)

    return frequent_itemsets, rules

# Applying FP-Growth Algorithm
def fpgrowth_algorithm(transactions, support_threshold, confidence_threshold):
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = fpgrowth(df, min_support=support_threshold, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_threshold)

    return frequent_itemsets, rules

# Comparing by timing function
def measure_execution_time(algorithm_func, *args):
    start_time = time.time()
    result = algorithm_func(*args)
    end_time = time.time()
    return result, end_time - start_time

# Source code
while True:
    # user-defined entry or exit
    print("\nAvailable databases:")
    for i, name in enumerate(file_paths.keys(), 1):
        print(f"{i}. {name}")
    print("0. Exit")
    
    choice = int(input("Enter the number corresponding to the database you'd like to choose (or 0 to exit): "))

    # Exit the loop if the user chooses 0
    if choice == 0:
        print("Exiting the program.")
        break

    #selected database
    db_name = list(file_paths.keys())[choice - 1]

    # Load the selected transactions
    transactions = load_transactions(file_paths[db_name])
    print(f"Loaded {len(transactions)} transactions from {db_name}.")

    # User-defined for support and confidence thresholds
    support_threshold = float(input("Enter support threshold in % (e.g., 10 for 10%): ")) / 100
    confidence_threshold = float(input("Enter confidence threshold in % (e.g., 20 for 20%): ")) / 100

    print(f"\nProcessing {db_name} with support {support_threshold * 100}% and confidence {confidence_threshold * 100}%...")

    # Brute Force
    bf_result, bf_time = measure_execution_time(generate_frequent_itemsets, transactions, support_threshold)
    print(f"\nBrute Force Frequent Itemsets:\n{bf_result}")
    print(f"Brute Force Time: {bf_time:.4f}s")

    # Apriori
    apriori_result, apriori_time = measure_execution_time(apriori_algorithm, transactions, support_threshold, confidence_threshold)
    print(f"\nApriori Frequent Itemsets:\n{apriori_result[0]}")
    print(f"Apriori Rules:\n{apriori_result[1]}")
    print(f"Apriori Time: {apriori_time:.4f}s")

    # FP-Growth
    fp_result, fp_time = measure_execution_time(fpgrowth_algorithm, transactions, support_threshold, confidence_threshold)
    print(f"\nFP-Growth Frequent Itemsets:\n{fp_result[0]}")
    print(f"FP-Growth Rules:\n{fp_result[1]}")
    print(f"FP-Growth Time: {fp_time:.4f}s")

    #if user wants to analyze different dataset
    continue_choice = input("\nDo you want to analyze another dataset? (yes/no): ").strip().lower()
    if continue_choice != 'yes':
        print("Exiting the program.")
        break



Available databases:
1. Amazon
2. BestBuy
3. KMart
4. Nike
0. Exit


Enter the number corresponding to the database you'd like to choose (or 0 to exit):  2


Loaded 20 transactions from BestBuy.


Enter support threshold in % (e.g., 10 for 10%):  30
Enter confidence threshold in % (e.g., 20 for 20%):  60



Processing BestBuy with support 30.0% and confidence 60.0%...

Brute Force Frequent Itemsets:
{1: {'Cereal': 7, 'Eggs': 8, 'Milk': 11, 'Bread': 7, 'Coffee': 7, 'Detergent': 8, 'Soap': 6, 'Toothpaste': 6}, 2: {('Milk', 'Detergent'): 6}}
Brute Force Time: 0.0000s

Apriori Frequent Itemsets:
   support           itemsets
0     0.35            (Bread)
1     0.35           (Cereal)
2     0.35           (Coffee)
3     0.40        (Detergent)
4     0.40             (Eggs)
5     0.55             (Milk)
6     0.30             (Soap)
7     0.30       (Toothpaste)
8     0.30  (Milk, Detergent)
Apriori Rules:
   antecedents consequents  antecedent support  consequent support  support  \
0  (Detergent)      (Milk)                 0.4                0.55      0.3   

   confidence      lift  leverage  conviction  zhangs_metric  
0        0.75  1.363636      0.08         1.8       0.444444  
Apriori Time: 0.0478s

FP-Growth Frequent Itemsets:
   support           itemsets
0     0.40             (Egg

In [6]:
import pandas as pd
df = pd.read_csv(r"C:\Users\DELL\Downloads\Nike.csv", delimiter=';')
print(df)
df

                           Cereal,Eggs,Bread
0                             Detergent,Soap
1                       Bread,Cereal,Shampoo
2        Eggs,Diapers,Detergent,Milk,Shampoo
3                           Cereal,Milk,Eggs
4                          Coffee,Toothpaste
5       Soap,Bread,Shampoo,Coffee,Toothpaste
6                    Bread,Soap,Shampoo,Eggs
7                Eggs,Diapers,Detergent,Soap
8                       Diapers,Coffee,Bread
9                          Toothpaste,Cereal
10                         Diapers,Detergent
11   Shampoo,Toothpaste,Detergent,Soap,Bread
12             Toothpaste,Bread,Soap,Diapers
13               Bread,Detergent,Coffee,Soap
14  Toothpaste,Shampoo,Detergent,Soap,Coffee
15                              Cereal,Bread
16                           Shampoo,Diapers
17                               Milk,Cereal
18                Eggs,Coffee,Cereal,Diapers


Unnamed: 0,"Cereal,Eggs,Bread"
0,"Detergent,Soap"
1,"Bread,Cereal,Shampoo"
2,"Eggs,Diapers,Detergent,Milk,Shampoo"
3,"Cereal,Milk,Eggs"
4,"Coffee,Toothpaste"
5,"Soap,Bread,Shampoo,Coffee,Toothpaste"
6,"Bread,Soap,Shampoo,Eggs"
7,"Eggs,Diapers,Detergent,Soap"
8,"Diapers,Coffee,Bread"
9,"Toothpaste,Cereal"
