In [1]:
import os
import sys
import random
from itertools import permutations, combinations # important for getting all the possible k itemsets
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
# List of all the datasets that are needed

In [29]:
def item_k_support_possibilities(item_names, k):
    '''
    Generate all combinations of itemsets
    '''
    item_k_arrange = combinations(item_names, k)
    possibilities_of_k_items = [item for item in item_k_arrange]
    return possibilities_of_k_items

def item_k_confidence_possibilities(item_names, k):
    '''
    Generate all permutations of itemsets 
    '''
    item_k_arrange = permutations(item_names, k)
    possibilities_of_k_items = [item for item in item_k_arrange]
    return possibilities_of_k_items

def count_itemsets_for_k(current_itemset, transactions, k):
    item_k_filter = [name for name in current_itemset.keys()]
    item_k_frequent_names = item_k_support_possibilities(item_k_filter, k)
    itemset_k = {}
    for item in item_k_frequent_names:
        count_occ = sum(1 for transact in transactions if set(item).issubset(transact))
        itemset_k[item] = float(count_occ) / len(transactions)
    return itemset_k
    
def get_itemsets_with_confidence(total_itemset_frequent, min_confidence):
    itemset_confidence = {}
    itemset_copy = total_itemset_frequent.copy()

    for key, val in total_itemset_frequent.items():
        if isinstance(key, tuple):
            key_elems = item_k_confidence_possibilities(list(key), len(key))

            for key_elem in key_elems[:-1]:
                if len(key_elem) == 2:
                    numerator = key_elem[0]
                    if numerator in itemset_copy:
                        confidence_val = val / itemset_copy[numerator]
                        itemset_confidence[key_elem] = confidence_val
                else:
                    confidence_lst = []
                    for i in range(len(key_elem) - 1):
                        numerator = key_elem[i]
                        if numerator in itemset_copy:
                            if i == 0:
                                confidence_val = val / itemset_copy[numerator]
                                confidence_lst.append(confidence_val)
                            else:
                                next_key = key_elem[:i + 1]
                                if next_key not in itemset_copy:
                                    itemset_copy[next_key] = val
                                
                                next_confidence = val / itemset_copy[next_key]
                                confidence_lst.append(next_confidence)

                    itemset_confidence[key_elem] = confidence_lst

    return itemset_confidence, itemset_copy

def collect_frequent_itemset(unfilter_dict_k, min_support):
    # unnecessary_data = {}
    filtered_dict = {}
    for key, val in unfilter_dict_k.items():
        if val >= min_support:
            filtered_dict[key] = val
    return filtered_dict
    


In [31]:
# Loads a dictionary of datasets that you can select by number
selected_stores = {1: "amazon", 2: "best_buy", 3: "k-mart", 4: "nike", 5: "ace_hardware"}
selected_id = int(input("Enter the store number for the dataset that you want:\n1. Amazon\n2. Best Buy\n3. K-mart\n4. Nike\n5. Ace Hardware\n"))
if selected_id not in selected_stores.keys():
    print("invalid number, There are only 5 choices!Try again next time")
    sys.exit()
item_names = pd.read_csv(f"{os.getcwd()}/{selected_stores[selected_id]}_items.csv")
transactions = pd.read_csv(f"{os.getcwd()}/{selected_stores[selected_id]}_transactions.csv")
print(f"You have selected the {selected_stores[selected_id]} dataset")
# Enter the minimum support and the minimum confidence 
min_support = float(input("Please enter the minimum support percent that you want (1 to 100):\n"))
min_support /= 100
min_confidence = float(input("Please enter the minimum confidence percent that you want (1 to 100):\n"))
min_confidence /= 100

itemset_k1 = item_names.set_index("Item Name").to_dict()["Item #"]

# Split the string by comma to seperate each string in a row
item_k1_names = [name for name in item_names["Item Name"]]
itemset_k = {}

item_k1_count = transactions['Transaction'].str.split(", ").explode().value_counts()

item_k1 = item_k1_count.to_dict()
item_k = transactions['Transaction'].str.split(", ").to_list()

# Get the support value for each itemset-1
for k, _ in itemset_k1.items():
    if k not in item_k1:
        itemset_k1[k] = float(0)
    else: 
        itemset_k1[k] = float(item_k1[k]) / len(transactions["Transaction"])

itemset_frequent_k1 = collect_frequent_itemset(itemset_k1, min_support)
itemset_confidence_k = {}
itemset_frequent_k = itemset_frequent_k1
k_val = 2
updated_itemset = itemset_frequent_k1
while len(itemset_frequent_k) >= k_val:
    itemset_k = count_itemsets_for_k(itemset_frequent_k1, item_k, k_val)
    # print()
    itemset_frequent_k = collect_frequent_itemset(itemset_k, min_support)
    updated_itemset.update(itemset_frequent_k)
    # print(itemset_confidence_k)
    k_val += 1
# print(updated_itemset)
for key_s, val_s in updated_itemset.items():
    print(f"Itemset: {key_s}, Support: {val_s}\n")
item_conf, item_supp = get_itemsets_with_confidence(updated_itemset, min_support)
print()
# print(item_conf)
refined_conf = {}
for key_c,val_c in item_conf.items():
    if len(key_c) == 2:
        print(f"Itemset:{key_c[0:1]} -> {key_c[1:]}, Confidence: {val_c}\n")
        refined_conf
    elif len(key_c) > 2:
        for i in range(len(key_c)-1):
            print(f"Itemset:{key_c[0:i+1]} -> {key_c[i+1:]}, Confidence: {val_c[i]}\n")
    
# print(updated_itemset)
# for item in itemset_frequent_k:
    
#     itemset_confidence_k = get_itemsets_with_confidence(itemset_frequent_k1, k_val, min_support)

# item_k1_filter = [name for name in itemset_frequent_k1.keys()]
# item_k2_frequent_names = item_k_possibilities(item_k1_filter, 2)

Enter the store number for the dataset that you want:
1. Amazon
2. Best Buy
3. K-mart
4. Nike
5. Ace Hardware
 1


You have selected the amazon dataset


Please enter the minimum support percent that you want (1 to 100):
 50
Please enter the minimum confidence percent that you want (1 to 100):
 50


Itemset: A Beginner’s Guide, Support: 0.55

Itemset: Java: The Complete Reference, Support: 0.5

Itemset: Java For Dummies, Support: 0.65

Itemset: Android Programming: The Big Nerd Ranch, Support: 0.65

Itemset: ('Java: The Complete Reference', 'Java For Dummies'), Support: 0.5


Itemset:('Java: The Complete Reference',) -> ('Java For Dummies',), Confidence: 1.0

