In [None]:
import time

import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Sample dataset (list of transactions; the empty basket still counts as a transaction)
transactions = [['A', 'B', 'C'],
                ['D', 'B'],
                ['B', 'C', 'D', 'A'],
                ['B', 'A', 'C'],
                []]
# Ragged, human-readable view of the baskets (kept under its original name).
dataset = pd.DataFrame(transactions)

# apriori needs a one-hot boolean frame (one row per transaction, one column
# per item), not the ragged frame above, so encode the transactions first.
encoder = TransactionEncoder()
df = pd.DataFrame(encoder.fit(transactions).transform(transactions),
                  columns=encoder.columns_)

# perf_counter is the clock intended for measuring short elapsed intervals.
start = time.perf_counter()
# Apply Apriori algorithm to get frequent itemsets
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
end = time.perf_counter()
print("Time required for apriori : ", end - start)

# Display the result
print(frequent_itemsets)

Time required for apriori :  0.006939411163330078
   support   itemsets
0     0.75        (A)
1     0.75        (B)
2     0.75        (C)
3     0.50        (D)
4     0.50     (B, A)
5     0.75     (A, C)
6     0.50     (B, C)
7     0.50  (B, A, C)


In [None]:
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd
import time

# Sample dataset (list of transactions)
transactions = [['A', 'B', 'C'],
                ['A', 'C', 'B', 'D'],
                ['A', 'C', 'B', 'C'],
                ['A', 'D', 'B'],
                ['B', 'A', 'C']]
# Ragged, human-readable view of the baskets (kept under its original name).
dataset = pd.DataFrame(transactions)

# fpgrowth needs a one-hot boolean frame (one row per transaction, one column
# per item); TransactionEncoder also collapses the repeated 'C' in row three.
encoder = TransactionEncoder()
df = pd.DataFrame(encoder.fit(transactions).transform(transactions),
                  columns=encoder.columns_)

# perf_counter is the clock intended for measuring short elapsed intervals.
start2 = time.perf_counter()
# Apply FP-Growth algorithm to get frequent itemsets
frequent_itemsets = fpgrowth(df, min_support=0.5, use_colnames=True)
end2 = time.perf_counter()
print("Time required for fpgrowth : ", end2 - start2)

# Display the result
print(frequent_itemsets)

Time required for apriori :  0.001711130142211914
   support   itemsets
0     0.75        (C)
1     0.75        (B)
2     0.75        (A)
3     0.50        (D)
4     0.50     (B, C)
5     0.75     (A, C)
6     0.50     (B, A)
7     0.50  (B, A, C)


In [None]:
def mine_tree(node, header_table, min_support, prefix, frequent_item_list, dataset_size):
    """Recursively collect itemsets from a header table, least frequent item first.

    Items are visited in ascending order of the count stored at index 0 of
    their header entry. Each item is added to a copy of ``prefix``; the pair
    ``(itemset, count / dataset_size)`` is appended to ``frequent_item_list``;
    then the conditional tree built from the item's prefix paths is mined with
    the extended itemset as the new prefix.

    Args:
        node: Tree the header table belongs to; not read by this function.
        header_table: Mapping of item -> entry whose element 0 is the count.
        min_support: Threshold handed on to ``construct_tree``.
        prefix: Set of items already fixed on this recursion branch.
        frequent_item_list: Output list, extended in place.
        dataset_size: Divisor that turns a count into a relative support.
    """
    by_rising_count = sorted(header_table, key=lambda name: header_table[name][0])
    for item in by_rising_count:
        itemset = prefix | {item}
        frequent_item_list.append((itemset, header_table[item][0] / dataset_size))

        pattern_bases = find_prefix_path(header_table, item)
        subtree, sub_header = construct_tree(pattern_bases, min_support)
        # Nothing left to combine with this itemset: move on to the next item.
        if sub_header is None or len(sub_header) == 0:
            continue
        mine_tree(subtree, sub_header, min_support, itemset, frequent_item_list, dataset_size)

def fpgrowth(dataset, min_support):
    """Mine itemsets from a weighted transaction dict.

    Note: defining this function rebinds the name ``fpgrowth`` that an earlier
    cell imports from ``mlxtend.frequent_patterns``.

    Args:
        dataset: Mapping of transaction (a frozenset of items) -> number of
            times that transaction occurs.
        min_support: Threshold handed on to ``construct_tree`` and ``mine_tree``.

    Returns:
        List of ``(itemset, support)`` tuples as produced by ``mine_tree``,
        where support is a count divided by the total number of transactions.
        The list is empty when no header table was built.
    """
    root, header_table = construct_tree(dataset, min_support)
    frequent_item_list = []
    # mine_tree iterates the header table unconditionally, so a missing or
    # empty table (no item reached min_support) must be handled here.
    # NOTE(review): construct_tree is not visible here; the None case is
    # inferred from the `is not None` check mine_tree does on its result.
    if not header_table:
        return frequent_item_list
    # Total number of transactions, duplicates included.
    dataset_size = sum(dataset.values())
    mine_tree(root, header_table, min_support, set(), frequent_item_list, dataset_size)
    return frequent_item_list

# Sample transaction data
transactions = [
    {'A', 'B', 'C'},
    {'A', 'C', 'B', 'D'},
    {'B', 'C', 'D', 'A'},
    {'B', 'A', 'C'},
    {'A', 'D', 'B'},
]

# Baskets with the same items produce equal frozenset keys. Written as a dict
# literal, the later key silently replaces the earlier one and five baskets
# shrink to three, so accumulate a count per distinct basket instead.
# NOTE(review): assumes construct_tree weights each key by its value, which is
# how fpgrowth already treats the values when it sums them for the dataset size.
dataset = {}
for transaction in transactions:
    key = frozenset(transaction)
    dataset[key] = dataset.get(key, 0) + 1

# Apply FP-Growth algorithm
min_support = 2
frequent_itemsets = fpgrowth(dataset, min_support)
print("Itemset\t\t Support")

for itemset in frequent_itemsets:
    print(itemset)


Itemset		 Support
({'C'}, 0.6666666666666666)
({'A', 'C'}, 1.0)
({'B', 'A', 'C'}, 1.0)
({'B', 'C'}, 2.0)
({'D'}, 0.6666666666666666)
({'D', 'A'}, 0.6666666666666666)
({'D', 'B', 'A'}, 0.6666666666666666)
({'D', 'B'}, 1.6666666666666667)
({'B'}, 1.0)
({'A'}, 1.0)
({'B', 'A'}, 1.0)


In [None]:
import csv
from collections import defaultdict

class FPNode:
    """A single node of an FP-tree.

    A fresh node has no children and is not yet chained to any other node.
    """

    def __init__(self, item, count, parent):
        # Payload and upward link, exactly as supplied by the caller.
        self.item, self.count, self.parent = item, count, parent
        # Child nodes keyed by item, and the next node in the chain that the
        # header table keeps for this item.
        self.children, self.next_sibling = {}, None

def conditional_tree_from_paths(paths, min_support):
    """Build a conditional FP-tree and its header table from prefix paths.

    Each path is a list of nodes as produced by ``find_prefix_path``: the
    first node is the occurrence the path was collected for, followed by its
    ancestors. Only that first node's count says how many transactions share
    the whole path; an ancestor's own count also includes transactions that
    branch off before reaching the first node. Every item on a path is
    therefore weighted by the first node's count, not by its own.

    Args:
        paths: List of lists of nodes exposing ``item`` and ``count``.
        min_support: Minimum accumulated count for an item to be kept.

    Returns:
        ``(root, header_table)``, where ``header_table`` maps each kept item
        to ``[count, first_node]`` in descending count order, or
        ``(None, None)`` when no item reaches ``min_support``.
    """
    # Pair every non-empty path with the count of the node it starts from.
    weighted_paths = [(path, path[0].count) for path in paths if path]

    totals = defaultdict(int)
    for path, weight in weighted_paths:
        for node in path:
            totals[node.item] += weight

    kept = {item: count for item, count in totals.items() if count >= min_support}
    if not kept:
        return None, None

    # root of the conditional tree
    root = FPNode(None, None, None)
    header_table = {
        item: [count, None]
        for item, count in sorted(kept.items(), key=lambda pair: pair[1], reverse=True)
    }

    for path, weight in weighted_paths:
        current_node = root
        for node in path:
            if node.item in kept:
                # Same insert-and-link step the main tree uses.
                current_node = insert_tree_node(current_node, node.item, header_table, weight)

    return root, header_table

def find_prefix_path(node):
    """Collect the upward path of every node in a ``next_sibling`` chain.

    Starting at ``node`` and following ``next_sibling`` links, each visited
    node contributes one list: the node itself followed by its ancestors, in
    bottom-up order, stopping before the node whose ``parent`` is ``None``.
    Empty lists are not reported.

    Args:
        node: First node of the chain, or ``None`` for an empty chain.

    Returns:
        List of node lists, one per chained node, in chain order.
    """
    def _climb(start):
        # Walk towards the top, leaving out the parentless node itself.
        chain = []
        while start.parent is not None:
            chain.append(start)
            start = start.parent
        return chain

    collected = []
    while node is not None:
        chain = _climb(node)
        if chain:
            collected.append(chain)
        node = node.next_sibling
    return collected

def mine_frequent_patterns(tree, header_table, min_support, prefix, frequent_patterns):
    """Record frequent patterns reachable from ``header_table`` and recurse.

    For every header entry whose count reaches ``min_support`` and whose item
    is not already part of ``prefix``, the extended itemset is stored in
    ``frequent_patterns`` (key: sorted tuple of items, value: the count) and
    the conditional tree built from the item's prefix paths is mined with the
    extended itemset as the new prefix.

    Args:
        tree: Tree the header table belongs to; not read by this function.
        header_table: Mapping of item -> ``[count, first_node]``.
        min_support: Minimum count for an item to be considered.
        prefix: Set of items already fixed on this recursion branch.
        frequent_patterns: Output dict, updated in place.
    """
    for item, entry in header_table.items():
        count = entry[0]
        # Skip infrequent items and items already part of the pattern.
        if count < min_support or item in prefix:
            continue

        extended = {*prefix, item}
        frequent_patterns[tuple(sorted(extended))] = count

        prefix_paths = find_prefix_path(entry[1])
        subtree, sub_header = conditional_tree_from_paths(prefix_paths, min_support)
        if subtree is None:
            continue
        mine_frequent_patterns(subtree, sub_header, min_support, extended, frequent_patterns)

def load_transactions(file_path):
    """Read a CSV file into a list of transactions, one per row.

    Every cell is stripped of surrounding whitespace and empty cells are
    dropped, so padded rows (``milk,bread,,``) or spaces after the comma do
    not introduce a bogus ``''`` or ``' bread'`` item. Rows are never dropped,
    so the number of transactions matches the number of rows read.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        List of lists of item strings, in file order.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    transactions = []
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation asks for file objects handed to csv.reader.
    with open(file_path, 'r', newline='') as file:
        for row in csv.reader(file):
            cleaned = (cell.strip() for cell in row)
            transactions.append([item for item in cleaned if item])
    return transactions

def fp_growth(transactions, min_support):
    """Mine frequent patterns from a list of transactions with an FP-tree.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items. It is traversed twice, so pass a list rather than a
            one-shot iterator.
        min_support: Minimum number of transactions an item must occur in.

    Returns:
        Dict filled in by ``mine_frequent_patterns`` (pattern -> support count).
    """
    # Count the frequency: an item counts once per transaction containing it,
    # however often it is repeated inside that transaction. dict.fromkeys
    # de-duplicates while keeping first-seen order, which keeps ties stable.
    item_counts = defaultdict(int)
    for transaction in transactions:
        for item in dict.fromkeys(transaction):
            item_counts[item] += 1

    # Sort items by support count
    frequent_items = sorted(
        ((item, count) for item, count in item_counts.items() if count >= min_support),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # FP-tree and header table
    root = FPNode(None, None, None)
    header_table = {item: [count, None] for item, count in frequent_items}

    # Every transaction must be inserted in the same global item order.
    # Otherwise two items can be ancestor/descendant in one branch and the
    # other way round in another, and their joint count is split between them.
    rank = {item: position for position, item in enumerate(header_table)}

    for transaction in transactions:
        ordered_items = sorted(
            (item for item in dict.fromkeys(transaction) if item in rank),
            key=rank.__getitem__,
        )
        current_node = root
        for item in ordered_items:
            current_node = insert_tree_node(current_node, item, header_table, 1)

    frequent_patterns = {}
    mine_frequent_patterns(root, header_table, min_support, set(), frequent_patterns)

    return frequent_patterns

def insert_tree_node(current_node, item, header_table, count):
    """Step from ``current_node`` to its child for ``item``, creating it if needed.

    An existing child just has ``count`` added to its own count. A new child
    is created with ``count``, registered under ``current_node.children`` and
    appended to the end of the ``next_sibling`` chain that starts at
    ``header_table[item][1]``.

    Args:
        current_node: Node under which the item is inserted.
        item: Item label of the child.
        header_table: Mapping of item -> ``[count, first_node]``.
        count: Amount to add to (or initialise) the child's count.

    Returns:
        The child node for ``item``.
    """
    child = current_node.children.get(item)
    if child is not None:
        child.count += count
        return child

    child = FPNode(item, count, current_node)
    current_node.children[item] = child

    head = header_table[item][1]
    if head is None:
        # First node seen for this item: it becomes the head of the chain.
        header_table[item][1] = child
    else:
        tail = head
        while tail.next_sibling is not None:
            tail = tail.next_sibling
        tail.next_sibling = child
    return child

if __name__ == "__main__":
    # Tunables for this run: input file and the minimum support expressed as a
    # fraction of the number of transactions.
    data_path = "groceries_subset.csv"
    min_support_fraction = 0.05

    try:
        transactions = load_transactions(data_path)
    except FileNotFoundError:
        # Report the missing input in one readable line instead of leaving a
        # raw traceback in the cell output.
        print(f"Dataset not found: {data_path!r}. "
              "Place the CSV next to the notebook (or change data_path) and re-run this cell.")
    else:
        # Absolute support threshold: number of transactions a pattern must reach.
        min_support = min_support_fraction * len(transactions)
        frequent_patterns = fp_growth(transactions, min_support)
        print("Frequent Patterns:")
        for pattern, support in frequent_patterns.items():
            print(f"{pattern}: {support}")

FileNotFoundError: [Errno 2] No such file or directory: 'groceries_subset.csv'