Construct the FP (frequent pattern) tree for a given transaction dataset, using a suitable programming language and an appropriate dataset for association mining, and perform the following operations:
1) Find the max frequent item set.
2) Determine how many transactions contain it.
3) Simulate the frequent patterns.
4) Compare this algorithm with the Apriori algorithm.
NOTES:
->Support and confidence are the two key measures used in the Apriori algorithm.
->Support: the support of an item set is the number of transactions in which the item set appears.
->Confidence: the confidence is a measure of the likelihood that an item set will appear if another item set appears.
->

In [2]:

# Step 1: Load and Preprocess Dataset
import pandas as pd
from collections import defaultdict
from itertools import combinations

In [3]:
# Load dataset
def load_data(file_path):
    """Read a transactions CSV and return one list of items per row.

    The first column of the file is skipped; in every remaining column a
    missing cell is dropped, so each returned list holds only the values
    actually present in that row.

    Args:
        file_path: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A list with one list of cell values per data row.
    """
    def _present_values(row):
        # Keep only the non-missing cells of this row, in column order.
        return row.dropna().tolist()

    frame = pd.read_csv(file_path)
    item_columns = frame.iloc[:, 1:]
    baskets = item_columns.apply(_present_values, axis=1)
    return baskets.tolist()

# Relative path of the transactions CSV that load_data() will read.
file_path = r"groceries.csv"
# Parse the CSV into a list of per-row item lists.
transactions = load_data(file_path)
# Bare expression: displays the loaded transactions as the notebook cell output.
transactions

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product'],
 ['whole milk', 'butter', 'yogurt', 'rice', 'abrasive cleaner'],
 ['rolls/buns'],
 ['other vegetables',
  'UHT-milk',
  'rolls/buns',
  'bottled beer',
  'liquor (appetizer)'],
 ['potted plants'],
 ['whole milk', 'cereals'],
 ['tropical fruit',
  'other vegetables',
  'white bread',
  'bottled water',
  'chocolate'],
 ['citrus fruit',
  'tropical fruit',
  'whole milk',
  'butter',
  'curd',
  'yogurt',
  'flour',
  'bottled water',
  'dishes'],
 ['beef'],
 ['frankfurter', 'rolls/buns', 'soda'],
 ['chicken', 'tropical fruit'],
 ['butter', 'sugar', 'fruit/vegetable juice', 'newspapers'],
 ['fruit/vegetable juice'],
 ['packaged fruit/vegetables'],
 ['chocolate'],
 ['specialty bar'],
 ['other vegetables'],
 ['butter mi

In [4]:
def preprocess_transactions(transactions, min_support):
    """Drop infrequent items and put every transaction into one global order.

    Args:
        transactions: Re-iterable collection of transactions (it is traversed
            twice), each an iterable of hashable items.
        min_support: Minimum number of transactions an item must appear in
            to be kept.

    Returns:
        A ``(processed_transactions, item_counts)`` tuple.
        ``processed_transactions`` is a list of lists containing only the
        frequent items of each transaction, without duplicates, ordered by
        descending support; transactions left empty are omitted.
        ``item_counts`` maps every item seen (frequent or not) to the number
        of transactions that contain it.
    """
    # Support is the number of transactions containing an item, so an item
    # repeated inside one transaction must be counted only once.
    item_counts = defaultdict(int)
    for transaction in transactions:
        for item in dict.fromkeys(transaction):
            item_counts[item] += 1

    # Build one global ranking of the frequent items. The sort is stable, so
    # items with equal support keep their first-appearance order; without a
    # shared tie-break two transactions could order the same items
    # differently and stop sharing a prefix in the FP-tree.
    frequent_items = [
        item for item, count in item_counts.items() if count >= min_support
    ]
    frequent_items.sort(key=lambda item: item_counts[item], reverse=True)
    rank = {item: position for position, item in enumerate(frequent_items)}

    processed_transactions = []
    for transaction in transactions:
        kept = [item for item in dict.fromkeys(transaction) if item in rank]
        if kept:
            kept.sort(key=rank.__getitem__)
            processed_transactions.append(kept)

    return processed_transactions, item_counts

# Absolute threshold: the minimum item count required for an item to be kept.
min_support = 10
# Filter and frequency-order the raw transactions; also keep the per-item counts.
processed_transactions, item_counts = preprocess_transactions(transactions, min_support)


In [5]:
class FPTreeNode:
    """A single node of the FP-tree.

    Attributes set by the constructor:
        item: The item this node stands for (``None`` is used for the root).
        count: Running count stored on this node.
        parent: The node one level up, or ``None``.
        children: Dict mapping an item to the child node for that item.
        next: Next node in the chain of nodes for the same item, or ``None``.
    """

    def __init__(self, item, count, parent):
        """Create a node with no children and no successor in its item chain."""
        self.item, self.count, self.parent = item, count, parent
        self.children = dict()
        self.next = None

    def increment(self, count):
        """Add ``count`` to this node's stored count."""
        self.count += count


In [6]:
def construct_fp_tree(transactions):
    """Build an FP-tree and its header table from a list of transactions.

    Args:
        transactions: Re-iterable collection of transactions (it is traversed
            twice), each an iterable of hashable items.

    Returns:
        A ``(root, header_table)`` tuple. ``root`` is an item-less
        ``FPTreeNode`` whose descendants encode the transactions as shared
        prefix paths. ``header_table`` maps each item to a two-element list
        ``[count, first_node]`` where ``count`` is the number of occurrences
        of the item and ``first_node`` heads the chain (linked through
        ``.next``) of all tree nodes for that item, in creation order.
    """
    root = FPTreeNode(None, 0, None)
    header_table = {}
    for transaction in transactions:
        for item in transaction:
            if item not in header_table:
                header_table[item] = [0, None]
            header_table[item][0] += 1

    # One global item order: descending count, ties kept in first-appearance
    # order (the sort is stable). Sorting each transaction by count alone
    # would leave tied items in their input order, so identical item sets
    # could end up on different branches instead of sharing a prefix.
    ordered_items = sorted(
        header_table, key=lambda item: header_table[item][0], reverse=True
    )
    rank = {item: position for position, item in enumerate(ordered_items)}

    # Last node of every item's chain, so a new node is appended in O(1)
    # instead of walking the whole chain from the head each time.
    chain_tails = {}

    for transaction in transactions:
        current_node = root
        for item in sorted(transaction, key=rank.__getitem__):
            child = current_node.children.get(item)
            if child is None:
                child = FPTreeNode(item, 1, current_node)
                current_node.children[item] = child
                tail = chain_tails.get(item)
                if tail is None:
                    # First node for this item: it becomes the chain head.
                    header_table[item][1] = child
                else:
                    tail.next = child
                chain_tails[item] = child
            else:
                child.increment(1)
            current_node = child
    return root, header_table

# Build the FP-tree and its header table from the preprocessed transactions.
fp_tree, header_table = construct_fp_tree(processed_transactions)

In [7]:
# header_table maps each single item to [count, first node]; pick the entry
# with the largest count. Note that this selects the most frequent individual
# item, not a multi-item itemset.
max_frequent_itemset = max(header_table.items(), key=lambda x: x[1][0])
# Unpack the item and its count; the chain head node is not needed here.
max_item, (max_count, _) = max_frequent_itemset
# Number of processed transactions that contain that item.
transaction_count = sum(1 for t in processed_transactions if max_item in t)

In [8]:
def find_frequent_patterns(transactions, min_support):
    """Count co-occurring item pairs and keep those meeting ``min_support``.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.
        min_support: Minimum number of transactions a pair must appear in
            to be reported.

    Returns:
        A dict mapping ``(item_a, item_b)`` to the number of transactions
        containing both items, restricted to pairs whose count is at least
        ``min_support``. Within a pair the item found in more transactions
        comes first; ties are ordered by first appearance in the input.
    """
    # De-duplicate inside each transaction so a repeated item can neither
    # pair with itself nor count the same pair twice for one transaction.
    baskets = [list(dict.fromkeys(transaction)) for transaction in transactions]

    item_counts = defaultdict(int)
    for basket in baskets:
        for item in basket:
            item_counts[item] += 1

    # One global order for the items, so a pair always gets the same key no
    # matter how its two items were ordered in a particular transaction.
    ordered_items = sorted(item_counts, key=item_counts.__getitem__, reverse=True)
    rank = {item: position for position, item in enumerate(ordered_items)}

    pair_counts = defaultdict(int)
    for basket in baskets:
        basket.sort(key=rank.__getitem__)
        for pair in combinations(basket, 2):
            pair_counts[pair] += 1
    return {pair: count for pair, count in pair_counts.items() if count >= min_support}

# Count co-occurring item pairs in the preprocessed transactions, using the same threshold.
frequent_patterns = find_frequent_patterns(processed_transactions, min_support)

In [9]:
# Rank the frequent pairs once, highest count first, and reuse that ranking
# for both the top-5 summary and the full listing instead of sorting twice.
ranked_patterns = sorted(frequent_patterns.items(), key=lambda x: x[1], reverse=True)

print("Most Frequent Itemset:", max_item)
print("Frequency:", max_count)
print("Transaction Count:", transaction_count)
print("Top 5 Frequent Patterns:", ranked_patterns[:5])
for pattern, count in ranked_patterns:
    print(pattern, "->", count)

Most Frequent Itemset: whole milk
Frequency: 2513
Transaction Count: 2513
Top 5 Frequent Patterns: [(('whole milk', 'other vegetables'), 736), (('whole milk', 'rolls/buns'), 557), (('whole milk', 'yogurt'), 551), (('whole milk', 'root vegetables'), 481), (('other vegetables', 'root vegetables'), 466)]
('whole milk', 'other vegetables') -> 736
('whole milk', 'rolls/buns') -> 557
('whole milk', 'yogurt') -> 551
('whole milk', 'root vegetables') -> 481
('other vegetables', 'root vegetables') -> 466
('other vegetables', 'yogurt') -> 427
('other vegetables', 'rolls/buns') -> 419
('whole milk', 'tropical fruit') -> 416
('whole milk', 'soda') -> 394
('rolls/buns', 'soda') -> 377
('other vegetables', 'tropical fruit') -> 353
('whole milk', 'bottled water') -> 338
('rolls/buns', 'yogurt') -> 338
('whole milk', 'pastry') -> 327
('other vegetables', 'soda') -> 322
('whole milk', 'whipped/sour cream') -> 317
('rolls/buns', 'sausage') -> 301
('whole milk', 'citrus fruit') -> 300
('whole milk', 'pip