In [7]:
import os
import numpy as np
from collections import defaultdict
import itertools

In [1]:


def read_data(file_path):
    dataset = []
    with open(file_path, 'r') as file:
        for line in file:
            transaction = line.strip().split()
            dataset.append(transaction)
    return dataset


file_path = 'browsing.txt'


dataset = read_data(file_path)
print("Sample data:", dataset[:5])


Sample data: [['FRO11987', 'ELE17451', 'ELE89019', 'SNA90258', 'GRO99222'], ['GRO99222', 'GRO12298', 'FRO12685', 'ELE91550', 'SNA11465', 'ELE26917', 'ELE52966', 'FRO90334', 'SNA30755', 'ELE17451', 'FRO84225', 'SNA80192'], ['ELE17451', 'GRO73461', 'DAI22896', 'SNA99873', 'FRO86643'], ['ELE17451', 'ELE37798', 'FRO86643', 'GRO56989', 'ELE23393', 'SNA11465'], ['ELE17451', 'SNA69641', 'FRO86643', 'FRO78087', 'SNA11465', 'GRO39357', 'ELE28573', 'ELE11375', 'DAI54444']]


In [2]:


def create_bitmap(hash_buckets, support_threshold):
    bitmap = np.zeros(len(hash_buckets))
    for i, count in enumerate(hash_buckets):
        if count >= support_threshold:
            bitmap[i] = 1
    return bitmap

In [3]:
def get_frequent_items(dataset, min_support):
    item_counts = defaultdict(int)
    for transaction in dataset:
        for item in transaction:
            item_counts[item] += 1
    return {item for item, count in item_counts.items() if count >= min_support}

In [4]:




def pcy(dataset, min_support):
    
    hash_buckets = defaultdict(int)
    for transaction in dataset:
        for pair in itertools.combinations(transaction, 2):
            hash_buckets[hash(pair) % len(dataset)] += 1

    bitmap = create_bitmap(hash_buckets, min_support)

    
    frequent_items = get_frequent_items(dataset, min_support)
    candidate_pairs = set()
    for transaction in dataset:
        valid_items = [item for item in transaction if item in frequent_items]
        for pair in itertools.combinations(valid_items, 2):
            if bitmap[hash(pair) % len(dataset)] == 1:
                candidate_pairs.add(pair)

    
    pair_counts = defaultdict(int)
    for transaction in dataset:
        for pair in itertools.combinations(transaction, 2):
            if pair in candidate_pairs:
                pair_counts[pair] += 1

    frequent_pairs = {pair for pair, count in pair_counts.items() if count >= min_support}
    return frequent_pairs




In [8]:
min_support = 100
frequent_itemsets_pcy = pcy(dataset, min_support)
print("Frequent Itemsets (PCY):", frequent_itemsets_pcy)

Frequent Itemsets (PCY): {('FRO92469', 'DAI75645'), ('DAI55148', 'DAI62779'), ('SNA45677', 'ELE78169'), ('ELE99737', 'DAI62779'), ('SNA99873', 'SNA45677'), ('DAI62779', 'SNA55762'), ('ELE32164', 'ELE66810'), ('DAI75645', 'GRO71621'), ('SNA53220', 'FRO19221'), ('GRO73461', 'GRO71621'), ('DAI42493', 'ELE92920'), ('ELE17451', 'SNA96271'), ('DAI85309', 'SNA93860'), ('FRO40251', 'ELE20847'), ('FRO40251', 'GRO38983'), ('GRO46854', 'FRO40251'), ('FRO85978', 'SNA99873'), ('ELE17451', 'ELE32164'), ('ELE26917', 'GRO99222'), ('GRO73461', 'DAI83733'), ('GRO46854', 'FRO79022'), ('GRO21487', 'ELE66810'), ('DAI62779', 'ELE66810'), ('FRO31317', 'FRO80039'), ('ELE32164', 'DAI62779'), ('FRO79022', 'DAI62779'), ('ELE59028', 'FRO85978'), ('GRO46854', 'GRO73461'), ('GRO21487', 'GRO81647'), ('FRO31317', 'DAI75645'), ('FRO53271', 'GRO59710'), ('SNA80324', 'DAI75645'), ('FRO89565', 'DAI62779'), ('GRO30386', 'DAI62779'), ('GRO73461', 'GRO30386'), ('DAI62779', 'ELE56788'), ('GRO61133', 'GRO81087'), ('FRO80039',

In [9]:
def generate_association_rules(frequent_itemsets, dataset, min_confidence):
    item_counts = defaultdict(int)
    for transaction in dataset:
        for item in transaction:
            item_counts[item] += 1

    rules = []
    for itemset in frequent_itemsets:
        for item in itemset:
            antecedent = {item}
            consequent = set(itemset) - antecedent
            if consequent:
                antecedent_count = sum(1 for transaction in dataset if antecedent.issubset(transaction))
                rule_count = sum(1 for transaction in dataset if antecedent.issubset(transaction) and consequent.issubset(transaction))
                confidence = rule_count / antecedent_count
                if confidence >= min_confidence:
                    rules.append((antecedent, consequent, confidence))
    return rules

In [10]:



min_confidence = 0.5




association_rules_pcy = generate_association_rules(frequent_itemsets_pcy, dataset, min_confidence)
print("Association Rules (PCY):")
for rule in association_rules_pcy:
    print(rule)


Association Rules (PCY):
({'DAI55148'}, {'DAI62779'}, 0.5394871794871795)
({'ELE20847'}, {'FRO40251'}, 0.530562347188264)
({'ELE21353'}, {'DAI62779'}, 0.5023255813953489)
({'ELE20847'}, {'FRO40251'}, 0.530562347188264)
({'DAI93865'}, {'FRO40251'}, 1.0)
({'SNA30859'}, {'GRO24246'}, 0.53125)
({'FRO92469'}, {'FRO40251'}, 0.983510011778563)
({'GRO89004'}, {'ELE25077'}, 0.698051948051948)
({'FRO92469'}, {'FRO40251'}, 0.983510011778563)
({'ELE92920'}, {'DAI62779'}, 0.7326649958228906)
({'DAI46755'}, {'FRO81176'}, 0.5803921568627451)
({'SNA30533'}, {'SNA96271'}, 0.5090361445783133)
({'FRO19221'}, {'DAI62779'}, 0.5976714100905562)
({'FRO47962'}, {'DAI75645'}, 0.6176470588235294)
({'GRO85051'}, {'FRO40251'}, 0.999176276771005)
({'ELE20847'}, {'SNA80324'}, 0.5012224938875306)
({'DAI43223'}, {'ELE32164'}, 0.5511627906976744)
({'DAI43223'}, {'ELE32164'}, 0.5511627906976744)
({'GRO38636'}, {'FRO40251'}, 0.9906542056074766)
({'GRO85051'}, {'FRO40251'}, 0.999176276771005)
({'GRO81647'}, {'GRO73461'},