## Applying ECLAT on small data

In [2]:
# Import necessary libraries
import pandas as pd
from collections import defaultdict
from itertools import combinations
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
from io import StringIO

# Display styling: emit a <style> element as cell output with rules for
# `body`, `.widget-label`, `.my_output_area`, `.widget-button` and `select`.
# NOTE(review): `.my_output_area` is declared twice below. The second rule
# comes later with equal specificity, so its `background-color`, `border` and
# `padding` replace the values set by the first rule; only the remaining
# properties of the first rule (radius, colour, font size, margin, ...) apply.
display(HTML('''
    <style>
    body {
        background-color: #800080;
        font-family: Calibri, sans-serif;
        font-weight: bold;
        text-align: center;
        color: white;
    }
    .widget-label {
        color: white !important;
        font-weight: bold;
    }
    .my_output_area {
        background-color: #D8BFD8;
        border: 1px solid #4B0082;
        padding: 10px;
        border-radius: 5px;
        color: white !important;
        text-align: center;
        font-size: 18px;
        z-index: 10;
        position: relative;
        margin: 5px 0;
    }
    .my_output_area {
        background-color: transparent !important; /* Ensures transparency */
        border: none; /* Remove any default border */
        padding: 0 !important; /* Remove padding if needed */
    }
    .widget-button {
        background-color: #4B0082;
        color: white;
        font-weight: bold;
    }
    select {
        background-color: #4B0082;
        color: white;
        font-weight: bold;
    }
</style>
'''))

# Display the heading as a centred <h1>; the inline style repeats the font and
# colour settings so the heading does not depend on the stylesheet above.
display(HTML('''
    <h1 style="text-align: center; color: white; font-family: Calibri, sans-serif; font-weight: bold;">
        Generate Frequent Itemsets using ECLAT Algorithm
    </h1>
'''))

# --- Interactive controls ---------------------------------------------------

# File picker restricted to a single .csv file.
upload = widgets.FileUpload(
    description='Upload CSV',
    accept='.csv',
    multiple=False,
    style={'description_width': 'initial'},
)

# Minimum support selector: absolute transaction counts 1..10, default 2.
min_sup_input = widgets.Dropdown(
    description='Minimum Support:',
    options=list(range(1, 11)),
    value=2,
    style={'description_width': 'initial'},
)

# Button that triggers the mining run (the handler is attached further down).
run_button = widgets.Button(
    description='Run ECLAT',
    tooltip='Click to Run ECLAT',
    button_style='',
    style={'description_width': 'initial'},
)

# Output area that receives everything rendered by the handler.
output = widgets.Output()

# Stack the controls vertically, with the output area last, and show them.
widgets_box = widgets.VBox(children=[upload, min_sup_input, run_button, output])
display(widgets_box)

# Define the function to generate vertical data format (VDF)
def generate_vdf(transactions, itemsets):
    """Build the vertical data format: itemset -> ids of transactions containing it.

    Args:
        transactions: Mapping of transaction id to the set of items in that
            transaction.
        itemsets: Iterable of candidate itemsets, each a hashable iterable of
            items (the caller passes tuples).

    Returns:
        A ``defaultdict(set)`` keyed by candidate itemset. Candidates that
        occur in no transaction are absent. Keys are inserted in order of
        first match while scanning ``transactions`` in mapping order.
    """
    # Convert every candidate to a frozenset once rather than once per
    # transaction. Materialising the list also means a one-shot iterator is
    # checked against every transaction, not only the first.
    candidates = [(itemset, frozenset(itemset)) for itemset in itemsets]

    vdf = defaultdict(set)
    for tid, trans in transactions.items():
        for itemset, members in candidates:
            if members.issubset(trans):
                vdf[itemset].add(tid)
    return vdf

# Define the function to prune itemsets based on minimum support
def prune_vdf(vdf, min_sup):
    """Drop itemsets whose support is below ``min_sup``.

    Args:
        vdf: Mapping of itemset to the set of transaction ids containing it.
        min_sup: Minimum number of transactions an itemset must appear in.

    Returns:
        A new plain dict with the surviving entries, in their original order.
        The tid sets are the same objects as in ``vdf`` (not copies).
    """
    survivors = {}
    for candidate, tid_set in vdf.items():
        if len(tid_set) < min_sup:
            continue
        survivors[candidate] = tid_set
    return survivors

# Define the main ECLAT algorithm
def run_eclat(transactions, min_sup):
    """Mine frequent itemsets level by level, rendering every level to ``output``.

    Starting from all 1-itemsets, each pass builds the vertical data format
    for the current candidates, displays it, prunes it by ``min_sup``,
    displays the survivors, and joins pairs of survivors into candidates that
    are one item larger. The loop stops when a level has no candidates or no
    survivors.

    Args:
        transactions: Mapping of transaction id to a set of item strings
            (items are joined with ``", "`` for display, so they must be str).
        min_sup: Minimum number of transactions an itemset must appear in.

    Returns:
        List of ``(itemset, tids)`` pairs for every frequent itemset, smaller
        itemsets first; ``itemset`` is a sorted tuple of items and ``tids`` the
        set of ids of the transactions containing it.
    """
    # Local import so the block works regardless of the cell's import list.
    from html import escape

    def show_level(title, level_vdf):
        """Render one VDF table (title line plus one line per itemset)."""
        with output:
            display(HTML(f'<div class="my_output_area"><strong>{title}</strong></div>'))
            for itemset, tids in level_vdf.items():
                # Item names come from the uploaded CSV; escape them so that
                # characters such as '<' or '&' are shown literally.
                label = escape(", ".join(itemset))
                display(HTML(f'<div class="my_output_area">{label} : {tids} (Support: {len(tids)})</div>'))

    result = []
    k = 1
    all_items = sorted({item for trans in transactions.values() for item in trans})
    current_itemsets = [(item,) for item in all_items]

    while current_itemsets:
        vdf = generate_vdf(transactions, current_itemsets)
        show_level(f'VDF for {k}-itemsets (before pruning):', vdf)

        pruned_vdf = prune_vdf(vdf, min_sup)
        if not pruned_vdf:
            break

        show_level(f'VDF for {k}-itemsets (after pruning):', pruned_vdf)
        result.extend(pruned_vdf.items())

        # Join step: the union of two frequent k-itemsets is a candidate only
        # when it has exactly k+1 items (i.e. the two share k-1 items).
        prev_frequent = list(pruned_vdf)
        k += 1
        candidates = set()
        for a, b in combinations(prev_frequent, 2):
            merged = set(a) | set(b)
            if len(merged) == k:
                candidates.add(tuple(sorted(merged)))
        # Sort so the candidate (and therefore display/result) order does not
        # depend on hash-randomised set iteration between kernel restarts.
        current_itemsets = sorted(candidates)

    return result

# Define the function to handle the run button click event
def on_run_button_clicked(b):
    """Handle a click on the run button: load the uploaded CSV and run ECLAT.

    Clears ``output``, reads the uploaded file into a DataFrame, shows it,
    turns the ``Items`` column into transactions (one whitespace-separated
    row per transaction, ids starting at 1) and renders the frequent itemsets
    returned by ``run_eclat``. Problems are reported inside ``output``.

    Args:
        b: The button instance passed by ``on_click``; not used.
    """
    # Local import so the block works regardless of the cell's import list.
    from html import escape

    output.clear_output()

    if not upload.value:
        with output:
            display(HTML('<div class="my_output_area">❗Please upload a CSV file first.</div>'))
        return

    # Read the uploaded file.
    # ipywidgets 7.x exposes uploads as {filename: {'content': bytes, ...}};
    # ipywidgets 8.x exposes a tuple of per-file records whose 'content' is a
    # memoryview. Accept both layouts and normalise the payload to bytes.
    uploaded = upload.value
    if isinstance(uploaded, dict):
        file_info = next(iter(uploaded.values()))
    else:
        file_info = uploaded[0]
    raw_bytes = bytes(file_info['content'])

    try:
        # 'utf-8-sig' decodes plain UTF-8 and also drops a leading BOM, so the
        # first header is not read as '\ufeffItems'.
        df = pd.read_csv(StringIO(raw_bytes.decode('utf-8-sig')))
    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Report the failure in the output area instead of letting it raise
        # inside the widget callback.
        with output:
            display(HTML(f'<div class="my_output_area">❗Could not read the uploaded CSV: {escape(str(exc))}</div>'))
        return

    # Display the CSV content as a table
    with output:
        display(HTML('<div class="my_output_area"><strong>Uploaded CSV Data:</strong></div>'))
        display(df)

    # Expect 'Items' column
    if 'Items' not in df.columns:
        with output:
            display(HTML('<div class="my_output_area">❗CSV must have a column named "Items" containing transactions.</div>'))
        return

    # One transaction per non-null row; items are whitespace-separated tokens.
    transactions = {i + 1: set(str(row).split()) for i, row in enumerate(df['Items'].dropna())}
    min_sup = min_sup_input.value

    with output:
        display(HTML(f'<div class="my_output_area">Running ECLAT with minimum support = {min_sup}</div>'))

    frequent_itemsets = run_eclat(transactions, min_sup)

    with output:
        display(HTML('<div class="my_output_area"><strong>All Frequent Itemsets Found:</strong></div>'))
        for itemset, tids in frequent_itemsets:
            # Escape the repr: item names originate from the uploaded file.
            display(HTML(f'<div class="my_output_area">Itemset: {escape(str(itemset))} | Support: {len(tids)}</div>'))

# Bind the button click event to the handler function: register
# on_run_button_clicked as run_button's click callback.
run_button.on_click(on_run_button_clicked)


VBox(children=(FileUpload(value={}, accept='.csv', description='Upload CSV'), Dropdown(description='Minimum Su…

## Applying ECLAT on Online Retail Dataset with 1500 rows

In [1]:
# Import necessary libraries
import pandas as pd
from collections import defaultdict
from itertools import combinations
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
from io import StringIO

# Display styling: emit a <style> element as cell output with rules for
# `body`, `.widget-label`, `.my_output_area`, `.widget-button` and `select`.
# NOTE(review): `.my_output_area` is declared twice below. The second rule
# comes later with equal specificity, so its `background-color`, `border` and
# `padding` replace the values set by the first rule; only the remaining
# properties of the first rule (radius, colour, font size, margin, ...) apply.
display(HTML('''
    <style>
    body {
        background-color: #800080;
        font-family: Calibri, sans-serif;
        font-weight: bold;
        text-align: center;
        color: white;
    }
    .widget-label {
        color: white !important;
        font-weight: bold;
    }
    .my_output_area {
        background-color: #D8BFD8;
        border: 1px solid #4B0082;
        padding: 10px;
        border-radius: 5px;
        color: white !important;
        text-align: center;
        font-size: 18px;
        z-index: 10;
        position: relative;
        margin: 5px 0;
    }
    .my_output_area {
        background-color: transparent !important; /* Ensures transparency */
        border: none; /* Remove any default border */
        padding: 0 !important; /* Remove padding if needed */
    }
    .widget-button {
        background-color: #4B0082;
        color: white;
        font-weight: bold;
    }
    select {
        background-color: #4B0082;
        color: white;
        font-weight: bold;
    }
</style>
'''))

# Display the heading as a centred <h1>; the inline style repeats the font and
# colour settings so the heading does not depend on the stylesheet above.
display(HTML('''
    <h1 style="text-align: center; color: white; font-family: Calibri, sans-serif; font-weight: bold;">
        Generate Frequent Itemsets using ECLAT Algorithm
    </h1>
'''))

# --- Interactive controls ---------------------------------------------------

# File picker restricted to a single .csv file.
upload = widgets.FileUpload(
    description='Upload CSV',
    accept='.csv',
    multiple=False,
    style={'description_width': 'initial'},
)

# Minimum support selector: absolute transaction counts 1..12, default 2.
min_sup_input = widgets.Dropdown(
    description='Minimum Support:',
    options=list(range(1, 13)),
    value=2,
    style={'description_width': 'initial'},
)

# Button that triggers the mining run (the handler is attached further down).
run_button = widgets.Button(
    description='Run ECLAT',
    tooltip='Click to Run ECLAT',
    button_style='',
    style={'description_width': 'initial'},
)

# Output area that receives everything rendered by the handler.
output = widgets.Output()

# Stack the controls vertically, with the output area last, and show them.
widgets_box = widgets.VBox(children=[upload, min_sup_input, run_button, output])
display(widgets_box)

# Define ECLAT algorithm
def run_eclat(transactions, min_sup):
    """Mine all frequent itemsets depth-first by intersecting tid sets.

    The transactions are first inverted into item -> set of transaction ids.
    Frequent single items then seed a recursive search: extending a prefix
    with item ``i`` keeps, for every later item ``j``, the intersection of
    their tid sets, and recurses on the items whose intersection still meets
    ``min_sup``.

    Args:
        transactions: Mapping of transaction id to an iterable of hashable
            items.
        min_sup: Minimum number of transactions an itemset must appear in.

    Returns:
        List of ``(itemset, tids)`` pairs, one per frequent itemset.
        ``itemset`` is a ``frozenset`` of the items themselves and ``tids``
        the set of ids of the transactions containing all of them.
    """
    vertical_db = defaultdict(set)
    for tid, items in transactions.items():
        for item in items:
            vertical_db[item].add(tid)

    # Seed with the bare items. The previous version wrapped each item in a
    # frozenset and then added that frozenset as an *element* of the prefix,
    # so results were frozensets of frozensets.
    # Sorting (by str, so mixed item types cannot raise) makes the traversal
    # and result order independent of hash-randomised set iteration.
    frequent_items = sorted(
        ((item, tids) for item, tids in vertical_db.items() if len(tids) >= min_sup),
        key=lambda pair: str(pair[0]),
    )
    result = []

    def eclat(prefix, candidates):
        """Record prefix+item for each candidate and recurse on its extensions."""
        # Every candidate already meets min_sup (filtered above or when the
        # suffix was built), so no further support check is needed here.
        for idx, (item_i, tid_i) in enumerate(candidates):
            new_itemset = prefix | {item_i}
            result.append((new_itemset, tid_i))

            suffix = []
            for item_j, tid_j in candidates[idx + 1:]:
                shared_tids = tid_i & tid_j
                if len(shared_tids) >= min_sup:
                    suffix.append((item_j, shared_tids))
            if suffix:
                eclat(new_itemset, suffix)

    eclat(frozenset(), frequent_items)
    return result

# Button click event
def on_run_button_clicked(b):
    """Handle a click on the run button: load the uploaded CSV and run ECLAT.

    Clears ``output``, reads the uploaded file into a DataFrame, shows its
    first rows, builds the transactions and renders the frequent itemsets
    returned by ``run_eclat``. Two input layouts are accepted:

    * ``InvoiceNo`` and ``Description`` columns: one transaction per invoice,
      holding the set of its descriptions;
    * an ``Items`` column: one transaction per row, items separated by
      whitespace.

    Problems are reported inside ``output``.

    Args:
        b: The button instance passed by ``on_click``; not used.
    """
    # Local import so the block works regardless of the cell's import list.
    from html import escape

    output.clear_output()

    if not upload.value:
        with output:
            display(HTML('<div class="my_output_area">❗Please upload a CSV file first.</div>'))
        return

    # ipywidgets 7.x exposes uploads as {filename: {'content': bytes, ...}};
    # ipywidgets 8.x exposes a tuple of per-file records whose 'content' is a
    # memoryview. Accept both layouts and normalise the payload to bytes.
    uploaded = upload.value
    if isinstance(uploaded, dict):
        file_info = next(iter(uploaded.values()))
    else:
        file_info = uploaded[0]
    raw_bytes = bytes(file_info['content'])

    try:
        # 'utf-8-sig' decodes plain UTF-8 and also drops a leading BOM, so the
        # first header is not read with a '\ufeff' prefix.
        df = pd.read_csv(StringIO(raw_bytes.decode('utf-8-sig')))
    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Report the failure in the output area instead of letting it raise
        # inside the widget callback.
        with output:
            display(HTML(f'<div class="my_output_area">❗Could not read the uploaded CSV: {escape(str(exc))}</div>'))
        return

    # Display first few rows of the dataset
    with output:
        display(HTML(f'<div class="my_output_area"><strong>Dataset Preview:</strong><br><br>{df.head().to_html()}</div>'))

    # Preprocess: build {transaction id (from 1): set of items}.
    if 'InvoiceNo' in df.columns and 'Description' in df.columns:
        invoice_lines = df.dropna(subset=['InvoiceNo', 'Description'])
        grouped = invoice_lines.groupby('InvoiceNo')['Description'].apply(set).to_dict()
        transactions = dict(enumerate(grouped.values(), start=1))
    elif 'Items' in df.columns:
        transactions = {i + 1: set(str(row).split()) for i, row in enumerate(df['Items'].dropna())}
    else:
        with output:
            # The closing </div> was missing from this message.
            display(HTML('<div class="my_output_area">❗CSV must have "InvoiceNo and Description" or "Items" column.</div>'))
        return

    min_sup = min_sup_input.value
    frequent_itemsets = run_eclat(transactions, min_sup)

    with output:
        if not frequent_itemsets:
            display(HTML('<div class="my_output_area">❗No frequent itemsets found for the selected minimum support.</div>'))
        else:
            parts = ["<div class='my_output_area'><strong>Frequent Itemsets:</strong><br><br>"]
            for itemset, tids in frequent_itemsets:
                # Item names come from the uploaded file; escape them so that
                # characters such as '<' or '&' are shown literally.
                items = ', '.join(escape(str(i)) for i in itemset)
                parts.append(f"Itemset: [{items}] | Support: {len(tids)}<br>")
            parts.append("</div>")
            # Emit one HTML block for the whole list.
            display(HTML(''.join(parts)))


# Register on_run_button_clicked as run_button's click callback.
run_button.on_click(on_run_button_clicked)


VBox(children=(FileUpload(value={}, accept='.csv', description='Upload CSV'), Dropdown(description='Minimum Su…