In [54]:
# 🧠 Cell 1: Import required modules
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
import io
import random

# 🛠️ Globals
# Shared notebook state, all empty until a file is uploaded:
#   population_df - the uploaded population
#   filtered_df   - the population after filters are applied
#   sample_df     - the most recently drawn sample
#   filename      - name of the uploaded file
population_df = filtered_df = sample_df = filename = None


In [None]:
# 🧠 Final Fusion Cell: All-in-One Audit Engine 🧑‍💻

# 🔁 Globals
filter_widgets = []  # (column, widget) pairs, rebuilt by create_filter_ui
sample_output = widgets.Output()  # message area shared by sampling and export

# 🧼 Create dynamic filters
def create_filter_ui():
    """Clean ``population_df`` and display one filter widget per remaining column.

    Side effects: stores the cleaned copy in the global ``population_cleaned``
    and refills the global ``filter_widgets`` with ``(column, widget)`` pairs.

    Widget chosen per column:
      * numeric (non-boolean) -> ``FloatRangeSlider`` spanning min..max
      * datetime              -> ``SelectionRangeSlider`` over the sorted
                                 distinct timestamps
      * anything else         -> ``SelectMultiple`` offering the first 20
                                 distinct non-null values

    A column whose widget cannot be built is reported and skipped.
    """
    global filter_widgets, population_cleaned

    clear_output(wait=True)
    display(widgets.HTML("<h3>🎛️ Create Filters</h3>"))
    filter_widgets.clear()

    if population_df is None:
        print("⚠️ No data loaded.")
        return

    df_cleaned = population_df.copy()
    for col in df_cleaned.select_dtypes(include='object').columns:
        raw = df_cleaned[col]
        # Normalise real values only: a blanket astype(str) turns missing cells
        # into the literal text "Nan", which then survives both dropna calls.
        df_cleaned[col] = raw.astype(str).str.strip().str.title().where(raw.notna())

    df_cleaned = (
        df_cleaned.replace([np.inf, -np.inf], np.nan)
        .dropna(how='all', axis=1)
        .dropna(how='all', axis=0)
    )
    population_cleaned = df_cleaned

    for col in df_cleaned.columns:
        try:
            series = df_cleaned[col]
            # pandas' dtype helpers also understand extension dtypes, on which
            # np.issubdtype raises TypeError.
            is_bool = pd.api.types.is_bool_dtype(series)
            if pd.api.types.is_numeric_dtype(series) and not is_bool:
                min_val, max_val = float(series.min()), float(series.max())
                step = (max_val - min_val) / 100 if max_val > min_val else 1
                widget = widgets.FloatRangeSlider(
                    value=[min_val, max_val], min=min_val, max=max_val,
                    step=step, description=str(col),
                    layout=widgets.Layout(width='95%')
                )
            elif pd.api.types.is_datetime64_any_dtype(series):
                # ipywidgets has no DateRangePicker; a range slider over the
                # distinct timestamps gives the same (start, end) value shape.
                stamps = pd.DatetimeIndex(series.dropna().unique()).sort_values()
                options = [(str(ts), ts) for ts in stamps]
                widget = widgets.SelectionRangeSlider(
                    options=options, index=(0, len(options) - 1),
                    description=str(col), layout=widgets.Layout(width='95%')
                )
            else:
                unique_vals = series.dropna().unique().tolist()[:20]
                widget = widgets.SelectMultiple(
                    options=unique_vals, description=str(col),
                    layout=widgets.Layout(width='95%', height='100px')
                )
            filter_widgets.append((col, widget))
            display(widget)
        except Exception as e:
            print(f"⚠️ Could not create filter for column '{col}': {e}")
    display(run_filter_button, reset_button)

# ✅ Apply filters
run_filter_button = widgets.Button(description="✅ Apply Filters")

def apply_filters(_):
    """Button callback: filter ``population_cleaned`` by the current widget values.

    Stores the result in the global ``filtered_df`` and shows a preview plus
    the sampling UI inside ``output_area``. A filter is applied only when the
    user has narrowed it; an untouched range keeps every row, including rows
    whose value in that column is missing.
    """
    global filtered_df
    df = population_cleaned.copy()

    for col, widget in filter_widgets:
        if isinstance(widget, widgets.FloatRangeSlider):
            lo, hi = widget.value
            # between() is False for NaN, so a full-range slider would still
            # drop rows with a missing value in this column.
            if lo > widget.min or hi < widget.max:
                df = df[df[col].between(lo, hi)]
        elif isinstance(widget, widgets.SelectionRangeSlider):
            first, last = widget.index
            if first > 0 or last < len(widget.options) - 1:
                lo, hi = widget.value
                df = df[df[col].between(lo, hi)]
        elif isinstance(widget, widgets.SelectMultiple):
            if widget.value:
                df = df[df[col].isin(widget.value)]

    filtered_df = df

    with output_area:
        clear_output()
        if len(filtered_df) == 0:
            print("⚠️ Filtered DataFrame is empty. Try relaxing your filters.")
        else:
            display(filtered_df.head())
            print(f"🔎 Filtered Rows: {len(filtered_df)}")
            display_sampling_ui()

run_filter_button.on_click(apply_filters)

# 🧼 Reset filters
reset_button = widgets.Button(description="🧼 Reset Filters")

def reset_filters(_):
    """Button callback: discard filters and the current sample, then rebuild the UI.

    Resets the globals ``filtered_df`` (to a copy of ``population_df``) and
    ``sample_df`` (to ``None``). Everything is rendered inside ``output_area``:
    output emitted by a widget callback outside an Output context is not shown
    on some front ends, which would leave the area empty after the clear.
    """
    global filtered_df, sample_df
    with output_area:
        clear_output()
        if population_df is None:
            print("⚠️ No data loaded.")
            return
        filtered_df = population_df.copy()
        sample_df = None
        # Rebuild the widgets first: create_filter_ui issues a deferred clear
        # that would wipe anything printed before it.
        create_filter_ui()
        print("🔁 Filters reset. Reloading full dataset.")
        display(population_df.head())

reset_button.on_click(reset_filters)

# 🧠 Smart sample size logic
def determine_sample_size(n):
    """Return the suggested sample size for a population of ``n`` rows.

    Up to 50 rows: all of them; up to 250: 25; up to 500: 40; otherwise 60.
    """
    if n <= 50:
        return n
    for upper_bound, suggestion in ((250, 25), (500, 40)):
        if n <= upper_bound:
            return suggestion
    return 60

# 🎲 Sampling UI elements
# Sampling method selector; perform_sampling branches on its value.
sample_method = widgets.Dropdown(
    options=["Random", "Monetary Unit Sampling"],
    description="Method:"
)
# Requested sample size; display_sampling_ui overwrites value and max after each filter run.
sample_count = widgets.BoundedIntText(value=5, min=1, max=100000, step=1, description="Sample Size:")
# Placeholder text, replaced by display_sampling_ui with the suggested size.
suggested_label = widgets.Label(value="Suggested sample: 5")
# Triggers perform_sampling (handler attached after that function is defined).
sample_button = widgets.Button(description="🎯 Select Sample")

def display_stratified_ui():
    """Offer every column of ``filtered_df`` as a stratification key and show the controls."""
    column_names = list(filtered_df.columns)
    stratify_col_dropdown.options = column_names
    heading = widgets.HTML("<h3>🔀 Stratified Sampling</h3>")
    display(heading)
    display(stratify_col_dropdown, samples_per_group, strat_sample_button, strat_sample_output)

def display_sampling_ui():
    """Refresh the sampling controls for the current ``filtered_df`` and display them.

    Sets the sample-size box to the suggested size (``determine_sample_size``)
    and caps it at the number of filtered rows, then shows the random/MUS
    controls followed by the stratified controls.
    """
    total = len(filtered_df)
    suggested = determine_sample_size(total)
    # Update the bound before the value: BoundedIntText clamps an assigned
    # value to the *current* max, so a max left over from a smaller filter
    # result would otherwise truncate the suggestion.
    sample_count.max = max(total, sample_count.min)
    sample_count.value = suggested
    suggested_label.value = f"📊 Suggested sample size: {suggested} of {total}"
    display(widgets.HTML("<h3>🎲 Sampling Options</h3>"))
    display(sample_method, sample_count, suggested_label, sample_button, sample_output)
    display_stratified_ui()

# 🎯 Sampling logic
def perform_sampling(_):
    """Button callback: draw a sample from ``filtered_df`` into the global ``sample_df``.

    "Random" draws ``sample_count.value`` rows uniformly without replacement.
    "Monetary Unit Sampling" weights rows by the first numeric column whose
    name contains a monetary keyword. Weights are the absolute values with
    missing amounts treated as zero, so negative or blank amounts no longer
    abort the draw (NOTE(review): confirm absolute-value weighting matches the
    audit methodology in use). All messages go to ``sample_output``.
    """
    global sample_df
    method = sample_method.value
    size = sample_count.value

    with sample_output:
        clear_output()
        if filtered_df is None or len(filtered_df) == 0:
            print("⚠️ No data to sample from. Check your filters.")
            return
        if size > len(filtered_df):
            print(f"⚠️ Sample size ({size}) exceeds available rows ({len(filtered_df)}).")
            return
        try:
            if method == "Random":
                sample_df = filtered_df.sample(n=size)
            elif method == "Monetary Unit Sampling":
                numeric_cols = filtered_df.select_dtypes(include='number').columns.tolist()
                monetary_keywords = ['amount', 'value', 'total', 'cost', 'price', 'payment', 'invoice', 'fee']
                # str() so non-string column labels (e.g. integers) do not raise.
                selected_col = next(
                    (col for col in numeric_cols
                     if any(k in str(col).lower() for k in monetary_keywords)),
                    None,
                )
                if selected_col is None:
                    print("❌ No monetary column detected for MUS.")
                    return
                weights = filtered_df[selected_col].abs().fillna(0)
                eligible = int((weights > 0).sum())
                # Sampling without replacement cannot return more rows than
                # carry a non-zero weight.
                if eligible < size:
                    print(f"❌ MUS needs {size} rows with a non-zero '{selected_col}', but only {eligible} are available.")
                    return
                # DataFrame.sample normalises the weights itself.
                sample_df = filtered_df.sample(n=size, weights=weights)
                print(f"💰 MUS used column: '{selected_col}'")
            display(sample_df)
            print(f"✅ Sample of {size} rows selected.")
            display(export_button)
        except Exception as e:
            print(f"❌ Sampling failed: {e}")

sample_button.on_click(perform_sampling)

# 🔀 Stratified sampling
# Column to stratify on; options are filled in by display_stratified_ui.
stratify_col_dropdown = widgets.Dropdown(options=[], description='Stratify by:', layout=widgets.Layout(width='50%'))
# Maximum number of rows drawn from each group.
samples_per_group = widgets.BoundedIntText(value=5, min=1, max=1000, step=1, description='Per Group:', layout=widgets.Layout(width='30%'))
# Triggers perform_stratified_sample (handler attached after that function is defined).
strat_sample_button = widgets.Button(description="🔀 Stratified Sample")
# Receives the stratified sample and its status messages.
strat_sample_output = widgets.Output()

def stratified_sample(df, group_col, n_per_group):
    """Draw up to ``n_per_group`` random rows from each group of ``df``.

    Args:
        df: DataFrame to sample from.
        group_col: Column whose distinct non-null values define the groups.
        n_per_group: Maximum rows per group; smaller groups are taken whole.

    Returns:
        A DataFrame of the sampled rows, groups in order of first appearance.
        Rows whose ``group_col`` is missing are never sampled. When nothing is
        sampled the result is an empty frame that keeps ``df``'s columns.
    """
    pieces = []
    for group in df[group_col].dropna().unique():
        group_df = df[df[group_col] == group]
        n = min(n_per_group, len(group_df))
        if n > 0:
            pieces.append(group_df.sample(n=n))
    if not pieces:
        return df.iloc[0:0]
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(pieces)

def perform_stratified_sample(_):
    """Button callback: build a stratified sample of ``filtered_df``.

    Reads the stratification column and per-group size from their widgets,
    stores the result in the global ``sample_df`` and renders it, a status
    line and the export button inside ``strat_sample_output``.
    """
    global sample_df
    strat_col = stratify_col_dropdown.value
    per_group = samples_per_group.value
    with strat_sample_output:
        clear_output()
        nothing_to_do = filtered_df is None or strat_col is None
        if nothing_to_do:
            print("⚠️ No data or stratification column selected.")
            return
        try:
            sample_df = stratified_sample(filtered_df, strat_col, per_group)
            display(sample_df)
            print(f"✅ Stratified sample by '{strat_col}' with {per_group} per group.")
            display(export_button)
        except Exception as err:
            print(f"❌ Stratified sampling failed: {err}")

strat_sample_button.on_click(perform_stratified_sample)

# 💾 Export sample + audit log
export_button = widgets.Button(description="💾 Export Sample to Excel")

def export_sample(_):
    """Button callback: write ``sample_df`` and an audit log to one Excel workbook.

    The workbook goes to ``output/sample_with_log_<uploaded name without
    extension>.xlsx`` with two sheets: "Sample" (the sampled rows) and
    "AuditLog" (file name, row counts and one row per filter widget).
    Status and errors are printed to ``sample_output``.
    """
    import os

    if sample_df is None or sample_df.empty:
        with sample_output:
            print("⚠️ No sample to export.")
        return
    try:
        log_rows = [
            ('Filename', filename),
            ('Filtered Rows', len(filtered_df)),
            ('Population Rows', len(population_df)),
        ]
        # One stringified row per filter: a Python list stored in a single
        # cell cannot be serialised by the Excel writer.
        log_rows += [(f"Filter: {col}", str(widget.value)) for col, widget in filter_widgets]
        audit_log = pd.DataFrame(log_rows, columns=['Field', 'Value']).set_index('Field')

        os.makedirs("output", exist_ok=True)  # the folder may not exist yet
        # splitext keeps dotted names intact ("q1.final.csv" -> "q1.final").
        stem = os.path.splitext(os.path.basename(filename))[0]
        out_name = f"output/sample_with_log_{stem}.xlsx"
        with pd.ExcelWriter(out_name) as writer:
            sample_df.to_excel(writer, sheet_name="Sample", index=False)
            audit_log.to_excel(writer, sheet_name="AuditLog")
        with sample_output:
            print(f"✅ Sample + audit log exported to: {out_name}")
    except Exception as e:
        with sample_output:
            print(f"❌ Export failed: {e}")

export_button.on_click(export_sample)


In [56]:
# 📤 Cell 2: File upload + read into DataFrame
file_upload = widgets.FileUpload(accept='.csv,.xlsx', multiple=False)
output_area = widgets.Output()  # main display area for previews, filters and sampling UI

def handle_upload(change):
    """Observer for ``file_upload.value``: load the uploaded file into ``population_df``.

    Files whose name ends in ``.csv`` (any letter case) are read with
    ``pd.read_csv``; everything else goes to ``pd.read_excel``. On success the
    globals ``population_df`` and ``filename`` are replaced, any previous
    ``filtered_df`` / ``sample_df`` are discarded because they belong to the
    old population, and the filter UI is rebuilt inside ``output_area``.

    Args:
        change: Trait-change notification (unused; the widget is read directly).
    """
    global population_df, filename, filtered_df, sample_df
    output_area.clear_output()

    uploads = file_upload.value
    if not uploads:
        return

    if isinstance(uploads, dict):
        # ipywidgets 7 shape: {file name: {'metadata': ..., 'content': bytes}}
        filename, uploaded = next(iter(uploads.items()))
    else:
        # ipywidgets 8 shape: tuple of dicts carrying 'name' and 'content'
        uploaded = uploads[0]
        filename = uploaded['name']
    content = uploaded['content']

    try:
        if filename.lower().endswith('.csv'):
            population_df = pd.read_csv(io.BytesIO(content))
        else:
            population_df = pd.read_excel(io.BytesIO(content))
        filtered_df = None
        sample_df = None

        with output_area:
            display(population_df.head())
            print(f"✅ Loaded file: {filename} | Rows: {len(population_df)} | Columns: {list(population_df.columns)}")
            create_filter_ui()  # trigger filter creation
    except Exception as e:
        with output_area:
            print(f"❌ Error: {e}")

file_upload.observe(handle_upload, names='value')
display(widgets.HTML("<h3>📁 Upload Population File</h3>"), file_upload, output_area)


HTML(value='<h3>📁 Upload Population File</h3>')

FileUpload(value=(), accept='.csv,.xlsx', description='Upload')

Output()

In [46]:
# 🎛️ Cell 3: Dynamic Filter UI with Data Cleaning & Debugging
filter_widgets = []  # (column, widget) pairs, rebuilt by create_filter_ui

def create_filter_ui():
    """Clean ``population_df`` and display one filter widget per remaining column.

    Side effects: stores the cleaned copy in the global ``population_cleaned``
    and refills the global ``filter_widgets`` with ``(column, widget)`` pairs.

    Widget chosen per column:
      * numeric (non-boolean) -> ``FloatRangeSlider`` spanning min..max
      * datetime              -> ``SelectionRangeSlider`` over the sorted
                                 distinct timestamps
      * anything else         -> ``SelectMultiple`` offering the first 20
                                 distinct non-null values

    A column whose widget cannot be built is reported and skipped.
    """
    global filter_widgets, population_cleaned

    clear_output(wait=True)
    display(widgets.HTML("<h3>🎛️ Create Filters</h3>"))
    filter_widgets.clear()

    if population_df is None:
        print("⚠️ No data loaded.")
        return

    # 🧽 Clean DataFrame before filtering
    df_cleaned = population_df.copy()

    # Normalize strings (strip + title case) without touching missing cells:
    # a blanket astype(str) turns them into the literal text "Nan".
    for col in df_cleaned.select_dtypes(include='object').columns:
        raw = df_cleaned[col]
        df_cleaned[col] = raw.astype(str).str.strip().str.title().where(raw.notna())

    # Treat infinities as missing, then drop all-empty columns and rows
    df_cleaned = (
        df_cleaned.replace([np.inf, -np.inf], np.nan)
        .dropna(how='all', axis=1)
        .dropna(how='all', axis=0)
    )

    # Use cleaned version globally
    population_cleaned = df_cleaned

    # 🔁 Build widgets per column
    for col in df_cleaned.columns:
        try:
            series = df_cleaned[col]
            # pandas' dtype helpers also understand extension dtypes, on which
            # np.issubdtype raises TypeError.
            is_bool = pd.api.types.is_bool_dtype(series)

            if pd.api.types.is_numeric_dtype(series) and not is_bool:
                min_val, max_val = float(series.min()), float(series.max())
                widget = widgets.FloatRangeSlider(
                    value=[min_val, max_val], min=min_val, max=max_val,
                    step=(max_val - min_val) / 100 if max_val > min_val else 1,
                    description=str(col), layout=widgets.Layout(width='95%')
                )
            elif pd.api.types.is_datetime64_any_dtype(series):
                # ipywidgets has no DateRangePicker; a range slider over the
                # distinct timestamps gives the same (start, end) value shape.
                stamps = pd.DatetimeIndex(series.dropna().unique()).sort_values()
                options = [(str(ts), ts) for ts in stamps]
                widget = widgets.SelectionRangeSlider(
                    options=options, index=(0, len(options) - 1),
                    description=str(col), layout=widgets.Layout(width='95%')
                )
            else:
                unique_vals = series.dropna().unique().tolist()[:20]
                widget = widgets.SelectMultiple(
                    options=unique_vals, description=str(col),
                    layout=widgets.Layout(width='95%', height='100px')
                )

            filter_widgets.append((col, widget))
            display(widget)

        except Exception as e:
            print(f"⚠️ Could not create filter for column '{col}': {e}")

    display(run_filter_button)

# ✅ Filter Apply Button
run_filter_button = widgets.Button(description="✅ Apply Filters")

def apply_filters(_):
    """Button callback: filter ``population_cleaned`` by the current widget values.

    Stores the result in the global ``filtered_df`` and shows a preview plus
    the sampling UI inside ``output_area``. A filter is applied only when the
    user has narrowed it; an untouched range keeps every row, including rows
    whose value in that column is missing.
    """
    global filtered_df
    df = population_cleaned.copy()

    print("🧪 Applying filters...")

    for col, widget in filter_widgets:
        if isinstance(widget, widgets.FloatRangeSlider):
            lo, hi = widget.value
            # between() is False for NaN, so a full-range slider would still
            # drop rows with a missing value in this column.
            if lo > widget.min or hi < widget.max:
                df = df[df[col].between(lo, hi)]
        elif isinstance(widget, widgets.SelectionRangeSlider):
            first, last = widget.index
            if first > 0 or last < len(widget.options) - 1:
                lo, hi = widget.value
                df = df[df[col].between(lo, hi)]
        elif isinstance(widget, widgets.SelectMultiple):
            print(f"🔍 {col}: filtering for {widget.value}")
            if widget.value:
                df = df[df[col].isin(widget.value)]

    filtered_df = df

    with output_area:
        clear_output()
        if len(filtered_df) == 0:
            print("⚠️ Filtered DataFrame is empty. Try relaxing your filters.")
        else:
            display(filtered_df.head())
            print(f"🔎 Filtered Rows: {len(filtered_df)}")
            display_sampling_ui()

run_filter_button.on_click(apply_filters)


In [47]:
# 📦 Cell 6: Stratified Sampling Logic + UI
# Column to stratify on; options are filled in by display_stratified_ui.
stratify_col_dropdown = widgets.Dropdown(
    options=[],
    description='Stratify by:',
    layout=widgets.Layout(width='50%')
)

# Maximum number of rows drawn from each group.
samples_per_group = widgets.BoundedIntText(
    value=5, min=1, max=1000, step=1,
    description='Per Group:',
    layout=widgets.Layout(width='30%')
)

# Button that triggers perform_stratified_sample, and the area its results are rendered into.
strat_sample_button = widgets.Button(description="🔀 Stratified Sample")
strat_sample_output = widgets.Output()

def stratified_sample(df, group_col, n_per_group):
    """Draw up to ``n_per_group`` random rows from each group of ``df``.

    Args:
        df: DataFrame to sample from.
        group_col: Column whose distinct non-null values define the groups.
        n_per_group: Maximum rows per group; smaller groups are taken whole.

    Returns:
        A DataFrame of the sampled rows, groups in order of first appearance.
        Rows whose ``group_col`` is missing are never sampled. When nothing is
        sampled the result is an empty frame that keeps ``df``'s columns.
    """
    pieces = []
    for group in df[group_col].dropna().unique():
        group_df = df[df[group_col] == group]
        n = min(n_per_group, len(group_df))
        if n > 0:
            pieces.append(group_df.sample(n=n))
    if not pieces:
        return df.iloc[0:0]
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(pieces)

def perform_stratified_sample(_):
    """Button callback: build a stratified sample of ``filtered_df``.

    Reads the stratification column and per-group size from their widgets,
    stores the result in the global ``sample_df`` and renders it, a status
    line and the export button inside ``strat_sample_output``.
    """
    global sample_df
    strat_col = stratify_col_dropdown.value
    per_group = samples_per_group.value

    with strat_sample_output:
        clear_output()
        nothing_to_do = filtered_df is None or strat_col is None
        if nothing_to_do:
            print("⚠️ No data or stratification column selected.")
            return

        try:
            sample_df = stratified_sample(filtered_df, strat_col, per_group)
            display(sample_df)
            print(f"✅ Stratified sample created from '{strat_col}' with {per_group} per group.")
            display(export_button)
        except Exception as err:
            print(f"❌ Stratified sampling failed: {err}")

strat_sample_button.on_click(perform_stratified_sample)

def display_stratified_ui():
    """Offer every column of ``filtered_df`` as a stratification key and show the controls."""
    column_names = list(filtered_df.columns)
    stratify_col_dropdown.options = column_names
    heading = widgets.HTML("<h3>🔀 Stratified Sampling</h3>")
    display(heading)
    display(stratify_col_dropdown, samples_per_group, strat_sample_button, strat_sample_output)


In [48]:
# 🎲 Cell 4: Enhanced Sampling Method UI + Logic (Random + MUS)

# Output area that perform_sampling renders the sample and its status messages into.
sample_output = widgets.Output()

# 🧠 Audit-style sample sizing logic
def determine_sample_size(n):
    """Return the suggested sample size for a population of ``n`` rows.

    Populations of 50 rows or fewer are tested in full; larger ones get a
    fixed size by band (<=250: 25, <=500: 40, above: 60).
    """
    if n <= 50:
        return n  # Test all
    bands = ((250, 25), (500, 40))
    return next((size for limit, size in bands if n <= limit), 60)

# UI Elements
# Sampling method selector; perform_sampling branches on its value.
sample_method = widgets.Dropdown(
    options=["Random", "Monetary Unit Sampling"],
    description="Method:"
)

# This gets updated after filtering (display_sampling_ui sets value and max)
sample_count = widgets.BoundedIntText(
    value=5, min=1, max=100000, step=1,
    description="Sample Size:"
)

# Placeholder text, replaced by display_sampling_ui with the suggested size.
suggested_label = widgets.Label(value="Suggested sample: 5")
# Triggers perform_sampling (handler attached after that function is defined).
sample_button = widgets.Button(description="🎯 Select Sample")

def display_sampling_ui():
    """Refresh the sampling controls for the current ``filtered_df`` and display them.

    Sets the sample-size box to the suggested size (``determine_sample_size``)
    and caps it at the number of filtered rows, then shows the random/MUS
    controls followed by the stratified controls.
    """
    total = len(filtered_df)
    suggested = determine_sample_size(total)
    # Update the bound before the value: BoundedIntText clamps an assigned
    # value to the *current* max, so a max left over from a smaller filter
    # result would otherwise truncate the suggestion.
    sample_count.max = max(total, sample_count.min)
    sample_count.value = suggested
    suggested_label.value = f"📊 Suggested sample size: {suggested} of {total}"

    display(widgets.HTML("<h3>🎲 Sampling Options</h3>"))
    display(sample_method, sample_count, suggested_label, sample_button, sample_output)
    display_stratified_ui()  # 👈 Show stratified option below too

# 🎯 Sample selection logic (Random or MUS)
def perform_sampling(_):
    """Button callback: draw a sample from ``filtered_df`` into the global ``sample_df``.

    "Random" draws ``sample_count.value`` rows uniformly without replacement.
    "Monetary Unit Sampling" weights rows by the first numeric column whose
    name contains a monetary keyword. Weights are the absolute values with
    missing amounts treated as zero, so negative or blank amounts no longer
    abort the draw (NOTE(review): confirm absolute-value weighting matches the
    audit methodology in use). All messages go to ``sample_output``.
    """
    global sample_df
    method = sample_method.value
    size = sample_count.value

    with sample_output:
        clear_output()

        if filtered_df is None or len(filtered_df) == 0:
            print("⚠️ No data to sample from. Check your filters.")
            return
        if size > len(filtered_df):
            print(f"⚠️ Sample size ({size}) exceeds available rows ({len(filtered_df)}). Lower the sample size.")
            return

        try:
            if method == "Random":
                sample_df = filtered_df.sample(n=size)

            elif method == "Monetary Unit Sampling":
                # 🧠 Auto-detect monetary column
                numeric_cols = filtered_df.select_dtypes(include='number').columns.tolist()
                monetary_keywords = ['amount', 'value', 'total', 'cost', 'price', 'payment', 'invoice', 'fee']
                selected_col = None
                for col in numeric_cols:
                    # str() so non-string column labels (e.g. integers) do not raise.
                    col_lower = str(col).lower()
                    if any(keyword in col_lower for keyword in monetary_keywords):
                        selected_col = col
                        break

                if selected_col is None:
                    print("❌ No monetary column detected for MUS. Please rename or add a column with amount values.")
                    return

                weights = filtered_df[selected_col].abs().fillna(0)
                eligible = int((weights > 0).sum())
                # Sampling without replacement cannot return more rows than
                # carry a non-zero weight.
                if eligible < size:
                    print(f"❌ MUS needs {size} rows with a non-zero '{selected_col}', but only {eligible} are available.")
                    return
                # DataFrame.sample normalises the weights itself.
                sample_df = filtered_df.sample(n=size, weights=weights)
                print(f"💰 MUS sampling used column: '{selected_col}'")

            display(sample_df)
            print(f"✅ Sample of {size} rows selected.")
            display(export_button)

        except Exception as e:
            print(f"❌ Sampling failed: {e}")

sample_button.on_click(perform_sampling)


In [49]:
# 🧱 Unified Logic Init Cell: Filters, Sampling, Stratified, UI Logic

filter_widgets = []  # (column, widget) pairs, rebuilt by create_filter_ui
sample_output = widgets.Output()  # message area shared by sampling and export

# 🧼 Create dynamic filters
def create_filter_ui():
    """Clean ``population_df`` and display one filter widget per remaining column.

    Side effects: stores the cleaned copy in the global ``population_cleaned``
    and refills the global ``filter_widgets`` with ``(column, widget)`` pairs.

    Widget chosen per column:
      * numeric (non-boolean) -> ``FloatRangeSlider`` spanning min..max
      * datetime              -> ``SelectionRangeSlider`` over the sorted
                                 distinct timestamps
      * anything else         -> ``SelectMultiple`` offering the first 20
                                 distinct non-null values

    A column whose widget cannot be built is reported and skipped.
    """
    global filter_widgets, population_cleaned

    clear_output(wait=True)
    display(widgets.HTML("<h3>🎛️ Create Filters</h3>"))
    filter_widgets.clear()

    if population_df is None:
        print("⚠️ No data loaded.")
        return

    df_cleaned = population_df.copy()

    for col in df_cleaned.select_dtypes(include='object').columns:
        raw = df_cleaned[col]
        # Normalise real values only: a blanket astype(str) turns missing cells
        # into the literal text "Nan", which then survives both dropna calls.
        df_cleaned[col] = raw.astype(str).str.strip().str.title().where(raw.notna())

    df_cleaned = (
        df_cleaned.replace([np.inf, -np.inf], np.nan)
        .dropna(how='all', axis=1)
        .dropna(how='all', axis=0)
    )

    population_cleaned = df_cleaned

    for col in df_cleaned.columns:
        try:
            series = df_cleaned[col]
            # pandas' dtype helpers also understand extension dtypes, on which
            # np.issubdtype raises TypeError.
            is_bool = pd.api.types.is_bool_dtype(series)

            if pd.api.types.is_numeric_dtype(series) and not is_bool:
                min_val, max_val = float(series.min()), float(series.max())
                step = (max_val - min_val) / 100 if max_val > min_val else 1
                widget = widgets.FloatRangeSlider(
                    value=[min_val, max_val], min=min_val, max=max_val,
                    step=step, description=str(col),
                    layout=widgets.Layout(width='95%')
                )
            elif pd.api.types.is_datetime64_any_dtype(series):
                # ipywidgets has no DateRangePicker; a range slider over the
                # distinct timestamps gives the same (start, end) value shape.
                stamps = pd.DatetimeIndex(series.dropna().unique()).sort_values()
                options = [(str(ts), ts) for ts in stamps]
                widget = widgets.SelectionRangeSlider(
                    options=options, index=(0, len(options) - 1),
                    description=str(col), layout=widgets.Layout(width='95%')
                )
            else:
                unique_vals = series.dropna().unique().tolist()[:20]
                widget = widgets.SelectMultiple(
                    options=unique_vals, description=str(col),
                    layout=widgets.Layout(width='95%', height='100px')
                )

            filter_widgets.append((col, widget))
            display(widget)

        except Exception as e:
            print(f"⚠️ Could not create filter for column '{col}': {e}")

    display(run_filter_button)

# ✅ Apply filters
run_filter_button = widgets.Button(description="✅ Apply Filters")

def apply_filters(_):
    """Button callback: filter ``population_cleaned`` by the current widget values.

    Stores the result in the global ``filtered_df`` and shows a preview plus
    the sampling UI inside ``output_area``. A filter is applied only when the
    user has narrowed it; an untouched range keeps every row, including rows
    whose value in that column is missing.
    """
    global filtered_df
    df = population_cleaned.copy()

    print("🧪 Applying filters...")

    for col, widget in filter_widgets:
        if isinstance(widget, widgets.FloatRangeSlider):
            lo, hi = widget.value
            # between() is False for NaN, so a full-range slider would still
            # drop rows with a missing value in this column.
            if lo > widget.min or hi < widget.max:
                df = df[df[col].between(lo, hi)]
        elif isinstance(widget, widgets.SelectionRangeSlider):
            first, last = widget.index
            if first > 0 or last < len(widget.options) - 1:
                lo, hi = widget.value
                df = df[df[col].between(lo, hi)]
        elif isinstance(widget, widgets.SelectMultiple):
            print(f"🔍 {col}: filtering for {widget.value}")
            if widget.value:
                df = df[df[col].isin(widget.value)]

    filtered_df = df

    with output_area:
        clear_output()
        if len(filtered_df) == 0:
            print("⚠️ Filtered DataFrame is empty. Try relaxing your filters.")
        else:
            display(filtered_df.head())
            print(f"🔎 Filtered Rows: {len(filtered_df)}")
            display_sampling_ui()

run_filter_button.on_click(apply_filters)

# 🧠 Smart sample size logic
def determine_sample_size(n):
    """Return the suggested sample size for a population of ``n`` rows.

    Up to 50 rows: all of them; up to 250: 25; up to 500: 40; otherwise 60.
    """
    if n > 500:
        return 60
    if n > 250:
        return 40
    if n > 50:
        return 25
    return n

# 🎲 Sampling UI elements
# Sampling method selector; perform_sampling branches on its value.
sample_method = widgets.Dropdown(
    options=["Random", "Monetary Unit Sampling"],
    description="Method:"
)
# Requested sample size; display_sampling_ui overwrites value and max after each filter run.
sample_count = widgets.BoundedIntText(value=5, min=1, max=100000, step=1, description="Sample Size:")
# Placeholder text, replaced by display_sampling_ui with the suggested size.
suggested_label = widgets.Label(value="Suggested sample: 5")
# Triggers perform_sampling (handler attached after that function is defined).
sample_button = widgets.Button(description="🎯 Select Sample")

def display_stratified_ui():
    """Offer every column of ``filtered_df`` as a stratification key and show the controls."""
    column_names = list(filtered_df.columns)
    stratify_col_dropdown.options = column_names
    heading = widgets.HTML("<h3>🔀 Stratified Sampling</h3>")
    display(heading)
    display(stratify_col_dropdown, samples_per_group, strat_sample_button, strat_sample_output)

def display_sampling_ui():
    """Refresh the sampling controls for the current ``filtered_df`` and display them.

    Sets the sample-size box to the suggested size (``determine_sample_size``)
    and caps it at the number of filtered rows, then shows the random/MUS
    controls followed by the stratified controls.
    """
    total = len(filtered_df)
    suggested = determine_sample_size(total)
    # Update the bound before the value: BoundedIntText clamps an assigned
    # value to the *current* max, so a max left over from a smaller filter
    # result would otherwise truncate the suggestion.
    sample_count.max = max(total, sample_count.min)
    sample_count.value = suggested
    suggested_label.value = f"📊 Suggested sample size: {suggested} of {total}"
    display(widgets.HTML("<h3>🎲 Sampling Options</h3>"))
    display(sample_method, sample_count, suggested_label, sample_button, sample_output)
    display_stratified_ui()

# 🎯 Sampling logic
def perform_sampling(_):
    """Button callback: draw a sample from ``filtered_df`` into the global ``sample_df``.

    "Random" draws ``sample_count.value`` rows uniformly without replacement.
    "Monetary Unit Sampling" weights rows by the first numeric column whose
    name contains a monetary keyword. Weights are the absolute values with
    missing amounts treated as zero, so negative or blank amounts no longer
    abort the draw (NOTE(review): confirm absolute-value weighting matches the
    audit methodology in use). All messages go to ``sample_output``.
    """
    global sample_df
    method = sample_method.value
    size = sample_count.value

    with sample_output:
        clear_output()
        if filtered_df is None or len(filtered_df) == 0:
            print("⚠️ No data to sample from. Check your filters.")
            return
        if size > len(filtered_df):
            print(f"⚠️ Sample size ({size}) exceeds available rows ({len(filtered_df)}). Lower the sample size.")
            return
        try:
            if method == "Random":
                sample_df = filtered_df.sample(n=size)
            elif method == "Monetary Unit Sampling":
                numeric_cols = filtered_df.select_dtypes(include='number').columns.tolist()
                monetary_keywords = ['amount', 'value', 'total', 'cost', 'price', 'payment', 'invoice', 'fee']
                # str() so non-string column labels (e.g. integers) do not raise.
                selected_col = next(
                    (col for col in numeric_cols
                     if any(k in str(col).lower() for k in monetary_keywords)),
                    None,
                )
                if selected_col is None:
                    print("❌ No monetary column detected for MUS.")
                    return
                weights = filtered_df[selected_col].abs().fillna(0)
                eligible = int((weights > 0).sum())
                # Sampling without replacement cannot return more rows than
                # carry a non-zero weight.
                if eligible < size:
                    print(f"❌ MUS needs {size} rows with a non-zero '{selected_col}', but only {eligible} are available.")
                    return
                # DataFrame.sample normalises the weights itself.
                sample_df = filtered_df.sample(n=size, weights=weights)
                print(f"💰 MUS sampling used column: '{selected_col}'")
            display(sample_df)
            print(f"✅ Sample of {size} rows selected.")
            display(export_button)
        except Exception as e:
            print(f"❌ Sampling failed: {e}")

sample_button.on_click(perform_sampling)

# 🔀 Stratified logic
# Column to stratify on; options are filled in by display_stratified_ui.
stratify_col_dropdown = widgets.Dropdown(options=[], description='Stratify by:', layout=widgets.Layout(width='50%'))
# Maximum number of rows drawn from each group.
samples_per_group = widgets.BoundedIntText(value=5, min=1, max=1000, step=1, description='Per Group:', layout=widgets.Layout(width='30%'))
# Triggers perform_stratified_sample (handler attached after that function is defined).
strat_sample_button = widgets.Button(description="🔀 Stratified Sample")
# Receives the stratified sample and its status messages.
strat_sample_output = widgets.Output()

def stratified_sample(df, group_col, n_per_group):
    """Draw up to ``n_per_group`` random rows from each group of ``df``.

    Args:
        df: DataFrame to sample from.
        group_col: Column whose distinct non-null values define the groups.
        n_per_group: Maximum rows per group; smaller groups are taken whole.

    Returns:
        A DataFrame of the sampled rows, groups in order of first appearance.
        Rows whose ``group_col`` is missing are never sampled. When nothing is
        sampled the result is an empty frame that keeps ``df``'s columns.
    """
    pieces = []
    for group in df[group_col].dropna().unique():
        group_df = df[df[group_col] == group]
        n = min(n_per_group, len(group_df))
        if n > 0:
            pieces.append(group_df.sample(n=n))
    if not pieces:
        return df.iloc[0:0]
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(pieces)

def perform_stratified_sample(_):
    """Button callback: build a stratified sample of ``filtered_df``.

    Reads the stratification column and per-group size from their widgets,
    stores the result in the global ``sample_df`` and renders it, a status
    line and the export button inside ``strat_sample_output``.
    """
    global sample_df
    strat_col = stratify_col_dropdown.value
    per_group = samples_per_group.value
    with strat_sample_output:
        clear_output()
        nothing_to_do = filtered_df is None or strat_col is None
        if nothing_to_do:
            print("⚠️ No data or stratification column selected.")
            return
        try:
            sample_df = stratified_sample(filtered_df, strat_col, per_group)
            display(sample_df)
            print(f"✅ Stratified sample created from '{strat_col}' with {per_group} per group.")
            display(export_button)
        except Exception as err:
            print(f"❌ Stratified sampling failed: {err}")

strat_sample_button.on_click(perform_stratified_sample)


In [34]:
# 💾 Cell 5: Export sample
export_button = widgets.Button(description="💾 Export Sample to Excel")

def export_sample(_):
    """Button callback: write ``sample_df`` to ``output/sample_<uploaded name without extension>.xlsx``.

    Creates the ``output`` folder when it is missing and reports success or
    failure in ``sample_output`` instead of letting the callback raise.
    """
    import os

    with sample_output:
        if sample_df is None:
            print("⚠️ No sample to export.")
            return
        try:
            os.makedirs("output", exist_ok=True)  # the folder may not exist yet
            # splitext keeps dotted names intact ("q1.final.csv" -> "q1.final").
            stem = os.path.splitext(os.path.basename(filename))[0]
            out_path = f"output/sample_{stem}.xlsx"
            sample_df.to_excel(out_path, index=False)
            print(f"📁 Sample exported: {out_path}")
        except Exception as e:
            print(f"❌ Export failed: {e}")

export_button.on_click(export_sample)


In [None]:
# 📦 Cell 7: Export sample + audit log
export_button = widgets.Button(description="💾 Export Sample to Excel")

def export_sample(_):
    """Button callback: write ``sample_df`` and an audit log to one Excel workbook.

    The workbook goes to ``output/sample_with_log_<uploaded name without
    extension>.xlsx`` with two sheets: "Sample" (the sampled rows) and
    "AuditLog" (file name, row counts and one row per filter widget).
    Status and errors are printed to ``sample_output``.
    """
    import os

    if sample_df is None or sample_df.empty:
        with sample_output:
            print("⚠️ No sample to export.")
        return

    try:
        log_rows = [
            ('Filename', filename),
            ('Filtered Rows', len(filtered_df)),
            ('Population Rows', len(population_df)),
        ]
        # One stringified row per filter: a Python list stored in a single
        # cell cannot be serialised by the Excel writer.
        log_rows += [(f"Columns Filtered: {col}", str(widget.value)) for col, widget in filter_widgets]
        audit_log = pd.DataFrame(log_rows, columns=['Field', 'Value']).set_index('Field')

        os.makedirs("output", exist_ok=True)  # the folder may not exist yet
        # splitext keeps dotted names intact ("q1.final.csv" -> "q1.final").
        stem = os.path.splitext(os.path.basename(filename))[0]

        # Export both sample and audit log
        with pd.ExcelWriter(f"output/sample_with_log_{stem}.xlsx") as writer:
            sample_df.to_excel(writer, sheet_name="Sample", index=False)
            audit_log.to_excel(writer, sheet_name="AuditLog")

        with sample_output:
            print("✅ Sample + Audit log exported.")

    except Exception as e:
        with sample_output:
            print(f"❌ Export failed: {e}")

export_button.on_click(export_sample)


In [None]:
# 📦 Cell 8: Reset filters
reset_button = widgets.Button(description="🧼 Reset Filters")

def reset_filters(_):
    """Button callback: discard filters and the current sample, then rebuild the UI.

    Resets the globals ``filtered_df`` (to a copy of ``population_df``) and
    ``sample_df`` (to ``None``). Everything is rendered inside ``output_area``:
    output emitted by a widget callback outside an Output context is not shown
    on some front ends, which would leave the area empty after the clear.
    """
    global filtered_df, sample_df
    with output_area:
        clear_output()
        if population_df is None:
            print("⚠️ No data loaded.")
            return
        filtered_df = population_df.copy()
        sample_df = None
        # Rebuild the widgets first: create_filter_ui issues a deferred clear
        # that would wipe anything printed before it.
        create_filter_ui()
        print("🔁 Filters reset. Reloading full dataset.")
        display(population_df.head())

reset_button.on_click(reset_filters)
display(reset_button)
