# Basic Audit Sampling Tool

In [4]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
from tkinter import Tk, filedialog

# Input controls: which sampling method to apply and how many records to draw.
sampling_method_dropdown = widgets.Dropdown(
    description='Sampling Method:',
    options=[
        'Simple Random Sampling',
        'Stratified Sampling',
        'Systematic Sampling',
    ],
    value='Simple Random Sampling',
)

# The upper bound of 1000 is only a placeholder; upload_file() replaces it
# with the number of rows in the loaded population.
sample_size_slider = widgets.IntSlider(
    description='Sample Size:',
    min=1,
    max=1000,
    step=1,
    value=25,
)

# File upload widget
def upload_file(_):
    """Prompt for a CSV file and load it as the global population data.

    Click handler for ``upload_button``; the button instance passed by
    ipywidgets is ignored. On success the global ``population_df`` is replaced
    and the upper bound of ``sample_size_slider`` is set to the number of
    loaded rows. If the dialog is cancelled, the file cannot be read, or it
    contains no rows, a message is printed and any previously loaded data is
    left untouched.
    """
    global population_df

    # A new Tk root is created on every click; keep it hidden and destroy it
    # once the dialog closes so hidden root windows do not accumulate.
    root = Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(title='Select Population Data File')
    finally:
        root.destroy()

    if not file_path:
        print("File not selected or not found.")
        return

    try:
        loaded_df = pd.read_csv(file_path)
    except (OSError, ValueError) as exc:
        # OSError covers unreadable paths; pandas parsing, empty-file and
        # decoding errors are ValueError subclasses.
        print(f"Failed to load population data from {file_path}: {exc}")
        return

    if len(loaded_df) == 0:
        # The slider has min=1, so a maximum of 0 would be rejected.
        print(f"No records found in: {file_path}")
        return

    population_df = loaded_df
    sample_size_slider.max = len(population_df)  # Cap the sample size at the population size
    print(f"Population data loaded from: {file_path}")


upload_button = widgets.Button(description='Upload Population Data')
upload_button.on_click(upload_file)

# Save sample button
def save_sample(_):
    """Ask for a destination path and write the generated sample to CSV.

    Click handler for ``save_button``; the button instance is ignored. Reads
    the global ``sample_df`` created by ``generate_sample``. Prints a message
    when there is no sample yet or when the dialog is cancelled.
    """
    if 'sample_df' not in globals():
        print("No sample to save. Please generate a sample first.")
        return

    # Use a dedicated hidden root for the dialog so it does not depend on a
    # root window created elsewhere, and destroy it when the dialog closes.
    root = Tk()
    root.withdraw()
    try:
        file_path = filedialog.asksaveasfilename(title='Save Sample File', defaultextension='.csv')
    finally:
        root.destroy()

    if not file_path:
        print("Save operation cancelled.")
        return

    sample_df.to_csv(file_path, index=False)
    print(f"Sample saved to: {file_path}")


save_button = widgets.Button(description='Save Sample')
save_button.on_click(save_sample)

# Create a button to trigger sample selection
generate_sample_button = widgets.Button(description='Generate Sample')



def generate_sample(_):
    """Draw a sample from the uploaded population using the selected method.

    Click handler for ``generate_sample_button``; the button instance is
    ignored. The method is read from ``sampling_method_dropdown`` and the size
    from ``sample_size_slider``. On success the selected rows are stored in the
    global ``sample_df``. Only 'Simple Random Sampling' is implemented; the
    other methods print a notice and leave ``sample_df`` unchanged.
    """
    global sample_df

    # A click before any upload would otherwise fail with NameError.
    if 'population_df' not in globals():
        print("Please upload population data first.")
        return

    selected_method = sampling_method_dropdown.value
    selected_size = sample_size_slider.value

    sample_indices = None

    # Select sample based on chosen method
    if selected_method == 'Simple Random Sampling':
        population_size = len(population_df)
        # Drawing without replacement cannot return more rows than exist.
        if selected_size > population_size:
            print(
                f"Sample size {selected_size} exceeds the population of "
                f"{population_size} records."
            )
            return
        sample_indices = np.random.choice(population_df.index, size=selected_size, replace=False)
    elif selected_method in ('Stratified Sampling', 'Systematic Sampling'):
        # TODO: not implemented yet; sample_indices deliberately stays None.
        pass
    else:
        print("Invalid sampling method selected.")
        return

    if sample_indices is None:
        print("Sampling method not implemented or sample_indices not set.")
        return

    sample_df = population_df.loc[sample_indices]
    print(f"Sample of {selected_size} records selected successfully.")


# Attach the button click event
generate_sample_button.on_click(generate_sample)

# Display widgets
display(upload_button, sampling_method_dropdown, sample_size_slider, generate_sample_button, save_button)

# Note: The script assumes that 'population_df' is a global variable that will be set after file upload.
# Make sure to upload a file before generating a sample.


Button(description='Upload Population Data', style=ButtonStyle())

Dropdown(description='Sampling Method:', options=('Simple Random Sampling', 'Stratified Sampling', 'Systematic…

IntSlider(value=25, description='Sample Size:', max=1000, min=1)

Button(description='Generate Sample', style=ButtonStyle())

Button(description='Save Sample', style=ButtonStyle())

In [4]:
import pandas as pd
import numpy as np
import io
from IPython.display import display
import ipywidgets as widgets

# Input controls for the browser-upload variant of the tool.
sampling_method_dropdown = widgets.Dropdown(
    description='Sampling Method:',
    options=[
        'Simple Random Sampling',
        'Stratified Sampling',
        'Systematic Sampling',
    ],
    value='Simple Random Sampling',
    disabled=False,
)

# The slider starts disabled with a placeholder maximum; handle_file_upload()
# enables it and sets the maximum once a population file has been loaded.
sample_size_slider = widgets.IntSlider(
    description='Sample Size:',
    min=1,
    max=1000,
    step=1,
    value=25,
    disabled=True,
)

# Single-file upload control limited to CSV files.
file_upload = widgets.FileUpload(
    description='Upload Population Data',
    accept='.csv',
    multiple=False,
)

def handle_file_upload(change):
    """Load the uploaded CSV into the global ``population_df``.

    Observer for the ``value`` trait of ``file_upload``; ``change`` is the
    notification passed by the widget and is not inspected. On success the
    sample size slider is capped at the number of loaded rows and enabled.
    Any failure is reported with a printed message instead of propagating,
    and previously loaded data is kept.
    """
    global population_df
    try:
        uploaded_files = file_upload.value
        if not uploaded_files:
            print("No file uploaded.")
            return

        # ipywidgets 7 exposes uploads as a dict keyed by file name, while
        # ipywidgets 8 exposes a tuple of per-file records; accept either.
        if isinstance(uploaded_files, dict):
            uploaded_file = next(iter(uploaded_files.values()))
        else:
            uploaded_file = uploaded_files[0]

        content = uploaded_file['content']
        loaded_df = pd.read_csv(io.BytesIO(content))

        if len(loaded_df) == 0:
            # The slider has min=1, so a maximum of 0 would be rejected;
            # bail out before replacing any previously loaded data.
            print("Failed to load data: the uploaded file contains no records.")
            return

        population_df = loaded_df
        sample_size_slider.max = len(population_df)
        sample_size_slider.disabled = False
        print("Population data loaded successfully.")
    except Exception as e:
        # Broad on purpose: this is a UI callback boundary, so report the
        # problem to the user rather than letting it escape.
        print("Failed to load data:", e)

file_upload.observe(handle_file_upload, names='value')

generate_sample_button = widgets.Button(description='Generate Sample')

def generate_sample(_):
    """Draw a sample from the uploaded population using the selected method.

    Click handler for ``generate_sample_button``; the button instance is
    ignored. Reads the method from ``sampling_method_dropdown`` and the size
    from ``sample_size_slider``, and stores the selected rows in the global
    ``sample_df``. Only 'Simple Random Sampling' is implemented; the other
    methods print a notice and leave ``sample_df`` unchanged.
    """
    global sample_df

    # The button is clickable before any upload; avoid a NameError then.
    if 'population_df' not in globals():
        print("Please upload population data first.")
        return

    selected_method = sampling_method_dropdown.value
    selected_size = sample_size_slider.value
    sample_indices = None

    if selected_method == 'Simple Random Sampling':
        population_size = len(population_df)
        # Drawing without replacement cannot return more rows than exist.
        if selected_size > population_size:
            print(
                f"Sample size {selected_size} exceeds the population of "
                f"{population_size} records."
            )
            return
        sample_indices = np.random.choice(population_df.index, size=selected_size, replace=False)
    elif selected_method == 'Stratified Sampling':
        print("Stratified Sampling not yet implemented.")
    elif selected_method == 'Systematic Sampling':
        print("Systematic Sampling not yet implemented.")
    else:
        print("Invalid sampling method selected.")
        return

    if sample_indices is not None:
        sample_df = population_df.loc[sample_indices]
        print(f"Sample of {selected_size} records selected successfully.")

generate_sample_button.on_click(generate_sample)

save_button = widgets.Button(description='Save Sample')

def save_sample(_):
    """Write the generated sample to 'sample.csv' in the working directory.

    Click handler for ``save_button``; the button instance is ignored. Prints
    a notice instead when no global ``sample_df`` exists yet.
    """
    if 'sample_df' not in globals():
        print("No sample to save. Please generate a sample first.")
        return

    sample_df.to_csv('sample.csv', index=False)
    print("Sample saved to 'sample.csv'.")

save_button.on_click(save_sample)

# Render the controls in the order the user is expected to work through them.
display(
    file_upload,
    sampling_method_dropdown,
    sample_size_slider,
    generate_sample_button,
    save_button,
)


FileUpload(value={}, accept='.csv', description='Upload Population Data')

Dropdown(description='Sampling Method:', options=('Simple Random Sampling', 'Stratified Sampling', 'Systematic…

IntSlider(value=25, description='Sample Size:', disabled=True, max=1000, min=1)

Button(description='Generate Sample', style=ButtonStyle())

Button(description='Save Sample', style=ButtonStyle())

# Audit Sampling Tool Based on Risk

In [None]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from tkinter import Tk, filedialog

# Shared widget style: let each description take its natural width so long
# labels are not truncated.
style = dict(description_width='initial')

# Function to upload the population data file
def upload_file():
    """Prompt for a CSV file and return its contents as a DataFrame.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the dialog is
        cancelled or the selected file cannot be read. A message describing
        the outcome is printed in every case.
    """
    # A new Tk root is created on every call; keep it hidden and destroy it
    # once the dialog closes so hidden root windows do not accumulate.
    root = Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(title='Select Population Data File')
    finally:
        root.destroy()

    if not file_path:
        print("File not selected or not found.")
        return None

    try:
        population_df = pd.read_csv(file_path)
    except (OSError, ValueError) as exc:
        # OSError covers unreadable paths; pandas parsing, empty-file and
        # decoding errors are ValueError subclasses.
        print(f"Failed to load population data from {file_path}: {exc}")
        return None

    print(f"Population data loaded from: {file_path}")
    return population_df

# Widget to upload population data
upload_button = widgets.Button(description='Upload Population Data', style=style)
upload_output = widgets.Output()

def on_upload_clicked(b):
    """Load a population file and resize the sample-size sliders to match.

    Click handler for ``upload_button``; ``b`` is the clicked button and is
    not used. Messages are routed to ``upload_output``. The global
    ``population_df`` is only replaced when a non-empty file was loaded, so a
    cancelled dialog never leaves it set to ``None`` for the other handlers.
    """
    global population_df
    with upload_output:
        clear_output()
        loaded_df = upload_file()
        if loaded_df is None:
            # Cancelled or unreadable: keep whatever was loaded before.
            return
        if len(loaded_df) == 0:
            # Both sliders have min=1, so a maximum of 0 would be rejected.
            print("The selected file contains no records; population data was not updated.")
            return

        population_df = loaded_df
        population_size = len(population_df)
        # Neither slider may ask for more records than the population holds.
        sample_size_slider.max = population_size
        total_sample_size_widget.max = population_size
upload_button.on_click(on_upload_clicked)

# Slider for the overall sample size that calculate_sample_sizes() splits
# across strata. The range 1..1000 is the default configured here.
total_sample_size_widget = widgets.IntSlider(
    description='Total Sample Size:',
    min=1,
    max=1000,
    step=1,
    value=100,
    style=style,
)

# Function to calculate recommended sample sizes using Neyman allocation
def calculate_sample_sizes(b):
    """Print the recommended per-stratum sample sizes (Neyman allocation).

    Click handler for ``calculate_button``; ``b`` is the clicked button and is
    not used. Output is routed to ``sample_output``. The population must
    contain a 'stratum' column identifying each stratum and a 'value' column
    to analyse. Each stratum h receives
    ``total * N_h * sigma_h / sum(N_k * sigma_k)`` where N is the stratum size
    and sigma the standard deviation of 'value' within the stratum. The
    results are printed unrounded.
    """
    with sample_output:
        clear_output()

        # Treat both "never uploaded" and "set to None" as missing data.
        population = globals().get('population_df')
        if population is None:
            print("Please upload population data first.")
            return

        missing_columns = [
            column for column in ('stratum', 'value')
            if column not in population.columns
        ]
        if missing_columns:
            print(
                "Population data is missing required column(s): "
                + ", ".join(missing_columns)
            )
            return

        stratum_stats = (
            population.groupby('stratum')['value']
            .agg(['size', 'std'])
            .rename(columns={'size': 'N', 'std': 'sigma'})
        )
        weights = stratum_stats['N'] * stratum_stats['sigma']
        sum_product_N_sigma = weights.sum()

        # With zero (or undefined) spread in every stratum the allocation
        # formula would divide by zero.
        if sum_product_N_sigma == 0:
            print(
                "Cannot compute Neyman allocation: every stratum has zero or "
                "undefined standard deviation."
            )
            return

        total_sample_size = total_sample_size_widget.value
        stratum_stats['recommended_n'] = total_sample_size * weights / sum_product_N_sigma
        print(stratum_stats['recommended_n'])

calculate_button = widgets.Button(description='Calculate Sample Sizes', style=style)
sample_output = widgets.Output()
calculate_button.on_click(calculate_sample_sizes)

# Choice of sampling method used by the "Generate Sample" button.
sampling_method_dropdown = widgets.Dropdown(
    description='Sampling Method:',
    options=[
        'Simple Random Sampling',
        'Stratified Sampling',
        'Systematic Sampling',
    ],
    value='Simple Random Sampling',
    style=style,
)

# Number of records to draw. The maximum of 1000 is a placeholder that
# on_upload_clicked() replaces with the population size after an upload.
sample_size_slider = widgets.IntSlider(
    description='Sample Size:',
    min=1,
    max=1000,
    step=1,
    value=100,
    style=style,
)

# Generate sample button
generate_sample_button = widgets.Button(description='Generate Sample', style=style)
sample_generation_output = widgets.Output()

def on_generate_sample_clicked(b):
    """Draw a sample from the uploaded population using the selected method.

    Click handler for ``generate_sample_button``; ``b`` is the clicked button
    and is not used. Output is routed to ``sample_generation_output``. On
    success the selected rows are stored in the global ``sample_df``. Only
    'Simple Random Sampling' is implemented; other methods print a notice and
    leave ``sample_df`` unchanged.
    """
    global sample_df
    with sample_generation_output:
        clear_output()

        # Treat both "never uploaded" and "set to None" as missing data.
        population = globals().get('population_df')
        if population is None:
            print("Please upload population data first.")
            return

        selected_method = sampling_method_dropdown.value
        selected_size = sample_size_slider.value

        if selected_method != 'Simple Random Sampling':
            # Give feedback instead of silently doing nothing.
            print(f"{selected_method} not yet implemented.")
            return

        population_size = len(population)
        # Drawing without replacement cannot return more rows than exist.
        if selected_size > population_size:
            print(
                f"Sample size {selected_size} exceeds the population of "
                f"{population_size} records."
            )
            return

        sample_indices = np.random.choice(population.index, size=selected_size, replace=False)
        sample_df = population.loc[sample_indices]
        print(f"Sample of {selected_size} records selected successfully.")

generate_sample_button.on_click(on_generate_sample_clicked)

# Save sample button
save_button = widgets.Button(description='Save Sample', style=style)
save_output = widgets.Output()

def on_save_clicked(b):
    """Ask for a destination path and write the generated sample to CSV.

    Click handler for ``save_button``; ``b`` is the clicked button and is not
    used. Output is routed to ``save_output``. Prints a notice when no global
    ``sample_df`` exists yet or when the dialog is cancelled.
    """
    with save_output:
        clear_output()
        if 'sample_df' not in globals():
            print("No sample to save. Please generate a sample first.")
            return

        # A new Tk root is created on every click; keep it hidden and destroy
        # it once the dialog closes so hidden roots do not accumulate.
        root = Tk()
        root.withdraw()
        try:
            file_path = filedialog.asksaveasfilename(title='Save Sample File', defaultextension='.csv')
        finally:
            root.destroy()

        if not file_path:
            print("Save operation cancelled.")
            return

        sample_df.to_csv(file_path, index=False)
        print(f"Sample saved to: {file_path}")

save_button.on_click(on_save_clicked)

# Render the interface top to bottom; each action button is followed by the
# Output area that receives its messages.
display(
    upload_button,
    upload_output,
)
display(
    total_sample_size_widget,
    calculate_button,
    sample_output,
)
display(
    sampling_method_dropdown,
    sample_size_slider,
)
display(
    generate_sample_button,
    sample_generation_output,
)
display(
    save_button,
    save_output,
)
