In [1]:
import tkinter as tk
from tkinter import ttk
import math
import statsmodels.stats.power as smp
import matplotlib.pyplot as plt  # Import Matplotlib for plotting
import numpy as np


# Start-up banner: describes the tool's purpose, inputs, outputs and flow.
banner_rule = "=" * 80

# Title block followed by a double blank line.
print(
    banner_rule,
    "SAMPLE SIZE CALCULATION USING ESTIMATES".center(80),
    banner_rule,
    "\n",
    sep="\n",
)

# Sections that share the same layout: heading, rule, body, double blank line.
banner_sections = [
    (
        "PURPOSE:",
        "~ Calculate the required sample size for statistical tests to achieve\n"
        "~ significance at specified confidence levels and margins of error.\n"
        "~ Supports both continuous and discrete data with applicable statistical test methods.",
    ),
    (
        "INPUTS:",
        "~ Confidence Level: The likelihood, expressed as a percentage (e.g., 95%, 99%), that\n"
        "  the true parameter lies within the specified range. This level affects the width\n"
        "  of the confidence interval and the stringency of the hypothesis testing.\n"
        "~ Margin of Error: The desired level of precision around the estimated parameter, often\n"
        "  expressed as a percentage. This defines the range within which the true value is\n"
        "  expected to fall with the specified confidence level.\n"
        "~ Data Type: Indicates whether the data is 'Continuous' or 'Discrete'. This choice\n"
        "  determines the appropriate statistical methods and formulas used in the calculation.\n"
        "~ Value: Required statistical input that varies by data type. For 'Continuous' data,\n"
        "  it is the population standard deviation, which measures the variability or spread\n"
        "  of data points. For 'Discrete' data, it refers to the expected proportion of the\n"
        "  outcome of interest within the population.\n"
        "~ Test Type: Specifies the hypothesis test orientation. 'One-sided' tests hypothesize\n"
        "  a specific direction of effect (either greater or less than), while 'Two-sided'\n"
        "  tests assess for any significant difference in either direction. This input\n"
        "  significantly impacts the calculation and interpretation of results, particularly\n"
        "  for discrete data.",
    ),
    (
        "OUTPUTS:",
        "~ Sample Size: Number of observations required to meet statistical criteria.\n"
        "~ Method Description: Explanation of the statistical method and assumptions used.",
    ),
]
for section_heading, section_body in banner_sections:
    print(section_heading)
    print(banner_rule)
    print(section_body)
    print("\n")

# The final section is closed by a rule and a larger gap before any report output.
print("OPERATIONAL FLOW:")
print(banner_rule)
print("1. User inputs confidence level, margin of error, data type, value, and test type.\n"
      "2. Script calculates the sample size using z-score for normal distributions or\n"
      "   normal approximation for binomial distributions.\n"
      "3. Generates a detailed report and optional visual plot of proportion vs.\n"
      "   sample size for discrete data.")
print(banner_rule)
print("\n\n\n")

def calculate_sample_size(confidence_level, margin_of_error, data_type, value, test_type):
    """Return the sample size needed to estimate a mean or a proportion.

    The size is ``z**2 * variance / margin_of_error**2`` rounded up to the next
    integer, where the variance term is ``value**2`` for 'Continuous' data and
    ``value * (1 - value)`` for 'Discrete' data.

    Args:
        confidence_level: One of "80%", "85%", "90%", "95%", "98%", "99%".
        margin_of_error: Desired half-width of the interval; must be > 0.
        data_type: 'Continuous' or 'Discrete'.
        value: Population standard deviation for 'Continuous' data, expected
            proportion for 'Discrete' data.
        test_type: 'One-sided' selects the one-sided z-score for 'Discrete'
            data; any other value uses the two-sided z-score. It is not used
            for 'Continuous' data, which always uses the two-sided z-score.

    Returns:
        A ``(sample_size, method_description)`` tuple, or ``(None, None)``
        (after printing a message) when the confidence level or data type is
        not supported.

    Raises:
        ValueError: If ``margin_of_error`` is not greater than zero.
    """
    # Standard normal quantiles at 1 - alpha/2 (two-sided) and 1 - alpha (one-sided).
    z_score_two_sided = {
        "80%": 1.2816,
        "85%": 1.4395,  # quantile at 0.925 (was mistyped as 1.4408)
        "90%": 1.6449,
        "95%": 1.9600,
        "98%": 2.3263,
        "99%": 2.5758,
    }
    z_score_one_sided = {
        "80%": 0.8416,
        "85%": 1.0364,
        "90%": 1.2816,
        "95%": 1.6449,
        "98%": 2.0537,
        "99%": 2.3263,
    }

    z = z_score_two_sided.get(confidence_level)
    if z is None:
        print(f"Confidence level '{confidence_level}' not supported.")
        return None, None  # Return None for both values

    if data_type not in ('Continuous', 'Discrete'):
        print("Invalid data type. Supported values are 'Continuous' and 'Discrete'.")
        return None, None

    # A zero margin would divide by zero; a negative (or NaN) margin is meaningless.
    # ValueError is what the GUI callback already reports to the user.
    if not margin_of_error > 0:
        raise ValueError("Margin of Error must be greater than 0.")

    if data_type == 'Continuous':
        sample_size = (z ** 2 * value ** 2) / margin_of_error ** 2
        method_description = "Using normal distribution and known standard deviation."
    else:
        if test_type == 'One-sided':
            # Both tables share the same keys, so this lookup cannot fail here.
            z = z_score_one_sided[confidence_level]
        # Calculation for proportion estimation
        sample_size = (z ** 2 * value * (1 - value)) / (margin_of_error ** 2)
        method_description = f"Using normal approximation to binomial distribution for {'One-sided' if test_type == 'One-sided' else 'Two-sided'} test."

    # Round up: a fractional observation still requires one more whole sample.
    return math.ceil(sample_size), method_description



def generate_report(confidence_level, margin_of_error, data_type, value, test_type, sample_size, method_description):
    """Print a formatted report of the inputs and the computed sample size.

    If either ``sample_size`` or ``method_description`` is ``None`` a single
    error line is printed instead and nothing else is written.

    Args:
        confidence_level: Confidence level label as entered (e.g. "95%").
        margin_of_error: Margin of error used in the calculation.
        data_type: 'Continuous' labels ``value`` as the population standard
            deviation; anything else labels it as the expected proportion.
        value: The standard deviation or proportion that was entered.
        test_type: Test orientation label as entered.
        sample_size: Computed sample size, or ``None`` on failure.
        method_description: Text describing the method, or ``None`` on failure.
    """
    calculation_failed = sample_size is None or method_description is None
    if calculation_failed:
        print("Error occurred during sample size calculation.")
        return

    rule = "=" * 80

    # The label of the "value" line depends on the kind of data.
    if data_type == 'Continuous':
        value_line = f"Population Standard Deviation: {value}"
    else:
        value_line = f"Expected Proportion: {value}"

    report_lines = [
        "ENTERED PARAMETERS:",
        rule,
        f"Confidence Level: {confidence_level}",
        f"Margin of Error: {margin_of_error}",
        f"Data Type: {data_type}",
        value_line,
        f"Test Type: {test_type}",
        "",  # blank line separating the inputs from the results
        "RESULTS",
        rule,
        f"Sample Size: {sample_size}",
        "Calculation Method Used:",
        f"{method_description}",
        rule,
    ]
    report = "\n".join(report_lines) + "\n"

    # Header, a double blank line, then the report body.
    print(
        rule,
        "SAMPLE SIZE CALCULATION REPORT".center(80),
        rule,
        "\n",
        report,
        sep="\n",
    )

def update_fields_state(event=None):
    """Enable only the value entry that matches the selected data type.

    'Continuous' enables the standard-deviation entry, 'Discrete' enables the
    proportion entry, and any other selection disables both.

    Args:
        event: Unused; accepted so the function can be bound as a Tk event handler.
    """
    selected_type = data_combobox.get()
    std_dev_state = 'normal' if selected_type == 'Continuous' else 'disabled'
    proportion_state = 'normal' if selected_type == 'Discrete' else 'disabled'
    std_dev_entry.config(state=std_dev_state)
    proportion_entry.config(state=proportion_state)

def calculate_button_clicked():
    """Read the form, compute the sample size and print the report.

    Input problems (non-numeric text, a non-positive margin of error, a
    proportion outside [0, 1], or no data type selected) are shown in
    ``result_label`` instead of being raised. If the calculation itself
    reports a failure, the label says so rather than claiming success.
    """
    try:
        conf_level = confidence_combobox.get()
        margin_err = float(margin_entry.get())
        # A zero margin would otherwise surface as an uncaught ZeroDivisionError
        # inside the Tk callback; reject it (and negative/NaN margins) up front.
        if not margin_err > 0:
            raise ValueError("Margin of Error must be greater than 0.")

        data_type = data_combobox.get()
        if data_type == 'Continuous':
            value = float(std_dev_entry.get())
        elif data_type == 'Discrete':
            value = float(proportion_entry.get())
            if not (0 <= value <= 1):
                raise ValueError("Expected Proportion must be between 0 and 1.")
        else:
            raise ValueError("Invalid data type. Supported values are 'Continuous' and 'Discrete'.")

        test_type = test_combobox.get()
        sample_size, method_description = calculate_sample_size(conf_level, margin_err, data_type, value, test_type)
        # generate_report prints its own error line when the calculation failed.
        generate_report(conf_level, margin_err, data_type, value, test_type, sample_size, method_description)
        if sample_size is None or method_description is None:
            result_label.config(text="Error: sample size could not be calculated; see terminal for details.")
        else:
            result_label.config(text="Report printed in terminal!")
    except ValueError as e:
        result_label.config(text=f"Error: {e}")

# Set up the main window
root = tk.Tk()
root.title("Sample Size Calculator")

# Set up and place GUI components.
# The comboboxes are read-only so that only the listed values can be chosen.
# A free-typed data type would not fire <<ComboboxSelected>> (leaving both
# value entries disabled), and a free-typed confidence level would have no
# z-score in the lookup tables.
ttk.Label(root, text="Confidence Level:").pack()
confidence_combobox = ttk.Combobox(
    root,
    values=["80%", "85%", "90%", "95%", "98%", "99%"],
    state="readonly",
)
confidence_combobox.pack()
confidence_combobox.set("95%")  # Default value

ttk.Label(root, text="Margin of Error:").pack()
margin_entry = ttk.Entry(root)
margin_entry.pack()
margin_entry.insert(0, "0.05")  # Default value

ttk.Label(root, text="Data Type:").pack()
data_combobox = ttk.Combobox(
    root,
    values=["", "Continuous", "Discrete"],
    state="readonly",
)
data_combobox.pack()
data_combobox.set("")  # Default value: nothing selected, both value entries disabled
data_combobox.bind("<<ComboboxSelected>>", update_fields_state)

# Both value entries start disabled; update_fields_state enables the relevant one.
ttk.Label(root, text="Population Standard Deviation (for Continuous):").pack()
std_dev_entry = ttk.Entry(root, state='disabled')
std_dev_entry.pack()

ttk.Label(root, text="Expected Proportion (for Discrete):").pack()
proportion_entry = ttk.Entry(root, state='disabled')
proportion_entry.pack()

ttk.Label(root, text="Test Type:").pack()
test_combobox = ttk.Combobox(
    root,
    values=["One-sided", "Two-sided"],
    state="readonly",
)
test_combobox.pack()
test_combobox.set("One-sided")  # Default value

calculate_button = ttk.Button(root, text="Calculate Sample Size", command=calculate_button_clicked)
calculate_button.pack()

# Shows either a success note or the validation error from the last click.
result_label = ttk.Label(root, text="")
result_label.pack()

# Blocks until the window is closed.
root.mainloop()


                    SAMPLE SIZE CALCULATION USING ESTIMATES                     


PURPOSE:
~ Calculate the required sample size for statistical tests to achieve
~ significance at specified confidence levels and margins of error.
~ Supports both continuous and discrete data with applicable statistical test methods.


INPUTS:
~ Confidence Level: The likelihood, expressed as a percentage (e.g., 95%, 99%), that
  the true parameter lies within the specified range. This level affects the width
  of the confidence interval and the stringency of the hypothesis testing.
~ Margin of Error: The desired level of precision around the estimated parameter, often
  expressed as a percentage. This defines the range within which the true value is
  expected to fall with the specified confidence level.
~ Data Type: Indicates whether the data is 'Continuous' or 'Discrete'. This choice
  determines the appropriate statistical methods and formulas used in the calculation.
~ Value: Required statistical inp