In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import t

# Summary statistics per cohort group: sample mean, sample standard deviation
# and sample size, keyed by a human-readable group label.
# NOTE(review): the "Comparison Cohort Scanned" std (0.1808) is roughly ten times
# the other three groups' values — verify it is not a misplaced decimal point.
data = {
    "Comparison Cohort Scanned": dict(mean=0.062225, std=0.180819662, n=60),
    "Comparison Cohort Not Scanned": dict(mean=0.061967976, std=0.020532609, n=840),
    "Intervention Cohort Scanned": dict(mean=0.0639587174348698, std=0.0197143338460697, n=499),
    "Intervention Cohort Not Scanned": dict(mean=0.0607112219451371, std=0.0139410167761583, n=401),
}

def two_sample_t_test(mean1, std1, n1, mean2, std2, n2):
    """
    Perform Welch's (unequal-variance) two-sample t-test from summary statistics.

    The test statistic is the difference in means divided by the standard error
    sqrt(std1**2 / n1 + std2**2 / n2); the degrees of freedom come from the
    Welch-Satterthwaite approximation, so they are generally not an integer.

    Parameters:
    mean1, std1, n1: Mean, standard deviation, and sample size of the first sample
    mean2, std2, n2: Mean, standard deviation, and sample size of the second sample

    Returns:
    t_stat: t-statistic (positive when mean1 > mean2)
    df: Welch-Satterthwaite degrees of freedom
    p_value: two-sided p-value

    Raises:
    ValueError: if a sample size is below 2, or if both standard deviations are
        zero (the statistic is undefined in either case).
    """
    if np.any(np.asarray(n1) < 2) or np.any(np.asarray(n2) < 2):
        raise ValueError(
            f"Each sample needs at least 2 observations (got n1={n1}, n2={n2})"
        )

    # Squared standard error of each sample mean.
    var_of_mean1 = std1**2 / n1
    var_of_mean2 = std2**2 / n2

    # Standard error of the difference in means (not a pooled standard deviation).
    std_error = np.sqrt(var_of_mean1 + var_of_mean2)
    if np.any(std_error == 0):
        raise ValueError(
            "Standard error is zero; the t-statistic is undefined when both "
            "standard deviations are zero"
        )
    t_stat = (mean1 - mean2) / std_error

    # Welch-Satterthwaite approximation to the degrees of freedom.
    df_num = (var_of_mean1 + var_of_mean2)**2
    df_denom = (var_of_mean1**2 / (n1 - 1)) + (var_of_mean2**2 / (n2 - 1))
    df = df_num / df_denom

    # Use the survival function rather than 1 - cdf: for large |t| the cdf
    # rounds to exactly 1.0 and the subtraction would report p = 0.
    p_value = 2 * t.sf(abs(t_stat), df)

    return t_stat, df, p_value

# Cohort pairs to test, each as (first group label, second group label)
comparisons = [
    ("Comparison Cohort Scanned", "Intervention Cohort Scanned"),
    ("Comparison Cohort Not Scanned", "Intervention Cohort Not Scanned"),
    ("Comparison Cohort Scanned", "Comparison Cohort Not Scanned"),
    ("Intervention Cohort Scanned", "Intervention Cohort Not Scanned"),
]

# Run each t-test once; keep the raw (t, df, p) tuple in `results` and collect
# one rounded display row per comparison in the same pass.
results = {}
summary_rows = []
for comp1, comp2 in comparisons:
    key = f"{comp1} vs {comp2}"
    group1 = data[comp1]
    group2 = data[comp2]

    results[key] = two_sample_t_test(
        group1["mean"], group1["std"], group1["n"],
        group2["mean"], group2["std"], group2["n"],
    )
    t_value, welch_dof, p_val = results[key]

    summary_rows.append({
        "Mean 1": round(group1["mean"], 3),
        "STD 1": round(group1["std"], 3),
        "N 1": group1["n"],
        "Mean 2": round(group2["mean"], 3),
        "STD 2": round(group2["std"], 3),
        "N 2": group2["n"],
        "t-statistic": round(t_value, 3),
        "df": round(welch_dof, 1),
        "p-value": round(p_val, 3),
    })

# One row per comparison, labelled "<first group> vs <second group>"
results_df = pd.DataFrame(summary_rows, index=list(results))

# Display the results DataFrame
results_df

Unnamed: 0,Mean 1,STD 1,N 1,Mean 2,STD 2,N 2,t-statistic,df,p-value
Comparison Cohort Scanned vs Intervention Cohort Scanned,0.062,0.181,60,0.064,0.02,499,-0.074,59.2,0.941
Comparison Cohort Not Scanned vs Intervention Cohort Not Scanned,0.062,0.021,840,0.061,0.014,401,1.265,1096.7,0.206
Comparison Cohort Scanned vs Comparison Cohort Not Scanned,0.062,0.181,60,0.062,0.021,840,0.011,59.1,0.991
Intervention Cohort Scanned vs Intervention Cohort Not Scanned,0.064,0.02,499,0.061,0.014,401,2.889,884.3,0.004


In [5]:
# Chi-square tests of independence comparing the comparison and intervention
# cohorts on four outcomes. Requires `stats` (scipy.stats) to be imported in the
# notebook's import cell. For 2x2 tables chi2_contingency applies Yates'
# continuity correction by default.


def _two_by_two(events_first, total_first, events_second, total_second):
    """Return a 2x2 table of [events, non-events] rows for two groups."""
    return [
        [events_first, total_first - events_first],
        [events_second, total_second - events_second],
    ]


# Scan Rate
population_size = 900
scans_comparison = 60
# NOTE(review): the t-test summary data in this notebook lists n=499 for
# "Intervention Cohort Scanned", but 416 is used here — confirm which is right.
scans_intervention = 416

# Contingency table for Scan Rate: rows = cohorts, columns = [scanned, not scanned]
contingency_scan_rate = _two_by_two(
    scans_comparison, population_size,
    scans_intervention, population_size,
)

# Chi-square test for Scan Rate
chi2_scan_rate, p_scan_rate, dof_scan_rate, ex_scan_rate = stats.chi2_contingency(contingency_scan_rate)


# Scanned Positivity Rate
scanned_positives_comparison = 8
scanned_positives_intervention = 27

# Contingency table for Scanned Positivity Rate: columns = [positive, negative] among scanned
contingency_scanned_positivity_rate = _two_by_two(
    scanned_positives_comparison, scans_comparison,
    scanned_positives_intervention, scans_intervention,
)

# Chi-square test for Scanned Positivity Rate
chi2_scanned_positivity_rate, p_scanned_positivity_rate, dof_scanned_positivity_rate, ex_scanned_positivity_rate = stats.chi2_contingency(contingency_scanned_positivity_rate)


# Mortality Rate (% of Total Population)
# NOTE(review): 4.9 and 3.1 are non-integer yet are placed in the table as cell
# counts next to population_size. chi2_contingency expects observed frequencies;
# if these are percentages or modelled expectations, convert them to actual
# death counts before testing — TODO confirm with the data source.
deaths_comparison = 4.9
deaths_intervention = 3.1

# Contingency table for Mortality Rate (% of Total Population): columns = [deaths, survivors]
contingency_mortality_population = _two_by_two(
    deaths_comparison, population_size,
    deaths_intervention, population_size,
)

# Chi-square test for Mortality Rate (% of Total Population)
chi2_mortality_population, p_mortality_population, dof_mortality_population, ex_mortality_population = stats.chi2_contingency(contingency_mortality_population)


# Mortality Rate (% of Total Positives)
# NOTE(review): a single, non-integer total_positives is used for both cohorts —
# verify that both cohorts really share the same number of positives.
total_positives = 58.4

# Survivors among positives in each cohort
survivors_comparison = total_positives - deaths_comparison
survivors_intervention = total_positives - deaths_intervention

# Contingency table for Mortality Rate (% of Total Positives)
contingency_mortality_positives = [
    [deaths_comparison, survivors_comparison],
    [deaths_intervention, survivors_intervention],
]

# Chi-square test for Mortality Rate (% of Total Positives)
chi2_mortality_positives, p_mortality_positives, dof_mortality_positives, ex_mortality_positives = stats.chi2_contingency(contingency_mortality_positives)

In [9]:
# Show the p-value of the chi-square test on mortality among positives
p_mortality_positives

0.7694786517266778