In [1]:
import pandas as pd 
import numpy as np
from scipy.stats import stats, trim_mean, ttest_ind, shapiro

In [2]:
import os

In [3]:
pip install Pyreadstat

Note: you may need to restart the kernel to use updated packages.


In [4]:
os.chdir("C:/Users/Eleonora/Desktop/")

In [5]:
# Read data
df = pd.read_spss("C:/Users/Eleonora/Desktop/Python.sav")

In [6]:
# Define groups
group1 = df[df['ДемКан'] == 'Деменција']['Процесирање']
group2 = df[df['ДемКан'] == 'Канцер']['Процесирање']
t_stat, p_value = ttest_ind(group1, group2)
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: 0.5062166056726686, P-value: 0.6136490736596483


In [7]:
# Analysis
def yuenbt_with_stats(group1, group2, trim_frac, n_boot=1000, alpha=0.05):
    #  Calculate trimmed means for the original data
    trimmed_mean1 = trim_mean(group1, proportiontocut=trim_frac)
    trimmed_mean2 = trim_mean(group2, proportiontocut=trim_frac)
    observed_diff = trimmed_mean1 - trimmed_mean2

    #  Bootstrap resampling to estimate trimmed mean differences 
    boot_diffs = []
    for _ in range(n_boot):
        boot_group1 = np.random.choice(group1, size=len(group1), replace=True)
        boot_group2 = np.random.choice(group2, size=len(group2), replace=True)

        boot_trimmed_mean1 = trim_mean(boot_group1, proportiontocut=trim_frac)
        boot_trimmed_mean2 = trim_mean(boot_group2, proportiontocut=trim_frac)
        
        boot_diffs.append(boot_trimmed_mean1 - boot_trimmed_mean2)
    
    boot_diffs = np.array(boot_diffs)
    lower_ci = np.percentile(boot_diffs, 100 * (alpha / 2))
    upper_ci = np.percentile(boot_diffs, 100 * (1 - alpha / 2))
    
    # Calculate p-value based on bootstrap distribution
    p_value_boot = (np.sum(boot_diffs >= np.abs(observed_diff)) +
                    np.sum(boot_diffs <= -np.abs(observed_diff))) / n_boot

    #  Calculate the bootstrapped test statistic
    boot_se = np.std(boot_diffs, ddof=1)  # Bootstrapped standard error
    boot_t_stat = observed_diff / boot_se  # Test statistic (similar to Yuen's t)

    # Calculate Cohen's d
    pooled_sd = np.sqrt((np.var(group1, ddof=1) + np.var(group2, ddof=1)) / 2)  # Pooled SD
    cohens_d = observed_diff / pooled_sd  # Effect size
    
    
    # Results
    return {
        "Trimmed Mean 1":trimmed_mean1,
        "Trimmed Mean 2":trimmed_mean2,
        "Trimmed Mean Difference": observed_diff,
        "Bootstrap Confidence Interval": (lower_ci, upper_ci),
        "P-value (Bootstrap)": p_value_boot,
        "Test Statistic (Bootstrap)": boot_t_stat,
        "Bootstrap Standard Error": boot_se,
        "Cohen's d": cohens_d  
    }

# Yuen's bootstrapped test with 10% trimming
trim_frac = 0.1
results = yuenbt_with_stats(group1, group2, trim_frac)

# Output 
for key, value in results.items():
    print(f"{key}: {value}")

Trimmed Mean 1: 18.9375
Trimmed Mean 2: 18.645833333333332
Trimmed Mean Difference: 0.29166666666666785
Bootstrap Confidence Interval: (-2.1885416666666666, 3.0416666666666643)
P-value (Bootstrap): 0.834
Test Statistic (Bootstrap): 0.22674962877384314
Bootstrap Standard Error: 1.2862939103533089
Cohen's d: 0.04621104231843666


In [8]:
# Define groups
group1 = df[df['ДемКан'] == 'Деменција']['Експресија']
group2 = df[df['ДемКан'] == 'Канцер']['Експресија']
t_stat, p_value = ttest_ind(group1, group2)
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: 1.3435331966393653, P-value: 0.18167737227622338


In [9]:
# Analysis
def yuenbt_with_stats(group1, group2, trim_frac, n_boot=1000, alpha=0.05):
    #  Calculate trimmed means for the original data
    trimmed_mean1 = trim_mean(group1, proportiontocut=trim_frac)
    trimmed_mean2 = trim_mean(group2, proportiontocut=trim_frac)
    observed_diff = trimmed_mean1 - trimmed_mean2

    #  Bootstrap resampling to estimate trimmed mean differences 
    boot_diffs = []
    for _ in range(n_boot):
        boot_group1 = np.random.choice(group1, size=len(group1), replace=True)
        boot_group2 = np.random.choice(group2, size=len(group2), replace=True)

        boot_trimmed_mean1 = trim_mean(boot_group1, proportiontocut=trim_frac)
        boot_trimmed_mean2 = trim_mean(boot_group2, proportiontocut=trim_frac)
        
        boot_diffs.append(boot_trimmed_mean1 - boot_trimmed_mean2)
    
    boot_diffs = np.array(boot_diffs)
    lower_ci = np.percentile(boot_diffs, 100 * (alpha / 2))
    upper_ci = np.percentile(boot_diffs, 100 * (1 - alpha / 2))
    
    # Calculate p-value based on bootstrap distribution
    p_value_boot = (np.sum(boot_diffs >= np.abs(observed_diff)) +
                    np.sum(boot_diffs <= -np.abs(observed_diff))) / n_boot

    #  Calculate the bootstrapped test statistic
    boot_se = np.std(boot_diffs, ddof=1)  # Bootstrapped standard error
    boot_t_stat = observed_diff / boot_se  # Test statistic (similar to Yuen's t)

    # Calculate Cohen's d
    pooled_sd = np.sqrt((np.var(group1, ddof=1) + np.var(group2, ddof=1)) / 2)  # Pooled SD
    cohens_d = observed_diff / pooled_sd  # Effect size
    
    
    # Results
    return {
        "Trimmed Mean 1":trimmed_mean1,
        "Trimmed Mean 2":trimmed_mean2,
        "Trimmed Mean Difference": observed_diff,
        "Bootstrap Confidence Interval": (lower_ci, upper_ci),
        "P-value (Bootstrap)": p_value_boot,
        "Test Statistic (Bootstrap)": boot_t_stat,
        "Bootstrap Standard Error": boot_se,
        "Cohen's d": cohens_d  
    }

# Yuen's bootstrapped test with 10% trimming
trim_frac = 0.1
results = yuenbt_with_stats(group1, group2, trim_frac)

# Output 
for key, value in results.items():
    print(f"{key}: {value}")

Trimmed Mean 1: 18.75
Trimmed Mean 2: 17.541666666666668
Trimmed Mean Difference: 1.2083333333333321
Bootstrap Confidence Interval: (-1.0833333333333321, 3.458333333333332)
P-value (Bootstrap): 0.504
Test Statistic (Bootstrap): 1.0306898533158173
Bootstrap Standard Error: 1.1723539621992207
Cohen's d: 0.20678895075712891
