# In [2]:
import pandas as pd
import numpy as np
from scipy import stats

# Load the CSV data
def analyze_sleep_data(file_path='filleddata.csv'):
    """Summarise a sleep-intervention CSV and compare restless scores.

    Rows are split by the ``Intervention`` column into the three arms
    ``Baseline``, ``Avoid Screen Time`` and ``Breathing``. Four tables are
    built, printed to stdout and returned:

    * descriptive statistics of ``manual restless score``;
    * descriptive statistics of ``total sleep duration`` divided by 60
      (reported as hours, so the column is assumed to be in minutes);
    * adherence, i.e. the sum of ``start & end controlled?`` over the number
      of rows per arm (the column is assumed to be boolean / 0-1);
    * pairwise Welch t-tests and Cohen's d on the restless scores, with a
      textual interpretation.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        dict with the keys ``'restless_stats'``, ``'sleep_duration_stats'``,
        ``'adherence_stats'`` and ``'statistical_tests'``, each mapping to an
        object-dtype ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (from ``read_csv``).
        KeyError: If one of the required columns is missing from the file.
    """
    data = pd.read_csv(file_path)

    # Fail early with one message naming every missing column instead of a
    # bare KeyError halfway through the analysis.
    required = ['Intervention', 'manual restless score',
                'total sleep duration', 'start & end controlled?']
    missing = [col for col in required if col not in data.columns]
    if missing:
        raise KeyError(f"{file_path} is missing required column(s): {missing}")

    # Group data by intervention
    interventions = ['Baseline', 'Avoid Screen Time', 'Breathing']
    grouped_data = {
        name: data[data['Intervention'] == name] for name in interventions
    }
    restless = {
        name: grouped_data[name]['manual restless score'].dropna()
        for name in interventions
    }

    # dtype=object keeps ints and floats exactly as computed, so the printed
    # tables are not reformatted by per-column float coercion.
    restless_stats = pd.DataFrame(
        [_describe(restless[name]) for name in interventions],
        index=interventions,
        columns=['n', 'Mean', 'Median', 'Min', 'Max', 'SD'],
        dtype=object,
    )

    sleep_duration_stats = pd.DataFrame(
        [
            _describe(grouped_data[name]['total sleep duration'].dropna() / 60)
            for name in interventions
        ],
        index=interventions,
        columns=['n', 'Mean (hrs)', 'Median (hrs)', 'Min (hrs)',
                 'Max (hrs)', 'SD (hrs)'],
        dtype=object,
    )

    # Adherence: share of recorded days flagged as controlled.
    adherence_rows = []
    for name in interventions:
        total_days = len(grouped_data[name])
        adherent_days = grouped_data[name]['start & end controlled?'].sum()
        adherence_rate = (adherent_days / total_days) * 100 if total_days > 0 else 0
        adherence_rows.append([total_days, adherent_days, round(adherence_rate, 1)])
    adherence_stats = pd.DataFrame(
        adherence_rows,
        index=interventions,
        columns=['Total Days', 'Adherent Days', 'Adherence Rate (%)'],
        dtype=object,
    )

    # Pairwise comparisons of restless scores: (row label, first arm, second arm).
    comparisons = [
        ('Baseline vs. Screen Time', 'Baseline', 'Avoid Screen Time'),
        ('Baseline vs. Breathing', 'Baseline', 'Breathing'),
        ('Screen Time vs. Breathing', 'Avoid Screen Time', 'Breathing'),
    ]
    test_rows = []
    for _, first, second in comparisons:
        t_stat, p_val = _welch_ttest(restless[first], restless[second])
        d_val = _cohen_d(restless[first], restless[second])
        # Classify on the unrounded values; rounding is for display only.
        # Otherwise e.g. d = 0.198 would be shown as 0.2 *and* be labelled
        # "Small effect", and p = 0.0496 would be labelled "Not significant".
        test_rows.append([
            round(t_stat, 2),
            round(p_val, 3),
            round(d_val, 2),
            _interpret(p_val, d_val),
        ])
    statistical_tests = pd.DataFrame(
        test_rows,
        index=[label for label, _, _ in comparisons],
        columns=['t-statistic', 'p-value', "Cohen's d", 'Interpretation'],
        dtype=object,
    )

    # Print formatted tables for the presentation
    print("\n===== DESCRIPTIVE STATISTICS FOR RESTLESS SCORES =====")
    print(restless_stats.to_string())

    print("\n===== DESCRIPTIVE STATISTICS FOR SLEEP DURATION =====")
    print(sleep_duration_stats.to_string())

    print("\n===== ADHERENCE STATISTICS =====")
    print(adherence_stats.to_string())

    print("\n===== STATISTICAL TESTS =====")
    print(statistical_tests.to_string())

    # Return all the statistics for further use
    return {
        'restless_stats': restless_stats,
        'sleep_duration_stats': sleep_duration_stats,
        'adherence_stats': adherence_stats,
        'statistical_tests': statistical_tests
    }


# --- private helpers used by analyze_sleep_data ---

def _describe(values):
    """Return [n, mean, median, min, max, sample SD] of a Series, rounded to 2 dp."""
    return [
        len(values),
        round(values.mean(), 2),
        round(values.median(), 2),
        round(values.min(), 2),
        round(values.max(), 2),
        round(values.std(), 2),
    ]


def _welch_ttest(x, y):
    """Return (t, p) of Welch's unequal-variance t-test, or NaNs if a group has < 2 values."""
    if len(x) < 2 or len(y) < 2:
        # A sample variance needs two observations; skip SciPy to avoid its
        # small-sample warnings and return the NaN it would produce anyway.
        return np.nan, np.nan
    result = stats.ttest_ind(x, y, equal_var=False)
    return result.statistic, result.pvalue


def _cohen_d(x, y):
    """Return Cohen's d (pooled sample SD) for two Series, or NaN if a group has < 2 values."""
    nx, ny = len(x), len(y)
    if nx < 2 or ny < 2:
        return np.nan
    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.std() ** 2 + (ny - 1) * y.std() ** 2) / dof
    return (x.mean() - y.mean()) / np.sqrt(pooled_var)


def _interpret(p_val, d_val):
    """Label significance (alpha = 0.05) and effect magnitude (0.2 / 0.5 / 0.8 cut-offs)."""
    if pd.isna(p_val) or pd.isna(d_val):
        # Every comparison with NaN is False, which would otherwise fall
        # through to "Not significant, Large effect".
        return "Insufficient data"

    sig_text = "Significant" if p_val < 0.05 else "Not significant"

    magnitude = abs(d_val)
    if magnitude < 0.2:
        effect_text = "Negligible effect"
    elif magnitude < 0.5:
        effect_text = "Small effect"
    elif magnitude < 0.8:
        effect_text = "Medium effect"
    else:
        effect_text = "Large effect"

    return f"{sig_text}, {effect_text}"

# Run the analysis
if __name__ == "__main__":
    results = analyze_sleep_data()

    # (result key, worksheet name) pairs, in the order the sheets are written.
    sheet_layout = (
        ('restless_stats', 'Restless Scores'),
        ('sleep_duration_stats', 'Sleep Duration'),
        ('adherence_stats', 'Adherence'),
        ('statistical_tests', 'Statistical Tests'),
    )

    # Export every table to its own sheet of a single workbook for
    # presentation use; the writer is closed (and the file saved) on exit.
    with pd.ExcelWriter('sleep_study_results.xlsx') as writer:
        for result_key, sheet_title in sheet_layout:
            results[result_key].to_excel(writer, sheet_name=sheet_title)

    print("\nResults saved to 'sleep_study_results.xlsx'")

# Notebook output: FileNotFoundError: [Errno 2] No such file or directory: 'filleddata.csv'