# In [None]:  (Jupyter notebook cell marker left over from export)
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from functools import reduce
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats as stats
import os

output_dir = r'C:\Users\danie\Downloads\SleepData\checkpoints'

def save_checkpoint(df, step_name, print_info=True, checkpoint_dir=None):
    """Write ``df`` to a checkpoint CSV and optionally print a summary.

    Args:
        df: DataFrame to persist.
        step_name: Label embedded in the file name
            (``checkpoint_py_<step_name>.csv``) and in the printed header.
        print_info: When true, print the shape, column names, first rows and
            the NaN count of every column that contains at least one NaN.
        checkpoint_dir: Directory to write into. When omitted, the
            module-level ``output_dir`` is used.

    Returns:
        The path of the CSV file that was written.
    """
    target_dir = output_dir if checkpoint_dir is None else checkpoint_dir
    # Create the folder on first use so ``to_csv`` does not fail with an
    # OSError when the checkpoint directory has not been created yet.
    os.makedirs(target_dir, exist_ok=True)

    output_path = os.path.join(target_dir, f'checkpoint_py_{step_name}.csv')
    df.to_csv(output_path, index=False)

    if print_info:
        print(f"\nCHECKPOINT {step_name}:")
        print(f"Shape: {df.shape} ({df.shape[0]} rows, {df.shape[1]} columns)")
        print(f"Columns: {list(df.columns)}")
        print("First few rows:")
        print(df.head())

        # Only report columns that actually contain missing values.
        nan_counts = df.isna().sum()
        columns_with_nan = nan_counts[nan_counts > 0]
        if not columns_with_nan.empty:
            print("\nNaN value counts:")
            print(columns_with_nan)

    return output_path

# Columns whose row-wise mean is reported as "Average Sleep Between Modalities".
avg_columns = ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']

# Read data files
actlumus_data = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\Actlumus.csv')
GW_data = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\GW.csv')
preprocessed_data = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\Preprocessed.csv')
survey_data = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\Survey.csv')
deep_sleep_first_2_hours = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\GW_deep_sleep_2_hours_finalcopy.csv')
deep_sleep_first_1_hours = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\GW_deep_sleep_1_hour_finalcopy.csv')
deep_sleep_first_half_hours = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\GW_deep_sleep_half_hour_finalcopy.csv')

# Save raw data checkpoints
save_checkpoint(actlumus_data, '0_raw_actlumus')
save_checkpoint(GW_data, '0_raw_GW')
save_checkpoint(preprocessed_data, '0_raw_preprocessed')
save_checkpoint(survey_data, '0_raw_survey')
save_checkpoint(deep_sleep_first_2_hours, '0_raw_deep_sleep_2hr')
save_checkpoint(deep_sleep_first_1_hours, '0_raw_deep_sleep_1hr')
save_checkpoint(deep_sleep_first_half_hours, '0_raw_deep_sleep_halfhr')

# Reduce every source to the columns that are used and convert durations to
# hours. Each column subset is taken as an explicit ``.copy()`` so the column
# assignments below write to an independent frame instead of a slice of the
# raw data (avoids pandas' chained-assignment / SettingWithCopy warnings).
actlumus_data = actlumus_data[['Participant', 'Date of Sleep', 'TST (Without Naps)', 'TST (With Naps)']].copy()
# Duration strings are parsed as timedeltas; unparseable values become NaN.
actlumus_data['TST (Without Naps)'] = pd.to_timedelta(actlumus_data['TST (Without Naps)'], errors='coerce').dt.total_seconds() / 3600
actlumus_data['TST (With Naps)'] = pd.to_timedelta(actlumus_data['TST (With Naps)'], errors='coerce').dt.total_seconds() / 3600

GW_data = GW_data[['Participant', 'Date of Sleep', 'TST (Seconds)', 'Deep Sleep time (Stage 5) (Seconds)']].copy()
GW_data['TST (Hours)'] = GW_data['TST (Seconds)'] / 3600
GW_data['Deep Sleep (Hours)'] = GW_data['Deep Sleep time (Stage 5) (Seconds)'] / 3600

preprocessed_data = preprocessed_data[['Participant', 'Date of Sleep', 'Total Sleep Time']].copy()
preprocessed_data['Total Sleep Time'] = pd.to_timedelta(preprocessed_data['Total Sleep Time'], errors='coerce').dt.total_seconds() / 3600

survey_data = survey_data[['Participant', 'Date of Sleep', 'TST']].copy()
survey_data['TST'] = pd.to_timedelta(survey_data['TST'], errors='coerce').dt.total_seconds() / 3600

# Deep sleep in the first 2 hours (this frame keeps its 'TST (Seconds)' column)
deep_sleep_first_2_hours = deep_sleep_first_2_hours[['Participant', 'Date of Sleep', 'TST (Seconds)', 'Deep Sleep in First 2 Hours (Seconds)', 'Percentage Deep Sleep', 'Percentage Deep Sleep First 2 Hours']].copy()
deep_sleep_first_2_hours['Deep Sleep in First 2 Hours (Hours)'] = deep_sleep_first_2_hours['Deep Sleep in First 2 Hours (Seconds)'] / 3600
deep_sleep_first_2_hours['TST (Hours)'] = deep_sleep_first_2_hours['TST (Seconds)'] / 3600

# Deep sleep in the first hour ('TST (Seconds)' is dropped after conversion)
deep_sleep_first_1_hours = deep_sleep_first_1_hours[['Participant', 'Date of Sleep', 'TST (Seconds)', 'Deep Sleep in First 1 Hour (Seconds)', 'Percentage Deep Sleep in First 1 Hour']].copy()
deep_sleep_first_1_hours['Deep Sleep in First 1 Hour (Hours)'] = deep_sleep_first_1_hours['Deep Sleep in First 1 Hour (Seconds)'] / 3600
deep_sleep_first_1_hours['TST (Hours)'] = deep_sleep_first_1_hours['TST (Seconds)'] / 3600
deep_sleep_first_1_hours = deep_sleep_first_1_hours.drop(columns=['TST (Seconds)'])

# Deep sleep in the first half hour ('TST (Seconds)' is dropped after conversion)
deep_sleep_first_half_hours = deep_sleep_first_half_hours[['Participant', 'Date of Sleep', 'TST (Seconds)', 'Deep Sleep in First Half Hour (Seconds)', 'Percentage Deep Sleep in First Half Hour']].copy()
deep_sleep_first_half_hours['Deep Sleep in First Half Hour (Hours)'] = deep_sleep_first_half_hours['Deep Sleep in First Half Hour (Seconds)'] / 3600
deep_sleep_first_half_hours['TST (Hours)'] = deep_sleep_first_half_hours['TST (Seconds)'] / 3600
deep_sleep_first_half_hours = deep_sleep_first_half_hours.drop(columns=['TST (Seconds)'])

# Load Stroop data; its 'Date' column is renamed so it shares the merge key
# name used by the sleep frames.
stroop_data = pd.read_csv('C:\\Users\\danie\\Downloads\\SleepData\\stroop_results.csv')
stroop_data = stroop_data[['Participant', 'Date', 'congruent_response_time_ms_median', 'incongruent_response_time_ms_median']].rename(columns={'Date': 'Date of Sleep'})
stroop_data['Date of Sleep'] = pd.to_datetime(stroop_data['Date of Sleep'], errors='coerce')

# Calculate Stroop metrics: incongruent minus congruent median response time,
# and the ratio of the two.
stroop_data['Interference Effect (ms)'] = stroop_data['incongruent_response_time_ms_median'] - stroop_data['congruent_response_time_ms_median']
stroop_data['Ratio Interference Score'] = stroop_data['incongruent_response_time_ms_median'] / stroop_data['congruent_response_time_ms_median']

# Give each source's columns their final, source-specific names before merging.
# NOTE: 'TST (Seconds)' (GW and 2-hour frames) and 'TST (Hours)' (1-hour and
# half-hour frames) are not renamed, so the merge below disambiguates them
# with pandas' default '_x' / '_y' suffixes.
data_frames = [
    actlumus_data.rename(columns={
        'TST (Without Naps)': 'Actlumus (No Naps)',
        'TST (With Naps)': 'Actlumus (With Naps)'
    }),
    GW_data.rename(columns={
        'TST (Hours)': 'GW (Total Sleep)',
        'Deep Sleep (Hours)': 'GW (Deep Sleep)'
    }),
    preprocessed_data.rename(columns={
        'Total Sleep Time': 'Preprocessed'
    }),
    survey_data.rename(columns={
        'TST': 'Survey'
    }),
    deep_sleep_first_2_hours.rename(columns={
        'Deep Sleep in First 2 Hours (Hours)': 'Deep Sleep in First 2 Hours',
        'TST (Hours)': 'DeepSleep_TST'
    }),
    deep_sleep_first_1_hours.rename(columns={
        'Deep Sleep in First 1 Hour (Hours)': 'Deep Sleep in First 1 Hour',
        # 'TST (Hours)': 'DeepSleep_1hr_TST'
    }),
    deep_sleep_first_half_hours.rename(columns={
        'Deep Sleep in First Half Hour (Hours)': 'Deep Sleep in First Half Hour',
        # 'TST (Hours)': 'DeepSleep_halfhr_TST'
    })
]

# Outer-merge all sources on (Participant, Date of Sleep) so a night recorded
# by any single source is kept.
merged_data = reduce(lambda left, right: pd.merge(left, right, on=['Participant', 'Date of Sleep'], how='outer'), data_frames)
save_checkpoint(merged_data, '2_merged_data_initial')

# Collapse repeated (Participant, Date of Sleep) rows to one row holding the
# column means.
merged_data = merged_data.drop_duplicates()
merged_data = merged_data.groupby(['Participant', 'Date of Sleep'], as_index=False).mean()
save_checkpoint(merged_data, '2b_merged_data_after_groupby')

# Convert dates (unparseable values become NaT) and sort
merged_data['Date of Sleep'] = pd.to_datetime(merged_data['Date of Sleep'], errors='coerce')
merged_data = merged_data.sort_values(by=['Participant', 'Date of Sleep'])
save_checkpoint(merged_data, '2c_merged_data_sorted')

# Cohort id -> (first date, last date, participant-id range). ``range`` stops
# before its end value, e.g. cohort 1 covers participants 1-7.
cohorts = {
    1: ('2024-03-23', '2024-04-07', range(1, 8)),
    2: ('2024-05-04', '2024-05-19', range(8, 28)),
    3: ('2024-05-25', '2024-06-09', range(28, 41)),
    4: ('2024-06-01', '2024-06-16', range(44, 56))
}

# Fill missing dates for each participant
def fill_missing_dates(participant_data, start_date, end_date):
    """Return one participant's rows expanded to one row per calendar day.

    Args:
        participant_data: Rows for a single participant. Must contain the
            columns 'Date of Sleep' (datetime-like, unique values) and
            'Participant'.
        start_date: First day of the range (inclusive).
        end_date: Last day of the range (inclusive).

    Returns:
        A DataFrame with 'Date of Sleep' as its first column and exactly one
        row per day between ``start_date`` and ``end_date``. Days without an
        input row hold NaN in every measurement column; input rows dated
        outside the range are dropped. 'Participant' is filled on every row
        whenever at least one input row falls inside the range.
    """
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    participant_data = participant_data.set_index('Date of Sleep')
    participant_data = participant_data.reindex(date_range).reset_index()
    participant_data.rename(columns={'index': 'Date of Sleep'}, inplace=True)
    # Forward-fill alone leaves the days *before* the first recorded night
    # without a participant id, which silently removes those rows from every
    # later per-participant filter or groupby. Back-fill covers that gap.
    participant_data['Participant'] = participant_data['Participant'].ffill().bfill()
    return participant_data

# Build one gap-free daily frame per participant, cohort by cohort.
# Participants with no rows in ``merged_data`` are skipped.
filled_dataframes = []

for cohort_id, cohort_spec in cohorts.items():
    start_date, end_date, participants = cohort_spec
    for participant in participants:
        participant_data = merged_data.loc[merged_data['Participant'] == participant]
        if participant_data.empty:
            continue
        filled_dataframes.append(
            fill_missing_dates(participant_data, start_date, end_date)
        )

filled_data = pd.concat(filled_dataframes, ignore_index=True)
save_checkpoint(filled_data, '3_filled_data_all')

# Add "night prior" columns: each value is the same participant's value from
# the preceding row (shift by one within the participant group).
for source_column, lagged_column in (
    ('GW (Deep Sleep)', 'GW_DeepSleep_NightPrior'),
    ('Deep Sleep in First 2 Hours', 'DeepSleep_First2Hours_NightPrior'),
):
    filled_data[lagged_column] = filled_data.groupby('Participant')[source_column].shift(1)

# Attach the Stroop metrics; the left join keeps every sleep row even when no
# Stroop result exists for that participant and date.
stroop_columns = ['Participant', 'Date of Sleep', 'Interference Effect (ms)', 'Ratio Interference Score']
filled_data = filled_data.merge(
    stroop_data[stroop_columns],
    on=['Participant', 'Date of Sleep'],
    how='left',
)

# Distinct participant ids present in the filled data (NaN ids excluded).
all_participants = filled_data['Participant'].dropna().unique()

def create_participant_sleep_comparison(participant_number):
    """Build the grouped bar chart comparing one participant's sleep measures.

    The participant's rows are taken from ``filled_data``, the row-wise mean
    of ``avg_columns`` is added as 'Average Sleep Between Modalities', and the
    resulting table is written to ``participant_<n>.csv`` in the Results
    folder. The returned figure holds one bar trace per measure that has at
    least one non-null value, plus two dotted lines: the median of the
    average sleep column and the median of 'GW (Deep Sleep)'.

    Args:
        participant_number: Participant id to select; it is converted with
            ``int()`` for the CSV file name.

    Returns:
        The Plotly figure (it is neither shown nor saved here).
    """
    rows = filled_data.loc[filled_data['Participant'] == participant_number].copy()
    rows['Average Sleep Between Modalities'] = rows[avg_columns].mean(axis=1, skipna=True)

    # Reference levels drawn as horizontal lines further down.
    median_sleep_time = rows['Average Sleep Between Modalities'].median()
    median_deep_sleep = rows['GW (Deep Sleep)'].median()

    rows.to_csv(f'C:\\Users\\danie\\Downloads\\SleepData\\Results\\participant_{int(participant_number)}.csv', index=False)

    # Measure -> bar colour; dict order is the order of the traces.
    trace_colors = {
        'Actlumus (No Naps)': '#FF9999',
        'Actlumus (With Naps)': '#FFB366',
        'GW (Total Sleep)': '#99CCFF',
        'GW (Deep Sleep)': '#F507F1',
        'GW_DeepSleep_NightPrior': '#7C06B8',
        'Preprocessed': '#99FF99',
        'Survey': '#FFEB99',
        'Average Sleep Between Modalities': '#F50707',
        'Deep Sleep in First 2 Hours': '#FF4500',
        'DeepSleep_First2Hours_NightPrior': '#32CD32',
    }

    fig = go.Figure()

    for column_name, color in trace_colors.items():
        # Skip measures that are absent or entirely empty for this participant.
        if column_name not in rows.columns:
            continue
        if not rows[column_name].notnull().any():
            continue
        fig.add_trace(go.Bar(
            x=rows['Date of Sleep'],
            y=rows[column_name],
            name=column_name,
            marker_color=color,
        ))

    # Constant-valued line traces spanning every date on the x axis.
    night_count = len(rows)
    median_lines = (
        ("Median Sleep Time", median_sleep_time, "black"),
        ("Median Deep Sleep Time", median_deep_sleep, "grey"),
    )
    for label, level, line_color in median_lines:
        fig.add_trace(go.Scatter(
            x=rows['Date of Sleep'],
            y=[level] * night_count,
            mode="lines",
            name=label,
            line=dict(color=line_color, dash="dot"),
        ))

    layout = dict(
        title=dict(
            text=f'Participant {participant_number} Sleep Time Comparison',
            x=0.5,
            font=dict(size=20),
        ),
        xaxis_title="Date of Sleep",
        yaxis_title="Total Sleep Time (Hours)",
        barmode='group',
        showlegend=True,
        height=600,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    fig.update_layout(**layout)

    return fig

def _standard_error(values):
    """Return the standard error of the mean (sample std / sqrt(n)) of ``values``.

    Yields NaN when fewer than two values are supplied, because the sample
    standard deviation (``ddof=1``) is undefined in that case.
    """
    values = np.asarray(values, dtype=float)
    if values.size < 2:
        return float('nan')
    return np.std(values, ddof=1) / np.sqrt(values.size)


def _count_participant_nights(participant_data):
    """Return (valid, disqualified, short, long) night counts for one participant.

    A night is valid when the mean of ``avg_columns`` is available and
    'Deep Sleep in First 2 Hours' is present both on that row and on the
    participant's next row. Short / long nights are valid nights whose average
    sleep is more than 0.5 hours below / above the median of the valid nights.
    """
    nights = participant_data.copy()
    nights['Average Sleep Between Modalities'] = nights[avg_columns].mean(axis=1, skipna=True)
    # Next row's deep sleep, aligned with the current night.
    nights['Next_Day_Deep_Sleep'] = nights['Deep Sleep in First 2 Hours'].shift(-1)

    valid_mask = (
        nights['Average Sleep Between Modalities'].notna() &
        nights['Deep Sleep in First 2 Hours'].notna() &
        nights['Next_Day_Deep_Sleep'].notna()
    )
    valid_sleep = nights.loc[valid_mask, 'Average Sleep Between Modalities']

    valid_count = len(valid_sleep)
    disqualified_count = len(nights) - valid_count
    if valid_count == 0:
        return 0, disqualified_count, 0, 0

    median_sleep = valid_sleep.median()
    short_count = int((valid_sleep < (median_sleep - 0.5)).sum())
    long_count = int((valid_sleep > (median_sleep + 0.5)).sum())
    return valid_count, disqualified_count, short_count, long_count


def _render_sleep_nights_summary_html(total_valid_nights, short_nights_total,
                                      long_nights_total, normal_nights_total,
                                      valid_summary, disqualified_summary,
                                      participant_count):
    """Return the summary page as an HTML string.

    ``valid_summary`` and ``disqualified_summary`` are preformatted
    "mean ± SE" strings; every other argument is converted with ``str()``.
    """
    return """<!DOCTYPE html>
<html>
<head>
    <title>Sleep Nights Summary</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 50px auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        h1 {
            color: #333;
            text-align: center;
            margin-bottom: 30px;
        }
        .count-box {
            display: inline-block;
            width: 45%;
            margin: 10px 2.5%;
            padding: 20px;
            border-radius: 5px;
            text-align: center;
            color: white;
            font-size: 18px;
        }
        .short-nights {
            background-color: #4682B4;
        }
        .long-nights {
            background-color: #228B22;
        }
        .count-number {
            font-size: 36px;
            font-weight: bold;
            display: block;
            margin: 10px 0;
        }
        .count-number-small {
            font-size: 24px;
            font-weight: bold;
            display: block;
            margin: 10px 0;
        }
        .definition {
            margin-top: 30px;
            padding: 15px;
            background-color: #f9f9f9;
            border-left: 4px solid #ccc;
            font-style: italic;
        }
        .criteria {
            margin-top: 20px;
            padding: 15px;
            background-color: #e8f4fd;
            border-left: 4px solid #4682B4;
            font-size: 14px;
        }
        .per-participant {
            margin-top: 20px;
            padding: 15px;
            background-color: #f0fff0;
            border-left: 4px solid #228B22;
            font-size: 14px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Sleep Nights Summary - All Participants (Valid Nights Only)</h1>
        
        <div class="count-box total-nights" style="width: 90%; background-color: #333;">
            <div>Total Valid Nights</div>
            <span class="count-number">""" + str(total_valid_nights) + """</span>
            <div>All valid nights across all participants</div>
        </div>
        
        <div class="count-box short-nights">
            <div>Short Nights</div>
            <span class="count-number">""" + str(short_nights_total) + """</span>
            <div>Total across all participants</div>
        </div>
        
        <div class="count-box long-nights">
            <div>Long Nights</div>
            <span class="count-number">""" + str(long_nights_total) + """</span>
            <div>Total across all participants</div>
        </div>
        
        <div class="count-box" style="width: 90%; background-color: #FFA500;">
            <div>Normal Nights</div>
            <span class="count-number">""" + str(normal_nights_total) + """</span>
            <div>Within ±0.5 hours of participant median TST</div>
        </div>
        
        <div class="count-box" style="width: 45%; background-color: #2E8B57;">
            <div>Valid Nights Per Participant</div>
            <span class="count-number-small">""" + valid_summary + """</span>
            <div>Mean ± SE</div>
        </div>
        
        <div class="count-box" style="width: 45%; background-color: #DC143C;">
            <div>Disqualified Nights Per Participant</div>
            <span class="count-number-small">""" + disqualified_summary + """</span>
            <div>Mean ± SE</div>
        </div>
        
        <div class="per-participant">
            <strong>Per-Participant Statistics:</strong><br>
            <strong>Valid Nights:</strong> """ + valid_summary + """ (Mean ± SE across """ + str(participant_count) + """ participants)<br>
            <strong>Disqualified Nights:</strong> """ + disqualified_summary + """ (Mean ± SE across """ + str(participant_count) + """ participants)<br>
            <em>Disqualified nights are those missing Average Sleep Between Modalities, Deep Sleep for current date, or Deep Sleep for the following date.</em>
        </div>
        
        <div class="criteria">
            <strong>Valid Night Criteria:</strong><br>
            A night is considered valid only if ALL of the following conditions are met:<br>
            • Has a valid Average Sleep Between Modalities (mean of: Actlumus No Naps, GW Total Sleep, Preprocessed, Survey)<br>
            • Has valid Deep Sleep data for the current date<br>
            • Has valid Deep Sleep data for the following date<br>
            <em>Only valid nights are included in all counts and calculations.</em>
        </div>
        
        <div class="definition">
            <strong>Definitions:</strong><br>
            <strong>Total Valid Nights:</strong> All nights meeting the valid criteria across all participants<br>
            <strong>Short Nights:</strong> Sleep duration more than 0.5 hours below participant's median TST<br>
            <strong>Long Nights:</strong> Sleep duration more than 0.5 hours above participant's median TST<br>
            <strong>Normal Nights:</strong> Sleep duration within ±0.5 hours of participant's median TST<br>
            <strong>TST:</strong> Total Sleep Time (Average Sleep Between Modalities)
        </div>
    </div>
</body>
</html>"""


def create_sleep_nights_count_summary():
    """Create an HTML file with total counts of short and long nights across all participants.

    For every id in ``all_participants`` the rows of ``filled_data`` are
    classified into valid / disqualified nights, and the valid nights into
    short, long and normal relative to that participant's median. The totals,
    plus the mean ± standard error of valid and disqualified nights per
    participant, are written to ``sleep_nights_count_summary.html`` and
    printed.

    Returns:
        ``(total_valid_nights, short_nights_total, long_nights_total,
        normal_nights_total)``, or ``(None, None, None, None)`` when any
        step raises; the error is printed rather than propagated.
    """
    try:
        total_valid_nights = 0
        short_nights_total = 0
        long_nights_total = 0

        # Per-participant counts feed the mean ± SE statistics.
        valid_nights_per_participant = []
        disqualified_nights_per_participant = []

        for participant in all_participants:
            participant_data = filled_data[filled_data['Participant'] == participant]
            valid_count, disqualified_count, short_count, long_count = (
                _count_participant_nights(participant_data)
            )

            valid_nights_per_participant.append(valid_count)
            disqualified_nights_per_participant.append(disqualified_count)

            total_valid_nights += valid_count
            short_nights_total += short_count
            long_nights_total += long_count

        # Whatever is neither short nor long lies within ±0.5 h of the median.
        normal_nights_total = total_valid_nights - short_nights_total - long_nights_total

        # Guard the empty case so numpy does not warn about an empty mean.
        if valid_nights_per_participant:
            valid_mean = np.mean(valid_nights_per_participant)
            disqualified_mean = np.mean(disqualified_nights_per_participant)
        else:
            valid_mean = disqualified_mean = float('nan')
        valid_se = _standard_error(valid_nights_per_participant)
        disqualified_se = _standard_error(disqualified_nights_per_participant)

        html_content = _render_sleep_nights_summary_html(
            total_valid_nights,
            short_nights_total,
            long_nights_total,
            normal_nights_total,
            f"{valid_mean:.2f} ± {valid_se:.2f}",
            f"{disqualified_mean:.2f} ± {disqualified_se:.2f}",
            len(all_participants),
        )

        # Save HTML, creating the destination folder when it is missing.
        output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Summary_Statistics\sleep_nights_count_summary.html'
        output_folder = os.path.dirname(output_path)
        if output_folder:
            os.makedirs(output_folder, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        print(f"Sleep nights count summary saved to: {output_path}")
        print(f"Total Valid Nights: {total_valid_nights}")
        print(f"Short Nights Total: {short_nights_total}")
        print(f"Long Nights Total: {long_nights_total}")
        print(f"Normal Nights Total: {normal_nights_total}")
        print(f"Valid Nights Per Participant: {valid_mean:.2f} ± {valid_se:.2f} (Mean ± SE)")
        print(f"Disqualified Nights Per Participant: {disqualified_mean:.2f} ± {disqualified_se:.2f} (Mean ± SE)")
        print(f"File size: {os.path.getsize(output_path)} bytes")

        return total_valid_nights, short_nights_total, long_nights_total, normal_nights_total

    except Exception as e:
        # Best-effort report: callers receive a tuple of Nones instead of an
        # exception, matching the established contract of this function.
        print(f"Error creating HTML file: {e}")
        return None, None, None, None

# Calculate short nights for all participants and create summary graph
def create_short_nights_summary():
    """Plot how many participants had each number of short nights.

    For every id in ``all_participants`` the row-wise mean of ``avg_columns``
    is computed from ``filled_data``; a night counts as short when that mean
    is below the participant's median minus 0.5 hours. The distribution of
    per-participant counts is drawn as a bar chart, written to
    ``short_nights_distribution.html`` and shown.
    """
    per_participant_counts = []
    for pid in all_participants:
        rows = filled_data[filled_data['Participant'] == pid]
        avg_sleep = rows[avg_columns].mean(axis=1, skipna=True)
        cutoff = avg_sleep.median() - 0.5
        per_participant_counts.append((avg_sleep < cutoff).sum())

    # Index: number of short nights; value: participants with that many.
    distribution = pd.Series(per_participant_counts).value_counts().sort_index()

    bars = go.Bar(
        x=distribution.index,
        y=distribution.values,
        marker_color='#4682B4',
    )
    fig = go.Figure()
    fig.add_trace(bars)

    layout = dict(
        title=dict(
            text='Distribution of Short Nights Across Participants (>=0.5 hours below participant median TST)',
            x=0.5,
            font=dict(size=20),
        ),
        xaxis_title="Number of Short Nights per Subject",
        yaxis_title="Count",
        barmode='group',
        showlegend=False,
        height=600,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    fig.update_layout(**layout)

    # Persist the chart, then display it.
    fig.write_html('C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Summary_Statistics\\short_nights_distribution.html')
    fig.show()

# Calculate long nights for all participants and create summary graph
def create_long_nights_summary():
    """Plot how many participants had each number of long nights.

    For every id in ``all_participants`` the row-wise mean of ``avg_columns``
    is computed from ``filled_data``; a night counts as long when that mean
    is above the participant's median plus 0.5 hours. The distribution of
    per-participant counts is drawn as a bar chart, written to
    ``long_nights_distribution.html`` and shown.
    """
    per_participant_counts = []
    for pid in all_participants:
        rows = filled_data[filled_data['Participant'] == pid]
        avg_sleep = rows[avg_columns].mean(axis=1, skipna=True)
        cutoff = avg_sleep.median() + 0.5
        per_participant_counts.append((avg_sleep > cutoff).sum())

    # Index: number of long nights; value: participants with that many.
    distribution = pd.Series(per_participant_counts).value_counts().sort_index()

    bars = go.Bar(
        x=distribution.index,
        y=distribution.values,
        marker_color='#228B22',
    )
    fig = go.Figure()
    fig.add_trace(bars)

    layout = dict(
        title=dict(
            text='Distribution of Long Nights Across Participants (>=0.5 hours above participant median TST)',
            x=0.5,
            font=dict(size=20),
        ),
        xaxis_title="Number of Long Nights per Subject",
        yaxis_title="Count",
        barmode='group',
        showlegend=False,
        height=600,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    fig.update_layout(**layout)

    # Persist the chart, then display it.
    fig.write_html('C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Summary_Statistics\\long_nights_distribution.html')
    fig.show()

def _consecutive_run_lengths(dates, flags, min_length=2):
    """Return the lengths of runs of flagged entries on consecutive calendar days.

    ``dates`` and ``flags`` are iterated in parallel. A run continues only
    while each flagged date is exactly one day after the previous flagged
    date; runs shorter than ``min_length`` are discarded.
    """
    run_lengths = []
    run_length = 0
    previous_date = None

    for current_date, flagged in zip(dates, flags):
        if flagged and run_length and (current_date - previous_date).days == 1:
            run_length += 1
        else:
            # The current run (if any) ends here; keep it when long enough.
            if run_length >= min_length:
                run_lengths.append(run_length)
            run_length = 1 if flagged else 0
        if flagged:
            previous_date = current_date

    if run_length >= min_length:
        run_lengths.append(run_length)
    return run_lengths


def create_consecutive_short_nights_graph(short_night_threshold_hours=2):
    """Plot the distribution of runs of consecutive short nights.

    For every id in ``all_participants`` with at least two rows in
    ``filled_data``, a night is short when the row-wise mean of
    ``avg_columns`` is below the participant's median minus
    ``short_night_threshold_hours``. Runs of two or more short nights on
    consecutive dates are collected across participants, and the number of
    runs of each length is drawn as a bar chart (an empty chart when there
    are none), written to ``consecutive_short_nights.html`` and shown.

    Args:
        short_night_threshold_hours: Hours below the participant median that
            make a night short. The default of 2 preserves this chart's
            previous behaviour.
            NOTE(review): the other short-night summaries in this file use
            0.5 hours — confirm which definition this chart should follow.
    """
    all_consecutive_sequences = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant]

        # A run needs at least two nights.
        if len(participant_data) < 2:
            continue

        participant_data = participant_data.sort_values('Date of Sleep')
        average_sleep = participant_data[avg_columns].mean(axis=1, skipna=True)
        # Comparisons against NaN are False, so nights without data never
        # count as short and always break a run.
        is_short_night = average_sleep < (average_sleep.median() - short_night_threshold_hours)

        all_consecutive_sequences.extend(
            _consecutive_run_lengths(participant_data['Date of Sleep'], is_short_night)
        )

    # Layout shared by the empty and the populated chart.
    layout = dict(
        title=dict(
            text='Distribution of Consecutive Short Nights Across All Participants',
            x=0.5,
            font=dict(size=20)
        ),
        xaxis_title="Number of Consecutive Short Nights",
        yaxis_title="Count",
        showlegend=False,
        height=600,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )
    # Integer ticks starting at zero for the count axis.
    count_axis = dict(
        rangemode='nonnegative',
        zeroline=True,
        tickmode='linear',
        tick0=0,
        dtick=1
    )

    fig = go.Figure()

    if all_consecutive_sequences:
        # Run length -> number of runs of that length, ordered by length.
        sequence_counts = pd.Series(all_consecutive_sequences).value_counts().sort_index()
        run_lengths = sequence_counts.index.tolist()
        occurrences = sequence_counts.tolist()

        fig.add_trace(go.Bar(
            x=run_lengths,
            y=occurrences,
            marker_color='#8B008B'
        ))

        layout.update(
            barmode='group',
            xaxis=dict(
                tickmode='array',
                tickvals=run_lengths,
                dtick=1,
                range=[0, max(run_lengths) + 0.5],
                rangemode='nonnegative',
                zeroline=True
            ),
            # One extra unit of headroom above the tallest bar.
            yaxis=dict(range=[0, max(occurrences) + 1], **count_axis)
        )
    else:
        layout.update(
            xaxis=dict(rangemode='nonnegative', zeroline=True),
            yaxis=count_axis
        )

    fig.update_layout(**layout)

    # Save figure
    fig.write_html('C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Summary_Statistics\\consecutive_short_nights.html')
    fig.show()

# Generate histogram and CDF for short night durations
def create_short_night_duration_histogram():
    """Plot a histogram and empirical CDF of short-night sleep durations.

    For every id in ``all_participants`` the row-wise mean of ``avg_columns``
    is computed from ``filled_data``; nights whose mean is below the
    participant's median minus 0.5 hours are pooled across participants. The
    pooled durations are binned into 8 bins, drawn as bars with the
    cumulative fraction on a secondary axis, written to
    ``short_night_duration_histogram.html`` and shown.
    """
    short_night_durations = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant]
        average_sleep = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = average_sleep.median()

        # Short nights: more than 0.5 hours below the participant's median.
        short_nights = average_sleep[average_sleep < (median_sleep - 0.5)]
        short_night_durations.extend(short_nights.dropna().values)

    # Create histogram and CDF
    hist_values, bin_edges = np.histogram(short_night_durations, bins=8, density=False)
    total_nights = hist_values.sum()
    # Without any short night the fraction would be 0/0; plot zeros instead.
    if total_nights:
        cdf = np.cumsum(hist_values) / total_nights
    else:
        cdf = np.zeros(len(hist_values))

    # Bars are centred on their bin and span its full width. The cumulative
    # fraction through a bin is reached at that bin's right edge.
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    bin_widths = np.diff(bin_edges)
    bin_right_edges = bin_edges[1:]

    fig = go.Figure()

    # Add histogram
    fig.add_trace(go.Bar(
        x=bin_centers,
        y=hist_values,
        width=bin_widths,
        name="Count",
        marker_color='#4682B4',
        opacity=0.7
    ))

    # Add CDF on the secondary y axis
    fig.add_trace(go.Scatter(
        x=bin_right_edges,
        y=cdf,
        name="CDF",
        yaxis="y2",
        mode="lines",
        line=dict(color='black')
    ))

    # Update layout with dual y-axes
    fig.update_layout(
        title=dict(
            text='Duration of Short Nights (TST) with CDF',
            x=0.5,
            font=dict(size=20)
        ),
        xaxis_title="Mean Total Sleep Time (hrs)",
        yaxis=dict(title="Count"),
        # The secondary axis carries the cumulative fraction, not a density.
        yaxis2=dict(title="Cumulative Fraction", overlaying="y", side="right", showgrid=False),
        barmode='overlay',
        showlegend=True,
        height=600,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    # Save figure
    fig.write_html('C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Summary_Statistics\\short_night_duration_histogram.html')
    fig.show()

def create_all_night_duration_histogram():
    """Plot a histogram and empirical CDF of sleep duration over all nights.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. For each participant the nightly duration is the row-wise
    mean of ``avg_columns`` (missing modalities skipped); nights where every
    modality is missing are dropped. All remaining nights are pooled into a
    10-bin histogram, with the cumulative proportion on a secondary y-axis.

    The figure is written to ``all_night_duration_histogram.html`` and shown.
    """
    all_night_durations = []

    for participant in all_participants:
        participant_rows = filled_data[filled_data['Participant'] == participant]

        # Nightly duration = mean over whichever modalities have a value
        nightly_mean = participant_rows[avg_columns].mean(axis=1, skipna=True)

        # Collect all valid sleep durations
        all_night_durations.extend(nightly_mean.dropna().values)

    # Histogram counts and cumulative proportion
    hist_values, bin_edges = np.histogram(all_night_durations, bins=10, density=False)
    total_nights = hist_values.sum()
    # With no nights at all, report a flat zero CDF instead of 0/0 = NaN
    cdf = np.cumsum(hist_values) / total_nights if total_nights else np.zeros(len(hist_values))

    # Bars are centred on their bin and span its full width, so they line up
    # with the x-axis values they actually cover.
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    bin_widths = np.diff(bin_edges)

    fig = go.Figure()

    # Add histogram
    fig.add_trace(go.Bar(
        x=bin_centers,
        y=hist_values,
        width=bin_widths,
        name="Count",
        marker_color='#4682B4',  # Steel Blue color
        opacity=0.7
    ))

    # Add CDF: cdf[i] is the proportion at or below the RIGHT edge of bin i,
    # so it is plotted against the edges, starting from 0 at the first edge.
    fig.add_trace(go.Scatter(
        x=bin_edges,
        y=np.concatenate(([0.0], cdf)),
        name="CDF",
        yaxis="y2",
        mode="lines",
        line=dict(color='black')
    ))

    # Update layout with dual y-axes
    fig.update_layout(
        title=dict(
            text='Distribution of Sleep Durations (TST) with CDF',
            x=0.5,
            font=dict(size=20)
        ),
        xaxis_title="Mean Total Sleep Time (hrs)",
        yaxis=dict(title="Count"),
        # The secondary axis carries the CDF, not a density
        yaxis2=dict(title="Cumulative Proportion", overlaying="y", side="right", showgrid=False),
        barmode='overlay',
        showlegend=True,
        height=600,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    # Save figure
    fig.write_html('C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Summary_Statistics\\all_night_duration_histogram.html')
    fig.show()

def create_relative_sleep_loss_violin_plot():
    """Violin plot of night-to-night deep sleep % change, grouped by sleep loss.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. Per participant, each night's duration is the mean of
    ``avg_columns``; deep sleep % is ``GW (Deep Sleep)`` divided by that
    duration, times 100. The plotted value is the next row's deep sleep %
    minus the current row's (assumes rows are consecutive nights in
    chronological order -- confirm against how ``filled_data`` is built).

    Only nights more than 0.5 below the participant's median duration are
    kept, labelled '0.5-1', '1-2', '2-3' or '>3' by the size of the shortfall.
    The figure is written to ``relative_sleep_loss_violin.html`` and shown.
    """
    def label_sleep_loss(hours, median_hours):
        # Largest shortfall is tested first so each night gets exactly one label;
        # nights that are not short (or are NaN) get None and are dropped later.
        if hours < (median_hours - 3):
            return '>3'
        if hours < (median_hours - 2):
            return '2-3'
        if hours < (median_hours - 1):
            return '1-2'
        if hours < (median_hours - 0.5):
            return '0.5-1'
        return None

    per_participant = []

    for participant in all_participants:
        nights = filled_data[filled_data['Participant'] == participant].copy()

        # Nightly duration and the participant's own baseline
        nights['Average Sleep Between Modalities'] = nights[avg_columns].mean(axis=1, skipna=True)
        median_sleep = nights['Average Sleep Between Modalities'].median()

        # Deep sleep as a percentage of the nightly duration
        nights['Deep Sleep %'] = (nights['GW (Deep Sleep)'] / nights['Average Sleep Between Modalities']) * 100

        # Following row's percentage minus this row's
        nights['Deep Sleep % Change'] = nights['Deep Sleep %'].shift(-1) - nights['Deep Sleep %']

        # Keep only rows where the change could be computed
        usable = nights.dropna(subset=['Deep Sleep % Change']).copy()

        # Bin by shortfall relative to the median, then discard non-short nights
        usable['Relative Sleep Loss'] = usable['Average Sleep Between Modalities'].apply(
            label_sleep_loss, args=(median_sleep,)
        )
        usable = usable.dropna(subset=['Relative Sleep Loss'])

        per_participant.append(usable[['Relative Sleep Loss', 'Deep Sleep % Change']])

    # Pool every participant's short nights
    violin_data = pd.concat(per_participant, ignore_index=True)

    order = ["0.5-1", "1-2", "2-3", ">3"]

    fig = px.violin(
        violin_data,
        x='Relative Sleep Loss',
        y='Deep Sleep % Change',
        box=True,
        points='all',
        category_orders={'Relative Sleep Loss': order},
        title="Relative Sleep Loss vs. Deep Sleep % Change",
    )

    # Dashed reference line at "no change", spanning the four category slots
    zero_line = go.layout.Shape(
        type="line",
        x0=-0.5, x1=3.5, y0=0, y1=0,
        line=dict(color="red", width=2, dash="dash")
    )
    fig.add_shape(zero_line)

    fig.update_layout(
        xaxis_title="Relative Sleep Loss on Short Nights (hours)",
        yaxis_title="Deep Sleep % Change (Next Night - Current Night)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    # Save as HTML, then display
    html_file_path = 'C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Sleep_Patterns\\relative_sleep_loss_violin.html'
    fig.write_html(html_file_path)
    fig.show()

def create_relative_sleep_loss_violin_plot_fullrange():
    """Violin plot of deep sleep % change, grouped by deviation from median sleep.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. Per participant, each night's duration is the mean of
    ``avg_columns``; deep sleep % is ``GW (Deep Sleep)`` divided by that
    duration, times 100. The plotted value is the next row's deep sleep %
    minus the current row's (assumes rows are consecutive nights in
    chronological order -- confirm against how ``filled_data`` is built).

    Every night is binned by how far its duration is from the participant's
    median: '>3', '2-3', '1-2', '0.5-1' for shortfalls, '0' within +/-0.5,
    and '0.5-1 +', '1-2 +', '2-3 +', '>3 +' for surpluses. Nights with no
    duration are dropped. The figure is written to
    ``relative_sleep_loss_violin_fullrange.html`` and shown.
    """
    def classify_deviation(hours, median_hours):
        """Return the bin label for one night, or None if it cannot be binned."""
        # A missing duration would fail every comparison and land in '0';
        # return None instead so the dropna below removes it.
        if pd.isna(hours) or pd.isna(median_hours):
            return None
        # Shortfall side: largest deficit first
        if hours < (median_hours - 3):
            return '>3'
        if hours < (median_hours - 2):
            return '2-3'
        if hours < (median_hours - 1):
            return '1-2'
        if hours < (median_hours - 0.5):
            return '0.5-1'
        # Surplus side: largest surplus first, otherwise the +0.5 test
        # swallows every longer night and the higher bins stay empty.
        if hours > (median_hours + 3):
            return '>3 +'
        if hours > (median_hours + 2):
            return '2-3 +'
        if hours > (median_hours + 1):
            return '1-2 +'
        if hours > (median_hours + 0.5):
            return '0.5-1 +'
        return '0'

    # x-axis order: biggest shortfall on the left, biggest surplus on the right
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Calculate deep sleep percentage
        participant_data['Deep Sleep %'] = (participant_data['GW (Deep Sleep)'] / participant_data['Average Sleep Between Modalities']) * 100

        # Following row's deep sleep % minus this row's
        participant_data['Deep Sleep % Next Night'] = participant_data['Deep Sleep %'].shift(-1)
        participant_data['Deep Sleep % Change'] = participant_data['Deep Sleep % Next Night'] - participant_data['Deep Sleep %']

        # Exclude rows where the change could not be computed
        valid_data = participant_data.dropna(subset=['Deep Sleep % Change']).copy()

        # Assign bins for both negative and positive sleep deviations
        valid_data['Relative Sleep Change'] = valid_data['Average Sleep Between Modalities'].apply(
            classify_deviation, args=(median_sleep,)
        )

        # Drop nights that could not be binned
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])

        data_list.append(valid_data[['Relative Sleep Change', 'Deep Sleep % Change']])

    # Combine all participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Create violin plot using Plotly
    fig = px.violin(violin_data, x='Relative Sleep Change', y='Deep Sleep % Change',
                    box=True, points='all', category_orders={'Relative Sleep Change': order},
                    title="Relative Sleep Deviation vs. Deep Sleep % Change")

    # Horizontal line at y=0. Paper x-coordinates make it span the full plot
    # width regardless of how many categories end up on the axis.
    fig.add_shape(
        type="line",
        xref="paper",
        x0=0, x1=1, y0=0, y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    fig.update_layout(
        xaxis_title="Relative Sleep Deviation (Negative: Sleep Deprivation | Positive: Extra Sleep)",
        yaxis_title="Deep Sleep % Change (Next Night - Current Night)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    # Save as HTML
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\relative_sleep_loss_violin_fullrange.html'
    fig.write_html(html_file_path)
    fig.show()

def create_deep_sleep_first_2_hours_violin_plot():
    """Violin plot of the change in early-night deep sleep, grouped by sleep loss.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. Per participant, each night's duration is the mean of
    ``avg_columns``. The plotted value is the next row's
    ``Deep Sleep in First 2 Hours`` minus the current row's (assumes rows are
    consecutive nights in chronological order -- confirm against how
    ``filled_data`` is built).

    Only nights more than 0.5 below the participant's median duration are
    kept, labelled '0.5-1', '1-2', '2-3' or '>3' by the size of the shortfall.
    The figure is written to ``deep_sleep_first_2_hours_violin.html`` and shown.
    """
    # TODO: consider another function with x-axis being all nights to see how
    # deep sleep reacts if TST is above median (should be negative trend)

    def label_sleep_loss(hours, median_hours):
        # Largest shortfall is tested first so each night gets exactly one label;
        # nights that are not short (or are NaN) get None and are dropped later.
        if hours < (median_hours - 3):
            return '>3'
        if hours < (median_hours - 2):
            return '2-3'
        if hours < (median_hours - 1):
            return '1-2'
        if hours < (median_hours - 0.5):
            return '0.5-1'
        return None

    per_participant = []

    for participant in all_participants:
        nights = filled_data[filled_data['Participant'] == participant].copy()

        # Nightly duration and the participant's own baseline
        nights['Average Sleep Between Modalities'] = nights[avg_columns].mean(axis=1, skipna=True)
        median_sleep = nights['Average Sleep Between Modalities'].median()

        # Following row's early deep sleep minus this row's
        early_deep = nights['Deep Sleep in First 2 Hours']
        nights['Deep Sleep First 2 Hours Change'] = early_deep.shift(-1) - early_deep

        # Keep only rows where the change could be computed
        usable = nights.dropna(subset=['Deep Sleep First 2 Hours Change']).copy()

        # Bin by shortfall relative to the median, then discard non-short nights
        usable['Relative Sleep Loss'] = usable['Average Sleep Between Modalities'].apply(
            label_sleep_loss, args=(median_sleep,)
        )
        usable = usable.dropna(subset=['Relative Sleep Loss'])

        per_participant.append(usable[['Relative Sleep Loss', 'Deep Sleep First 2 Hours Change']])

    # Pool every participant's short nights
    violin_data = pd.concat(per_participant, ignore_index=True)

    order = ["0.5-1", "1-2", "2-3", ">3"]

    fig = px.violin(
        violin_data,
        x='Relative Sleep Loss',
        y='Deep Sleep First 2 Hours Change',
        box=True,
        points='all',
        category_orders={'Relative Sleep Loss': order},
        title="Relative Sleep Loss vs. Deep Sleep First 2 Hours Change",
    )

    # Dashed reference line at "no change", spanning the four category slots
    zero_line = go.layout.Shape(
        type="line",
        x0=-0.5, x1=3.5, y0=0, y1=0,
        line=dict(color="red", width=2, dash="dash")
    )
    fig.add_shape(zero_line)

    fig.update_layout(
        xaxis_title="Relative Sleep Loss on Short Nights (hours)",
        yaxis_title="Deep Sleep First 2 Hours Change (Next Night - Current Night) (hrs)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    # Save as HTML, then display
    html_file_path = 'C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Sleep_Patterns\\deep_sleep_first_2_hours_violin.html'
    fig.write_html(html_file_path)
    fig.show()

def create_deep_sleep_first_2_hours_violin_plot_fullrange():
    """Violin plot of early-night deep sleep change, grouped by sleep deviation.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. Per participant, each night's duration is the mean of
    ``avg_columns``. The plotted value is the next row's
    ``Deep Sleep in First 2 Hours`` minus the current row's (assumes rows are
    consecutive nights in chronological order -- confirm against how
    ``filled_data`` is built).

    Every night is binned by how far its duration is from the participant's
    median: '>3', '2-3', '1-2', '0.5-1' for shortfalls, '0' within +/-0.5,
    and '0.5-1 +', '1-2 +', '2-3 +', '>3 +' for surpluses. Nights with no
    duration are dropped. The figure is written to
    ``deep_sleep_first_2_hours_violin_fullrange.html`` and shown.
    """
    def classify_deviation(hours, median_hours):
        """Return the bin label for one night, or None if it cannot be binned."""
        # A missing duration would fail every comparison and land in '0';
        # return None instead so the dropna below removes it.
        if pd.isna(hours) or pd.isna(median_hours):
            return None
        # Shortfall side: largest deficit first
        if hours < (median_hours - 3):
            return '>3'
        if hours < (median_hours - 2):
            return '2-3'
        if hours < (median_hours - 1):
            return '1-2'
        if hours < (median_hours - 0.5):
            return '0.5-1'
        # Surplus side: largest surplus first, otherwise the +0.5 test
        # swallows every longer night and the higher bins stay empty.
        if hours > (median_hours + 3):
            return '>3 +'
        if hours > (median_hours + 2):
            return '2-3 +'
        if hours > (median_hours + 1):
            return '1-2 +'
        if hours > (median_hours + 0.5):
            return '0.5-1 +'
        return '0'

    # x-axis order: biggest shortfall on the left, biggest surplus on the right
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Following row's early deep sleep minus this row's
        participant_data['Deep Sleep First 2 Hours Next Night'] = participant_data['Deep Sleep in First 2 Hours'].shift(-1)
        participant_data['Deep Sleep First 2 Hours Change'] = participant_data['Deep Sleep First 2 Hours Next Night'] - participant_data['Deep Sleep in First 2 Hours']

        # Exclude rows where the change could not be computed
        valid_data = participant_data.dropna(subset=['Deep Sleep First 2 Hours Change']).copy()

        # Assign sleep deviation bins
        valid_data['Relative Sleep Change'] = valid_data['Average Sleep Between Modalities'].apply(
            classify_deviation, args=(median_sleep,)
        )

        # Drop nights that could not be binned
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])

        data_list.append(valid_data[['Relative Sleep Change', 'Deep Sleep First 2 Hours Change']])

    # Combine all participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Create violin plot using Plotly
    fig = px.violin(violin_data, x='Relative Sleep Change', y='Deep Sleep First 2 Hours Change',
                    box=True, points='all', category_orders={'Relative Sleep Change': order},
                    title="Relative Sleep Deviation vs. Deep Sleep in First 2 Hours Change")

    # Horizontal line at y=0. Paper x-coordinates make it span the full plot
    # width regardless of how many categories end up on the axis.
    fig.add_shape(
        type="line",
        xref="paper",
        x0=0, x1=1, y0=0, y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    fig.update_layout(
        xaxis_title="Relative Sleep Deviation (Negative: Sleep Deprivation | Positive: Extra Sleep)",
        yaxis_title="Deep Sleep in First 2 Hours Change (Next Night - Current Night) (hrs)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    # Save as HTML
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_first_2_hours_violin_fullrange.html'
    fig.write_html(html_file_path)
    fig.show()

def create_deep_sleep_violin_plot_fullrange():
    """Violin plot of total deep sleep change, grouped by sleep deviation.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. Per participant, each night's duration is the mean of
    ``avg_columns``. The plotted value is the next row's ``GW (Deep Sleep)``
    minus the current row's (assumes rows are consecutive nights in
    chronological order -- confirm against how ``filled_data`` is built).

    Every night is binned by how far its duration is from the participant's
    median: '>3', '2-3', '1-2', '0.5-1' for shortfalls, '0' within +/-0.5,
    and '0.5-1 +', '1-2 +', '2-3 +', '>3 +' for surpluses. Nights with no
    duration are dropped. The figure is written to
    ``deep_sleep_violin_fullrange.html`` and shown.
    """
    def classify_deviation(hours, median_hours):
        """Return the bin label for one night, or None if it cannot be binned."""
        # A missing duration would fail every comparison and land in '0';
        # return None instead so the dropna below removes it.
        if pd.isna(hours) or pd.isna(median_hours):
            return None
        # Shortfall side: largest deficit first
        if hours < (median_hours - 3):
            return '>3'
        if hours < (median_hours - 2):
            return '2-3'
        if hours < (median_hours - 1):
            return '1-2'
        if hours < (median_hours - 0.5):
            return '0.5-1'
        # Surplus side: largest surplus first, otherwise the +0.5 test
        # swallows every longer night and the higher bins stay empty.
        if hours > (median_hours + 3):
            return '>3 +'
        if hours > (median_hours + 2):
            return '2-3 +'
        if hours > (median_hours + 1):
            return '1-2 +'
        if hours > (median_hours + 0.5):
            return '0.5-1 +'
        return '0'

    # x-axis order: biggest shortfall on the left, biggest surplus on the right
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Following row's deep sleep minus this row's
        participant_data['Deep Sleep Next Night'] = participant_data['GW (Deep Sleep)'].shift(-1)
        participant_data['Deep Sleep Change'] = participant_data['Deep Sleep Next Night'] - participant_data['GW (Deep Sleep)']

        # Exclude rows where the change could not be computed
        valid_data = participant_data.dropna(subset=['Deep Sleep Change']).copy()

        # Assign sleep deviation bins
        valid_data['Relative Sleep Change'] = valid_data['Average Sleep Between Modalities'].apply(
            classify_deviation, args=(median_sleep,)
        )

        # Drop nights that could not be binned
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])

        data_list.append(valid_data[['Relative Sleep Change', 'Deep Sleep Change']])

    # Combine all participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Create violin plot using Plotly
    fig = px.violin(violin_data, x='Relative Sleep Change', y='Deep Sleep Change',
                    box=True, points='all', category_orders={'Relative Sleep Change': order},
                    title="Relative Sleep Deviation vs. Total Deep Sleep Change")

    # Horizontal line at y=0. Paper x-coordinates make it span the full plot
    # width regardless of how many categories end up on the axis.
    fig.add_shape(
        type="line",
        xref="paper",
        x0=0, x1=1, y0=0, y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    fig.update_layout(
        xaxis_title="Relative Sleep Deviation (Negative: Sleep Deprivation | Positive: Extra Sleep)",
        yaxis_title="Total Deep Sleep Change (Next Night - Current Night) (hrs)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    # Save as HTML
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_violin_fullrange.html'
    fig.write_html(html_file_path)
    fig.show()

def create_deep_sleep_comparison_table():
    """Summarise night-to-night deep sleep change per sleep-deviation bin.

    Reads the module-level ``all_participants``, ``filled_data`` and
    ``avg_columns``. Per participant, each night's duration is the mean of
    ``avg_columns``, and the change is the next row's ``GW (Deep Sleep)``
    minus the current row's (assumes rows are consecutive nights in
    chronological order -- confirm against how ``filled_data`` is built).

    Every night is binned by how far its duration is from the participant's
    median: '>3', '2-3', '1-2', '0.5-1' for shortfalls, '0' within +/-0.5,
    and '0.5-1 +', '1-2 +', '2-3 +', '>3 +' for surpluses. Nights with no
    duration are dropped.

    Returns:
        DataFrame with one row per bin, in the order above, holding the bin
        label plus mean, median, standard deviation, count and standard error
        of the change. Bins with no nights appear as NaN rows. The same table
        is written to ``deep_sleep_comparison_table.html``.
    """
    def classify_deviation(hours, median_hours):
        """Return the bin label for one night, or None if it cannot be binned."""
        # A missing duration would fail every comparison and land in '0';
        # return None instead so the dropna below removes it.
        if pd.isna(hours) or pd.isna(median_hours):
            return None
        # Shortfall side: largest deficit first
        if hours < (median_hours - 3):
            return '>3'
        if hours < (median_hours - 2):
            return '2-3'
        if hours < (median_hours - 1):
            return '1-2'
        if hours < (median_hours - 0.5):
            return '0.5-1'
        # Surplus side: largest surplus first, otherwise the +0.5 test
        # swallows every longer night and the higher bins stay empty.
        if hours > (median_hours + 3):
            return '>3 +'
        if hours > (median_hours + 2):
            return '2-3 +'
        if hours > (median_hours + 1):
            return '1-2 +'
        if hours > (median_hours + 0.5):
            return '0.5-1 +'
        return '0'

    # Row order: biggest shortfall first, biggest surplus last
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Following row's deep sleep minus this row's
        participant_data['Deep Sleep Next Night'] = participant_data['GW (Deep Sleep)'].shift(-1)
        participant_data['Deep Sleep Change'] = participant_data['Deep Sleep Next Night'] - participant_data['GW (Deep Sleep)']

        # Exclude rows where the change could not be computed
        valid_data = participant_data.dropna(subset=['Deep Sleep Change']).copy()

        # Assign sleep deviation bins
        valid_data['Relative Sleep Change'] = valid_data['Average Sleep Between Modalities'].apply(
            classify_deviation, args=(median_sleep,)
        )

        # Drop nights that could not be binned
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])

        data_list.append(valid_data[['Relative Sleep Change', 'Deep Sleep Change']])

    # Combine all participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Compute statistics per category
    summary_table = violin_data.groupby('Relative Sleep Change')['Deep Sleep Change'].agg(
        **{"Deep Sleep Change Mean (hrs)": 'mean', "Deep Sleep Change Median (hrs)": 'median', "STD (hrs)": 'std', "Count": 'count'}
    )

    # Standard error of the mean
    summary_table['SE (hrs)'] = summary_table['STD (hrs)'] / np.sqrt(summary_table['Count'])

    # Sort by deviation; reindex also inserts NaN rows for bins with no nights
    summary_table = summary_table.reindex(order)

    # Convert index to column
    summary_table = summary_table.reset_index()

    # Save table as an HTML file
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_comparison_table.html'
    summary_table.to_html(html_file_path, index=False, float_format="%.2f")

    print(f"Table saved as HTML: {html_file_path}")

    return summary_table

def create_deep_sleep_comparison_graph_first_2_hours(filled_data, all_participants):
    """Plot mean change in early-night deep sleep against sleep deviation bins.

    Per participant, each night's duration is the mean of the module-level
    ``avg_columns``. The change is the next row's
    ``Deep Sleep in First 2 Hours`` minus the current row's, multiplied by 60
    (the labels treat the source column as hours and the result as minutes).
    Assumes rows are consecutive nights in chronological order -- confirm
    against how ``filled_data`` is built.

    Nights are binned by duration relative to the participant's median:
    '- >3' and '- 1-3' for nights longer than the median, '- 1-1' within one
    of it, '1-3' and '>3' for shorter nights. Nights with no duration are
    dropped. The figure shows the per-bin mean, an OLS line fitted to those
    means against the bin index, STD and SE error bars, and every individual
    night. It is written to ``deep_sleep_comparison_graph_first_2_hours.html``
    and shown.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep',
            'Deep Sleep in First 2 Hours' and the ``avg_columns`` columns.
        all_participants: Iterable of participant identifiers to include.
    """
    change_col = 'Deep Sleep Change (In First Two hrs)'
    minutes_col = 'Deep Sleep Change (In First Two hrs) Minutes'
    mean_col = 'Deep Sleep in First Two Hours Change From Night Prior'

    def classify_deviation(hours, median_hours):
        """Return the bin label for one night, or None if it cannot be binned."""
        # A missing duration would fail every range test and end up in a stray
        # '0' category on the x-axis; return None so it is dropped instead.
        if pd.isna(hours) or pd.isna(median_hours):
            return None
        if hours > (median_hours + 3):
            return '- >3'
        if (median_hours + 1) <= hours <= (median_hours + 3):
            return '- 1-3'
        if (median_hours - 1) <= hours < (median_hours + 1):
            return '- 1-1'
        if (median_hours - 3) <= hours < (median_hours - 1):
            return '1-3'
        # Only hours < median - 3 can reach this point
        return '>3'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Following row's early deep sleep minus this row's
        participant_data['Deep Sleep (In First Two hrs) Next Night'] = participant_data['Deep Sleep in First 2 Hours'].shift(-1)
        participant_data[change_col] = participant_data['Deep Sleep (In First Two hrs) Next Night'] - participant_data['Deep Sleep in First 2 Hours']

        # Exclude rows where the change could not be computed
        valid_data = participant_data.dropna(subset=[change_col]).copy()

        # Assign deviation bins and convert the change from hours to minutes
        valid_data['Relative Sleep Change'] = valid_data['Average Sleep Between Modalities'].apply(
            classify_deviation, args=(median_sleep,)
        )
        valid_data[minutes_col] = valid_data[change_col] * 60

        # Drop nights that could not be binned
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])

        data_list.append(valid_data[['Participant', 'Date of Sleep', 'Relative Sleep Change', minutes_col]])

    # Combine participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Compute statistics per category
    summary_table = violin_data.groupby('Relative Sleep Change')[minutes_col].agg(
        **{mean_col: 'mean', "STD (mins)": 'std', "Count": 'count'}
    )

    # Standard error of the mean
    summary_table['SE (mins)'] = summary_table['STD (mins)'] / np.sqrt(summary_table['Count'])

    # Sort by deviation; reindex also inserts NaN rows for bins with no nights
    order = ["- >3", "- 1-3", "- 1-1", "1-3", ">3"]
    summary_table = summary_table.reindex(order).reset_index()

    # Bin position (0..4) is the regressor for the trend line
    category_map = {label: position for position, label in enumerate(order)}
    summary_table['Category Numeric'] = summary_table['Relative Sleep Change'].map(category_map)

    # Linear regression of bin means on bin position; empty bins are dropped
    # from the fit but still receive a predicted value.
    X = sm.add_constant(summary_table['Category Numeric'])  # constant = intercept
    y = summary_table[mean_col]
    model = sm.OLS(y, X, missing='drop').fit()
    summary_table['Regression Fit'] = model.predict(X)

    # Create scatter plot
    fig = go.Figure()

    # Add mean points
    fig.add_trace(go.Scatter(
        x=summary_table['Relative Sleep Change'],
        y=summary_table[mean_col],
        mode='markers',
        marker=dict(size=8, color='blue'),
        name='Mean Deep Sleep Change'
    ))

    # Add regression line
    fig.add_trace(go.Scatter(
        x=summary_table['Relative Sleep Change'],
        y=summary_table['Regression Fit'],
        mode='lines',
        line=dict(color='green', width=2),
        name='Linear Regression'
    ))

    # Add STD and SE error bars as separate, individually toggleable traces
    for spread_col, colour, label in (
        ('STD (mins)', 'blue', 'STD (Standard Deviation)'),
        ('SE (mins)', 'red', 'SE (Standard Error)'),
    ):
        fig.add_trace(go.Scatter(
            x=summary_table['Relative Sleep Change'],
            y=summary_table[mean_col],
            error_y=dict(type='data', array=summary_table[spread_col], visible=True),
            mode='markers',
            marker=dict(size=8, color=colour, symbol='circle-open'),
            name=label
        ))

    # Add individual points
    fig.add_trace(go.Scatter(
        x=violin_data['Relative Sleep Change'],
        y=violin_data[minutes_col],
        mode='markers',
        marker=dict(
            size=5,
            color='black',
            opacity=0.4,
            symbol='circle'
        ),
        name='Individual Data Points',
        hovertext=violin_data['Participant'].astype(str) + " | " + violin_data['Date of Sleep'].astype(str)
    ))

    # Add zero line across the full plot width
    fig.add_shape(
        type="line",
        x0=0,
        x1=1,
        xref="paper",
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    # Update layout
    fig.update_layout(
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        showlegend=True,
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(t=180, l=180)  # Increased top and left margins for longer annotations
    )

    # Two-line title, drawn as annotations above the plot area
    for text, y_pos in (
        ("Effect of Reduced Total Sleep on the First Two Hours", 1.20),
        ("of Deep Sleep the Following Night Relative to Night Prior", 1.10),
    ):
        fig.add_annotation(
            text=text,
            xref="paper",
            yref="paper",
            x=0.5,
            y=y_pos,
            showarrow=False,
            font=dict(size=20, color="black")
        )

    # Two-line rotated y-axis label, drawn as annotations left of the plot area
    for text, x_pos in (
        ("Deep Sleep Change", -0.15),
        ("in First 2 Hours (Mins)", -0.11),
    ):
        fig.add_annotation(
            text=text,
            xref="paper",
            yref="paper",
            x=x_pos,
            y=0.5,
            showarrow=False,
            font=dict(size=14, color="black"),
            textangle=-90
        )

    # Save figure
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_comparison_graph_first_2_hours.html'
    fig.write_html(html_file_path)

    # Display plot
    fig.show()

# Modified function for create_deep_sleep_comparison_graph
def create_deep_sleep_comparison_graph(filled_data, all_participants):
    """Plot mean change in total deep sleep against sleep deviation bins.

    Per participant, each night's duration is the mean of the module-level
    ``avg_columns``. The change is the next row's ``GW (Deep Sleep)`` minus
    the current row's, multiplied by 60 (the labels treat the source column
    as hours and the result as minutes). Assumes rows are consecutive nights
    in chronological order -- confirm against how ``filled_data`` is built.

    Nights are binned in one-unit steps by duration relative to the
    participant's median, from '- >3.5' (longest nights) through '- 0.5-0.5'
    (within half a unit of the median) to '>3.5' (shortest nights). Nights
    with no duration are dropped. The figure shows the per-bin mean, an OLS
    line fitted to those means against the bin index, STD and SE error bars,
    and every individual night. It is written to
    ``deep_sleep_comparison_graph.html`` and shown.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep',
            'GW (Deep Sleep)' and the ``avg_columns`` columns.
        all_participants: Iterable of participant identifiers to include.
    """
    minutes_col = 'Deep Sleep Change Minutes'
    mean_col = 'Deep Sleep Change Mean (mins)'

    # Bin labels from longest to shortest nights; also the x-axis order
    order = ["- >3.5", "- 2.5-3.5", "- 1.5-2.5", "- 0.5-1.5", "- 0.5-0.5", "0.5-1.5", "1.5-2.5", "2.5-3.5", ">3.5"]

    # Inclusive lower bound (offset from the median) of every bin except the
    # top and bottom ones, scanned from the highest bound downwards.
    lower_bounds = (
        (2.5, '- 2.5-3.5'),
        (1.5, '- 1.5-2.5'),
        (0.5, '- 0.5-1.5'),
        (-0.5, '- 0.5-0.5'),
        (-1.5, '0.5-1.5'),
        (-2.5, '1.5-2.5'),
        (-3.5, '2.5-3.5'),
    )

    def classify_deviation(hours, median_hours):
        """Return the bin label for one night, or None if it cannot be binned."""
        # A missing duration would fail every range test and end up in a stray
        # '0' category on the x-axis; return None so it is dropped instead.
        if pd.isna(hours) or pd.isna(median_hours):
            return None
        if hours > (median_hours + 3.5):
            return '- >3.5'
        for offset, label in lower_bounds:
            if (median_hours + offset) <= hours:
                return label
        # Only hours < median - 3.5 can reach this point
        return '>3.5'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Following row's deep sleep minus this row's
        participant_data['Deep Sleep Next Night'] = participant_data['GW (Deep Sleep)'].shift(-1)
        participant_data['Deep Sleep Change'] = participant_data['Deep Sleep Next Night'] - participant_data['GW (Deep Sleep)']

        # Convert deep sleep change from hours to minutes
        participant_data[minutes_col] = participant_data['Deep Sleep Change'] * 60

        # Exclude rows where the change could not be computed
        valid_data = participant_data.dropna(subset=[minutes_col]).copy()

        # Assign deviation bins
        valid_data['Relative Sleep Change'] = valid_data['Average Sleep Between Modalities'].apply(
            classify_deviation, args=(median_sleep,)
        )

        # Drop nights that could not be binned
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])

        data_list.append(valid_data[['Participant', 'Date of Sleep', 'Relative Sleep Change', minutes_col]])

    # Combine participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Compute statistics per category
    summary_table = violin_data.groupby('Relative Sleep Change')[minutes_col].agg(
        **{mean_col: 'mean', "STD (mins)": 'std', "Count": 'count'}
    )

    # Standard error of the mean
    summary_table['SE (mins)'] = summary_table['STD (mins)'] / np.sqrt(summary_table['Count'])

    # Sort by deviation; reindex also inserts NaN rows for bins with no nights
    summary_table = summary_table.reindex(order).reset_index()

    # Bin position (0..8) is the regressor for the trend line
    category_map = {label: position for position, label in enumerate(order)}
    summary_table['Category Numeric'] = summary_table['Relative Sleep Change'].map(category_map)

    # Linear regression of bin means on bin position; empty bins are dropped
    # from the fit but still receive a predicted value.
    X = sm.add_constant(summary_table['Category Numeric'])  # constant = intercept
    y = summary_table[mean_col]
    model = sm.OLS(y, X, missing='drop').fit()
    summary_table['Regression Fit'] = model.predict(X)

    # Create scatter plot
    fig = go.Figure()

    # Add mean points
    fig.add_trace(go.Scatter(
        x=summary_table['Relative Sleep Change'],
        y=summary_table[mean_col],
        mode='markers',
        marker=dict(size=8, color='blue'),
        name='Mean Deep Sleep Change'
    ))

    # Add regression line
    fig.add_trace(go.Scatter(
        x=summary_table['Relative Sleep Change'],
        y=summary_table['Regression Fit'],
        mode='lines',
        line=dict(color='green', width=2),
        name='Linear Regression'
    ))

    # Add STD and SE error bars as separate, individually toggleable traces
    for spread_col, colour, label in (
        ('STD (mins)', 'blue', 'STD (Standard Deviation)'),
        ('SE (mins)', 'red', 'SE (Standard Error)'),
    ):
        fig.add_trace(go.Scatter(
            x=summary_table['Relative Sleep Change'],
            y=summary_table[mean_col],
            error_y=dict(type='data', array=summary_table[spread_col], visible=True),
            mode='markers',
            marker=dict(size=8, color=colour, symbol='circle-open'),
            name=label
        ))

    # Add individual points
    fig.add_trace(go.Scatter(
        x=violin_data['Relative Sleep Change'],
        y=violin_data[minutes_col],
        mode='markers',
        marker=dict(
            size=5,
            color='black',
            opacity=0.4,
            symbol='circle'
        ),
        name='Individual Data Points',
        hovertext=violin_data['Participant'].astype(str) + " | " + violin_data['Date of Sleep'].astype(str)
    ))

    # Add zero line across the full plot width
    fig.add_shape(
        type="line",
        x0=0,
        x1=1,
        xref="paper",
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    # Update layout
    fig.update_layout(
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        showlegend=True,
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(t=180, l=180)  # Increased top and left margins for longer annotations
    )

    # Two-line title, drawn as annotations above the plot area
    for text, y_pos in (
        ("Effect of Reduced Total Sleep on the", 1.20),
        ("Deep Sleep of the Following Night", 1.10),
    ):
        fig.add_annotation(
            text=text,
            xref="paper",
            yref="paper",
            x=0.5,
            y=y_pos,
            showarrow=False,
            font=dict(size=20, color="black")
        )

    # Two-line rotated y-axis label, drawn as annotations left of the plot area
    for text, x_pos in (
        ("Deep Sleep Change", -0.15),
        ("From Night Prior (Mins)", -0.11),
    ):
        fig.add_annotation(
            text=text,
            xref="paper",
            yref="paper",
            x=x_pos,
            y=0.5,
            showarrow=False,
            font=dict(size=14, color="black"),
            textangle=-90
        )

    # Save figure
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_comparison_graph.html'
    fig.write_html(html_file_path)

    # Display plot
    fig.show()
def create_deep_sleep_comparison_table_first_2_hours():
    """Tabulate night-to-night change in early deep sleep per sleep-deviation bin.

    Reads the module-level ``filled_data``, ``all_participants`` and
    ``avg_columns``. For each participant, every row is labelled by how far its
    average sleep (row mean of ``avg_columns``) lies from that participant's
    median, and is paired with the change in 'Deep Sleep in First 2 Hours' from
    that row to the next one.

    The pooled observations are summarised per bin (mean, median, STD, count,
    SE) and the table is written to an HTML file at a hard-coded path.

    Returns:
        pandas.DataFrame: one row per bin label in a fixed order; bins without
        any observation appear with NaN statistics.
    """
    avg_col = 'Average Sleep Between Modalities'
    deep_col = 'Deep Sleep in First 2 Hours'
    next_col = 'Deep Sleep (In First Two hrs) Next Night'
    change_col = 'Deep Sleep Change (In First Two hrs)'
    bin_col = 'Relative Sleep Change'

    def bin_label(value, median_sleep):
        """Return the deviation-bin label for one night's average sleep."""
        # Below-median side: widest deviation is tested first.
        if value < (median_sleep - 3):
            return '>3'
        if value < (median_sleep - 2):
            return '2-3'
        if value < (median_sleep - 1):
            return '1-2'
        if value < (median_sleep - 0.5):
            return '0.5-1'
        # Above-median side: also test the widest deviation first. Checking
        # "+0.5" first would swallow every larger surplus and leave the
        # '1-2 +' and '>3 +' bins permanently empty.
        if value > (median_sleep + 3):
            return '>3 +'
        if value > (median_sleep + 1):
            # There is no '2-3 +' label in the table, so this bin spans
            # everything from +1 up to and including +3.
            return '1-2 +'
        if value > (median_sleep + 0.5):
            return '0.5-1 +'
        return '0'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Average sleep across the available modalities and the participant's median
        participant_data[avg_col] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data[avg_col].median()

        # Pair each row with the next row's deep sleep and take the difference
        participant_data[next_col] = participant_data[deep_col].shift(-1)
        participant_data[change_col] = participant_data[next_col] - participant_data[deep_col]

        # Drop rows that cannot be used. A NaN average would fail every
        # comparison in bin_label and be miscounted as bin '0', so it is
        # excluded here together with rows lacking a deep sleep change.
        valid_data = participant_data.dropna(subset=[change_col, avg_col]).copy()

        # Assign sleep deviation bins
        valid_data.loc[:, bin_col] = valid_data[avg_col].apply(
            lambda value: bin_label(value, median_sleep)
        )

        data_list.append(valid_data[[bin_col, change_col]])

    # Combine all participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # Compute statistics per category
    summary_table = violin_data.groupby(bin_col)[change_col].agg(
        **{
            "Deep Sleep Change in First 2 Hours Mean (hrs)": 'mean',
            "Deep Sleep Change in First 2 Hours Median (hrs)": 'median',
            "STD (hrs)": 'std',
            "Count": 'count',
        }
    )

    # Standard error of the mean
    summary_table['SE (hrs)'] = summary_table['STD (hrs)'] / np.sqrt(summary_table['Count'])

    # Fixed presentation order: largest deficit first, largest surplus last
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", ">3 +"]
    summary_table = summary_table.reindex(order).reset_index()

    # Save table as an HTML file
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_comparison_table_first_2_hours.html'
    summary_table.to_html(html_file_path, index=False, float_format="%.2f")

    return summary_table

def create_deep_sleep_comparison_graph_first_2_hours_vs_median(filled_data, all_participants):
    """Plot next-night early deep sleep, relative to each participant's median, per sleep bin.

    For every participant the rows of ``filled_data`` are labelled by how far
    the row's average sleep (row mean of the module-level ``avg_columns``) lies
    from the participant's median. Each row is paired with the following row's
    'Deep Sleep in First 2 Hours' minus the participant's median of that
    column, multiplied by 60.

    Side effects: writes two CSV files (raw rows of the '1-3' and '>3' bins),
    writes the figure as HTML, and displays it. All paths are hard-coded.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep',
            'Deep Sleep in First 2 Hours' and the ``avg_columns`` columns.
        all_participants: iterable of participant identifiers to include.
    """
    avg_col = 'Average Sleep Between Modalities'
    deep_col = 'Deep Sleep in First 2 Hours'
    next_col = 'Deep Sleep (In First Two hrs) Next Night'
    dev_col = 'Deep Sleep Deviation From Median (In First Two hrs)'
    dev_mins_col = 'Deep Sleep Deviation From Median (In First Two hrs) Minutes'
    bin_col = 'Relative Sleep Change'
    mean_col = 'Deep Sleep in First Two Hours Deviation From Participant Median'

    per_participant = []
    for pid in all_participants:
        nights = filled_data[filled_data['Participant'] == pid].copy()

        # Participant-level reference values
        nights[avg_col] = nights[avg_columns].mean(axis=1, skipna=True)
        tst_median = nights[avg_col].median()
        deep_median = nights[deep_col].median()

        # Following row's deep sleep, expressed relative to the participant median
        nights[next_col] = nights[deep_col].shift(-1)
        nights[dev_col] = nights[next_col] - deep_median

        usable = nights.dropna(subset=[dev_col, avg_col]).copy()

        def label_for(value, centre=tst_median):
            """Map one night's average sleep onto its deviation-bin label."""
            if value > (centre + 3):
                return '- >3'
            if (centre + 1) <= value <= (centre + 3):
                return '- 1-3'
            if (centre - 1) <= value < (centre + 1):
                return '- 1-1'
            if (centre - 3) <= value < (centre - 1):
                return '1-3'
            if value < (centre - 3):
                return '>3'
            return '0'

        usable.loc[:, bin_col] = usable[avg_col].apply(label_for)
        usable = usable.dropna(subset=[bin_col])

        # Deviation scaled by 60 (hours -> minutes)
        usable[dev_mins_col] = usable[dev_col] * 60

        per_participant.append(usable[['Participant', 'Date of Sleep', bin_col, dev_mins_col]])

    violin_data = pd.concat(per_participant, ignore_index=True)

    # Export the raw rows of the two sleep-deficit bins for inspection
    raw_exports = (
        ('1-3', r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\outliers_1-3_bin.csv'),
        ('>3', r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\outliers_gt3_bin.csv'),
    )
    for wanted_bin, csv_path in raw_exports:
        violin_data[violin_data[bin_col] == wanted_bin].copy().to_csv(csv_path, index=False)

    # Per-bin statistics, laid out in the fixed presentation order
    order = ["- >3", "- 1-3", "- 1-1", "1-3", ">3"]
    stat_spec = {mean_col: 'mean', "STD (mins)": 'std', "Count": 'count'}
    summary_table = violin_data.groupby(bin_col)[dev_mins_col].agg(**stat_spec)
    summary_table['SE (mins)'] = summary_table['STD (mins)'] / np.sqrt(summary_table['Count'])
    summary_table = summary_table.reindex(order).reset_index()

    # Ordinary least squares on the bin position (0..4) against the bin means
    position_of = {label: position for position, label in enumerate(order)}
    summary_table['Category Numeric'] = summary_table[bin_col].map(position_of)
    design = sm.add_constant(summary_table['Category Numeric'])
    ols_fit = sm.OLS(summary_table[mean_col], design, missing='drop').fit()
    summary_table['Regression Fit'] = ols_fit.predict(design)

    bins_x = summary_table[bin_col]
    means_y = summary_table[mean_col]

    fig = go.Figure()

    # Bin means
    fig.add_trace(go.Scatter(
        x=bins_x,
        y=means_y,
        mode='markers',
        marker={'size': 8, 'color': 'blue'},
        name='Mean Deep Sleep Deviation',
    ))

    # Fitted regression line
    fig.add_trace(go.Scatter(
        x=bins_x,
        y=summary_table['Regression Fit'],
        mode='lines',
        line={'color': 'green', 'width': 2},
        name='Linear Regression',
    ))

    # Bin means again, once with STD bars and once with SE bars
    error_bar_traces = (
        ('STD (mins)', 'blue', 'STD (Standard Deviation)'),
        ('SE (mins)', 'red', 'SE (Standard Error)'),
    )
    for spread_col, marker_colour, legend_name in error_bar_traces:
        fig.add_trace(go.Scatter(
            x=bins_x,
            y=means_y,
            error_y={'type': 'data', 'array': summary_table[spread_col], 'visible': True},
            mode='markers',
            marker={'size': 8, 'color': marker_colour, 'symbol': 'circle-open'},
            name=legend_name,
        ))

    # Horizontal reference line at zero spanning the full plot width
    fig.add_shape(
        type="line",
        xref="paper",
        x0=0,
        x1=1,
        y0=0,
        y1=0,
        line={'color': "red", 'width': 2, 'dash': "dot"},
    )

    fig.update_layout(
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        showlegend=True,
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        # Enlarged top/left margins leave room for the annotation-based titles
        margin={'t': 180, 'l': 180},
    )

    # Two-line figure title and two-line rotated y-axis label, drawn as
    # annotations: (text, x, y, font size, text angle or None)
    text_blocks = (
        ("Effect of Reduced Total Sleep on the First Two Hours", 0.5, 1.20, 20, None),
        ("of Deep Sleep the Following Night Relative to Participant's Median", 0.5, 1.10, 20, None),
        ("Deep Sleep Change", -0.15, 0.5, 14, -90),
        ("in First 2 Hours (Mins)", -0.11, 0.5, 14, -90),
    )
    for text, x_pos, y_pos, font_size, angle in text_blocks:
        annotation = {
            'text': text,
            'xref': "paper",
            'yref': "paper",
            'x': x_pos,
            'y': y_pos,
            'showarrow': False,
            'font': {'size': font_size, 'color': "black"},
        }
        if angle is not None:
            annotation['textangle'] = angle
        fig.add_annotation(**annotation)

    # Individual observations on top of the summary markers
    point_hover = violin_data['Participant'].astype(str) + " | " + violin_data['Date of Sleep'].astype(str)
    fig.add_trace(go.Scatter(
        x=violin_data[bin_col],
        y=violin_data[dev_mins_col],
        mode='markers',
        marker={'size': 5, 'color': 'black', 'opacity': 0.4, 'symbol': 'circle'},
        name='Individual Data Points',
        hovertext=point_hover,
    ))

    # Persist and display
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_comparison_graph_first_2_hours_vs_median.html'
    fig.write_html(html_file_path)

    fig.show()

def run_lmm_from_first_2_hours_vs_median(filled_data, all_participants):
    """Fit and plot a mixed model of next-night early deep sleep against sleep loss.

    Per participant, 'Sleep Loss' is the participant's median average sleep
    minus the row's average sleep (positive = less sleep than the median). The
    response is the following row's 'Deep Sleep in First 2 Hours' minus the
    participant's median of that column, multiplied by 60.

    A mixed linear model with a per-participant random intercept and random
    'Sleep Loss' slope is fitted with ``reml=False``. The figure shows each
    participant's fitted line and points, the fixed-effect line, a zero
    reference line and shaded one-unit-wide sleep-loss bands.

    Side effects: prints a dataset summary, writes the figure and the model
    summary as HTML to hard-coded paths, and displays the figure.

    Args:
        filled_data: DataFrame with 'Participant', 'Deep Sleep in First 2 Hours'
            and the four sleep-modality columns.
        all_participants: iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model.
    """
    modality_cols = ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']
    data_list = []

    for participant in all_participants:
        df_part = filled_data[filled_data['Participant'] == participant].copy()

        # Average sleep across modalities and participant medians
        df_part['AverageTST'] = df_part[modality_cols].mean(axis=1, skipna=True)
        median_tst = df_part['AverageTST'].median()
        median_deep = df_part['Deep Sleep in First 2 Hours'].median()

        # Positive = slept less than the participant's median
        df_part['Sleep Loss'] = median_tst - df_part['AverageTST']

        # Following row's deep sleep relative to the participant median, x60
        df_part['Deep Sleep Next Night'] = df_part['Deep Sleep in First 2 Hours'].shift(-1)
        df_part['Deep Sleep Change (mins)'] = (df_part['Deep Sleep Next Night'] - median_deep) * 60

        valid = df_part.dropna(subset=['Sleep Loss', 'Deep Sleep Change (mins)']).copy()
        # String ids so they match the keys of result.random_effects below
        valid['Participant'] = valid['Participant'].astype(str)

        data_list.append(valid[['Participant', 'Sleep Loss', 'Deep Sleep Change (mins)']])

    # Combine all participant data
    model_data = pd.concat(data_list, ignore_index=True)

    print(f"Dataset for LMM contains {len(model_data)} observations from {model_data['Participant'].nunique()} participants")
    print("Summary statistics:")
    print(model_data.describe())

    # Fit model with random intercepts and random slopes per participant
    model = smf.mixedlm(
        "Q('Deep Sleep Change (mins)') ~ Q('Sleep Loss')",
        data=model_data,
        groups="Participant",
        re_formula="~Q('Sleep Loss')"
    )
    result = model.fit(reml=False)

    # Fixed effects used for the overall line and as the base of each participant line
    slope_key = "Q('Sleep Loss')"
    fixed_intercept = result.fe_params['Intercept']
    fixed_slope = result.fe_params[slope_key]

    # Data extents, computed once and reused by the lines, shapes and labels
    loss_min = model_data['Sleep Loss'].min()
    loss_max = model_data['Sleep Loss'].max()
    change_min = model_data['Deep Sleep Change (mins)'].min()
    change_max = model_data['Deep Sleep Change (mins)'].max()

    x_vals = np.linspace(loss_min, loss_max, 100)

    # Dark colour palette for visibility; reused cyclically if there are more participants
    dark_colors = [
        '#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#ff7f0e',
        '#1a5276', '#943126', '#196f3d', '#5b2c6f', '#78281f',
        '#a93226', '#884ea0', '#17a589', '#d4ac0d', '#ca6f1e',
        '#2e4053', '#633974', '#1e8449', '#9c640c', '#935116',
    ]

    fig = go.Figure()

    # One fitted line plus the raw points per participant
    for i, (pid, rand_eff) in enumerate(result.random_effects.items()):
        participant_color = dark_colors[i % len(dark_colors)]

        # Participant line = fixed effects + that participant's random effects
        slope = fixed_slope + rand_eff.get(slope_key, 0)
        intercept = fixed_intercept + rand_eff.get('Intercept', 0)
        y_vals = intercept + slope * x_vals

        participant_data = model_data[model_data['Participant'] == pid]

        fig.add_trace(go.Scatter(
            x=x_vals, y=y_vals,
            mode='lines',
            line=dict(width=2, color=participant_color),
            name=f'Participant {pid}',
            legendgroup=f'participant_{pid}',
            hoverinfo='name'
        ))

        # Points share the line's legend group so they toggle together
        fig.add_trace(go.Scatter(
            x=participant_data['Sleep Loss'],
            y=participant_data['Deep Sleep Change (mins)'],
            mode='markers',
            marker=dict(size=6, color=participant_color, opacity=0.7, symbol='circle'),
            name=f'Points {pid}',
            legendgroup=f'participant_{pid}',
            showlegend=False,
            hovertext=participant_data['Participant']
        ))

    # Overall (fixed effect) line
    y_fit = fixed_intercept + fixed_slope * x_vals
    fig.add_trace(go.Scatter(
        x=x_vals, y=y_fit,
        mode='lines',
        line=dict(color='black', width=3, dash='dash'),
        name='Overall LMM Fit'
    ))

    # Red dotted reference line at y = 0 across the observed x range
    fig.add_shape(
        type="line",
        x0=loss_min,
        x1=loss_max,
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot"),
        xref="x",
        yref="y"
    )

    # Shaded sleep-loss bands
    shaded_bins = [
        {"label": ">3.5", "x0": 3.5, "x1": loss_max, "color": "#FFDDDD"},
        {"label": "2.5-3.5", "x0": 2.5, "x1": 3.5, "color": "#FFEEDD"},
        {"label": "1.5-2.5", "x0": 1.5, "x1": 2.5, "color": "#FFFFDD"},
        {"label": "0.5-1.5", "x0": 0.5, "x1": 1.5, "color": "#EEFFDD"},
        {"label": "-0.5-0.5", "x0": -0.5, "x1": 0.5, "color": "#DDFFEE"},
        {"label": "-1.5--0.5", "x0": -1.5, "x1": -0.5, "color": "#DDEEFF"},
        {"label": "-2.5--1.5", "x0": -2.5, "x1": -1.5, "color": "#DDDDEE"},
        {"label": "-3.5--2.5", "x0": -3.5, "x1": -2.5, "color": "#EEDDEE"},
        {"label": "<-3.5", "x0": loss_min, "x1": -3.5, "color": "#FFDDFF"},
    ]
    # The two open-ended bands take one edge from the data. If the data never
    # goes beyond +/-3.5 that edge falls inside the neighbouring bands, which
    # would paint a mislabelled rectangle over them; such bands are omitted.
    shaded_bins = [bin_def for bin_def in shaded_bins if bin_def["x0"] < bin_def["x1"]]

    for bin_def in shaded_bins:
        fig.add_shape(
            type="rect",
            x0=bin_def["x0"],
            x1=bin_def["x1"],
            y0=change_min - 10,
            y1=change_max + 10,
            xref="x",
            yref="y",
            fillcolor=bin_def["color"],
            opacity=0.2,
            layer="below",
            line_width=0
        )

    # Band labels, centred horizontally just above the highest data point
    for bin_def in shaded_bins:
        label_x = (bin_def["x0"] + bin_def["x1"]) / 2
        fig.add_annotation(
            x=label_x,
            y=change_max + 5,
            text=bin_def["label"],
            showarrow=False,
            font=dict(size=10, color="gray"),
            align="center",
            yanchor="bottom"
        )

    # Update layout
    fig.update_layout(
        title="Effect of Sleep Loss on Deep Sleep in First 2 Hours (LMM)",
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        yaxis_title="Change in Deep Sleep (Next Night - Median) (mins)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    # Save and display plot
    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_vs_median_LMM_graph.html'
    fig.write_html(output_path)
    fig.show()
    print(f"LMM plot saved to: {output_path}")

    # Save detailed results
    output_path_results = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_vs_median_LMM_results.html'
    with open(output_path_results, "w", encoding="utf-8") as f:
        f.write(result.summary().as_html())
    print(f"LMM results saved to: {output_path_results}")

    return result.summary()

def summarize_lmm_results(filled_data, all_participants):
    """Fit the "next-night deep sleep vs. median" mixed model and save its summary.

    Per participant, 'Sleep Loss' is the participant's median average sleep
    minus the row's average sleep, and the response is the following row's
    'Deep Sleep in First 2 Hours' minus the participant's median of that
    column, multiplied by 60. Each participant's intermediate frame is written
    out through ``save_checkpoint``.

    The model uses a per-participant random intercept and random 'Sleep Loss'
    slope and is fitted with ``reml=False``. Its summary is written as HTML to
    a hard-coded path.

    Args:
        filled_data: DataFrame with 'Participant', 'Deep Sleep in First 2 Hours'
            and the four sleep-modality columns.
        all_participants: iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model.
    """
    modality_cols = ["Actlumus (No Naps)", "GW (Total Sleep)", "Preprocessed", "Survey"]
    avg_col = "Average Sleep Between Modalities"
    deep_col = "Deep Sleep in First 2 Hours"
    model_cols = ["Participant", "Sleep Loss", "Deep Sleep Change (mins)"]

    frames = []
    for pid in all_participants:
        nights = filled_data[filled_data["Participant"] == pid].copy()

        # Row-wise mean over whichever modalities are present
        nights[avg_col] = nights[modality_cols].mean(axis=1, skipna=True)

        # Participant-specific reference levels
        tst_median = nights[avg_col].median()
        deep_median = nights[deep_col].median()

        # Following row's deep sleep, relative to the participant median
        nights["Deep Sleep Next Night"] = nights[deep_col].shift(-1)
        nights["Deep Sleep Change"] = nights["Deep Sleep Next Night"] - deep_median

        # Positive = slept less than the participant's median
        nights["Sleep Loss"] = tst_median - nights[avg_col]

        # Persist the full per-participant frame (including unusable rows)
        save_checkpoint(nights, f'11_lmm_participant_{pid}', print_info=False)

        # Keep only rows usable by the model
        usable = nights.dropna(subset=["Deep Sleep Change", "Sleep Loss"]).copy()
        usable["Deep Sleep Change (mins)"] = usable["Deep Sleep Change"] * 60
        usable["Participant"] = usable["Participant"].astype(str)

        frames.append(usable[model_cols])

    model_data = pd.concat(frames, ignore_index=True)

    # Quick sanity print of what goes into the model
    n_participants = len(model_data['Participant'].unique())
    print(f"Dataset for LMM contains {len(model_data)} observations from {n_participants} participants")
    print("Summary statistics:")
    print(model_data.describe())

    # Random intercept + random slope per participant; reml=False to match R
    result = smf.mixedlm(
        "Q('Deep Sleep Change (mins)') ~ Q('Sleep Loss')",
        data=model_data,
        groups="Participant",
        re_formula="~Q('Sleep Loss')",
    ).fit(reml=False)

    # Write the summary table as HTML
    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_vs_median_LMM.html'
    with open(output_path, "w", encoding="utf-8") as html_out:
        html_out.write(result.summary().as_html())
    print(f"LMM results saved to: {output_path}")

    return result.summary()

def run_lmm_from_first_2_hours_vs_current(filled_data, all_participants):
    """Fit and plot a mixed model of the night-to-night change in early deep sleep.

    Per participant, 'SleepLoss' is the participant's median average sleep minus
    the row's average sleep (positive = less sleep than the median). The
    response is the following row's 'Deep Sleep in First 2 Hours' minus the
    current row's value, multiplied by 60.

    A mixed linear model with a per-participant random intercept and random
    'SleepLoss' slope is fitted with ``reml=False``. The figure shows each
    participant's fitted line and points, the fixed-effect line, a zero
    reference line and shaded one-unit-wide sleep-loss bands.

    Side effects: writes the figure as HTML to a hard-coded path, displays it
    and prints the path.

    Args:
        filled_data: DataFrame with 'Participant', 'Deep Sleep in First 2 Hours'
            and the four sleep-modality columns.
        all_participants: iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model.
    """
    modality_cols = ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']
    data_list = []

    for participant in all_participants:
        df_part = filled_data[filled_data['Participant'] == participant].copy()

        df_part['AverageTST'] = df_part[modality_cols].mean(axis=1, skipna=True)
        median_tst = df_part['AverageTST'].median()

        df_part['SleepLoss'] = median_tst - df_part['AverageTST']  # +ve = sleep loss
        df_part['DeepSleepNext'] = df_part['Deep Sleep in First 2 Hours'].shift(-1)
        df_part['DeepSleepChangeMins'] = (df_part['DeepSleepNext'] - df_part['Deep Sleep in First 2 Hours']) * 60

        valid = df_part.dropna(subset=['SleepLoss', 'DeepSleepChangeMins']).copy()
        # String ids so they match the keys of result.random_effects below
        valid['Participant'] = valid['Participant'].astype(str)
        data_list.append(valid[['Participant', 'SleepLoss', 'DeepSleepChangeMins']])

    df = pd.concat(data_list, ignore_index=True)

    # Random intercept + random slope per participant
    model = smf.mixedlm("DeepSleepChangeMins ~ SleepLoss", data=df, groups="Participant", re_formula="~SleepLoss")
    result = model.fit(reml=False)

    # Fixed effects
    fixed_intercept = result.fe_params['Intercept']
    fixed_slope = result.fe_params['SleepLoss']

    # Data extents, computed once and reused by the lines, shapes and labels
    loss_min = df['SleepLoss'].min()
    loss_max = df['SleepLoss'].max()
    change_min = df['DeepSleepChangeMins'].min()
    change_max = df['DeepSleepChangeMins'].max()

    x_vals = np.linspace(loss_min, loss_max, 100)

    # Dark colour palette; reused cyclically if there are more participants
    dark_colors = [
        '#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b',  # Dark blue, red, green, purple, brown
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#ff7f0e',  # Pink, gray, olive, cyan, orange
        '#1a5276', '#943126', '#196f3d', '#5b2c6f', '#78281f',  # Darker blue, red, green, purple, brown
        '#a93226', '#884ea0', '#17a589', '#d4ac0d', '#ca6f1e',  # More dark red, purple, teal, gold, orange
        '#2e4053', '#633974', '#1e8449', '#9c640c', '#935116',  # Dark slate, grape, forest, amber, rust
    ]

    fig = go.Figure()

    # One fitted line plus the raw points per participant
    for i, (pid, rand_eff) in enumerate(result.random_effects.items()):
        participant_color = dark_colors[i % len(dark_colors)]

        # Participant line = fixed effects + that participant's random effects
        slope = fixed_slope + rand_eff.get('SleepLoss', 0)
        intercept = fixed_intercept + rand_eff.get('Intercept', 0)
        y_vals = intercept + slope * x_vals

        participant_data = df[df['Participant'] == pid]

        fig.add_trace(go.Scatter(
            x=x_vals, y=y_vals,
            mode='lines',
            line=dict(width=2, color=participant_color),
            name=f'Participant {pid}',
            legendgroup=f'participant_{pid}',
            hoverinfo='name'
        ))

        # Points share the line's colour and legend group so they toggle together
        fig.add_trace(go.Scatter(
            x=participant_data['SleepLoss'],
            y=participant_data['DeepSleepChangeMins'],
            mode='markers',
            marker=dict(size=6, color=participant_color, opacity=0.7, symbol='circle'),
            name=f'Points {pid}',
            legendgroup=f'participant_{pid}',
            showlegend=False,
            hovertext=participant_data['Participant']
        ))

    # Fixed effect line
    y_fit = fixed_intercept + fixed_slope * x_vals
    fig.add_trace(go.Scatter(
        x=x_vals, y=y_fit,
        mode='lines',
        line=dict(color='black', width=3, dash='dash'),
        name='Overall LMM Fit'
    ))

    # Red dotted reference line at y = 0 across the observed x range
    fig.add_shape(
        type="line",
        x0=loss_min,
        x1=loss_max,
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot"),
        xref="x",
        yref="y"
    )

    # Background shading for sleep loss bins
    shaded_bins = [
        {"label": ">3.5", "x0": 3.5, "x1": loss_max, "color": "#FFDDDD"},
        {"label": "2.5-3.5", "x0": 2.5, "x1": 3.5, "color": "#FFEEDD"},
        {"label": "1.5-2.5", "x0": 1.5, "x1": 2.5, "color": "#FFFFDD"},
        {"label": "0.5-1.5", "x0": 0.5, "x1": 1.5, "color": "#EEFFDD"},
        {"label": "-0.5-0.5", "x0": -0.5, "x1": 0.5, "color": "#DDFFEE"},
        {"label": "-1.5--0.5", "x0": -1.5, "x1": -0.5, "color": "#DDEEFF"},
        {"label": "-2.5--1.5", "x0": -2.5, "x1": -1.5, "color": "#DDDDEE"},
        {"label": "-3.5--2.5", "x0": -3.5, "x1": -2.5, "color": "#EEDDEE"},
        {"label": "<-3.5", "x0": loss_min, "x1": -3.5, "color": "#FFDDFF"},
    ]
    # The two open-ended bins take one edge from the data. If the data never
    # goes beyond +/-3.5 that edge falls inside the neighbouring bins, which
    # would paint a mislabelled rectangle over them; such bins are omitted.
    shaded_bins = [bin_def for bin_def in shaded_bins if bin_def["x0"] < bin_def["x1"]]

    for bin_def in shaded_bins:
        fig.add_shape(
            type="rect",
            x0=bin_def["x0"],
            x1=bin_def["x1"],
            y0=change_min - 10,
            y1=change_max + 10,
            xref="x",
            yref="y",
            fillcolor=bin_def["color"],
            opacity=0.2,
            layer="below",
            line_width=0
        )

    # Bin labels, centred horizontally just above the highest data point
    for bin_def in shaded_bins:
        label_x = (bin_def["x0"] + bin_def["x1"]) / 2
        fig.add_annotation(
            x=label_x,
            y=change_max + 5,
            text=bin_def["label"],
            showarrow=False,
            font=dict(size=10, color="gray"),
            align="center",
            yanchor="bottom"
        )

    fig.update_layout(
        title="Effect of Sleep Loss on Deep Sleep (First 2 Hours): Change from Current to Next Night",
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        yaxis_title="Change in Deep Sleep (Next - Current Night) (mins)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_vs_current_LMM_graph.html'
    fig.write_html(output_path)
    fig.show()

    print(f"LMM plot saved to: {output_path}")
    return result.summary()

def summarize_lmm_results_from_current(filled_data, all_participants):
    """Fit the "next night minus current night" deep-sleep mixed model and save its summary.

    Per participant, 'Sleep Loss (hrs)' is the participant's median average
    sleep minus the row's average sleep. The response is the following row's
    'Deep Sleep in First 2 Hours' minus the current row's value, multiplied
    by 60.

    The model is fitted with ``reml=False`` and no ``re_formula``; its summary
    is written as HTML to a hard-coded path.

    NOTE(review): run_lmm_from_first_2_hours_vs_current fits the same response
    with ``re_formula="~SleepLoss"``; this function passes none, so the saved
    summary may describe a different random-effects structure than the plotted
    model. Confirm whether that is intended.

    Args:
        filled_data: DataFrame with 'Participant', 'Deep Sleep in First 2 Hours'
            and the four sleep-modality columns.
        all_participants: iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model.
    """
    modality_cols = ["Actlumus (No Naps)", "GW (Total Sleep)", "Preprocessed", "Survey"]
    avg_col = "Average Sleep Between Modalities"
    deep_col = "Deep Sleep in First 2 Hours"
    next_col = "Deep Sleep (In First Two hrs) Next Night"
    loss_col = "Sleep Loss (hrs)"
    change_col = "Deep Sleep Change (mins)"

    frames = []
    for pid in all_participants:
        nights = filled_data[filled_data["Participant"] == pid].copy()

        # Predictor: participant median minus this row's average sleep
        nights[avg_col] = nights[modality_cols].mean(axis=1, skipna=True)
        tst_median = nights[avg_col].median()
        nights[loss_col] = tst_median - nights[avg_col]

        # Response: (following row - current row) * 60
        nights[next_col] = nights[deep_col].shift(-1)
        nights[change_col] = (nights[next_col] - nights[deep_col]) * 60

        usable = nights.dropna(subset=[loss_col, change_col]).copy()
        usable["Participant"] = usable["Participant"].astype(str)

        frames.append(usable[["Participant", loss_col, change_col]])

    df = pd.concat(frames, ignore_index=True)

    result = smf.mixedlm(
        "Q('Deep Sleep Change (mins)') ~ Q('Sleep Loss (hrs)')",
        data=df,
        groups="Participant",
    ).fit(reml=False)

    # Write the summary table as HTML
    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_vs_current_LMM.html'
    with open(output_path, "w", encoding="utf-8") as html_out:
        html_out.write(result.summary().as_html())

    print(f"Summary saved to: {output_path}")
    return result.summary()

def run_lmm_stroop_interference_vs_sleep_loss(filled_data, all_participants):
    """Fit and plot a mixed model of Stroop interference against same-row sleep loss.

    Per participant, 'SleepLoss' is the participant's median average sleep minus
    the row's average sleep (positive = less sleep than the median). The
    response is 'Interference Effect (ms)' from the same row (no shifting),
    coerced to numeric; rows that are missing or not parseable are dropped.

    A mixed linear model with a per-participant random intercept and random
    'SleepLoss' slope is fitted with ``reml=False``. The figure shows each
    participant's fitted line and points, the fixed-effect line, a zero
    reference line and shaded one-unit-wide sleep-loss bands.

    Side effects: writes the figure as HTML to a hard-coded path, displays it
    and prints the path.

    Args:
        filled_data: DataFrame with 'Participant', 'Interference Effect (ms)'
            and the four sleep-modality columns.
        all_participants: iterable of participant identifiers to include.

    Returns:
        tuple: ``(fig, summary)`` - the plotly figure and the statsmodels
        summary object of the fitted model.
    """
    modality_cols = ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']
    data_list = []

    for participant in all_participants:
        df_part = filled_data[filled_data['Participant'] == participant].copy()

        df_part['AverageTST'] = df_part[modality_cols].mean(axis=1, skipna=True)
        median_tst = df_part['AverageTST'].median()

        # Sleep loss = median - actual, so positive = sleep loss
        df_part['SleepLoss'] = median_tst - df_part['AverageTST']

        # Stroop interference is taken from the same row (no shifting)
        valid = df_part.dropna(subset=['SleepLoss', 'Interference Effect (ms)']).copy()
        # String ids so they match the keys of result.random_effects below
        valid['Participant'] = valid['Participant'].astype(str)
        valid['Interference Effect (ms)'] = pd.to_numeric(valid['Interference Effect (ms)'], errors='coerce')

        data_list.append(valid[['Participant', 'SleepLoss', 'Interference Effect (ms)']])

    df = pd.concat(data_list, ignore_index=True)
    # Second filter: the coercion above can turn unparseable values into NaN
    df = df.dropna(subset=['SleepLoss', 'Interference Effect (ms)'])

    # Random intercept + random slope per participant
    model = smf.mixedlm("Q('Interference Effect (ms)') ~ SleepLoss", data=df, groups="Participant", re_formula="~SleepLoss")
    result = model.fit(reml=False)

    # Fixed effects
    fixed_intercept = result.fe_params['Intercept']
    fixed_slope = result.fe_params['SleepLoss']

    # Data extents, computed once and reused by the lines, shapes and labels
    loss_min = df['SleepLoss'].min()
    loss_max = df['SleepLoss'].max()
    effect_min = df['Interference Effect (ms)'].min()
    effect_max = df['Interference Effect (ms)'].max()

    x_vals = np.linspace(loss_min, loss_max, 100)

    # Dark colour palette; reused cyclically if there are more participants
    dark_colors = [
        '#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b',  # Dark blue, red, green, purple, brown
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#ff7f0e',  # Pink, gray, olive, cyan, orange
        '#1a5276', '#943126', '#196f3d', '#5b2c6f', '#78281f',  # Darker blue, red, green, purple, brown
        '#a93226', '#884ea0', '#17a589', '#d4ac0d', '#ca6f1e',  # More dark red, purple, teal, gold, orange
        '#2e4053', '#633974', '#1e8449', '#9c640c', '#935116',  # Dark slate, grape, forest, amber, rust
    ]

    fig = go.Figure()

    # One fitted line plus the raw points per participant
    for i, (pid, rand_eff) in enumerate(result.random_effects.items()):
        participant_color = dark_colors[i % len(dark_colors)]

        # Participant line = fixed effects + that participant's random effects
        slope = fixed_slope + rand_eff.get('SleepLoss', 0)
        intercept = fixed_intercept + rand_eff.get('Intercept', 0)
        y_vals = intercept + slope * x_vals

        participant_data = df[df['Participant'] == pid]

        fig.add_trace(go.Scatter(
            x=x_vals, y=y_vals,
            mode='lines',
            line=dict(width=2, color=participant_color),
            name=f'Participant {pid}',
            legendgroup=f'participant_{pid}',
            hoverinfo='name'
        ))

        # Points share the line's colour and legend group so they toggle together
        fig.add_trace(go.Scatter(
            x=participant_data['SleepLoss'],
            y=participant_data['Interference Effect (ms)'],
            mode='markers',
            marker=dict(size=6, color=participant_color, opacity=0.7, symbol='circle'),
            name=f'Points {pid}',
            legendgroup=f'participant_{pid}',
            showlegend=False,
            hovertext=participant_data['Participant']
        ))

    # Overall fixed effect line
    y_fit = fixed_intercept + fixed_slope * x_vals
    fig.add_trace(go.Scatter(
        x=x_vals, y=y_fit,
        mode='lines',
        line=dict(color='black', width=3, dash='dash'),
        name='Overall LMM Fit'
    ))

    # Red dotted reference line at y = 0 across the observed x range
    fig.add_shape(
        type="line",
        x0=loss_min,
        x1=loss_max,
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot"),
        xref="x",
        yref="y"
    )

    # Shaded SleepLoss bins
    shaded_bins = [
        {"label": ">3.5", "x0": 3.5, "x1": loss_max, "color": "#FFDDDD"},
        {"label": "2.5-3.5", "x0": 2.5, "x1": 3.5, "color": "#FFEEDD"},
        {"label": "1.5-2.5", "x0": 1.5, "x1": 2.5, "color": "#FFFFDD"},
        {"label": "0.5-1.5", "x0": 0.5, "x1": 1.5, "color": "#EEFFDD"},
        {"label": "-0.5-0.5", "x0": -0.5, "x1": 0.5, "color": "#DDFFEE"},
        {"label": "-1.5--0.5", "x0": -1.5, "x1": -0.5, "color": "#DDEEFF"},
        {"label": "-2.5--1.5", "x0": -2.5, "x1": -1.5, "color": "#DDDDEE"},
        {"label": "-3.5--2.5", "x0": -3.5, "x1": -2.5, "color": "#EEDDEE"},
        {"label": "<-3.5", "x0": loss_min, "x1": -3.5, "color": "#FFDDFF"},
    ]
    # The two open-ended bins take one edge from the data. If the data never
    # goes beyond +/-3.5 that edge falls inside the neighbouring bins, which
    # would paint a mislabelled rectangle over them; such bins are omitted.
    shaded_bins = [bin_def for bin_def in shaded_bins if bin_def["x0"] < bin_def["x1"]]

    for bin_def in shaded_bins:
        fig.add_shape(
            type="rect",
            x0=bin_def["x0"],
            x1=bin_def["x1"],
            y0=effect_min - 50,
            y1=effect_max + 50,
            xref="x",
            yref="y",
            fillcolor=bin_def["color"],
            opacity=0.2,
            layer="below",
            line_width=0
        )
        # Bin label, centred horizontally just above the highest data point
        label_x = (bin_def["x0"] + bin_def["x1"]) / 2
        fig.add_annotation(
            x=label_x,
            y=effect_max + 40,
            text=bin_def["label"],
            showarrow=False,
            font=dict(size=10, color="gray"),
            align="center",
            yanchor="bottom"
        )

    fig.update_layout(
        title="Effect of Sleep Loss on Stroop Interference Effect (Same Night)",
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        yaxis_title="Stroop Interference Effect (ms)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\stroop_interference_vs_sleep_loss_LMM_graph.html'
    fig.write_html(output_path)
    fig.show()

    print(f"LMM plot saved to: {output_path}")
    return fig, result.summary()

def summarize_lmm_stroop_interference_from_current(filled_data, all_participants):
    """Fit a random-intercept LMM of Stroop interference on same-night sleep loss and save its summary.

    For every participant the nightly sleep duration is the row-wise mean of the
    four modality columns, and "Sleep Loss (hrs)" is the participant's median of
    that average minus the night's value (positive = less sleep than the
    participant's median). The pooled rows are fitted with
    ``Interference Effect (ms) ~ Sleep Loss (hrs)`` grouped by participant,
    using maximum likelihood (``reml=False``).

    Args:
        filled_data: DataFrame holding 'Participant', the four modality columns
            and 'Interference Effect (ms)'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model. The same summary is
        also written as HTML to the hard-coded ``Analysis\\LMM`` output path.
    """
    modality_columns = ["Actlumus (No Naps)", "GW (Total Sleep)", "Preprocessed", "Survey"]
    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data["Participant"] == participant].copy()

        participant_data["Average Sleep Between Modalities"] = participant_data[
            modality_columns
        ].mean(axis=1, skipna=True)

        median_sleep = participant_data["Average Sleep Between Modalities"].median()
        participant_data["Sleep Loss (hrs)"] = median_sleep - participant_data["Average Sleep Between Modalities"]

        # Coerce BEFORE dropping missing rows: a non-numeric entry becomes NaN
        # here, and it must be removed by the dropna below rather than being
        # handed to the model as a missing outcome.
        participant_data["Interference Effect (ms)"] = pd.to_numeric(
            participant_data["Interference Effect (ms)"], errors="coerce"
        )

        valid_data = participant_data.dropna(subset=["Sleep Loss (hrs)", "Interference Effect (ms)"]).copy()
        # Group labels are normalised to strings so they are uniform across participants.
        valid_data["Participant"] = valid_data["Participant"].astype(str)

        data_list.append(valid_data[["Participant", "Sleep Loss (hrs)", "Interference Effect (ms)"]])

    df = pd.concat(data_list, ignore_index=True)

    # Q('...') quotes column names that are not valid Python identifiers.
    model = smf.mixedlm("Q('Interference Effect (ms)') ~ Q('Sleep Loss (hrs)')", data=df, groups="Participant")
    result = model.fit(reml=False)

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\stroop_interference_from_current_LMM.html'
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.summary().as_html())

    print(f"Summary saved to: {output_path}")
    return result.summary()

def run_lmm_stroop_ratio_score(filled_data, all_participants):
    """Fit a random-slope LMM of the Stroop ratio interference score on sleep loss and plot it.

    Per participant, nightly total sleep time (TST) is the row-wise mean of the
    four modality columns and ``SleepLoss`` is the participant's median TST
    minus that night's TST (positive = sleep loss). The pooled data are fitted
    with ``Ratio Interference Score ~ SleepLoss`` with a random intercept and a
    random ``SleepLoss`` slope per participant (``re_formula="~SleepLoss"``),
    by maximum likelihood (``reml=False``).

    The figure shows one fitted line plus the raw points per participant, the
    fixed-effect line, a dotted red line at y = 0 and shaded sleep-loss bins.
    It is written to the hard-coded ``Analysis\\LMM`` HTML path and displayed.

    Args:
        filled_data: DataFrame holding 'Participant', the four modality columns
            and 'Ratio Interference Score'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model.
    """
    data_list = []
    for participant in all_participants:
        df_part = filled_data[filled_data['Participant'] == participant].copy()
        df_part['AverageTST'] = df_part[['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']].mean(axis=1, skipna=True)
        median_tst = df_part['AverageTST'].median()
        df_part['SleepLoss'] = median_tst - df_part['AverageTST']  # +ve = sleep loss
        valid = df_part.dropna(subset=['SleepLoss', 'Ratio Interference Score']).copy()
        valid['Participant'] = valid['Participant'].astype(str)
        data_list.append(valid[['Participant', 'SleepLoss', 'Ratio Interference Score']])
    df = pd.concat(data_list, ignore_index=True)
    model = smf.mixedlm("Q('Ratio Interference Score') ~ SleepLoss", data=df, groups="Participant", re_formula="~SleepLoss")
    result = model.fit(reml=False)

    # Extract fixed effects
    fixed_intercept = result.fe_params['Intercept']
    fixed_slope = result.fe_params['SleepLoss']

    # Data ranges are reused by the lines, the y = 0 marker and the shaded bins.
    sleep_loss_min = df['SleepLoss'].min()
    sleep_loss_max = df['SleepLoss'].max()
    score_min = df['Ratio Interference Score'].min()
    score_max = df['Ratio Interference Score'].max()

    x_vals = np.linspace(sleep_loss_min, sleep_loss_max, 100)

    # Dark colour palette, cycled when there are more participants than colours.
    dark_colors = [
        '#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b',  # Dark blue, red, green, purple, brown
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#ff7f0e',  # Pink, gray, olive, cyan, orange
        '#1a5276', '#943126', '#196f3d', '#5b2c6f', '#78281f',  # Darker blue, red, green, purple, brown
        '#a93226', '#884ea0', '#17a589', '#d4ac0d', '#ca6f1e',  # More dark red, purple, teal, gold, orange
        '#2e4053', '#633974', '#1e8449', '#9c640c', '#935116',  # Dark slate, grape, forest, amber, rust
    ]

    fig = go.Figure()

    for i, (pid, rand_eff) in enumerate(result.random_effects.items()):
        participant_color = dark_colors[i % len(dark_colors)]

        # statsmodels labels the random-intercept term with the grouping
        # variable's name ("Participant" here, "Group" for array groups), not
        # "Intercept". Looking up only "Intercept" silently yields 0 and drops
        # every participant's random intercept from the plotted line.
        rand_intercept = next(
            (rand_eff[label] for label in ('Intercept', 'Participant', 'Group') if label in rand_eff.index),
            0.0,
        )
        rand_slope = rand_eff.get('SleepLoss', 0.0)

        slope = fixed_slope + rand_slope
        intercept = fixed_intercept + rand_intercept
        y_vals = intercept + slope * x_vals

        # Extract this participant's data points
        participant_data = df[df['Participant'] == pid]

        # Participant-specific regression line
        fig.add_trace(go.Scatter(
            x=x_vals, y=y_vals,
            mode='lines',
            line=dict(width=2, color=participant_color),
            name=f'Participant {pid}',
            legendgroup=f'participant_{pid}',
            hoverinfo='name'
        ))

        # Raw observations in the matching colour; grouped with the line in the legend
        fig.add_trace(go.Scatter(
            x=participant_data['SleepLoss'],
            y=participant_data['Ratio Interference Score'],
            mode='markers',
            marker=dict(size=6, color=participant_color, opacity=0.7, symbol='circle'),
            name=f'Points {pid}',
            legendgroup=f'participant_{pid}',
            showlegend=False,
            hovertext=participant_data['Participant']
        ))

    # Add overall (fixed effect) line
    y_fit = fixed_intercept + fixed_slope * x_vals
    fig.add_trace(go.Scatter(
        x=x_vals, y=y_fit,
        mode='lines',
        line=dict(color='black', width=3, dash='dash'),
        name='Overall LMM Fit'
    ))

    # Red line at y = 0
    fig.add_shape(
        type="line",
        x0=sleep_loss_min,
        x1=sleep_loss_max,
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot"),
        xref="x",
        yref="y"
    )

    # Shaded SleepLoss bins; the two outer bins are open-ended and anchored to the data range.
    shaded_bins = [
        {"label": ">3.5", "x0": 3.5, "x1": sleep_loss_max, "color": "#FFDDDD"},
        {"label": "2.5-3.5", "x0": 2.5, "x1": 3.5, "color": "#FFEEDD"},
        {"label": "1.5-2.5", "x0": 1.5, "x1": 2.5, "color": "#FFFFDD"},
        {"label": "0.5-1.5", "x0": 0.5, "x1": 1.5, "color": "#EEFFDD"},
        {"label": "-0.5-0.5", "x0": -0.5, "x1": 0.5, "color": "#DDFFEE"},
        {"label": "-1.5--0.5", "x0": -1.5, "x1": -0.5, "color": "#DDEEFF"},
        {"label": "-2.5--1.5", "x0": -2.5, "x1": -1.5, "color": "#DDDDEE"},
        {"label": "-3.5--2.5", "x0": -3.5, "x1": -2.5, "color": "#EEDDEE"},
        {"label": "<-3.5", "x0": sleep_loss_min, "x1": -3.5, "color": "#FFDDFF"},
    ]

    for bin_def in shaded_bins:
        # When the data never reaches +/-3.5 h, an open-ended bin has x1 on the
        # wrong side of x0; drawing it would tint the neighbouring bins and put
        # its label in the wrong place, so such a bin is skipped.
        if bin_def["x1"] <= bin_def["x0"]:
            continue

        fig.add_shape(
            type="rect",
            x0=bin_def["x0"],
            x1=bin_def["x1"],
            y0=score_min - 0.05,
            y1=score_max + 0.05,
            xref="x",
            yref="y",
            fillcolor=bin_def["color"],
            opacity=0.2,
            layer="below",
            line_width=0
        )
        label_x = (bin_def["x0"] + bin_def["x1"]) / 2
        fig.add_annotation(
            x=label_x,
            y=score_max + 0.04,
            text=bin_def["label"],
            showarrow=False,
            font=dict(size=10, color="gray"),
            align="center",
            yanchor="bottom"
        )

    fig.update_layout(
        title="Effect of Sleep Loss on Stroop Ratio Interference Score (LMM)",
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        yaxis_title="Stroop Ratio Interference Score",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\stroop_ratio_interference_LMM_graph.html'
    fig.write_html(output_path)
    fig.show()
    print(f"LMM plot saved to: {output_path}")
    return result.summary()

def summarize_lmm_stroop_ratio_score(filled_data, all_participants):
    """Fit the ratio-interference LMM and save its summary table as HTML.

    Sleep loss per night is the participant's median of the modality-averaged
    total sleep time minus that night's average. The model is
    ``Ratio Interference Score ~ SleepLoss`` with a random intercept and random
    ``SleepLoss`` slope per participant, fitted with ``reml=False``.

    Args:
        filled_data: DataFrame holding 'Participant', the four modality columns
            and 'Ratio Interference Score'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object; it is also written to the hard-coded
        ``Analysis\\LMM`` HTML path.
    """
    tst_sources = ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']
    model_columns = ['Participant', 'SleepLoss', 'Ratio Interference Score']

    def _model_rows(participant):
        # One participant's complete-case rows, with sleep loss relative to their own median.
        nights = filled_data[filled_data['Participant'] == participant].copy()
        nights['AverageTST'] = nights[tst_sources].mean(axis=1, skipna=True)
        nights['SleepLoss'] = nights['AverageTST'].median() - nights['AverageTST']
        usable = nights.dropna(subset=['SleepLoss', 'Ratio Interference Score']).copy()
        usable['Participant'] = usable['Participant'].astype(str)
        return usable[model_columns]

    pooled = pd.concat(
        [_model_rows(participant) for participant in all_participants],
        ignore_index=True,
    )

    fitted = smf.mixedlm(
        "Q('Ratio Interference Score') ~ SleepLoss",
        data=pooled,
        groups="Participant",
        re_formula="~SleepLoss",
    ).fit(reml=False)

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\stroop_ratio_interference_LMM_summary.html'
    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(fitted.summary().as_html())

    print(f"LMM summary saved to: {output_path}")
    return fitted.summary()

def run_lmm_from_first_2_hours_percentchange(filled_data, all_participants):
    """Fit and plot an LMM of next-night change in early deep-sleep percentage on sleep loss.

    Per participant, nightly total sleep time (TST) is the row-wise mean of the
    four modality columns and ``SleepLoss`` is the participant's median TST
    minus that night's TST (positive = sleep loss). The outcome
    ``DeepSleepPercentChange`` is the following row's
    'Percentage Deep Sleep First 2 Hours' minus the current row's. The model
    ``DeepSleepPercentChange ~ SleepLoss`` has a random intercept and a random
    ``SleepLoss`` slope per participant and is fitted with ``reml=False``.

    The figure (participant lines and points, fixed-effect line, y = 0 marker,
    shaded sleep-loss bins) is written to the hard-coded ``Analysis\\LMM`` HTML
    path and displayed.

    Args:
        filled_data: DataFrame holding 'Participant', the four modality columns
            and 'Percentage Deep Sleep First 2 Hours'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        The statsmodels summary object of the fitted model.
    """
    data_list = []

    for participant in all_participants:
        df_part = filled_data[filled_data['Participant'] == participant].copy()

        df_part['AverageTST'] = df_part[[
            'Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']].mean(axis=1, skipna=True)

        median_tst = df_part['AverageTST'].median()

        df_part['SleepLoss'] = median_tst - df_part['AverageTST']  # +ve = sleep loss
        # NOTE(review): shift(-1) pairs each row with the next ROW; this assumes
        # the participant's rows are in chronological order, one per night - confirm.
        df_part['PercentDeepSleepNext'] = df_part['Percentage Deep Sleep First 2 Hours'].shift(-1)
        df_part['DeepSleepPercentChange'] = (df_part['PercentDeepSleepNext'] - df_part['Percentage Deep Sleep First 2 Hours'])

        valid = df_part.dropna(subset=['SleepLoss', 'DeepSleepPercentChange']).copy()
        valid['Participant'] = valid['Participant'].astype(str)
        data_list.append(valid[['Participant', 'SleepLoss', 'DeepSleepPercentChange']])

    df = pd.concat(data_list, ignore_index=True)

    model = smf.mixedlm("DeepSleepPercentChange ~ SleepLoss", data=df, groups="Participant", re_formula="~SleepLoss")
    result = model.fit(reml=False)

    # Fixed effects
    fixed_intercept = result.fe_params['Intercept']
    fixed_slope = result.fe_params['SleepLoss']

    # Data ranges are reused by the lines, the y = 0 marker and the shaded bins.
    sleep_loss_min = df['SleepLoss'].min()
    sleep_loss_max = df['SleepLoss'].max()
    change_min = df['DeepSleepPercentChange'].min()
    change_max = df['DeepSleepPercentChange'].max()

    x_vals = np.linspace(sleep_loss_min, sleep_loss_max, 100)

    # Dark colour palette, cycled when there are more participants than colours.
    dark_colors = [
        '#1f77b4', '#d62728', '#2ca02c', '#9467bd', '#8c564b',  # Dark blue, red, green, purple, brown
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#ff7f0e',  # Pink, gray, olive, cyan, orange
        '#1a5276', '#943126', '#196f3d', '#5b2c6f', '#78281f',  # Darker blue, red, green, purple, brown
        '#a93226', '#884ea0', '#17a589', '#d4ac0d', '#ca6f1e',  # More dark red, purple, teal, gold, orange
        '#2e4053', '#633974', '#1e8449', '#9c640c', '#935116',  # Dark slate, grape, forest, amber, rust
    ]

    fig = go.Figure()

    for i, (pid, rand_eff) in enumerate(result.random_effects.items()):
        participant_color = dark_colors[i % len(dark_colors)]

        # statsmodels labels the random-intercept term with the grouping
        # variable's name ("Participant" here, "Group" for array groups), not
        # "Intercept". Looking up only "Intercept" silently yields 0 and drops
        # every participant's random intercept from the plotted line.
        rand_intercept = next(
            (rand_eff[label] for label in ('Intercept', 'Participant', 'Group') if label in rand_eff.index),
            0.0,
        )
        rand_slope = rand_eff.get('SleepLoss', 0.0)

        slope = fixed_slope + rand_slope
        intercept = fixed_intercept + rand_intercept
        y_vals = intercept + slope * x_vals

        # Extract this participant's data points
        participant_data = df[df['Participant'] == pid]

        # Participant-specific regression line
        fig.add_trace(go.Scatter(
            x=x_vals, y=y_vals,
            mode='lines',
            line=dict(width=2, color=participant_color),
            name=f'Participant {pid}',
            legendgroup=f'participant_{pid}',
            hoverinfo='name'
        ))

        # Raw observations in the matching colour; grouped with the line in the legend
        fig.add_trace(go.Scatter(
            x=participant_data['SleepLoss'],
            y=participant_data['DeepSleepPercentChange'],
            mode='markers',
            marker=dict(size=6, color=participant_color, opacity=0.7, symbol='circle'),
            name=f'Points {pid}',
            legendgroup=f'participant_{pid}',
            showlegend=False,
            hovertext=participant_data['Participant']
        ))

    # Add fixed effect line
    y_fit = fixed_intercept + fixed_slope * x_vals
    fig.add_trace(go.Scatter(
        x=x_vals, y=y_fit,
        mode='lines',
        line=dict(color='black', width=3, dash='dash'),
        name='Overall LMM Fit'
    ))

    # Add red dotted horizontal line
    fig.add_shape(
        type="line",
        x0=sleep_loss_min,
        x1=sleep_loss_max,
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot"),
        xref="x",
        yref="y"
    )

    # Background shading for sleep loss bins; the two outer bins are open-ended
    # and anchored to the data range.
    shaded_bins = [
        {"label": ">3.5", "x0": 3.5, "x1": sleep_loss_max, "color": "#FFDDDD"},
        {"label": "2.5-3.5", "x0": 2.5, "x1": 3.5, "color": "#FFEEDD"},
        {"label": "1.5-2.5", "x0": 1.5, "x1": 2.5, "color": "#FFFFDD"},
        {"label": "0.5-1.5", "x0": 0.5, "x1": 1.5, "color": "#EEFFDD"},
        {"label": "-0.5-0.5", "x0": -0.5, "x1": 0.5, "color": "#DDFFEE"},
        {"label": "-1.5--0.5", "x0": -1.5, "x1": -0.5, "color": "#DDEEFF"},
        {"label": "-2.5--1.5", "x0": -2.5, "x1": -1.5, "color": "#DDDDEE"},
        {"label": "-3.5--2.5", "x0": -3.5, "x1": -2.5, "color": "#EEDDEE"},
        {"label": "<-3.5", "x0": sleep_loss_min, "x1": -3.5, "color": "#FFDDFF"},
    ]

    for bin_def in shaded_bins:
        # When the data never reaches +/-3.5 h, an open-ended bin has x1 on the
        # wrong side of x0; drawing it would tint the neighbouring bins and put
        # its label in the wrong place, so such a bin is skipped.
        if bin_def["x1"] <= bin_def["x0"]:
            continue

        fig.add_shape(
            type="rect",
            x0=bin_def["x0"],
            x1=bin_def["x1"],
            y0=change_min - 10,
            y1=change_max + 10,
            xref="x",
            yref="y",
            fillcolor=bin_def["color"],
            opacity=0.2,
            layer="below",
            line_width=0
        )
        label_x = (bin_def["x0"] + bin_def["x1"]) / 2
        fig.add_annotation(
            x=label_x,
            y=change_max + 5,
            text=bin_def["label"],
            showarrow=False,
            font=dict(size=10, color="gray"),
            align="center",
            yanchor="bottom"
        )

    fig.update_layout(
        title="Effect of Sleep Loss on Deep Sleep (First 2 Hours) Percentage: Change from Current to Next Night",
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        yaxis_title="Change in Deep Sleep Percentage (DS in first 2hrs / TST) (Next - Current Night)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_percentchange_LMM_graph.html'
    fig.write_html(output_path)
    fig.show()

    return result.summary()

def summarize_lmm_results_from_percentchange(filled_data, all_participants, re_formula=None):
    """Fit an LMM of next-night change in early deep-sleep percentage on sleep loss and save its summary.

    "Sleep Loss (hrs)" is the participant's median of the modality-averaged
    sleep duration minus the night's value. "DeepSleepPercentChange" is the
    following row's 'Percentage Deep Sleep First 2 Hours' minus the current
    row's. The model is ``DeepSleepPercentChange ~ Sleep Loss (hrs)`` grouped
    by participant, fitted with ``reml=False``.

    Args:
        filled_data: DataFrame holding 'Participant', the four modality columns
            and 'Percentage Deep Sleep First 2 Hours'.
        all_participants: Iterable of participant identifiers to include.
        re_formula: Optional random-effects formula forwarded to
            ``smf.mixedlm``. The default ``None`` keeps the original
            random-intercept-only model. Pass ``"~Q('Sleep Loss (hrs)')"`` to
            add a random slope, which matches the random-effects structure used
            by ``run_lmm_from_first_2_hours_percentchange`` for its plot.

    Returns:
        The statsmodels summary object; it is also written to the hard-coded
        ``Analysis\\LMM`` HTML path.
    """
    modality_columns = ["Actlumus (No Naps)", "GW (Total Sleep)", "Preprocessed", "Survey"]
    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data["Participant"] == participant].copy()

        participant_data["Average Sleep Between Modalities"] = participant_data[
            modality_columns
        ].mean(axis=1, skipna=True)

        median_sleep = participant_data["Average Sleep Between Modalities"].median()

        participant_data["Sleep Loss (hrs)"] = median_sleep - participant_data["Average Sleep Between Modalities"]

        # Shift to get next night's deep sleep.
        # NOTE(review): this pairs each row with the next ROW; it assumes rows
        # are chronological with one row per night - confirm.
        participant_data["PercentDeepSleepNext"] = participant_data["Percentage Deep Sleep First 2 Hours"].shift(-1)

        # Y = Next night - Current night
        participant_data["DeepSleepPercentChange"] = (participant_data["PercentDeepSleepNext"] - participant_data["Percentage Deep Sleep First 2 Hours"])

        valid_data = participant_data.dropna(subset=["Sleep Loss (hrs)", "DeepSleepPercentChange"]).copy()
        valid_data["Participant"] = valid_data["Participant"].astype(str)

        data_list.append(valid_data[["Participant", "Sleep Loss (hrs)", "DeepSleepPercentChange"]])

    df = pd.concat(data_list, ignore_index=True)

    model = smf.mixedlm(
        "Q('DeepSleepPercentChange') ~ Q('Sleep Loss (hrs)')",
        data=df,
        groups="Participant",
        re_formula=re_formula,
    )
    result = model.fit(reml=False)

    output_path = r'C:\Users\danie\Downloads\SleepData\Analysis\LMM\first2hrs_percentchange_LMM.html'
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.summary().as_html())

    return result.summary()

def generalized_deep_sleep_change_first_2_hours_table(filled_data, all_participants):
    """Summarise the next-night change in early deep sleep following short nights.

    A "short night" is one whose modality-averaged sleep duration is more than
    0.5 below the participant's own median of that average. For each such
    night the change is the following row's 'Deep Sleep in First 2 Hours'
    minus the current row's. Mean, median, standard deviation, count and
    standard error are computed over all short nights pooled across
    participants.

    Args:
        filled_data: DataFrame holding 'Participant', the columns listed in the
            module-level ``avg_columns`` and 'Deep Sleep in First 2 Hours'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        A one-row summary DataFrame (also saved as HTML to the hard-coded
        ``Analysis\\Sleep_Patterns`` path), or ``None`` when no short night
        with a usable change exists.
    """
    change_col = 'Deep Sleep Change (In First Two hrs)'
    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Shift to get next night
        participant_data['Deep Sleep (In First Two hrs) Next Night'] = participant_data['Deep Sleep in First 2 Hours'].shift(-1)

        # Compute deep sleep change
        participant_data[change_col] = participant_data['Deep Sleep (In First Two hrs) Next Night'] - participant_data['Deep Sleep in First 2 Hours']

        # Exclude missing data
        valid_data = participant_data.dropna(subset=[change_col, 'Average Sleep Between Modalities'])

        # Identify short nights (strictly more than 0.5 below the median)
        valid_data = valid_data[valid_data['Average Sleep Between Modalities'] < (median_sleep - 0.5)]

        # Only keep participants that contribute rows. Appending empty frames
        # made the "no valid data" guard below unreachable, so an all-NaN
        # summary was written instead of the message being printed.
        if not valid_data.empty:
            data_list.append(valid_data[[change_col]])

    if not data_list:
        print("No valid data found for deep sleep change in first 2 hours.")
        return None

    combined_data = pd.concat(data_list, ignore_index=True)

    # Compute Mean, Median, STD and Count for all short nights
    summary_table = pd.DataFrame({
        "Deep Sleep Change in First 2 Hours Mean (hrs)": [combined_data[change_col].mean()],
        "Deep Sleep Change in First 2 Hours Median (hrs)": [combined_data[change_col].median()],
        "STD (hrs)": [combined_data[change_col].std()],
        "Count": [combined_data.shape[0]]
    })

    # Standard error of the mean
    summary_table["SE (hrs)"] = summary_table["STD (hrs)"] / np.sqrt(summary_table["Count"])

    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\general_deep_sleep_change_first_2_hours_table.html'

    # Save table as an HTML file
    summary_table.to_html(html_file_path, index=False, float_format="%.2f")

    return summary_table

def absolute_deep_sleep_first_2_hours_table(filled_data, all_participants):
    """Tabulate next-night early deep sleep after short nights versus after all nights.

    For every participant the following row's 'Deep Sleep in First 2 Hours' is
    attached to each night. Nights with a usable next-night value and a usable
    modality-averaged sleep duration feed the "All Nights" row; the subset whose
    average is more than 0.5 below the participant's median feeds the
    "Following a Short Night" row.

    Args:
        filled_data: DataFrame holding 'Participant', the columns listed in the
            module-level ``avg_columns`` and 'Deep Sleep in First 2 Hours'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        A two-row summary DataFrame (mean, median, STD, count, SE per
        condition); it is also saved as HTML to the hard-coded
        ``Analysis\\Sleep_Patterns`` path.
    """
    avg_col = 'Average Sleep Between Modalities'
    next_col = 'Deep Sleep (In First Two hrs) Next Night'

    after_short = []
    after_any = []

    for pid in all_participants:
        nights = filled_data[filled_data['Participant'] == pid].copy()

        # Per-night average across modalities and the participant's own median of it
        nights[avg_col] = nights[avg_columns].mean(axis=1, skipna=True)
        typical_sleep = nights[avg_col].median()

        # Following row's early deep sleep, aligned to the current row
        nights[next_col] = nights['Deep Sleep in First 2 Hours'].shift(-1)

        usable = nights.dropna(subset=[next_col, avg_col])
        is_short = usable[avg_col] < (typical_sleep - 0.5)

        after_short.extend(usable[is_short][next_col].tolist())
        after_any.extend(usable[next_col].tolist())

    def _condition_row(condition, values):
        # One-row frame of descriptive statistics for a list of values.
        series = pd.Series(values)
        return pd.DataFrame({
            "Condition": [condition],
            "Mean Deep Sleep (hrs)": [series.mean()],
            "Median Deep Sleep (hrs)": [series.median()],
            "STD (hrs)": [series.std()],
            "Count": [series.count()],
            "SE (hrs)": [series.std() / np.sqrt(series.count())]
        })

    summary_table = pd.concat(
        [
            _condition_row("Following a Short Night", after_short),
            _condition_row("All Nights", after_any),
        ],
        ignore_index=True,
    )

    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\absolute_deep_sleep_first_2_hours_table.html'

    # Persist the table as HTML with two-decimal floats
    summary_table.to_html(html_file_path, index=False, float_format="%.2f")

    return summary_table

def paired_t_test_deep_sleep_first_2_hours_difference(filled_data, all_participants):
    """Test whether early deep sleep increases on the night after a short night.

    A "short night" is one whose modality-averaged sleep duration is more than
    0.5 below the participant's own median. A short night is used only when
    the following row is dated exactly one calendar day later. The pooled pairs
    (next night, short night) of 'Deep Sleep in First 2 Hours' are compared
    with a paired t-test, converted to a one-tailed p-value (expecting an
    increase), and the differences are also tested with a one-sample t-test
    against 0.

    Args:
        filled_data: DataFrame holding 'Participant', a 'Date of Sleep' or
            'Date' column, the four modality columns and
            'Deep Sleep in First 2 Hours'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        A DataFrame of summary statistics and test results (also saved as HTML
        to the hard-coded ``Analysis\\T_Test`` path), or the string
        "No valid short nights found for paired t-test." when no qualifying
        pair exists.

    Raises:
        KeyError: If neither 'Date of Sleep' nor 'Date' is a column of
            ``filled_data``.
    """
    # Ensure correct date column usage
    date_column = 'Date of Sleep' if 'Date of Sleep' in filled_data.columns else 'Date'

    if date_column not in filled_data.columns:
        raise KeyError(f"Neither 'Date of Sleep' nor 'Date' found in DataFrame columns: {filled_data.columns}")

    current_col = 'Deep Sleep in First 2 Hours'
    next_col = 'Deep Sleep (In First Two hrs) Next Night'
    change_col = "Deep Sleep Change (In First Two hrs)"
    avg_col = 'Average Sleep Between Modalities'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()
        participant_data[date_column] = pd.to_datetime(participant_data[date_column], errors='coerce')

        # Compute average sleep across modalities
        participant_data.loc[:, avg_col] = participant_data[
            ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']
        ].mean(axis=1, skipna=True)

        median_sleep = participant_data[avg_col].median()

        # Shift deep sleep values and dates for next night comparison
        participant_data.loc[:, next_col] = participant_data[current_col].shift(-1)
        participant_data.loc[:, 'Next Night Date'] = participant_data[date_column].shift(-1)

        # Remove rows with missing values
        valid_data = participant_data.dropna(subset=[current_col, next_col, avg_col, 'Next Night Date']).copy()
        valid_data['Next Night Date'] = pd.to_datetime(valid_data['Next Night Date'], errors='coerce')

        # Identify short nights (strictly more than 0.5 hrs below median)
        short_nights = valid_data[valid_data[avg_col] < (median_sleep - 0.5)].copy()

        # Ensure the next night is actually the following calendar day
        short_nights = short_nights[short_nights["Next Night Date"] == short_nights[date_column] + pd.Timedelta(days=1)].copy()

        # Only keep participants that contribute pairs. Appending empty frames
        # made the guard below unreachable, so the t-tests ran on empty input
        # and an all-NaN "Not Statistically Significant" table was saved.
        if not short_nights.empty:
            data_list.append(short_nights[[next_col, current_col]])

    if not data_list:
        return "No valid short nights found for paired t-test."

    combined_data = pd.concat(data_list, ignore_index=True)

    # Compute deep sleep change
    combined_data.loc[:, change_col] = combined_data[next_col] - combined_data[current_col]

    # Paired t-test, ordered so a positive t means an increase on the next night
    t_stat, p_value_two_tailed = stats.ttest_rel(
        combined_data[next_col],
        combined_data[current_col],
        nan_policy='omit'
    )

    # Convert to one-tailed p-value (expecting an increase in deep sleep)
    p_value_one_tailed = p_value_two_tailed / 2 if t_stat > 0 else 1 - (p_value_two_tailed / 2)

    # One-sample t-test on the change; this p-value is left at scipy's default (two-sided)
    one_sample_t_stat, one_sample_p_value = stats.ttest_1samp(
        combined_data[change_col], popmean=0
    )

    # Compute summary statistics
    summary_table = pd.DataFrame({
        "Metric": ["Mean", "Median", "STD", "Count", "Paired t-value", "Paired p-value (one-tailed)", "One-sample t-value", "One-sample p-value"],
        "Deep Sleep Change in First 2 Hours": [
            combined_data[change_col].mean(),
            combined_data[change_col].median(),
            combined_data[change_col].std(),
            combined_data.shape[0],
            t_stat,
            p_value_one_tailed,
            one_sample_t_stat,
            one_sample_p_value
        ]
    })

    # Determine statistical significance
    significance_level = 0.05
    test_type = "One-tailed (expecting increase)"
    result = "Statistically Significant (Reject Null Hypothesis)" if p_value_one_tailed < significance_level else "Not Statistically Significant (Fail to Reject Null Hypothesis)"

    summary_table.loc[len(summary_table)] = ["Test Type", test_type]
    summary_table.loc[len(summary_table)] = ["Conclusion", result]

    # Save results as HTML; numbers get four decimals, the text rows pass through unchanged
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\T_Test\paired_t_test_difference_results.html'
    summary_table.to_html(html_file_path, index=False, float_format=lambda x: f"{x:.4f}" if isinstance(x, (int, float)) else x)

    return summary_table

def one_sample_t_test_deep_sleep_median(filled_data, all_participants):
    """Test whether early deep sleep after a short night exceeds the participant's median.

    For each participant, the following row's 'Deep Sleep in First 2 Hours' is
    taken for every short night (modality-averaged sleep more than 0.5 below
    the participant's median) and the participant's median of
    'Deep Sleep in First 2 Hours' is subtracted from it. The pooled differences
    are tested against 0 with a one-sample, one-tailed (``greater``) t-test.

    Args:
        filled_data: DataFrame holding 'Participant', the four modality columns
            and 'Deep Sleep in First 2 Hours'.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        A Metric/Value DataFrame of descriptive statistics plus t and p (also
        saved as HTML to the hard-coded ``Analysis\\T_Test`` path), or a
        single-row "Error" DataFrame when no participant has a usable short night.
    """
    modalities = ["Actlumus (No Naps)", "GW (Total Sleep)", "Preprocessed", "Survey"]
    avg_col = "Average Sleep Between Modalities"
    deep_col = "Deep Sleep in First 2 Hours"
    next_col = "Deep Sleep (In First Two hrs) Next Night"

    deviations = []
    for pid in all_participants:
        nights = filled_data[filled_data["Participant"] == pid].copy()

        nights[avg_col] = nights[modalities].mean(axis=1, skipna=True)
        typical_sleep = nights[avg_col].median()

        # Participant baseline is taken over every night, before any filtering
        typical_deep = nights[deep_col].median()

        # Following row's value aligned to the current row
        nights[next_col] = nights[deep_col].shift(-1)

        complete = nights.dropna(subset=[deep_col, next_col, avg_col])
        short = complete[complete[avg_col] < (typical_sleep - 0.5)].copy()
        if short.empty:
            continue

        short["Difference from Median"] = short[next_col] - typical_deep
        deviations.append(short["Difference from Median"])

    # Nothing to test: report that through the same Metric/Value layout
    if not deviations:
        return pd.DataFrame({
            "Metric": ["Error"],
            "Value": ["No valid data found for analysis."]
        })

    pooled = pd.concat(deviations, ignore_index=True)

    # H0: mean difference = 0, against the one-sided alternative of an increase
    t_stat, p_value = stats.ttest_1samp(pooled, 0, alternative='greater')

    metrics = ["Mean", "Median", "STD", "Variance", "Count", "t-value", "p-value"]
    figures = [
        pooled.mean(),
        pooled.median(),
        pooled.std(),
        pooled.var(),
        pooled.shape[0],
        t_stat,
        p_value,
    ]
    summary_table = pd.DataFrame({"Metric": metrics, "Value": figures})

    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\T_Test\one_sample_t_test_deep_sleep_median.html'
    summary_table.to_html(html_file_path, index=False, float_format="%.4f")

    return summary_table

def create_relative_sleep_loss_violin_plot_Actlumus():
    """Build the relative-sleep-loss violin plot for the 'Actlumus (No Naps)' column."""
    create_relative_sleep_loss_violin_plot_per_modality(
        modality_column='Actlumus (No Naps)',
        filename='relative_sleep_loss_violin_Actlumus.html',
    )

def create_relative_sleep_loss_violin_plot_GW():
    """Build the relative-sleep-loss violin plot for the 'GW (Total Sleep)' column."""
    create_relative_sleep_loss_violin_plot_per_modality(
        modality_column='GW (Total Sleep)',
        filename='relative_sleep_loss_violin_GW.html',
    )

def create_relative_sleep_loss_violin_plot_Preprocessed():
    """Build the relative-sleep-loss violin plot for the 'Preprocessed' column."""
    create_relative_sleep_loss_violin_plot_per_modality(
        modality_column='Preprocessed',
        filename='relative_sleep_loss_violin_Preprocessed.html',
    )

def create_relative_sleep_loss_violin_plot_Survey():
    """Build the relative-sleep-loss violin plot for the 'Survey' column."""
    create_relative_sleep_loss_violin_plot_per_modality(
        modality_column='Survey',
        filename='relative_sleep_loss_violin_Survey.html',
    )

def create_relative_sleep_loss_violin_plot_per_modality(modality_column, filename):
    """Plot next-night change in deep-sleep percentage against deviation from median sleep.

    Reads the module-level ``filled_data`` and ``all_participants``. For each
    participant, deep-sleep percentage is 'GW (Deep Sleep)' divided by
    ``modality_column`` times 100, and its change is the following row's
    percentage minus the current row's. Each night is binned by how far its
    ``modality_column`` value lies below or above (labels ending in '+') the
    participant's own median for that column. The violin plot is written to
    the hard-coded ``Analysis\\Modality_Comparisons`` folder and displayed.

    Args:
        modality_column: Name of the sleep-duration column used for both the
            percentage denominator and the binning.
        filename: File name of the HTML output inside the output folder.

    Returns:
        None. Prints a message and returns early when no usable data exists.
    """
    # Filtering rows never changes the columns, so this check is done once
    # instead of once per participant.
    if modality_column not in filled_data.columns or 'GW (Deep Sleep)' not in filled_data.columns:
        print(f"No valid data for {modality_column}. Skipping graph.")
        return

    # (offset from median, label), widest deviation first. The original tested
    # "> median + 0.5" before the larger offsets, which made '1-2 +' and
    # '>3 +' unreachable, and it had no '2-3 +' bin to mirror '2-3'.
    below_median_bins = [(3, '>3'), (2, '2-3'), (1, '1-2'), (0.5, '0.5-1')]
    above_median_bins = [(3, '>3 +'), (2, '2-3 +'), (1, '1-2 +'), (0.5, '0.5-1 +')]

    def _relative_bin(hours, median_sleep):
        # Map one night's sleep duration to its deviation bin.
        if pd.isna(hours):
            # A missing duration has no deviation; it used to fall through to '0'.
            return None
        for offset, label in below_median_bins:
            if hours < (median_sleep - offset):
                return label
        for offset, label in above_median_bins:
            if hours > (median_sleep + offset):
                return label
        return '0'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data.loc[filled_data['Participant'] == participant].copy()

        # Get median sleep for this specific modality
        median_sleep = participant_data[modality_column].median()

        # Missing or zero-duration nights stay NaN rather than becoming 0 %:
        # filling with 0 made a missing night look like "no deep sleep" and
        # produced spurious night-to-night changes.
        deep_sleep_pct = participant_data['GW (Deep Sleep)'] / participant_data[modality_column] * 100
        participant_data['Deep Sleep %'] = deep_sleep_pct.replace([np.inf, -np.inf], np.nan)

        # Compute Deep Sleep % Change (Next Night - Current Night)
        participant_data['Deep Sleep % Next Night'] = participant_data['Deep Sleep %'].shift(-1)
        participant_data['Deep Sleep % Change'] = participant_data['Deep Sleep % Next Night'] - participant_data['Deep Sleep %']

        # Exclude nights where either the current or the next percentage is missing
        valid_data = participant_data.dropna(subset=['Deep Sleep % Change']).copy()

        valid_data['Relative Sleep Change'] = valid_data[modality_column].map(
            lambda hours: _relative_bin(hours, median_sleep)
        )
        valid_data = valid_data.dropna(subset=['Relative Sleep Change']).copy()

        # Skip participants with nothing to plot so the guard below can fire.
        if not valid_data.empty:
            data_list.append(valid_data[['Relative Sleep Change', 'Deep Sleep % Change']])

    # Ensure there's data before concatenating
    if not data_list:
        print(f"No valid data for {modality_column}. Skipping graph.")
        return

    violin_data = pd.concat(data_list, ignore_index=True)
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    fig = px.violin(violin_data, x='Relative Sleep Change', y='Deep Sleep % Change',
                    box=True, points='all', category_orders={'Relative Sleep Change': order},
                    title=f"{modality_column} Sleep Deviation vs. Deep Sleep % Change")

    # Zero-change reference line spanning every category slot
    fig.add_shape(go.layout.Shape(type="line", x0=-0.5, x1=len(order) - 0.5, y0=0, y1=0, line=dict(color="red", width=2, dash="dash")))

    fig.update_layout(
        xaxis_title="Relative Sleep Deviation",
        yaxis_title="Deep Sleep % Change",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    fig.write_html(f'C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Modality_Comparisons\\{filename}')
    fig.show()

def create_deep_sleep_first_2_hours_violin_plot_Actlumus():
    """Draw the first-2-hours deep sleep violin plot for the 'Actlumus (No Naps)' column.

    Delegates to create_deep_sleep_first_2_hours_violin_plot_per_modality and
    saves the figure as 'deep_sleep_first_2_hours_violin_Actlumus.html'.
    """
    create_deep_sleep_first_2_hours_violin_plot_per_modality(
        modality_column='Actlumus (No Naps)',
        filename='deep_sleep_first_2_hours_violin_Actlumus.html',
    )

def create_deep_sleep_first_2_hours_violin_plot_GW():
    """Draw the first-2-hours deep sleep violin plot for the 'GW (Total Sleep)' column.

    Delegates to create_deep_sleep_first_2_hours_violin_plot_per_modality and
    saves the figure as 'deep_sleep_first_2_hours_violin_GW.html'.
    """
    create_deep_sleep_first_2_hours_violin_plot_per_modality(
        modality_column='GW (Total Sleep)',
        filename='deep_sleep_first_2_hours_violin_GW.html',
    )

def create_deep_sleep_first_2_hours_violin_plot_Preprocessed():
    """Draw the first-2-hours deep sleep violin plot for the 'Preprocessed' column.

    Delegates to create_deep_sleep_first_2_hours_violin_plot_per_modality and
    saves the figure as 'deep_sleep_first_2_hours_violin_Preprocessed.html'.
    """
    create_deep_sleep_first_2_hours_violin_plot_per_modality(
        modality_column='Preprocessed',
        filename='deep_sleep_first_2_hours_violin_Preprocessed.html',
    )

def create_deep_sleep_first_2_hours_violin_plot_Survey():
    """Draw the first-2-hours deep sleep violin plot for the 'Survey' column.

    Delegates to create_deep_sleep_first_2_hours_violin_plot_per_modality and
    saves the figure as 'deep_sleep_first_2_hours_violin_Survey.html'.
    """
    create_deep_sleep_first_2_hours_violin_plot_per_modality(
        modality_column='Survey',
        filename='deep_sleep_first_2_hours_violin_Survey.html',
    )

def create_deep_sleep_first_2_hours_violin_plot_per_modality(modality_column, filename):
    """Plot next-night change in first-2-hours deep sleep against sleep deviation.

    For every participant in the module-level ``all_participants``, each row's
    value in ``modality_column`` is compared with that participant's median for
    the same column and assigned a deviation bin. The bins are plotted (violin +
    box + all points) against the change in 'Deep Sleep in First 2 Hours' from
    that row to the following row (following row minus current row).

    Bin labels without a suffix mean the night was that far *below* the
    participant's median, labels ending in ' +' mean that far *above* it, and
    '0' means within 0.5 of the median.

    Args:
        modality_column: Column of the module-level ``filled_data`` holding the
            sleep duration for the modality being analysed.
        filename: Name of the HTML file written into the Modality_Comparisons
            output folder.

    Returns:
        None. The figure is written to disk and shown; when there is nothing to
        plot a message is printed and no file is written.
    """
    deep_sleep_column = 'Deep Sleep in First 2 Hours'
    change_column = 'Deep Sleep First 2 Hours Change'
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    def deviation_bin(sleep_value, median_sleep):
        """Return the bin label for one night, or None when it cannot be binned."""
        # A missing value fails every comparison and would otherwise be
        # silently counted as a '0' (median) night.
        if pd.isna(sleep_value) or pd.isna(median_sleep):
            return None
        # Most extreme thresholds are tested first so that every bin is reachable.
        if sleep_value < median_sleep - 3:
            return '>3'
        if sleep_value < median_sleep - 2:
            return '2-3'
        if sleep_value < median_sleep - 1:
            return '1-2'
        if sleep_value < median_sleep - 0.5:
            return '0.5-1'
        if sleep_value > median_sleep + 3:
            return '>3 +'
        if sleep_value > median_sleep + 2:
            return '2-3 +'
        if sleep_value > median_sleep + 1:
            return '1-2 +'
        if sleep_value > median_sleep + 0.5:
            return '0.5-1 +'
        return '0'

    # Every participant slice shares filled_data's columns, so check them once.
    columns_present = (modality_column in filled_data.columns
                       and deep_sleep_column in filled_data.columns)

    data_list = []
    for participant in (all_participants if columns_present else []):
        participant_data = filled_data.loc[filled_data['Participant'] == participant].copy()

        # Median sleep for this participant in this specific modality
        median_sleep = participant_data[modality_column].median()

        # NOTE(review): shift(-1) pairs each row with the next row; this assumes
        # rows are one per night in chronological order -- confirm upstream.
        next_night = participant_data[deep_sleep_column].shift(-1)
        participant_data[change_column] = next_night - participant_data[deep_sleep_column]

        # Exclude rows without a computable change (e.g. each participant's last row)
        valid_data = participant_data.dropna(subset=[change_column]).copy()

        valid_data['Relative Sleep Change'] = valid_data[modality_column].apply(
            deviation_bin, args=(median_sleep,)
        )

        # Drop nights that could not be binned (missing modality value or median)
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])
        if not valid_data.empty:
            data_list.append(valid_data[['Relative Sleep Change', change_column]])

    # Ensure there's data before concatenating
    if not data_list:
        print(f"No valid data for {modality_column}. Skipping graph.")
        return

    violin_data = pd.concat(data_list, ignore_index=True)

    fig = px.violin(violin_data, x='Relative Sleep Change', y=change_column,
                    box=True, points='all', category_orders={'Relative Sleep Change': order},
                    title=f"{modality_column} Sleep Deviation vs. Deep Sleep First 2 Hours Change")

    # Zero line in paper coordinates so it spans the plot regardless of how
    # many categories end up on the axis.
    fig.add_shape(type="line", xref="paper", x0=0, x1=1, y0=0, y1=0,
                  line=dict(color="red", width=2, dash="dash"))

    fig.update_layout(
        xaxis_title="Relative Sleep Deviation",
        yaxis_title="Deep Sleep First 2 Hours Change",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    fig.write_html(f'C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Modality_Comparisons\\{filename}')
    fig.show()

def create_deep_sleep_violin_plot_Actlumus():
    """Draw the GW deep sleep change violin plot for the 'Actlumus (No Naps)' column.

    Delegates to create_deep_sleep_violin_plot_per_modality and saves the
    figure as 'deep_sleep_violin_Actlumus.html'.
    """
    create_deep_sleep_violin_plot_per_modality(
        modality_column='Actlumus (No Naps)',
        filename='deep_sleep_violin_Actlumus.html',
    )

def create_deep_sleep_violin_plot_GW():
    """Draw the GW deep sleep change violin plot for the 'GW (Total Sleep)' column.

    Delegates to create_deep_sleep_violin_plot_per_modality and saves the
    figure as 'deep_sleep_violin_GW.html'.
    """
    create_deep_sleep_violin_plot_per_modality(
        modality_column='GW (Total Sleep)',
        filename='deep_sleep_violin_GW.html',
    )

def create_deep_sleep_violin_plot_Preprocessed():
    """Draw the GW deep sleep change violin plot for the 'Preprocessed' column.

    Delegates to create_deep_sleep_violin_plot_per_modality and saves the
    figure as 'deep_sleep_violin_Preprocessed.html'.
    """
    create_deep_sleep_violin_plot_per_modality(
        modality_column='Preprocessed',
        filename='deep_sleep_violin_Preprocessed.html',
    )

def create_deep_sleep_violin_plot_Survey():
    """Draw the GW deep sleep change violin plot for the 'Survey' column.

    Delegates to create_deep_sleep_violin_plot_per_modality and saves the
    figure as 'deep_sleep_violin_Survey.html'.
    """
    create_deep_sleep_violin_plot_per_modality(
        modality_column='Survey',
        filename='deep_sleep_violin_Survey.html',
    )

def create_deep_sleep_violin_plot_per_modality(modality_column, filename):
    """Plot next-night change in GW deep sleep against sleep deviation for one modality.

    For every participant in the module-level ``all_participants``, each row's
    value in ``modality_column`` is compared with that participant's median for
    the same column and assigned a deviation bin. The bins are plotted (violin +
    box + all points) against the change in 'GW (Deep Sleep)' from that row to
    the following row (following row minus current row).

    Bin labels without a suffix mean the night was that far *below* the
    participant's median, labels ending in ' +' mean that far *above* it, and
    '0' means within 0.5 of the median.

    Args:
        modality_column: Column of the module-level ``filled_data`` holding the
            sleep duration for the modality being analysed.
        filename: Name of the HTML file written into the Modality_Comparisons
            output folder.

    Returns:
        None. The figure is written to disk and shown; when there is nothing to
        plot a message is printed and no file is written.
    """
    deep_sleep_column = 'GW (Deep Sleep)'
    change_column = 'GW Deep Sleep Change'
    order = [">3", "2-3", "1-2", "0.5-1", "0", "0.5-1 +", "1-2 +", "2-3 +", ">3 +"]

    def deviation_bin(sleep_value, median_sleep):
        """Return the bin label for one night, or None when it cannot be binned."""
        # A missing value fails every comparison and would otherwise be
        # silently counted as a '0' (median) night.
        if pd.isna(sleep_value) or pd.isna(median_sleep):
            return None
        # Most extreme thresholds are tested first so that every bin is reachable.
        if sleep_value < median_sleep - 3:
            return '>3'
        if sleep_value < median_sleep - 2:
            return '2-3'
        if sleep_value < median_sleep - 1:
            return '1-2'
        if sleep_value < median_sleep - 0.5:
            return '0.5-1'
        if sleep_value > median_sleep + 3:
            return '>3 +'
        if sleep_value > median_sleep + 2:
            return '2-3 +'
        if sleep_value > median_sleep + 1:
            return '1-2 +'
        if sleep_value > median_sleep + 0.5:
            return '0.5-1 +'
        return '0'

    # Every participant slice shares filled_data's columns, so check them once.
    columns_present = (modality_column in filled_data.columns
                       and deep_sleep_column in filled_data.columns)

    data_list = []
    for participant in (all_participants if columns_present else []):
        participant_data = filled_data.loc[filled_data['Participant'] == participant].copy()

        # Median sleep for this participant in this specific modality
        median_sleep = participant_data[modality_column].median()

        # NOTE(review): shift(-1) pairs each row with the next row; this assumes
        # rows are one per night in chronological order -- confirm upstream.
        next_night = participant_data[deep_sleep_column].shift(-1)
        participant_data[change_column] = next_night - participant_data[deep_sleep_column]

        # Exclude rows without a computable change (e.g. each participant's last row)
        valid_data = participant_data.dropna(subset=[change_column]).copy()

        valid_data['Relative Sleep Change'] = valid_data[modality_column].apply(
            deviation_bin, args=(median_sleep,)
        )

        # Drop nights that could not be binned (missing modality value or median)
        valid_data = valid_data.dropna(subset=['Relative Sleep Change'])
        if not valid_data.empty:
            data_list.append(valid_data[['Relative Sleep Change', change_column]])

    # Ensure there's data before concatenating
    if not data_list:
        print(f"No valid data for {modality_column}. Skipping graph.")
        return

    violin_data = pd.concat(data_list, ignore_index=True)

    fig = px.violin(violin_data, x='Relative Sleep Change', y=change_column,
                    box=True, points='all', category_orders={'Relative Sleep Change': order},
                    title=f"{modality_column} Sleep Deviation vs. GW Deep Sleep Change")

    # Zero line in paper coordinates so it spans the plot regardless of how
    # many categories end up on the axis.
    fig.add_shape(type="line", xref="paper", x0=0, x1=1, y0=0, y1=0,
                  line=dict(color="red", width=2, dash="dash"))

    fig.update_layout(
        xaxis_title="Relative Sleep Deviation",
        yaxis_title="GW Deep Sleep Change (Next Night - Current Night)",
        height=600,
        width=1000,
        plot_bgcolor='white'
    )

    fig.write_html(f'C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Modality_Comparisons\\{filename}')
    fig.show()

def identify_very_short_nights(filled_data, all_participants, threshold_hours=3, output_path=None):
    """Write an HTML table of nights whose average sleep is at or below a threshold.

    The average is the row-wise mean of the four modality columns
    ('Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey'),
    ignoring missing values. Rows where all four are missing have no average
    and are never reported.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep' and the four
            modality columns.
        all_participants: Iterable of participant identifiers; results are
            ordered by this iterable, then by row order within a participant.
        threshold_hours: Nights with an average sleep less than or equal to
            this value are reported. Defaults to 3, the cut-off the code has
            always applied (an older comment incorrectly said 2).
        output_path: Destination HTML file. Defaults to the
            Summary_Statistics 'very_short_nights.html' location.

    Returns:
        The path of the HTML file that was written.
    """
    modality_columns = ['Actlumus (No Naps)', 'GW (Total Sleep)', 'Preprocessed', 'Survey']
    average_column = 'Average Sleep Between Modalities'
    result_columns = ['Participant', 'Date of Sleep', average_column]

    if output_path is None:
        output_path = "C:\\Users\\danie\\Downloads\\SleepData\\Analysis\\Summary_Statistics\\very_short_nights.html"

    very_short_night_data = []
    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate the average sleep between modalities
        participant_data[average_column] = participant_data[modality_columns].mean(axis=1, skipna=True)

        # Keep nights at or below the threshold (NaN averages compare False)
        very_short_nights = participant_data[participant_data[average_column] <= threshold_hours]

        # Skip participants without any match so no empty frames are concatenated
        if not very_short_nights.empty:
            very_short_night_data.append(very_short_nights[result_columns])

    if very_short_night_data:
        very_short_night_df = pd.concat(very_short_night_data, ignore_index=True)
    else:
        very_short_night_df = pd.DataFrame(columns=result_columns)

    # Save results as an HTML file
    very_short_night_df.to_html(output_path, index=False, float_format="%.2f")

    return output_path

def create_deep_sleep_variance_scatterplot(filled_data, all_participants):
    """Scatter next-night first-2-hours deep sleep deviation against sleep loss.

    For each participant the x value of a row is the participant's median
    'Average Sleep Between Modalities' minus that row's average (positive means
    a shorter-than-median night). The y value is the following row's
    'Deep Sleep in First 2 Hours' minus the participant's median of that
    column, multiplied by 60.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep',
            'Deep Sleep in First 2 Hours' and the module-level ``avg_columns``.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        None. The figure is written to the Sleep_Patterns folder and shown;
        when there is nothing to plot a message is printed instead.
    """
    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data["Participant"] == participant].copy()

        # Calculate average TST and median sleep
        participant_data['Average Sleep Between Modalities'] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data['Average Sleep Between Modalities'].median()

        # Calculate median deep sleep in first 2 hours for this participant
        median_deep_sleep_2hrs = participant_data["Deep Sleep in First 2 Hours"].median()

        # NOTE(review): shift(-1) takes the next row as "next night"; assumes
        # rows are one per night in chronological order -- confirm upstream.
        participant_data['Deep Sleep (Next Night)'] = participant_data["Deep Sleep in First 2 Hours"].shift(-1)

        # Drop rows with missing values
        valid_data = participant_data.dropna(subset=["Average Sleep Between Modalities", "Deep Sleep (Next Night)"]).copy()

        # Sleep loss (positive = short night) and deep sleep deviation from the
        # participant median; * 60 presumes the source column is in hours.
        valid_data["Sleep Loss (hrs)"] = median_sleep - valid_data["Average Sleep Between Modalities"]
        valid_data["Deep Sleep Change (mins)"] = (valid_data["Deep Sleep (Next Night)"] - median_deep_sleep_2hrs) * 60

        # Keep necessary columns
        data_list.append(valid_data[["Participant", "Date of Sleep", "Sleep Loss (hrs)", "Deep Sleep Change (mins)"]])

    # pd.concat raises on an empty list, so bail out early like the other plots do
    if not data_list:
        print("No valid data for deep sleep variance scatterplot. Skipping graph.")
        return

    # Combine all participants
    scatter_data = pd.concat(data_list, ignore_index=True)

    # Without rows the zero line below would get NaN end points
    if scatter_data.empty:
        print("No valid data for deep sleep variance scatterplot. Skipping graph.")
        return

    # Create scatter plot with color by participant
    fig = px.scatter(
        scatter_data,
        x="Sleep Loss (hrs)",
        y="Deep Sleep Change (mins)",
        color="Participant",
        hover_data=["Participant", "Date of Sleep", "Sleep Loss (hrs)", "Deep Sleep Change (mins)"],
        title="Deep Sleep Change vs. Sleep Loss (Continuous)",
    )

    # Add red dotted line at y = 0 across the observed x range
    fig.add_shape(
        type="line",
        x0=scatter_data["Sleep Loss (hrs)"].min(),
        x1=scatter_data["Sleep Loss (hrs)"].max(),
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    # Update layout
    fig.update_layout(
        xaxis_title="Sleep Loss (Median - TST on Short Night) (hrs)",
        yaxis_title="Deep Sleep Change (Next Night - Median) (mins)",
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=True
    )

    # Save and show
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\deep_sleep_scatterplot_variance_by_participant.html'
    fig.write_html(html_file_path)
    fig.show()

def create_participant_stroop_sleep_comparison(participant_number):
    """Plot one participant's sleep metrics next to their Stroop results, per date.

    Reads the module-level ``filled_data`` and ``avg_columns``. TST is the
    row-wise mean of ``avg_columns``. Only rows that have TST, first-2-hours
    deep sleep and both Stroop measures, with non-negative Stroop values, are
    drawn. Sleep bars use the left axis, Stroop bars the right axis, and dotted
    lines mark the medians of the plotted TST and deep sleep values.

    Args:
        participant_number: Value matched against the 'Participant' column;
            also used (cast to int) in the output file name.

    Returns:
        None. The figure is written to the Stroop_Comparison folder and shown,
        or a message is printed when no usable rows remain.
    """
    interference_col = 'Interference Effect (ms)'
    ratio_col = 'Ratio Interference Score'
    deep_col = 'Deep Sleep in First 2 Hours'

    rows = filled_data[filled_data['Participant'] == participant_number].copy()

    # Average TST across the modalities
    rows['TST'] = rows[avg_columns].mean(axis=1, skipna=True)

    # A row is only plotted when all sleep and Stroop measures are present,
    # so no date shows a partial group of bars.
    rows = rows.dropna(subset=['TST', deep_col, interference_col, ratio_col])

    # Discard rows with negative (invalid) Stroop values and report how many
    non_negative = (rows[interference_col] >= 0) & (rows[ratio_col] >= 0)
    dropped = len(rows) - int(non_negative.sum())
    rows = rows[non_negative]
    if dropped > 0:
        print(f"Filtered out {dropped} row(s) with negative Stroop values for participant {participant_number}")

    if rows.empty:
        print(f"No valid data for participant {participant_number}. Skipping plot.")
        return

    dates = rows['Date of Sleep']
    row_count = len(rows)

    # (values, legend name, colour, axis, offset group) for each bar series:
    # sleep metrics on the left axis first, then Stroop metrics on the right.
    bar_series = [
        (rows['TST'], 'TST', 'steelblue', 'y', '1'),
        (rows[deep_col], 'Deep Sleep (First 2 Hours)', 'limegreen', 'y', '2'),
        (rows[interference_col], 'Stroop Interference Effect', 'lightsalmon', 'y2', '3'),
        (rows[ratio_col] * 100, 'Stroop Ratio Interference Score (*100)', 'mediumorchid', 'y2', '4'),
    ]

    # (median value, legend name, line colour) for each horizontal median line
    median_lines = [
        (rows['TST'].median(), 'Median TST', 'black'),
        (rows[deep_col].median(), 'Median Deep Sleep (2h)', 'gray'),
    ]

    fig = go.Figure()

    for values, label, colour, axis, group in bar_series:
        fig.add_trace(go.Bar(
            x=dates,
            y=values,
            name=label,
            marker_color=colour,
            yaxis=axis,
            offsetgroup=group,
        ))

    for level, label, colour in median_lines:
        fig.add_trace(go.Scatter(
            x=dates,
            y=[level] * row_count,
            mode='lines',
            name=label,
            line={'color': colour, 'dash': 'dot'},
            yaxis='y',
        ))

    fig.update_layout(
        title={
            'text': f"Participant {participant_number} Stroop Comparison",
            'x': 0.5,
            'font': {'size': 20},
        },
        xaxis={'title': 'Date'},
        # Lower bound pinned at 0 so bars start at the axis
        yaxis={'title': 'Sleep Time (hrs)', 'range': [0, None]},
        yaxis2={
            'title': 'Stroop Test Metrics (ms or ratio)',
            'overlaying': 'y',
            'side': 'right',
            'showgrid': False,
            'range': [0, None],
        },
        barmode='group',
        height=600,
        width=1200,
        legend={'x': 0.01, 'y': 1.1, 'orientation': 'h'},
        plot_bgcolor='white',
        paper_bgcolor='white',
    )

    output_path = f'C:\\Users\\danie\\Downloads\\SleepData\\Stroop_Comparison\\participant_{int(participant_number)}_stroop_comparison.html'
    fig.write_html(output_path)
    fig.show()

def create_percentage_deep_sleep_comparison_graph_first_2_hours_vs_median(filled_data, all_participants):
    """Plot next-night first-2-hours deep sleep % deviation by TST deviation bin.

    For each participant, every row is binned by how far its
    'Average Sleep Between Modalities' lies from the participant's median
    (labels starting with '- ' are above the median, i.e. a negative decrease;
    the others are below it). The plotted value is the following row's
    'Percentage Deep Sleep First 2 Hours' minus the participant's median of
    that column. Per bin, the mean, standard deviation and standard error are
    shown together with an unweighted linear fit through the bin means and the
    individual data points.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep',
            'Percentage Deep Sleep First 2 Hours' and the module-level
            ``avg_columns``.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        None. The figure is written to the Sleep_Patterns folder and shown;
        when there is nothing to plot a message is printed instead.
    """
    sleep_column = 'Average Sleep Between Modalities'
    source_column = 'Percentage Deep Sleep First 2 Hours'
    deviation_column = 'Percentage Deep Sleep Deviation From Median (In First Two hrs)'
    mean_column = "Percentage Deep Sleep in First Two Hours Deviation From Participant Median"
    order = ["- >3.5", "- 2.5-3.5", "- 1.5-2.5", "- 0.5-1.5", "- 0.5-0.5", "0.5-1.5", "1.5-2.5", "2.5-3.5", ">3.5"]

    # Inclusive lower bound of each bin as an offset from the median, from the
    # highest bin down; anything below the last bound falls in '>3.5'.
    lower_bounds = [
        (2.5, '- 2.5-3.5'),
        (1.5, '- 1.5-2.5'),
        (0.5, '- 0.5-1.5'),
        (-0.5, '- 0.5-0.5'),
        (-1.5, '0.5-1.5'),
        (-2.5, '1.5-2.5'),
        (-3.5, '2.5-3.5'),
    ]

    def deviation_bin(sleep_value, median_sleep):
        """Return the bin label for one (non-missing) average sleep value."""
        if sleep_value > median_sleep + 3.5:
            return '- >3.5'
        for offset, label in lower_bounds:
            if sleep_value >= median_sleep + offset:
                return label
        return '>3.5'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data[sleep_column] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data[sleep_column].median()

        # Participant's median % deep sleep across all of their rows
        median_percentage = participant_data[source_column].median()

        # NOTE(review): shift(-1) takes the next row as "next night"; assumes
        # rows are one per night in chronological order -- confirm upstream.
        participant_data[deviation_column] = participant_data[source_column].shift(-1) - median_percentage

        # Exclude rows missing either the deviation or the average sleep
        valid_data = participant_data.dropna(subset=[deviation_column, sleep_column]).copy()

        # Assign deviation bins (every remaining row gets a label)
        valid_data['Relative Sleep Change'] = valid_data[sleep_column].apply(
            deviation_bin, args=(median_sleep,)
        )

        data_list.append(valid_data[['Participant', 'Date of Sleep', 'Relative Sleep Change', deviation_column]])

    # pd.concat raises on an empty list, so bail out early
    if not data_list:
        print("No valid data for percentage deep sleep (first 2 hours) comparison. Skipping graph.")
        return

    # Combine participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # The regression below cannot be fitted without any rows
    if violin_data.empty:
        print("No valid data for percentage deep sleep (first 2 hours) comparison. Skipping graph.")
        return

    # Compute statistics per category
    summary_table = violin_data.groupby('Relative Sleep Change')[deviation_column].agg(
        **{mean_column: 'mean', "STD (%)": 'std', "Count": 'count'}
    )

    # Calculate SE
    summary_table['SE (%)'] = summary_table['STD (%)'] / np.sqrt(summary_table['Count'])

    # Put the bins in display order (empty bins become NaN rows) and expose
    # the bin label as a column
    summary_table = summary_table.reindex(order).reset_index()

    # Convert categories to their ordinal position for the regression
    category_map = {label: position for position, label in enumerate(order)}
    summary_table['Category Numeric'] = summary_table['Relative Sleep Change'].map(category_map)

    # Linear regression through the bin means; empty bins are dropped from the
    # fit but still receive a predicted value
    X = sm.add_constant(summary_table['Category Numeric'])
    y = summary_table[mean_column]
    model = sm.OLS(y, X, missing='drop').fit()
    summary_table['Regression Fit'] = model.predict(X)

    categories = summary_table['Relative Sleep Change']
    means = summary_table[mean_column]

    fig = go.Figure()

    # Bin means
    fig.add_trace(go.Scatter(
        x=categories,
        y=means,
        mode='markers',
        marker=dict(size=8, color='blue'),
        name='Mean Percentage Deep Sleep Deviation'
    ))

    # Regression line
    fig.add_trace(go.Scatter(
        x=categories,
        y=summary_table['Regression Fit'],
        mode='lines',
        line=dict(color='green', width=2),
        name='Linear Regression'
    ))

    # STD and SE error bars, each as its own open-marker trace
    error_bar_series = [
        ('STD (%)', 'blue', 'STD (Standard Deviation)'),
        ('SE (%)', 'red', 'SE (Standard Error)'),
    ]
    for spread_column, colour, label in error_bar_series:
        fig.add_trace(go.Scatter(
            x=categories,
            y=means,
            error_y=dict(type='data', array=summary_table[spread_column], visible=True),
            mode='markers',
            marker=dict(size=8, color=colour, symbol='circle-open'),
            name=label
        ))

    # Zero line across the full plot width
    fig.add_shape(
        type="line",
        x0=0,
        x1=1,
        xref="paper",
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    fig.update_layout(
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        showlegend=True,
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(t=180, l=180)  # Increased top and left margins for longer annotations
    )

    # Two-line title and two-line rotated y-axis label, drawn as annotations.
    # For the rotated label both lines share y=0.5; the smaller x is the outer
    # (left-most) line.
    annotations = [
        dict(text="Effect of Reduced Total Sleep on the Percentage",
             x=0.5, y=1.20, font=dict(size=20, color="black")),
        dict(text="of Deep Sleep in First Two Hours the Following Night Relative to Participant's Median",
             x=0.5, y=1.10, font=dict(size=20, color="black")),
        dict(text="% Deep Sleep Change",
             x=-0.15, y=0.5, font=dict(size=14, color="black"), textangle=-90),
        dict(text="in First 2 Hours After Short Nights",
             x=-0.11, y=0.5, font=dict(size=14, color="black"), textangle=-90),
    ]
    for annotation in annotations:
        fig.add_annotation(xref="paper", yref="paper", showarrow=False, **annotation)

    # Individual data points, added last so they are drawn over the summary
    fig.add_trace(go.Scatter(
        x=violin_data['Relative Sleep Change'],
        y=violin_data[deviation_column],
        mode='markers',
        marker=dict(
            size=5,
            color='black',
            opacity=0.4,
            symbol='circle'
        ),
        name='Individual Data Points',
        hovertext=violin_data['Participant'].astype(str) + " | " + violin_data['Date of Sleep'].astype(str)
    ))

    # Save figure
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\percentage_deep_sleep_comparison_graph_first_2_hours_vs_median.html'
    fig.write_html(html_file_path)

    # Display plot
    fig.show()

def create_percentage_deep_sleep_comparison_graph_first_half_hour_vs_median(filled_data, all_participants):
    """Plot next-night first-half-hour deep sleep % deviation by TST deviation bin.

    For each participant, every row is binned by how far its
    'Average Sleep Between Modalities' lies from the participant's median
    (labels starting with '- ' are above the median, i.e. a negative decrease;
    the others are below it). The plotted value is the following row's
    'Percentage Deep Sleep in First Half Hour' minus the participant's median
    of that column. Per bin, the mean, standard deviation and standard error
    are shown together with an unweighted linear fit through the bin means and
    the individual data points.

    Args:
        filled_data: DataFrame with 'Participant', 'Date of Sleep',
            'Percentage Deep Sleep in First Half Hour' and the module-level
            ``avg_columns``.
        all_participants: Iterable of participant identifiers to include.

    Returns:
        None. The figure is written to the Sleep_Patterns folder and shown;
        when there is nothing to plot a message is printed instead.
    """
    sleep_column = 'Average Sleep Between Modalities'
    source_column = 'Percentage Deep Sleep in First Half Hour'
    deviation_column = 'Percentage Deep Sleep Deviation From Median (In First Half hr)'
    mean_column = "Percentage Deep Sleep in First Half Hour Deviation From Participant Median"
    order = ["- >3.5", "- 2.5-3.5", "- 1.5-2.5", "- 0.5-1.5", "- 0.5-0.5", "0.5-1.5", "1.5-2.5", "2.5-3.5", ">3.5"]

    # Inclusive lower bound of each bin as an offset from the median, from the
    # highest bin down; anything below the last bound falls in '>3.5'.
    lower_bounds = [
        (2.5, '- 2.5-3.5'),
        (1.5, '- 1.5-2.5'),
        (0.5, '- 0.5-1.5'),
        (-0.5, '- 0.5-0.5'),
        (-1.5, '0.5-1.5'),
        (-2.5, '1.5-2.5'),
        (-3.5, '2.5-3.5'),
    ]

    def deviation_bin(sleep_value, median_sleep):
        """Return the bin label for one (non-missing) average sleep value."""
        if sleep_value > median_sleep + 3.5:
            return '- >3.5'
        for offset, label in lower_bounds:
            if sleep_value >= median_sleep + offset:
                return label
        return '>3.5'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Calculate sleep stats
        participant_data[sleep_column] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data[sleep_column].median()

        # Participant's median % deep sleep in the first half hour across all of their rows
        median_percentage = participant_data[source_column].median()

        # NOTE(review): shift(-1) takes the next row as "next night"; assumes
        # rows are one per night in chronological order -- confirm upstream.
        participant_data[deviation_column] = participant_data[source_column].shift(-1) - median_percentage

        # Exclude rows missing either the deviation or the average sleep
        valid_data = participant_data.dropna(subset=[deviation_column, sleep_column]).copy()

        # Assign deviation bins (every remaining row gets a label)
        valid_data['Relative Sleep Change'] = valid_data[sleep_column].apply(
            deviation_bin, args=(median_sleep,)
        )

        data_list.append(valid_data[['Participant', 'Date of Sleep', 'Relative Sleep Change', deviation_column]])

    # pd.concat raises on an empty list, so bail out early
    if not data_list:
        print("No valid data for percentage deep sleep (first half hour) comparison. Skipping graph.")
        return

    # Combine participant data
    violin_data = pd.concat(data_list, ignore_index=True)

    # The regression below cannot be fitted without any rows
    if violin_data.empty:
        print("No valid data for percentage deep sleep (first half hour) comparison. Skipping graph.")
        return

    # Compute statistics per category
    summary_table = violin_data.groupby('Relative Sleep Change')[deviation_column].agg(
        **{mean_column: 'mean', "STD (%)": 'std', "Count": 'count'}
    )

    # Calculate SE
    summary_table['SE (%)'] = summary_table['STD (%)'] / np.sqrt(summary_table['Count'])

    # Put the bins in display order (empty bins become NaN rows) and expose
    # the bin label as a column
    summary_table = summary_table.reindex(order).reset_index()

    # Convert categories to their ordinal position for the regression
    category_map = {label: position for position, label in enumerate(order)}
    summary_table['Category Numeric'] = summary_table['Relative Sleep Change'].map(category_map)

    # Linear regression through the bin means; empty bins are dropped from the
    # fit but still receive a predicted value
    X = sm.add_constant(summary_table['Category Numeric'])
    y = summary_table[mean_column]
    model = sm.OLS(y, X, missing='drop').fit()
    summary_table['Regression Fit'] = model.predict(X)

    categories = summary_table['Relative Sleep Change']
    means = summary_table[mean_column]

    fig = go.Figure()

    # Bin means
    fig.add_trace(go.Scatter(
        x=categories,
        y=means,
        mode='markers',
        marker=dict(size=8, color='blue'),
        name='Mean Percentage Deep Sleep Deviation'
    ))

    # Regression line
    fig.add_trace(go.Scatter(
        x=categories,
        y=summary_table['Regression Fit'],
        mode='lines',
        line=dict(color='green', width=2),
        name='Linear Regression'
    ))

    # STD and SE error bars, each as its own open-marker trace
    error_bar_series = [
        ('STD (%)', 'blue', 'STD (Standard Deviation)'),
        ('SE (%)', 'red', 'SE (Standard Error)'),
    ]
    for spread_column, colour, label in error_bar_series:
        fig.add_trace(go.Scatter(
            x=categories,
            y=means,
            error_y=dict(type='data', array=summary_table[spread_column], visible=True),
            mode='markers',
            marker=dict(size=8, color=colour, symbol='circle-open'),
            name=label
        ))

    # Zero line across the full plot width
    fig.add_shape(
        type="line",
        x0=0,
        x1=1,
        xref="paper",
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    fig.update_layout(
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        showlegend=True,
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(t=180, l=180)  # Increased top and left margins for longer annotations
    )

    # Two-line title and two-line rotated y-axis label, drawn as annotations.
    # For the rotated label both lines share y=0.5; the smaller x is the outer
    # (left-most) line.
    annotations = [
        dict(text="Effect of Reduced Total Sleep on the Percentage",
             x=0.5, y=1.20, font=dict(size=20, color="black")),
        dict(text="of Deep Sleep in First Half Hour the Following Night Relative to Participant's Median",
             x=0.5, y=1.10, font=dict(size=20, color="black")),
        dict(text="% Deep Sleep Change",
             x=-0.15, y=0.5, font=dict(size=14, color="black"), textangle=-90),
        dict(text="in First 30 Min After Short Nights",
             x=-0.11, y=0.5, font=dict(size=14, color="black"), textangle=-90),
    ]
    for annotation in annotations:
        fig.add_annotation(xref="paper", yref="paper", showarrow=False, **annotation)

    # Individual data points, added last so they are drawn over the summary
    fig.add_trace(go.Scatter(
        x=violin_data['Relative Sleep Change'],
        y=violin_data[deviation_column],
        mode='markers',
        marker=dict(
            size=5,
            color='black',
            opacity=0.4,
            symbol='circle'
        ),
        name='Individual Data Points',
        hovertext=violin_data['Participant'].astype(str) + " | " + violin_data['Date of Sleep'].astype(str)
    ))

    # Save figure
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\percentage_deep_sleep_comparison_graph_first_half_hour_vs_median.html'
    fig.write_html(html_file_path)

    # Display plot
    fig.show()

def create_percentage_deep_sleep_comparison_graph_first_1_hour_vs_median(filled_data, all_participants):
    """Plot the following row's first-hour deep-sleep % deviation per sleep-deviation bin.

    For each participant this function:
      1. averages the columns named in the module-level ``avg_columns`` into
         'Average Sleep Between Modalities' and takes the participant's median of it;
      2. pairs every row with the *next row's* 'Percentage Deep Sleep in First 1 Hour'
         (``shift(-1)``) and subtracts the participant's median of that column;
      3. labels the row with a bin describing how far its average sleep is from the
         participant's median (labels starting with "- " are nights longer than the
         median, the other labels are nights shorter than the median).

    The pooled rows are summarised per bin (mean, STD, count, SE), an unweighted OLS
    line is fitted through the per-bin means against the bin position, and the result
    is drawn with plotly, written to a hard-coded HTML path and shown.

    NOTE(review): ``shift(-1)`` is only the "following night" if each participant's
    rows are in chronological order with one row per night -- confirm upstream.

    Args:
        filled_data: DataFrame containing at least 'Participant', 'Date of Sleep',
            'Percentage Deep Sleep in First 1 Hour' and the ``avg_columns`` columns.
        all_participants: Iterable of participant identifiers that are compared
            against ``filled_data['Participant']``.

    Raises:
        ValueError: If ``all_participants`` is empty, or if no row has both an average
            sleep value and a next-row deep-sleep value (nothing to summarise or fit).
    """
    source_col = 'Percentage Deep Sleep in First 1 Hour'
    next_night_col = 'Percentage Deep Sleep (In First One hr) Next Night'
    deviation_col = 'Percentage Deep Sleep Deviation From Median (In First One hr)'
    mean_col = 'Percentage Deep Sleep in First One Hour Deviation From Participant Median'
    sleep_col = 'Average Sleep Between Modalities'

    # Display order of the bins: longest nights first, shortest nights last.
    order = ["- >3.5", "- 2.5-3.5", "- 1.5-2.5", "- 0.5-1.5", "- 0.5-0.5", "0.5-1.5", "1.5-2.5", "2.5-3.5", ">3.5"]

    def _relative_sleep_change_label(sleep_hours, median_sleep):
        """Return the bin label for one night's sleep relative to the participant median."""
        if sleep_hours > (median_sleep + 3.5):
            return '- >3.5'
        # The upper edge is inclusive here so that exactly median + 3.5 is not left unbinned.
        if (median_sleep + 2.5) <= sleep_hours <= (median_sleep + 3.5):
            return '- 2.5-3.5'
        if (median_sleep + 1.5) <= sleep_hours < (median_sleep + 2.5):
            return '- 1.5-2.5'
        if (median_sleep + 0.5) <= sleep_hours < (median_sleep + 1.5):
            return '- 0.5-1.5'
        if (median_sleep - 0.5) <= sleep_hours < (median_sleep + 0.5):
            return '- 0.5-0.5'
        if (median_sleep - 1.5) <= sleep_hours < (median_sleep - 0.5):
            return '0.5-1.5'
        if (median_sleep - 2.5) <= sleep_hours < (median_sleep - 1.5):
            return '1.5-2.5'
        if (median_sleep - 3.5) <= sleep_hours < (median_sleep - 2.5):
            return '2.5-3.5'
        if sleep_hours < (median_sleep - 3.5):
            return '>3.5'
        # Only reachable when a comparison involves NaN.
        return '0'

    data_list = []

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Per-night average across modalities and the participant's typical (median) night
        participant_data[sleep_col] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data[sleep_col].median()

        # Participant baseline for deep sleep in the first hour, across all rows
        participant_median_deep = participant_data[source_col].median()

        # Pair each row with the next row's value and express it relative to the baseline
        participant_data[next_night_col] = participant_data[source_col].shift(-1)
        participant_data[deviation_col] = participant_data[next_night_col] - participant_median_deep

        # Keep only rows where both the predictor and the outcome are available
        valid_data = participant_data.dropna(subset=[deviation_col, sleep_col]).copy()

        # The helper always returns a string, so no further null filtering is needed.
        valid_data['Relative Sleep Change'] = valid_data[sleep_col].apply(
            lambda hours: _relative_sleep_change_label(hours, median_sleep)
        )

        data_list.append(valid_data[['Participant', 'Date of Sleep', 'Relative Sleep Change', deviation_col]])

    # Fail early with a clear message instead of pandas' "No objects to concatenate".
    if not data_list:
        raise ValueError("No participants were supplied (all_participants is empty); nothing to plot.")

    violin_data = pd.concat(data_list, ignore_index=True)

    # Without any usable rows every bin mean is NaN and the regression has no observations.
    if violin_data.empty:
        raise ValueError(
            "No rows with both an average sleep value and a next-row deep sleep value were found "
            "for the supplied participants; nothing to plot."
        )

    # Per-bin statistics
    summary_table = violin_data.groupby('Relative Sleep Change')[deviation_col].agg(
        **{mean_col: 'mean', "STD (%)": 'std', "Count": 'count'}
    )
    summary_table['SE (%)'] = summary_table['STD (%)'] / np.sqrt(summary_table['Count'])

    # Put bins in display order (bins without data become NaN rows) and expose the label as a column
    summary_table = summary_table.reindex(order).reset_index()

    # Bin position (0..8) is the regressor
    category_map = {label: position for position, label in enumerate(order)}
    summary_table['Category Numeric'] = summary_table['Relative Sleep Change'].map(category_map)

    # Unweighted linear regression through the bin means; NaN means are dropped from the fit
    X = sm.add_constant(summary_table['Category Numeric'])  # Add constant term for intercept
    y = summary_table[mean_col]
    model = sm.OLS(y, X, missing='drop').fit()
    summary_table['Regression Fit'] = model.predict(X)

    fig = go.Figure()

    # Bin means
    fig.add_trace(go.Scatter(
        x=summary_table['Relative Sleep Change'],
        y=summary_table[mean_col],
        mode='markers',
        marker=dict(size=8, color='blue'),
        name='Mean Percentage Deep Sleep Deviation'
    ))

    # Regression line
    fig.add_trace(go.Scatter(
        x=summary_table['Relative Sleep Change'],
        y=summary_table['Regression Fit'],
        mode='lines',
        line=dict(color='green', width=2),
        name='Linear Regression'
    ))

    # STD and SE error bars, each as its own toggleable trace
    for spread_col, marker_color, trace_name in (
        ('STD (%)', 'blue', 'STD (Standard Deviation)'),
        ('SE (%)', 'red', 'SE (Standard Error)'),
    ):
        fig.add_trace(go.Scatter(
            x=summary_table['Relative Sleep Change'],
            y=summary_table[mean_col],
            error_y=dict(type='data', array=summary_table[spread_col], visible=True),
            mode='markers',
            marker=dict(size=8, color=marker_color, symbol='circle-open'),
            name=trace_name
        ))

    # Horizontal zero line; xref="paper" makes it span the full plot width
    fig.add_shape(
        type="line",
        x0=0,
        x1=1,
        xref="paper",
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    fig.update_layout(
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        showlegend=True,
        height=600,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(t=180, l=180)  # Increased top and left margins for longer annotations
    )

    # Two-line title, drawn as annotations stacked above the plot area
    for title_text, title_y in (
        ("Effect of Reduced Total Sleep on the Percentage", 1.20),
        ("of Deep Sleep in First One Hour the Following Night Relative to Participant's Median", 1.10),
    ):
        fig.add_annotation(
            text=title_text,
            xref="paper",
            yref="paper",
            x=0.5,
            y=title_y,
            showarrow=False,
            font=dict(size=20, color="black")
        )

    # Two-line rotated y-axis label: both lines are vertically centred (y=0.5);
    # the first line sits further left (x=-0.15) than the second (x=-0.11).
    for label_text, label_x in (
        ("% Deep Sleep Change", -0.15),
        ("in First 1 Hour After Short Nights", -0.11),
    ):
        fig.add_annotation(
            text=label_text,
            xref="paper",
            yref="paper",
            x=label_x,
            y=0.5,
            showarrow=False,
            font=dict(size=14, color="black"),
            textangle=-90
        )

    # Individual rows, with participant and date available on hover
    fig.add_trace(go.Scatter(
        x=violin_data['Relative Sleep Change'],
        y=violin_data[deviation_col],
        mode='markers',
        marker=dict(
            size=5,
            color='black',
            opacity=0.4,
            symbol='circle'
        ),
        name='Individual Data Points',
        hovertext=violin_data['Participant'].astype(str) + " | " + violin_data['Date of Sleep'].astype(str)
    ))

    # Save figure
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\percentage_deep_sleep_comparison_graph_first_1_hour_vs_median.html'
    fig.write_html(html_file_path)

    # Display plot
    fig.show()

def create_combined_percentage_deep_sleep_comparison_graph(filled_data, all_participants):
    """Overlay the half-hour, one-hour and two-hour deep-sleep deviation trends in one figure.

    For each participant the nightly 'Average Sleep Between Modalities' (mean of the
    module-level ``avg_columns``) is binned relative to the participant's median, and
    for each of the three time windows the *next row's* deep-sleep percentage
    (``shift(-1)``) is expressed as a deviation from the participant's median for that
    window. Only rows where all three deviations and the sleep average are present are
    kept, so the three windows are summarised over the same set of rows.

    Per window, the pooled rows are summarised per bin (Mean, STD, Count, SE) and an
    unweighted OLS line is fitted through the bin means against the bin position. The
    figure shows, for every window, the regression line and the bin means with SE error
    bars, plus the individual half-hour rows. It is written to a hard-coded HTML path
    and shown.

    NOTE(review): ``shift(-1)`` is only the "following night" if each participant's
    rows are in chronological order with one row per night -- confirm upstream.

    Args:
        filled_data: DataFrame containing at least 'Participant', 'Date of Sleep', the
            three 'Percentage Deep Sleep ...' columns and the ``avg_columns`` columns.
        all_participants: Iterable of participant identifiers that are compared
            against ``filled_data['Participant']``.

    Returns:
        tuple: ``(fig, summary_tables)`` where ``summary_tables`` maps 'half_hour',
        'one_hour' and 'two_hours' to their per-bin summary DataFrame.

    Raises:
        ValueError: If ``all_participants`` is empty, or if no row has all required
            values (nothing to summarise or fit).
    """
    sleep_col = 'Average Sleep Between Modalities'

    # period key -> (source column, next-row column, deviation column)
    period_columns = {
        'half_hour': ('Percentage Deep Sleep in First Half Hour', 'Half Hour Next Night', 'Half Hour Deviation'),
        'one_hour': ('Percentage Deep Sleep in First 1 Hour', 'One Hour Next Night', 'One Hour Deviation'),
        'two_hours': ('Percentage Deep Sleep First 2 Hours', 'Two Hours Next Night', 'Two Hours Deviation')
    }
    deviation_columns = {period: cols[2] for period, cols in period_columns.items()}

    # Display order of the bins: longest nights first, shortest nights last.
    order = ["- >3.5", "- 2.5-3.5", "- 1.5-2.5", "- 0.5-1.5", "- 0.5-0.5", "0.5-1.5", "1.5-2.5", "2.5-3.5", ">3.5"]

    def _relative_sleep_change_label(sleep_hours, median_sleep):
        """Return the bin label for one night's sleep relative to the participant median."""
        if sleep_hours > (median_sleep + 3.5):
            return '- >3.5'
        # The upper edge is inclusive here so that exactly median + 3.5 is not left unbinned.
        if (median_sleep + 2.5) <= sleep_hours <= (median_sleep + 3.5):
            return '- 2.5-3.5'
        if (median_sleep + 1.5) <= sleep_hours < (median_sleep + 2.5):
            return '- 1.5-2.5'
        if (median_sleep + 0.5) <= sleep_hours < (median_sleep + 1.5):
            return '- 0.5-1.5'
        if (median_sleep - 0.5) <= sleep_hours < (median_sleep + 0.5):
            return '- 0.5-0.5'
        if (median_sleep - 1.5) <= sleep_hours < (median_sleep - 0.5):
            return '0.5-1.5'
        if (median_sleep - 2.5) <= sleep_hours < (median_sleep - 1.5):
            return '1.5-2.5'
        if (median_sleep - 3.5) <= sleep_hours < (median_sleep - 2.5):
            return '2.5-3.5'
        if sleep_hours < (median_sleep - 3.5):
            return '>3.5'
        # Only reachable when a comparison involves NaN.
        return '0'

    # One list of per-participant frames per time window
    data_lists = {period: [] for period in period_columns}

    for participant in all_participants:
        participant_data = filled_data[filled_data['Participant'] == participant].copy()

        # Per-night average across modalities and the participant's typical (median) night
        participant_data[sleep_col] = participant_data[avg_columns].mean(axis=1, skipna=True)
        median_sleep = participant_data[sleep_col].median()

        # For every window: baseline median, next-row value, and deviation from the baseline
        for source_col, next_col, deviation_col in period_columns.values():
            participant_median = participant_data[source_col].median()
            participant_data[next_col] = participant_data[source_col].shift(-1)
            participant_data[deviation_col] = participant_data[next_col] - participant_median

        # Require all three deviations plus the sleep average so every window uses the same rows
        valid_data = participant_data.dropna(subset=list(deviation_columns.values()) + [sleep_col]).copy()

        # The helper always returns a string, so no further null filtering is needed.
        valid_data['Relative Sleep Change'] = valid_data[sleep_col].apply(
            lambda hours: _relative_sleep_change_label(hours, median_sleep)
        )

        for period, deviation_col in deviation_columns.items():
            data_lists[period].append(
                valid_data[['Participant', 'Date of Sleep', 'Relative Sleep Change', deviation_col]]
            )

    # Fail early with a clear message instead of pandas' "No objects to concatenate".
    if not data_lists['half_hour']:
        raise ValueError("No participants were supplied (all_participants is empty); nothing to plot.")

    # Pool all participants per time window
    violin_data = {
        period: pd.concat(frames, ignore_index=True)
        for period, frames in data_lists.items()
    }

    # All windows share the same rows, so checking one is enough. Without any usable
    # rows every bin mean is NaN and the regressions have no observations.
    if violin_data['half_hour'].empty:
        raise ValueError(
            "No rows with an average sleep value and next-row deep sleep values for all three "
            "time windows were found for the supplied participants; nothing to plot."
        )

    # Bin position (0..8) is the regressor
    category_map = {label: position for position, label in enumerate(order)}

    summary_tables = {}
    for period, data in violin_data.items():
        # Per-bin statistics
        summary = data.groupby('Relative Sleep Change')[deviation_columns[period]].agg(
            **{"Mean": 'mean', "STD": 'std', "Count": 'count'}
        )
        summary['SE'] = summary['STD'] / np.sqrt(summary['Count'])

        # Put bins in display order (bins without data become NaN rows)
        summary = summary.reindex(order).reset_index()
        summary['Category Numeric'] = summary['Relative Sleep Change'].map(category_map)

        # Unweighted linear regression through the bin means; NaN means are dropped from the fit
        X_with_const = sm.add_constant(summary['Category Numeric'])
        model = sm.OLS(summary['Mean'], X_with_const, missing='drop').fit()
        summary['Regression Fit'] = model.predict(X_with_const)

        summary_tables[period] = summary

    fig = go.Figure()

    # Colors for different time periods
    colors = {
        'half_hour': 'blue',
        'one_hour': 'green',
        'two_hours': 'purple'
    }

    # Names for legend
    period_names = {
        'half_hour': 'First 30 Minutes',
        'one_hour': 'First 1 Hour',
        'two_hours': 'First 2 Hours'
    }

    # Per window: regression line first, then bin means with SE error bars
    for period, summary in summary_tables.items():
        fig.add_trace(go.Scatter(
            x=summary['Relative Sleep Change'],
            y=summary['Regression Fit'],
            mode='lines',
            line=dict(color=colors[period], width=3),
            name=f'Regression: {period_names[period]}'
        ))

        fig.add_trace(go.Scatter(
            x=summary['Relative Sleep Change'],
            y=summary['Mean'],
            mode='markers',
            marker=dict(size=8, color=colors[period]),
            name=f'Mean: {period_names[period]}',
            error_y=dict(type='data', array=summary['SE'], visible=True)
        ))

    # Individual rows are drawn for the half-hour window only, to avoid overcrowding
    half_hour_points = violin_data['half_hour']
    fig.add_trace(go.Scatter(
        x=half_hour_points['Relative Sleep Change'],
        y=half_hour_points[deviation_columns['half_hour']],
        mode='markers',
        marker=dict(
            size=3,
            color='black',
            opacity=0.2,
            symbol='circle'
        ),
        name='Individual Data Points',
        hovertext=half_hour_points['Participant'].astype(str) + " | " + half_hour_points['Date of Sleep'].astype(str),
        showlegend=False
    ))

    # Horizontal zero line; xref="paper" makes it span the full plot width
    fig.add_shape(
        type="line",
        x0=0,
        x1=1,
        xref="paper",
        y0=0,
        y1=0,
        line=dict(color="red", width=2, dash="dot")
    )

    fig.update_layout(
        title="Effect of Reduced Total Sleep on Deep Sleep Percentage in Different Time Periods",
        xaxis_title="Decrease in TST Relative to Participant Median (Hours)",
        yaxis_title="% Deep Sleep Change Relative to Participant's Median",
        showlegend=True,
        height=700,
        width=1000,
        plot_bgcolor='white',
        paper_bgcolor='white',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5
        )
    )

    # Save figure
    html_file_path = r'C:\Users\danie\Downloads\SleepData\Analysis\Sleep_Patterns\combined_percentage_deep_sleep_comparison.html'
    fig.write_html(html_file_path)

    # Display plot
    fig.show()

    return fig, summary_tables

# ---------------------------------------------------------------------------
# Driver section: runs every analysis / figure builder, in a fixed order.
# ---------------------------------------------------------------------------

# Zero-argument summaries, histograms and violin plots.
for _build_report in (
    create_sleep_nights_count_summary,
    create_short_nights_summary,
    create_long_nights_summary,
    create_consecutive_short_nights_graph,
    create_short_night_duration_histogram,
    create_all_night_duration_histogram,
    create_relative_sleep_loss_violin_plot,
    create_relative_sleep_loss_violin_plot_fullrange,
    create_deep_sleep_first_2_hours_violin_plot,
    create_deep_sleep_first_2_hours_violin_plot_fullrange,
    create_deep_sleep_violin_plot_fullrange,
    create_deep_sleep_comparison_table,
):
    _build_report()

create_deep_sleep_comparison_graph(filled_data, all_participants)
create_deep_sleep_comparison_table_first_2_hours()

# Graphs, mixed-model runs/summaries, tables and t-tests that all take
# (filled_data, all_participants).
for _run_analysis in (
    create_deep_sleep_comparison_graph_first_2_hours,
    create_deep_sleep_comparison_graph_first_2_hours_vs_median,
    run_lmm_from_first_2_hours_vs_median,
    run_lmm_with_correct_date_alignment,
    summarize_lmm_results,
    run_lmm_from_first_2_hours_vs_current,
    summarize_lmm_results_from_current,
    run_lmm_stroop_interference_vs_sleep_loss,
    summarize_lmm_stroop_interference_from_current,
    run_lmm_stroop_ratio_score,
    summarize_lmm_stroop_ratio_score,
    run_lmm_from_first_2_hours_percentchange,
    summarize_lmm_results_from_percentchange,
    generalized_deep_sleep_change_first_2_hours_table,
    absolute_deep_sleep_first_2_hours_table,
    paired_t_test_deep_sleep_first_2_hours_difference,
    one_sample_t_test_deep_sleep_median,
):
    _run_analysis(filled_data, all_participants)

# Define all sleep modalities
modalities = [
    "Actlumus (No Naps)",
    "GW (Total Sleep)",
    "Preprocessed",
    "Survey"
]

# Turns a modality name into a filename fragment: spaces become underscores,
# parentheses are removed.
_modality_filename_table = str.maketrans({" ": "_", "(": None, ")": None})

# Each per-modality plot builder is run for all modalities before moving on
# to the next builder; the second argument is the output file name.
for _plot_per_modality, _filename_prefix in (
    (create_relative_sleep_loss_violin_plot_per_modality, 'relative_sleep_loss_violin_'),
    (create_deep_sleep_first_2_hours_violin_plot_per_modality, 'deep_sleep_first_2_hours_violin_'),
    (create_deep_sleep_violin_plot_per_modality, 'deep_sleep_violin_plot_'),
):
    for modality in modalities:
        _plot_per_modality(modality, f'{_filename_prefix}{modality.translate(_modality_filename_table)}.html')

for _run_analysis in (
    identify_very_short_nights,
    create_deep_sleep_variance_scatterplot,
):
    _run_analysis(filled_data, all_participants)

# Per-participant sleep comparison figure: build, save, then display.
for participant in all_participants:
    _participant_id = int(participant)
    fig = create_participant_sleep_comparison(_participant_id)
    fig.write_html(f'C:\\Users\\danie\\Downloads\\SleepData\\Results\\participant_{_participant_id}_sleep_comparison.html')
    fig.show()

# Per-participant Stroop vs. sleep comparison.
for participant in all_participants:
    create_participant_stroop_sleep_comparison(int(participant))

# Percentage-deep-sleep graphs per time window, then the combined figure.
for _run_analysis in (
    create_percentage_deep_sleep_comparison_graph_first_2_hours_vs_median,
    create_percentage_deep_sleep_comparison_graph_first_half_hour_vs_median,
    create_percentage_deep_sleep_comparison_graph_first_1_hour_vs_median,
    create_combined_percentage_deep_sleep_comparison_graph,
):
    _run_analysis(filled_data, all_participants)