In [1]:
import pandas as pd
from functools import reduce
# Identifier of the participant whose data this notebook processes.
participant_id = "p07"


In [2]:
from src.physiological_data_preprocessing import (
        process_intraday_heart_rate,
        process_resting_heart_rate,
        get_daily_aggregate,
        preprocess_exercise_data,
        load_sleep_score,
        load_resting_heart_rate
)   

from src.subjective_data_preprocessing import (
    preprocess_wellness_csv

        
)



In [3]:
def create_merged_dfs(participant_id):
    """Assemble one per-day table for a participant from every data source.

    Loads the sleep score, wellness, resting heart rate and the daily sums of
    very-active and sedentary minutes, then outer-joins them on ``dateTime``
    so that a day is kept whenever at least one source has a row for it.

    Args:
        participant_id: Identifier forwarded to every loader (e.g. ``"p07"``).

    Returns:
        A DataFrame ordered by ``dateTime`` with a fresh 0..n-1 index and a
        ``participant_id`` column set to the given identifier.
    """
    # Loaders run in this order; the join order below is independent of it.
    sleep = load_sleep_score(participant_id=participant_id)
    wellness = preprocess_wellness_csv(participant_id)
    resting_hr = load_resting_heart_rate(participant_id)
    very_active = get_daily_aggregate(participant_id, "very_active_minutes", "sum")
    sedentary = get_daily_aggregate(participant_id, "sedentary_minutes", "sum")

    # Fold the remaining sources into the resting heart rate frame one by one.
    merged = resting_hr
    for source in (sleep, wellness, very_active, sedentary):
        merged = pd.merge(merged, source, on='dateTime', how='outer')

    merged = merged.sort_values(by='dateTime')
    merged = merged.reset_index(drop=True)
    merged["participant_id"] = participant_id
    return merged


In [4]:
def forward_fill_missing_values(df):
    """Return a copy of ``df`` with gaps filled forward, then backward.

    Each column is forward-filled first (a missing value takes the last
    observed value above it); any values still missing at the top of a column
    are then back-filled from the first observed value below. The input frame
    is left untouched.

    Args:
        df: DataFrame whose missing values should be imputed.

    Returns:
        A new DataFrame with the same shape and columns as ``df``.
    """
    # Both fills operate column-wise and return new objects, so the caller's
    # frame is never modified.
    filled_forward = df.ffill()
    return filled_forward.bfill()


In [5]:
# Build the merged per-day table for the participant configured at the top of
# the notebook (avoids a second hard-coded copy of the identifier).
df = create_merged_dfs(participant_id)

In [6]:
# Inspect the merged frame before imputation; the outer join leaves NaNs on
# days where a source has no row.
df

Unnamed: 0,dateTime,resting_heart_rate,avg_overall_sleep_score,fatigue,mood,stress,sleep_quality,very_active_minutes_sum,sedentary_minutes_sum,participant_id
0,2019-11-05,,,,,,,0,1440,p07
1,2019-11-06,,,3.0,3.0,5.0,4.0,65,1270,p07
2,2019-11-07,55.667916,,4.0,3.0,4.0,4.0,105,563,p07
3,2019-11-08,53.800657,82.0,3.0,3.0,3.0,4.0,29,660,p07
4,2019-11-09,53.646626,76.0,4.0,4.0,4.0,4.0,324,383,p07
...,...,...,...,...,...,...,...,...,...,...
143,2020-03-27,58.138510,76.0,,,,,15,350,p07
144,2020-03-28,57.808391,81.0,3.0,3.0,3.0,4.0,9,467,p07
145,2020-03-29,,,3.0,3.0,3.0,3.0,0,1325,p07
146,2020-03-30,55.178128,88.0,,,,,15,496,p07


In [7]:
# Impute the gaps (forward-fill, then back-fill). This rebinds `df`, so the
# un-imputed merged frame is no longer available after this cell.
df = forward_fill_missing_values(df)

In [8]:
# Inspect the frame again after imputation.
df

Unnamed: 0,dateTime,resting_heart_rate,avg_overall_sleep_score,fatigue,mood,stress,sleep_quality,very_active_minutes_sum,sedentary_minutes_sum,participant_id
0,2019-11-05,55.667916,82.0,3.0,3.0,5.0,4.0,0,1440,p07
1,2019-11-06,55.667916,82.0,3.0,3.0,5.0,4.0,65,1270,p07
2,2019-11-07,55.667916,82.0,4.0,3.0,4.0,4.0,105,563,p07
3,2019-11-08,53.800657,82.0,3.0,3.0,3.0,4.0,29,660,p07
4,2019-11-09,53.646626,76.0,4.0,4.0,4.0,4.0,324,383,p07
...,...,...,...,...,...,...,...,...,...,...
143,2020-03-27,58.138510,76.0,3.0,3.0,3.0,2.0,15,350,p07
144,2020-03-28,57.808391,81.0,3.0,3.0,3.0,4.0,9,467,p07
145,2020-03-29,57.808391,81.0,3.0,3.0,3.0,3.0,0,1325,p07
146,2020-03-30,55.178128,88.0,3.0,3.0,3.0,3.0,15,496,p07


In [9]:
# Move participant_id to the front, keeping every other column in its current
# order. Selecting by name (instead of slicing off the last column by
# position) keeps this cell safe to re-run: a positional slice would duplicate
# participant_id and drop a data column on a second execution.
df = df[["participant_id"] + [col for col in df.columns if col != "participant_id"]]

In [10]:
# Write the cleaned table, naming the file after the configured participant so
# the file name cannot drift from the data it contains.
# NOTE: the ./cleaned_data directory must already exist; to_csv does not create it.
df.to_csv(f"./cleaned_data/14thJune2025_{participant_id}.csv", index=False)