In [None]:
import pandas as pd
import numpy as np
import os

def acc_constructor(ind):
    """Label subject ``ind``'s accelerometer samples with activity codes and save them.

    Reads ``S{ind}_E4/ACC.csv`` and ``S{ind}_activity.csv`` from the subject's
    folder, places the samples on a synthetic timeline that starts at
    2024-01-01 00:00:00 with one sample every 1/32 s, and tags every sample
    with the code of the most recently started activity.

    Parameters
    ----------
    ind : int
        Subject number; selects the ``S{ind}`` folder and file names.

    Returns
    -------
    pandas.DataFrame
        Columns ``acc_x``, ``acc_y``, ``acc_z``, ``datetime`` and the integer
        ``activity`` code. The same frame is written (overwriting any previous
        file) to ``S{ind}_E4/ACC_with_specific_activity.csv``.

    Notes
    -----
    Samples that precede the first listed activity receive code 0 (the same
    code used for unmapped activity names), and the last listed activity
    extends to the end of the recording.
    """
    subject_dir = f'HAR/ppg+dalia/data/PPG_FieldStudy/S{ind}'

    # Load accelerometer data. The two skipped rows are the E4 export metadata
    # (session start time, sample rate); the file has no column-name row, so
    # header=None keeps the first real sample from being consumed as a header.
    df = pd.read_csv(f'{subject_dir}/S{ind}_E4/ACC.csv', skiprows=2, header=None)
    df.columns = ['acc_x', 'acc_y', 'acc_z']
    elapsed_s = df.index.to_numpy() / 32  # seconds since the first sample
    df["datetime"] = pd.to_datetime(df.index / 32, unit="s", origin="2024-01-01")

    # Load activity data: one row per activity with its start time in seconds.
    act_df = pd.read_csv(f'{subject_dir}/S{ind}_activity.csv', header=0)
    act_df.columns = ['act_name', f'S{ind}']

    # Map activity names to integer codes; names not listed here become 0.
    activity_mapping = {
        "# BASELINE": 1,
        "# CLEAN_BASELINE": 2,
        '# STAIRS': 3,
        '# SOCCER': 4,
        '# CYCLING': 5,
        '# WALKING': 6,
        "# LUNCH": 7,
        "# WORKING": 8,
        "# DRIVING": 9
    }
    act_df['activity'] = act_df['act_name'].map(activity_mapping).fillna(0).astype(int)

    # Debug: Print unique activities
    print(f"Unique activities in S{ind}: {act_df['act_name'].unique()}")

    # Debug: Print the activity column to check classification
    print(f"Activity classification for S{ind}:")
    print(act_df[['act_name', 'activity']].head(18))

    # Assign activity labels to the accelerometer data. Each activity covers
    # [its start, next start); searchsorted finds, for every sample, the last
    # activity whose start is <= the sample time in a single vectorised pass.
    start_s = act_df[f'S{ind}'].to_numpy(dtype=float)
    codes = act_df['activity'].to_numpy()
    order = np.argsort(start_s, kind='stable')  # searchsorted needs sorted starts
    start_s, codes = start_s[order], codes[order]

    segment = np.searchsorted(start_s, elapsed_s, side='right') - 1
    labels = np.zeros(len(df), dtype=int)
    has_segment = segment >= 0  # False for samples before the first activity
    labels[has_segment] = codes[segment[has_segment]]
    df['activity'] = labels

    # Save the DataFrame (to_csv overwrites an existing file on its own).
    output_path = f'{subject_dir}/S{ind}_E4/ACC_with_specific_activity.csv'
    df.to_csv(output_path, index=False)
    print(f"S{ind} done!")
    return df

In [None]:
def hr_constructor(ind):
    """Label subject ``ind``'s heart-rate samples with activity codes and save them.

    Reads ``S{ind}_E4/HR.csv`` and ``S{ind}_activity.csv`` from the subject's
    folder, places the samples on a synthetic timeline that starts at
    2024-01-01 00:00:00 with one sample per second, and tags every sample with
    the code of the most recently started activity.

    Parameters
    ----------
    ind : int
        Subject number; selects the ``S{ind}`` folder and file names.

    Returns
    -------
    pandas.DataFrame
        Columns ``hr``, ``datetime`` and the integer ``activity`` code. The
        same frame is written (overwriting any previous file) to
        ``S{ind}_E4/HR_with_specific_activity.csv``.

    Notes
    -----
    Samples that precede the first listed activity receive code 0 (the same
    code used for unmapped activity names), and the last listed activity
    extends to the end of the recording.
    """
    subject_dir = f'HAR/ppg+dalia/data/PPG_FieldStudy/S{ind}'

    # Load heart-rate data. The two skipped rows are the E4 export metadata
    # (start time, sample rate); the file has no column-name row, so
    # header=None keeps the first real sample from being consumed as a header.
    # NOTE(review): the skipped start-time row is ignored, so HR is assumed to
    # begin at the same instant as the activity clock — confirm against the
    # device documentation whether HR.csv starts later than the session.
    df = pd.read_csv(f'{subject_dir}/S{ind}_E4/HR.csv', skiprows=2, header=None)
    df.columns = ["hr"]
    elapsed_s = df.index.to_numpy()  # seconds since the first sample (1 Hz)
    df["datetime"] = pd.to_datetime(df.index, unit="s", origin="2024-01-01")

    # Load activity data: one row per activity with its start time in seconds.
    act_df = pd.read_csv(f'{subject_dir}/S{ind}_activity.csv', header=0)
    act_df.columns = ['act_name', f'S{ind}']

    # Map activity names to integer codes; names not listed here become 0.
    activity_mapping = {
        "# BASELINE": 1,
        "# CLEAN_BASELINE": 2,
        '# STAIRS': 3,
        '# SOCCER': 4,
        '# CYCLING': 5,
        '# WALKING': 6,
        "# LUNCH": 7,
        "# WORKING": 8,
        "# DRIVING": 9
    }
    act_df['activity'] = act_df['act_name'].map(activity_mapping).fillna(0).astype(int)

    # Debug: Print unique activities
    print(f"Unique activities in S{ind}: {act_df['act_name'].unique()}")

    # Debug: Print the activity column to check classification
    print(f"Activity classification for S{ind}:")
    print(act_df[['act_name', 'activity']].head(18))

    # Assign activity labels to the heart-rate data. Each activity covers
    # [its start, next start); searchsorted finds, for every sample, the last
    # activity whose start is <= the sample time in a single vectorised pass.
    start_s = act_df[f'S{ind}'].to_numpy(dtype=float)
    codes = act_df['activity'].to_numpy()
    order = np.argsort(start_s, kind='stable')  # searchsorted needs sorted starts
    start_s, codes = start_s[order], codes[order]

    segment = np.searchsorted(start_s, elapsed_s, side='right') - 1
    labels = np.zeros(len(df), dtype=int)
    has_segment = segment >= 0  # False for samples before the first activity
    labels[has_segment] = codes[segment[has_segment]]
    df['activity'] = labels

    # Save the DataFrame (to_csv overwrites an existing file on its own).
    output_path = f'{subject_dir}/S{ind}_E4/HR_with_specific_activity.csv'
    df.to_csv(output_path, index=False)
    print(f"S{ind} done!")
    return df

In [None]:
def bpv_constructor(ind):
    """Label subject ``ind``'s BVP samples with activity codes and save them.

    Reads ``S{ind}_E4/BVP.csv`` and ``S{ind}_activity.csv`` from the subject's
    folder, places the samples on a synthetic timeline that starts at
    2024-01-01 00:00:00 with one sample every 1/64 s, and tags every sample
    with the code of the most recently started activity.

    Parameters
    ----------
    ind : int
        Subject number; selects the ``S{ind}`` folder and file names.

    Returns
    -------
    pandas.DataFrame
        Columns ``bvp``, ``datetime`` and the integer ``activity`` code. The
        same frame is written (overwriting any previous file) to
        ``S{ind}_E4/BVP_with_specific_activity.csv``.

    Notes
    -----
    Samples that precede the first listed activity receive code 0 (the same
    code used for unmapped activity names), and the last listed activity
    extends to the end of the recording.
    """
    subject_dir = f'HAR/ppg+dalia/data/PPG_FieldStudy/S{ind}'

    # Load BVP data. The two skipped rows are the E4 export metadata
    # (session start time, sample rate); the file has no column-name row, so
    # header=None keeps the first real sample from being consumed as a header.
    df = pd.read_csv(f'{subject_dir}/S{ind}_E4/BVP.csv', skiprows=2, header=None)
    df.columns = ["bvp"]
    elapsed_s = df.index.to_numpy() / 64  # seconds since the first sample
    df["datetime"] = pd.to_datetime(df.index / 64, unit="s", origin="2024-01-01")

    # Load activity data: one row per activity with its start time in seconds.
    act_df = pd.read_csv(f'{subject_dir}/S{ind}_activity.csv', header=0)
    act_df.columns = ['act_name', f'S{ind}']

    # Map activity names to integer codes; names not listed here become 0.
    activity_mapping = {
        "# BASELINE": 1,
        "# CLEAN_BASELINE": 2,
        '# STAIRS': 3,
        '# SOCCER': 4,
        '# CYCLING': 5,
        '# WALKING': 6,
        "# LUNCH": 7,
        "# WORKING": 8,
        "# DRIVING": 9
    }
    act_df['activity'] = act_df['act_name'].map(activity_mapping).fillna(0).astype(int)

    # Debug: Print unique activities
    print(f"Unique activities in S{ind}: {act_df['act_name'].unique()}")

    # Debug: Print the activity column to check classification
    print(f"Activity classification for S{ind}:")
    print(act_df[['act_name', 'activity']].head(18))

    # Assign activity labels to the BVP data. Each activity covers
    # [its start, next start); searchsorted finds, for every sample, the last
    # activity whose start is <= the sample time in a single vectorised pass.
    start_s = act_df[f'S{ind}'].to_numpy(dtype=float)
    codes = act_df['activity'].to_numpy()
    order = np.argsort(start_s, kind='stable')  # searchsorted needs sorted starts
    start_s, codes = start_s[order], codes[order]

    segment = np.searchsorted(start_s, elapsed_s, side='right') - 1
    labels = np.zeros(len(df), dtype=int)
    has_segment = segment >= 0  # False for samples before the first activity
    labels[has_segment] = codes[segment[has_segment]]
    df['activity'] = labels

    # Save the DataFrame (to_csv overwrites an existing file on its own).
    output_path = f'{subject_dir}/S{ind}_E4/BVP_with_specific_activity.csv'
    df.to_csv(output_path, index=False)
    print(f"S{ind} done!")
    return df

In [None]:
# Build the labelled ACC, HR and BVP files for subjects S1 through S15,
# finishing all three signals of one subject before moving to the next.
for subject_id in np.arange(1, 16):
    for build_labelled in (acc_constructor, hr_constructor, bpv_constructor):
        build_labelled(subject_id)

Unique activities in S1: ['# NO_ACTIVITY' '# BASELINE' '# STAIRS' '# SOCCER' '# CYCLING'
 '# DRIVING' '# LUNCH' '# WALKING' '# WORKING' '# CLEAN_BASELINE']
Activity classification for S1:
            act_name  activity
0      # NO_ACTIVITY         0
1         # BASELINE         1
2      # NO_ACTIVITY         0
3           # STAIRS         3
4      # NO_ACTIVITY         0
5           # SOCCER         4
6      # NO_ACTIVITY         0
7          # CYCLING         5
8      # NO_ACTIVITY         0
9          # DRIVING         9
10     # NO_ACTIVITY         0
11           # LUNCH         7
12         # WALKING         6
13     # NO_ACTIVITY         0
14         # WORKING         8
15     # NO_ACTIVITY         0
16  # CLEAN_BASELINE         2
17     # NO_ACTIVITY         0
S1 done!
Unique activities in S1: ['# NO_ACTIVITY' '# BASELINE' '# STAIRS' '# SOCCER' '# CYCLING'
 '# DRIVING' '# LUNCH' '# WALKING' '# WORKING' '# CLEAN_BASELINE']
Activity classification for S1:
            act_name  acti

Unique activities in S4: ['# NO_ACTIVITY' '# BASELINE' '# STAIRS' '# SOCCER' '# CYCLING'
 '# DRIVING' '# LUNCH' '# WALKING' '# WORKING' '# CLEAN_BASELINE']
Activity classification for S4:
            act_name  activity
0      # NO_ACTIVITY         0
1         # BASELINE         1
2      # NO_ACTIVITY         0
3           # STAIRS         3
4      # NO_ACTIVITY         0
5           # SOCCER         4
6      # NO_ACTIVITY         0
7          # CYCLING         5
8      # NO_ACTIVITY         0
9          # DRIVING         9
10     # NO_ACTIVITY         0
11           # LUNCH         7
12         # WALKING         6
13     # NO_ACTIVITY         0
14         # WORKING         8
15     # NO_ACTIVITY         0
16  # CLEAN_BASELINE         2
17     # NO_ACTIVITY         0
