In [3]:
import pandas as pd
import os
from datetime import datetime, timedelta
from tqdm import tqdm

In [4]:
def generate_data_vector(df, t_range, column_name, operation):
    """Reduce a timestamped frame to one value per time window.

    Consecutive entries of ``t_range`` delimit half-open windows
    ``[t_range[i], t_range[i + 1])``. For each window, the rows of ``df`` whose
    ``'ts'`` value falls inside it are reduced to a single entry; a window
    without any rows yields the placeholder ``-1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'ts'`` column comparable with the entries of
        ``t_range``. The caller's frame is not modified.
    t_range : sequence of timestamps
        Window boundaries; ``len(t_range) - 1`` windows are produced.
    column_name : str or sequence of str
        Column to read. For ``'multiple_columns'`` it must be a sequence whose
        first three entries name the columns to combine.
    operation : {'first_value', 'mean', 'multiple_columns'}
        * ``'first_value'`` - value of the first matching row (frame order).
        * ``'mean'`` - rounded mean of the matching rows.
        * ``'multiple_columns'`` - 3-tuple built from the first matching row
          of each of the three named columns.

    Returns
    -------
    list
        One entry per window, in window order.

    Raises
    ------
    ValueError
        If ``operation`` is not one of the supported names (previously this
        silently produced an empty list).
    """
    if operation == 'first_value':
        def reduce_window(rows):
            return rows[column_name].iloc[0]
    elif operation == 'mean':
        def reduce_window(rows):
            return rows[column_name].mean().round()
    elif operation == 'multiple_columns':
        def reduce_window(rows):
            return (rows[column_name[0]].iloc[0],
                    rows[column_name[1]].iloc[0],
                    rows[column_name[2]].iloc[0])
    else:
        raise ValueError(
            f"unsupported operation {operation!r}; expected 'first_value', "
            "'mean' or 'multiple_columns'"
        )

    # Fewer than two boundaries means there is no window to fill.
    if len(t_range) < 2:
        return []

    ts = df['ts']
    # With sorted boundaries no row outside [first, last) can match any window,
    # so drop those rows once instead of re-scanning the full history for
    # every window. Skipped for unsorted/plain sequences to keep results exact.
    if isinstance(t_range, pd.Index) and t_range.is_monotonic_increasing:
        df = df.loc[(ts >= t_range[0]) & (ts < t_range[-1])]
        ts = df['ts']

    data_vector = []
    for i in range(len(t_range) - 1):
        mask = (ts >= t_range[i]) & (ts < t_range[i + 1])
        if mask.any():
            data_vector.append(reduce_window(df.loc[mask]))
        else:
            data_vector.append(-1)
    return data_vector

In [5]:
def aggregate_pedometer_data(df):
    """Collapse pedometer readings into one row per calendar day.

    Rows are grouped by the calendar date of ``'ts'``. Within each day the
    earliest and latest readings are compared with the last reading of the
    previously processed day:

    * if the day's first reading has the same ``steps`` and ``distance`` as
      the previous day's last reading, the previous values are subtracted
      from the day's last reading;
    * otherwise the day's last reading is taken as-is.

    For the first day the "previous" reading is ``steps=0, distance=0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'ts'`` (anything ``pd.to_datetime`` accepts), ``'steps'``
        and ``'distance'`` columns. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``'date'`` (``datetime.date``), ``'steps'`` and ``'distance'``,
        one row per day in ascending date order. Empty input yields an empty
        frame that still has these three columns.
    """
    timestamps = pd.to_datetime(df['ts'])
    # Derive a new frame rather than writing 'ts'/'date' into the caller's one.
    readings = df.assign(ts=timestamps, date=timestamps.dt.date)

    previous_last = {'steps': 0, 'distance': 0}
    daily_records = []

    for date, group in readings.groupby('date'):
        day_sorted = group.sort_values(by='ts')
        first_today = day_sorted.iloc[0]
        last_today = day_sorted.iloc[-1]

        # Today's first reading repeating yesterday's last one means the
        # values carried over midnight, so only the difference belongs to today.
        carried_over = (
            previous_last['steps'] == first_today['steps']
            and previous_last['distance'] == first_today['distance']
        )
        if carried_over:
            steps = last_today['steps'] - previous_last['steps']
            distance = last_today['distance'] - previous_last['distance']
        else:
            steps = last_today['steps']
            distance = last_today['distance']

        daily_records.append({'date': date, 'steps': steps, 'distance': distance})
        previous_last = last_today

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # row-by-row appends were quadratic anyway.
    return pd.DataFrame(daily_records, columns=['date', 'steps', 'distance'])

In [17]:
def _parse_second_timestamps(series, drop_fraction=False):
    """Parse ``'%Y-%m-%d %H:%M:%S'`` strings into datetimes.

    With ``drop_fraction=True`` everything from the first ``'.'`` onwards is
    discarded before parsing (fractional seconds are truncated).
    """
    if series.empty:
        return pd.to_datetime(series)
    if drop_fraction:
        series = series.str.split('.').str[0]
    return pd.to_datetime(series, format='%Y-%m-%d %H:%M:%S')


def aggregate_dataFrames(df_morning, df_hr, df_light, df_acc, df_grav, df_gyr, df_pedometer, vector_freq, out_path):
    """Attach per-day signal vectors to every morning survey and save as CSV.

    For each survey row the window is the full calendar day before the day
    the survey was filled in (for a survey filled at 2023-09-11 07:06:00 the
    window is 2023-09-10 00:00:00 up to 2023-09-11 00:00:00). The window is cut
    into ``vector_freq``-minute bins and each signal is sampled per bin with
    ``generate_data_vector``.

    Parameters
    ----------
    df_morning : pandas.DataFrame
        Surveys; ``'filledTimestamp'`` holds ``'%Y-%m-%d %H:%M:%S'`` strings.
    df_hr : pandas.DataFrame
        Columns ``'ts'`` (``'%Y-%m-%d %H:%M:%S'`` strings), ``'device'``
        (``'samsung'`` / ``'polar'``) and ``'hr'``; sampled with ``'mean'``.
    df_light : pandas.DataFrame
        Columns ``'ts'`` and ``'light_level'``; sampled with ``'first_value'``.
    df_acc : pandas.DataFrame
        Columns ``'ts'``, ``'device'`` and ``'vector'``; sampled per device
        with ``'first_value'``.
    df_grav, df_gyr : pandas.DataFrame
        Columns ``'ts'`` and ``'vector'``; sampled with ``'first_value'``.
        For light/acc/grav/gyr a fractional part after ``'.'`` in ``'ts'`` is
        dropped before parsing.
    df_pedometer : pandas.DataFrame
        Daily totals with ``'date'``, ``'steps'`` and ``'distance'`` columns.
    vector_freq : int
        Bin width in minutes.
    out_path : str
        Destination of the resulting CSV (written without the index).

    Notes
    -----
    None of the input frames is modified, so the function can be called
    repeatedly with the same objects.
    """
    df_morning_copy = df_morning.copy()
    for new_column in ('hrSamsung', 'hrPolar', 'light', 'accSamsung', 'accPolar',
                       'grav', 'gyr', 'steps', 'distance'):
        df_morning_copy[new_column] = None

    df_morning_copy['filledTimestamp'] = _parse_second_timestamps(df_morning['filledTimestamp'])

    # Parse timestamps into local frames; overwriting the callers' 'ts' columns
    # would make a second call on the same frames fail.
    hr = df_hr.assign(ts=_parse_second_timestamps(df_hr['ts']))
    light = df_light.assign(ts=_parse_second_timestamps(df_light['ts'], drop_fraction=True))
    acc = df_acc.assign(ts=_parse_second_timestamps(df_acc['ts'], drop_fraction=True))
    grav = df_grav.assign(ts=_parse_second_timestamps(df_grav['ts'], drop_fraction=True))
    gyr = df_gyr.assign(ts=_parse_second_timestamps(df_gyr['ts'], drop_fraction=True))

    # (output column, source frame, source column, reduction). Device filters
    # do not depend on the survey row, so they are evaluated once here.
    signal_sources = (
        ('hrSamsung', hr[hr['device'] == 'samsung'], 'hr', 'mean'),
        ('hrPolar', hr[hr['device'] == 'polar'], 'hr', 'mean'),
        ('light', light, 'light_level', 'first_value'),
        ('accSamsung', acc[acc['device'] == 'samsung'], 'vector', 'first_value'),
        ('accPolar', acc[acc['device'] == 'polar'], 'vector', 'first_value'),
        ('grav', grav, 'vector', 'first_value'),
        ('gyr', gyr, 'vector', 'first_value'),
    )

    # Iterate over (label, timestamp) pairs so the timestamp always belongs to
    # the row being written, whatever index df_morning carries.
    for index, filled_at in df_morning_copy['filledTimestamp'].items():
        # start of the day before the survey was filled in
        window_start = (filled_at - timedelta(days=1)).replace(hour=0, minute=0, second=0)
        # end of that day (midnight of the survey day)
        window_end = window_start + timedelta(days=1)

        ######## PHYSIOLOGICAL SIGNALS ########
        # 'min' is the supported spelling of the minute alias ('T' is deprecated).
        time_range = pd.date_range(start=window_start, end=window_end, freq=f'{vector_freq}min')
        for target_column, frame, source_column, operation in signal_sources:
            df_morning_copy.at[index, target_column] = generate_data_vector(
                df=frame, t_range=time_range, column_name=source_column, operation=operation
            )

        ######## STEPS AND DISTANCE ########
        try:
            day_rows = df_pedometer[df_pedometer['date'] == window_start.date()]
            steps = day_rows['steps'].iloc[0]
            distance = day_rows['distance'].iloc[0]
        except (KeyError, IndexError):
            # no pedometer entry for that day (or no such column at all)
            steps = None
            distance = None
        df_morning_copy.at[index, 'steps'] = steps
        df_morning_copy.at[index, 'distance'] = distance

    df_morning_copy.to_csv(out_path, index=False)

In [None]:
##########  CHANGE THE PARAMETER VALUES ###########
vector_freq = 1 # sampling interval of the output vectors [min]
out_filename = "agg_minute_signal_data.csv" # output file name, saved inside each participant's folder

for iteration in range(1, 8):
    ##########  REPLACE /.../ WITH THE PATH TO THE PROCESSED DATA ###########
    iter_path = f"C:/.../aggregated_data/i_0{iteration}/"
    # Passing a list (not an enumerate object) lets tqdm show the total.
    for user_name in tqdm(sorted(os.listdir(iter_path))):
        user_path = os.path.join(iter_path, user_name)
        # Only participant folders hold input files; skip any stray files.
        if not os.path.isdir(user_path):
            continue

        df_morning = pd.read_csv(os.path.join(user_path, 'morning_forms.csv'))
        df_hr = pd.read_csv(os.path.join(user_path, 'mean_minute_hr.csv'))
        df_light = pd.read_csv(os.path.join(user_path, 'minute_light.csv'))
        df_acc = pd.read_csv(os.path.join(user_path, 'minute_acc.csv'))
        df_grav = pd.read_csv(os.path.join(user_path, 'minute_grav.csv'))
        df_gyr = pd.read_csv(os.path.join(user_path, 'minute_gyr.csv'))
        df_pedometer = pd.read_csv(os.path.join(user_path, 'minute_pedometer.csv'))
        df_pedometer_daily = aggregate_pedometer_data(df_pedometer)

        # Join with a separator so the file is written INSIDE the participant's
        # folder; plain concatenation produced "<user>agg_minute_signal_data.csv"
        # next to it.
        aggregate_dataFrames(
            df_morning, df_hr, df_light, df_acc, df_grav, df_gyr, df_pedometer_daily,
            vector_freq=vector_freq,
            out_path=os.path.join(user_path, out_filename),
        )