In [3]:
import pandas as pd
import os
import json
from datetime import datetime

In [4]:
def convert_dates(df):
    """Truncate the ``filledTimestamp`` column of ``df`` to whole minutes.

    Each non-missing value is expected to be a string shaped like
    ``YYYY-MM-DDTHH:MM:SS[.fraction]``. Everything from the first ``.`` on is
    discarded, the remainder is parsed, and the seconds are set to zero, so
    values are floored to the minute (not rounded to the nearest one).
    Missing values (``None``/``NaN``) become ``NaT`` instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``filledTimestamp`` column.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in; its ``filledTimestamp``
        column is overwritten with the parsed values.

    Raises
    ------
    ValueError
        If a non-missing value does not match ``%Y-%m-%dT%H:%M:%S`` once the
        fractional part has been removed.
    """

    def _floor_to_minute(raw):
        # A form without a timestamp must not abort the whole conversion.
        if pd.isna(raw):
            return pd.NaT
        # Drop the fractional seconds (everything after the first '.').
        without_fraction = raw.split('.')[0]
        # Parse, then zero the seconds so the value lands on a minute boundary.
        parsed = datetime.strptime(without_fraction, '%Y-%m-%dT%H:%M:%S')
        return parsed.replace(second=0, microsecond=0)

    df['filledTimestamp'] = df['filledTimestamp'].apply(_floor_to_minute)

    return df

In [5]:
def _read_forms(user_path, form_kind):
    """Load every JSON form found under ``<user_path>/<entry>/<form_kind>/``."""
    records = []
    # Sorted so the row order of the output does not depend on the file system.
    for entry in sorted(os.listdir(user_path)):
        forms_dir = os.path.join(user_path, entry, form_kind)
        # Skip stray files and entries that have no folder for this form kind.
        if not os.path.isdir(forms_dir):
            continue
        for file_name in sorted(os.listdir(forms_dir)):
            # Binary mode lets json.load detect the UTF encoding itself instead
            # of relying on the platform's default text encoding.
            with open(os.path.join(forms_dir, file_name), 'rb') as f:
                records.append(json.load(f))
    # Build the frame once; DataFrame.append was removed in pandas 2.0 and was
    # quadratic when called inside a loop.
    return pd.DataFrame(records)


def _flatten_filled_forms(forms, nested_columns, renames):
    """Keep forms whose status is FILLED and expand their nested columns."""
    no_forms = pd.DataFrame(columns=['filledTimestamp'])
    if forms.empty:
        return no_forms
    filled = forms[forms['status'] == 'FILLED']
    if filled.empty:
        return no_forms
    parts = [filled['filledTimestamp']]
    # Each nested column holds one mapping per row; turn its keys into columns.
    parts.extend(filled[column].apply(pd.Series) for column in nested_columns)
    return pd.concat(parts, axis=1).rename(columns=renames)


def _rejected_percent(total, kept):
    """Percentage of forms that were dropped; 0.0 when there were no forms."""
    if total == 0:
        return 0.0
    return round(((total - kept) / total) * 100, 2)


def create_forms_dataframe_for_user(in_path, out_path):
    """Aggregate one user's morning and evening forms into two CSV files.

    Every sub-folder of ``in_path`` is scanned for a ``morning`` and an
    ``evening`` folder, and each file inside those is read as a JSON form.
    Only forms whose ``status`` equals ``'FILLED'`` are kept. Their
    ``response`` mapping (and ``sleepSegment`` for morning forms) is expanded
    into separate, renamed columns, ``filledTimestamp`` is passed through
    ``convert_dates``, and the results are written to ``morning_forms.csv``
    and ``evening_forms.csv`` inside ``out_path``. The percentage of rejected
    forms of each kind is printed.

    A user with no forms, or no FILLED forms, of a given kind gets a CSV that
    contains only the ``filledTimestamp`` header, and 0.0% is reported when
    there were no forms at all.

    Parameters
    ----------
    in_path : str
        Folder of a single user.
    out_path : str
        Existing folder that receives the two CSV files; a trailing
        separator is optional.

    Raises
    ------
    KeyError
        If loaded forms lack the ``status`` column, or FILLED forms lack
        ``filledTimestamp`` or one of the nested columns.
    """
    df_morning_forms = _read_forms(in_path, 'morning')
    df_evening_forms = _read_forms(in_path, 'evening')

    # Keep only forms with status FILLED and flatten their nested fields.
    df_morning_final = _flatten_filled_forms(
        df_morning_forms,
        ('response', 'sleepSegment'),
        {'rest': 'morningRest', 'stress': 'morningStress', 'composure': 'morningComposure',
         'startTimestamp': 'sleepStart', 'endTimestamp': 'sleepEnd', 'duration': 'sleepDuration'},
    )
    # NOTE(review): 'eveningUpredictability' is misspelled, but it is the
    # column name written to the CSV; left unchanged in case downstream
    # readers depend on it.
    df_evening_final = _flatten_filled_forms(
        df_evening_forms,
        ('response',),
        {'health': 'eveningHealth', 'mood': 'eveningMood', 'overwhelm': 'eveningOverwhelm',
         'unpredictability': 'eveningUpredictability'},
    )

    out_morning = _rejected_percent(len(df_morning_forms), len(df_morning_final))
    out_evening = _rejected_percent(len(df_evening_forms), len(df_evening_final))

    df_morning_final = convert_dates(df_morning_final).reset_index(drop=True)
    df_evening_final = convert_dates(df_evening_final).reset_index(drop=True)

    print("Odrzucono {out}% ankiet porannych.".format(out=out_morning))
    print("Odrzucono {out}% ankiet wieczornych.".format(out=out_evening))
    # os.path.join gives the same result whether or not out_path ends with a
    # separator, unlike plain string concatenation.
    df_morning_final.to_csv(os.path.join(out_path, 'morning_forms.csv'), index=False)
    df_evening_final.to_csv(os.path.join(out_path, 'evening_forms.csv'), index=False)

In [None]:
########### PROCESSING OF MORNING AND EVENING FORM DATA ###########

# The loop variable is `iteration`, not `iter`: a module-level `iter` would
# shadow the builtin for every cell executed afterwards in this kernel.
for iteration in range(1, 8):
    ########### REPLACE /.../ WITH THE PATH TO THE FORMS DATA ###########
    iter_path = f"C:/.../forms/iteration_0{iteration}/"
    # Every entry of the iteration folder is treated as one user's folder.
    for user_name in os.listdir(iter_path):
        in_path = os.path.join(iter_path, user_name)

        ########### REPLACE /.../ WITH THE PATH WHERE THE PROCESSED DATA SHOULD BE SAVED ###########
        # Keep the trailing slash: output file names may be appended to this
        # string directly.
        out_path = f"C:/.../aggregated_data/i_0{iteration}/{user_name}/"

        # exist_ok avoids the separate existence check (and the race between
        # checking and creating).
        os.makedirs(out_path, exist_ok=True)
        # Write the processed data to files.
        create_forms_dataframe_for_user(in_path, out_path)