# Prediction of the morning evaluation based on the evening mood

+ Read all the questionnaires
+ Join them based on the evening-morning relations
+ Evening or morning entries without correlated data are removed

In [None]:
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torch.nn import functional as F

In [None]:
# Root directory holding all the questionnaires (listed later with os.listdir).
PATH = "C:/Users/Admin/Thesis/data/ankiety"
# Base folder of the preprocessing outputs.
path_to_folder = 'D:/DATA_THESIS/Projekt_nw_AG_AJ/Outputs_project_data_preprocessing'
# Destination for the CSV / pickle files written by this notebook.
# NOTE(review): 'Outputs_project_data_preprocessing' appears both at the end of
# path_to_folder and again here, so the resulting path contains it twice —
# confirm this nested directory is intended.
path_to_save = f'{path_to_folder}/Outputs_project_data_preprocessing/1_data'

In [None]:
import json
def load_json_file(path: str) -> dict:
    """Read a UTF-8 encoded JSON file and return its parsed content.

    The '.json' extension is appended to *path* when it is missing.
    """
    full_path = path if path.endswith('.json') else path + '.json'
    with open(full_path, encoding="utf8") as handle:
        return json.load(handle)
import os
# Rows collected from the FILLED questionnaires of every participant.
p_stress_data = []   # morning questionnaires
emotion_data = []    # emotion questionnaires
evening_stress = []  # evening questionnaires
p_data_emo, p_data_morning, p_data_evening = {}, {}, {}
p_stress = {}

# Response fields copied into the rows of each questionnaire type.
_MORNING_FIELDS = ('rest', 'sleepQuality', 'stress', 'composure')
_EVENING_FIELDS = ('overwhelm', 'health', 'mood', 'unpredictability')
_EMOTION_FIELDS = ('intenseEmotion', 'valence', 'arousal', 'comments')


def _collect_filled(p_path, kind, participant, fields):
    """Return one row per filled-in assessment stored in ``{p_path}/{kind}``.

    Args:
        p_path: path to the participant's ``assessments`` folder.
        kind: sub-folder name of the questionnaire type
            ('morning', 'evening' or 'emotion').
        participant: participant identifier stored under the 'id' key.
        fields: names of the entries copied from the assessment's 'response'.

    Returns:
        A list of dicts with the keys 'id', 'createdTimestamp',
        'filledTimestamp' followed by *fields*, in that order.
    """
    folder = f"{p_path}/{kind}"
    rows = []
    for file_name in os.listdir(folder):
        assessment = load_json_file(f"{folder}/{file_name}")
        response = assessment['response']
        # Skip questionnaires that were not filled in or carry no answers.
        if assessment['status'] != 'FILLED' or response is None:
            continue
        row = {
            'id': participant,
            'createdTimestamp': assessment['createdTimestamp'],
            'filledTimestamp': assessment['filledTimestamp'],
        }
        row.update({field: response[field] for field in fields})
        rows.append(row)
    return rows


# Directory layout walked here:
#   PATH/<iteration>/<participant>/assessments/<kind>/<file>
for iteration in tqdm(os.listdir(PATH)):
    p_iter = f"{PATH}/{iteration}"
    for participant in os.listdir(p_iter):
        p_path = f"{p_iter}/{participant}/assessments"
        p_stress_data.extend(
            _collect_filled(p_path, 'morning', participant, _MORNING_FIELDS))
        evening_stress.extend(
            _collect_filled(p_path, 'evening', participant, _EVENING_FIELDS))
        emotion_data.extend(
            _collect_filled(p_path, 'emotion', participant, _EMOTION_FIELDS))

In [None]:
# Evening questionnaire rows as a table; the cell shows the number of rows.
df_evening = pd.DataFrame(data=evening_stress)
df_evening.shape[0]

In [None]:
# Morning questionnaire rows as a table; the cell shows the number of rows.
df_morning = pd.DataFrame(data=p_stress_data)
df_morning.shape[0]

In [None]:
# Emotion questionnaire rows as a table; the cell shows the number of rows.
df_emotion = pd.DataFrame(data=emotion_data)
df_emotion.shape[0]

In [None]:
# Persist the three raw questionnaire tables, one CSV file per table.
df_evening.to_csv(f'{path_to_save}/df_evening.csv')
# The morning table needs its own file name: writing it to df_evening.csv
# would overwrite the evening export written on the line above.
df_morning.to_csv(f'{path_to_save}/df_morning.csv')
df_emotion.to_csv(f'{path_to_save}/df_emotion.csv')

vector -> prediction
classifiers
LSTM
AdaBoost
model explainability - deep networks

In [None]:
def convert_to_datetime(timestamp):
    """Parse *timestamp* with pandas; return None when parsing raises ValueError."""
    try:
        parsed = pd.to_datetime(timestamp)
    except ValueError:
        parsed = None
    return parsed

# Parse the 'filledTimestamp' column of the evening and morning tables in place
for frame in (df_evening, df_morning):
    frame['filledTimestamp'] = frame['filledTimestamp'].apply(convert_to_datetime)

display(df_evening)

In [None]:
# Split the fill-in timestamp into a calendar day and an hour-of-day column;
# the join below matches questionnaires on these two values.
# The morning table gets its own column names, evening and emotion share theirs.
for frame, day_column, hour_column in (
    (df_morning, 'day_morning', 'hour_morning'),
    (df_evening, 'day', 'hour'),
    (df_emotion, 'day', 'hour'),
):
    filled = pd.to_datetime(frame['filledTimestamp'])
    frame[day_column] = filled.dt.floor('D')
    frame[hour_column] = filled.dt.hour

## joining evening with morning

In [None]:
# Pair each morning questionnaire with the evening questionnaire preceding it.
# Every list below becomes one column of the joined table; position i in all
# of the lists belongs to the same evening-morning pair.
rest, sleepQ, stress, composure = [], [], [], []
filledT, filledT_e = [], []
over, health, mood, unpred = [], [], [], []
ids = []

# (target list, source column) pairs for the morning and the evening side
morning_targets = (
    (rest, 'rest'),
    (sleepQ, 'sleepQuality'),
    (stress, 'stress'),
    (composure, 'composure'),
    (filledT, 'filledTimestamp'),
)
evening_targets = (
    (filledT_e, 'filledTimestamp'),
    (over, 'overwhelm'),
    (health, 'health'),
    (mood, 'mood'),
    (unpred, 'unpredictability'),
    (ids, 'id'),
)

for patient in df_morning['id'].unique():
    patient_mornings = df_morning[df_morning['id'] == patient]
    patient_single = df_evening[df_evening['id'] == patient]

    for morning in list(patient_mornings['day_morning']):
        previous_day = morning - timedelta(days=1)
        prev_evenings = patient_single[patient_single['day'] == previous_day]
        same_day_evenings = patient_single[patient_single['day'] == morning]

        # Only the first questionnaire of a day decides whether it qualifies:
        # filled at 17-23h on the previous day, otherwise at 0-5h on the
        # morning's own day (i.e. filled in after midnight).
        if len(prev_evenings) != 0 and 17 <= prev_evenings['hour'].iloc[0] <= 23:
            day_before_df = prev_evenings.reset_index(drop=True)
        elif len(same_day_evenings) != 0 and 0 <= same_day_evenings['hour'].iloc[0] <= 5:
            day_before_df = same_day_evenings.reset_index(drop=True)
        else:
            # no usable evening questionnaire for this morning
            continue

        morning_data = patient_mornings[
            patient_mornings['day_morning'] == morning
        ].reset_index(drop=True)

        # store the first matching row of each side
        for target, column in morning_targets:
            target.append(morning_data[column][0])
        for target, column in evening_targets:
            target.append(day_before_df[column][0])


In [None]:
# Assemble the joined table: morning answers, both fill-in timestamps,
# evening answers and the participant id, in that column order.
evening_morning = pd.DataFrame({
    'rest': rest,
    'sleepQuality': sleepQ,
    'stress': stress,
    'composure': composure,
    'filledTimestamp_morning': filledT,
    'filledTimestamp_evening': filledT_e,
    'overwhelm': over,
    'health': health,
    'mood': mood,
    'unpredictability': unpred,
    'id': ids,
})

In [None]:
# Add the column time_difference: the time between filling out the evening
# form and filling out the morning form, used as a proxy for the time between
# going to sleep and waking up.

# Make sure both timestamp columns are datetimes before subtracting them.
# NOTE(review): the two format strings differ (space vs. 'T' between date and
# time). Both columns come from 'filledTimestamp' values that already went
# through convert_to_datetime earlier in the notebook, so the formats may have
# no effect here — confirm, and unify them if they do.
evening_morning['filledTimestamp_morning'] = pd.to_datetime(evening_morning['filledTimestamp_morning'], format="%Y-%m-%d %H:%M:%S.%f")
evening_morning['filledTimestamp_evening'] = pd.to_datetime(evening_morning['filledTimestamp_evening'], format="%Y-%m-%dT%H:%M:%S.%f")

def calculate_time_difference(row):
    """Return the hours elapsed between the evening and the morning fill-in time.

    *row* must provide 'filledTimestamp_morning' and 'filledTimestamp_evening'
    values whose difference offers ``total_seconds()``.
    """
    seconds_per_hour = 3600
    elapsed = row['filledTimestamp_morning'] - row['filledTimestamp_evening']
    return elapsed.total_seconds() / seconds_per_hour

# Apply calculate_time_difference to every row (axis=1) and store the result,
# expressed in hours, in the new column 'time_difference'.
evening_morning['time_difference'] = evening_morning.apply(calculate_time_difference, axis=1)


In [None]:
# Save the joined evening-morning table as a pickle file inside path_to_save.
evening_morning.to_pickle(f'{path_to_save}/evening_morning.pkl')