##### This notebook preprocesses a subset of the labeling features and creates a slightly revised version of the labeling set to be visualized.
In more detail, the preprocessed features are:
1. bmi
2. ecg
3. heart_rate_alert
4. nightly_temperature

In [1]:
import numpy as np
import pandas as pd

In [11]:
# Input: pass either the hourly or the daily unprocessed labeling frame, i.e.
#   '../data/preprocessing_final/labeling_df_hourly_unprocessed.pkl' or
#   '../data/preprocessing_final/labeling_df_daily_unprocessed.pkl'
# NOTE(review): read_pickle executes arbitrary code on load; only use trusted files.
data = pd.read_pickle(
    '../data/preprocessing_final/labeling_df_daily_unprocessed.pkl'
)
data

Unnamed: 0,id,date,ecg,heart_rate_alert,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,deep_sleep_breathing_rate,...,negative_affect_score,stai_stress,ttm_stage,dramatic_relief_category,environmental_reevaluation_category,self_reevaluation_category,social_liberation_category,reinforcement_management_category,self_liberation_category,mood
0,621e2ff067b776a2403eb737,2021-12-22,NSR,NONE,33.737162,,,,,,...,,,,,,,,,,TIRED
1,621e2ff067b776a2403eb737,2021-12-22,NSR,NONE,33.737162,,,,,,...,,,,,,,,,,TIRED
2,621e2ff067b776a2403eb737,2021-11-18,,,34.946341,,,,,,...,,,,,,,,,,<no-response>
3,621e2ff067b776a2403eb737,2021-11-18,,,34.946341,,,,,,...,,,,,,,,,,TIRED
4,621e2ff067b776a2403eb737,2021-11-18,,,34.946341,,,,,,...,,,,,,,,,,RESTED/RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14430,621e362467b776a2404ad513,2021-05-18,,,,,,,,,...,,,,,,,,,,HAPPY
14431,621e362467b776a2404ad513,2021-05-23,,,,,,,,,...,,,,,,,,,,HAPPY
14432,621e36f967b776a240e5e7c9,2021-05-20,,,,,,,,,...,,,,,,,,,,RESTED/RELAXED
14433,621e36f967b776a240e5e7c9,2021-05-20,,,,,,,,,...,,,,,,,,,,TIRED


#### 1. bmi

In [12]:
# bmi is converted into a categorical feature with 4 string labels:
# 'Underweight' (< 18.5), 'Normal' (< 25), 'Overweight' (< 30) and 'Obese' (>= 30).

# Range-coded answers are swapped for a representative number inside the range so
# they can be compared against the numeric cut-offs:
#   '<19'  -> 18.0 (ends up as 'Underweight')
#   '>=25' -> 26.0 (ends up as 'Overweight')
#   '>=30' -> 31.0 (ends up as 'Obese')
BMI_RANGE_TO_VALUE = {'<19': 18.0, '>=25': 26.0, '>=30': 31.0}


def bmi_to_category(bmi):
    """Return the weight-class label for a numeric bmi value.

    Anything that is not below 30 (including NaN) is labelled 'Obese'.
    """
    if bmi < 18.5:
        return 'Underweight'
    if bmi < 25:
        return 'Normal'
    if bmi < 30:
        return 'Overweight'
    return 'Obese'


# Missing values are imputed with the most frequent raw value before any conversion.
# The column is only overwritten once, after every step has succeeded.
data['bmi'] = (
    data['bmi']
    .fillna(data['bmi'].mode().iloc[0])
    .apply(lambda x: BMI_RANGE_TO_VALUE.get(x, x) if isinstance(x, str) else x)
    .apply(bmi_to_category)
)
data

Unnamed: 0,id,date,ecg,heart_rate_alert,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,deep_sleep_breathing_rate,...,negative_affect_score,stai_stress,ttm_stage,dramatic_relief_category,environmental_reevaluation_category,self_reevaluation_category,social_liberation_category,reinforcement_management_category,self_liberation_category,mood
0,621e2ff067b776a2403eb737,2021-12-22,NSR,NONE,33.737162,,,,,,...,,,,,,,,,,TIRED
1,621e2ff067b776a2403eb737,2021-12-22,NSR,NONE,33.737162,,,,,,...,,,,,,,,,,TIRED
2,621e2ff067b776a2403eb737,2021-11-18,,,34.946341,,,,,,...,,,,,,,,,,<no-response>
3,621e2ff067b776a2403eb737,2021-11-18,,,34.946341,,,,,,...,,,,,,,,,,TIRED
4,621e2ff067b776a2403eb737,2021-11-18,,,34.946341,,,,,,...,,,,,,,,,,RESTED/RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14430,621e362467b776a2404ad513,2021-05-18,,,,,,,,,...,,,,,,,,,,HAPPY
14431,621e362467b776a2404ad513,2021-05-23,,,,,,,,,...,,,,,,,,,,HAPPY
14432,621e36f967b776a240e5e7c9,2021-05-20,,,,,,,,,...,,,,,,,,,,RESTED/RELAXED
14433,621e36f967b776a240e5e7c9,2021-05-20,,,,,,,,,...,,,,,,,,,,TIRED


#### 2. ecg

In [13]:
# Tally of the ecg labels before cleaning; rows without a value are left out of the count.
data['ecg'].value_counts(dropna=True)

NSR               110
UNCLASSIFIABLE      3
Name: ecg, dtype: int64

In [14]:
# Any 'ecg' cell that holds a numpy array instead of a single label is replaced by
# 'UNCLASSIFIABLE'; every other value (including NaN) is kept unchanged.
# NOTE(review): presumably an array means several readings were merged into one
# cell - confirm that 'UNCLASSIFIABLE' is the intended label for those rows.
# A single element-wise map replaces the row-by-row iterrows() loop, so the frame is
# not modified while it is being iterated and duplicate index labels are handled.
data['ecg'] = data['ecg'].map(
    lambda value: 'UNCLASSIFIABLE' if isinstance(value, np.ndarray) else value
)
data['ecg'].value_counts()

NSR               110
UNCLASSIFIABLE      3
Name: ecg, dtype: int64

#### 3. heart_rate_alert

In [15]:
# Tally of the heart_rate_alert labels before cleaning; rows without a value are not counted.
data['heart_rate_alert'].value_counts(dropna=True)

NONE      110
LOW_HR      3
Name: heart_rate_alert, dtype: int64

In [16]:
# Any 'heart_rate_alert' cell that holds a numpy array instead of a single label is
# replaced by 'LOW_HR'; every other value (including NaN) is kept unchanged.
# NOTE(review): the array contents are not inspected - confirm that 'LOW_HR' is the
# right label for every array-valued row.
# A single element-wise map replaces the row-by-row iterrows() loop, so the frame is
# not modified while it is being iterated and duplicate index labels are handled.
data['heart_rate_alert'] = data['heart_rate_alert'].map(
    lambda value: 'LOW_HR' if isinstance(value, np.ndarray) else value
)
data['heart_rate_alert'].value_counts()

NONE      110
LOW_HR      3
Name: heart_rate_alert, dtype: int64

#### 4. nightly_temperature

In [17]:
# Frequency of each distinct nightly_temperature value before cleaning; missing rows are not counted.
data['nightly_temperature'].value_counts(dropna=True)

33.608571    4
33.373355    4
34.145812    4
34.617691    3
33.669627    3
            ..
33.890873    1
32.328015    1
32.493277    1
32.775364    1
34.571786    1
Name: nightly_temperature, Length: 2908, dtype: int64

In [18]:
# Any 'nightly_temperature' cell that holds a numpy array is collapsed to the mean of
# its non-NaN entries (np.nanmean); scalar values and NaN are kept unchanged.
# A single element-wise map replaces the row-by-row iterrows() loop, so the frame is
# not modified while it is being iterated and duplicate index labels are handled.
# The resulting column dtype is whatever pandas infers from the mapped values.
data['nightly_temperature'] = data['nightly_temperature'].map(
    lambda value: np.nanmean(value) if isinstance(value, np.ndarray) else value
)

In [10]:
# Persist the processed labeling frame.
# NOTE(review): the output name is fixed to the daily file; if the hourly input was
# loaded instead, change this path as well so the daily result is not overwritten.
data.to_pickle(
    '../data/preprocessing_final/labeling_df_daily_processed.pkl'
)