##### This notebook preprocesses a subset of the labeling features and creates a slightly revised version of the labeling set to be visualized.
In more detail, the features preprocessed here are:
1. bmi
2. ecg
3. heart_rate_alert
4. nightly_temperature

In [3]:
import numpy as np
import pandas as pd

In [13]:
# Load the unprocessed labeling set.
# NOTE: unpickling executes code embedded in the file, so only load pickles from a trusted source.
unprocessed_pickle_path = '../data/preprocessing_final/labeling_df_unprocessed.pkl'
data = pd.read_pickle(unprocessed_pickle_path)
data

Unnamed: 0,id,date,hour,ecg,heart_rate_alert,nightly_temperature,nremhr,spo2,rmssd,full_sleep_breathing_rate,...,negative_affect_score,stai_stress,ttm_stage,dramatic_relief_category,environmental_reevaluation_category,self_reevaluation_category,social_liberation_category,reinforcement_management_category,self_liberation_category,mood
0,621e2ff067b776a2403eb737,2021-12-22,19,NSR,NONE,,,,,,...,,,,,,,,,,
1,621e2ff067b776a2403eb737,2021-11-18,0,,,35.02573,,,,,...,,,,,,,,,,
2,621e2ff067b776a2403eb737,2021-11-18,21,,,34.866951,,,,,...,,,,,,,,,,
3,621e2ff067b776a2403eb737,2021-11-20,0,,,35.349583,,,,,...,,,,,,,,,,
4,621e2ff067b776a2403eb737,2021-11-20,23,,,34.495486,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165086,621e36f967b776a240e5e7c9,2021-05-20,16,,,,,,,,...,,,,,,,,,,TIRED
165087,621e36f967b776a240e5e7c9,2021-05-20,20,,,,,,,,...,,,,,,,,,,RESTED/RELAXED
165088,621e36f967b776a240e5e7c9,2021-05-21,17,,,,,,,,...,,,,,,,,,,HAPPY
165089,621e36f967b776a240e5e7c9,2021-05-21,22,,,,,,,,...,,,,,,,,,,RESTED/RELAXED


#### 1. bmi

In [14]:
# bmi is converted into a categorical feature with 4 string labels:
# 'Underweight' (< 18.5), 'Normal' (< 25), 'Overweight' (< 30) and 'Obese' (>= 30).
BMI_LABELS = ('Underweight', 'Normal', 'Overweight', 'Obese')

# Open-ended range strings handled by this cell, each mapped to a representative
# number inside the category it should end up in ('>=25' belongs to overweight).
BMI_RANGE_VALUES = {'<19': 18.0, '>=25': 26.0, '>=30': 31.0}


def bmi_to_category(bmi):
    """Return the weight-category label for a single bmi entry.

    `bmi` may be a number, one of the range strings in BMI_RANGE_VALUES, or an
    already assigned label. Labels are returned unchanged so that re-running
    this cell does not fail on the strings it produced the first time.
    Any other string raises TypeError at the numeric comparison.
    """
    if isinstance(bmi, str):
        if bmi in BMI_LABELS:
            return bmi
        bmi = BMI_RANGE_VALUES.get(bmi, bmi)
    if bmi < 18.5:
        return 'Underweight'
    if bmi < 25:
        return 'Normal'
    if bmi < 30:
        return 'Overweight'
    return 'Obese'


# Missing values are filled with the most frequent raw entry before categorizing.
most_frequent_bmi = data['bmi'].mode().iloc[0]
data['bmi'] = data['bmi'].fillna(most_frequent_bmi).map(bmi_to_category)
data

Unnamed: 0,id,date,hour,ecg,heart_rate_alert,nightly_temperature,nremhr,spo2,rmssd,full_sleep_breathing_rate,...,negative_affect_score,stai_stress,ttm_stage,dramatic_relief_category,environmental_reevaluation_category,self_reevaluation_category,social_liberation_category,reinforcement_management_category,self_liberation_category,mood
0,621e2ff067b776a2403eb737,2021-12-22,19,NSR,NONE,,,,,,...,,,,,,,,,,
1,621e2ff067b776a2403eb737,2021-11-18,0,,,35.02573,,,,,...,,,,,,,,,,
2,621e2ff067b776a2403eb737,2021-11-18,21,,,34.866951,,,,,...,,,,,,,,,,
3,621e2ff067b776a2403eb737,2021-11-20,0,,,35.349583,,,,,...,,,,,,,,,,
4,621e2ff067b776a2403eb737,2021-11-20,23,,,34.495486,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165086,621e36f967b776a240e5e7c9,2021-05-20,16,,,,,,,,...,,,,,,,,,,TIRED
165087,621e36f967b776a240e5e7c9,2021-05-20,20,,,,,,,,...,,,,,,,,,,RESTED/RELAXED
165088,621e36f967b776a240e5e7c9,2021-05-21,17,,,,,,,,...,,,,,,,,,,HAPPY
165089,621e36f967b776a240e5e7c9,2021-05-21,22,,,,,,,,...,,,,,,,,,,RESTED/RELAXED


#### 2. ecg

In [15]:
# Tally the distinct ecg readings before cleaning.
ecg_counts = data['ecg'].value_counts()
ecg_counts

NSR                                 68
[UNCLASSIFIABLE, UNCLASSIFIABLE]     1
UNCLASSIFIABLE                       1
Name: ecg, dtype: int64

In [16]:
# Some entries hold a numpy array of readings instead of a single label.
# Flag those entries with a boolean mask and overwrite them in one assignment
# instead of walking the frame row by row with iterrows().
# NOTE(review): the replacement label is hard-coded and ignores the array contents;
# confirm it is still appropriate if arrays with other readings show up.
ecg_is_array = data['ecg'].map(lambda reading: isinstance(reading, np.ndarray)).astype(bool)
data.loc[ecg_is_array, 'ecg'] = 'UNCLASSIFIABLE'
data['ecg'].value_counts()

NSR               68
UNCLASSIFIABLE     2
Name: ecg, dtype: int64

#### 3. heart_rate_alert

In [17]:
# Tally the distinct heart_rate_alert values before cleaning.
heart_rate_alert_counts = data['heart_rate_alert'].value_counts()
heart_rate_alert_counts

NONE              68
[NONE, LOW_HR]     1
LOW_HR             1
Name: heart_rate_alert, dtype: int64

In [18]:
# Some entries hold a numpy array of alerts instead of a single label.
# Flag those entries with a boolean mask and overwrite them in one assignment
# instead of walking the frame row by row with iterrows().
# NOTE(review): the replacement label is hard-coded and ignores the array contents;
# confirm it is still appropriate if arrays with other alerts show up.
alert_is_array = data['heart_rate_alert'].map(lambda alert: isinstance(alert, np.ndarray)).astype(bool)
data.loc[alert_is_array, 'heart_rate_alert'] = 'LOW_HR'
data['heart_rate_alert'].value_counts()

NONE      68
LOW_HR     2
Name: heart_rate_alert, dtype: int64

#### 4. nightly_temperature

In [19]:
# Tally the distinct nightly_temperature values before cleaning.
nightly_temperature_counts = data['nightly_temperature'].value_counts()
nightly_temperature_counts

32.702906    2
33.254289    2
34.022489    2
33.926760    2
33.300426    2
            ..
33.345872    1
34.409266    1
32.952437    1
33.654396    1
34.571786    1
Name: nightly_temperature, Length: 3292, dtype: int64

In [20]:
# Entries that hold a numpy array of temperatures are replaced by the NaN-ignoring
# mean of that array. Only the flagged rows are touched, in a single masked
# assignment instead of a row-by-row iterrows() loop.
temperature_is_array = (
    data['nightly_temperature']
    .map(lambda reading: isinstance(reading, np.ndarray))
    .astype(bool)
)
if temperature_is_array.any():
    # to_numpy() makes the assignment positional, so it does not rely on index alignment.
    data.loc[temperature_is_array, 'nightly_temperature'] = (
        data.loc[temperature_is_array, 'nightly_temperature'].map(np.nanmean).to_numpy()
    )

In [21]:
# Persist the processed labeling set.
processed_pickle_path = '../data/preprocessing_final/labeling_df_processed.pkl'
data.to_pickle(processed_pickle_path)