##### This notebook preprocesses a subset of the labeling features and creates a slightly revised version of the labeling set to be visualized.
In more detail, the features that are preprocessed are:
1. bmi
2. ecg
3. heart_rate_alert
4. nightly_temperature

In [37]:
import numpy as np
import pandas as pd

In [38]:
# Load the k-means labeling set that the rest of the notebook preprocesses.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
labeling_path = '../data/labeling_visualizations/kmeans_labeling.pkl'
data = pd.read_pickle(labeling_path)
data

Unnamed: 0,id,date,sleep_points,exertion_points,altitude,calories,lightly_active_minutes,moderately_active_minutes,sedentary_minutes,steps,...,negative_affect_score,stai_stress,ttm_stage,dramatic_relief_category,environmental_reevaluation_category,self_reevaluation_category,social_liberation_category,reinforcement_management_category,self_liberation_category,mood
0,621e2e8e67b776a24055b564,2021-05-24 00:00:00,0.810469,0.622928,0.0,0.029382,0.254701,0.083045,0.495139,0.017563,...,,,,,,,,,,
1,621e2e8e67b776a24055b564,2021-05-24 01:00:00,0.810469,0.622928,0.0,0.002914,0.254701,0.083045,0.495139,0.000000,...,,,,,,,,,,
2,621e2e8e67b776a24055b564,2021-05-24 02:00:00,0.810469,0.622928,0.0,0.000729,0.254701,0.083045,0.495139,0.046184,...,,,,,,,,,,
3,621e2e8e67b776a24055b564,2021-05-24 03:00:00,0.810469,0.622928,0.0,0.012860,0.254701,0.083045,0.495139,0.002661,...,,,,,,,,,,
4,621e2e8e67b776a24055b564,2021-05-24 04:00:00,0.810469,0.622928,0.0,0.003315,0.254701,0.083045,0.495139,0.000000,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159779,621e375b67b776a240290cdc,2021-08-11 08:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,
159780,621e375b67b776a240290cdc,2021-08-11 09:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,
159781,621e375b67b776a240290cdc,2021-08-11 10:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,
159782,621e375b67b776a240290cdc,2021-08-11 11:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,


#### 1. bmi

In [39]:
# bmi is converted into a categorical feature with 4 string labels:
# 'Underweight' (< 18.5), 'Normal' (< 25), 'Overweight' (< 30) and 'Obese' (otherwise).

# Range-style raw entries are swapped for a representative number inside the
# range so that they can be compared against the numeric thresholds.
BMI_RANGE_VALUES = {
    '<19': 18.0,   # ends up in 'Underweight'
    '>=25': 26.0,  # ends up in 'Overweight'
    '>=30': 31.0,  # ends up in 'Obese'
}


def bmi_to_category(bmi):
    """Return the weight-class label for a single raw bmi entry.

    `bmi` is either a number or one of the range strings listed in
    BMI_RANGE_VALUES. Any other string is left unchanged and therefore fails
    in the numeric comparison with a TypeError.
    """
    bmi = BMI_RANGE_VALUES.get(bmi, bmi)
    if bmi < 18.5:
        return 'Underweight'
    if bmi < 25:
        return 'Normal'
    if bmi < 30:
        return 'Overweight'
    return 'Obese'


# Missing values take the most frequent raw entry before categorisation.
data['bmi'] = data['bmi'].fillna(data['bmi'].mode().iloc[0])
data['bmi'] = data['bmi'].apply(bmi_to_category)
data

Unnamed: 0,id,date,sleep_points,exertion_points,altitude,calories,lightly_active_minutes,moderately_active_minutes,sedentary_minutes,steps,...,negative_affect_score,stai_stress,ttm_stage,dramatic_relief_category,environmental_reevaluation_category,self_reevaluation_category,social_liberation_category,reinforcement_management_category,self_liberation_category,mood
0,621e2e8e67b776a24055b564,2021-05-24 00:00:00,0.810469,0.622928,0.0,0.029382,0.254701,0.083045,0.495139,0.017563,...,,,,,,,,,,
1,621e2e8e67b776a24055b564,2021-05-24 01:00:00,0.810469,0.622928,0.0,0.002914,0.254701,0.083045,0.495139,0.000000,...,,,,,,,,,,
2,621e2e8e67b776a24055b564,2021-05-24 02:00:00,0.810469,0.622928,0.0,0.000729,0.254701,0.083045,0.495139,0.046184,...,,,,,,,,,,
3,621e2e8e67b776a24055b564,2021-05-24 03:00:00,0.810469,0.622928,0.0,0.012860,0.254701,0.083045,0.495139,0.002661,...,,,,,,,,,,
4,621e2e8e67b776a24055b564,2021-05-24 04:00:00,0.810469,0.622928,0.0,0.003315,0.254701,0.083045,0.495139,0.000000,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159779,621e375b67b776a240290cdc,2021-08-11 08:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,
159780,621e375b67b776a240290cdc,2021-08-11 09:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,
159781,621e375b67b776a240290cdc,2021-08-11 10:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,
159782,621e375b67b776a240290cdc,2021-08-11 11:00:00,0.680095,0.720932,0.0,0.000383,0.000000,0.000000,0.490278,0.064983,...,,,,,,,,,,


#### 2. ecg

In [40]:
# Distribution of the raw ecg entries before cleaning.
ecg_counts = data['ecg'].value_counts()
ecg_counts

NSR                                 68
[UNCLASSIFIABLE, UNCLASSIFIABLE]     1
Name: ecg, dtype: int64

In [41]:
# A few rows hold a NumPy array of labels (e.g. [UNCLASSIFIABLE, UNCLASSIFIABLE])
# instead of a single label; each such row is collapsed to 'UNCLASSIFIABLE'.
# A boolean mask does this in one pass instead of a Python-level iterrows() loop.
ecg_is_array = data['ecg'].map(lambda value: isinstance(value, np.ndarray)).astype(bool)
data.loc[ecg_is_array, 'ecg'] = 'UNCLASSIFIABLE'
data['ecg'].value_counts()

NSR               68
UNCLASSIFIABLE     1
Name: ecg, dtype: int64

#### 3. heart_rate_alert

In [42]:
# Distribution of the raw heart_rate_alert entries before cleaning.
heart_rate_alert_counts = data['heart_rate_alert'].value_counts()
heart_rate_alert_counts

NONE              68
[NONE, LOW_HR]     1
Name: heart_rate_alert, dtype: int64

In [43]:
# A few rows hold a NumPy array of alerts (e.g. [NONE, LOW_HR]) instead of a
# single label; each such row is collapsed to 'LOW_HR'.
# A boolean mask does this in one pass instead of a Python-level iterrows() loop.
alert_is_array = data['heart_rate_alert'].map(lambda value: isinstance(value, np.ndarray)).astype(bool)
data.loc[alert_is_array, 'heart_rate_alert'] = 'LOW_HR'
data['heart_rate_alert'].value_counts()

NONE      68
LOW_HR     1
Name: heart_rate_alert, dtype: int64

#### 4. nightly_temperature

In [44]:
# Distribution of the raw nightly_temperature entries before cleaning.
nightly_temperature_counts = data['nightly_temperature'].value_counts()
nightly_temperature_counts

34.467719    3
34.352998    3
34.105792    3
34.277305    3
34.447053    2
            ..
34.586139    1
34.080868    1
33.900247    1
34.636734    1
34.571786    1
Name: nightly_temperature, Length: 3209, dtype: int64

In [45]:
# Rows whose nightly_temperature is a NumPy array are reduced to the NaN-ignoring
# mean of that array; all other rows are left untouched.
# A boolean mask selects those rows directly instead of a Python-level iterrows() loop.
temperature_is_array = data['nightly_temperature'].map(lambda value: isinstance(value, np.ndarray)).astype(bool)
if temperature_is_array.any():
    data.loc[temperature_is_array, 'nightly_temperature'] = [
        np.nanmean(values) for values in data.loc[temperature_is_array, 'nightly_temperature']
    ]

In [46]:
# Persist the preprocessed labeling set for the visualization step.
processed_path = '../data/labeling_visualizations/kmeans_labeling_processed.pkl'
data.to_pickle(processed_path)