##### This notebook collects all the data from MongoDB and creates the corresponding dataframe.

In [21]:
import os
import warnings
import datetime
import pandas as pd
from dotenv import load_dotenv
from pymongo import MongoClient
from functions import data_loading

# Silence every Python warning from this point on: no category, message or module
# filter is given, so warnings raised by any library are hidden too.
# NOTE(review): consider narrowing this to the specific warning category that is noisy.
warnings.filterwarnings("ignore")

Connect securely to the database

In [22]:
# Read the MongoDB credentials from the local env file so that they never
# appear in the notebook itself.
load_dotenv("config.env")
MONGO_USER = os.getenv("MONGO_USER")
MONGO_PASSWORD = os.getenv("MONGO_PASSWORD")

# os.getenv returns None for an unset variable; fail here with a clear message
# instead of an opaque TypeError while building the connection string.
if not MONGO_USER or not MONGO_PASSWORD:
    raise RuntimeError(
        "MONGO_USER and MONGO_PASSWORD must be defined (e.g. in config.env)")

# Hand the credentials to the driver as keyword arguments instead of splicing
# them into the URI: inside a URI, reserved characters such as '@', ':', '/'
# or '%' would have to be percent-encoded, whereas keyword arguments take the
# raw values. The values in config.env must therefore be stored unescaped.
client = MongoClient("mongodb://localhost:27017/",
                     username=MONGO_USER,
                     password=MONGO_PASSWORD)
db = client.rais

#### Fitbit data

Find all users who provided their Fitbit data

In [23]:
# Distinct values of the `id` field across the `fitbit` collection; the
# per-type loading loops below iterate over this list.
users = db["fitbit"].distinct("id")
print(len(users), "users provided their Fitbit data")

71 users provided their Fitbit data


Find all the data types

In [24]:
# Distinct values of the `type` field across the `fitbit` collection,
# displayed as the cell output.
types = db["fitbit"].distinct("type")
types

['Afib ECG Readings',
 'Computed Temperature',
 'Daily Heart Rate Variability Summary',
 'Daily SpO2',
 'Device Temperature',
 'Heart Rate Variability Details',
 'Heart Rate Variability Histogram',
 'Profile',
 'Respiratory Rate Summary',
 'Stress Score',
 'Wrist Temperature',
 'altitude',
 'badge',
 'calories',
 'demographic_vo2_max',
 'distance',
 'estimated_oxygen_variation',
 'exercise',
 'heart_rate',
 'journal_entries',
 'lightly_active_minutes',
 'mindfulness_eda_data_sessions',
 'mindfulness_goals',
 'mindfulness_sessions',
 'moderately_active_minutes',
 'resting_heart_rate',
 'sedentary_minutes',
 'sleep',
 'steps',
 'time_in_heart_rate_zones',
 'very_active_minutes',
 'water_logs']

##### Afib ECG Readings

In [25]:
# Load every user's "Afib ECG Readings" documents, flatten the nested `data`
# payload into plain columns and start the combined dataframe `df` from them.

# Only the fields used below are projected; MongoDB's `_id` is excluded.
ecg_projection = {
    "id": 1,
    "data.reading_time": 1,
    "data.result_classification": 1,
    "data.heart_rate_alert": 1,
    "_id": 0,
}

# read and load from MongoDB
# The per-user frames are collected in a list and concatenated once; calling
# pd.concat inside the loop would re-copy all accumulated rows on every pass.
# The empty leading frame keeps the "id" and "data" columns present even when
# no document matches.
ecg_frames = [pd.DataFrame(columns=["id", "data"])]
for user in users:
    # Several top-level keys in one filter document are combined with AND.
    ecg_frames.append(pd.DataFrame(list(
        db.fitbit.find({"type": "Afib ECG Readings", "id": user}, ecg_projection))))
ecg = pd.concat(ecg_frames, axis=0)

# split data column (json format) into three columns (df format)
ecg["date"] = ecg["data"].apply(lambda d: d["reading_time"])
ecg["ecg"] = ecg["data"].apply(lambda d: d["result_classification"])
ecg["heart_rate_alert"] = ecg["data"].apply(lambda d: d["heart_rate_alert"])
ecg = ecg.drop(columns=["data"])

# process the datetime object, group and aggregate the data
# NOTE(review): both helpers live in functions/data_loading and are not visible
# here; the later merges rely on them leaving `id`, `date` and `hour` columns.
ecg = data_loading.date_conversion(ecg)
ecg = data_loading.aggregate_column(ecg, list(ecg.columns))

# merge with the final dataframe (ECG is the first source, so it seeds `df`)
df = ecg
df

Unnamed: 0,id,date,hour,ecg,heart_rate_alert
0,621e2ff067b776a2403eb737,2021-12-22,19,NSR,NONE
1,621e301367b776a24057738e,2021-06-08,21,NSR,NONE
2,621e312a67b776a240164d59,2021-10-07,17,NSR,NONE
3,621e312a67b776a240164d59,2021-10-10,20,NSR,NONE
4,621e326767b776a24012e179,2021-07-22,17,UNCLASSIFIABLE,NONE
...,...,...,...,...,...
65,621e351a67b776a240f6204b,2021-07-22,9,NSR,NONE
66,621e351a67b776a240f6204b,2021-08-04,1,NSR,NONE
67,621e351a67b776a240f6204b,2021-08-10,10,NSR,NONE
68,621e36dd67b776a240ce9a45,2021-05-24,13,NSR,NONE


##### Computed Temperature

In [26]:
# Load every user's "Computed Temperature" documents, flatten the nested `data`
# payload into plain columns and outer-merge the result into `df`.

# Only the fields used below are projected; MongoDB's `_id` is excluded.
temperature_projection = {
    "id": 1,
    "data.sleep_start": 1,
    "data.type": 1,
    "data.nightly_temperature": 1,
    "_id": 0,
}

# read and load from MongoDB
# The per-user frames are collected in a list and concatenated once; calling
# pd.concat inside the loop would re-copy all accumulated rows on every pass.
# The empty leading frame keeps the "id" and "data" columns present even when
# no document matches.
temperature_frames = [pd.DataFrame(columns=["id", "data"])]
for user in users:
    # Several top-level keys in one filter document are combined with AND.
    temperature_frames.append(pd.DataFrame(list(
        db.fitbit.find({"type": "Computed Temperature", "id": user}, temperature_projection))))
nightly_temperature = pd.concat(temperature_frames, axis=0)

# split data column (json format) into three columns (df format)
nightly_temperature["date"] = nightly_temperature["data"].apply(lambda d: d["sleep_start"])
nightly_temperature["type"] = nightly_temperature["data"].apply(lambda d: d["type"])
nightly_temperature["nightly_temperature"] = nightly_temperature["data"].apply(lambda d: d["nightly_temperature"])
nightly_temperature = nightly_temperature.drop(columns=["data"])

# process the datetime object, feature types and group and aggregate the data
# NOTE(review): both helpers live in functions/data_loading and are not visible
# here; the merge below relies on them leaving `id`, `date` and `hour` columns.
nightly_temperature = data_loading.date_conversion(nightly_temperature)
nightly_temperature['nightly_temperature'] = pd.to_numeric(nightly_temperature['nightly_temperature'])
nightly_temperature = data_loading.aggregate_column(nightly_temperature, list(nightly_temperature.columns))

# merge with the final dataframe
# The outer join keeps rows that exist in only one of the two sources.
# NOTE: this reassigns `df`, so re-running the cell without re-running the
# earlier ones merges the temperature columns a second time.
df = df.merge(nightly_temperature, how='outer', on=['id', 'date', 'hour'])
df

Unnamed: 0,id,date,hour,ecg,heart_rate_alert,type,nightly_temperature
0,621e2ff067b776a2403eb737,2021-12-22,19,NSR,NONE,,
1,621e301367b776a24057738e,2021-06-08,21,NSR,NONE,,
2,621e312a67b776a240164d59,2021-10-07,17,NSR,NONE,,
3,621e312a67b776a240164d59,2021-10-10,20,NSR,NONE,,
4,621e326767b776a24012e179,2021-07-22,17,UNCLASSIFIABLE,NONE,,
...,...,...,...,...,...,...,...
3487,621e375b67b776a240290cdc,2021-07-24,0,,,SKIN,33.687826
3488,621e375b67b776a240290cdc,2021-07-25,1,,,SKIN,34.112386
3489,621e375b67b776a240290cdc,2021-07-26,0,,,SKIN,33.895137
3490,621e375b67b776a240290cdc,2021-07-27,0,,,SKIN,33.758319
