# Fitbit App
### In this app I am requesting data from the Fitbit API. I need a new access token every 8 hours. I am looking at the daily averages of metrics over a time period `_Interval` and the intra-day values on particular days `_Series1`

### Importing the relevant libraries

In [185]:
import requests
import time
import numpy as np
import oauth2 as oauth2
from pprint import pprint
import json
import matplotlib.pyplot as plt
import datetime
import selenium
from selenium import webdriver
import pandas as pd
from datetime import datetime, timedelta
import warnings
import configparser

warnings.filterwarnings("ignore")
todaystr = str(datetime.today()).split()[0].replace('-','_')
olddate = "2023_09_08"

# Get my private keys ready
config = configparser.ConfigParser()


### Credentials

In [186]:
config.read('config.ini')

f_config = config['fitbit app']
user_id = f_config.get('user_id')

f_config = config['auth site']
client_id = f_config.get('client_id')
scope = f_config.get('scope')
code_challenge = f_config.get('code_challenge')
state = f_config.get('state')

f_config = config['credentials']
username = f_config.get('username')
password = f_config.get('password')

# Authorization

In [187]:
driver = webdriver.Chrome()
driver.get(f"https://www.fitbit.com/oauth2/authorize?response_type=code&client_id={client_id}&scope={scope}&code_challenge={code_challenge}&code_challenge_method=S256&state={state}&redirect_uri=http%3A%2F%2Flocalhost")
driver.implicitly_wait(5)

usrnm = driver.find_element("id","ember591").send_keys(username)
pwd = driver.find_element("id", "ember592").send_keys(password)
loginbutton = driver.find_element("id", "ember632").click()

In [189]:
redirect = driver.current_url
auth_code = redirect.split('=')[1].split('&')[0]
state = redirect.split('=')[2].split('#')[0]

url = "https://api.fitbit.com/oauth2/token"
headers = {
    "Authorization": "Basic MjM5NVFDOjc0NGE1MjlmYWI5YTZmNjlkZGY4MDc1YTY0N2M4OWI1",
    "Content-Type": "application/x-www-form-urlencoded"
}


data = {
    "client_id": {client_id},
    "grant_type": "authorization_code",
    "redirect_uri": "http://localhost",
    "code": {auth_code},
    "code_verifier": "593p3z6g2o2f1n3e4d6x514y0n110h4y715c2d4u4e4" \
                    "7400g0m5e2s5u524u1a6i6c4m521d5s1s5r2k36693v" \
                    "2i6j730x4c351e1q0v5j4u3d1y490l6n1r4o3v5t3r"
}

response = requests.post(url, headers=headers, data=data)
print(response.status_code)
#print(response.json())

access_token = response.json()['access_token']
refresh_token = response.json()['refresh_token']
header = {'Authorization' : 'Bearer ' + access_token}

200


# HRV

In [190]:
# Initialize an empty DataFrame
df_hrv = pd.DataFrame(columns=['date', 'dailyrmssd', 'deeprmssd'])

# Set the start date and end date for your data retrieval
start_date = datetime(2023, 9, 8)
end_date = datetime.today()

# Define the period length (30 days)
period_length = timedelta(days=30)

# Make requests and append data to the DataFrame for each period
while start_date <= end_date:
    # Define the end date for the current period
    current_end_date = start_date + period_length - timedelta(days=1)
    
    # Format the date strings
    start_date_str = start_date.strftime('%Y-%m-%d')
    current_end_date_str = current_end_date.strftime('%Y-%m-%d')
    
    # Make the API request
    response = requests.get(f'https://api.fitbit.com/1/user/-/hrv/date/{start_date_str}/{current_end_date_str}.json', headers=header)
    
    if response.status_code == 200:
        data = response.json()
        df_data = [{'date': item['dateTime'], 'dailyrmssd': item['value']['dailyRmssd'], 'deeprmssd': item['value']['deepRmssd']} for item in data['hrv']]
        df_hrv = df_hrv.append(df_data, ignore_index=True)
    
    # Move to the next period
    start_date += period_length

In [191]:
old_data = pd.read_csv(f"data/df_hrv_{olddate}.csv")
df_hrv = pd.concat([old_data,df_hrv],ignore_index=True).drop_duplicates()
df_hrv['date'] = pd.to_datetime(df_hrv['date'])

In [192]:
df_hrv.to_csv(f"data/df_hrv_{todaystr}.csv",header=True,index=False)
df_hrv

Unnamed: 0,date,dailyrmssd,deeprmssd
0,2021-03-18,44.818,0.000
1,2021-03-19,78.079,0.000
2,2021-03-20,60.329,0.000
3,2021-03-21,71.591,0.000
4,2021-03-22,89.445,0.000
...,...,...,...
975,2023-11-21,52.772,50.974
976,2023-11-22,61.377,56.284
977,2023-11-23,77.376,74.285
978,2023-11-24,91.834,88.949


# RHR Data

In [193]:
# Initialize an empty DataFrame
df_rhr = pd.DataFrame(columns=['date', 'restingHeartRate'])

# Define the start and end dates for data retrieval
start_date = datetime(2023, 9, 8)
end_date = datetime.today()

# Define the interval (1 year)
interval = timedelta(days=364)

# Make requests and add data to the DataFrame for each year
while start_date <= end_date:
    # Define the end date for the current year
    current_end_date = start_date + interval - timedelta(days=1)
    
    # Format the date strings
    start_date_str = start_date.strftime('%Y-%m-%d')
    current_end_date_str = current_end_date.strftime('%Y-%m-%d')
    # Make the API request
    response = requests.get(url=f"https://api.fitbit.com/1/user/-/activities/heart/date/{start_date_str}/{current_end_date_str}.json", headers=header)
    
    if response.status_code == 200:
        #print(f"all good for {start_date_str} to {current_end_date_str}")
        data = response.json()
        for item in data['activities-heart']:
            if 'restingHeartRate' in item['value']:
                rhr = item['value']['restingHeartRate']
                date = item['dateTime']
                df_rhr = df_rhr.append({'date': date, 'restingHeartRate': rhr}, ignore_index=True)
    
    # Move to the next year
    start_date += interval

In [194]:
old_data = pd.read_csv(f"data/df_rhr_{olddate}.csv")
df_rhr = pd.concat([old_data,df_rhr],ignore_index=True).drop_duplicates()
df_rhr['date'] = pd.to_datetime(df_rhr['date'])

In [195]:

df_rhr.to_csv(f"data/df_rhr_{todaystr}.csv",header=True,index=False)
df_rhr

Unnamed: 0,date,restingHeartRate
0,2016-12-25,66
1,2016-12-26,60
2,2016-12-27,60
3,2016-12-28,62
4,2016-12-29,62
...,...,...
2507,2023-11-21,61
2508,2023-11-22,59
2509,2023-11-23,58
2510,2023-11-24,56


# Breathing

In [196]:
# Initialize an empty DataFrame
df_br = pd.DataFrame(columns=['date', 'breathingRate'])

# Set the start date and end date for your data retrieval
start_date = datetime(2023, 9, 8)
end_date = datetime.today()

# Define the period length (30 days)
period_length = timedelta(days=30)

# Make requests and append data to the DataFrame for each period
while start_date <= end_date:
    # Define the end date for the current period
    current_end_date = start_date + period_length - timedelta(days=1)
    
    # Format the date strings
    start_date_str = start_date.strftime('%Y-%m-%d')
    current_end_date_str = current_end_date.strftime('%Y-%m-%d')
    
    # Make the API request
    response = requests.get(f'https://api.fitbit.com/1/user/-/br/date/{start_date_str}/{current_end_date_str}.json', headers=header)
    
    if response.status_code == 200:
        data = response.json()
        df_data = [{'date': item['dateTime'], 'breathingRate': item['value']['breathingRate']} for item in data['br']]
        df_br = df_br.append(df_data, ignore_index=True)
    
    # Move to the next period
    start_date += period_length



In [197]:
old_data = pd.read_csv(f"data/df_br_{olddate}.csv")
df_br = pd.concat([old_data,df_br],ignore_index=True).drop_duplicates()
df_br['date'] = pd.to_datetime(df_br['date'])

In [198]:
df_br.to_csv(f"data/df_br_{todaystr}.csv",header=True,index=False)
df_br


Unnamed: 0,date,breathingRate
0,2021-03-18,13.6
1,2021-03-19,15.6
2,2021-03-20,16.8
3,2021-03-21,16.0
4,2021-03-22,14.8
...,...,...
973,2023-11-21,17.4
974,2023-11-22,16.6
975,2023-11-23,17.2
976,2023-11-24,16.4


# vo2 

In [199]:
# Initialize an empty DataFrame
df_cardioscore = pd.DataFrame(columns=['date', 'vo2Max'])

# Set the start date and end date for your data retrieval
start_date = datetime(2023, 9, 8)
end_date = datetime.today()

# Define the period length (30 days)
period_length = timedelta(days=30)

# Make requests and append data to the DataFrame for each period
while start_date <= end_date:
    # Define the end date for the current period
    current_end_date = start_date + period_length - timedelta(days=1)
    
    # Format the date strings
    start_date_str = start_date.strftime('%Y-%m-%d')
    current_end_date_str = current_end_date.strftime('%Y-%m-%d')
    
    # Make the API request
    response = requests.get(f'https://api.fitbit.com/1/user/-/cardioscore/date/{start_date_str}/{current_end_date_str}.json', headers=header)
    
    if response.status_code == 200:
        data = response.json()
        df_data = [{'date': item['dateTime'], 'vo2Max': item['value']['vo2Max']} for item in data['cardioScore']]
        df_cardioscore = df_cardioscore.append(df_data, ignore_index=True)
    
    # Move to the next period
    start_date += period_length



In [200]:
old_data = pd.read_csv(f"data/df_cardioscore_{olddate}.csv")
df_cardioscore = pd.concat([old_data,df_cardioscore],ignore_index=True).drop_duplicates()
df_cardioscore['date'] = pd.to_datetime(df_cardioscore['date'])

In [201]:
df_cardioscore.to_csv(f"data/df_cardioscore_{todaystr}.csv",header=True,index=False)
df_cardioscore

Unnamed: 0,date,vo2Max
0,2016-12-25,41-45
1,2016-12-26,42-46
2,2016-12-27,42-46
3,2016-12-28,42-46
4,2016-12-29,42-46
...,...,...
2485,2023-11-02,39
2486,2023-11-03,38
2487,2023-11-04,38
2488,2023-11-05,38


# Spo2

In [261]:
# Initialize an empty DataFrame
df_spo2 = pd.DataFrame(columns=['date', 'spo2avg'])

# Set the start date and end date for your data retrieval
start_date = datetime(2023, 9, 8)
end_date = datetime.today()

start_date_str = start_date.strftime('%Y-%m-%d')
current_end_date_str = current_end_date.strftime('%Y-%m-%d')

response = requests.get(f"https://api.fitbit.com/1/user/-/spo2/date/{start_date_str}/{current_end_date_str}.json",headers=header)
print(response.status_code)
if response.status_code == 200:
        data = response.json()
        df_data = [{'date': item['dateTime'], 'spo2avg': item['value']['avg']} for item in data]
        df_spo2 = df_spo2.append(df_data, ignore_index=True)

200


In [262]:
df_spo2

Unnamed: 0,date,spo2avg
0,2023-09-08,95.9
1,2023-09-09,97.2
2,2023-09-11,96.4
3,2023-09-12,94.6
4,2023-09-14,94.9
...,...,...
59,2023-11-21,92.6
60,2023-11-22,93.5
61,2023-11-23,92.8
62,2023-11-24,93.1


In [263]:
old_data = pd.read_csv(f"data/df_spo2_{olddate}.csv")
df_spo2 = pd.concat([old_data,df_spo2],ignore_index=True).drop_duplicates()
df_spo2['date'] = pd.to_datetime(df_spo2['date'])

In [264]:
df_spo2.to_csv(f"data/df_spo2_{todaystr}.csv",header=True,index=False)
df_spo2

Unnamed: 0,date,spo2avg
0,2021-09-30,94.6
1,2021-10-03,94.4
2,2021-10-04,95.1
3,2021-10-06,95.9
4,2021-10-07,93.8
...,...,...
1205,2023-11-21,92.6
1206,2023-11-22,93.5
1207,2023-11-23,92.8
1208,2023-11-24,93.1


# Temperature

In [205]:
# Initialize an empty DataFrame
df_temp = pd.DataFrame(columns=['date', 'tempSkin'])

# Set the start date and end date for your data retrieval
start_date = datetime(2023, 9, 8)
end_date = datetime.today()

# Define the period length (30 days)
period_length = timedelta(days=30)

# Make requests and append data to the DataFrame for each period
while start_date <= end_date:
    # Define the end date for the current period
    current_end_date = start_date + period_length - timedelta(days=1)
    
    # Format the date strings
    start_date_str = start_date.strftime('%Y-%m-%d')
    current_end_date_str = current_end_date.strftime('%Y-%m-%d')
    
    # Make the API request
    response = requests.get(f'https://api.fitbit.com/1/user/-/temp/skin/date/{start_date_str}/{current_end_date_str}.json', headers=header)
    
    if response.status_code == 200:
        data = response.json()
        df_data = [{'date': item['dateTime'], 'tempSkin': item['value']['nightlyRelative']} for item in data['tempSkin']]
        df_temp = df_temp.append(df_data, ignore_index=True)
    
    # Move to the next period
    start_date += period_length



In [206]:
old_data = pd.read_csv(f"data/df_temp_{olddate}.csv")
df_temp = pd.concat([old_data,df_temp],ignore_index=True).drop_duplicates()
df_temp['date'] = pd.to_datetime(df_temp['date'])

In [207]:
df_temp.to_csv(f"data/df_temp_{todaystr}.csv",header=True,index=False)
df_temp

Unnamed: 0,date,tempSkin
0,2021-03-20,-1.1
1,2021-03-21,-0.0
2,2021-03-22,-1.2
3,2021-03-23,-0.1
4,2021-03-24,-2.1
...,...,...
972,2023-11-21,0.4
973,2023-11-22,0.2
974,2023-11-23,0.9
975,2023-11-24,0.7


# Merge

In [None]:
df_hrv['date'] = pd.to_datetime(df_hrv['date'])
df_rhr['date'] = pd.to_datetime(df_rhr['date'])
df_merge = pd.merge(df_hrv, df_rhr, on='date', how='right')
df_merge

Unnamed: 0,date,dailyrmssd,deeprmssd,restingHeartRate
0,2016-12-25,,,66
1,2016-12-26,,,60
2,2016-12-27,,,60
3,2016-12-28,,,62
4,2016-12-29,,,62
...,...,...,...,...
2506,2023-11-21,52.772,50.974,61
2507,2023-11-22,61.377,56.284,59
2508,2023-11-23,77.376,74.285,58
2509,2023-11-24,91.834,88.949,56


In [None]:
import plotly.express as px


# Calculate the date one year ago from today
one_year_ago = datetime.today() - timedelta(days=365)

# Filter the DataFrame to include only the past year's data
recent_data = df_merge[df_merge['date'] >= one_year_ago]

# Create a Plotly figure
fig = px.line(recent_data, x='date', y='dailyrmssd', title='Daily RMSSD Over the Past Year')

# Customize the hover behavior
fig.update_traces(mode='lines+markers', hovertemplate='Date: %{x}<br>Daily RMSSD: %{y}')

# Set the x-axis title
fig.update_xaxes(title_text='Date')

# Set the y-axis title
fig.update_yaxes(title_text='Daily RMSSD')

# Customize the y-axis range
fig.update_yaxes(range=[20, 100])

# Show the interactive plot
fig.show()

In [None]:
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime, timedelta

# Assuming you have df_hrv (HRV data) and df_rhr (RHR data) DataFrames

# Calculate the date one year ago from today
one_year_ago = datetime.today() - timedelta(days=365)

# Filter the HRV data for the past year
recent_hrv_data = df_hrv[df_hrv['date'] >= one_year_ago]

# Filter the RHR data for the past year
recent_rhr_data = df_rhr[df_rhr['date'] >= one_year_ago]

# Create a Plotly figure with two subplots
fig = go.Figure()

# Add the HRV data as a line plot
fig.add_trace(go.Scatter(x=recent_hrv_data['date'], y=recent_hrv_data['dailyrmssd'], mode='lines+markers', name='Daily RMSSD'))

# Add the RHR data as a second line plot
fig.add_trace(go.Scatter(x=recent_rhr_data['date'], y=recent_rhr_data['restingHeartRate'], mode='lines+markers', name='Resting Heart Rate'))

# Customize the hover behavior for both traces
fig.update_traces(hovertemplate='Date: %{x}<br>%{y}', hoverinfo='x+y', selector=dict(type='scatter'))

# Set the x-axis title
fig.update_xaxes(title_text='Date')

# Set the y-axis title
fig.update_yaxes(title_text='Values')

# Set the title for the entire plot
fig.update_layout(title='HRV and Resting Heart Rate Over the Past Year')

# Customize the y-axis range
fig.update_yaxes(range=[40, 80])

# Show the interactive plot
fig.show()


In [None]:
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime, timedelta

# Assuming you have df_hrv (HRV data) and df_rhr (RHR data) DataFrames
# Also assuming you have df_hrv with 'Start_Date' and 'Cycle_Length' columns

# Calculate the date one year ago from today
one_year_ago = datetime.today() - timedelta(days=365*3)

# Filter the HRV data for the past year
recent_hrv_data = df_hrv[df_hrv['date'] >= one_year_ago]

# Filter the RHR data for the past year
recent_rhr_data = df_rhr[df_rhr['date'] >= one_year_ago]

# Filter the RHR data for the past year
recent_cycle_data = cycle[cycle['Start_Datetime'] >= one_year_ago]

# Create a Plotly figure with two subplots
fig = go.Figure()

# Add the HRV data as a line plot
fig.add_trace(go.Scatter(x=recent_hrv_data['date'], y=recent_hrv_data['dailyrmssd'], mode='lines', name='Daily RMSSD'))

# Add the RHR data as a second line plot
fig.add_trace(go.Scatter(x=recent_rhr_data['date'], y=recent_rhr_data['restingHeartRate'], mode='lines', name='Resting Heart Rate'))

# Add the menstrual cycle length data as markers
fig.add_trace(go.Scatter(x=recent_cycle_data['Start_Datetime'], y=recent_cycle_data['Cycle_Length'], mode='markers+lines', name='Cycle Length'))

# Customize the hover behavior for all traces
fig.update_traces(hovertemplate='Date: %{x}<br>%{y}', hoverinfo='x+y', selector=dict(type='scatter'))

# Set the x-axis title
fig.update_xaxes(title_text='Date')

# Set the y-axis title
fig.update_yaxes(title_text='Values')

# Set the title for the entire plot
fig.update_layout(title='Cycle Length Over the Past Year')

# Customize the y-axis range
fig.update_yaxes(range=[20, 90])



# Show the interactive plot
fig.show()


NameError: name 'cycle' is not defined

In [None]:
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler

# Assuming you have df_hrv (HRV data) and df_rhr (RHR data) DataFrames
# Also assuming you have df_hrv with 'Start_Date' and 'Cycle_Length' columns

# Calculate the date one year ago from today
one_year_ago = datetime.today() - timedelta(days=365*3)

# Filter the HRV data for the past year
recent_hrv_data = df_hrv[df_hrv['date'] >= one_year_ago]

# Filter the RHR data for the past year
recent_rhr_data = df_rhr[df_rhr['date'] >= one_year_ago]

# Filter the RHR data for the past year
recent_cycle_data = cycle[cycle['Start_Datetime'] >= one_year_ago]

# Normalize the HRV and RHR data
scaler = MinMaxScaler()
recent_hrv_data['dailyrmssd'] = scaler.fit_transform(recent_hrv_data[['dailyrmssd']])
recent_rhr_data['restingHeartRate'] = scaler.fit_transform(recent_rhr_data[['restingHeartRate']])

# Normalize the Cycle Length data
recent_cycle_data['Cycle_Length'] = scaler.fit_transform(recent_cycle_data[['Cycle_Length']])

# Create a Plotly figure with two subplots
fig = go.Figure()

# Add the HRV data as a line plot
#fig.add_trace(go.Scatter(x=recent_hrv_data['date'], y=recent_hrv_data['dailyrmssd'], mode='lines', name='Daily RMSSD'))

# Add the RHR data as a second line plot
fig.add_trace(go.Scatter(x=recent_rhr_data['date'], y=recent_rhr_data['restingHeartRate'], mode='lines', name='Resting Heart Rate'))

# Add the menstrual cycle length data as markers
fig.add_trace(go.Scatter(x=recent_cycle_data['Start_Datetime'], y=recent_cycle_data['Cycle_Length'], mode='markers+lines', name='Cycle Length'))

# Customize the hover behavior for all traces
fig.update_traces(hovertemplate='Date: %{x}<br>%{y}', hoverinfo='x+y', selector=dict(type='scatter'))

# Set the x-axis title
fig.update_xaxes(title_text='Date')

# Set the y-axis title
fig.update_yaxes(title_text='Values')

# Set the title for the entire plot
fig.update_layout(title='Normalized HRV, Resting Heart Rate, and Cycle Length Over the Past Year')

# Customize the y-axis range
fig.update_yaxes(range=[0, 1])

# Show the interactive plot
fig.show()


In [None]:
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime, timedelta

# Assuming you have df_hrv (HRV data) and df_rhr (RHR data) DataFrames
# Also assuming you have df_hrv with 'Start_Date' and 'Cycle_Length' columns

# Calculate the date one year ago from today
years = 6
one_year_ago = datetime.today() - timedelta(days=365*years)


# Filter the RHR data for the past year
recent_cycle_data = cycle[cycle['Start_Datetime'] >= one_year_ago]

# Create a Plotly figure with two subplots
fig = go.Figure()


# Add the menstrual cycle length data as markers
fig.add_trace(go.Scatter(x=recent_cycle_data['Start_Datetime'], y=recent_cycle_data['Cycle_Length'], mode='markers+lines', name='Cycle Length'))

# Customize the hover behavior for all traces
fig.update_traces(hovertemplate='Date: %{x}<br>%{y}', hoverinfo='x+y', selector=dict(type='scatter'))

# Set the x-axis title
fig.update_xaxes(title_text='Date')

# Set the y-axis title
fig.update_yaxes(title_text='Values')

# Set the title for the entire plot
fig.update_layout(title=f'Cycle Length Over the Past {years} Years')

# Customize the y-axis range
fig.update_yaxes(range=[20, 50])

fig.update_layout(
    shapes=[
        dict(
            type="line",
            x0="2022-07-01",
            x1="22-07-01",
            y0=0,
            y1=1,
            xref="x",
            yref="paper",
            line=dict(color="red", width=2, dash="dash"),
        )
    ]
)

# Show the interactive plot
fig.show()


# Menstrual

In [208]:
menstrual = pd.read_csv("data/menstrual_tracking.csv")
menstrual.head()

Unnamed: 0,Start_Date,Start_Datetime,Cycle_Length
0,15/10/2023,2023-10-15,37.0
1,21/11/2023,2023-11-21,37.0
2,08/09/2023,2023-09-08,33.0
3,06/08/2023,2023-08-06,48.0
4,19/06/2023,2023-06-19,35.0


# Add any new cycles
new_row = {"Start_Date":"15/10/2023"}
new_row_df = pd.DataFrame([new_row])
menstrual = pd.concat([new_row_df,menstrual])
menstrual.head()

In [209]:
menstrual['Start_Datetime'] = pd.to_datetime(menstrual['Start_Date'], format='%d/%m/%Y')
menstrual['Cycle_Length'] = menstrual['Start_Datetime'].sort_values().diff().dt.days.abs()
menstrual.reset_index(inplace=True,drop=True)
menstrual.head()

Unnamed: 0,Start_Date,Start_Datetime,Cycle_Length
0,15/10/2023,2023-10-15,37.0
1,21/11/2023,2023-11-21,37.0
2,08/09/2023,2023-09-08,33.0
3,06/08/2023,2023-08-06,48.0
4,19/06/2023,2023-06-19,35.0


In [210]:
menstrual.to_csv("data/menstrual_tracking.csv",header=True,index=False)

# Putting it all together

In [265]:
start_date = menstrual.Start_Datetime.min() # This will be our start date

date_range = pd.date_range(start_date, end_date, freq='D') # All the days since

all_df = pd.DataFrame({'date':date_range}) # Create a dataframe out of it

all_df['day'] = pd.Series(dtype='float64')

# Iterate through rows and update 'Day' column
for index, row in all_df.iterrows():
    if any((row['date'] == menstrual['Start_Datetime']).values):
        all_df.at[index, 'day'] = 1
    else:
        all_df.at[index, 'day'] = all_df.at[index - 1, 'day'] + 1

# Convert 'Day' column to integer
all_df['day'] = all_df['day'].astype(int)

In [266]:
all_df = all_df.merge(df_temp,how='left',on='date').merge(df_hrv,how='left',on='date').merge(df_rhr,how='left',on='date').merge(df_br,how='left',on='date').merge(df_spo2,how='left',on='date')

In [267]:
all_df.head()

Unnamed: 0,date,day,tempSkin,dailyrmssd,deeprmssd,restingHeartRate,breathingRate,spo2avg
0,2017-04-25,1,,,,61,,
1,2017-04-26,2,,,,61,,
2,2017-04-27,3,,,,60,,
3,2017-04-28,4,,,,60,,
4,2017-04-29,5,,,,61,,


In [268]:
all_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2406 entries, 0 to 2405
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   date              2406 non-null   datetime64[ns]
 1   day               2406 non-null   int32         
 2   tempSkin          976 non-null    float64       
 3   dailyrmssd        979 non-null    float64       
 4   deeprmssd         979 non-null    float64       
 5   restingHeartRate  2391 non-null   object        
 6   breathingRate     977 non-null    float64       
 7   spo2avg           636 non-null    float64       
dtypes: datetime64[ns](1), float64(5), int32(1), object(1)
memory usage: 159.8+ KB


In [269]:
df = all_df.dropna()

In [270]:
df

Unnamed: 0,date,day,tempSkin,dailyrmssd,deeprmssd,restingHeartRate,breathingRate,spo2avg
1619,2021-09-30,25,1.5,80.894,76.424,57,15.4,94.6
1622,2021-10-03,28,0.1,57.908,46.137,58,14.2,94.4
1623,2021-10-04,29,0.5,65.453,58.184,58,15.0,95.1
1625,2021-10-06,31,0.7,57.619,50.259,58,15.6,95.9
1626,2021-10-07,32,-0.5,64.212,53.507,58,15.2,93.8
...,...,...,...,...,...,...,...,...
2401,2023-11-21,1,0.4,52.772,50.974,61,17.4,92.6
2402,2023-11-22,2,0.2,61.377,56.284,59,16.6,93.5
2403,2023-11-23,3,0.9,77.376,74.285,58,17.2,92.8
2404,2023-11-24,4,0.7,91.834,88.949,56,16.4,93.1


In [272]:
df.to_csv("ML_data.csv",header=True,index=False)