In [4]:
# Import necessary modules and functions
import os, datetime
import pandas as pd
from ETL.ETL_general import update_incremental, update_incremental_api, get_most_recent_date, export_to_gsheets
from ETL.ETL_garmin_api import init_garmin, get_garmin_data
from ETL.ETL_whoop import init_whoop, get_sleep_recovery_data, get_journal_data
from ETL.ETL_mfp_api import init_mfp, get_meal_data, get_meal_daily
from OLD.ETL_mfp_apple_matching import update_meal_schedule
from ETL.ETL_apple_health import get_food_time_data, get_weight_data
from ETL.ETL_libreview import get_glucose_daily, get_glucose_time
from ETL.ETL_trainingpeaks import get_tp_data

In [5]:
### Update data of intermediate clean files ###
# Refreshes each per-source cleaned CSV under Data/Cleaned/ from its raw
# export or API. Sources are processed independently, in the order below.

# Apple health update (food times and weight)
# Both outputs are derived from the same raw XML export.
apple_health_file_raw = 'Data/Apple health/exportacion.xml'
food_times_file = 'Data/Cleaned/Food.csv'
weight_file = 'Data/Cleaned/Weight.csv'
update_incremental(apple_health_file_raw, food_times_file, get_food_time_data)
update_incremental(apple_health_file_raw, weight_file, get_weight_data)

# MyFitnessPal API update
meals_file = 'Data/Cleaned/MFP meals scrapped.csv'
meals_daily_file = 'Data/Cleaned/MFP per day scrapped.csv'
# Not used in this cell; kept defined in case another cell reads it.
meals_scheduled_file = 'Data/Cleaned/MealSchedule.csv'
mfp_client = init_mfp()
# Different format than the rest: these helpers receive the output path and
# write the file directly instead of going through update_incremental*.
get_meal_data(mfp_client, meals_file)
get_meal_daily(mfp_client, meals_daily_file)

# Garmin update
garmin_file = 'Data/Cleaned/Garmin_daily.csv'
# Credentials come from the environment; os.getenv yields None when unset.
email_g = os.getenv("USERNAME_G")
password_g = os.getenv("PASSWORD_G")

garmin_client = init_garmin(email_g, password_g)
update_incremental_api(garmin_client, garmin_file, get_garmin_data)

# Glucose update
libreview_file_raw = 'Data/LibreLink/AlbertoRequena Izard_glucose.csv'
glucose_daily_file = 'Data/Cleaned/Glucose_daily.csv'
glucose_time_file = 'Data/Cleaned/Glucose.csv'
# The time series is built from the same LibreView export as the daily file,
# so alias the path instead of repeating the literal.
glucose_time_raw = libreview_file_raw
update_incremental(libreview_file_raw, glucose_daily_file, get_glucose_daily)
# Not incremental: the time-series file is overwritten as a whole.
get_glucose_time(glucose_time_raw).to_csv(glucose_time_file, index=False)

# Whoop API update
whoop_file = 'Data/Cleaned/Sleep_and_recovery.csv'
journal_file_raw = 'Data/Whoop/journal_entries.csv'
journal_file = 'Data/Cleaned/Journal.csv'
email_w = os.getenv("USERNAME_W")
password_w = os.getenv("PASSWORD_W")
whoop_client = init_whoop(email_w, password_w)
update_incremental_api(whoop_client, whoop_file, get_sleep_recovery_data)
get_journal_data(journal_file_raw, journal_file) # Not incremental for now

# Trainingpeaks update
tp_file_raw = 'Data/TrainingPeaks/workouts.csv'
tp_file = 'Data/Cleaned/TSS metrics.csv'
# Queried before the overwrite below so the log line reports the state of the
# existing file (presumably its latest date -- confirm in get_most_recent_date).
last_date = get_most_recent_date(tp_file)
tp_data = get_tp_data(tp_file_raw)
# Write through tp_file so the path that is read, written and logged is the same.
tp_data.to_csv(tp_file, index=False)
print(f"{tp_file}: Data rewritten as a whole, updated from {last_date}")

print('Clean data files updated')

Data/Cleaned/Food.csv: Data from 'Data/Apple health/exportacion.xml' from 2024-07-15 obtained
Data/Cleaned/Food.csv: Data from 2024-07-15 (re-)written
Data/Cleaned/Weight.csv: Data from 'Data/Apple health/exportacion.xml' from 2024-06-29 obtained
Data/Cleaned/Weight.csv: Data from 2024-07-08 (re-)written
Data/Cleaned/MFP meals scrapped.csv: Data per meal obtained and (re-)written for 2024-07-15
Data/Cleaned/MFP per day scrapped.csv: Data per day obtained and (re-)written for 2024-07-14
Data/Cleaned/Garmin_daily.csv: Data from API from 2024-07-15 obtained
Data/Cleaned/Garmin_daily.csv: Data from 2024-07-15 (re-)written
Data/Cleaned/Glucose_daily.csv: Data from 'Data/LibreLink/AlbertoRequena Izard_glucose.csv' from 2024-07-15 obtained
Data/Cleaned/Glucose_daily.csv: Data from 2024-07-15 (re-)written
Data/Cleaned/Sleep_and_recovery.csv: Data from API from 2024-07-15 obtained
Data/Cleaned/Sleep_and_recovery.csv: Data from 2024-07-15 (re-)written
Data/Cleaned/Journal.csv: Journal data obtai

In [6]:
### Integrate into a single file ###
# Loads every cleaned per-source CSV, joins them on 'date', writes the combined
# table to Data/Cleaned/Integrated_data.csv and uploads it to Google Sheets.
# Relies on the notebook's import cell for pd, datetime and export_to_gsheets.

### Get all key dfs from Cleaned Data
df_t = pd.read_csv('Data/Cleaned/TSS metrics.csv')
df_s = pd.read_csv('Data/Cleaned/Sleep_and_recovery.csv')
df_f = pd.read_csv('Data/Cleaned/MFP per day scrapped.csv')
df_g = pd.read_csv('Data/Cleaned/Glucose_daily.csv')
df_gar = pd.read_csv('Data/Cleaned/Garmin_daily.csv')
df_j = pd.read_csv('Data/Cleaned/Journal.csv')
df_w = pd.read_csv('Data/Cleaned/Weight.csv')

# Filter out today from MFP per day scrapped
# 'date' is compared against a 'YYYY-MM-DD' string, matching how the CSVs are
# read here (no date parsing).
today = datetime.datetime.today().strftime('%Y-%m-%d')
df_f = df_f[df_f['date']!=today]

# Filter out TSS metrics from before we are reading Garmin data
df_t = df_t[df_t['date']>=df_gar['date'].min()]

# Single ordered list drives both the date-range report and the join below,
# so the two cannot drift apart when a source is added or removed.
labelled_frames = [
    ('TSS metrics', df_t),
    ('Sleep and recovery', df_s),
    ('MFP per day scrapped', df_f),
    ('Glucose daily', df_g),
    ('Garmin daily', df_gar),
    ('Journal', df_j),
    ('Weight', df_w),
]

# Print the min and the max date of each df
for label, frame in labelled_frames:
    print(f'{label}: ', frame['date'].min(), ' to ', frame['date'].max())

# Perform an outer join on all dfs, starting from TSS metrics and adding the
# remaining sources in list order
df = df_t
for _, frame in labelled_frames[1:]:
    df = df.merge(frame, on='date', how='outer')

# Save to CSV
df.to_csv('Data/Cleaned/Integrated_data.csv', index=False)
print('Integrated data file created: ',df['date'].min(),' to ',df['date'].max())

# Export DataFrame to Google Sheets with specified sheet name
sheet_name = 'Integrated_data'
export_to_gsheets(df, sheet_name)
print('Uploaded to Google Sheets, sheet name: ',sheet_name)

TSS metrics:  2024-03-16  to  2024-07-14
Sleep and recovery:  2024-03-16  to  2024-07-15
MFP per day scrapped:  2024-03-16  to  2024-07-14
Glucose daily:  2024-03-23  to  2024-07-15
Garmin daily:  2024-03-16  to  2024-07-15
Journal:  2024-03-14  to  2024-07-14
Weight:  2024-03-11  to  2024-07-08
Integrated data file created:  2024-03-11  to  2024-07-15
Uploaded to Google Sheets, sheet name:  Integrated_data
