# <center> MyFitnessPal data pipeline

In this project, I am attempting to create a data pipeline to fetch my daily data logged on to myfitness pal for tracking calories (macro and micro nutrients). There is already a package developed by Adam Coddington aka [@coddingtonbear](https://github.com/coddingtonbear/python-myfitnesspal). I will be using that package directly instead of reinventing the wheel

In [81]:
# importing all the dependencies
import myfitnesspal
import datetime
from datetime import date, timedelta
import pandas as pd
import os
import numpy as np


os.chdir('/Users/jayrajparmar/Documents/side_project/health_data_tracking')
import basic_methods as bm

In [25]:
mfp_exec = pd.read_csv('/Users/jayrajparmar/Documents/side_project/health_data_tracking/myfitnesspal/File-Export-2017-09-22-to-2022-06-01/Exercise-Summary-2017-09-22-to-2022-06-01.csv')
mfp_nutrition = pd.read_csv('/Users/jayrajparmar/Documents/side_project/health_data_tracking/myfitnesspal/File-Export-2017-09-22-to-2022-06-01/Nutrition-Summary-2017-09-22-to-2022-06-01.csv')


In [66]:
# Function to clean column names, convert date to datetime and delete Remarks field
def data_prep(df, date_col):
#     # Column name cleaning
    d = {' ':'_',
         '(':'',
         ')':''}
    df.columns = [i.replace(' ','_').replace('(','').replace(')','') for i in list(df)]
    print(df.columns)
    df = bm.datetime_conversion(df, [date_col])
    df = df.fillna(-1)
    return df


# Creating a function to convert features to numeric and extracting just date from datetime
def date_numeric_groupby(df, date_col):
    
    """First part is for converting datatypes to appropriate type for aggregation without error
       Second part is for removing multiple entries for same dates"""
    df = df.convert_dtypes()
    try:
        lis=[]
        for i in df.columns:
            
            if df[i].dtypes == 'datetime64[ns]':
                lis.append(df[i].dt.date)
                print(str(i)+str('(')+str(df[i].dtypes)+str(')')+str(':')+'Done')
            elif df[i].dtypes == str:
                lis.append(df[i].astype(str))
                print(str(i)+str('(')+str(df[i].dtypes)+str(')')+str(':')+'Done')
            else: 
                lis.append(df[i].apply(pd.to_numeric, errors='coerce'))
                print(str(i)+str('(')+str(df[i].dtypes)+str(')')+str(': ')+'Done')
    except:
        print("There is an unknown datatype in the data!!")
    df_temp = pd.concat(lis, axis=1)
    print('************************************************************************************')
    print('************************************************************************************')
    print('*****************************Datatypes after conversion*****************************')
    print('************************************************************************************')
    print('************************************************************************************')

    print(df_temp.dtypes)
    
    # Creating list of numerical columns
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    col_lis = list(df_temp.select_dtypes(include=numerics).columns)

    # Creating a list of np.max functions for creating dictionary
    max_func = [np.sum for i in range(len(col_lis))]

    # Creating dictionary from both lists
    d = dict(zip(col_lis, max_func))

    df_max_cols = df_temp.groupby([date_col]).agg(d).reset_index()

    print('Shape of dataframe before removing duplicates: ' + str(df_temp.shape))

    print('Shape of dataframe after removing duplicates: ' + str(df_max_cols.shape))
    return df_max_cols

def null_imputation_rolling(df,date_col):
    df = bm.datetime_conversion(df, [date_col])
    df = df.sort_values(date_col)
    # Now we would fill the null values using .interpolate with linear method; it would be somewhat like imputing
    # mean values from previous and next value in columns
    df = df.set_index(date_col)
#     df.interpolate(method='linear', limit_direction='forward', axis=0, inplace=True)
    for i in df.columns:
        if i != date_col:
            df[i+str('_rolling_7')] = df[i].rolling(window=7).mean()
    return df

In [67]:
mfp_exec_cleaned1 = data_prep(mfp_exec, 'Date')
mfp_exec_cleaned2 = date_numeric_groupby(mfp_exec_cleaned1, 'Date')
del mfp_exec_cleaned2['Exercise']
del mfp_exec_cleaned2['Type']

mfp_exec_cleaned3 = null_imputation_rolling(mfp_exec_cleaned2,'Date')


Index(['Date', 'Exercise', 'Type', 'Exercise_Calories', 'Exercise_Minutes',
       'Sets', 'Reps_Per_Set', 'Kilograms', 'Steps', 'Note'],
      dtype='object')
Date(datetime64[ns]):Done
Exercise(string): Done
Type(string): Done
Exercise_Calories(Int64): Done
Exercise_Minutes(Int64): Done
Sets(Int64): Done
Reps_Per_Set(Int64): Done
Kilograms(Int64): Done
Steps(Int64): Done
Note(Int64): Done
************************************************************************************
************************************************************************************
*****************************Datatypes after conversion*****************************
************************************************************************************
************************************************************************************
Date                  object
Exercise             float64
Type                 float64
Exercise_Calories      int64
Exercise_Minutes       int64
Sets                   int64
Reps_Per

In [74]:
mfp_nutrition_cleaned1 = data_prep(mfp_nutrition, 'Date')
mfp_nutrition_cleaned2 = date_numeric_groupby(mfp_nutrition_cleaned1, 'Date')
del mfp_nutrition_cleaned2['Note']
del mfp_nutrition_cleaned2['Meal']

mfp_nutrition_cleaned3 = null_imputation_rolling(mfp_nutrition_cleaned2,'Date')

Index(['Date', 'Meal', 'Calories', 'Fat_g', 'Saturated_Fat',
       'Polyunsaturated_Fat', 'Monounsaturated_Fat', 'Trans_Fat',
       'Cholesterol', 'Sodium_mg', 'Potassium', 'Carbohydrates_g', 'Fiber',
       'Sugar', 'Protein_g', 'Vitamin_A', 'Vitamin_C', 'Calcium', 'Iron',
       'Note'],
      dtype='object')
Date(datetime64[ns]):Done
Meal(string): Done
Calories(Float64): Done
Fat_g(Float64): Done
Saturated_Fat(Float64): Done
Polyunsaturated_Fat(Float64): Done
Monounsaturated_Fat(Float64): Done
Trans_Fat(Float64): Done
Cholesterol(Float64): Done
Sodium_mg(Float64): Done
Potassium(Float64): Done
Carbohydrates_g(Float64): Done
Fiber(Float64): Done
Sugar(Float64): Done
Protein_g(Float64): Done
Vitamin_A(Float64): Done
Vitamin_C(Float64): Done
Calcium(Float64): Done
Iron(Float64): Done
Note(Int64): Done
************************************************************************************
************************************************************************************
***************

In [76]:
mfp_nutrition_cleaned3

Unnamed: 0_level_0,Calories,Fat_g,Saturated_Fat,Polyunsaturated_Fat,Monounsaturated_Fat,Trans_Fat,Cholesterol,Sodium_mg,Potassium,Carbohydrates_g,Fiber,Sugar,Protein_g,Vitamin_A,Vitamin_C,Calcium,Iron,Calories_rolling_7,Fat_g_rolling_7,Saturated_Fat_rolling_7,Polyunsaturated_Fat_rolling_7,Monounsaturated_Fat_rolling_7,Trans_Fat_rolling_7,Cholesterol_rolling_7,Sodium_mg_rolling_7,Potassium_rolling_7,Carbohydrates_g_rolling_7,Fiber_rolling_7,Sugar_rolling_7,Protein_g_rolling_7,Vitamin_A_rolling_7,Vitamin_C_rolling_7,Calcium_rolling_7,Iron_rolling_7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
2017-12-18,1052.9,35.9,11.8,8.6,10.7,0.0,853.2,846.0,723.6,86.8,10.0,25.2,93.6,23.6,0.0,30.2,41.4,,,,,,,,,,,,,,,,,
2017-12-19,1702.2,68.5,17.0,11.2,25.4,0.0,853.2,1113.7,2109.7,166.5,28.7,38.2,109.3,202.6,48.4,42.1,68.0,,,,,,,,,,,,,,,,,
2017-12-20,1855.1,78.4,18.9,12.7,25.7,0.0,853.2,1298.0,2153.8,180.8,33.7,62.6,112.5,204.6,56.4,46.6,72.0,,,,,,,,,,,,,,,,,
2017-12-21,613.0,21.5,9.6,2.9,4.7,0.0,402.0,242.0,458.0,63.7,4.0,35.4,39.6,14.8,0.0,35.6,19.7,,,,,,,,,,,,,,,,,
2017-12-22,941.2,46.2,9.3,5.6,18.1,0.0,295.2,313.2,1476.0,67.1,17.8,23.1,66.9,181.6,80.8,22.2,23.5,,,,,,,,,,,,,,,,,
2018-05-07,909.6,28.3,8.5,5.7,10.0,0.0,494.0,972.2,1241.1,71.1,7.6,30.6,97.0,10.8,1.2,7.6,20.2,,,,,,,,,,,,,,,,,
2018-05-09,667.1,20.5,6.7,3.7,6.3,0.0,457.7,723.2,964.0,44.6,4.6,21.6,78.2,10.8,0.0,5.6,9.8,1105.8714,42.7571,11.6857,7.2,14.4143,0.0,601.2143,786.9,1303.7429,97.2286,15.2,33.8143,85.3,92.6857,26.6857,27.1286,36.3714
2018-06-06,1175.3,52.5,24.0,3.8,5.3,0.0,561.4,3494.4,1048.7,59.0,7.0,11.8,120.2,10.8,1.9,17.6,33.6,1123.3571,45.1286,13.4286,6.5143,13.6429,0.0,559.5286,1165.2429,1350.1857,93.2571,14.7714,31.9,89.1,90.8571,26.9571,25.3286,35.2571
2018-06-18,1922.5,55.2,19.1,6.7,10.7,0.2,552.5,2951.4,2093.5,217.8,29.5,68.2,146.8,639.4,39.4,73.4,53.9,1154.8286,43.2286,13.7286,5.8714,11.5429,0.0286,516.5714,1427.7714,1347.8714,100.5857,14.8857,36.1857,94.4571,153.2571,25.6714,29.8,33.2429
2018-06-19,1991.4,61.6,18.2,10.7,16.6,0.3,576.0,3072.5,2627.5,192.7,28.5,58.2,178.0,663.7,392.4,97.6,79.4,1174.3,40.8286,13.6286,5.5857,10.2429,0.0714,476.9714,1681.2714,1415.5429,102.2857,14.1429,35.5571,103.8143,218.8429,73.6714,37.0857,34.3


In [78]:
print(max(mfp_exec_cleaned3.index))
print(min(mfp_exec_cleaned3.index))

2022-05-31 00:00:00
2020-01-04 00:00:00


In [79]:
print(max(mfp_nutrition_cleaned3.index))
print(min(mfp_nutrition_cleaned3.index))

2022-04-07 00:00:00
2017-12-18 00:00:00


In [82]:
pd.date_range('2017-12-18','2022-05-31'-timedelta(days=1),freq='d')

TypeError: unsupported operand type(s) for -: 'str' and 'datetime.timedelta'

In [84]:
sdate = date(2017,12,18)   # start date
edate = date(2022,5,31)   # end date
date_modified=sdate
list=[sdate] 


while date_modified<edate:
    date_modified+=timedelta(days=1) 
    list.append(date_modified)

print(list) 

[datetime.date(2017, 12, 18), datetime.date(2017, 12, 19), datetime.date(2017, 12, 20), datetime.date(2017, 12, 21), datetime.date(2017, 12, 22), datetime.date(2017, 12, 23), datetime.date(2017, 12, 24), datetime.date(2017, 12, 25), datetime.date(2017, 12, 26), datetime.date(2017, 12, 27), datetime.date(2017, 12, 28), datetime.date(2017, 12, 29), datetime.date(2017, 12, 30), datetime.date(2017, 12, 31), datetime.date(2018, 1, 1), datetime.date(2018, 1, 2), datetime.date(2018, 1, 3), datetime.date(2018, 1, 4), datetime.date(2018, 1, 5), datetime.date(2018, 1, 6), datetime.date(2018, 1, 7), datetime.date(2018, 1, 8), datetime.date(2018, 1, 9), datetime.date(2018, 1, 10), datetime.date(2018, 1, 11), datetime.date(2018, 1, 12), datetime.date(2018, 1, 13), datetime.date(2018, 1, 14), datetime.date(2018, 1, 15), datetime.date(2018, 1, 16), datetime.date(2018, 1, 17), datetime.date(2018, 1, 18), datetime.date(2018, 1, 19), datetime.date(2018, 1, 20), datetime.date(2018, 1, 21), datetime.date(

In [92]:
sdate = date(2017,12,18)   # start date
edate = date(2022,5,31)   # end date

delta = edate - sdate       # as timedelta

# lis = []
for i in range(delta.days + 1):
    day = sdate + timedelta(days=i)
#     lis.append(day)
    print(day)

TypeError: 'Series' object is not callable

In [95]:
from datetime import date, timedelta

sdate = date(2008, 8, 15)   # start date
edate = date(2008, 9, 15)   # end date

delta = edate - sdate       # as timedelta
lis=[]
for i in range(delta.days + 1):
    day = sdate + timedelta(days=i)
    
    lis.append(day)
    print(day)

2008-08-15
2008-08-16
2008-08-17
2008-08-18
2008-08-19
2008-08-20
2008-08-21
2008-08-22
2008-08-23
2008-08-24
2008-08-25
2008-08-26
2008-08-27
2008-08-28
2008-08-29
2008-08-30
2008-08-31
2008-09-01
2008-09-02
2008-09-03
2008-09-04
2008-09-05
2008-09-06
2008-09-07
2008-09-08
2008-09-09
2008-09-10
2008-09-11
2008-09-12
2008-09-13
2008-09-14
2008-09-15


In [102]:
print(day)

2008-09-15
