In [None]:
import pandas as pd
# Lift pandas' display truncation: with both limits set to None, every
# frame shown in this notebook is rendered with all rows and all columns.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
import numpy as np
import re
import datetime

from packages.parquet_cached_df import ParquetCachedDF

In [None]:
# Patterns for the two kinds of value found in the export's "Time" column.
# Both are applied with .match(), so they anchor at the start of the string.

# Date heading: optional leading spaces, 3-letter month, day, 4-digit year
# (for example "Jan 5, 2023").
date_re = re.compile(r"(?: +)?(?P<month>[a-zA-Z]{3}) (?P<day>\d{1,2}), (?P<year>\d{4})")

# Time of a single reading on a 12-hour clock (for example "7:05 AM").
time_re = re.compile(r"(?P<hour>\d{1,2}):(?P<minute>\d{2}) (?P<am_pm>[AP]M)")

# Raw weight export, loaded exactly as stored on disk.
garmin_df = pd.read_csv("./weight_loss_dfs/Weight.csv")

In [None]:
# Show the raw export; display.max_rows was set to None earlier in this
# notebook, so every row is rendered.
display(garmin_df)

In [None]:
# When True, add_row prints the arguments of every call.
DEBUG = False

# Columns of the raw export that are carried over into the cleaned table.
pertinent_cols = [
    "Weight",
    "BMI",
    "Body Fat",
    "Skeletal Muscle Mass",
    "Bone Mass",
    "Body Water",
]


def _leading_float(text):
    """Return the first whitespace-separated token of ``text`` as a float."""
    return float(text.split()[0])


def _plain_float(text):
    """Return ``text`` converted to a float as-is."""
    return float(text)


# Raw column name -> parser that turns a raw cell into a float.
clean_funcs = {
    "Weight": _leading_float,
    "BMI": _plain_float,
    "Body Fat": _leading_float,
    "Skeletal Muscle Mass": _leading_float,
    "Bone Mass": _leading_float,
    "Body Water": _leading_float,
}

# Raw column name -> column header used in the cleaned table.
row_headers = {
    "Weight": "Weight (lb)",
    "BMI": "BMI",
    "Body Fat": "Body Fat (%)",
    "Skeletal Muscle Mass": "Skeletal Muscle Mass (lb)",
    "Bone Mass": "Bone Mass (lb)",
    "Body Water": "Body Water (%)",
}

# Column-oriented accumulator: one independent list per cleaned header,
# followed by the list of dates.
cleaned = {**{row_headers[name]: [] for name in pertinent_cols}, "date": []}

# Most recently parsed date heading; stays None until the first one is seen.
date = None

def add_row(di, row, date, replace_last=False):
    """Clean one raw measurement row and record it in ``di``.

    Parameters
    ----------
    di : dict
        Column-oriented accumulator holding a ``"date"`` list plus one list
        per value of ``row_headers``. It is mutated in place.
    row : mapping
        Raw record indexable by every name in ``pertinent_cols``. A cell
        equal to ``"--"`` marks a missing reading.
    date : str
        Date label to store for the row.
    replace_last : bool, default False
        When False, a new entry is appended and missing readings are stored
        as NaN. When True, the most recent entry is updated in place and any
        reading that is ``"--"`` in ``row`` keeps its stored value.

    Raises
    ------
    AssertionError
        If ``replace_last`` is True and ``di`` has no entries yet, or its
        last recorded date differs from ``date``.
    """
    if DEBUG:
        print("-------------------")
        print("di:", di)
        print("row:", row)
        print("date:", date)
        print("replace_last:", replace_last)

    if replace_last:
        # Checking for emptiness first turns an empty accumulator into the
        # same assertion failure instead of an IndexError on [-1].
        assert di["date"] and date == di["date"][-1], "invalid date for replace"

        for col in pertinent_cols:
            # A missing reading must not wipe out the value already stored.
            if row[col] != "--":
                di[row_headers[col]][-1] = clean_funcs[col](row[col])
    else:
        # Write the date to the accumulator that was passed in, not to the
        # module-level ``cleaned`` dict, so dates and values stay aligned
        # whichever accumulator the caller supplies.
        di["date"].append(date)

        for col in pertinent_cols:
            if row[col] == "--":
                di[row_headers[col]].append(np.nan)
            else:
                di[row_headers[col]].append(clean_funcs[col](row[col]))

# Walk the export in file order. A row whose Time cell is a date heading sets
# the current date; a row whose Time cell is a clock time is one reading for
# that date. Further readings for a date already recorded update its entry
# in place instead of adding another one.
for _idx, row in garmin_df.iterrows():
    date_match = date_re.match(row["Time"])
    time_match = time_re.match(row["Time"])

    if date_match:
        _d = date_match.groupdict()
        # Normalise e.g. year=2023, month=Jan, day=5 to "2023-01-05".
        parsed_day = datetime.datetime.strptime(
            f"{_d['year']}-{_d['month']}-{_d['day']}",
            "%Y-%b-%d"
        )
        date = parsed_day.strftime("%Y-%m-%d")
        continue

    if not time_match:
        raise ValueError(f"row after {date} doesn't match!")

    # A reading can only be filed once a date heading has been seen.
    assert date, "no date!?"

    # Replace only when something is recorded and it is for this same date.
    same_day = bool(cleaned["date"]) and date == cleaned["date"][-1]
    add_row(di=cleaned, row=row, date=date, replace_last=same_day)
 

In [None]:
# Turn the column lists into a frame indexed by the "YYYY-MM-DD" date strings
# produced by the parsing loop.
# NOTE(review): the later index-on-index merge only lines rows up if
# df_personal's index holds dates in this same string form — confirm.
df_garmin_cleaned = pd.DataFrame(cleaned).set_index("date")

In [None]:
# Path of the stored personal tracking table that this notebook starts from.
# NOTE(review): ParquetCachedDF is a project class not shown here; presumably
# it loads/saves a parquet file at file_path — confirm against its source.
df_path = "./weight_loss_dfs/jordan_df_add_garmin.pqt"
df_manager = ParquetCachedDF(file_path=df_path)

In [None]:
# Fetch the personal table from the manager and show it for inspection.
df_personal = df_manager.get_df()
display(df_personal)

In [None]:
# Print (rows, columns) for both inputs of the merge, personal table first.
for frame in (df_personal, df_garmin_cleaned):
    print(frame.shape)

In [None]:
# Outer-join the two tables on their indexes so a date present in either one
# is kept. Column names that occur in both get a " MINE" (personal) or
# " GARMIN" suffix; names unique to one table are left unchanged.
df_merged = df_personal.merge(
    df_garmin_cleaned,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=(" MINE", " GARMIN"),
)
df_merged.columns

In [None]:
# Collapse each "<name> MINE" / "<name> GARMIN" pair into a single "<name>"
# column and collect, in order, the columns worth keeping.
# NOTE(review): the GARMIN column replaces the MINE one wholesale, so after
# the outer merge a date that exists only in df_personal ends up NaN in these
# columns — confirm that the Garmin values are meant to win unconditionally.
mine_re = re.compile(r"(?P<col>.*) MINE$")
garmin_re = re.compile(r".*GARMIN$")
keep_cols = []
cols = df_merged.columns
print(cols)
for col in cols:
    mine_match = mine_re.match(col)
    if mine_match:
        # Strip the suffix to get the shared base name of the pair.
        column_header = mine_match.groupdict()["col"]
        print(column_header)
        df_merged[column_header] = df_merged[f"{column_header} GARMIN"]
        keep_cols.append(column_header)
        continue

    if garmin_re.match(col):
        # Already folded into the base column through its MINE partner.
        print("garmin")
        continue

    # A column that exists in only one of the two tables: keep it unchanged.
    keep_cols.append(col)
print(keep_cols)
#
#df_merged[
#    (df_merged["Weight (lb) MINE"] != df_merged["Weight (lb) GARMIN"])
#    | (df_merged["Body Fat (%) MINE"] != df_merged["Body Fat (%) GARMIN"])
#    | (df_merged["BMI MINE"] != df_merged["BMI GARMIN"])
#    | (df_merged["Skeletal Muscle Mass (lb) MINE"] != df_merged["Skeletal Muscle Mass (lb) GARMIN"])
#    | (df_merged["Bone Mass (lb) MINE"] != df_merged["Bone Mass (lb) GARMIN"])
#    | (df_merged["Body Water (%) MINE"] != df_merged["Body Water (%) GARMIN"])
#    
#][[
#    "Weight (lb) MINE", "BMI MINE", "Body Fat (%) MINE", "Skeletal Muscle Mass (lb) MINE", "Bone Mass (lb) MINE","Body Water (%) MINE",
#    "Weight (lb) GARMIN", "BMI GARMIN", "Body Fat (%) GARMIN","Skeletal Muscle Mass (lb) GARMIN", "Bone Mass (lb) GARMIN","Body Water (%) GARMIN",
#]]

In [None]:
# Show the merged table restricted to the columns collected in keep_cols
# (the suffixed MINE/GARMIN columns are left out).
display(df_merged[keep_cols])

In [None]:
# Columns of the final table, in the order they should be stored.
final_columns = [
    # Body-composition readings
    'Weight (lb)',
    'Body Fat (%)',
    'BMI',
    'Skeletal Muscle Mass (lb)',
    'Bone Mass (lb)',
    'Body Water (%)',
    # Tape measurements
    'Waist (in)',
    'Belly (in)',
    'Hips (in)',
    'Chest (in)',
    'Bicep (in)',
    'Thigh (in)',
    'Calf (in)',
    # Calories and heart rate
    'Target Calories (kcal)',
    'Consumed Calories (kcal)',
    'Active Calories (kcal)',
    'Resting Heart Rate (bpm)',
    # Activity log
    'Workout',
    'Cardio',
    'Stretch',
    'Meditate',
    'Mile Time (min)',
    'Mode',
]

# Selecting with a list raises KeyError if any listed column is missing.
df = df_merged[final_columns]

In [None]:
# Order rows by index, descending, and set the Mode column to "Cutting".
# NOTE(review): this overwrites Mode on every row, including whatever value
# came from the personal table — confirm that is intended.
df = df.sort_index(ascending=False).assign(Mode="Cutting")

In [None]:
# Hand the final frame to the manager and save it under a new path, distinct
# from the jordan_df_add_garmin.pqt path the manager was created with.
# NOTE(review): assumes save_df(file_path=...) writes to the path given here
# rather than to the constructor's path — confirm in ParquetCachedDF.
df_path = "./weight_loss_dfs/jordan_df.pqt"
df_manager.set_df(df)
df_manager.save_df(file_path=df_path)
