# EDA for Weight Forecaster MVP

**NOTES**

Workflow: use Altair and Panel in a Jupyter notebook to develop the app (what information to display, charts, layout, etc.). At a later stage, it should hopefully be relatively straightforward to migrate it manually to Streamlit (https://www.streamlit.io/) as a standalone app (although it may be worth first checking how feasible a standalone app would be with Panel).

## Setup

In [1]:
import sys
toolpath = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/weightforecaster/server/src'
sys.path.append(toolpath)

%load_ext autoreload
%autoreload 2

from wtfc_utils import etl_utils as etl

import datetime
from sqlalchemy import create_engine

import altair as alt
import panel as pn
pn.extension('vega')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.interpolate import interp1d


## Load Data

In [36]:
# Absolute location of the project's database file; the full path is assembled
# by plain string concatenation as <server_dir>db/<db_name><db_ext>.
server_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/weightforecaster/server/'
db_dir = server_dir + 'db/'
db_name = 'weightforecaster'
db_ext = '.db'
db_file_name = db_dir + db_name + db_ext

In [37]:
# See: https://pandas.pydata.org/pandas-docs/stable/io.html#advanced-sqlalchemy-queries
# SQLAlchemy engine for the SQLite file at db_file_name.
engine = create_engine('sqlite:///'+db_file_name)

In [38]:
# Read the whole 'fitness' table into a DataFrame indexed by its parsed 'date' column.
with engine.connect() as conn, conn.begin():
    db_df = pd.read_sql_table('fitness', conn, index_col='date', parse_dates=['date'])

In [39]:
db_df.tail(10)

Unnamed: 0_level_0,weight,calories,steps,weight_imputed,w_7day_avg,c_7day_avg,s_7day_avg,w_7day_avg_last_week,c_7day_avg_last_week,s_7day_avg_last_week,w_7day_avg_weekly_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-07-01,158.8,1957.0,11489.0,0.0,158.342857,2110.285714,13586.571429,158.885714,2212.857143,16943.285714,-0.542857
2021-07-02,156.5,3267.0,20434.0,0.0,157.857143,2263.714286,14845.0,159.4,2250.714286,16607.714286,-1.542857
2021-07-03,158.1,2559.0,23734.0,0.0,157.5,2288.0,16686.428571,159.528571,2266.714286,16490.857143,-2.028571
2021-07-04,159.3,2289.0,16903.0,0.0,157.842857,2313.142857,17260.714286,159.385714,2221.714286,16043.285714,-1.542857
2021-07-05,157.5,2108.0,16527.0,0.0,158.071429,2280.857143,16800.857143,158.628571,2074.285714,16350.714286,-0.557143
2021-07-06,155.7,1881.0,17462.0,0.0,157.8,2288.285714,17242.142857,158.214286,2106.714286,15129.571429,-0.414286
2021-07-07,157.7,1719.0,12459.0,0.0,157.657143,2254.285714,17001.142857,158.171429,2115.0,14462.571429,-0.514286
2021-07-08,155.4,2001.0,18929.0,0.0,157.171429,2260.571429,18064.0,158.342857,2110.285714,13586.571429,-1.171429
2021-07-09,154.9,2006.0,17747.0,0.0,156.942857,2080.428571,17680.142857,157.857143,2263.714286,14845.0,-0.914286
2021-07-10,158.6,1760.0,9653.0,0.0,157.014286,1966.285714,15668.571429,157.5,2288.0,16686.428571,-0.485714


## Model Functions

In [6]:
def weight_forecast_i(model_coefs, w_prev, c_i, s_i):
    """Apply one step of the linear weight model.

    Args:
        model_coefs: four values unpacked as (c_w, c_c, c_s, c_0) -- the
            coefficients on the previous weight, the calories and the steps,
            followed by the intercept.
        w_prev: weight at the previous step.
        c_i: calories for this step.
        s_i: steps for this step.

    Returns:
        c_0 + c_w*w_prev + c_c*c_i + c_s*s_i
    """
    weight_coef, calorie_coef, step_coef, intercept = model_coefs
    return intercept + weight_coef * w_prev + calorie_coef * c_i + step_coef * s_i


def get_date_init(date_init):
    """Normalize the start date used by the forecasting helpers.

    Args:
        date_init: None, a 'YYYY-MM-DD' string, or an already-built date-like
            object.

    Returns:
        datetime.date.today() when date_init is None, the parsed
        datetime.date when it is a string, otherwise date_init unchanged.

    Raises:
        ValueError: if a string does not match the '%Y-%m-%d' format.
    """
    if date_init is None:
        return datetime.date.today()
    # isinstance (rather than an exact type comparison) also accepts str subclasses.
    if isinstance(date_init, str):
        return datetime.datetime.strptime(date_init, '%Y-%m-%d').date()
    return date_init

def weight_forecast(model_coefs, w_prev_init, calories, steps, num_weeks, date_init=None):
    """Iterate the weight model forward in 7-day increments.

    Starting from w_prev_init on the date resolved by get_date_init(date_init),
    weight_forecast_i is applied num_weeks times with the same calories and
    steps, each result feeding the next application.

    Returns:
        [weeks, dates, wgt]: three parallel lists where weeks counts up from 0,
        dates advance by seven days per entry and wgt holds the initial weight
        followed by each forecast weight.
    """
    one_week = datetime.timedelta(days=7)
    week_numbers = [0, *range(1, num_weeks + 1)]
    dates = [get_date_init(date_init)]
    weights = [w_prev_init]
    for _ in week_numbers[1:]:
        dates.append(dates[-1] + one_week)
        weights.append(weight_forecast_i(model_coefs, weights[-1], calories, steps))
    return [week_numbers, dates, weights]

def get_forecast_data(model_coefs, wgt_init=None, cals=None, steps=None, max_num_weeks=26, dw=1, dc=100, ds=1000, date_init=None):
    """Tabulate weight forecasts over a grid of scenarios.

    Each of wgt_init, cals and steps is either a single fixed value or None.
    When None, a default grid is scanned instead:
      * initial weight: np.arange(149, 181, dw)
      * calories:       np.arange(1000, 2600, dc)
      * steps:          np.arange(1000, 20000, ds)

    Args:
        model_coefs: coefficients passed through to weight_forecast.
        max_num_weeks: number of weekly steps forecast for every scenario.
        date_init: start date, normalized with get_date_init.

    Returns:
        One DataFrame with the rows of every scenario concatenated and the
        columns week, date, weight, weight_init, calories, steps.
    """
    d_init = get_date_init(date_init)
    # Identity tests: `== None` is ambiguous for array-like arguments.
    wgt_init_values = np.arange(149, 181, dw) if wgt_init is None else [wgt_init]
    cal_values = np.arange(1000, 2600, dc) if cals is None else [cals]
    step_values = np.arange(1000, 20000, ds) if steps is None else [steps]

    frames = []
    # Loop variables are named apart from the parameters so nothing is shadowed.
    for w0 in wgt_init_values:
        for cal in cal_values:
            for n_steps in step_values:
                weeks, dates, wgt = weight_forecast(model_coefs, w0, cal, n_steps, max_num_weeks, date_init=d_init)
                scenario = pd.DataFrame({'week': weeks, 'date': dates, 'weight': wgt})
                frames.append(scenario.assign(weight_init=w0, calories=cal, steps=n_steps))
    return pd.concat(frames, ignore_index=True)

def solve_for_weeks(model_coefs, weight_init, weight_target, steps_target, calories_target, date_init=None):
    """Estimate when weight_target is reached at fixed calories and steps.

    A 52-week forecast is generated with get_forecast_data and the week at
    which the forecast weight equals weight_target is linearly interpolated
    from the forecast rows whose weight is above weight_target - 5.

    Returns:
        [weeks, date, dw, steps_target, calories_target] where weeks is the
        interpolated week count rounded to 2 decimals, date is the start date
        plus round(weeks * 7) days, and dw is minus the mean week-over-week
        weight change (rounded to 2 decimals) of the rows used.

        When weight_target does not lie within the usable forecast weights
        (for example it is never reached within 52 weeks, or the forecast
        moves away from it), ['-', None, dw, steps_target, calories_target]
        is returned, with dw computed over the whole forecast.
    """
    d_init = get_date_init(date_init)
    forecast_data = get_forecast_data(model_coefs, wgt_init=weight_init, cals=calories_target, steps=steps_target,
                                      max_num_weeks=52, date_init=d_init)
    forecast_data = forecast_data.sort_values('date')
    # Week-over-week change; the first row has no predecessor and is NaN.
    forecast_data['dw'] = forecast_data.weight.diff()

    near_target = forecast_data[forecast_data.weight > (weight_target - 5)]
    # interp1d raises for a query outside its x range and needs two or more
    # points, so check that the target can be interpolated before calling it.
    reachable = (len(near_target) >= 2
                 and near_target.weight.min() <= weight_target <= near_target.weight.max())
    if not reachable:
        dw = -np.round(forecast_data.dw.mean(), 2)
        return ['-', None, dw, steps_target, calories_target]

    dw = -np.round(near_target.dw.mean(), 2)
    # interp1d returns a 0-d array; convert so round()/timedelta get a plain float.
    weeks = float(interp1d(near_target.weight, near_target.week)(weight_target))
    date = d_init + datetime.timedelta(days=round(weeks * 7))
    return [np.round(weeks, 2), date, dw, steps_target, calories_target]

def solve_for_calories(model_coefs, weight_init, weight_target, steps_target, date_target, date_init=None):
    """Find the calorie level that reaches weight_target on date_target.

    Forecasts are generated for every calorie value on get_forecast_data's
    default calorie grid at a step of 1. Calorie levels whose forecast is
    within one week of the target week and within one weight unit of
    weight_target become candidates, and each candidate is solved with
    solve_for_weeks.

    The candidate whose solved date is closest to date_target is chosen. An
    exact match always wins, and on ties the highest calorie value is taken.
    The returned date can therefore differ from date_target when no candidate
    lands exactly on it.

    Args:
        date_target: a datetime.date or a 'YYYY-MM-DD' string.
        date_init: start date, normalized with get_date_init.

    Returns:
        The solve_for_weeks result for the chosen calorie level:
        [weeks, date, dw, steps_target, calories_target].

    Raises:
        ValueError: if no candidate calorie level reaches weight_target.
    """
    d_init = get_date_init(date_init)
    d_target = date_target
    if isinstance(d_target, str):
        d_target = datetime.datetime.strptime(d_target, '%Y-%m-%d').date()
    w_target = np.round((d_target - d_init).days / 7.0, 1)

    forecast_data = get_forecast_data(model_coefs, wgt_init=weight_init, steps=steps_target, dc=1, max_num_weeks=52, date_init=d_init)
    near_target = forecast_data[
        (forecast_data.week < (w_target + 1))
        & (forecast_data.week > (w_target - 1))
        & (forecast_data.weight < (weight_target + 1))
        & (forecast_data.weight > (weight_target - 1))]

    best_key = None
    calories_target = None
    for cal in near_target.calories.unique():
        # d_init is passed so that every solve uses the same start date.
        date = solve_for_weeks(model_coefs, weight_init, weight_target, steps_target, cal, date_init=d_init)[1]
        if date is None:
            continue  # this calorie level never reaches the target
        # Smallest distance to the target date first, then the highest calories.
        key = (abs((date - d_target).days), -cal)
        if best_key is None or key < best_key:
            best_key = key
            calories_target = cal
    if calories_target is None:
        raise ValueError(
            'no calorie level reaches weight %s by %s starting from weight %s'
            % (weight_target, d_target, weight_init))
    return solve_for_weeks(model_coefs, weight_init, weight_target, steps_target, calories_target, date_init=d_init)

def solve_for_equil_calories(model_coefs, weight_target, steps_target, date_init=None):
    """Find the calorie level at which weight_target stays unchanged.

    For every calorie value in np.arange(1000, 3000, 5) a one-week forecast
    starting at weight_target is run, and the difference between weight_target
    and the forecast weight is recorded. The calorie value where that
    difference is zero is linearly interpolated and truncated to an int.
    """
    start = get_date_init(date_init)
    calorie_grid = np.arange(1000, 3000, 5)
    weekly_change = []
    for cal in calorie_grid:
        weights = weight_forecast(model_coefs, weight_target, cal, steps_target, 1, date_init=start)[-1]
        weekly_change.append(weight_target - weights[-1])
    equilibrium = interp1d(weekly_change, calorie_grid)(0)
    return int(equilibrium)

In [7]:
foo = solve_for_weeks(model_coefs, 160, 155, 15000, 1650)
foo

NameError: name 'model_coefs' is not defined

In [8]:
solve_for_equil_calories(model_coefs, 150, 10000)

NameError: name 'model_coefs' is not defined

## Exploring Altair & Panel

In [9]:
# Cell texts that metric_markdown renders in the header style (color '#bbb', 20px).
headers = [' ', 'week', 'today', 'target', 'deviation']
# Per-row sign used by metric_markdown for 'deviation' cells: a value is drawn
# green when polarity * value <= 0 and red otherwise (row 1 is always gray).
metric_polarity = [1, 1, 1, 1, -1, 1, -1]

def metric_markdown(index, metric, col, align='center', font_size=30, color='#333', top="auto"):
    """Render one cell of the metrics table as a styled Markdown pane.

    Args:
        index: row position of the cell; selects the entry of the module-level
            metric_polarity list for 'deviation' cells.
        metric: value to display (shown as str(metric)).
        col: label of the column the cell belongs to.
        align, font_size, color, top: default styling, overridden as follows:
            * cells in the 'target' column are colored '#999';
            * cells whose text is one of the module-level headers use
              '#bbb' at 20px;
            * non-string cells in the 'deviation' column are '#999' for row 1,
              green when metric_polarity[index] * metric <= 0, and red
              otherwise.

    Returns:
        A pn.pane.Markdown of height 18 carrying the computed style.
    """
    is_text = isinstance(metric, str)
    text_color = color
    text_size = font_size
    if col == 'target':
        text_color = '#999'
    # Only strings can match a header; skipping the lookup for numbers avoids
    # comparing numeric/NumPy values against strings.
    if is_text and metric in headers:
        text_color = '#bbb'
        text_size = 20
    if col == 'deviation' and not is_text:
        if index == 1:
            text_color = '#999'
        elif (metric_polarity[index] * metric) <= 0:
            text_color = 'green'
        else:
            text_color = 'red'
    return pn.pane.Markdown(str(metric), align=align, height=18,
                           style={
                               'font-family': "Tahoma",
                               'font-size': str(text_size)+"px",
                               'color': text_color,
                               'position':'absolute',
                               'top': top
                           })
    
def metric_column(metric_list, align='center', color='#333', font_size=30, top="auto"):
    """Stack the entries of metric_list vertically as styled Markdown panes.

    metric_list[0] is the column label and is passed to metric_markdown as
    `col` for every cell. Each pane is followed by the matching entry of a
    fixed seven-item gap list (six 10px-high spacers, then None), so at most
    seven entries of metric_list are laid out.
    """
    shared_gap = pn.Spacer(background=None, height=10)
    column_label = metric_list[0]
    panes = [
        metric_markdown(row, value, column_label, align=align, color=color, font_size=font_size, top=top)
        for row, value in enumerate(metric_list)
    ]
    gaps = [pn.Spacer(background=None, height=10)] + [shared_gap] * 5 + [None]
    children = []
    for pane, gap in zip(panes, gaps):
        children.extend((pane, gap))
    return pn.Column(*children)

In [10]:
def get_target(db_df, target_w, target_d=None, target_s=None, target_c=None, steps_source='today', date_init=None):
    """Build the 'target' metrics for reaching target_w.

    Exactly one of target_d and target_c must be given:
      * target_d (target date): the calorie level is solved for with
        solve_for_calories;
      * target_c (target calories): the date is solved for with
        solve_for_weeks.

    The starting weight is the last row's w_7day_avg rounded to 1 decimal.
    When target_s is None the step count comes from the last row's steps
    (steps_source == 'today') or from the second-to-last row's s_7day_avg
    (any other steps_source). Reads the module-level model_coefs and
    date_format.

    Returns:
        [target_w, date string, weeks, dw, calories, steps]; the date string
        is '-' when solve_for_weeks reports no date.

    Raises:
        ValueError: if neither or both of target_d and target_c are given.
    """
    # The previous assert parsed as "(target_d is not None) and (target_c is
    # None)", which made the target_c branch below unreachable.
    if (target_d is None) == (target_c is None):
        raise ValueError('get_target requires exactly one of target_d or target_c')
    data = db_df.iloc[-1]
    w_init = np.round(data.w_7day_avg, 1)
    s_t = target_s
    if s_t is None:
        if steps_source == 'today':
            s_t = int(data.steps)
        else:
            s_t = int(db_df.iloc[-2].s_7day_avg)
    if target_c is None:
        solved = solve_for_calories(model_coefs, w_init, target_w, s_t, target_d, date_init=date_init)
    else:
        solved = solve_for_weeks(model_coefs, w_init, target_w, s_t, target_c, date_init=date_init)
    weeks_t, date_t, dw_t, steps_t, calories_t = solved
    # solve_for_weeks returns None as the date when the target is not reached.
    date_label = '-' if date_t is None else date_t.strftime(date_format)
    return [target_w, date_label, weeks_t, dw_t, calories_t, steps_t]

def get_metrics_today(db_df, target_w, date_init=None):
    """Build the 'today' metrics from the last row of db_df.

    The forecast towards target_w starts from the last row's w_7day_avg
    (rounded to 1 decimal) and uses that row's calories and steps. Reads the
    module-level model_coefs and date_format.

    Returns:
        [weight, date string, weeks, dw, calories, steps], where weight is the
        last row's weight rounded to 1 decimal and the date string is '-' when
        solve_for_weeks reports no date.
    """
    latest = db_df.iloc[-1]
    start_weight = np.round(latest.w_7day_avg, 1)
    weight_today = np.round(latest.weight, 1)
    calories_today = int(latest.calories)
    steps_today = int(latest.steps)
    weeks_t, date_t, dw_t, _, _ = solve_for_weeks(
        model_coefs, start_weight, target_w, steps_today, calories_today, date_init=date_init)
    date_label = '-' if date_t is None else date_t.strftime(date_format)
    return [weight_today, date_label, weeks_t, dw_t, calories_today, steps_today]

def get_metrics_week(db_df, target_w, date_init=None):
    """Build the 'week' metrics from the 7-day averages in db_df.

    The weight is the last row's w_7day_avg (rounded to 1 decimal); calories
    and steps are the second-to-last row's c_7day_avg and s_7day_avg truncated
    to int. Reads the module-level model_coefs and date_format.

    Returns:
        [weight, date string, weeks, dw, calories, steps]; the date string is
        '-' when solve_for_weeks reports no date.
    """
    previous = db_df.iloc[-2]
    avg_weight = np.round(db_df.iloc[-1].w_7day_avg, 1)
    avg_calories = int(previous.c_7day_avg)
    avg_steps = int(previous.s_7day_avg)
    weeks_t, date_t, dw_t, _, _ = solve_for_weeks(
        model_coefs, avg_weight, target_w, avg_steps, avg_calories, date_init=date_init)
    date_label = '-' if date_t is None else date_t.strftime(date_format)
    return [avg_weight, date_label, weeks_t, dw_t, avg_calories, avg_steps]

# def get_metrics_remaining(db_df):
#     data = db_df.iloc[-1]
#     weeks = '—'
#     date = '—'
#     w = np.round(data['weight']-target_w, 1)
#     c = -int(target_c - data['calories'])
#     s = -int(target_s - data['steps'])
#     return ['deviation', w, '2020-07-01', weeks, '-1.2', c, s]

In [132]:
# See: https://pandas.pydata.org/pandas-docs/stable/io.html#advanced-sqlalchemy-queries
# Reload the 'fitness' table so this cell works on fresh data.
engine = create_engine('sqlite:///'+db_file_name)

with engine.connect() as conn, conn.begin():
    db_df = pd.read_sql_table('fitness', conn, index_col='date', parse_dates=['date'])

# Reference date for all metrics: rows after it are dropped and it is passed
# as date_init to the metric helpers. Swap in the fixed date to reproduce an
# earlier state.
# ref_date = '2020-05-16'
ref_date = datetime.date.today().strftime('%Y-%m-%d')
db_df = db_df[db_df.index<=ref_date]

#============================================================================    [c_w, c_c, c_s, c_0]

# Coefficients of the linear weight model, in the order weight_forecast_i unpacks them.
model_coefs = [0.9842664081035283, # c_w
               0.001965638199353011, # c_c
               -4.621900527451458e-05, # c_s
               -1.2110620297640367] # c_0

# Goal settings; date_format is the strftime/strptime pattern used for displayed dates.
target_w = 155
target_c = 1800
target_d = '2021-8-31'
target_s = 10000
date_format = '%b %d, %y'

# target_w = 160
# target_c = 1800
# target_d = '2020-6-30'
# target_s = 12000
# date_format = '%b %d, %y'


def _deviation_column(metrics, target_metrics):
    """Build a 'deviation' column: each metric minus its target.

    Both arguments are seven-item lists laid out as
    [label, weight, date string, weeks, rate, calories, steps], with date
    strings in date_format. The date difference is expressed in days. The
    date and weeks entries are '-' when the matching entry of `metrics`
    is '-'.
    """
    if metrics[2] == '-':
        days = '-'
    else:
        days = (datetime.datetime.strptime(metrics[2], date_format)
                - datetime.datetime.strptime(target_metrics[2], date_format)).days
    if metrics[3] == '-':
        weeks = '-'
    else:
        weeks = round(metrics[3] - target_metrics[3], 1)
    return ['deviation',
            round(metrics[1] - target_metrics[1], 1),
            days,
            weeks,
            round(metrics[4] - target_metrics[4], 2),
            round(metrics[5] - target_metrics[5], 1),
            round(metrics[6] - target_metrics[6], 1)]


# "today" columns: the target uses the fixed target_s step count.
today_target_metrics = ['target'] + get_target(db_df, target_w, target_d=target_d, target_s=target_s, date_init=ref_date)
today_metrics = ['today'] + get_metrics_today(db_df, target_w, date_init=ref_date)
today_deviation = _deviation_column(today_metrics, today_target_metrics)

# "week" columns: the target uses the 7-day average step count (steps_source='week').
week_target_metrics = ['target'] + get_target(db_df, target_w, target_d=target_d, steps_source='week', date_init=ref_date)
week_metrics = ['week'] + get_metrics_week(db_df, target_w, date_init=ref_date)
week_deviation = _deviation_column(week_metrics, week_target_metrics)

In [133]:
solve_for_equil_calories(model_coefs, 150, 12000)

2098

In [134]:
db_df.tail(8)

Unnamed: 0_level_0,weight,calories,steps,weight_imputed,weight_measured,w_7day_avg,c_7day_avg,s_7day_avg,w_7day_avg_last_week,c_7day_avg_last_week,s_7day_avg_last_week,w_7day_avg_weekly_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-07-06,155.7,1881.0,17462.0,0.0,155.7,157.8,2288.285714,17242.142857,158.214286,2106.714286,15129.571429,-0.414286
2021-07-07,157.7,1719.0,12459.0,0.0,157.7,157.657143,2254.285714,17001.142857,158.171429,2115.0,14462.571429,-0.514286
2021-07-08,155.4,2001.0,18929.0,0.0,155.4,157.171429,2260.571429,18064.0,158.342857,2110.285714,13586.571429,-1.171429
2021-07-09,154.9,2006.0,17747.0,0.0,154.9,156.942857,2080.428571,17680.142857,157.857143,2263.714286,14845.0,-0.914286
2021-07-10,158.6,2848.0,14936.0,0.0,158.6,157.014286,2121.714286,16423.285714,157.5,2288.0,16686.428571,-0.485714
2021-07-11,159.8,2352.0,10991.0,0.0,159.8,157.085714,2130.714286,15578.714286,157.842857,2313.142857,17260.714286,-0.757143
2021-07-12,157.2,2036.0,15940.0,0.0,157.2,157.042857,2120.428571,15494.857143,158.071429,2280.857143,16800.857143,-1.028571
2021-07-13,158.7,459.0,1038.0,0.0,158.7,157.471429,1917.285714,13148.571429,157.8,2288.285714,17242.142857,-0.328571


In [135]:
# Metrics table: a column of row labels, then the week group and the today
# group (each: metrics, deviation, target) separated by a light gray spacer.
pn.Row(
    metric_column([' ', 'weight', 'date', 'weeks', 'rate', 'calories', 'steps'], align='end', color='#bbb', font_size=18, top="7px"), 
    pn.Spacer(background=None, width=20), 
    metric_column(week_metrics), 
    pn.Spacer(background=None, width=15), 
    metric_column(week_deviation),
    pn.Spacer(background=None, width=15), 
    metric_column(week_target_metrics),

    pn.Spacer(background='lightgray', width=10), 

    metric_column(today_metrics), 
    pn.Spacer(background=None, width=15), 
    metric_column(today_deviation), 
    pn.Spacer(background=None, width=15), 
    metric_column(today_target_metrics)
)

In [136]:
# Weight chart: daily weights, their 7-day average and a 25-week model forecast.
# Keep rows from 2015-10-01 onward and expose the index as a 'date' column for Altair.
dr = db_df[db_df.index >= "2015-10-01"].copy()
dr['date'] = dr.index
# Axis domains (x: dates, y: weight) and chart dimensions.
xs = ('2019-12-1', '2021-08-31')
# xs = ('2019-12-1', '2020-07-31')
ys = (150, 183)
width = 900
height = 450

# target_df = pd.DataFrame({'date':[week_metrics[2]], 'w_target':target_w}) 
# target_df.date = pd.to_datetime(target_df.date)

# Forecast seed: the last row's 7-day average weight, with the start date and
# the calorie/step averages taken from the second-to-last row
# (presumably because the last row can be an incomplete day -- TODO confirm).
latest_d = dr.date.iloc[-2]
latest_w = dr.w_7day_avg.iloc[-1]
latest_c = dr.c_7day_avg.iloc[-2]
latest_s = dr.s_7day_avg.iloc[-2]
forecast = weight_forecast(model_coefs, latest_w, latest_c, latest_s, num_weeks=25, date_init=latest_d)
# weight_forecast returns [weeks, dates, weights]; only dates and weights are plotted.
f_df = pd.DataFrame({'date':forecast[1], 'weight_forecast':forecast[2]})

# Layer 1: one translucent circle per daily weight.
w=alt.Chart(dr).mark_circle(size=60, opacity=0.3, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs)
    ),  
    y=alt.Y('weight', title='weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight', 'weight_measured', 'calories', 'steps']
).properties(
    width=width,
    height=height
).interactive()

# Layer 2: line through the 7-day average weight.
w_7d_avg=alt.Chart(dr).mark_line(opacity=0.7, color='black').encode(
    x=alt.X('date'),   
    y=alt.Y('w_7day_avg',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'w_7day_avg']
).properties(
    width=width,
    height=height
).interactive()

# Layer 3: dashed line for the forecast.
w_forecast=alt.Chart(f_df).mark_line(opacity=0.5, color='black', strokeDash=[3,2], strokeWidth=3).encode(
    x='date',   
    y=alt.Y('weight_forecast',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight_forecast']
).properties(
    width=width,
    height=height
).interactive()

# wt=alt.Chart(target_df).mark_circle(size=100, opacity=0.75, color='red').encode(
#     x='date',   
#     y=alt.Y('w_target',
#         scale=alt.Scale(domain=ys)
#     ),      
#     tooltip=['date', 'w_target']
# ).properties(
#     width=width,
#     height=height
# ).interactive()

# Overlay the three layers in a single chart.
w + w_7d_avg + w_forecast #+ wt

In [137]:
# Same layered weight chart over a longer x domain, with a 17-week forecast.
# Keep rows from 2015-10-01 onward and expose the index as a 'date' column for Altair.
dr = db_df[db_df.index >= "2015-10-01"].copy()
dr['date'] = dr.index
# Axis domains (x: dates, y: weight) and chart dimensions.
xs = ('2015-9-01', '2021-8-31')
ys = (153, 183)
width = 900
height = 450

# target_df = pd.DataFrame({'date':[week_metrics[2]], 'w_target':target_w}) 
# target_df.date = pd.to_datetime(target_df.date)

# Forecast seed: the last row's 7-day average weight, with the start date and
# the calorie/step averages taken from the second-to-last row
# (presumably because the last row can be an incomplete day -- TODO confirm).
latest_d = dr.date.iloc[-2]
latest_w = dr.w_7day_avg.iloc[-1]
latest_c = dr.c_7day_avg.iloc[-2]
latest_s = dr.s_7day_avg.iloc[-2]
forecast = weight_forecast(model_coefs, latest_w, latest_c, latest_s, num_weeks=17, date_init=latest_d)
# weight_forecast returns [weeks, dates, weights]; only dates and weights are plotted.
f_df = pd.DataFrame({'date':forecast[1], 'weight_forecast':forecast[2]})

# Layer 1: one translucent circle per daily weight.
w=alt.Chart(dr).mark_circle(size=60, opacity=0.3, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs)
    ),  
    y=alt.Y('weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight', 'calories', 'steps']
).properties(
    width=width,
    height=height
).interactive()

# Layer 2: line through the 7-day average weight.
w_7d_avg=alt.Chart(dr).mark_line(opacity=0.7, color='black').encode(
    x='date',   
    y=alt.Y('w_7day_avg',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'w_7day_avg']
).properties(
    width=width,
    height=height
).interactive()

# Layer 3: dashed line for the forecast.
w_forecast=alt.Chart(f_df).mark_line(opacity=0.5, color='black', strokeDash=[3,2], strokeWidth=3).encode(
    x='date',   
    y=alt.Y('weight_forecast',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight_forecast']
).properties(
    width=width,
    height=height
).interactive()

# wt=alt.Chart(target_df).mark_circle(size=100, opacity=0.75, color='red').encode(
#     x='date',   
#     y=alt.Y('w_target',
#         scale=alt.Scale(domain=ys)
#     ),      
#     tooltip=['date', 'w_target']
# ).properties(
#     width=width,
#     height=height
# ).interactive()

# Overlay the three layers in a single chart.
w + w_7d_avg + w_forecast #+ wt

## Calorie dimensional refactoring

In [77]:
# Linear weight model coefficients, unpacked into individual names for the
# calculations below.
model_coefs = [0.9842664081035283, # c_w
               0.001965638199353011, # c_c
               -4.621900527451458e-05, # c_s
               -1.2110620297640367] # c_0
[c_w, c_c, c_s, c_0] = model_coefs

In [78]:
# The calorie coefficient: change in the weekly forecast weight per calorie.
weekly_pounds_per_calorie = c_c
# Calories that offset one step in the model: ratio of the step coefficient to
# the calorie coefficient, sign-flipped.
cals_per_step = -c_s/c_c

def resting_cals(weight):
    """Return the calories that keep `weight` constant at zero steps.

    Setting w = c_0 + c_w*w + c_c*c in the linear model and solving for c
    gives c = -c_0/c_c + ((1 - c_w)/c_c) * w. The coefficients are read from
    the module-level c_0, c_w and c_c.
    """
    intercept = -c_0/c_c
    slope = (1-c_w)/c_c
    return intercept + slope*weight

In [79]:
# about 500 calories under/over equilibrium to lose/gain 1 lb
weekly_pounds_per_calorie*500

0.9828190996765056

In [80]:
# 10K steps expends 235 calories, according to this model...
# intuitively this feels like an underestimate, likely because running/walking/misc are not treated separately
cals_per_step*10000

235.13485487679037

In [83]:
# at 160lbs, my resting calorie expenditure is around 1900 calories, according to this model
resting_cals(160)

1896.8072224210566

In [84]:
cals_per_step*10000 + resting_cals(160) - 500

1631.942077297847

In [85]:
cals_per_step*20

0.47026970975358073

In [86]:
weekly_pounds_per_calorie

0.001965638199353011

In [87]:
cals_per_step

0.023513485487679037

In [88]:
-c_0/c_c

616.1164502005797

In [89]:
(1-c_w)/c_c

8.004317326377981