# Making figures for slidedeck on WF v1

## Setup

In [336]:
import os
import datetime
import json
from sqlalchemy import create_engine

import altair as alt
# import panel as pn
# pn.extension('vega')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md

from scipy.interpolate import interp1d
from sklearn.linear_model import LinearRegression

pd.set_option('display.max_rows', 500)

In [102]:
data_dir = os.path.join('/','Users','jamieinfinity','Dropbox','Projects','WeightForecaster','_data_dump')
filename_weight = os.path.join(data_dir, 'weight', 'weight_full.csv')
filename_activities = os.path.join(data_dir, 'fitbit', 'activities', 'activities.csv')
filename_daily_summary = os.path.join(data_dir, 'fitbit', 'activities', 'daily_summary.csv')
filename_daily_activity_steps_cals = os.path.join(data_dir, 'fitbit', 'activities', 'daily_activity_steps_cals.csv')
filename_food = os.path.join(data_dir, 'food', 'food_tot.csv')
filename_steps_daily = os.path.join(data_dir, 'fitbit', 'steps_daily.csv')
steps_detail_dir = os.path.join(data_dir, 'fitbit', 'detailed_steps')
hr_detail_dir = os.path.join(data_dir, 'fitbit', 'detailed_heart_rate')

## Load database

In [657]:
server_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/weightforecaster/server/'
db_dir = server_dir + 'db/'
db_name = 'weightforecaster'
db_ext = '.db'
db_file_name = db_dir + db_name + db_ext

In [658]:
# See: https://pandas.pydata.org/pandas-docs/stable/io.html#advanced-sqlalchemy-queries
engine = create_engine('sqlite:///'+db_file_name)

In [659]:
with engine.connect() as conn, conn.begin():
    data_df = pd.read_sql_table('fitness', conn, index_col='date', parse_dates=['date'])
    
temp_df = pd.DataFrame(index=pd.date_range(start="2015-09-16",end=data_df.index.max()))
data_df = pd.merge(temp_df, data_df, how='left', left_index=True, right_index=True)
# data_df = data_df[(data_df.w_7day_avg_weekly_diff>-5)&()].copy() # drop outliers due to imputation effects
data_df.dropna(inplace=True)
data_df['date'] = data_df.index
data_df.rename({
    'w_7day_avg':'w',
    'c_7day_avg':'c',
    's_7day_avg':'s',
    'w_7day_avg_last_week':'w_prev',
    'c_7day_avg_last_week':'c_prev',
    's_7day_avg_last_week':'s_prev',
    'w_7day_avg_weekly_diff':'dw',
}, axis=1, inplace=True)

In [660]:
data_df['weight_shifted'] = data_df.weight.shift(-1)
data_df['delta_weight_shifted'] = data_df.weight_shifted - data_df.weight
data_df['w_s'] = data_df.w.shift(-1)
data_df['w_prev_s'] = data_df.w_prev.shift(-1)
data_df['dw_s'] = data_df.dw.shift(-1)

data_df.drop(data_df.index[-1], inplace=True)

In [661]:
data_df.tail(5)

Unnamed: 0,weight,calories,steps,weight_imputed,w,c,s,w_prev,c_prev,s_prev,dw,date,weight_shifted,delta_weight_shifted,w_s,w_prev_s,dw_s
2021-01-23,153.6,2128.0,13650.0,0.0,152.785714,1910.428571,15802.714286,156.014286,2305.0,13137.428571,-3.228571,2021-01-23,152.7,-0.9,152.728571,155.714286,-2.985714
2021-01-24,152.7,2130.0,11859.0,0.0,152.728571,2017.428571,15560.714286,155.714286,2198.571429,13487.714286,-2.985714,2021-01-24,152.4,-0.3,152.814286,155.257143,-2.442857
2021-01-25,152.4,2265.0,11149.0,0.0,152.814286,2053.571429,15047.285714,155.257143,2167.714286,14029.714286,-2.442857,2021-01-25,153.8,1.4,152.985714,154.8,-1.814286
2021-01-26,153.8,2143.0,11689.0,0.0,152.985714,2121.714286,13668.285714,154.8,2065.714286,14642.142857,-1.814286,2021-01-26,153.8,0.0,153.157143,154.4,-1.242857
2021-01-27,153.8,2297.0,11362.0,0.0,153.157143,2165.714286,12886.857143,154.4,2002.0,15322.428571,-1.242857,2021-01-27,153.4,-0.4,153.228571,153.857143,-0.628571


## Load raw data

### Weight

In [587]:
wgt_raw = pd.read_csv(filename_weight)
wgt_df = wgt_raw[['date', 'weight']].copy()
wgt_df['date'] = pd.to_datetime(wgt_df['date'])
wgt_df.set_index('date', drop=True, inplace=True)

period = '7D'
min_periods = 1
wgt_df['wgt_roll7d'] = wgt_df.weight.rolling(period, min_periods=min_periods).mean()
wgt_df.tail()

Unnamed: 0_level_0,weight,wgt_roll7d
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-20,152.6,154.4
2021-01-21,152.9,153.857143
2021-01-22,153.0,153.271429
2021-01-23,153.6,152.8
2021-01-24,152.7,152.742857


### Steps

In [588]:
steps_raw = pd.read_csv(filename_steps_daily)
steps_raw.tail()
steps_df = steps_raw[['date', 'steps']].copy()
steps_df['date'] = pd.to_datetime(steps_df['date'])
steps_df.set_index('date', drop=True, inplace=True)

period = '7D'
min_periods = 1
steps_df['steps_roll7d'] = steps_df.steps.rolling(period, min_periods=min_periods).mean()
steps_df.head()

Unnamed: 0_level_0,steps,steps_roll7d
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,,
2013-01-02,,
2013-01-03,,
2013-01-04,,
2013-01-05,,


### Food

In [589]:
food_raw = pd.read_csv(filename_food)
food_df = food_raw[['date', 'calories']].copy()
food_df['date'] = pd.to_datetime(food_df['date'])
food_df.set_index('date', drop=True, inplace=True)

period = '7D'
min_periods = 1
food_df['calories_roll7d'] = food_df.calories.rolling(period, min_periods=min_periods).mean()
food_df.tail()

Unnamed: 0_level_0,calories,calories_roll7d
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-20,1989.0,2002.0
2021-01-21,2192.0,1913.0
2021-01-22,2005.0,1878.285714
2021-01-23,2128.0,1910.428571
2021-01-24,2130.0,2017.428571


### Combine

In [590]:
date_min = wgt_df.index.min()
date_max = wgt_df.index.max()
total_days = (date_max - date_min).days
dates=[]
for i in range(total_days):
    dates.append(date_min + pd.Timedelta(i, 'd'))
alldata_df = pd.DataFrame({'date':dates})
alldata_df.set_index('date', drop=True, inplace=True)

In [591]:
alldata_df = pd.merge(alldata_df, wgt_df, how='left', left_index=True, right_index=True)
alldata_df = pd.merge(alldata_df, steps_df, how='left', left_index=True, right_index=True)
alldata_df = pd.merge(alldata_df, food_df, how='left', left_index=True, right_index=True)
alldata_df['date'] = alldata_df.index

In [592]:
alldata_df.head()

Unnamed: 0_level_0,weight,wgt_roll7d,steps,steps_roll7d,calories,calories_roll7d,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-01-01,177.5,177.5,,,,,2013-01-01
2013-01-02,176.5,177.0,,,,,2013-01-02
2013-01-03,177.0,177.0,,,,,2013-01-03
2013-01-04,177.0,177.0,,,,,2013-01-04
2013-01-05,176.5,176.9,,,,,2013-01-05


In [593]:
len(alldata_df)

2945

In [594]:
alldata_df.weight.count()/len(alldata_df)

0.9446519524617997

In [595]:
alldata_df.steps.count()/len(alldata_df)

0.9894736842105263

In [596]:
alldata_df[alldata_df.date>'2015-09-20'].calories.count()/len(alldata_df[alldata_df.date>'2015-09-20'])

0.9892418032786885

## Weight History

In [68]:
xs = ('2013-01-01', '2020-1-1')
ys = (153, 186)
width = 900
height = 450

w=alt.Chart(alldata_df).mark_circle(size=60, opacity=0.15, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs)
    ),  
    y=alt.Y('weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight']
).properties(
    width=width,
    height=height
).interactive()

w_7d_avg=alt.Chart(alldata_df).mark_line(opacity=0.7, color='black').encode(
    x='date',   
    y=alt.Y('wgt_roll7d',
            scale=alt.Scale(domain=ys),
            title='weight'
    ),      
    tooltip=['date', 'wgt_roll7d']
).properties(
    width=width,
    height=height
).interactive()

(w + w_7d_avg).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

## History of Weight, Steps, Calories

In [597]:
# xs = ('2013-01-01', '2020-1-1')
xs = ('2015-09-21', '2021-1-1')

ws = (153, 186)
width = 700
height = 200

w=alt.Chart(alldata_df).mark_circle(size=60, opacity=0.15, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs), 
        axis = alt.Axis(format = ("%b `%y")),
        title=None
    ),  
    y=alt.Y('weight',
        scale=alt.Scale(domain=ws)
    ),      
    tooltip=['date', 'weight']
).properties(
    width=width,
    height=height
).interactive()

w_7d_avg=alt.Chart(alldata_df).mark_line(opacity=0.7, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs),
        axis = alt.Axis(format = ("%b `%y")),
        title=None
    ),  
    y=alt.Y('wgt_roll7d',
            scale=alt.Scale(domain=ws),
            title='weight'
    ),      
    tooltip=['date', 'wgt_roll7d']
).properties(
    width=width,
    height=height
).interactive()



s=alt.Chart(alldata_df).mark_circle(size=60, opacity=0.15, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs),
        axis = alt.Axis(format = ("%b `%y")),
        title=None
    ), 
    y=alt.Y('steps',
        scale=alt.Scale()
    ),      
    tooltip=['date', 'steps']
).properties(
    width=width,
    height=height
).interactive()

s_7d_avg=alt.Chart(alldata_df).mark_line(opacity=0.7, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs),
        axis = alt.Axis(format = ("%b `%y")),
        title=None
    ),   
    y=alt.Y('steps_roll7d',
            scale=alt.Scale(),
            title='steps'
    ),      
    tooltip=['date', 'steps_roll7d']
).properties(
    width=width,
    height=height
).interactive()



c=alt.Chart(alldata_df).mark_circle(size=60, opacity=0.15, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs),
        axis = alt.Axis(format = ("%b `%y")),
    ), 
    y=alt.Y('calories',
        scale=alt.Scale()
    ),      
    tooltip=['date', 'calories']
).properties(
    width=width,
    height=height
).interactive()

c_7d_avg=alt.Chart(alldata_df).mark_line(opacity=0.7, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs),
        axis = alt.Axis(format = ("%b `%y")),
    ),   
    y=alt.Y('calories_roll7d',
            scale=alt.Scale(),
            title='calories'
    ),      
    tooltip=['date', 'calories_roll7d']
).properties(
    width=width,
    height=height
).interactive()



alt.vconcat(
    (w + w_7d_avg)
    ,
    (s + s_7d_avg)
    ,
    (c + c_7d_avg)
).resolve_scale(
    x='shared'
).configure_axis(
        labelFontSize=16,
        titleFontSize=16
    )

## Extracting calories expended from Fitbit data

In [114]:
act_df = pd.read_csv(filename_activities)
ds_df = pd.read_csv(filename_daily_summary)
da_df = pd.read_csv(filename_daily_activity_steps_cals)

In [172]:
ds_df.head()

Unnamed: 0,date,steps,floors,heart_rate_resting,calories_out,calories_active,calories_bmr,minutes_sedentary,minutes_lightly_active,minutes_fairly_active,minutes_very_active
0,2015-01-17,11747,3,67,2959,1502,1691,791,205,41,37
1,2015-01-18,13248,3,61,2904,1467,1691,770,178,12,70
2,2015-01-19,4242,3,61,2227,655,1691,791,165,0,0
3,2015-01-20,7426,4,60,2702,1125,1692,992,263,0,0
4,2015-01-21,9122,14,62,2731,1255,1692,660,293,0,0


In [136]:
alt.Chart(ds_df).mark_bar(color='gray').encode(
    alt.X('calories_active', bin=alt.Bin(maxbins=20),
            title='Calories'),
    alt.Y('count()', title=None),
).properties(
    width=600,
    height=200
).configure_axis(
        labelFontSize=16,
        titleFontSize=16
    )

In [138]:
alt.Chart(ds_df).mark_bar(color='black').encode(
    alt.X('calories_out', bin=alt.Bin(maxbins=30),
            title='Calories'),
    alt.Y('count()', title=None),
).properties(
    width=600,
    height=200
).configure_axis(
        labelFontSize=16,
        titleFontSize=16
    )

## Predict weight using simple rule of thumb

In [173]:
temp=ds_df.copy()
temp['date'] = pd.to_datetime(temp['date'])
temp.set_index('date', drop=True, inplace=True)
temp.drop(['steps'], inplace=True, axis=1)

rot_df = pd.merge(alldata_df, temp, how='left', left_index=True, right_index=True)
rot_df = rot_df[(rot_df.index>='2015-01-17') & (rot_df.index<='2020-12-31')] 

In [174]:
rot_df.tail()

Unnamed: 0_level_0,weight,wgt_roll7d,steps,steps_roll7d,calories,calories_roll7d,date,floors,heart_rate_resting,calories_out,calories_active,calories_bmr,minutes_sedentary,minutes_lightly_active,minutes_fairly_active,minutes_very_active
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-12-27,156.3,155.2,11188.0,10033.142857,2546.0,2419.0,2020-12-27,6.0,63.0,2527.0,1137.0,1583.0,832.0,216.0,8.0,46.0
2020-12-28,154.8,155.242857,8349.0,9703.285714,2124.0,2414.142857,2020-12-28,6.0,64.0,2386.0,950.0,1576.0,653.0,192.0,3.0,32.0
2020-12-29,154.7,155.3,7967.0,9250.714286,2133.0,2447.285714,2020-12-29,6.0,64.0,2350.0,931.0,1575.0,781.0,207.0,2.0,29.0
2020-12-30,155.1,155.385714,5565.0,8459.571429,2141.0,2459.428571,2020-12-30,0.0,64.0,2321.0,883.0,1577.0,807.0,245.0,0.0,0.0
2020-12-31,155.4,155.471429,10322.0,8550.428571,2898.0,2404.428571,2020-12-31,13.0,64.0,2610.0,1240.0,1579.0,819.0,273.0,3.0,31.0


In [183]:
xs = ('2017-01-07', '2017-3-4')
ys = (163, 172)
width = 900
height = 450

w=alt.Chart(rot_df).mark_circle(size=60, opacity=0.15, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs)
    ),  
    y=alt.Y('weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight']
).properties(
    width=width,
    height=height
).interactive()

w_7d_avg=alt.Chart(rot_df).mark_line(opacity=0.7, color='black').encode(
    x='date',   
    y=alt.Y('wgt_roll7d',
            scale=alt.Scale(domain=ys),
            title='weight'
    ),      
    tooltip=['date', 'wgt_roll7d']
).properties(
    width=width,
    height=height
)

(w + w_7d_avg).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

**TODO**
- for this period of weight gain, calculate $\Delta W$ with simple rule of thumb using cal in - cal out. 
- show a plot with +/- weight gain vs loss using rule of thumb. 

My prediction: it will almost always end up predicting weight loss. The point is to show quickly / simply that the rule of thumb doesn't work well, and to motivate just doing a regression on the data.

You may need to do both single day and rolling 7d avg.

I think this can also provide a natural segue into the discussion of weight fluctuations, and how a rolling 7d avg helps with that.

In [184]:
rot_df.head()

Unnamed: 0_level_0,weight,wgt_roll7d,steps,steps_roll7d,calories,calories_roll7d,date,floors,heart_rate_resting,calories_out,calories_active,calories_bmr,minutes_sedentary,minutes_lightly_active,minutes_fairly_active,minutes_very_active
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015-01-17,171.5,172.357143,11747.0,10865.428571,,,2015-01-17,3.0,67.0,2959.0,1502.0,1691.0,791.0,205.0,41.0,37.0
2015-01-18,170.5,171.928571,13248.0,11844.142857,,,2015-01-18,3.0,61.0,2904.0,1467.0,1691.0,770.0,178.0,12.0,70.0
2015-01-19,171.5,171.857143,4242.0,10404.428571,,,2015-01-19,3.0,61.0,2227.0,655.0,1691.0,791.0,165.0,0.0,0.0
2015-01-20,171.5,171.642857,7426.0,10164.428571,,,2015-01-20,4.0,60.0,2702.0,1125.0,1692.0,992.0,263.0,0.0,0.0
2015-01-21,171.5,171.642857,9122.0,9221.428571,,,2015-01-21,14.0,62.0,2731.0,1255.0,1692.0,660.0,293.0,0.0,0.0


In [210]:
temp = rot_df[['date', 'weight', 'wgt_roll7d', 'calories', 'calories_roll7d', 'calories_out']].copy()
temp = temp[(temp.date>'2017-01-07') & (temp.date<'2017-03-04')]
temp['cal_delta'] = temp.calories - temp.calories_out
temp['w_delta'] = temp.cal_delta / 3500
temp['sign'] = temp.cal_delta > 0
temp['w_rot'] = temp.weight.iloc[0] + temp.w_delta.cumsum()

In [220]:
xs = ('2017-01-07', '2017-3-4')
ys = (158, 172)
width = 900
height = 500

w=alt.Chart(temp).mark_circle(size=60, opacity=0.33, color='black').encode(
    x=alt.X('date',
        scale=alt.Scale(domain=xs),
            title=None
    ),  
    y=alt.Y('weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'weight']
).properties(
    width=width,
    height=height
).interactive()

w_7d_avg=alt.Chart(temp).mark_line(opacity=0.7, color='black').encode(
    x='date',   
    y=alt.Y('wgt_roll7d',
            scale=alt.Scale(domain=ys),
            title='weight'
    ),      
    tooltip=['date', 'wgt_roll7d']
).properties(
    width=width,
    height=height
)

rot=alt.Chart(temp).mark_line(opacity=1, size=3).encode(
    x='date',   
    y=alt.Y('w_rot',
            scale=alt.Scale(domain=ys),
            title='weight'
    ),      
    tooltip=['date', 'w_rot']
).properties(
    width=width,
    height=height
)


diff=alt.Chart(temp).mark_bar().encode(
    x=alt.X('date:T'),
    y=alt.Y('cal_delta', title='cals_in - cals_out'),
    color='sign'
).properties(
    width=width,
    height=400/3
)


alt.vconcat(
    w + w_7d_avg + rot,
    diff
).resolve_scale(
    x='shared'
).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

## Looking for correlations between predictors and target

In [283]:
temp_df = data_df[data_df.date < '2020-05-01'].copy()

In [303]:
c1=alt.Chart(temp_df).mark_bar(color='gray').encode(
    alt.X('delta_weight_shifted', bin=alt.Bin(maxbins=30),
        scale=alt.Scale(domain=(-5, 5)),
            title='weight change'),
    alt.Y('count()', title=None,
        scale=alt.Scale(domain=(0, 400))),
).properties(
    width=600,
    height=200
)

c2=alt.Chart(temp_df).mark_bar(color='black').encode(
    alt.X('dw_s', bin=alt.Bin(maxbins=20, extent=(-5,5)),
        scale=alt.Scale(domain=(-5, 5)),
            title='weight change'),
    alt.Y('count()', title=None),
).properties(
    width=600,
    height=200
)

alt.vconcat(
    c1,
    c2
).resolve_scale(
    x='shared'
).configure_axis(
        labelFontSize=16,
        titleFontSize=16
)

In [285]:
alt.Chart(temp_df).mark_circle(size=60).encode(
    x=alt.X('calories',
        scale=alt.Scale(domain=(1400, 3200))
    ), 
    y=alt.Y('delta_weight_shifted',
        scale=alt.Scale(domain=(-5, 5))
    ),    
    color='steps',
    tooltip=['date', 'weight', 'calories', 'steps']
).properties(
    width=500,
    height=300
).interactive()

In [315]:
sc = alt.Chart(temp_df).mark_circle(size=60).encode(
    x=alt.X('c',
        scale=alt.Scale(domain=(1400, 3200)),
            title='c_i'
    ), 
    y=alt.Y('dw_s',
        scale=alt.Scale(domain=(-3, 2.5)),
            title='w_i - w_{i-1}'
    ),    
    color=alt.Color('s', title='s_i'),
    tooltip=['date', 'w_s', 'c', 's']
).properties(
    width=500,
    height=450
).interactive()

sc.configure_axis(
        labelFontSize=16,
        titleFontSize=16
)

In [286]:
alt.Chart(temp_df).mark_circle(size=60).encode(
    x=alt.X('steps',
        scale=alt.Scale(domain=(0, 30000))
    ), 
    y=alt.Y('delta_weight_shifted',
        scale=alt.Scale(domain=(-5, 5))
    ),    
    color='calories',
    tooltip=['date', 'weight', 'calories', 'steps']
).properties(
    width=500,
    height=300
).interactive()

In [319]:
sc = alt.Chart(temp_df).mark_circle(size=60).encode(
    x=alt.X('s',
        scale=alt.Scale(domain=(4000, 22000)),
            title='s_i'
    ), 
    y=alt.Y('dw_s',
        scale=alt.Scale(domain=(-3, 2.5)),
            title='w_i - w_{i-1}'
    ),    
    color=alt.Color('c', title='c_i'),
    tooltip=['date', 'w_s', 'c', 's']
).properties(
    width=500,
    height=450
).interactive()

sc.configure_axis(
        labelFontSize=16,
        titleFontSize=16
)

## Linear regression model

### Train/test split

In [757]:
# data_modeling = data_df[['w_prev','c_prev','s_prev','c','s','w']].copy()
data_modeling = data_df[['w_prev_s','c_prev','s_prev','c','s','w_s']].copy()
data_train = data_modeling[(data_modeling.index < '2020-01-01')].copy()
data_test = data_modeling[(data_modeling.index >= '2020-01-01') & (data_modeling.index <= '2020-12-31')].copy()
data_train['date'] = data_train.index
data_test['date'] = data_test.index

features=['w_prev_s','c','s']
x_train = data_train[features].values
y_train = data_train[['w_s']].values
x_test = data_test[features].values
y_test = data_test[['w_s']].values

### Train the model

In [758]:
model = LinearRegression().fit(x_train, y_train)

In [759]:
model.score(x_train, y_train)

0.9837591185068993

In [760]:
model.score(x_test, y_test)

0.9924353506416014

In [761]:
# model_coefs = [0.9842664081035283, # c_w
#                0.001965638199353011, # c_c
#                -4.621900527451458e-05, # c_s
#                -1.2110620297640367] # c_0
# [c_w, c_c, c_s, c_0] = model_coefs

In [762]:
[c_w, c_c, c_s] = list(model.coef_[0])
c_0 = model.intercept_[0]

In [763]:
[c_w, c_c, c_s, c_0]

[0.9820501055684959,
 0.0021336822092839396,
 -4.8939182513203444e-05,
 -1.1713935060094514]

### Regression lines

In [669]:
weight_prev = [160, 170, 180]
steps = [5000, 10000, 15000]
cals = list(range(1500, 3200, 10))

wgt = []
for wp in weight_prev:
    for s in steps:
        wgt_df = pd.DataFrame({'calories':cals})
        w = c_0 + c_w*wp + c_c*np.array(cals) + c_s*s
        wgt_df['delta_w'] = w-wp
        wgt_df['weight'] = wp
        wgt_df['steps'] = s
        wgt.append(wgt_df)
wgt_df = pd.concat(wgt, ignore_index=True)

In [670]:
s = alt.Chart(data_df).mark_circle(size=30, opacity=0.3, color='#ccc').encode(
    x=alt.X('c', title='C',
        scale=alt.Scale(domain=(1400, 3200))
    ), 
    y=alt.X('dw_s', title='W_i - W_{i-1}',
        scale=alt.Scale(domain=(-3, 3))
    ),    
#     color='s',
    tooltip=['date', 'w_s', 'c', 's']
).properties(
    width=500,
    height=450
)

wgt = wgt_df[wgt_df.weight==170].copy()
rl = alt.Chart(wgt).mark_line(size=4).encode(
    alt.X('calories'
    ),    
    y='delta_w',
    color='steps:O',
    tooltip=['calories', 'steps', 'weight', 'delta_w']
).properties(
    width=500,
    height=450
)


(s + rl).configure_axis(
        labelFontSize=16,
        titleFontSize=16
)

### Model performance on training and test set

In [671]:
y_pred = model.predict(x_train)
data_train['w_pred'] = [x[0] for x in y_pred]

y_pred = model.predict(x_test)
data_test['w_pred'] = [x[0] for x in y_pred]

In [672]:
ys = (150, 182)
w=alt.Chart(data_train).mark_point(size=2, opacity=1, color='#222', fill='#222').encode(
    x=alt.X('date',
        axis = alt.Axis(format = ("%b `%y"))),   
    y=alt.Y('w_s', title='weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'w_s', 'c', 's']
).properties(
    width=800,
    height=300
).interactive()

wp=alt.Chart(data_train).mark_point(size=2,opacity=0.5, color='#2C84EE').encode(
    x='date',   
    y=alt.Y('w_pred',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'w_s', 'c', 's']
).interactive()

w2=alt.Chart(data_test).mark_point(size=2, opacity=1, color='#222', fill='#222').encode(
    x='date',   
    y=alt.Y('w_s', title='weight',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'w_s', 'c', 's']
).properties(
    width=800,
    height=300
).interactive()

wp2=alt.Chart(data_test).mark_point(size=2,opacity=0.6, color='#E68F26').encode(
    x='date',   
    y=alt.Y('w_pred',
        scale=alt.Scale(domain=ys)
    ),      
    tooltip=['date', 'w_s', 'c', 's']
).interactive()

(w+wp+w2+wp2).configure_axis(
        labelFontSize=16,
        titleFontSize=16
    )

### Forecasting with actual (c,s) trajectory

#### Showing steady state

In [764]:
# model_coefs = [0.9842664081035283, # c_w
#                0.001965638199353011, # c_c
#                -4.621900527451458e-05, # c_s
#                -1.2110620297640367] # c_0
# [c_w, c_c, c_s, c_0] = model_coefs

alpha_s = -c_s/c_c
alpha_0 = -c_0/c_c
alpha_w = (1-c_w)/c_c

def wss(C,S):
    return (C - alpha_s*S - alpha_0)/alpha_w

In [777]:
temp_df = pd.DataFrame(index=pd.date_range(start="2021-01-01",end="2027-01-01",freq="7d"))
temp_df['step'] = list(range(len(temp_df)))
w_prev = 180
temp_df['w'] = w_prev
c = 2125
s = 10000

for dt in list(temp_df.index)[1:]:    
    w_prev = c_0 + c_w*w_prev + c_c*c + c_s*s
    temp_df.loc[temp_df.index==dt, 'w'] = w_prev

In [778]:
wss(c,s)

160.07277226900797

In [781]:
ch1=alt.Chart(temp_df).mark_point(size=10, opacity=1, color='#222', fill='#222').encode(
    x=alt.X('step', scale=alt.Scale(domain=(0,300)), title='weeks'),  
    y=alt.Y('w', title='weight',
        scale=alt.Scale(domain=(155, 180))
    )
).properties(
    width=900,
    height=450
).interactive()

(ch1).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

#### Specific segment of training set

In [694]:
wgt_forecast = data_train[['date','w_prev_s','w_s','c','s']].copy()
wgt_forecast = wgt_forecast[(wgt_forecast.index>='2017-01-01') & (wgt_forecast.index<='2017-03-05')]
wgt_forecast['w_fc'] = wgt_forecast['w_s']
wgt_forecast['w_fc_prev'] = wgt_forecast['w_prev_s']

for dt in list(wgt_forecast.index)[7:]:
    df = wgt_forecast.loc[dt]    
    dt_last_week = dt + datetime.timedelta(days=-7)
    df_last_week = wgt_forecast.loc[dt_last_week]
    w_prev = df_last_week.w_fc
#     w_prev = df.w_prev_s
    c = df.c
    s = df.s
    w_fc = c_0 + c_w*w_prev + c_c*c + c_s*s
#     w_fc = model.predict([[w_prev, c, s]])
    wgt_forecast.loc[wgt_forecast.date==dt, 'w_fc'] = w_fc
    wgt_forecast.loc[wgt_forecast.date==dt, 'w_fc_prev'] = w_prev

In [710]:
ch1=alt.Chart(wgt_forecast).mark_point(size=40, opacity=1, color='#222', fill='#222').encode(
    x='date',  
    y=alt.Y('w_s', title='weight',
        scale=alt.Scale(domain=(163, 172))
    ),  
    tooltip=['date', 'w_s', 'w_fc', 'c', 's', 'w_prev_s']
).properties(
    width=900,
    height=450
).interactive()
ch2=alt.Chart(wgt_forecast).mark_point(size=40,opacity=1, color='#2C84EE', fill='#2C84EE').encode(
    x='date',  
    y=alt.Y('w_fc',
        scale=alt.Scale(domain=(163, 172))
    ),  
    tooltip=['date', 'w_s', 'w_fc', 'c', 's', 'w_prev_s']
).properties(
    width=900,
    height=450
).interactive()

(ch1+ch2).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

#### Test set

In [715]:
wgt_forecast = data_test[['date','w_prev_s','w_s','c','s']].copy()
wgt_forecast['w_fc'] = wgt_forecast['w_s']
wgt_forecast['w_fc_prev'] = wgt_forecast['w_prev_s']

for dt in list(wgt_forecast.index)[7:]:
    df = wgt_forecast.loc[dt]    
    dt_last_week = dt + datetime.timedelta(days=-7)
    df_last_week = wgt_forecast.loc[dt_last_week]
    w_prev = df_last_week.w_fc
#     w_prev = df.w_prev_s
    c = df.c
    s = df.s
    w_fc = c_0 + c_w*w_prev + c_c*c + c_s*s
#     w_fc = model.predict([[w_prev, c, s]])
    wgt_forecast.loc[wgt_forecast.date==dt, 'w_fc'] = w_fc
    wgt_forecast.loc[wgt_forecast.date==dt, 'w_fc_prev'] = w_prev

In [719]:
ch1=alt.Chart(wgt_forecast).mark_point(size=10, opacity=1, color='#222', fill='#222').encode(
    x='date',  
    y=alt.Y('w_s', title='weight',
        scale=alt.Scale(domain=(150, 180))
    ),  
    tooltip=['date', 'w_s', 'w_fc', 'c', 's', 'w_prev_s']
).properties(
    width=900,
    height=450
).interactive()
ch2=alt.Chart(wgt_forecast).mark_point(size=10,opacity=1, color='#2C84EE', fill='#2C84EE').encode(
    x='date',  
    y=alt.Y('w_fc',
        scale=alt.Scale(domain=(150, 180))
    ),  
    tooltip=['date', 'w_s', 'w_fc', 'c', 's', 'w_prev_s']
).properties(
    width=900,
    height=450
).interactive()

(ch1+ch2).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

## Scraps