In [None]:
#change width of current notebook cells
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [15]:
import json
import datetime
from datetime import timedelta
import os, zipfile
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import chisquare
import numpy as np
import matplotlib.pyplot as plt

This notebook will be organized in steps from:
1)collecting the polar data, 
2)make obs_x, obs_y, obs_x_trim, obs_y_trim - trim data is based on the start of data based on lowest heart rate from first 30 seconds. 
3)fit orign and trim data.
4) calculate parameters.
5) calculate r sqrs 
6) build comparisons

In [2]:
data_zip_med=r"C:\Users\captian2020\Downloads\polar-user-data-export_6103854c-9559-4056-8b5d-89feb507be70.zip"
polar_zip=zipfile.ZipFile(data_zip_med)

#read all files in zip to dictionary of file names and json (each value is a training_session)
polar_data_dict={}
for i in polar_zip.filelist:
    polar_data_dict[i.filename]=json.loads(polar_zip.read(i.filename))

#read all training sessions into df's inside of a dict
polar_df_dict={}
for i,j in polar_data_dict.items():
    if 'training-session-' in i:
        var_datetime_utc=j['timeZoneOffset']
        var_datetime_utc_list=[datetime.datetime.strptime(
            k['dateTime']  ,'%Y-%m-%dT%H:%M:%S.%f') + timedelta(
                minutes=var_datetime_utc) for k in j['exercises'][0]['samples']['heartRate']]
        var_values_list=[k['value'] for k in j['exercises'][0]['samples']['heartRate']]
        df=pd.DataFrame(list(zip(var_datetime_utc_list,var_values_list)), columns=[
            'var_datetime_utc','var_value'])
        df['var_activity']=j['name']
        df['var_periodicity']='seconds'
        df['var_type']='heart rate'
        df['var_unit']='heart rate per second'
        df['user_id']=1
        df['source_filename']=i
        df['time_stamp_utc']=datetime.datetime.utcnow()
        df['var_timezone_utc_delta_in_mins']=var_datetime_utc
        polar_df_dict[i]=df

#Dictionary of name, exercise, max obs_y, count
polar_data_dict_abbrev={}
for i,j in polar_df_dict.items():
    polar_data_dict_abbrev[i]=(i,j.at[0,'var_activity'],j.var_value.max(), len(j))

In [3]:
#The model set to max heartrate = 170
def michaelis_m_eq_fix170(time_var, shape_var):
    return (170 *time_var)/(shape_var + time_var)

In [4]:
def calc_rsq(obs_y, pred_y):
    residuals = obs_y - pred_y
    ss_res=np.sum(residuals**2)
    ss_tot=np.sum((obs_y-np.mean(obs_y))**2)
    r_squared=1-(ss_res/ss_tot)
    return r_squared

### Collect, trim, model data

In [5]:
polar_obs_var_dict={};polar_obs_var_dict_trim={}
polar_pred_var_dict={};polar_pred_var_dict_trim={}
polar_parameters_dict={};polar_parameters_dict_trim={}
polar_rsq_dict={};polar_rsq_dict_trim={}

for a,b in polar_df_dict.items():
    
    #Get Observed x and y
    var_datetime_utc_list=polar_df_dict[a].var_datetime_utc.to_list()
    obs_x=np.array([(i-var_datetime_utc_list[0]).total_seconds() for i in var_datetime_utc_list])
    obs_y=np.array(polar_df_dict[a].var_value.to_list())
    obs_x_manuf=np.array(range(0,len(obs_y)))
    polar_obs_var_dict[a]=(obs_x, obs_y)
    
    #make trimmed observations based on minimum value of first 30 seconds of session
    min_value_30=polar_obs_var_dict[list(polar_obs_var_dict.keys())[0]][1][:30].min()
    min_position=np.where(polar_obs_var_dict[list(polar_obs_var_dict.keys())[0]][1][:30]==min_value_30)[0][-1]
    obs_y_trim=obs_y[min_position:]
    obs_x_trim=np.array(range(0,len(obs_y_trim)))
    polar_obs_var_dict_trim[a]=(obs_x_trim, obs_y_trim)
    
    
    #calculate parameter for each model
    popt_fix170, pcov_fix170 = curve_fit(michaelis_m_eq_fix170, obs_x, obs_y,bounds=(0,np.inf))
    popt_fix170_trim, pcov_fix170_trim = curve_fit(michaelis_m_eq_fix170, obs_x_trim, obs_y_trim,bounds=(0,np.inf))
    polar_parameters_dict[a]=(popt_fix170[0])
    polar_parameters_dict_trim[a]=(popt_fix170_trim[0])

    
    #calculated predicted Y's for each model
    pred_y_fix170=[michaelis_m_eq_fix170(i, popt_fix170[0]) for i in obs_x]
    pred_y_fix170_trim=[michaelis_m_eq_fix170(i, popt_fix170_trim[0]) for i in obs_x_trim]
    polar_pred_var_dict[a]=( pred_y_fix170)
    polar_pred_var_dict_trim[a]=( pred_y_fix170_trim)
    
    #claculate the r square for each model
    rsq_fix170=calc_rsq(obs_y, pred_y_fix170)
    rsq_fix170_trim=calc_rsq(obs_y_trim, pred_y_fix170_trim)
    polar_rsq_dict[a]=(rsq_fix170)
    polar_rsq_dict_trim[a]=(rsq_fix170_trim)
    
    

  r_squared=1-(ss_res/ss_tot)


Check r squared calculation

In [8]:
training_session="training-session-2021-07-23-6299244154-3de1d61e-a912-49db-a0c5-760aaf80fa3b.json"
obs_x,obs_y=polar_obs_var_dict[training_session]
pre_y=polar_pred_var_dict[training_session]

obs_x_trim,obs_y_trim=polar_obs_var_dict_trim[training_session]
pre_y_trim=polar_pred_var_dict_trim[training_session]

In [14]:
df_values=pd.DataFrame(list(zip(obs_x,obs_y,pre_y)), columns=['obs_x','obs_y','pred_y'])
df_values.to_excel('df_values.xlsx')

In [18]:
chi_square_training= chisquare(obs_y_trim,pre_y_trim)
chi_square_training

Power_divergenceResult(statistic=inf, pvalue=0.0)

In [21]:
obs_y

array([ 71,  71,  70, ..., 170, 170, 170])

In [23]:
obs_y_trim[:10]

array([ 92,  92,  91,  92,  92,  96,  99, 100, 101, 101])

In [None]:
for i,j in polar_obs_var_dict_trim.items():
    length_of_j=len(j[0])
    if length_of_j<min_j:
        min_j=length_of_j
    

In [None]:
min_j

### Make Excel Spreadsheet

In [None]:
df_description = pd.DataFrame.from_dict(polar_data_dict_abbrev, orient='index', columns=['name', 'exercise', 'maxObs_y', 'count'])
df_parameters = pd.DataFrame.from_dict(polar_parameters_dict, orient='index', columns=['popt_fix170'])
df_parameters_trim = pd.DataFrame.from_dict(polar_parameters_dict_trim, orient='index', columns=['popt_fix170_trim'])
df_rsq=pd.DataFrame.from_dict(polar_rsq_dict, orient='index', columns=['rsq_fix170'])
df_rsq_trim=pd.DataFrame.from_dict(polar_rsq_dict_trim, orient='index', columns=['rsq_fix170_trim'])

In [None]:
#Make excel spreadsheet with polar_data_dict_abbrev | df_parameters |df_parameters_trim| df_rsq | df_rsq_trim
frames=[df_description, df_parameters, df_parameters_trim,df_rsq, df_rsq_trim]
result_primary=pd.concat(frames, axis=1)
result_primary.to_excel('big_table6_fix170_with_trim.xlsx', index=False)

### Chart and review output

In [None]:
def plot_build(obs_x, obs_y, pred_y, rsq, param):
    
    plt.plot(obs_x, obs_y,':', label='Observed')
    plt.plot(obs_x,pred_y,'b-', label='predicted170')
    plt.legend(loc=4)
    plt.ylim([60, 185])
    plt.title("count:{len(obs_x)} /n rsq:{rsq:.3g} /n param:{param:3g}",x=0.4, y=0.9)

In [None]:
training_session="training-session-2021-07-23-6299244154-3de1d61e-a912-49db-a0c5-760aaf80fa3b.json"
obs_x, obs_y=polar_obs_var_dict[]
polar_obs_var_dict_trim