In [13]:
# Load data and train model 

import ipywidgets as widgets
import numpy as np
import pandas as pd

from IPython.display import display
from stravalib.client import Client
from itertools import chain
from sklearn import linear_model
from datetime import datetime, timedelta

client = Client()
activities = {}
reg = linear_model.LinearRegression() 

txt_at = widgets.Text()
display(txt_at)

def handle_submit_token(sender):
    # Set access token for Strava
    client.access_token = txt_at.value
    print("Using access token: " + client.access_token)
    
    athlete = client.get_athlete()

    types = ['time', 'latlng', 'distance', 'altitude', 'velocity_smooth', 'heartrate', 'cadence', 'moving', 'grade_smooth' ]

    # Some runs excluded b/c strange or atypical data
    exclude_from_training_set = [963076931, 984046012, 993157852, 643237811] # 

    N = 15                # Window for rolling averages
    heartrate_shift = 7   # Back shifting of heartrate
    moving_cut_off = 0.8  # Cut off point to distinguish between moving and stationary
    initial_exclude_period = 600 # Number of seconds to exclude initially from training set b/c high pulse in beginning

    #acts = client.get_activities(after = "2016-05-01T00:00:00Z", before = "2016-07-14T00:00:00Z", limit=200)
    #acts = client.get_activities(after = "2017-05-01T00:00:00Z", before = "2017-07-30T00:00:00Z", limit=200)
    
    start_time_str = "2017-05-01T00:00:00Z"
    print("Model will be fitted with data since " + start_time_str)
    
    acts = client.get_activities(after = start_time_str, limit=200)
    acts = chain(acts, client.get_activities(after = "2016-07-15T00:00:00Z",  limit=1)) # LV 2016

    activity_count = 0

    train_x = []
    train_y = []
    
    for activity in acts:
        try:
            streams = client.get_activity_streams(activity.id, types=types, resolution='high')
        except:
            print("Exception getting %d" % activity.id)

        if 'latlng' in streams.keys() and 'heartrate' in streams.keys():
            activities[activity.id] = {'features' : [], 'velocity' : [], 'altitude' : []}

            # Smooth inputs
            grade_avg = pd.rolling_mean(np.asarray(streams['grade_smooth'].data), N)
            velocity_avg = pd.rolling_mean(np.asarray(streams['velocity_smooth'].data), N)
            heartrate_avg = pd.rolling_mean(np.asarray(streams['heartrate'].data), N)
            moving_avg = pd.rolling_mean(np.asarray(streams['moving'].data), N)

            # Initialize section arrays
            section_distance = np.zeros(len(grade_avg))
            section_grade = np.zeros(len(grade_avg))
            section_grade_distance = np.zeros(len(grade_avg))
            section_elevation = np.zeros(len(grade_avg))
            section_heartrate = np.zeros(len(grade_avg))

            # Analyze hill sections
            si = N + 1
            ei = len(streams['latlng'].data) - heartrate_shift

            ascending = 1 if grade_avg[si] > 0 else 0
            grade_cum = 0
            heartrate_cum = 0

            for i in range(si, ei):
                moving_avg[i] = moving_avg[i] if moving_avg[i] < moving_cut_off else 1

                if (ascending == 1 and grade_avg[i] <= 0) or (ascending == 0 and grade_avg[i] > 0) or (i == ei):
                    for j in range(si, i):
                        section_distance[j] = streams['distance'].data[i] - streams['distance'].data[si]
                        section_grade[j] = grade_cum / (i - si)
                        section_heartrate[j] = heartrate_cum / (i - si)

                    grade_cum = 0
                    heartrate_cum = 0
                    si = i
                    ascending = 1 if grade_avg[i] > 0 else 0
                else:
                    grade_cum = grade_cum + grade_avg[i] 
                    heartrate_cum = heartrate_cum + heartrate_avg[i]

            # Prepare arrays for model and store values
            for i in range(N + 1, ei):
                #features = [streams['time'].data[i], heartrate_avg[i + heartrate_shift], grade_avg[i], abs(grade_avg[i]), section_distance[i], section_grade[i], abs(section_grade[i]), moving_avg[i], streams['distance'].data[i]] 
                features = [heartrate_avg[i + heartrate_shift], grade_avg[i], abs(grade_avg[i]), section_distance[i], section_grade[i], abs(section_grade[i]), moving_avg[i], streams['distance'].data[i]] 

                activities[activity.id]['features'].append(features)
                activities[activity.id]['altitude'].append(streams['altitude'].data[i])
                activities[activity.id]['velocity'].append(velocity_avg[i])

                if activity.id not in exclude_from_training_set and streams['time'].data[i] > initial_exclude_period:
                    train_x.append(features) 
                    train_y.append(velocity_avg[i])

            if activity.id in exclude_from_training_set:
                print("Got %d. Skipping from training set" % activity.id) 
            else:
                print("Got %d" % activity.id)
        else:
            print("No heartrate data for %d. Skipping" % activity.id)

    print("Imported %d actvities" % len(activities))
    print("Fitting model...")
    
    reg.fit (train_x, train_y)  

    print("\nModel ready:\n")
    print("  R^2: %.5f \n" % reg.score(train_x, train_y))
    print("  w_heartrate:         %.5f" % reg.coef_[0])
    print("  w_grade:             %.5f" % reg.coef_[1])
    print("  w_grade_abs:         %.5f" % reg.coef_[2])
    print("  w_section_distance:  %.5f" % reg.coef_[3])
    print("  w_section_grade:     %.5f" % reg.coef_[4])
    print("  w_section_grade_abs: %.5f" % reg.coef_[5])
    print("  w_moving:            %.5f" % reg.coef_[6])
    print("  w_distance:          %.5f" % reg.coef_[7])
    print("  w_intercept:         %.5f" % reg.intercept_)
       
txt_at.on_submit(handle_submit_token)

# 



Using access token: 73a2e28e72713e830ce1039ebabc843658c274eb


Unable to set attribute membership on entity <Club id=197753 name='Laugavegshópur Sigga P. og hlaup.is 2016.' resource_state=2>
Unable to set attribute admin on entity <Club id=197753 name='Laugavegshópur Sigga P. og hlaup.is 2016.' resource_state=2>
Unable to set attribute owner on entity <Club id=197753 name='Laugavegshópur Sigga P. og hlaup.is 2016.' resource_state=2>
Unable to set attribute membership on entity <Club id=271427 name='Laugavegshópur Náttúruhlaupa' resource_state=2>
Unable to set attribute admin on entity <Club id=271427 name='Laugavegshópur Náttúruhlaupa' resource_state=2>
Unable to set attribute owner on entity <Club id=271427 name='Laugavegshópur Náttúruhlaupa' resource_state=2>


Model will be fitted with data since 2017-05-01T00:00:00Z


No such attribute utc_offset on entity <Activity id=971273973 name='Morning Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=971273951 name='Afternoon Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=971273964 name='Evening Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=974078533 name='Morning Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=984046080 name='Evening Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=984051825 name='Afternoon Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=984046056 name='Evening Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=984046087 name='Morning Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=984046012 name='Morning Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=988035698 name='Evening Run' resource_state=2>
No suc

Got 971273973
Got 971273951
No heartrate data for 971273964. Skipping
No heartrate data for 974078533. Skipping
No heartrate data for 984046080. Skipping
Exception getting 984051825
No heartrate data for 984051825. Skipping
No heartrate data for 984046056. Skipping
Got 984046087
Got 984046012. Skipping from training set
Got 988035698
Got 993157852. Skipping from training set
No heartrate data for 997454297. Skipping
Got 997454316
No heartrate data for 1005008784. Skipping
No heartrate data for 1005008778. Skipping
Got 1005008762
Got 1005008806
No heartrate data for 1008357047. Skipping
Got 1015118517
Got 1015118496
Got 1015118502
Got 1022625248
Got 1022625277
Got 1022624524
Got 1025861984
Got 1030393121
Got 1030393136
Got 1030393107
Got 1030393101
Got 1038675802
Got 1038675800
Got 1038675805
Got 1038675792
Got 1043349674
Got 1052464069
Got 1052464054
Got 1052464061
Got 1052464060
Got 1052464067
Got 1058721063
Got 1058721034
Got 1058721062
Got 1062638363
Got 1062638367


No such attribute utc_offset on entity <Activity id=643237811 name='Morning Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765910 name='Night Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765902 name='Lunch Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765908 name='Afternoon Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765904 name='Evening Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765895 name='Afternoon Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765900 name='Afternoon Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765887 name='Evening Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765884 name='Afternoon Run' resource_state=2>
No such attribute utc_offset on entity <Activity id=700765883 name='Afternoon Run' resource_state=2>
No s

Got 643237811. Skipping from training set
Imported 36 actvities
Fitting model...

Model ready:

  R^2: 0.72162 

  w_heartrate:         0.01633
  w_grade:             -0.01059
  w_grade_abs:         -0.02074
  w_section_distance:  0.00005
  w_section_grade:     -0.02462
  w_section_grade_abs: -0.06728
  w_moving:            2.31161
  w_distance:          -0.00001
  w_intercept:         -1.05022


In [16]:
# Make predictions and plot
from ipywidgets import interact
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure, curdoc
from bokeh.layouts import row, column
from bokeh.models import Spacer, CustomJS, ColumnDataSource, Span
from bokeh.models.widgets import PreText, Select,RadioButtonGroup
from IPython.display import clear_output

output_notebook()

txt_hr = widgets.Text()
display(txt_hr)

def handle_submit_heartrate(sender):
    race_heartrate = float(txt_hr.value)
    
    clear_output()

    pi = np.array(activities[643237811]['features'])
    pi[:,0] = race_heartrate
    pi[:,6] = 1 

    predicted_speed = reg.predict(pi)
    actual_speed = np.array(activities[643237811]['velocity'])
    distance = [row[7] for row in pi]
    time = [row[0] for row in pi]
    altitude = np.array(activities[643237811]['altitude'])
    
    d = distance - np.roll(distance,1) 
    d[0] = 0

    actual_times = d / actual_speed
    actual_time = sum(actual_times)

    predicted_times = d / predicted_speed
    predicted_time = sum(predicted_times)
    
    # stops
    d1 = 10000  # Hrafntinnusker
    d2 = 21500  # Álftavatn
    d3 = 37400  # Emstrur
    d4 = distance[len(distance) - 1]

    l1 = d1
    l2 = d2 - d1
    l3 = d3 - d2
    l4 = d4 - d3

    i1 = np.searchsorted(distance, d1)
    i2 = np.searchsorted(distance, d2)
    i3 = np.searchsorted(distance, d3)

    a_t1 = sum(actual_times[0:i1]) / 60
    a_t2 = sum(actual_times[i1:i2]) / 60
    a_t3 = sum(actual_times[i2:i3]) / 60
    a_t4 = sum(actual_times[i3:len(actual_times)]) / 60

    p_t1 = sum(predicted_times[0:i1]) /60
    p_t2 = sum(predicted_times[i1:i2]) / 60
    p_t3 = sum(predicted_times[i2:i3]) / 60
    p_t4 = sum(predicted_times[i3:len(predicted_times)]) / 60

    print("Race 2016")
    print("Leg 1: %d min (%.1f%%) %.1f" % (a_t1, (a_t1 / (actual_time / 60) * 100), l1 / (a_t1 * 60)))
    print("Leg 2: %d min (%.1f%%) %.1f" % (a_t2, (a_t2 / (actual_time / 60) * 100), l2 / (a_t2 * 60)))
    print("Leg 3: %d min (%.1f%%) %.1f" % (a_t3, (a_t3 / (actual_time / 60) * 100), l3 / (a_t3 * 60)))
    print("Leg 4: %d min (%.1f%%) %.1f" % (a_t4, (a_t4 / (actual_time / 60) * 100), l4 / (a_t4 * 60)))
    print("Total: %d min" % (actual_time / 60))

    print("")

    print("Prediction 2017")
    print("Leg 1: %d min (%.1f%%) %.1f" % (p_t1, (p_t1 / (predicted_time / 60) * 100), l1 / (p_t1 * 60)))
    print("Leg 2: %d min (%.1f%%) %.1f" % (p_t2, (p_t2 / (predicted_time / 60) * 100), l2 / (p_t2 * 60)))
    print("Leg 3: %d min (%.1f%%) %.1f" % (p_t3, (p_t3 / (predicted_time / 60) * 100), l3 / (p_t3 * 60)))
    print("Leg 4: %d min (%.1f%%) %.1f" % (p_t4, (p_t4 / (predicted_time / 60) * 100), l4 / (p_t4 * 60)))
    print("Total: %d min" % (predicted_time / 60))

    # Show plots
    p_speed = figure(title="Predicted speed", plot_height=300, plot_width=900, x_range=(0,max(distance)), y_range=(0,5))
    r_a = p_speed.line(distance, actual_speed, color="#648ed1", line_width=1)
    r_p = p_speed.line(distance, predicted_speed, color="#79c45c", line_width=1)

    p_altitude = figure(title="Altitude", plot_height=300, plot_width=900, x_range=(0,max(distance)), y_range=(0,1200))
    r_a = p_altitude.line(distance, altitude, color="#648ed1", line_width=1)

    #p_grade = figure(title="Grade", plot_height=300, plot_width=900, x_range=(0,max(distance)), y_range=(-50,50))
    #r_g = p_grade.line(distance, [row[5] for row in pi], color="#648ed1", line_width=1)

    s1_1 = Span(location=d1,dimension='height', line_color='black', line_dash='dashed', line_width=1) 
    s2_1 = Span(location=d2,dimension='height', line_color='black', line_dash='dashed', line_width=1) 
    s3_1 = Span(location=d3,dimension='height', line_color='black', line_dash='dashed', line_width=1) 
    s1_2 = Span(location=d1,dimension='height', line_color='black', line_dash='dashed', line_width=1) 
    s2_2 = Span(location=d2,dimension='height', line_color='black', line_dash='dashed', line_width=1) 
    s3_2 = Span(location=d3,dimension='height', line_color='black', line_dash='dashed', line_width=1) 

    p_speed.add_layout(s1_1)
    p_speed.add_layout(s2_1)
    p_speed.add_layout(s3_1)
    p_altitude.add_layout(s1_2)
    p_altitude.add_layout(s2_2)
    p_altitude.add_layout(s3_2)

    layout = column(p_speed, Spacer(height=10), p_altitude)  #, Spacer(height=10), p_grade)
    show(layout, notebook_handle=True)

txt_hr.on_submit(handle_submit_heartrate)

Race 2016
Leg 1: 71 min (20.0%) 2.3
Leg 2: 71 min (20.0%) 2.7
Leg 3: 96 min (27.0%) 2.7
Leg 4: 118 min (33.0%) 2.2
Total: 358 min

Prediction 2017
Leg 1: 66 min (20.2%) 2.5
Leg 2: 66 min (20.1%) 2.9
Leg 3: 92 min (28.1%) 2.9
Leg 4: 104 min (31.5%) 2.6
Total: 330 min
