# Activities Processing

In [1]:
import pandas as pd
from datetime import *
import numpy as np
import math

In [2]:
import  plotly.plotly  as py
import plotly.graph_objs as go

In [3]:
metres_mile = 1609.34

In [4]:
activities_df = pd.read_json('alex_all_acts.json')

In [5]:
activities_df.columns

Index(['achievement_count', 'athlete', 'athlete_count', 'average_cadence',
       'average_heartrate', 'average_speed', 'average_watts', 'comment_count',
       'commute', 'device_watts', 'distance', 'elapsed_time', 'elev_high',
       'elev_low', 'end_latlng', 'external_id', 'flagged', 'from_accepted_tag',
       'gear_id', 'has_heartrate', 'has_kudoed', 'id', 'kilojoules',
       'kudos_count', 'location_city', 'location_country', 'location_state',
       'manual', 'map', 'max_heartrate', 'max_speed', 'max_watts',
       'moving_time', 'name', 'photo_count', 'pr_count', 'private',
       'resource_state', 'start_date', 'start_date_local', 'start_latitude',
       'start_latlng', 'start_longitude', 'timezone', 'total_elevation_gain',
       'total_photo_count', 'trainer', 'type', 'upload_id', 'utc_offset',
       'weighted_average_watts', 'workout_type'],
      dtype='object')

In [6]:
activities_df = activities_df[['average_speed','distance','moving_time','name','start_date_local','id','workout_type','type']]
activities_df = activities_df[activities_df.type == 'Run']

In [7]:
activities_df['url'] = activities_df['id'].apply(lambda x : 'https://www.strava.com/activities/1513967998' + str(x))

In [8]:
activities_df.drop('id', axis = 1, inplace = True)

In [9]:
activities_df['pace_mile'] = metres_mile / activities_df.average_speed
activities_df['pace_km'] = 1000 / activities_df.average_speed

In [10]:
activities_df['date'] = pd.to_datetime(activities_df.start_date_local.apply(lambda x : x.split('T')[0]))

In [11]:
activities_df.drop(['average_speed','start_date_local', 'type'], axis = 1, inplace = True)

In [12]:
activities_df.head(10)

Unnamed: 0,distance,moving_time,name,workout_type,url,pace_mile,pace_km,date
0,10324.0,2649,Morning Run,,https://www.strava.com/activities/151396799815...,412.96895,256.607647,2018-04-18
1,4347.8,1156,WD,0.0,https://www.strava.com/activities/151396799815...,427.902154,265.886732,2018-04-17
2,4063.7,977,Tune-up before Stanford v2,3.0,https://www.strava.com/activities/151396799815...,386.953595,240.442414,2018-04-17
3,4142.2,1134,WU,0.0,https://www.strava.com/activities/151396799815...,440.55297,273.747605,2018-04-17
4,8285.8,2101,Double,0.0,https://www.strava.com/activities/151396799815...,408.047667,253.549696,2018-04-16
5,10754.4,2657,Gonna be huge with all this extra protein past...,0.0,https://www.strava.com/activities/151396799815...,397.564229,247.035573,2018-04-16
6,21657.4,5271,Post-race LR,2.0,https://www.strava.com/activities/151396799815...,391.662205,243.368216,2018-04-15
7,1500.0,233,UCLA Invite 1500,1.0,https://www.strava.com/activities/151396799815...,249.975148,155.327742,2018-04-14
8,5576.1,1461,WD + “Threshold” + strides,0.0,https://www.strava.com/activities/151396799815...,421.624312,261.985853,2018-04-14
9,3914.8,993,WU,0.0,https://www.strava.com/activities/151396799815...,408.254693,253.678336,2018-04-14


In [13]:
activities_df.workout_type = activities_df.workout_type.fillna(0)

In [14]:
workout_type_dict = {0:'Run',1:'Race',2:'Long Run',3:'Workout'}

In [15]:
activities_df.workout_type = activities_df.workout_type.apply(lambda x : workout_type_dict[x])

In [16]:
activities_df['Distance (Miles)'] = activities_df.distance / metres_mile
activities_df['Distance (Kilometres)'] = activities_df.distance / 1000

In [17]:
activities_df['size'] = activities_df.moving_time.astype('float').apply(lambda x : math.sqrt(x))

In [18]:
activities_df

Unnamed: 0,distance,moving_time,name,workout_type,url,pace_mile,pace_km,date,Distance (Miles),Distance (Kilometres),size
0,10324.0,2649,Morning Run,Run,https://www.strava.com/activities/151396799815...,412.968950,256.607647,2018-04-18,6.415052,10.3240,51.468437
1,4347.8,1156,WD,Run,https://www.strava.com/activities/151396799815...,427.902154,265.886732,2018-04-17,2.701604,4.3478,34.000000
2,4063.7,977,Tune-up before Stanford v2,Workout,https://www.strava.com/activities/151396799815...,386.953595,240.442414,2018-04-17,2.525072,4.0637,31.256999
3,4142.2,1134,WU,Run,https://www.strava.com/activities/151396799815...,440.552970,273.747605,2018-04-17,2.573850,4.1422,33.674916
4,8285.8,2101,Double,Run,https://www.strava.com/activities/151396799815...,408.047667,253.549696,2018-04-16,5.148570,8.2858,45.836667
5,10754.4,2657,Gonna be huge with all this extra protein past...,Run,https://www.strava.com/activities/151396799815...,397.564229,247.035573,2018-04-16,6.682491,10.7544,51.546096
6,21657.4,5271,Post-race LR,Long Run,https://www.strava.com/activities/151396799815...,391.662205,243.368216,2018-04-15,13.457318,21.6574,72.601653
7,1500.0,233,UCLA Invite 1500,Race,https://www.strava.com/activities/151396799815...,249.975148,155.327742,2018-04-14,0.932059,1.5000,15.264338
8,5576.1,1461,WD + “Threshold” + strides,Run,https://www.strava.com/activities/151396799815...,421.624312,261.985853,2018-04-14,3.464837,5.5761,38.223030
9,3914.8,993,WU,Run,https://www.strava.com/activities/151396799815...,408.254693,253.678336,2018-04-14,2.432550,3.9148,31.511903


In [19]:
sizeref = 20*max(activities_df['size'])/(100**2)

In [38]:
activities_df.loc[10,].distance

2386.6999999999998

In [92]:
activities_text = []
for i in range(len(activities_df)):
    row = activities_df.iloc[i,]
    activities_text.append('{}<br>Distance: {}<br>Pace:{}'.format(row['name'][:80], row['distance'], row['pace_mile']))

activities_df['text'] = activities_text

In [93]:
activities_text[0]

'Morning Run<br>Distance: 10324.0<br>Pace:412.9689504747241'

In [160]:
data = []
for run_type in ['Run','Workout','Long Run','Race']:
    trace = go.Scatter(
        x=activities_df['Distance (Miles)'][activities_df['workout_type'] == run_type],
        y=activities_df['pace_mile'][activities_df['workout_type'] == run_type],
        mode='markers',
        hoverinfo='text',
        name=run_type,
        hovertext = activities_df['text'][activities_df['workout_type'] == run_type],
        marker=dict(
            symbol='circle',
            sizemode='area',
            sizeref=sizeref,
            size=activities_df['size'][activities_df['workout_type'] == run_type],
            line=dict(
                width=2
            ),
        )
    )
    data.append(trace)

In [162]:
layout = go.Layout(
    title='Run Summary',
    hovermode='closest',
    xaxis=dict(
        title='Distance (Miles)',
        gridcolor='rgb(255, 255, 255)',
        range=[0, 20],
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Pace (Seconds per Mile)',
        gridcolor='rgb(255, 255, 255)',
        range=[0,600],
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

In [163]:
fig = go.Figure(data=data, layout=layout)

In [164]:
py.iplot(fig, filename='bubble_chart_test.fig')

Bits to improve:
- tooltips text -- needs tweaking
- hyperlinks -- doesn't seem like we can do this
- filter -- doesn't seem like we can do this

In [103]:
??go.Scatter()