# Strava-Vis: Beta Release
**Alex Howard & Taylor Pellerin**

In [33]:
import pandas as pd
from datetime import *
import numpy as np
import math
import  plotly.plotly  as py
import plotly.graph_objs as go
import json
from geoplotlib.layers import BaseLayer
from geoplotlib.core import BatchPainter
import geoplotlib
from geoplotlib.colors import colorbrewer
from geoplotlib.utils import epoch_to_str, BoundingBox, read_csv

In [34]:
metres_mile = 1609.34
workout_type_dict = {0:'Run',1:'Race',2:'Long Run',3:'Workout'}

## Data Pre-Processing

In [35]:
data = json.load(open('alex_activities_latlng.json'))

In [36]:
len(data)

200

In [37]:
sample = data

In [38]:
alex_csv = "runner_id,lat,lon,timestamp\n"

for i in range(len(sample)):
    time_stamp = 0
    try:
        lat_lng = sample[i][0]["data"]
    except: continue
    try:
        for lat, lon in lat_lng:
            time_stamp += 120
            alex_csv += ",".join([str(i),
                                  str(lat),
                                  str(lon),
                                  str(time_stamp)
                                 ]) + "\n"
    except: pass

f = open("alex.csv", "w") 
f.write(alex_csv) 
f.close()

In [39]:
activities_df = pd.read_json('alex_all_acts.json')
data = json.load(open('alex_activities_latlng.json'))

In [40]:
activities_df.head()

Unnamed: 0,achievement_count,athlete,athlete_count,average_cadence,average_heartrate,average_speed,average_watts,comment_count,commute,device_watts,...,start_longitude,timezone,total_elevation_gain,total_photo_count,trainer,type,upload_id,utc_offset,weighted_average_watts,workout_type
0,0,"{u'resource_state': 1, u'id': 6831078}",1,91.4,,3.897,,0,False,,...,-122.45,(GMT-08:00) America/Los_Angeles,116.9,0,False,Run,1632966000.0,-25200,,
1,0,"{u'resource_state': 1, u'id': 6831078}",2,91.0,,3.761,,0,False,,...,-122.46,(GMT-08:00) America/Los_Angeles,25.2,0,False,Run,1630031000.0,-25200,,0.0
2,3,"{u'resource_state': 1, u'id': 6831078}",2,91.5,,4.159,,0,False,,...,-122.46,(GMT-08:00) America/Los_Angeles,0.0,0,False,Run,1630035000.0,-25200,,3.0
3,0,"{u'resource_state': 1, u'id': 6831078}",1,90.4,,3.653,,0,False,,...,-122.45,(GMT-08:00) America/Los_Angeles,26.1,0,False,Run,1629909000.0,-25200,,0.0
4,0,"{u'resource_state': 1, u'id': 6831078}",1,91.5,,3.944,,0,False,,...,-122.45,(GMT-08:00) America/Los_Angeles,53.5,0,False,Run,1628983000.0,-25200,,0.0


In [41]:
activities_df = activities_df[['average_speed','distance','moving_time','name','start_date_local','id','workout_type','type']]
activities_df = activities_df[activities_df.type == 'Run']

In [42]:
activities_df['pace_mile'] = metres_mile / activities_df.average_speed
activities_df['pace_km'] = 1000 / activities_df.average_speed

In [43]:
activities_df['date'] = pd.to_datetime(activities_df.start_date_local.apply(lambda x : x.split('T')[0]))
activities_df.drop(['average_speed','start_date_local', 'type', 'id'], axis = 1, inplace = True)

In [44]:
activities_df.workout_type = activities_df.workout_type.fillna(0)
activities_df.workout_type = activities_df.workout_type.apply(lambda x : workout_type_dict[x])

In [45]:
activities_df['miles'] = activities_df.distance / metres_mile
activities_df['Distance (Kilometres)'] = activities_df.distance / 1000

In [46]:
activities_df['size'] = activities_df.moving_time.astype('float').apply(lambda x : math.sqrt(x))
sizeref = 20*max(activities_df['size'])/(100**2)
activities_df['year'] = activities_df.date.apply(lambda x: x.year)

In [47]:
activities_text = []
for i in range(len(activities_df)):
    row = activities_df.iloc[i,]
    activities_text.append('{}<br>{}<br>'.format(row['name'].encode('ascii','ignore'),row['date'])+ '{:.1f} miles<br>{:.2f} seconds/mile'.format(row['miles'], row['pace_mile']))

activities_df['text'] = activities_text

In [48]:
activities_df.head(2)

Unnamed: 0,distance,moving_time,name,workout_type,pace_mile,pace_km,date,miles,Distance (Kilometres),size,year,text
0,10324.0,2649,Morning Run,Run,412.96895,256.607647,2018-04-18,6.415052,10.324,51.468437,2018,Morning Run<br>2018-04-18 00:00:00<br>6.4 mile...
1,4347.8,1156,WD,Run,427.902154,265.886732,2018-04-17,2.701604,4.3478,34.0,2018,WD<br>2018-04-17 00:00:00<br>2.7 miles<br>427....


## 1. Bubble Chart

In [49]:
data = []
for run_type in ['Run','Workout','Long Run','Race']:
    trace = go.Scatter(
        x=activities_df['miles'][activities_df['workout_type'] == run_type],
        y=activities_df['pace_mile'][activities_df['workout_type'] == run_type],
        mode='markers',
        hoverinfo='text',
        opacity = 0.8,
        name=run_type,
        hovertext = activities_df['text'][activities_df['workout_type'] == run_type],
        marker=dict(
            symbol='circle',
            sizemode='area',
            sizeref=sizeref,
            size=activities_df['size'][activities_df['workout_type'] == run_type],
            line=dict(
                width=2
            ),
        )
    )
    data.append(trace)

layout = go.Layout(
    title='Run Summary',
    hovermode='closest',
    xaxis=dict(
        title='Distance (Miles)',
        gridcolor='rgb(255, 255, 255)',
        range=[0, 20],
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Pace (Seconds per Mile)',
        gridcolor='rgb(255, 255, 255)',
        range=[0,600],
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='bubble_chart_test.fig')

Add button to filter by year (include an all)

## 2. Parallel Coordinates

In [50]:
activities_grouped_df = activities_df.groupby(['date'], as_index = False)['miles'].sum()
activities_grouped_df['dow'] = activities_grouped_df.date.apply(lambda x : x.weekday())
activities_grouped_df['week_start'] = activities_grouped_df.date.apply(lambda x : x - timedelta(days=x.weekday()))

miles_per_week = activities_grouped_df.groupby(['week_start'], as_index = False).miles.sum()
by_week_df = pd.DataFrame(activities_grouped_df.week_start.unique(), columns = ['week_start'])

In [51]:
for i in range(7):
    by_week_df['{}'.format(i)] = i

for i in range(7):
    by_week_df = pd.merge(by_week_df, activities_grouped_df, left_on = ['week_start','{}'.format(i)], right_on = ['week_start','dow'], how='left',suffixes = ('','_{}'.format(i)))

In [52]:
by_week_df = by_week_df[['week_start','miles','miles_1','miles_2','miles_3','miles_4','miles_5','miles_6']]
by_week_df.columns = ['week_start','miles_0','miles_1','miles_2','miles_3','miles_4','miles_5','miles_6']
by_week_df['year'] = by_week_df['week_start'].apply(lambda x : x.year)
by_week_df.fillna(0, inplace = True)
by_week_df = pd.merge(by_week_df, miles_per_week, how='left', on='week_start')

In [53]:
days_dict = {0:'Monday',1:'Tuesday',2:'Wednesday',3:'Thursday',4:'Friday',5:'Saturday',6:'Sunday'}

In [54]:
dimensions = list()

for i in range(7):
    dimensions.append(
            dict(range = [0,20],
                constraintrange = [0,20],
                label = '{}'.format(days_dict[i]), values = by_week_df['miles_{}'.format(i)]))

In [55]:
data = [
    go.Parcoords(
        line = dict(color = by_week_df['miles'],
                   colorscale = 'Hot',
                   showscale = True,
                   reversescale=True),
        opacity=0.5,
        dimensions = dimensions,hoverinfo='text')
        
]

layout = go.Layout(
    plot_bgcolor = '#E5E5E5',
    paper_bgcolor = '#E5E5E5',
    title = 'Miles per week broken down by day'
)

fig = go.Figure(data = data, layout = layout)
py.iplot(fig, filename = 'parcoords')

Add label for week date

## 3. Miles Per Week

In [56]:
by_week_df['week_end'] = by_week_df['week_start'].apply(lambda x: (x + timedelta(days=6)).date())

In [57]:
data = []
for i in range(7):
    data.append(go.Bar(
        x=by_week_df['week_start'],
        y=by_week_df['miles_{}'.format(i)],
        name=days_dict[i]))

In [58]:
layout = go.Layout(
    barmode='stack'
)

In [59]:
layout = dict(
    barmode='stack',
    hovermode='closest',
    title='Miles per week',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label='1m',
                     step='month',
                     stepmode='backward'),
                dict(count=6,
                     label='6m',
                     step='month',
                     stepmode='backward'),
                dict(count=1,
                    label='YTD',
                    step='year',
                    stepmode='todate'),
                dict(count=1,
                    label='1y',
                    step='year',
                    stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(),
        type='date'
    )
)


In [60]:
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-bar')

## Stacked Bar Chart by Race:

In [61]:
activities_df['week_start'] = activities_df.date.apply(lambda x : x - timedelta(days=x.weekday()))

In [62]:
activities_grouped_df_2 = activities_df.groupby(['workout_type','week_start'], as_index = False)['miles'].sum()

by_week_activity_df = pd.DataFrame(activities_grouped_df_2.week_start.unique(), columns = ['week_start'])

In [63]:
for i in activities_df.workout_type.unique():
    by_week_activity_df['{}'.format(i)] = i

In [64]:
by_week_activity_df.head()

Unnamed: 0,week_start,Run,Workout,Long Run,Race
0,2014-12-15,Run,Workout,Long Run,Race
1,2014-12-22,Run,Workout,Long Run,Race
2,2014-12-29,Run,Workout,Long Run,Race
3,2015-01-05,Run,Workout,Long Run,Race
4,2015-01-12,Run,Workout,Long Run,Race


In [65]:
by_week_activity_df['miles'] = 0

In [66]:
by_week_activity_df = pd.merge(by_week_activity_df, activities_grouped_df_2, left_on = ['week_start','{}'.format(activities_df.workout_type.unique()[0])], right_on = ['week_start','workout_type'], how='left',suffixes = ('','_{}'.format(activities_df.workout_type.unique()[0])))

In [68]:
for i in range(4):
    by_week_activity_df = pd.merge(by_week_activity_df, activities_grouped_df_2, left_on = ['week_start','{}'.format(activities_df.workout_type.unique()[i])], right_on = ['week_start','workout_type'], how='left',suffixes = ('','_{}'.format(activities_df.workout_type.unique()[i])))

In [69]:
by_week_activity_df.columns

Index([u'week_start', u'Run', u'Workout', u'Long Run', u'Race', u'miles',
       u'workout_type', u'miles_Run', u'workout_type_Run', u'miles_Run',
       u'workout_type_Workout', u'miles_Workout', u'workout_type_Long Run',
       u'miles_Long Run', u'workout_type_Race', u'miles_Race'],
      dtype='object')

In [70]:
by_week_activity_df = by_week_activity_df[['week_start','miles','miles_Workout','miles_Long Run','miles_Race']]

In [71]:
by_week_activity_df.columns = ['week_start','miles_Run','miles_Workout','miles_Long Run','miles_Race']
by_week_activity_df.fillna(0,inplace=True)

In [72]:
by_week_activity_df['miles_Run'] = np.array(by_week_activity_df['miles_Run']) + np.array(by_week_activity_df['miles_Long Run'])

In [73]:
data = []
custom_colours = ['blue','orange','red']
j=0
for i in ['Run','Workout','Race']:
    data.append(go.Bar(
        x=by_week_activity_df['week_start'],
        y=by_week_activity_df['miles_{}'.format(i)],
        marker=dict(color=custom_colours[j]),
        name=i))
    j+=1

In [74]:
layout = dict(
    barmode='stack',
    hovermode='closest',
    title='Miles per week',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label='1m',
                     step='month',
                     stepmode='backward'),
                dict(count=6,
                     label='6m',
                     step='month',
                     stepmode='backward'),
                dict(count=1,
                    label='YTD',
                    step='year',
                    stepmode='todate'),
                dict(count=1,
                    label='1y',
                    step='year',
                    stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(),
        type='date'
    )
)

In [75]:
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-bar')

In [38]:
races_df = activities_df[activities_df.workout_type == 'Race']

In [39]:
races_df

Unnamed: 0,distance,moving_time,name,workout_type,pace_mile,pace_km,date,miles,Distance (Kilometres),size,year,text,week_start
7,1500.0,233,UCLA Invite 1500,Race,249.975148,155.327742,2018-04-14,0.932059,1.5,15.264338,2018,UCLA Invite 1500<br>2018-04-14 00:00:00<br>0.9...,2018-04-09
36,3000.0,536,Stanford Invite 3k SC - 8:56,Race,287.53618,178.667143,2018-03-31,1.864118,3.0,23.151674,2018,Stanford Invite 3k SC - 8:56<br>2018-03-31 00:...,2018-03-26
60,1500.0,232,Hornet Invite 1500,Race,248.892669,154.655119,2018-03-17,0.932059,1.5,15.231546,2018,Hornet Invite 1500<br>2018-03-17 00:00:00<br>0...,2018-03-12
84,3000.0,510,Quad Meet 3k,Race,273.604216,170.010201,2018-03-04,1.864118,3.0,22.58318,2018,Quad Meet 3k<br>2018-03-04 00:00:00<br>1.9 mil...,2018-02-26
122,4540.8,899,Night Run,Race,318.618095,197.980598,2018-02-10,2.821529,4.5408,29.983329,2018,Night Run<br>2018-02-10 00:00:00<br>2.8 miles<...,2018-02-05
316,8136.1,1565,USF invite - 8th and 4th Don. Not used to raci...,Race,309.54799,192.344682,2017-09-02,5.055551,8.1361,39.560081,2017,USF invite - 8th and 4th Don. Not used to raci...,2017-08-28
446,3000.0,546,Harvard-Yale vs Oxford-Cambridge 3000 SC: 1st ...,Race,292.873521,181.983621,2017-04-08,1.864118,3.0,23.366643,2017,Harvard-Yale vs Oxford-Cambridge 3000 SC: 1st ...,2017-04-03
475,4310.2,793,Southern 12 stage L2: 13:13. Team victory!,Race,296.106716,183.99264,2017-03-25,2.678241,4.3102,28.160256,2017,Southern 12 stage L2: 13:13. Team victory!<br>...,2017-03-20
614,12008.1,2343,Kent XC Champs - 4th SM,Race,314.017561,195.121951,2017-01-07,7.461506,12.0081,48.404545,2017,Kent XC Champs - 4th SM<br>2017-01-07 00:00:00...,2017-01-02
670,11705.0,2340,Varsity - bit of a stinker,Race,321.739304,199.920032,2016-12-03,7.273168,11.705,48.373546,2016,Varsity - bit of a stinker<br>2016-12-03 00:00...,2016-11-28


annotations = []
for i in len(races_df):
    annotations.append(dict(
                                ))

## 4. Geographic Visualisation:

In [8]:
class AllTrailsLayer(BaseLayer):

    def __init__(self):
        self.data = read_csv('alex.csv')
        self.cmap = colorbrewer(self.data['runner_id'], alpha=220)
        self.t = self.data['timestamp'].min()
        self.painter = BatchPainter()


    def draw(self, proj, mouse_x, mouse_y, ui_manager):
        self.painter = BatchPainter()
        df = self.data.where((self.data['timestamp'] > self.t) & (self.data['timestamp'] <= self.t + 15*60))

        for taxi_id in set(df['runner_id']):
            grp = df.where(df['runner_id'] == taxi_id)
            self.painter.set_color(self.cmap[taxi_id])
            x, y = proj.lonlat_to_screen(grp['lon'], grp['lat'])
            self.painter.points(x, y, 10)

        self.t += 60

        if self.t > self.data['timestamp'].max():
            self.t = self.data['timestamp'].min()

        self.painter.batch_draw()
        ui_manager.info(epoch_to_str(self.t))

        
    # this should get modified as well moving forward. Might be too small
    def bbox(self):
        return BoundingBox(north=37.801421, west=-122.517339, south=37.730097, east=-122.424474)

In [None]:
geoplotlib.add_layer(AllTrailsLayer())
x = geoplotlib.show()

In [48]:
class FollowTrailsLayer(BaseLayer):

    def __init__(self):
        self.data = read_csv('alex.csv')
        self.data = self.data.where(self.data['runner_id'] == list(set(self.data['runner_id']))[6])
        self.t = self.data['timestamp'].min()
        self.painter = BatchPainter()


    def draw(self, proj, mouse_x, mouse_y, ui_manager):
        self.painter = BatchPainter()
        self.painter.set_color([0,0,255])
        df = self.data.where((self.data['timestamp'] > self.t) & (self.data['timestamp'] <= self.t + 30*60))
        proj.fit(BoundingBox.from_points(lons=df['lon'], lats=df['lat']), max_zoom=14)
        x, y = proj.lonlat_to_screen(df['lon'], df['lat'])
        self.painter.linestrip(x, y, 10)
        self.t += 250
        if self.t > self.data['timestamp'].max():
            self.t = self.data['timestamp'].min()

        self.painter.batch_draw()
        ui_manager.info(epoch_to_str(self.t))

In [49]:
geoplotlib.add_layer(FollowTrailsLayer())
geoplotlib.show()

## Next Steps:

**Alex:**
- Keep working on smaller charts
- Start to build basic functionality into website
- Investigate using API to scrape other user's data

**Taylor:**
- re-build Python environment to run geoplotlib
- Download full set of Alex activities w/ rate limiting
- Fully integrate into geoplotlib plots

Alex things to consider:
- do miles per week broken down by run type
- histogram of run distances
- bubble chart of "live" activities

In [66]:
np.array([1,2,3])+np.array([1,2,3])

array([2, 4, 6])