# Activities Processing

In [87]:
import pandas as pd
from datetime import *
import numpy as np
import math

In [88]:
import  plotly.plotly  as py
import plotly.graph_objs as go

In [89]:
metres_mile = 1609.34

In [90]:
activities_df = pd.read_json('alex_all_acts.json')

In [91]:
activities_df.columns

Index(['achievement_count', 'athlete', 'athlete_count', 'average_cadence',
       'average_heartrate', 'average_speed', 'average_watts', 'comment_count',
       'commute', 'device_watts', 'distance', 'elapsed_time', 'elev_high',
       'elev_low', 'end_latlng', 'external_id', 'flagged', 'from_accepted_tag',
       'gear_id', 'has_heartrate', 'has_kudoed', 'id', 'kilojoules',
       'kudos_count', 'location_city', 'location_country', 'location_state',
       'manual', 'map', 'max_heartrate', 'max_speed', 'max_watts',
       'moving_time', 'name', 'photo_count', 'pr_count', 'private',
       'resource_state', 'start_date', 'start_date_local', 'start_latitude',
       'start_latlng', 'start_longitude', 'timezone', 'total_elevation_gain',
       'total_photo_count', 'trainer', 'type', 'upload_id', 'utc_offset',
       'weighted_average_watts', 'workout_type'],
      dtype='object')

In [92]:
activities_df = activities_df[['average_speed','distance','moving_time','name','start_date_local','id','workout_type','type']]
activities_df = activities_df[activities_df.type == 'Run']

In [93]:
activities_df['url'] = activities_df['id'].apply(lambda x : 'https://www.strava.com/activities/1513967998' + str(x))

In [94]:
activities_df.drop('id', axis = 1, inplace = True)

In [95]:
activities_df['pace_mile'] = metres_mile / activities_df.average_speed
activities_df['pace_km'] = 1000 / activities_df.average_speed

In [96]:
activities_df['date'] = pd.to_datetime(activities_df.start_date_local.apply(lambda x : x.split('T')[0]))

In [97]:
activities_df.drop(['average_speed','start_date_local', 'type', 'id'], axis = 1, inplace = True)

In [98]:
activities_df.workout_type = activities_df.workout_type.fillna(0)

In [99]:
workout_type_dict = {0:'Run',1:'Race',2:'Long Run',3:'Workout'}

In [100]:
activities_df.workout_type = activities_df.workout_type.apply(lambda x : workout_type_dict[x])

In [101]:
activities_df['miles'] = activities_df.distance / metres_mile
activities_df['Distance (Kilometres)'] = activities_df.distance / 1000

In [102]:
activities_df['size'] = activities_df.moving_time.astype('float').apply(lambda x : math.sqrt(x))

In [103]:
sizeref = 20*max(activities_df['size'])/(100**2)

In [104]:
activities_df['year'] = activities_df.date.apply(lambda x: x.year)

In [105]:
activities_df.head(10)

Unnamed: 0,distance,moving_time,name,workout_type,url,pace_mile,pace_km,date,miles,Distance (Kilometres),size,year
0,10324.0,2649,Morning Run,Run,https://www.strava.com/activities/151396799815...,412.96895,256.607647,2018-04-18,6.415052,10.324,51.468437,2018
1,4347.8,1156,WD,Run,https://www.strava.com/activities/151396799815...,427.902154,265.886732,2018-04-17,2.701604,4.3478,34.0,2018
2,4063.7,977,Tune-up before Stanford v2,Workout,https://www.strava.com/activities/151396799815...,386.953595,240.442414,2018-04-17,2.525072,4.0637,31.256999,2018
3,4142.2,1134,WU,Run,https://www.strava.com/activities/151396799815...,440.55297,273.747605,2018-04-17,2.57385,4.1422,33.674916,2018
4,8285.8,2101,Double,Run,https://www.strava.com/activities/151396799815...,408.047667,253.549696,2018-04-16,5.14857,8.2858,45.836667,2018
5,10754.4,2657,Gonna be huge with all this extra protein past...,Run,https://www.strava.com/activities/151396799815...,397.564229,247.035573,2018-04-16,6.682491,10.7544,51.546096,2018
6,21657.4,5271,Post-race LR,Long Run,https://www.strava.com/activities/151396799815...,391.662205,243.368216,2018-04-15,13.457318,21.6574,72.601653,2018
7,1500.0,233,UCLA Invite 1500,Race,https://www.strava.com/activities/151396799815...,249.975148,155.327742,2018-04-14,0.932059,1.5,15.264338,2018
8,5576.1,1461,WD + “Threshold” + strides,Run,https://www.strava.com/activities/151396799815...,421.624312,261.985853,2018-04-14,3.464837,5.5761,38.22303,2018
9,3914.8,993,WU,Run,https://www.strava.com/activities/151396799815...,408.254693,253.678336,2018-04-14,2.43255,3.9148,31.511903,2018


In [106]:
activities_text = []
for i in range(len(activities_df)):
    row = activities_df.iloc[i,]
    activities_text.append('<a href={}>{}</a><br>{}<br>'.format(row['url'],row['name'],row['date'].date()) + '{:.1f} miles<br>{:.2f} seconds/mile'.format(row['miles'], row['pace_mile']))

activities_df['text'] = activities_text

In [107]:
activities_text[0]

'<a href=https://www.strava.com/activities/15139679981516810737>Morning Run</a><br>2018-04-18<br>6.4 miles<br>412.97 seconds/mile'

## Bubble Charts:

In [108]:
data = []
for run_type in ['Run','Workout','Long Run','Race']:
    trace = go.Scatter(
        x=activities_df['miles'][activities_df['workout_type'] == run_type],
        y=activities_df['pace_mile'][activities_df['workout_type'] == run_type],
        mode='markers',
        hoverinfo='text',
        opacity = 0.8,
        name=run_type,
        hovertext = activities_df['text'][activities_df['workout_type'] == run_type],
        marker=dict(
            symbol='circle',
            sizemode='area',
            sizeref=sizeref,
            size=activities_df['size'][activities_df['workout_type'] == run_type],
            line=dict(
                width=2
            ),
        )
    )
    data.append(trace)

In [109]:
layout = go.Layout(
    title='Run Summary',
    hovermode='closest',
    xaxis=dict(
        title='Distance (Miles)',
        gridcolor='rgb(255, 255, 255)',
        range=[0, 20],
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Pace (Seconds per Mile)',
        gridcolor='rgb(255, 255, 255)',
        range=[0,600],
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

In [110]:
fig = go.Figure(data=data, layout=layout)

In [111]:
py.iplot(fig, filename='bubble_chart_test.fig')

Bits to improve:
- filter (w/ button)
- hyperlinks
- sort out pace in minutes and seconds?

## Parallel Coordinates:

In [26]:
activities_grouped_df = activities_df.groupby(['date'], as_index = False)['miles'].sum()

In [27]:
activities_grouped_df['dow'] = activities_grouped_df.date.apply(lambda x : x.weekday())
activities_grouped_df['week_start'] = activities_grouped_df.date.apply(lambda x : x - timedelta(days=x.weekday()))

In [28]:
miles_per_week = activities_grouped_df.groupby(['week_start'], as_index = False).miles.sum()

In [29]:
by_week_df = pd.DataFrame(activities_grouped_df.week_start.unique(), columns = ['week_start'])

In [30]:
for i in range(7):
    by_week_df['{}'.format(i)] = i

In [31]:
for i in range(7):
    by_week_df = pd.merge(by_week_df, activities_grouped_df, left_on = ['week_start','{}'.format(i)], right_on = ['week_start','dow'], how='left',suffixes = ('','_{}'.format(i)))

In [32]:
by_week_df = by_week_df[['week_start','miles','miles_1','miles_2','miles_3','miles_4','miles_5','miles_6']]
by_week_df.columns = ['week_start','miles_0','miles_1','miles_2','miles_3','miles_4','miles_5','miles_6']

In [33]:
by_week_df['year'] = by_week_df['week_start'].apply(lambda x : x.year)

In [34]:
by_week_df.fillna(0, inplace = True)

In [35]:
by_week_df = pd.merge(by_week_df, miles_per_week, how='left', on='week_start')

In [36]:
days_dict = {0:'Monday',1:'Tuesday',2:'Wednesday',3:'Thursday',4:'Friday',5:'Saturday',6:'Sunday'}

In [37]:
dimensions = list()

for i in range(7):
    dimensions.append(
            dict(range = [0,20],
                constraintrange = [0,20],
                label = '{}'.format(days_dict[i]), values = by_week_df['miles_{}'.format(i)]))

In [38]:
data = [
    go.Parcoords(
        line = dict(color = by_week_df['miles'],
                   colorscale = 'Hot',
                   showscale = True,
                   reversescale=True),
        opacity=0.5,
        dimensions = dimensions,hoverinfo='text')
        
]

In [39]:
layout = go.Layout(
    plot_bgcolor = '#E5E5E5',
    paper_bgcolor = '#E5E5E5',
    title = 'Miles per week broken down by day'
)

In [40]:
fig = go.Figure(data = data, layout = layout)
py.iplot(fig, filename = 'parcoords')

# Stacked Bar Charts

In [41]:
by_week_df['week_end'] = by_week_df['week_start'].apply(lambda x: (x + timedelta(days=6)).date())

In [42]:
data = []
for i in range(7):
    data.append(go.Bar(
        x=by_week_df['week_start'],
        y=by_week_df['miles_{}'.format(i)],
        name=days_dict[i]))

In [43]:
layout = go.Layout(
    barmode='stack'
)

In [44]:
layout = dict(
    barmode='stack',
    hovermode='closest',
    title='Miles per week',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label='1m',
                     step='month',
                     stepmode='backward'),
                dict(count=6,
                     label='6m',
                     step='month',
                     stepmode='backward'),
                dict(count=1,
                    label='YTD',
                    step='year',
                    stepmode='todate'),
                dict(count=1,
                    label='1y',
                    step='year',
                    stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(),
        type='date'
    )
)


In [45]:
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-bar')

# Seems to work better with smaller number of weeks - information sort of gets lost otherwise

Maybe split this one up now by workout/long run/race

## Animation Bubble Chart:

In [183]:
years = by_week_df.year.unique()

In [184]:
years

array([2009, 2014, 2015, 2016, 2017, 2018])

In [213]:
figure = {
    'data': [],
    'layout': {},
    'frames': []
}


In [214]:
figure['layout']['xaxis'] = {'range': [0, 20], 'title': 'Distance (Miles)'}
figure['layout']['yaxis'] = {'title': 'Pace (seconds per mile)', 'range':[0,600]}
figure['layout']['hovermode'] = 'closest'
figure['layout']['sliders'] = {
    'args': [
        'transition', {
            'duration': 400,
            'easing': 'cubic-in-out'
        }
    ],
    'initialValue': str(years[1]),
    'plotlycommand': 'animate',
    'values': years[1:],
    'visible': True
}

In [215]:
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [None, {'frame': {'duration': 500, 'redraw': True},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

In [216]:
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

In [217]:
year = years[1]

In [218]:
year

2014

In [219]:
for run_type in ['Run']:
    dataset_by_year_and_type = activities_df[activities_df.year==year][:20]

    data_dict = {
        'x': list(dataset_by_year_and_type['miles']),
        'y': list(dataset_by_year_and_type['pace_mile']),
        'mode': 'markers',
         'hovertext':dataset_by_year_and_type['text'],
        'marker': {
            'sizemode': 'area',
            'sizeref': sizeref,
            'size': dataset_by_year_and_type['size']
        },
        'name': run_type
    }
    figure['data'].append(data_dict)

In [220]:
for year in years[1:]:
    frame = {'data': [], 'name': str(year)}
    for run_type in ['Run']:
        dataset_by_year_and_type = activities_df[activities_df.year==int(year)][:20]

        data_dict = {
            'x': list(dataset_by_year_and_type['miles']),
            'y': list(dataset_by_year_and_type['pace_mile']),
            'mode': 'markers',
             'hovertext':dataset_by_year_and_type['text'],
            'marker': {
                'sizemode': 'area',
                'sizeref': sizeref,
                'size': dataset_by_year_and_type['size']
            },
            'name': 'hello'
        }
        frame['data'].append(data_dict)

    figure['frames'].append(frame)
    slider_step = {'args': [
        [year],
        {'frame': {'duration': 300, 'redraw': True},
         'mode': 'immediate',
       'transition': {'duration': 300}}
     ],
     'label': year,
     'method': 'animate'}
    sliders_dict['steps'].append(slider_step)

In [221]:
figure['layout']['sliders'] = [sliders_dict]

In [222]:
iplot(figure)