# Strava-Vis: Beta Release
**Alex Howard & Taylor Pellerin**

In [None]:
import pandas as pd
from datetime import *
import numpy as np
import math
import  plotly.plotly  as py
import plotly.graph_objs as go
import json
from geoplotlib.layers import BaseLayer
from geoplotlib.core import BatchPainter
import geoplotlib
from geoplotlib.colors import colorbrewer
from geoplotlib.utils import epoch_to_str, BoundingBox, read_csv

In [None]:
# Metres in one mile; the cells below use it to turn metre-based values into miles.
metres_mile = 1609.34
# Workout-type code -> display label (the code is the label's position in the list).
workout_type_dict = dict(enumerate(['Run', 'Race', 'Long Run', 'Workout']))

## Data Pre-Processing

In [None]:
# Activity summaries, one row per activity.
activities_df = pd.read_json('alex_all_acts.json')
# Raw lat/lng payload; opened in a `with` block so the file handle is closed
# as soon as parsing finishes instead of being left to the garbage collector.
with open('alex_activities_latlng.json') as latlng_file:
    data = json.load(latlng_file)

In [None]:
# Keep only the columns the later cells read.
activities_df = activities_df[['average_speed','distance','moving_time','name','start_date_local','id','workout_type','type']]
# Restrict to runs. The explicit .copy() makes the filtered frame an independent
# object, so the column assignments in the following cells do not raise
# pandas' SettingWithCopyWarning.
activities_df = activities_df[activities_df.type == 'Run'].copy()

In [None]:
# Pace = unit distance / speed. The charts label pace_mile as seconds per mile,
# so average_speed is presumably metres per second -- confirm against the data.
speed = activities_df['average_speed']
activities_df['pace_mile'] = metres_mile / speed
activities_df['pace_km'] = 1000 / speed

In [None]:
# Keep the part of each timestamp string before the 'T' and parse it as a date.
date_strings = activities_df.start_date_local.map(lambda stamp: stamp.partition('T')[0])
activities_df['date'] = pd.to_datetime(date_strings)
# Remove the raw columns; the cells below do not read them again.
activities_df = activities_df.drop(['average_speed', 'start_date_local', 'type', 'id'], axis=1)

In [None]:
# Activities with no workout type are treated as code 0.
workout_codes = activities_df['workout_type'].fillna(0)
# Swap each numeric code for its label; a code missing from
# workout_type_dict raises KeyError.
activities_df['workout_type'] = workout_codes.apply(workout_type_dict.__getitem__)

In [None]:
# Express the raw distance in both miles and kilometres
# (distance is presumably in metres, given the divisors).
raw_distance = activities_df['distance']
activities_df['miles'] = raw_distance / metres_mile
activities_df['Distance (Kilometres)'] = raw_distance / 1000

In [None]:
# Bubble size is the square root of the moving time.
moving_time_float = activities_df['moving_time'].astype('float')
activities_df['size'] = moving_time_float.apply(math.sqrt)
# Scaling factor handed to plotly's marker `sizeref` in the bubble chart.
largest_size = max(activities_df['size'])
sizeref = 20 * largest_size / (100 ** 2)
# Calendar year of each activity.
activities_df['year'] = activities_df['date'].apply(lambda day: day.year)

In [None]:
# Build the hover label shown for each run: name, date, distance and pace,
# separated by HTML line breaks.
activities_text = []
for _, row in activities_df.iterrows():
    # Drop non-ASCII characters, then decode back to text. Without the decode,
    # Python 3 formats the bytes object as "b'...'" inside the label.
    ascii_name = row['name'].encode('ascii', 'ignore').decode('ascii')
    activities_text.append(
        '{}<br>{}<br>'.format(ascii_name, row['date'])
        + '{:.1f} miles<br>{:.2f} seconds/mile'.format(row['miles'], row['pace_mile'])
    )

activities_df['text'] = activities_text

In [None]:
# Show the first two rows as a quick check of the derived columns.
activities_df.head(2)

## 1. Bubble Chart

In [None]:
# One scatter trace per workout type, so each type has its own legend entry.
data = []
for run_type in ['Run','Workout','Long Run','Race']:
    # Select the rows for this workout type once and reuse them for every field.
    runs_of_type = activities_df[activities_df['workout_type'] == run_type]
    bubble_marker = dict(
        symbol='circle',
        sizemode='area',
        sizeref=sizeref,
        size=runs_of_type['size'],
        line=dict(width=2),
    )
    trace = go.Scatter(
        x=runs_of_type['miles'],
        y=runs_of_type['pace_mile'],
        mode='markers',
        hoverinfo='text',
        opacity=0.8,
        name=run_type,
        hovertext=runs_of_type['text'],
        marker=bubble_marker,
    )
    data.append(trace)

# Grid and tick styling shared by both axes.
shared_axis_style = dict(
    gridcolor='rgb(255, 255, 255)',
    zerolinewidth=1,
    ticklen=5,
    gridwidth=2,
)

layout = go.Layout(
    title='Run Summary',
    hovermode='closest',
    xaxis=dict(title='Distance (Miles)', range=[0, 20], **shared_axis_style),
    yaxis=dict(title='Pace (Seconds per Mile)', range=[0,600], **shared_axis_style),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='bubble_chart_test.fig')

## 2. Parallel Coordinates

In [None]:
# Total miles per calendar day (several runs on one day are summed).
activities_grouped_df = activities_df.groupby(['date'], as_index=False)['miles'].sum()

run_days = activities_grouped_df['date']
# Day of week as a number, Monday == 0 ... Sunday == 6.
activities_grouped_df['dow'] = run_days.map(lambda day: day.weekday())
# Date of the Monday that starts each day's week.
activities_grouped_df['week_start'] = run_days.map(lambda day: day - timedelta(days=day.weekday()))

# Weekly totals, plus a frame holding one row per distinct week.
miles_per_week = activities_grouped_df.groupby(['week_start'], as_index=False)['miles'].sum()
by_week_df = pd.DataFrame(activities_grouped_df['week_start'].unique(), columns=['week_start'])

In [None]:
# Add seven constant helper columns named '0'..'6', one per weekday number;
# they act as join keys against the 'dow' column below.
for weekday in range(7):
    by_week_df[str(weekday)] = weekday

# Left-join the daily totals once per weekday. From the second join onward the
# right-hand columns clash with existing ones and take the suffix '_<weekday>',
# which yields 'miles', 'miles_1', ..., 'miles_6'.
for weekday in range(7):
    by_week_df = by_week_df.merge(
        activities_grouped_df,
        how='left',
        left_on=['week_start', str(weekday)],
        right_on=['week_start', 'dow'],
        suffixes=('', '_' + str(weekday)),
    )

In [None]:
# Keep the week key and the seven per-day mileage columns; the first join left
# Monday's column unsuffixed, so rename it to match the others.
per_day_columns = ['miles'] + ['miles_{}'.format(day) for day in range(1, 7)]
by_week_df = by_week_df[['week_start'] + per_day_columns]
by_week_df = by_week_df.rename(columns={'miles': 'miles_0'})
by_week_df['year'] = by_week_df['week_start'].map(lambda monday: monday.year)
# Days without a run have no match in the joins; count them as zero miles.
by_week_df = by_week_df.fillna(0)
# Attach the weekly total as a fresh 'miles' column.
by_week_df = by_week_df.merge(miles_per_week, how='left', on='week_start')

In [None]:
# Weekday number -> name, with Monday as 0 to match the 'dow' column.
days_dict = dict(enumerate([
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]))

In [None]:
# One parallel-coordinates axis per weekday, each spanning 0-20 and labelled
# with the day name.
dimensions = [
    dict(
        range=[0, 20],
        constraintrange=[0, 20],
        label=days_dict[day],
        values=by_week_df['miles_{}'.format(day)],
    )
    for day in range(7)
]

In [None]:
# Colour each week's line by its total mileage.
week_line_style = dict(
    color=by_week_df['miles'],
    colorscale='Hot',
    showscale=True,
    reversescale=True,
)

data = [
    go.Parcoords(
        line=week_line_style,
        opacity=0.5,
        dimensions=dimensions,
        hoverinfo='text',
    )
]

layout = go.Layout(
    title='Miles per week broken down by day',
    plot_bgcolor='#E5E5E5',
    paper_bgcolor='#E5E5E5',
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='parcoords')

## 3. Miles Per Week

In [None]:
# Last day of each week: six days after the week's start, stored as a plain date.
week_starts = by_week_df['week_start']
by_week_df['week_end'] = week_starts.map(lambda monday: (monday + timedelta(days=6)).date())

In [None]:
# One bar trace per weekday, holding that day's miles for every week.
data = [
    go.Bar(
        x=by_week_df['week_start'],
        y=by_week_df['miles_{}'.format(day)],
        name=days_dict[day],
    )
    for day in range(7)
]

In [None]:
# Minimal stacked-bar layout.
# NOTE(review): `layout` is rebound in the next cell to a fuller dict that also
# sets barmode='stack', so this object is replaced before the figure is built.
layout = go.Layout(
    barmode='stack'
)

In [None]:
# Buttons for the x-axis range selector, ending with "show everything".
range_buttons = [
    {'count': 1, 'label': '1m', 'step': 'month', 'stepmode': 'backward'},
    {'count': 6, 'label': '6m', 'step': 'month', 'stepmode': 'backward'},
    {'count': 1, 'label': 'YTD', 'step': 'year', 'stepmode': 'todate'},
    {'count': 1, 'label': '1y', 'step': 'year', 'stepmode': 'backward'},
    {'step': 'all'},
]

# Stacked-bar layout with a date x-axis, a range selector and a range slider.
layout = {
    'barmode': 'stack',
    'hovermode': 'closest',
    'title': 'Miles per week',
    'xaxis': {
        'rangeselector': {'buttons': range_buttons},
        'rangeslider': {},
        'type': 'date',
    },
}



In [None]:
# Combine the per-weekday bar traces with the stacked layout and render the
# figure under the plotly filename 'stacked-bar'.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-bar')

## 4. Geographic Visualisation

In [None]:
class AllTrailsLayer(BaseLayer):
    """Animated layer that plots the points of every runner in 'alex.csv'.

    Each frame draws the points whose timestamp falls in the window
    ``(t, t + 15*60]``, coloured per ``runner_id``. ``t`` then advances by
    ``2*60`` and wraps back to the first timestamp once it passes the last.
    Timestamps are presumably epoch seconds, as the ``epoch_to_str`` call
    suggests, which would make these 15- and 2-minute steps -- confirm
    against the CSV.
    """

    def __init__(self):
        """Load the CSV, build the per-runner colour map and reset the clock."""
        self.data = read_csv('alex.csv')
        self.cmap = colorbrewer(self.data['runner_id'], alpha=220)
        # The data is not modified after loading, so find the time bounds once
        # here rather than rescanning the whole timestamp column every frame.
        self.t_min = self.data['timestamp'].min()
        self.t_max = self.data['timestamp'].max()
        self.t = self.t_min
        self.painter = BatchPainter()

    def draw(self, proj, mouse_x, mouse_y, ui_manager):
        """Draw the current time window, then advance the animation clock."""
        self.painter = BatchPainter()
        timestamps = self.data['timestamp']
        window = self.data.where((timestamps > self.t) & (timestamps <= self.t + 15*60))

        # One batch of points per runner so each runner keeps its own colour.
        for runner_id in set(window['runner_id']):
            runner_points = window.where(window['runner_id'] == runner_id)
            self.painter.set_color(self.cmap[runner_id])
            x, y = proj.lonlat_to_screen(runner_points['lon'], runner_points['lat'])
            self.painter.points(x, y, 10)

        self.t += 2*60

        # Loop the animation once the clock runs past the last data point.
        if self.t > self.t_max:
            self.t = self.t_min

        self.painter.batch_draw()
        ui_manager.info(epoch_to_str(self.t))

    # this should get modified as well moving forward. Might be too small
    def bbox(self):
        """Return the fixed, hard-coded bounding box for this layer."""
        return BoundingBox(north=37.801421, west=-122.517339, south=37.730097, east=-122.424474)

In [None]:
# Register the all-runners animation layer, then call geoplotlib.show() to display it.
geoplotlib.add_layer(AllTrailsLayer())
geoplotlib.show()

In [None]:
class FollowTrailsLayer(BaseLayer):
    """Animated layer that follows a single runner from 'alex.csv'.

    Each frame draws, as one blue line strip, the points whose timestamp falls
    in the window ``(t, t + 30*60]`` and refits the projection to them. ``t``
    then advances by 30 and wraps back to the first timestamp once it passes
    the last. Timestamps are presumably epoch seconds, as the ``epoch_to_str``
    call suggests -- confirm against the CSV.
    """

    def __init__(self, runner_index=2):
        """Load the CSV and keep only one runner's points.

        ``runner_index`` selects the runner by position in
        ``list(set(runner_ids))``. The default of 2 is the value that used to
        be hard-coded. Set ordering is arbitrary, so the index does not name a
        specific runner; an index beyond the number of distinct runners raises
        IndexError.
        """
        self.data = read_csv('alex.csv')
        runner_ids = list(set(self.data['runner_id']))
        self.data = self.data.where(self.data['runner_id'] == runner_ids[runner_index])
        # The data is not modified after loading, so find the time bounds once
        # here rather than rescanning the timestamp column every frame.
        self.t_min = self.data['timestamp'].min()
        self.t_max = self.data['timestamp'].max()
        self.t = self.t_min
        self.painter = BatchPainter()

    def draw(self, proj, mouse_x, mouse_y, ui_manager):
        """Draw the current time window, then advance the animation clock."""
        self.painter = BatchPainter()
        self.painter.set_color([0,0,255])
        timestamps = self.data['timestamp']
        window = self.data.where((timestamps > self.t) & (timestamps <= self.t + 30*60))

        # The window can be empty, e.g. when t equals the last timestamp or the
        # data has a gap. Skip fitting and drawing then, so
        # BoundingBox.from_points is never handed zero points
        # (NOTE(review): presumably it cannot derive bounds from none -- confirm).
        if len(window['lon']) > 0:
            proj.fit(BoundingBox.from_points(lons=window['lon'], lats=window['lat']), max_zoom=14)
            x, y = proj.lonlat_to_screen(window['lon'], window['lat'])
            self.painter.linestrip(x, y, 10)

        self.t += 30
        # Loop the animation once the clock runs past the last data point.
        if self.t > self.t_max:
            self.t = self.t_min

        self.painter.batch_draw()
        ui_manager.info(epoch_to_str(self.t))

In [None]:
# Register the single-runner follow layer, then call geoplotlib.show() to display it.
geoplotlib.add_layer(FollowTrailsLayer())
geoplotlib.show()