In [None]:
import pandas as pd
import geopandas as gpd
import os
import json
import colorcet as cc
colour_palette = cc.CET_R3

from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook, reset_output

output_notebook()

### Raise pandas' display limits to 500 columns / 500 rows so frames are easier to inspect.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

### The data should live outside the repo. Set the STRAVA_DATA_DIR environment variable to
### point at your own copy; the original author's folder is only the fallback.
### os.path.join(..., '') guarantees a trailing separator, which matters because the
### loading cells build file paths with `data_location + filename`.
data_location = os.path.join(os.environ.get('STRAVA_DATA_DIR', r'/Users/jj/code/strava_data/'), '')

In [None]:
def get_colours(n, palette=None):
    """Pick at least `n` evenly spaced colours from a palette.

    Every `int(len(palette) / n)`-th colour is taken, starting with the first, so
    the result can be slightly longer than `n` (for example 4 colours for n=3 on a
    256-colour palette). Callers that need an exact count should slice the result.

    Parameters
    ----------
    n : int
        Minimum number of colours wanted.
    palette : sequence, optional
        Colours to sample from. Defaults to the notebook-wide `colour_palette`.

    Returns
    -------
    list
        At least `n` colours, or an empty list when `n` is not positive or the
        palette is empty.
    """
    if palette is None:
        palette = colour_palette
    palette = list(palette)

    # Without this guard n == 0 divides by zero, and an empty palette has nothing to pick.
    if n <= 0 or not palette:
        return []

    # Clamp the step to 1: asking for more colours than the palette holds would
    # otherwise give a step of 0 and a modulo-by-zero error.
    step = max(1, int(len(palette) / n))
    colours = palette[::step]

    # Only reachable when n exceeds the palette size: repeat the palette so callers
    # that index the result by position never run past the end.
    while len(colours) < n:
        colours += palette
    return colours

def aggregate(df, cat_columns=(), met_columns=(), aggregations=()):
    """Group `df` by the category columns and aggregate every metric column.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    cat_columns : str or sequence of str
        Column(s) to group by.
    met_columns : str or sequence of str
        Column(s) to aggregate.
    aggregations : str or sequence of str
        Aggregation name(s) applied to each metric column, e.g. ['max', 'sum'].

    Returns
    -------
    pandas.DataFrame
        One row per group: the category columns followed by one
        `<metric>_<aggregation>` column per metric/aggregation pair.
    """
    def _as_list(value):
        # A bare string is one name, not a sequence of characters.
        return [value] if isinstance(value, str) else list(value)

    cat_columns = _as_list(cat_columns)
    met_columns = _as_list(met_columns)
    aggregations = _as_list(aggregations)

    grouped = (
        df[cat_columns + met_columns]
        .groupby(cat_columns)
        .agg({metric: aggregations for metric in met_columns})
        .reset_index()
    )
    # After reset_index every column label is a (name, aggregation) tuple, with an
    # empty aggregation for the group keys; join the non-empty parts into flat names.
    grouped.columns = ['_'.join(part for part in col if len(part) > 0) for col in grouped.columns]
    # Return the frame (instead of printing it) so it can be displayed or reused.
    return grouped

### NOTE(review): `raw` is only assigned in the next cell (pd.read_csv), so on a fresh
### kernel this call raises NameError; it needs to run after that cell. Later cells read
### raw['moving_time'], so also confirm that a 'moving_time (seconds)' column exists.
aggregate(raw, cat_columns=['type'], met_columns=['moving_time (seconds)', 'kudos_count'], aggregations=['max', 'sum'])

In [None]:
### Load the cleaned activity export; the first CSV column becomes the index.
activities_path = data_location + 'all_activities_cleaned.csv'
raw = pd.read_csv(activities_path, index_col=0)
raw

In [None]:
### Swap gear ids for the bike names from the athlete profile, when that file is available.
athlete_info_path = data_location+'athlete_info_jj.json'
if os.path.isfile(athlete_info_path):
    # Context manager so the file handle is closed once the JSON is parsed.
    with open(athlete_info_path) as athlete_file:
        athlete_info = json.load(athlete_file)
    bikes = {x['id']: x['name'] for x in athlete_info['bikes']}

    gear = raw['gear_id']
    bike_names = gear.map(bikes)
    # Re-running this cell would otherwise map the already-translated names to NaN.
    # Values that are already bike names are kept, so the cell is idempotent; ids with
    # no matching bike still become NaN, exactly as with a plain .map(bikes).
    already_named = gear.where(gear.isin(list(bikes.values())))
    raw['gear_id'] = bike_names.where(bike_names.notna(), already_named)
    

In [None]:
def bar_chart_count(s):
    """Show a bar chart of how often each value occurs in `s`.

    Parameters
    ----------
    s : pandas.Series
        Named series of categories. Its name (first letter upper-cased) is used in
        the chart title; bars follow the order of `s.value_counts()`.
    """
    agged = s.value_counts()
    # Bokeh's categorical x_range takes string factors, so coerce the labels.
    x = [str(label) for label in agged.index]
    y = agged.to_list()
    # Request one colour per bar and trim the surplus, because get_colours can return
    # more than asked for. Requesting len(x)-1 divided by zero for a single bar and
    # could come back one colour short of the number of bars.
    c = get_colours(len(x))[:len(x)]
    title = s.name[0].upper()+s.name[1:]+'s of Activities'

    p = figure(x_range=x, title=title, background_fill_color='whitesmoke', height=300)
    p.vbar(x=x, top=y, width=.9, fill_color=c, line_color='white', line_width=3, line_join='round')

    # Start the value axis at zero and leave 5% headroom above the tallest bar.
    p.y_range.start = 0
    p.y_range.end = max(y) * 1.05
    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = .8
    p.yaxis.minor_tick_line_color = None

    show(p)

bar_chart_count(raw['type'])


In [None]:
### Number of activities per `gear_id` value.
gear_column = raw['gear_id']
bar_chart_count(gear_column)

In [None]:
def bar_chart_sum(cat_col, num_col, sort_by_values=True):
    """Show a bar chart of `num_col` summed per category of `cat_col`.

    Parameters
    ----------
    cat_col : pandas.Series
        Named series of categories sharing `num_col`'s index; one bar per distinct value.
    num_col : pandas.Series
        Named numeric series to total within each category.
    sort_by_values : bool, default True
        True orders the bars from the largest total to the smallest; False orders
        them by category value.
    """
    totals = num_col.groupby(cat_col).sum()
    # Sort once, in the requested order, instead of sorting by value and re-sorting by index.
    if sort_by_values:
        totals = totals.sort_values(ascending=False)
    else:
        totals = totals.sort_index()

    # Labels are converted to strings only after sorting, so numeric categories
    # (e.g. years) are ordered numerically rather than alphabetically.
    x = totals.index.astype('string').to_list()
    y = totals.to_list()
    # get_colours can return more colours than requested; keep exactly one per bar.
    c = get_colours(len(x))[:len(x)]
    title = num_col.name[0].upper()+num_col.name[1:]+ ' by ' + cat_col.name[0].upper()+cat_col.name[1:]

    p = figure(x_range=x, title=title, background_fill_color='whitesmoke', height=300)
    p.vbar(x=x, top=y, width=.9, fill_color=c, line_color='white', line_width=3, line_join='round')

    # Start the value axis at zero and leave 5% headroom above the tallest bar.
    p.y_range.start = 0
    p.y_range.end = max(y) * 1.05
    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = .8
    p.yaxis.minor_tick_line_color = None

    show(p)

bar_chart_sum(raw['gear_id'], raw['distance'] / 1000)

In [None]:
### Distance / 1000 (presumably metres -> km, TODO confirm) totalled per calendar year,
### with the bars in year order rather than sorted by total.
activity_year = raw['start_date_local'].astype('datetime64[ns]').dt.year
distance_scaled = raw['distance'] / 1000
bar_chart_sum(activity_year, distance_scaled, sort_by_values=False)

In [None]:
def stacked_bar_chart(bar_col, legend_col):
    """Show a stacked bar chart of activity counts for two categorical series.

    NOTE(review): the grouped counts are unstacked on `bar_col`, so the x-axis shows
    the values of `legend_col` and the stacked segments / legend entries show the
    values of `bar_col`, which is the opposite of what the parameter names suggest.
    That behaviour is kept as is.

    Parameters
    ----------
    bar_col : pandas.Series
        Named series whose distinct values become the stacked segments.
    legend_col : pandas.Series
        Named series, sharing `bar_col`'s index, whose distinct values become the
        positions on the x-axis.
    """
    # Rows: legend_col values; columns: bar_col values; cells: count per pair (0 if absent).
    agged = bar_col.groupby([legend_col, bar_col]).count().unstack().fillna(0)
    bars = agged.index.to_list()
    legends = agged.columns.to_list()
    c = get_colours(len(legends))

    data = {l: agged[l].to_list() for l in legends}
    title = 'Activities by '+bar_col.name[0].upper() + bar_col.name[1:] + ' and ' + legend_col.name[0].upper() + legend_col.name[1:]

    # Bokeh's categorical range and legend labels must be strings; coerce them here so
    # callers are not required to pre-convert numeric categories.
    x_labels = [str(b) for b in bars]

    p = figure(x_range=x_labels, title=title, background_fill_color='whitesmoke', height=300)
    bottom = [0 for _ in bars]
    for i, l in enumerate(legends):
        # Each segment sits on top of the running total of the segments before it.
        top = [x+y for x, y in zip(bottom, data[l])]
        p.vbar(x=x_labels, bottom=bottom, top=top, width=.9, color=c[i], line_color='white', line_width=1, line_join='round', legend_label=str(l))
        bottom = top

    # Start the value axis at zero and leave 5% headroom above the tallest stack.
    p.y_range.start = 0
    p.y_range.end = max(agged.sum(axis=1)) * 1.05
    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = .8
    p.yaxis.minor_tick_line_color = None
    p.legend.location = 'top_left'

    show(p)

stacked_bar_chart(raw['gear_id'], raw['start_date_local'].astype('datetime64[ns]').dt.year.astype('string'))

In [None]:
def stacked_bar_chart_sum(bar_col, legend_col, num_col):
    """Show a stacked bar chart of `num_col` totals for two categorical series.

    NOTE(review): the grouped sums are unstacked on `bar_col`, so the x-axis shows the
    values of `legend_col` and the stacked segments / legend entries show the values
    of `bar_col`, which is the opposite of what the parameter names suggest. That
    behaviour is kept as is.

    Parameters
    ----------
    bar_col : pandas.Series
        Named series whose distinct values become the stacked segments.
    legend_col : pandas.Series
        Named series whose distinct values become the positions on the x-axis.
    num_col : pandas.Series
        Named numeric series, sharing the index of the two category series, that
        is summed for every (legend_col, bar_col) pair.
    """
    # Rows: legend_col values; columns: bar_col values; cells: summed num_col (0 if absent).
    agged = num_col.groupby([legend_col, bar_col]).sum().unstack().fillna(0)
    bars = agged.index.to_list()
    legends = agged.columns.to_list()
    c = get_colours(len(legends))

    data = {l: agged[l].to_list() for l in legends}
    title = num_col.name[0].upper() + num_col.name[1:] + ' by '+bar_col.name[0].upper() + bar_col.name[1:] + ' and ' + legend_col.name[0].upper() + legend_col.name[1:]

    # Bokeh's categorical range and legend labels must be strings; coerce them here so
    # callers are not required to pre-convert numeric categories.
    x_labels = [str(b) for b in bars]

    p = figure(x_range=x_labels, title=title, background_fill_color='whitesmoke', height=400)
    bottom = [0 for _ in bars]
    for i, l in enumerate(legends):
        # Each segment sits on top of the running total of the segments before it.
        top = [x+y for x, y in zip(bottom, data[l])]
        p.vbar(x=x_labels, bottom=bottom, top=top, width=.9, color=c[i], line_color='white', line_width=1, line_join='round', legend_label=str(l))
        bottom = top

    # Start at zero; the 20% headroom leaves space for the legend in the top-left corner.
    p.y_range.start = 0
    p.y_range.end = max(agged.sum(axis=1)) * 1.2
    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = .8
    p.yaxis.minor_tick_line_color = None
    p.legend.location = 'top_left'
    p.legend.label_text_font_size = '8pt'

    show(p)

stacked_bar_chart_sum(raw['gear_id'], raw['start_date_local'].astype('datetime64[ns]').dt.year.astype('string'), raw['distance'] / 1000)

In [None]:
### moving_time / 3600 (hours, if the column is in seconds: TODO confirm) per year,
### stacked by gear_id.
year_label = raw['start_date_local'].astype('datetime64[ns]').dt.year.astype('string')
moving_time_scaled = raw['moving_time'] / 3600
stacked_bar_chart_sum(raw['gear_id'], year_label, moving_time_scaled)

In [None]:
### total_elevation_gain summed per year, stacked by gear_id.
year_label = raw['start_date_local'].astype('datetime64[ns]').dt.year.astype('string')
elevation_gain = raw['total_elevation_gain']
stacked_bar_chart_sum(raw['gear_id'], year_label, elevation_gain)

In [None]:
### kudos_count summed per year, stacked by gear_id.
year_label = raw['start_date_local'].astype('datetime64[ns]').dt.year.astype('string')
kudos = raw['kudos_count']
stacked_bar_chart_sum(raw['gear_id'], year_label, kudos)

In [None]:
### moving_time / 60 / 60 (hours, if the column is in seconds: TODO confirm) per year,
### stacked by activity type.
year_label = raw['start_date_local'].astype('datetime64[ns]').dt.year.astype('string')
moving_time_scaled = raw['moving_time'] / 60 / 60
stacked_bar_chart_sum(raw['type'], year_label, moving_time_scaled)