In [None]:
import pandas as pd
import os
import json
import colorcet as cc
colour_palette = cc.CET_R3

from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook, reset_output

output_notebook()

### making data a bit easier to see: raise pandas' display limits so frames of
### up to 500 columns / 500 rows are rendered without truncation
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

### the data should live outside the repo. Set the STRAVA_DATA_DIR environment
### variable to point somewhere else instead of editing this cell; the original
### location stays the default. os.path.join(..., '') guarantees the trailing
### separator that `data_location + filename` in the loading cell relies on.
data_location = os.path.join(
    os.environ.get('STRAVA_DATA_DIR', r'/Users/jj/code/strava_data/'), ''
)

In [None]:
# Load the cleaned activities export; the first CSV column becomes the index
# and parse_dates=True lets pandas try to parse that index as dates.
raw = pd.read_csv(
    data_location + 'all_activities_cleaned.csv',
    index_col=0,
    parse_dates=True,
)
# Convert every column whose name contains 'date' to datetime; all other
# columns are carried over unchanged and the column order is kept.
raw = raw.assign(**{
    col: pd.to_datetime(raw[col]) for col in raw.columns if 'date' in col
})
raw

In [None]:
def get_colours(n):
    """Pick evenly spaced colours from the module-level ``colour_palette``.

    The palette is sampled at every ``int(len(colour_palette) / n)``-th entry,
    starting with index 0. Because that step is rounded down, the result holds
    *at least* ``n`` colours and frequently one more; callers that need exactly
    ``n`` should slice the result (``get_colours(n)[:n]``).

    Args:
        n: Number of colours wanted.

    Returns:
        list: Sampled palette entries. Empty when ``n`` is not positive. When
        ``n`` exceeds the palette length the whole palette is returned (so
        fewer than ``n`` colours).
    """
    if n <= 0:
        # Nothing was requested; dividing by n == 0 below would raise.
        return []
    # Clamp the step to 1: for n > len(colour_palette) the rounded-down step is
    # 0 and the original `i % 0` raised ZeroDivisionError.
    step = max(1, int(len(colour_palette) / n))
    return [colour for i, colour in enumerate(colour_palette) if i % step == 0]

def aggregate(df, cat_columns, met_column=None, aggregation=None, order_by=None):
    """Summarise ``df`` by one or two categorical columns as a plot-ready dict.

    Supported call shapes:

    * ``cat_columns`` is a str, no metric: row count per category,
      ``{cat: [...], 'count': [...]}``.
    * ``cat_columns`` is a str, with ``met_column`` and ``aggregation``:
      ``{cat: [...], met_column: [...]}``.
    * ``cat_columns`` is a two-item list, no metric: row count per pair,
      ``{first: [...], second: {second_value: [...], ...}}`` with 0 for
      pairs that do not occur.
    * two-item list with ``met_column`` and ``aggregation``: the same nested
      shape holding the aggregated metric instead of counts.

    Every returned dict also carries a ``'metadata'`` entry echoing the
    arguments (note the key for ``met_column`` is spelled ``'met_columns'``).

    Args:
        df: Source DataFrame.
        cat_columns: One column name (str) or a list of exactly two names.
        met_column: Column to aggregate; must be given together with
            ``aggregation``.
        aggregation: Anything accepted by ``GroupBy.agg`` (e.g. ``'sum'``).
        order_by: Optional column of the intermediate frame to sort by,
            descending. For the two-column shapes this is a value of the
            second categorical column.

    Returns:
        dict as described above, or the string ``'invalid_input'`` when
        ``cat_columns`` is neither a str nor a list (kept for backward
        compatibility with existing callers).

    Raises:
        ValueError: If only one of ``met_column`` / ``aggregation`` is given,
            or if a list ``cat_columns`` does not hold exactly two names.
    """
    single = isinstance(cat_columns, str)
    if not single and not isinstance(cat_columns, list):
        # Checked before touching df so the sentinel is actually reachable
        # (indexing df with an unsupported key type used to raise first).
        return ('invalid_input')

    if bool(met_column) != bool(aggregation):
        # Previously fell through both branches and died on an unbound local.
        raise ValueError('met_column and aggregation must be given together')
    if not single and len(cat_columns) != 2:
        # The nested output is keyed by cat_columns[1]; other lengths cannot
        # be represented.
        raise ValueError('a list of cat_columns must name exactly two columns')

    counting = not met_column
    raw_data = df[cat_columns]
    if not counting:
        raw_data = pd.concat([raw_data, df[met_column]], axis=1)

    if single:
        if counting:
            # rename_axis + reset_index(name=...) yields the columns
            # [cat_columns, 'count'] on both pandas 1.x and 2.x. The old
            # rename({'index': ..., cat_columns: 'count'}) produced two
            # 'count' columns on pandas >= 2.0, where value_counts() already
            # names the index after the column and the values 'count'.
            grouped = (
                raw_data.value_counts()
                .rename_axis(cat_columns)
                .reset_index(name='count')
            )
        else:
            grouped = raw_data.groupby(cat_columns).agg(aggregation).reset_index()

        if order_by:
            grouped = grouped.sort_values(order_by, ascending=False)
        grouped = grouped.to_dict('list')

    else:
        if counting:
            # Rows: first category, columns: second category, cells: counts.
            grouped = raw_data.value_counts().unstack(fill_value=0)
        else:
            grouped = raw_data.groupby(cat_columns).agg(aggregation).unstack(fill_value=0)
            # Drop the metric level so columns are just the second category.
            grouped.columns = grouped.columns.get_level_values(1)

        if order_by:
            grouped = grouped.sort_values(order_by, ascending=False)
        grouped = {
            grouped.index.name: grouped.index.to_list(),
            cat_columns[1]: grouped.to_dict('list'),
        }

    grouped['metadata'] = {'cat_columns':cat_columns, 'met_columns':met_column, 'aggregation':aggregation, 'order_by':order_by}

    return grouped

def custom_plot(x=None, title=None):
    """Build the notebook's shared base figure for bar-style charts.

    Args:
        x: Forwarded to ``figure`` as ``x_range``.
        title: Forwarded to ``figure`` as the plot title.

    Returns:
        The configured figure; callers add glyphs and show it themselves.
    """
    fig = figure(
        title=title,
        x_range=x,
        height=300,
        background_fill_color='whitesmoke',
    )

    # Axis cosmetics: no minor ticks on y, tilted x labels, no vertical grid.
    fig.yaxis.minor_tick_line_color = None
    fig.xaxis.major_label_orientation = .8
    fig.xgrid.grid_line_color = None

    # Bars grow upwards from zero.
    fig.y_range.start = 0
    return fig

### Run aggregate() once per supported call shape and display what it returns.
### x is the first categorical column, z the second; every result additionally
### carries a 'metadata' entry.
###   case 1: count rows by one column
###           -> {x: [a, b, c], 'count': [count_a, count_b, count_c]}
###   case 2: aggregate a metric by one column
###           -> {x: [a, b, c], metric: [agg_a, agg_b, agg_c]}
###   case 3: count rows by two columns
###           -> {x: [a, b, c], z: {z1: [count_a_z1, count_b_z1, count_c_z1], z2: [...]}}
###   case 4: aggregate a metric by two columns
###           -> {x: [a, b, c], z: {z1: [agg_a_z1, agg_b_z1, agg_c_z1], z2: [...]}}
for case_label, call_args in (
    ('case1', ('bike_name',)),
    ('case2', ('bike_name', 'kudos_count', 'sum')),
    ('case3', (['bike_name', 'year'],)),
    ('case4', (['bike_name', 'year'], 'kudos_count', 'sum')),
):
    print(case_label)
    display(aggregate(raw, *call_args))



In [None]:
def bar_chart(x, y, t=None):
    """Draw and show a vertical bar chart with one colour per bar.

    Args:
        x: Category labels, one per bar. They are converted to ``str`` because
            they are also handed to ``custom_plot`` as the figure's x_range.
        y: Bar heights, aligned with ``x``.
        t: Optional plot title.

    Returns:
        None. The figure is displayed via ``show``.
    """
    labels = [str(label) for label in x]
    n_bars = len(labels)

    # Ask for one colour per bar and then trim/cycle to exactly n_bars.
    # get_colours may hand back more entries than requested, and the previous
    # `get_colours(len(x)-1)` workaround divided by zero for a single bar and
    # produced too few colours whenever the helper returned exactly n.
    palette = get_colours(n_bars) if n_bars else []
    c = [palette[i % len(palette)] for i in range(n_bars)]

    p = custom_plot(x=labels, title=t)
    p.vbar(x=labels, top=y, width=.9, fill_color=c, line_color='white', line_width=3, line_join='round')
    show(p)

# Number of activities per activity type.
title = 'Count of Activities by Type'
data = aggregate(raw, cat_columns='type')
bar_chart(x=data['type'], y=data['count'], t=title)


In [None]:
# Summed distance per bike, largest total first.
title = 'Distance by Bike'
data = aggregate(raw, 'bike_name', 'distance', 'sum', order_by='distance')
bar_chart(x=data['bike_name'], y=data['distance'], t=title)

In [None]:
def stacked_bar_chart(x, ys, t=None):
    """Draw and show a stacked vertical bar chart.

    Args:
        x: Category labels along the x axis. They are converted to ``str``
            because they are also handed to ``custom_plot`` as the figure's
            x_range.
        ys: Mapping of series name -> list of heights aligned with ``x``; the
            series are stacked in the mapping's iteration order and each gets
            a legend entry labelled ``str(name)``.
        t: Optional plot title (defaults to None, matching ``bar_chart``).

    Returns:
        None. The figure is displayed via ``show``.
    """
    labels = [str(label) for label in x]
    n_series = len(ys)
    # get_colours cannot be asked for zero colours, so skip it for empty input.
    palette = get_colours(n_series) if n_series else []
    p = custom_plot(x=labels, title=t)

    ### iterate through second cat column, stacking each series on the running total
    bottom = [0] * len(labels)
    for i, (k, v) in enumerate(ys.items()):
        top = [base + height for base, height in zip(bottom, v)]
        p.vbar(
            x=labels, bottom=bottom, top=top, width=.9,
            # Cycle so a palette shorter than the series count cannot raise IndexError.
            color=palette[i % len(palette)],
            line_color='white', line_width=1, line_join='round', legend_label=str(k),
        )
        bottom = top

    if n_series:
        # Only position the legend when at least one series created an entry.
        p.legend.location = 'top_left'

    show(p)

# Activity counts per year, stacked by bike. The year values are turned into
# strings before they are used as x-axis categories.
title = 'Activities by Bike and Year'
data = aggregate(raw, cat_columns=['year', 'bike_name'])
stacked_bar_chart(list(map(str, data['year'])), data['bike_name'], t=title)

In [None]:
# Summed distance per month, stacked by bike.
data = aggregate(raw, ['month', 'bike_name'], 'distance', 'sum')
title = 'Distance by Bike and Month'
# The month values end up as the figure's x_range, so stringify them the same
# way the year chart does. NOTE(review): whether 'month' is numeric is not
# visible here; str() is a no-op if the column already holds strings.
stacked_bar_chart([str(z) for z in data['month']], data['bike_name'], title)