In [1]:
import pandas as pd
from datetime import *
import numpy as np
import math

import chart_1_scatter
import chart_2_parallel
import chart_3_bar

In [2]:
metres_mile = 1609.34
workout_type_dict = {0:'Run',1:'Race',2:'Long Run',3:'Workout'}
days_dict = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}

In [3]:
def preprocess_activities(username):

    activities_df = pd.read_json('{}/activities.json'.format(username))

    activities_df = activities_df[
        ['average_speed', 'distance', 'moving_time', 'name', 'start_date_local', 'id', 'workout_type', 'type']]
    activities_df = activities_df[activities_df.type == 'Run']

    activities_df['pace_mile'] = metres_mile / activities_df.average_speed
    activities_df['pace_km'] = 1000 / activities_df.average_speed

    activities_df['date'] = pd.to_datetime(activities_df.start_date_local.apply(lambda x: x.split('T')[0]))
    activities_df.drop(['average_speed', 'start_date_local', 'type', 'id'], axis=1, inplace=True)

    activities_df.workout_type = activities_df.workout_type.fillna(0)
    activities_df.workout_type = activities_df.workout_type.apply(lambda x: workout_type_dict[x])

    activities_df['miles'] = activities_df.distance / metres_mile
    activities_df['Distance (Kilometres)'] = activities_df.distance / 1000

    activities_df['size'] = activities_df.moving_time.astype('float').apply(lambda x: math.sqrt(x))
    activities_df['year'] = activities_df.date.apply(lambda x: x.year)

    activities_text = []
    for i in range(len(activities_df)):
        row = activities_df.iloc[i,]
        activities_text.append('{}<br>{}<br>'.format(row['name'].encode('ascii', 'ignore'),
                                                     row['date'].date()) + '{:.1f} miles<br>{:.2f} seconds/mile'.format(
            row['miles'], row['pace_mile']))

    activities_df['text'] = activities_text
    return activities_df

In [4]:
def group_df(activities_df):

    activities_grouped_df = activities_df.groupby(['date'], as_index=False)['miles'].sum()
    activities_grouped_df['dow'] = activities_grouped_df.date.apply(lambda x: x.weekday())
    activities_grouped_df['week_start'] = activities_grouped_df.date.apply(lambda x: x - timedelta(days=x.weekday()))

    miles_per_week = activities_grouped_df.groupby(['week_start'], as_index=False).miles.sum()
    by_week_df = pd.DataFrame(activities_grouped_df.week_start.unique(), columns=['week_start'])
    by_week_df['miles'] = 0

    for i in range(7):
        by_week_df['{}'.format(i)] = i

    for i in range(7):
        by_week_df = pd.merge(by_week_df, activities_grouped_df, left_on=['week_start', '{}'.format(i)],
                              right_on=['week_start', 'dow'], how='left', suffixes=('', '_{}'.format(i)))

    by_week_df = by_week_df[['week_start', 'miles_0', 'miles_1', 'miles_2', 'miles_3', 'miles_4', 'miles_5', 'miles_6']]
    by_week_df['year'] = by_week_df['week_start'].apply(lambda x: x.year)
    by_week_df.fillna(0, inplace=True)
    by_week_df = pd.merge(by_week_df, miles_per_week, how='left', on='week_start')

    return activities_grouped_df, by_week_df

In [5]:
def group_df_2(activities_df):

    activities_grouped_df_2 = activities_df.groupby(['workout_type', 'week_start'], as_index=False)['miles'].sum()

    by_week_activity_df = pd.DataFrame(activities_grouped_df_2.week_start.unique(), columns=['week_start'])

    for i in activities_df.workout_type.unique():
        by_week_activity_df['{}'.format(i)] = i

    by_week_activity_df['miles'] = 0

    for i in range(activities_df.workout_type):
        by_week_activity_df = pd.merge(by_week_activity_df, activities_grouped_df_2,
                                       left_on=['week_start', '{}'.format(activities_df.workout_type.unique()[i])],
                                       right_on=['week_start', 'workout_type'], how='left',
                                       suffixes=('', '_{}'.format(activities_df.workout_type.unique()[i])))

    if 'miles_Long Run' not in by_week_activity_df.columns:
        by_week_activity_df['miles_Long Run'] = 0

    by_week_activity_df = by_week_activity_df[['week_start', 'miles_Run', 'miles_Workout', 'miles_Long Run', 'miles_Race']]
    by_week_activity_df.fillna(0, inplace=True)

    by_week_activity_df['miles_Run'] = np.array(by_week_activity_df['miles_Run']) + np.array(
        by_week_activity_df['miles_Long Run'])

    by_week_activity_df['miles_Run'] = np.array(by_week_activity_df['miles_Run']) + np.array(
        by_week_activity_df['miles_Long Run'])

    return activities_grouped_df_2, by_week_activity_df

In [6]:
def plot_charts(username):

    activities_df = preprocess_activities(username)

    chart_1 = chart_1_scatter.chart_plot(activities_df)

    activities_grouped_df, by_week_df = group_df(activities_df)

    chart_2 = chart_2_parallel.chart_plot(by_week_df)

    activities_df['week_start'] = activities_df.date.apply(lambda x: x - timedelta(days=x.weekday()))

    activities_grouped_df_2, by_week_activity_df = group_df_2(activities_df)

    chart_3 = chart_3_bar.chart_plot(by_week_activity_df)

    return(chart_1, chart_2, chart_3)


In [16]:
activities_df = preprocess_activities('luke_cotter')

In [28]:
activities_df

Unnamed: 0,distance,moving_time,name,workout_type,pace_mile,pace_km,date,miles,Distance (Kilometres),size,year,text,week_start
0,4884.0,1467,CD,Run,483.430460,300.390508,2018-05-01,3.034784,4.8840,38.301436,2018,CD<br>2018-05-01<br>3.0 miles<br>483.43 second...,2018-04-30
1,5079.3,1033,"2x3,2,1’ (1’)",Run,327.301200,203.376042,2018-05-01,3.156139,5.0793,32.140317,2018,"2x3,2,1 (1)<br>2018-05-01<br>3.2 miles<br>327....",2018-04-30
2,4795.2,1365,WU,Run,458.109878,284.656988,2018-05-01,2.979607,4.7952,36.945906,2018,WU<br>2018-05-01<br>3.0 miles<br>458.11 second...,2018-04-30
3,18227.6,4977,Morning Run,Run,439.470235,273.074823,2018-04-29,11.326134,18.2276,70.547856,2018,Morning Run<br>2018-04-29<br>11.3 miles<br>439...,2018-04-23
4,6072.4,1639,Evening Run,Run,434.369771,269.905533,2018-04-28,3.773224,6.0724,40.484565,2018,Evening Run<br>2018-04-28<br>3.8 miles<br>434....,2018-04-23
5,3562.1,1153,CD,Run,520.990612,323.729362,2018-04-28,2.213392,3.5621,33.955854,2018,CD<br>2018-04-28<br>2.2 miles<br>520.99 second...,2018-04-23
6,800.0,117,Cuppers 800 - 2nd in 1.57.8,Run,235.352442,146.241591,2018-04-28,0.497098,0.8000,10.816654,2018,Cuppers 800 - 2nd in 1.57.8<br>2018-04-28<br>0...,2018-04-23
7,3540.3,1052,WU,Run,478.258544,297.176820,2018-04-28,2.199846,3.5403,32.434549,2018,WU<br>2018-04-28<br>2.2 miles<br>478.26 second...,2018-04-23
8,3983.6,1186,Garlic bread run + drills and strides,Run,479.112831,297.707651,2018-04-27,2.475300,3.9836,34.438351,2018,Garlic bread run + drills and strides<br>2018-...,2018-04-23
9,5428.7,1556,Dropping Christofi with Lefroy,Run,461.261106,286.615076,2018-04-27,3.373246,5.4287,39.446166,2018,Dropping Christofi with Lefroy<br>2018-04-27<b...,2018-04-23


In [17]:
chart_1 = chart_1_scatter.chart_plot(activities_df)

In [18]:
activities_grouped_df, by_week_df = group_df(activities_df)

In [19]:
chart_2 = chart_2_parallel.chart_plot(by_week_df)

In [20]:
activities_df['week_start'] = activities_df.date.apply(lambda x: x - timedelta(days=x.weekday()))

In [21]:
activities_grouped_df_2, by_week_activity_df = group_df_2(activities_df)

IndexError: index 2 is out of bounds for axis 0 with size 2

In [13]:
chart_3 = chart_3_bar.chart_plot(by_week_activity_df)

In [23]:
activities_grouped_df_2 = activities_df.groupby(['workout_type', 'week_start'], as_index=False)['miles'].sum()

In [24]:
by_week_activity_df = pd.DataFrame(activities_grouped_df_2.week_start.unique(), columns=['week_start'])

In [25]:
for i in activities_df.workout_type.unique():
        by_week_activity_df['{}'.format(i)] = i

In [26]:
by_week_activity_df['miles'] = 0


In [27]:
by_week_activity_df

Unnamed: 0,week_start,Run,Race,miles
0,2017-04-03,Run,Race,0
1,2017-05-22,Run,Race,0
2,2015-12-07,Run,Race,0
3,2015-12-14,Run,Race,0
4,2015-12-21,Run,Race,0
5,2015-12-28,Run,Race,0
6,2016-01-04,Run,Race,0
7,2016-01-11,Run,Race,0
8,2016-01-18,Run,Race,0
9,2016-01-25,Run,Race,0
