In [5]:
import sys
sys.path.append("../_build/")

import constants
import pandas as pd
import json

In [6]:
def miles_hiked_per_day(df):
    """Summarise the distance and time hiked on each calendar day.

    Every completed checkpoint is paired with the checkpoint recorded just
    before it. Pairs whose two checkpoints fall on different calendar days are
    dropped (so overnight gaps are not counted as hiking time), and the
    remaining pairs are grouped by day.

    Args:
        df: Checkpoint table passed to ``constants.get_completed``. The frame
            returned by that helper must contain ``constants.DATE_COL`` (it is
            read through the ``.dt`` accessor) and ``constants.TO_SPRINGER_COL``.

    Returns:
        dict with three keys:
            ``'mileage'``: list of ``{'date', 'miles', 'duration'}`` records,
                one per day with hiking; ``date`` is ``'YYYY-MM-DD'``,
                ``miles`` is a one-decimal string, ``duration`` is ``'HH:MM'``.
            ``'avg_mileage'``: mean miles over the days in ``'mileage'`` as a
                one-decimal string (``"0.0"`` when there are none).
            ``'num_zeros'``: number of calendar days between the first and last
                hiking day that have no record.
    """
    date_col = constants.DATE_COL
    dist_col = constants.TO_SPRINGER_COL
    day_col = date_col + '_dt'
    prev_dist_col = dist_col + '_shifted'
    prev_date_col = date_col + '_shifted'
    prev_day_col = day_col + '_shifted'

    # Explicit copy: the columns added below must land on an independent frame,
    # not on a selection of the caller's data (avoids SettingWithCopyWarning).
    completed = constants.get_completed(df)[[date_col, dist_col]].copy()
    completed[day_col] = completed[date_col].dt.strftime('%Y-%m-%d')

    # Need to shift the values, since the last checkpoint of each day should be the first checkpoint of the next day
    completed[prev_dist_col] = completed[dist_col].shift(1)
    completed[prev_date_col] = completed[date_col].shift(1)
    completed[prev_day_col] = completed[day_col].shift(1)

    # Exclude calculating the overnight duration by filtering where the dates are different
    completed = completed[completed[day_col] == completed[prev_day_col]]

    # Group by day: end of the day's last pair minus start of the day's first pair
    aggregations = {
        dist_col: 'last',
        prev_dist_col: 'first',
        date_col: 'last',
        prev_date_col: 'first',
    }
    miles_per_day = completed.groupby(day_col).agg(aggregations)
    miles_per_day['miles'] = miles_per_day[dist_col] - miles_per_day[prev_dist_col]
    miles_per_day['duration'] = miles_per_day[date_col] - miles_per_day[prev_date_col]

    # Calculate Average Miles per Day (does not include zero days)
    avg_mileage = "0.0"
    if len(completed) > 0:
        avg_mileage = "{:.1f}".format(miles_per_day['miles'].mean())

    # Fill in the missing days so we can calculate the number of zero days.
    # The index holds one unique 'YYYY-MM-DD' string per hiking day, so every
    # calendar day in [first, last] without an index entry is a zero day.
    num_zeros = 0
    if len(completed) > 0:
        calendar = pd.date_range(miles_per_day.index.min(), miles_per_day.index.max())
        num_zeros = len(calendar) - len(miles_per_day)

    # Utility to format hiking duration
    def format_duration(d):
        """Render a timedelta-like value as zero-padded 'HH:MM'."""
        seconds = d.total_seconds()
        hours = seconds // 3600
        minutes = (seconds % 3600) // 60

        return '%02d:%02d' % (hours, minutes)

    # Fix rounding issue by converting each mileage value into a string
    miles_per_day['date'] = miles_per_day.index
    miles_per_day['miles'] = miles_per_day['miles'].apply('{:.1f}'.format)
    miles_per_day['duration'] = miles_per_day['duration'].apply(format_duration)

    return {
        'mileage': miles_per_day[['date', 'miles', 'duration']].to_dict('records'),
        'avg_mileage': avg_mileage,
        'num_zeros': num_zeros,
    }

In [7]:
# Read the POI file through the project helper and compute the per-day statistics.
checkpoints = constants.read_poi_file()
mpd = miles_hiked_per_day(checkpoints)

# Dump the whole result dict ('mileage' records, 'avg_mileage', 'num_zeros') to stdout.
print(mpd)

{'mileage': [{'date': '2017-03-13', 'miles': '7.3', 'duration': '03:34'}, {'date': '2017-03-14', 'miles': '5.8', 'duration': '03:25'}, {'date': '2017-03-15', 'miles': '3.8', 'duration': '01:44'}, {'date': '2017-03-16', 'miles': '7.7', 'duration': '05:02'}, {'date': '2017-03-17', 'miles': '8.4', 'duration': '05:25'}, {'date': '2017-03-18', 'miles': '7.2', 'duration': '04:19'}, {'date': '2017-03-19', 'miles': '11.5', 'duration': '05:25'}, {'date': '2017-03-20', 'miles': '9.7', 'duration': '04:00'}, {'date': '2017-03-22', 'miles': '10.4', 'duration': '04:56'}, {'date': '2017-03-23', 'miles': '10.8', 'duration': '06:27'}, {'date': '2017-03-24', 'miles': '12.2', 'duration': '06:25'}, {'date': '2017-03-25', 'miles': '16.3', 'duration': '07:23'}, {'date': '2017-03-26', 'miles': '7.2', 'duration': '02:39'}, {'date': '2017-03-28', 'miles': '15.8', 'duration': '07:40'}, {'date': '2017-03-29', 'miles': '11.7', 'duration': '06:21'}, {'date': '2017-03-30', 'miles': '6.7', 'duration': '03:03'}, {'da

In [2]:
# Read the POI file through the project helper.
df = constants.read_poi_file()

# Keep only the date and distance columns of the rows returned by
# constants.get_completed (presumably the checkpoints already reached — the
# helper is not visible here). The explicit .copy() makes `completed` an
# independent frame, so adding columns to it cannot trigger
# SettingWithCopyWarning or touch `df`.
completed = constants.get_completed(df)[[constants.DATE_COL, constants.TO_SPRINGER_COL]].copy()

# Calendar day of each checkpoint as a 'YYYY-MM-DD' string.
completed[constants.DATE_COL+"_dt"] = completed[constants.DATE_COL].dt.strftime('%Y-%m-%d')

# Row counts: selected rows first, then the full table.
print(len(completed))
print(len(df))
completed.head(10)

170
546


Unnamed: 0,dt_reached,to_spgr,dt_reached_dt
0,2017-03-13 10:03:00,-8.8,2017-03-13
1,2017-03-13 13:37:00,-1.5,2017-03-13
2,2017-03-14 11:37:00,-1.5,2017-03-14
3,2017-03-14 12:14:00,0.0,2017-03-14
4,2017-03-14 12:25:00,0.2,2017-03-14
5,2017-03-14 12:47:00,1.0,2017-03-14
6,2017-03-14 13:18:00,2.0,2017-03-14
7,2017-03-14 13:42:00,2.8,2017-03-14
8,2017-03-14 15:02:00,4.3,2017-03-14
9,2017-03-15 12:02:00,4.3,2017-03-15


In [3]:
# Pair each checkpoint with the one recorded immediately before it: the last
# checkpoint of a day doubles as the starting point of the following day.
# Each source column gets a '<name>_shifted' twin holding the previous row's value.
for base_col in (constants.TO_SPRINGER_COL, constants.DATE_COL, constants.DATE_COL + '_dt'):
    completed[base_col + '_shifted'] = completed[base_col].shift(1)

completed.head(10)

Unnamed: 0,dt_reached,to_spgr,dt_reached_dt,to_spgr_shifted,dt_reached_shifted,dt_reached_dt_shifted
0,2017-03-13 10:03:00,-8.8,2017-03-13,,NaT,
1,2017-03-13 13:37:00,-1.5,2017-03-13,-8.8,2017-03-13 10:03:00,2017-03-13
2,2017-03-14 11:37:00,-1.5,2017-03-14,-1.5,2017-03-13 13:37:00,2017-03-13
3,2017-03-14 12:14:00,0.0,2017-03-14,-1.5,2017-03-14 11:37:00,2017-03-14
4,2017-03-14 12:25:00,0.2,2017-03-14,0.0,2017-03-14 12:14:00,2017-03-14
5,2017-03-14 12:47:00,1.0,2017-03-14,0.2,2017-03-14 12:25:00,2017-03-14
6,2017-03-14 13:18:00,2.0,2017-03-14,1.0,2017-03-14 12:47:00,2017-03-14
7,2017-03-14 13:42:00,2.8,2017-03-14,2.0,2017-03-14 13:18:00,2017-03-14
8,2017-03-14 15:02:00,4.3,2017-03-14,2.8,2017-03-14 13:42:00,2017-03-14
9,2017-03-15 12:02:00,4.3,2017-03-15,4.3,2017-03-14 15:02:00,2017-03-14


In [27]:
# Exclude calculating the overnight duration by filtering where the dates are different
completed = completed[completed[constants.DATE_COL + '_dt'] == completed[constants.DATE_COL + '_dt_shifted']]

# Group by day: the last pair's end values and the first pair's start values.
# The shifted distance key is derived from the constant (it was hard-coded as
# 'to_spgr_shifted'), matching how the column is referenced everywhere else.
f = {
    constants.TO_SPRINGER_COL: 'last',
    constants.TO_SPRINGER_COL + '_shifted': 'first',
    constants.DATE_COL: 'last',
    constants.DATE_COL + '_shifted': 'first',
}
miles_per_day = completed.groupby(constants.DATE_COL+'_dt').agg(f)
miles_per_day['miles'] = miles_per_day[constants.TO_SPRINGER_COL] - miles_per_day[constants.TO_SPRINGER_COL + '_shifted']
miles_per_day['duration'] = miles_per_day[constants.DATE_COL] - miles_per_day[constants.DATE_COL + '_shifted']


# Utility to format hiking duration
def format_duration(d):
    """Render a timedelta-like value as zero-padded 'HH:MM'."""
    seconds = d.total_seconds()
    hours = seconds // 3600
    minutes = (seconds % 3600) // 60

    return '%02d:%02d' % (hours, minutes)

# Fix rounding issue by converting each mileage value into a string
miles_per_day['date'] = miles_per_day.index
miles_per_day['miles'] = miles_per_day['miles'].apply('{:.1f}'.format)
miles_per_day['duration'] = miles_per_day['duration'].apply(format_duration)

# NOTE(review): this rebinds `mpd` to a dict holding only 'mileage'; the dict
# returned by miles_hiked_per_day also carries 'avg_mileage' and 'num_zeros'.
# Any later cell that reads `mpd` sees this smaller dict — confirm that is intended.
mpd = {'mileage': miles_per_day[['date', 'miles', 'duration']].to_dict('records')}

print(mpd)

{'mileage': [{'date': '2017-03-13', 'miles': '7.3', 'duration': '03:34'}, {'date': '2017-03-14', 'miles': '5.8', 'duration': '03:25'}, {'date': '2017-03-15', 'miles': '3.8', 'duration': '01:44'}, {'date': '2017-03-16', 'miles': '7.7', 'duration': '05:02'}, {'date': '2017-03-17', 'miles': '8.4', 'duration': '05:25'}, {'date': '2017-03-18', 'miles': '7.2', 'duration': '04:19'}, {'date': '2017-03-19', 'miles': '11.5', 'duration': '05:25'}, {'date': '2017-03-20', 'miles': '9.7', 'duration': '04:00'}, {'date': '2017-03-22', 'miles': '10.4', 'duration': '04:56'}, {'date': '2017-03-23', 'miles': '10.8', 'duration': '06:27'}, {'date': '2017-03-24', 'miles': '12.2', 'duration': '06:25'}, {'date': '2017-03-25', 'miles': '16.3', 'duration': '07:23'}, {'date': '2017-03-26', 'miles': '7.2', 'duration': '02:39'}, {'date': '2017-03-28', 'miles': '15.8', 'duration': '07:40'}, {'date': '2017-03-29', 'miles': '11.7', 'duration': '06:21'}, {'date': '2017-03-30', 'miles': '6.7', 'duration': '03:03'}, {'da

In [28]:
# Serialise the current `mpd` mapping as JSON to the path held in
# constants.statsFilePath, overwriting any existing file.
with open(constants.statsFilePath, 'w') as outfile:
    # A shallow copy of the mapping is what gets dumped.
    json.dump(dict(mpd), outfile)