# Pilot - Running Analysis

This Jupyter Notebook is to quickly test feasibility of the things I want to do with this project

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from datetime import date
from butterworth_distance import get_data#, smooth, distance
from scipy import stats

In [2]:
def distance(data): # returns distance in m
    print(data)
    nextdata = pd.DataFrame([data['lat'], data['lon'], data['time']], ["nextlat", "nextlon","nexttime"])
    nextdata = nextdata.transpose().shift(periods=-1, axis=0)
    data = pd.concat([data, nextdata], axis=1)
    data['nexttime'] = pd.to_datetime(data['nexttime'])
    data['timediff'] = data['nexttime'] - data['time']
    
    radius=6371000
    lat = radify(data['lat']).astype(np.float64)
    lon = radify(data['lon']).astype(np.float64)
    nextlat = radify(data['nextlat']).astype(np.float64)
    nextlon = radify(data['nextlon']).astype(np.float64)
    
    a = 2*radius
    b = (np.sin((nextlat-lat)/2))**2
    c = np.cos(lat) * np.cos(nextlat) * ((nextlon-lon)/2)**2

    data['distbetween'] = a*np.arcsin(np.sqrt(b+c))
    
    # Want to exclude any pauses (gaps in GPS data longer than 8 seconds)
    # and any slow running (pace greater than 0.67 s/m, same as speed
    # less than 1.5 m/s)
    data['pace'] = data['timediff'] / data['distbetween']
    
    maximumtime = pd.Timedelta(seconds=8)
    maximumpace = pd.Timedelta(seconds=0.67)

    data = data[(data['timediff'] <= maximumtime) | (data['pace'] <= maximumpace)]
    
    totaldistance = pd.DataFrame.sum(data['distbetween'], axis=0)
    return totaldistance

def radify(column):
    return column * np.pi / 180
    
def get_distance(filename):
    points = get_data(filename)
    res = distance(points)
    print('Before filtering: %0.2f' % res)
    points = smooth(points)
    res = distance(points)
    print('After filtering: %0.2f' % res)
    return res

def smooth(data):
    # Return Butterworth-smoothed data, eventually
    from scipy import signal
    b, a = signal.butter(3, 0.1)#, btype='lowpass', analog=False)
    lat = signal.filtfilt(b, a, data['lat'])
    lon = signal.filtfilt(b, a, data['lon'])
    low_passed = pd.DataFrame([data['time'], lat, lon], ["time", "lat", "lon"])
    low_passed = low_passed.transpose()
    return low_passed

get_distance('RK_gpx _2017-07-13_1747.gpx')

                   time      lat      lon
0   2017-07-14 00:47:40  49.2806 -122.917
1   2017-07-14 00:47:40  49.2806 -122.917
2   2017-07-14 00:47:40  49.2806 -122.917
3   2017-07-14 00:47:42  49.2807 -122.917
4   2017-07-14 00:47:56  49.2808 -122.916
5   2017-07-14 00:48:17  49.2808 -122.916
6   2017-07-14 00:48:25  49.2808 -122.916
7   2017-07-14 00:48:31  49.2809 -122.915
8   2017-07-14 00:48:43  49.2809 -122.914
9   2017-07-14 00:49:04  49.2809 -122.914
10  2017-07-14 00:49:14  49.2809 -122.914
11  2017-07-14 00:49:25  49.2808 -122.913
12  2017-07-14 00:49:50  49.2807 -122.912
13  2017-07-14 00:50:04  49.2806 -122.911
14  2017-07-14 00:50:33  49.2806 -122.911
15  2017-07-14 00:50:39  49.2805 -122.911
16  2017-07-14 00:50:45  49.2804 -122.911
17  2017-07-14 00:50:54  49.2803 -122.911
18  2017-07-14 00:51:01  49.2804  -122.91
19  2017-07-14 00:51:08  49.2804  -122.91
20  2017-07-14 00:51:12  49.2805  -122.91
21  2017-07-14 00:51:14  49.2805  -122.91
22  2017-07-14 00:51:17  49.2806  

TypeError: cannot operate on a series without a rhs of a series/ndarray of type datetime64[ns] or a timedelta

In [None]:
running_data = []
for filename in os.listdir(os.getcwd()):
    if (filename[-4::] == '.gpx'):
        running_data.append(filename)

In [None]:
running_df = pd.DataFrame(data=running_data, columns=['filename'])
running_df['datetime'] = running_df['filename'].str.slice(start=8, stop=23)

In [None]:
# strptime stuff

In [None]:
running_df['datetime'] = pd.to_datetime(running_df['datetime'], format='%Y-%m-%d_%H%M')
def to_timestamp(inputdatetime):
    return inputdatetime.timestamp()
running_df['timestamp'] = running_df['datetime'].apply(to_timestamp)

In [None]:
running_df['distance'] = running_df['filename'].apply(get_distance)

In [None]:
fit = stats.linregress(running_df['timestamp'], running_df['distance'])
fit.slope, fit.intercept

In [None]:
plt.figure()
plt.plot(running_df['datetime'], running_df['distance'], 'b.', alpha = 0.5)
plt.plot(running_df['datetime'], running_df['timestamp']*fit.slope+fit.intercept, 'r-', linewidth=3)
plt.xticks(rotation=70)
plt.show()

In [None]:
print('The p-value is', fit.pvalue)