In [27]:
from collections import OrderedDict
import csv
import json
import xml.etree.ElementTree as ET 
from datetime import datetime

import osmnx as ox
from shapely.geometry import Point
from geopy.distance import distance, lonlat

In [28]:
## Utils

def convert_timestamp_offset(timestamp):
    """Return how many seconds past UTC midnight an epoch timestamp falls.

    Args:
        timestamp: Unix epoch seconds; anything ``int()`` accepts
            (an int or a numeric string).

    Returns:
        int: Seconds elapsed since 00:00:00 UTC on the timestamp's day.
    """
    # NOTE(review): datetime.utcfromtimestamp is deprecated from Python 3.12;
    # it is kept so this cell needs no import beyond the existing ones.
    dt = datetime.utcfromtimestamp(int(timestamp))
    midnight = dt.replace(hour=0, minute=0, second=0, microsecond=0)
    return (dt - midnight).seconds


def is_valid_point(current_point, previous_point, geo_boundary,
                   min_dist_m=20, min_delta_t_s=20, max_speed_mps=22.35):
    """Decide whether a GPS sample should be kept in a trip.

    A sample is rejected when it lies outside ``geo_boundary``. Otherwise the
    first sample of a trip (no previous sample) is always kept, and a later
    sample is kept only if it is far enough from, late enough after, and not
    implausibly fast relative to the previously kept sample.

    Args:
        current_point: Dict with 'long', 'lat' and 'time' keys.
        previous_point: The previously accepted dict of the same shape, or a
            falsy value (e.g. ``None``) for the first sample.
        geo_boundary: Geometry passed to ``Point.within``.
        min_dist_m: Minimum distance from the previous sample, in metres.
        min_delta_t_s: Minimum difference between the two 'time' values
            (presumably seconds; callers here pass epoch seconds).
        max_speed_mps: Maximum distance/time ratio; the default 22.35 m/s is
            roughly 50 mph.

    Returns:
        bool: True if the sample passes every check.
    """
    coords1 = (current_point['long'], current_point['lat'])
    time1 = int(current_point['time'])

    if not Point(coords1).within(geo_boundary):
        return False
    if not previous_point:
        return True

    coords2 = (previous_point['long'], previous_point['lat'])
    delta_t = time1 - int(previous_point['time'])
    dist = distance(lonlat(*coords1), lonlat(*coords2)).meters
    if dist < min_dist_m or delta_t < min_delta_t_s:
        return False

    # Guard the division in case a caller passes min_delta_t_s <= 0.
    vel = dist / delta_t if delta_t > 0 else 0
    return vel <= max_speed_mps

In [29]:
## Strava data

# Filename marker used to pick GPX activities out of activities.csv.
VALID_FILE_EXT = '.gpx'
# Prefix prepended to 'activities.csv' and to each activity filename.
BASE_PATH = 'raw/strava_data/'
# strptime pattern matching the text of GPX <time> elements.
DATE_FMT = '%Y-%m-%dT%H:%M:%SZ'

def parse_gpx(filename, date_fmt='%Y-%m-%dT%H:%M:%SZ'):
    """Read every track point (``trkpt``) from a GPX 1.1 file.

    Args:
        filename: Path or file object accepted by ``ElementTree.parse``.
        date_fmt: ``strptime`` pattern for the ``<time>`` text. It is a
            parameter rather than a read of the module-level ``DATE_FMT``
            because that global is rebound to a different format later in
            the notebook.

    Returns:
        list[dict]: One dict per timestamped track point with keys 'lat',
        'long', 'time' (epoch seconds), 'offset' (the value returned by
        ``convert_timestamp_offset``) and 'type' (always 'strava').
    """
    NS = '{http://www.topografix.com/GPX/1/1}'
    unix_epoch = datetime(1970, 1, 1)

    root = ET.parse(filename).getroot()

    points = []
    for point in root.iter('{}trkpt'.format(NS)):
        time_node = point.find('{}time'.format(NS))
        if time_node is None or not time_node.text:
            # A point without a timestamp cannot be placed in time; skip it
            # instead of failing on None.text.
            continue

        # The trailing 'Z' marks the stamp as UTC, so compute epoch seconds
        # directly. strftime('%s') is platform-specific and would reinterpret
        # the naive datetime in the machine's local timezone.
        parsed = datetime.strptime(time_node.text, date_fmt)
        timestamp = int((parsed - unix_epoch).total_seconds())

        points.append({
            'lat': float(point.get('lat')),
            'long': float(point.get('lon')),
            'time': timestamp,
            'offset': convert_timestamp_offset(timestamp),
            'type': 'strava'
        })

    return points

# Load the Strava activity index (one CSV row per exported activity).
with open(BASE_PATH + 'activities.csv', 'r') as infile:
    reader = csv.DictReader(infile)
    strava_lookup_data = list(reader)

# Keep commute activities stored as plain GPX. endswith() is used instead of
# a substring test so that names such as 'ride.gpx.gz' are not handed to the
# XML parser.
valid_filenames = [
    row['filename']
    for row in strava_lookup_data
    if row['filename'].endswith(VALID_FILE_EXT) and row['commute'] == 'true'
]

# Parse each selected file; trips are keyed by their position in the list.
strava_parsed_data = OrderedDict()
for index, filename in enumerate(valid_filenames):
    parsed_data = parse_gpx(BASE_PATH + filename)
    strava_parsed_data[index] = parsed_data
    


In [30]:
## Scoot data

FILEPATH = 'raw/scoot-data/pthaas_ride_traces.csv'
DATE_FMT = '%Y-%m-%d %H:%M:%S'

with open(FILEPATH, 'r') as infile:
    reader = csv.DictReader(infile)
    scoot_data = list(reader)

# Group the trace rows into one list of points per ride, in file order.
scoot_parsed_data = OrderedDict()
for row in scoot_data:
    ride_id = row['ride_id']
    # datetime.timestamp() treats the naive value as local time, which is the
    # same reading the platform-specific strftime('%s') gave, but it is
    # documented and portable.
    # NOTE(review): confirm which timezone 'scu_timestamp' is recorded in.
    timestamp = int(datetime.strptime(row['scu_timestamp'], DATE_FMT).timestamp())
    point_data = {
        'lat': float(row['latitude']),
        'long': float(row['longitude']),
        'time': timestamp,
        'offset': convert_timestamp_offset(timestamp),
        'type': 'scoot'
    }
    scoot_parsed_data.setdefault(ride_id, []).append(point_data)

In [31]:
# Pool the trips from both sources; each entry is one trip's list of points.
segments = list(strava_parsed_data.values()) + list(scoot_parsed_data.values())

# Boundary geometry used to drop points outside San Francisco.
# NOTE(review): later OSMnx releases expose this lookup as geocode_to_gdf;
# confirm that the installed version still provides gdf_from_place.
SF_GEO = ox.gdf_from_place('San Francisco, CA, USA').iloc[0]['geometry']

# Numeric code written to the output for each data source.
type_dict = {
    'strava': 0,
    'scoot': 1
}

parsed_trips = []
for trip in segments:
    if not trip:
        # A source file with no points yields an empty trip; trip[0] below
        # would raise IndexError, so skip it.
        continue
    trip_data = {}
    trip_data['type'] = type_dict[trip[0]['type']]
    segment_data = []
    previous_point = None
    for this_point in trip:
        if not is_valid_point(this_point, previous_point, SF_GEO):
            continue
        segment_data.append([this_point['long'], this_point['lat'], this_point['offset']])
        # Later points are compared against the last point that was kept.
        previous_point = this_point
    trip_data['segments'] = segment_data
    parsed_trips.append(trip_data)

with open('trip_data.json', 'w') as outfile:
    json.dump(parsed_trips, outfile)

In [25]:
# Debug run: push only the first Strava trip through the point filter and
# display how many points survive.
SF_GEO = ox.gdf_from_place('San Francisco, CA, USA').iloc[0]['geometry']

type_dict = dict(strava=0, scoot=1)

parsed_trips = []
for trip in [strava_parsed_data[0]]:
    trip_data = {'type': type_dict[trip[0]['type']]}
    segment_data = []
    previous_point = None
    for this_point in trip:
        is_valid = is_valid_point(this_point, previous_point, SF_GEO)
        if is_valid:
            data = [this_point[key] for key in ('long', 'lat', 'offset')]
            segment_data.append(data)
            # Compare the next point against the last one that was kept.
            previous_point = this_point
    trip_data['segments'] = segment_data
    parsed_trips.append(trip_data)

len(parsed_trips[0]['segments'])

First point
Dist: 3.1937782172725098; Delta_t: 3; Vel: 1.0645927390908365
Dist: 4.050762165490481; Delta_t: 15; Vel: 0.27005081103269873
Dist: 7.43644198683292; Delta_t: 17; Vel: 0.4374377639313482
Dist: 10.476174482456047; Delta_t: 20; Vel: 0.5238087241228023
Dist: 13.015824281695117; Delta_t: 22; Vel: 0.5916283764406871
Dist: 15.672417130600895; Delta_t: 23; Vel: 0.6814094404609085
Dist: 19.094955884294297; Delta_t: 24; Vel: 0.7956231618455957
Dist: 5.339084808548288; Delta_t: 1; Vel: 5.339084808548288
Dist: 11.85613661661348; Delta_t: 2; Vel: 5.92806830830674
Dist: 17.455625380075134; Delta_t: 3; Vel: 5.818541793358378
Dist: 23.051843905165477; Delta_t: 4; Vel: 5.762960976291369
Dist: 27.845783727370954; Delta_t: 5; Vel: 5.56915674547419
Dist: 33.603983947901355; Delta_t: 6; Vel: 5.6006639913168925
Dist: 39.23963746201873; Delta_t: 7; Vel: 5.605662494574104
Dist: 46.35969020086635; Delta_t: 8; Vel: 5.794961275108294
Dist: 51.65642155325382; Delta_t: 9; Vel: 5.7396023948059804
Dist: 

Dist: 28.285412369616218; Delta_t: 5; Vel: 5.657082473923244
Dist: 34.46023576648098; Delta_t: 6; Vel: 5.7433726277468296
Dist: 40.08925892779423; Delta_t: 7; Vel: 5.7270369896848905
Dist: 44.1517621400006; Delta_t: 8; Vel: 5.518970267500075
Dist: 50.53910420529089; Delta_t: 9; Vel: 5.615456022810099
Dist: 55.89161814382819; Delta_t: 10; Vel: 5.589161814382819
Dist: 61.13303515163501; Delta_t: 11; Vel: 5.557548650148638
Dist: 65.67094215229872; Delta_t: 12; Vel: 5.47257851269156
Dist: 71.6138058686578; Delta_t: 13; Vel: 5.508754297589062
Dist: 77.407950644987; Delta_t: 14; Vel: 5.529139331784785
Dist: 83.08865126106794; Delta_t: 15; Vel: 5.5392434174045295
Dist: 88.06674457507512; Delta_t: 16; Vel: 5.504171535942195
Dist: 94.30440655368807; Delta_t: 17; Vel: 5.547318032569886
Dist: 100.2489572732245; Delta_t: 18; Vel: 5.569386515179139
Dist: 106.67537589029244; Delta_t: 19; Vel: 5.614493467910129
Dist: 8.357093693762605; Delta_t: 1; Vel: 8.357093693762605
Dist: 13.634801167661895; Delt

Dist: 83.50176378205752; Delta_t: 11; Vel: 7.5910694347325025
Dist: 90.70278053033988; Delta_t: 12; Vel: 7.55856504419499
Dist: 98.97220132096923; Delta_t: 13; Vel: 7.613246255459172
Dist: 107.42210679251619; Delta_t: 14; Vel: 7.673007628036871
Dist: 115.1637913832922; Delta_t: 15; Vel: 7.677586092219481
Dist: 122.38166844385603; Delta_t: 16; Vel: 7.648854277741002
Dist: 128.35283362842614; Delta_t: 17; Vel: 7.550166684025067
Dist: 133.77733548374215; Delta_t: 18; Vel: 7.43207419354123
Dist: 138.67634367890554; Delta_t: 19; Vel: 7.2987549304687125
Dist: 22.468544885928935; Delta_t: 3; Vel: 7.489514961976312
Dist: 29.218957556560195; Delta_t: 4; Vel: 7.304739389140049
Dist: 36.91029170536646; Delta_t: 5; Vel: 7.382058341073292
Dist: 44.387382874840256; Delta_t: 6; Vel: 7.397897145806709
Dist: 51.31811320987007; Delta_t: 7; Vel: 7.331159029981438
Dist: 58.38457250345525; Delta_t: 8; Vel: 7.298071562931907
Dist: 65.8881442544869; Delta_t: 9; Vel: 7.320904917165212
Dist: 73.17908920052562;

Dist: 6.576430523596236; Delta_t: 2; Vel: 3.288215261798118
Dist: 10.626734892345832; Delta_t: 3; Vel: 3.542244964115277
Dist: 15.417920705131522; Delta_t: 4; Vel: 3.8544801762828804
Dist: 20.094402076327682; Delta_t: 5; Vel: 4.018880415265537
Dist: 25.55487742076218; Delta_t: 6; Vel: 4.259146236793696
Dist: 31.76599782367222; Delta_t: 7; Vel: 4.537999689096031
Dist: 38.68553291388774; Delta_t: 8; Vel: 4.835691614235968
Dist: 45.46524794995597; Delta_t: 9; Vel: 5.051694216661775
Dist: 49.164118687790136; Delta_t: 10; Vel: 4.916411868779013
Dist: 54.8600485906181; Delta_t: 11; Vel: 4.987277144601645
Dist: 61.86681671381785; Delta_t: 12; Vel: 5.15556805948482
Dist: 68.10136681465471; Delta_t: 13; Vel: 5.238566678050362
Dist: 73.49633684809311; Delta_t: 14; Vel: 5.249738346292365
Dist: 79.48493674871798; Delta_t: 15; Vel: 5.298995783247865
Dist: 87.26985948893592; Delta_t: 16; Vel: 5.454366218058495
Dist: 94.38836901317642; Delta_t: 17; Vel: 5.552257000775083
Dist: 101.93675490426506; Del

Dist: 94.0634516773898; Delta_t: 13; Vel: 7.2356501290299855
Dist: 101.22689930792698; Delta_t: 14; Vel: 7.23049280770907
Dist: 107.77508727229342; Delta_t: 15; Vel: 7.185005818152894
Dist: 116.09316835399964; Delta_t: 16; Vel: 7.255823022124978
Dist: 121.21561813696137; Delta_t: 17; Vel: 7.130330478644787
Dist: 127.20326364500778; Delta_t: 18; Vel: 7.06684798027821
Dist: 133.4146551620964; Delta_t: 19; Vel: 7.021823955899811
Dist: 3.912989105211555; Delta_t: 1; Vel: 3.912989105211555
Dist: 9.972764175709203; Delta_t: 3; Vel: 3.324254725236401
Dist: 16.619172644739063; Delta_t: 5; Vel: 3.3238345289478124
Dist: 20.793213586577618; Delta_t: 6; Vel: 3.4655355977629365
Dist: 24.56480489218525; Delta_t: 7; Vel: 3.5092578417407503
Dist: 29.526549781513424; Delta_t: 8; Vel: 3.690818722689178
Dist: 36.1622341984271; Delta_t: 9; Vel: 4.018026022047455
Dist: 40.7600943564257; Delta_t: 10; Vel: 4.07600943564257
Dist: 50.98399017584423; Delta_t: 12; Vel: 4.248665847987019
Dist: 80.69086603215581; 

78