Post-process the data generated by `generate_data.ipynb`:
concatenate the distance matrix data and put it in a pandas DataFrame.

In [1]:
from pprint import pprint
import os
import json
import numpy as np
import pandas as pd

Load the data object,
which is stored as a JSON file.

In [2]:
# Project root. Defaults to the original author's checkout; set the
# PYCOMMUTE_BASE_PATH environment variable to run the notebook on another
# machine without editing this cell.
base_path = os.environ.get(
    'PYCOMMUTE_BASE_PATH',
    os.path.join('C:\\', 'Users', 'glenn', 'src', 'pycommute'),
)
data_directory = os.path.join(base_path, 'data')
# JSON input read by this cell.
data_file = os.path.join(data_directory, 'test_data.json')
# HDF5 path the resulting dataframe is stored under later in the notebook.
dataframe_file = os.path.join(data_directory, 'test_data_dataframe.hdf5')
with open(data_file, 'r') as f:
    data_object = json.load(f)

Get relevant info from data object

In [3]:
# Pull the four top-level entries this notebook works with out of the loaded
# JSON object, in one unpacking assignment (a missing key raises KeyError).
(
    distance_matrices_transit,
    distance_matrices_driving,
    origins_batches,
    destinations_geocodes,
) = (
    data_object[key]
    for key in (
        'distance_matrices_transit',
        'distance_matrices_driving',
        'origins_batches',
        'destinations_geocodes',
    )
)


In [4]:
# One (lat, lng) tuple per destination geocode, in the same order as
# `destinations_geocodes`.
destination_coords = [
    (location['lat'], location['lng'])
    for location in (
        geocode['geometry']['location'] for geocode in destinations_geocodes
    )
]

Create pandas table. Columns are:
origin coords,
origin address,
destination coords,
destination address,
travel mode,
distance text,
distance value,
duration text,
duration value,
status,

Function that takes a batch and creates a list of dictionaries, each of which corresponds to one row in the dataframe.

In [5]:
def entries_from_batch(batch_matrix, mode, batch_origins, destination_coords):
    """Flatten one distance-matrix batch into a list of row dictionaries.

    Parameters
    ----------
    batch_matrix : dict
        One distance-matrix response. Must contain the keys ``'status'``,
        ``'origin_addresses'``, ``'destination_addresses'`` and ``'rows'``.
        Every row holds an ``'elements'`` list with one element per
        destination.
    mode : str
        Stored verbatim in the ``'travel mode'`` field of every entry.
    batch_origins : sequence
        Origin coordinates, one per origin address and in the same order.
        Each coordinate is converted to a tuple.
    destination_coords : iterable
        Destination coordinates, one per destination address and in the same
        order. Stored as given.

    Returns
    -------
    list of dict
        One dictionary per (origin, destination) pair, ordered origin-major.
        The distance and duration fields are ``None`` for elements that carry
        no ``'distance'`` / ``'duration'`` payload (presumably elements whose
        ``'status'`` is not ``'OK'``); the element's ``'status'`` is always
        recorded so such rows can be filtered afterwards.

    Raises
    ------
    AssertionError
        If the batch status is not ``'OK'`` or the number of addresses,
        coordinates and elements do not match up.
    """
    entries = []
    assert batch_matrix['status'] == 'OK', "Batch status isn't `OK`. It's {}.".format(batch_matrix['status'])
    destination_addresses = batch_matrix['destination_addresses']
    origin_addresses = batch_matrix['origin_addresses']
    rows = batch_matrix['rows']

    # Materialise once: the coordinates are re-used for every row, so a
    # one-shot iterator would otherwise be exhausted after the first origin.
    destination_coords = list(destination_coords)
    n_destinations = len(destination_coords)

    assert len(origin_addresses) == len(batch_origins), "Number of origin addresses ({}) doesn't correspond to number of origin coordinates in `batch_origins` ({}).".format(len(origin_addresses), len(batch_origins))
    assert len(destination_addresses) == n_destinations, "Number of destination addresses ({}) doesn't correspond to number of destination coordinates in `destination_coords` ({}).".format(len(destination_addresses), n_destinations)

    for row, origin_address, origin_coordinate in zip(rows, origin_addresses, batch_origins):
        elements = row['elements']
        assert len(elements) == n_destinations, "Number of elements ({}) isn't {}".format(len(elements), n_destinations)
        for element, destination_address, destination_coordinate in zip(elements, destination_addresses, destination_coords):
            # An element without a route has no distance/duration payload;
            # fall back to empty dicts so those fields become None instead of
            # aborting the whole batch with a KeyError.
            distance = element.get('distance') or {}
            duration = element.get('duration') or {}
            entries.append({
                'origin coords': tuple(origin_coordinate),
                'origin address': origin_address,
                'destination coords': destination_coordinate,
                'destination address': destination_address,
                'travel mode': mode,
                'distance text': distance.get('text'),
                'distance value': distance.get('value'),
                'duration text': duration.get('text'),
                'duration value': duration.get('value'),
                'status': element['status'],
            })
    return entries

Build the dataframe

In [6]:
# Collect every row first and build the frame once: `DataFrame.append` was
# removed in pandas 2.0, and growing a frame batch-by-batch copies all
# previous rows on each iteration.
all_entries = []
for mode, distance_matrices in (
    ('driving', distance_matrices_driving),
    ('transit', distance_matrices_transit),
):
    # The i-th matrix is paired with the i-th origins batch; zip() stops at
    # the shorter of the two lists.
    for batch_matrix, batch_origins in zip(distance_matrices, origins_batches):
        entries = entries_from_batch(batch_matrix, mode, batch_origins, destination_coords)
        all_entries.extend(entries)

# A fresh 0..n-1 integer index, driving rows first and transit rows after.
df = pd.DataFrame(all_entries)

df

Unnamed: 0,origin coords,origin address,destination coords,destination address,travel mode,distance text,distance value,duration text,duration value,status
0,"(63.418312, 10.358785)","Viktor Baumanns vei 37, 7020 Trondheim, Norway","(63.4123278, 10.404471)","Klæbuveien 125, 7031 Trondheim, Norway",driving,6.5 km,6491,14 mins,826,OK
1,"(63.418312, 10.358785)","Viktor Baumanns vei 37, 7020 Trondheim, Norway","(63.428781, 10.4732474)","Landbruksvegen 2, 7047 Trondheim, Norway",driving,10.6 km,10604,16 mins,955,OK
2,"(63.418312, 10.360795497435129)","Riiser-larsens vei 18, 7020 Trondheim, Norway","(63.4123278, 10.404471)","Klæbuveien 125, 7031 Trondheim, Norway",driving,5.9 km,5941,13 mins,759,OK
3,"(63.418312, 10.360795497435129)","Riiser-larsens vei 18, 7020 Trondheim, Norway","(63.428781, 10.4732474)","Landbruksvegen 2, 7047 Trondheim, Norway",driving,10.1 km,10054,15 mins,888,OK
4,"(63.418312, 10.362805994870259)","Riiser-larsens vei 20, 7020 Trondheim, Norway","(63.4123278, 10.404471)","Klæbuveien 125, 7031 Trondheim, Norway",driving,5.9 km,5941,13 mins,759,OK
...,...,...,...,...,...,...,...,...,...,...
5699,"(63.438097075330106, 10.477404348672659)","Arkitekt Ebbells veg 10, 7053 Ranheim, Norway","(63.428781, 10.4732474)","Landbruksvegen 2, 7047 Trondheim, Norway",transit,1.4 km,1388,19 mins,1126,OK
5700,"(63.438097075330106, 10.479414846107789)","Arkitekt Ebbells veg 10, 7053 Ranheim, Norway","(63.4123278, 10.404471)","Klæbuveien 125, 7031 Trondheim, Norway",transit,9.3 km,9294,50 mins,3010,OK
5701,"(63.438097075330106, 10.479414846107789)","Arkitekt Ebbells veg 10, 7053 Ranheim, Norway","(63.428781, 10.4732474)","Landbruksvegen 2, 7047 Trondheim, Norway",transit,1.5 km,1518,20 mins,1212,OK
5702,"(63.438097075330106, 10.481425343542918)","Arkitekt Ebbells veg 16, 7053 Ranheim, Norway","(63.4123278, 10.404471)","Klæbuveien 125, 7031 Trondheim, Norway",transit,9.3 km,9313,50 mins,3024,OK


Store dataframe

In [7]:
# Overwrite (mode='w') the HDF5 file and store the frame under the key
# 'commute_data' with the highest compression level (0-9). `key` and `mode`
# are passed by keyword: positional use is deprecated and becomes an error in
# pandas 3.0.
df.to_hdf(dataframe_file, key='commute_data', mode='w', complevel=9)