In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.cm import ScalarMappable
import seaborn as sns
sns.set()
%matplotlib inline

# Adding the module to the path for future import
# Note that the following is idempotent when this notebook is run from "/examples"
import sys
import os
from os import path
from pathlib import Path
os.chdir('../utilities')
import visualization as viz
import plans_parser as parser

import tqdm
import math

# To parse .xml output files
from lxml import etree


%load_ext autoreload
%autoreload 2

In [2]:
COMPETITION = "competition"
SUBMISSION_INPUTS = "submission-inputs"
ITERS = "ITERS"
REFERENCE_DATA = "reference-data"
SCENARIO_NAME = "sioux_faux"
AGENCY = "sioux_faux_bus_lines"
CONFIG = "config"
EXAMPLES = "examples"

path_input = (Path.cwd().parent / "submission-inputs").absolute()

# ********** CHANGE SAMPLE_SIZE (string) *******************#
# 1k or 15k
SAMPLE_SIZE = '15k'

# ********** CHANGE iter_number (integer) *******************#
iter_number = 20
ITER_FOLDER = f"it.{iter_number}"

# PATHS TO BAU DATA 
# 1.ouput folder
path_output_folder_bau = (Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / f"bau/warm-start/sioux_faux-{SAMPLE_SIZE}__warm-start").absolute()
path_output_folder_bau = viz.unzip_file(path_output_folder_bau)

# 2. Network and population files
path_network_file = Path.cwd().parent / REFERENCE_DATA /SCENARIO_NAME / CONFIG/ "physsim-network.xml"
path_population_file = Path.cwd().parent / REFERENCE_DATA /SCENARIO_NAME / CONFIG / f"{SAMPLE_SIZE}/population.xml.gz"

# ********** CHANGE PATH OUTPUT FOLDER (pathlib.Path object)*****************#
path_output_folder = Path.cwd().parent / EXAMPLES / "example_run_output_folder"


In [150]:
max_incentive = 50
max_income = 150000
max_age = 120
max_fare = 10
transit_scale_factor = 0.1

poi_types = ['work', 'secondary']
time_ranges = {'morning peak': range(7, 10), "evening peak": range(17, 20)}
max_time = 900
utm_zone = "14N"

# Importing agencies ids from agency.txt
agency_ids = pd.read_csv(Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / AGENCY / "gtfs_data/agency.txt")
agency_ids = agency_ids["agency_id"].tolist()

# Importing route ids from `routes.txt`
route_df = pd.read_csv(Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / AGENCY /"gtfs_data/routes.txt")
route_ids = route_df["route_id"].sort_values(ascending = True).tolist()

# Importing vehicle types and seating capacities from `availableVehicleTypes.csv` file
available_vehicle_types = pd.read_csv(Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / AGENCY /"availableVehicleTypes.csv")

buses_list = available_vehicle_types["vehicleTypeId"][1:].tolist()
seating_capacities = available_vehicle_types[["vehicleTypeId", "seatingCapacity"]].set_index("vehicleTypeId", drop=True).T.to_dict("records")[0]

# Extracting Operational costs per bus type from the `vehicleCosts.csv` file
operational_costs = pd.read_csv(Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / AGENCY / "vehicleCosts.csv")
operational_costs = operational_costs[["vehicleTypeId", "opAndMaintCost"]].set_index("vehicleTypeId", drop=True).T.to_dict("records")[0]

# Extracting route_id / trip_id correspondence from the `trips.csv` file
trips = pd.read_csv(Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / AGENCY / "gtfs_data/trips.txt")
trip_to_route = trips[["trip_id", "route_id"]].set_index("trip_id", drop=True).T.to_dict('records')[0]

# Extracting bus fares from the submission input csv: "submission-inputs/MassTransitFares.csv"
bus_fares_df = pd.read_csv(path_output_folder_bau / COMPETITION / SUBMISSION_INPUTS / "MassTransitFares.csv")

#Extracting Fuel cost from the `beamFuelTypes.csv` file
fuel_costs = pd.read_csv(Path.cwd().parent / REFERENCE_DATA / SCENARIO_NAME / CONFIG / SAMPLE_SIZE / "beamFuelTypes.csv")
fuel_costs.loc[len(fuel_costs)] = ["food", 0]
fuel_costs = fuel_costs.set_index("fuelTypeId", drop=True).T.to_dict('records')[0]


In [9]:
# Importing all xml.gz files from the output folder of the run
events_path = path_output_folder_bau / "outputEvents.xml.gz"
output_plans_path = path_output_folder_bau / "outputPlans.xml.gz"
experienced_plans_path = path_output_folder_bau / ITERS / "it.100" / "100.experiencedPlans.xml.gz"
persons_path = path_output_folder_bau / "outputPersonAttributes.xml.gz"
households_path = path_output_folder_bau / "outputHouseholds.xml.gz"

In [128]:
%%time
persons_attributes_df = parser.extract_person_dataframes(output_plans_path, 
                                                         persons_path, households_path, 
                                                         path_output_folder)

person_dataframe.csv generated
CPU times: user 3.27 s, sys: 446 ms, total: 3.72 s
Wall time: 3.72 s


In [143]:
%%time
activities_df = parser.extract_activities_dataframes(experienced_plans_path, path_output_folder)

activities_dataframe.csv generated
CPU times: user 912 ms, sys: 58.4 ms, total: 970 ms
Wall time: 978 ms


In [161]:
%%time
trips_df = parser.get_trips_output(experienced_plans_path)

CPU times: user 1.28 s, sys: 69.7 ms, total: 1.35 s
Wall time: 1.37 s


In [156]:
trips_df.columns

Index(['PID', 'Trip_ID', 'Origin_Activity_ID', 'Destination_activity_ID',
       'Trip_Purpose', 'Mode', 'Start_time', 'Duration_sec', 'Distance_m',
       'Path_linkIds'],
      dtype='object')

In [151]:
%%time
bus_fares_df = parser.parse_bus_fare_input(bus_fares_df, route_ids)

CPU times: user 4.78 ms, sys: 159 µs, total: 4.94 ms
Wall time: 5.33 ms


In [194]:
legs_df = parser.extract_legs_dataframes(events_path, trips_df, persons_attributes_df, bus_fares_df, trip_to_route, fuel_costs, path_output_folder)

Empty DataFrame
Columns: [time, type, person, vehicle, driver, vehicleType, length, numPassengers, departureTime, arrivalTime, mode, links, fuelType, fuel]
Index: []
Empty DataFrame
Columns: [time, type, person, vehicle, driver, vehicleType, length, numPassengers, departureTime, arrivalTime, mode, links, fuelType, fuel]
Index: []


IndexError: ('index 0 is out of bounds for axis 0 with size 0', 'occurred at index 16')

In [6]:
final_trips_df = parser.merge_legs_trips(legs_df, trips_df)

NameError: name 'output_plans_data' is not defined

# Test.

In [15]:
from pathlib import Path
p = Path(r"/Users/vgolfi/Documents/GitHub/Uber-Prize-Starter-Kit_GITLAB/reference-data/sioux_faux/bau/warm-start/sioux_faux-15k__warm-start/ITERS/it.100/100.legHistogramWalk.png")

In [16]:
p.suffix

'.png'

# get_person_output_from_households_xml

In [26]:
households_xml = parser.open_xml(r"/Users/vgolfi/Documents/GitHub/Uber-Prize-Starter-Kit_GITLAB/reference-data/sioux_faux/bau/warm-start/sioux_faux-15k__warm-start/outputHouseholds.xml")
households_root = households_xml.getroot()
hhd_array = []

In [88]:
for child in households_root:
    print(child.tag, child.attrib)
    pass

{http://www.matsim.org/files/dtd}household {'id': '10600-2013000431434-0'}
{http://www.matsim.org/files/dtd}household {'id': '1200-2014001105461-0'}
{http://www.matsim.org/files/dtd}household {'id': '1804-2013001049684-0'}
{http://www.matsim.org/files/dtd}household {'id': '405-2016000409711-0'}
{http://www.matsim.org/files/dtd}household {'id': '10102-2014000448294-0'}
{http://www.matsim.org/files/dtd}household {'id': '10405-2012000251540-0'}
{http://www.matsim.org/files/dtd}household {'id': '1902-2015000087715-0'}
{http://www.matsim.org/files/dtd}household {'id': '1801-2016000280380-0'}
{http://www.matsim.org/files/dtd}household {'id': '600-2016000996040-0'}
{http://www.matsim.org/files/dtd}household {'id': '1600-2013000345554-0'}
{http://www.matsim.org/files/dtd}household {'id': '1803-2013000481572-0'}
{http://www.matsim.org/files/dtd}household {'id': '10102-2014001459793-1'}
{http://www.matsim.org/files/dtd}household {'id': '408-2012000849754-0'}
{http://www.matsim.org/files/dtd}hous

{http://www.matsim.org/files/dtd}household {'id': '700-2013000743438-0'}
{http://www.matsim.org/files/dtd}household {'id': '1101-2014000287720-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2015000795744-0'}
{http://www.matsim.org/files/dtd}household {'id': '10103-2013000831682-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2016001200571-6'}
{http://www.matsim.org/files/dtd}household {'id': '1002-2012000797087-0'}
{http://www.matsim.org/files/dtd}household {'id': '202-2015000272305-0'}
{http://www.matsim.org/files/dtd}household {'id': '10104-2013000280546-2'}
{http://www.matsim.org/files/dtd}household {'id': '202-2014000392422-0'}
{http://www.matsim.org/files/dtd}household {'id': '10600-2012000089288-0'}
{http://www.matsim.org/files/dtd}household {'id': '10104-2013001192909-3'}
{http://www.matsim.org/files/dtd}household {'id': '408-2013001042627-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2012001171118-1'}
{http://www.matsim.org/files/dtd}house

{http://www.matsim.org/files/dtd}household {'id': '1901-2016001120110-0'}
{http://www.matsim.org/files/dtd}household {'id': '1803-2015000831253-3'}
{http://www.matsim.org/files/dtd}household {'id': '1200-2015000653794-0'}
{http://www.matsim.org/files/dtd}household {'id': '700-2016001114517-0'}
{http://www.matsim.org/files/dtd}household {'id': '1804-2015000790664-0'}
{http://www.matsim.org/files/dtd}household {'id': '202-2012000578958-0'}
{http://www.matsim.org/files/dtd}household {'id': '1101-2012001404956-0'}
{http://www.matsim.org/files/dtd}household {'id': '1001-2015001205185-0'}
{http://www.matsim.org/files/dtd}household {'id': '600-2016000155763-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2016000905283-2'}
{http://www.matsim.org/files/dtd}household {'id': '900-2016000280380-0'}
{http://www.matsim.org/files/dtd}household {'id': '500-2014000053367-1'}
{http://www.matsim.org/files/dtd}household {'id': '1500-2016000770003-1'}
{http://www.matsim.org/files/dtd}household 

{http://www.matsim.org/files/dtd}household {'id': '10401-2015001314717-1'}
{http://www.matsim.org/files/dtd}household {'id': '1901-2015000810401-1'}
{http://www.matsim.org/files/dtd}household {'id': '900-2016001295719-0'}
{http://www.matsim.org/files/dtd}household {'id': '1801-2014001100354-2'}
{http://www.matsim.org/files/dtd}household {'id': '10104-2016000401992-0'}
{http://www.matsim.org/files/dtd}household {'id': '1105-2016000985405-0'}
{http://www.matsim.org/files/dtd}household {'id': '300-2014000107799-0'}
{http://www.matsim.org/files/dtd}household {'id': '1600-2012000409645-0'}
{http://www.matsim.org/files/dtd}household {'id': '10600-2014000273051-0'}
{http://www.matsim.org/files/dtd}household {'id': '405-2016000071413-0'}
{http://www.matsim.org/files/dtd}household {'id': '300-2014000346032-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2015001314717-1'}
{http://www.matsim.org/files/dtd}household {'id': '405-2014001239636-0'}
{http://www.matsim.org/files/dtd}househo

{http://www.matsim.org/files/dtd}household {'id': '1801-2016000509662-1'}
{http://www.matsim.org/files/dtd}household {'id': '1002-2015000839944-0'}
{http://www.matsim.org/files/dtd}household {'id': '1700-2016001424895-0'}
{http://www.matsim.org/files/dtd}household {'id': '10405-2013001005966-0'}
{http://www.matsim.org/files/dtd}household {'id': '1101-2013000377800-0'}
{http://www.matsim.org/files/dtd}household {'id': '1002-2016000699010-0'}
{http://www.matsim.org/files/dtd}household {'id': '1803-2016000030344-1'}
{http://www.matsim.org/files/dtd}household {'id': '1101-2016001098391-0'}
{http://www.matsim.org/files/dtd}household {'id': '10104-2012001100895-0'}
{http://www.matsim.org/files/dtd}household {'id': '10600-2014001249779-0'}
{http://www.matsim.org/files/dtd}household {'id': '10401-2013000407259-0'}
{http://www.matsim.org/files/dtd}household {'id': '401-2014000923144-1'}
{http://www.matsim.org/files/dtd}household {'id': '1108-2013000262837-0'}
{http://www.matsim.org/files/dtd}ho

{http://www.matsim.org/files/dtd}household {'id': '201-2013000413780-0'}
{http://www.matsim.org/files/dtd}household {'id': '1801-2014001269642-0'}
{http://www.matsim.org/files/dtd}household {'id': '1500-2016000198374-0'}
{http://www.matsim.org/files/dtd}household {'id': '1803-2012000110774-0'}
{http://www.matsim.org/files/dtd}household {'id': '202-2015000961501-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2014000901035-1'}
{http://www.matsim.org/files/dtd}household {'id': '405-2013000703874-0'}
{http://www.matsim.org/files/dtd}household {'id': '1101-2014001332040-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2014000113616-5'}
{http://www.matsim.org/files/dtd}household {'id': '401-2014000763883-0'}
{http://www.matsim.org/files/dtd}household {'id': '1200-2012001195734-0'}
{http://www.matsim.org/files/dtd}household {'id': '10600-2015000191231-0'}
{http://www.matsim.org/files/dtd}household {'id': '300-2012000142774-0'}
{http://www.matsim.org/files/dtd}household

{http://www.matsim.org/files/dtd}household {'id': '1804-2012001254838-1'}
{http://www.matsim.org/files/dtd}household {'id': '300-2016000614752-0'}
{http://www.matsim.org/files/dtd}household {'id': '10600-2015000432050-0'}
{http://www.matsim.org/files/dtd}household {'id': '1902-2014000097368-0'}
{http://www.matsim.org/files/dtd}household {'id': '10102-2012000388530-0'}
{http://www.matsim.org/files/dtd}household {'id': '500-2012001048206-0'}
{http://www.matsim.org/files/dtd}household {'id': '10104-2013000367446-1'}
{http://www.matsim.org/files/dtd}household {'id': '1803-2016000335446-0'}
{http://www.matsim.org/files/dtd}household {'id': '405-2015000751671-0'}
{http://www.matsim.org/files/dtd}household {'id': '1804-2014000561422-0'}
{http://www.matsim.org/files/dtd}household {'id': '1200-2012000873181-0'}
{http://www.matsim.org/files/dtd}household {'id': '1801-2014001023329-1'}
{http://www.matsim.org/files/dtd}household {'id': '600-2013001457521-0'}
{http://www.matsim.org/files/dtd}househ

{http://www.matsim.org/files/dtd}household {'id': '10102-2015001491257-0'}
{http://www.matsim.org/files/dtd}household {'id': '1801-2014001249779-0'}
{http://www.matsim.org/files/dtd}household {'id': '408-2014000836897-0'}
{http://www.matsim.org/files/dtd}household {'id': '1108-2016001407958-1'}
{http://www.matsim.org/files/dtd}household {'id': '1105-2012001106989-1'}
{http://www.matsim.org/files/dtd}household {'id': '1002-2016000391264-0'}
{http://www.matsim.org/files/dtd}household {'id': '1101-2016000837646-0'}
{http://www.matsim.org/files/dtd}household {'id': '1002-2013000211454-0'}
{http://www.matsim.org/files/dtd}household {'id': '407-2012001333257-0'}
{http://www.matsim.org/files/dtd}household {'id': '1803-2012000252732-1'}
{http://www.matsim.org/files/dtd}household {'id': '1107-2015000585393-0'}
{http://www.matsim.org/files/dtd}household {'id': '1106-2015000784412-1'}
{http://www.matsim.org/files/dtd}household {'id': '10104-2015000816977-3'}
{http://www.matsim.org/files/dtd}house

In [116]:
child_1 = households_root.findall('{http://www.matsim.org/files/dtd}household')

In [93]:
child.findall('members')

[]

In [124]:
child_1[0].find("{http://www.matsim.org/files/dtd}income").text

'\n\t\t\t\t20936.0\n\t\t'

In [64]:
households_root

<Element {http://www.matsim.org/files/dtd}households at 0x1a2325c5c8>