## Step 3: Add route stops

Follows Step 2, `02_add_vehicle_assignments_to_db.ipynb`

Followed by Step 4, `04_add_stop_times_to_db.ipynb`

**Ran successfully against `ituran_synchromatics_data.sqlite` in the `Data Integration - All Months` folder.**

This script creates or replaces a table in the database at the supplied
path that contains the set of stops for each of five Downtown DASH routes. The
source Excel files are hand-crafted and assumed to be perfect.

In [1]:
import argparse
import numpy as np
from os import path, listdir
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Notebook parameters. These replace the argparse options of the script this
# notebook was converted from.
route_stop_table_name = 'route_stop'
db_path = 'ituran_synchromatics_data.sqlite'

# Root folder of the project on the shared drive.
project_root_dir = r'\\vntscex.local\DFS\3BC-Share$_Mobileye_Data\Data\Data Integration - All Months'

# Folder holding the route-stop workbooks (previously the relative path
# 'route_stops').
data_root_dir = path.join(project_root_dir, 'route_stops')


In [3]:
def read_route_stop_data(dir_path):
  """Read every route-stop workbook in ``dir_path`` into one DataFrame.

  Each file directly inside ``dir_path`` is read with ``pd.read_excel`` using
  fixed column dtypes, and the per-file frames are stacked in file-name order.

  Args:
    dir_path: Folder containing the Excel workbooks. Every entry is assumed to
      be a workbook at the root of the folder; only Excel lock files (names
      starting with ``~$``) are skipped.

  Returns:
    A DataFrame holding the rows of all workbooks, indexed 0..n-1.

  Raises:
    ValueError: If ``dir_path`` contains no workbook to read.
  """
  # listdir() returns entries in arbitrary order; sorting keeps the row order
  # reproducible between runs. '~$' files are the lock files Excel leaves next
  # to a workbook that is currently open, and they are not readable workbooks.
  file_names = sorted(
    name for name in listdir(dir_path) if not name.startswith('~$'))

  if not file_names:
    raise ValueError(
      'no route stop files found in {!r}'.format(dir_path))

  # pandas treats strings as objects
  column_types = {
    'route_id': np.uint32, 'route_name': object, 'stop_id': np.uint32,
    'stop_name': object, 'latitude': np.float64, 'longitude': np.float64,
    'heading': object, 'sequence': np.uint8, 'is_terminal': np.bool_}

  frames = [
    pd.read_excel(path.join(dir_path, name), dtype=column_types)
    for name in file_names]

  # ignore_index=True already labels the result 0..n-1, so no further
  # re-indexing is needed.
  return pd.concat(frames, ignore_index=True)

In [4]:
# Build the SQLAlchemy URL from the configured file name and keep it under its
# own name, so db_path continues to mean "database file name" and the file
# name is written down in only one place.
db_url = 'sqlite:///' + path.join(project_root_dir, db_path)

db = create_engine(db_url)

route_stop_data = read_route_stop_data(data_root_dir)

In [5]:
# A notebook cell only renders its last expression, so the preview has to be
# displayed explicitly to show up alongside the route ids.
display(route_stop_data.head())

# Route ids present in the loaded data. 298 was missing at one point because of
# a problem with its source file, which has since been replaced.
route_stop_data['route_id'].unique()


array([ 296,  297,  298,  408,  409, 7690, 8435, 9212, 9270, 9736, 9960],
      dtype=uint64)

In [6]:
  # poor performance has been observed when adding more than one million records
  # at a time
# Replace any existing table rather than appending to it: the notebook is meant
# to create or replace the table, and appending would add a second copy of
# every row each time this cell is re-run.
route_stop_data.to_sql(
  route_stop_table_name, db, if_exists='replace',
  chunksize=1000000, index=False)