In [18]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?
import sys

from siuba import *
import pandas as pd
import geopandas as gpd
import datetime as dt

from rt_analysis import rt_parser
from rt_analysis import rt_filter_map_plot

import shared_utils

In [2]:
import datetime as dt
import os
import re
import time
from pathlib import Path
from typing import Union

import branca
import dask_geopandas as dg
import folium
import gcsfs
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
import siuba  # need for type hints
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.tables import tbls
from numba import jit
from shared_utils import geography_utils, gtfs_utils, gtfs_utils_v2, map_utils, utils
from siuba import *

# Migrate existing rt_analysis to use the v2 warehouse 

* mostly just changing queries around

In [3]:
# GCS filesystem handle; check_cached() uses it to test whether a file exists.
fs = gcsfs.GCSFileSystem()

# set system time
# Setting TZ and calling time.tzset() changes the process-wide local time zone,
# so naive local-time conversions (datetime.fromtimestamp, datetime.timestamp)
# in this notebook use Pacific time. The env var must be set before tzset().
os.environ["TZ"] = "America/Los_Angeles"
time.tzset()

# Bucket locations used by this analysis
GCS_PROJECT = "cal-itp-data-infra"
BUCKET_NAME = "calitp-analytics-data"
BUCKET_DIR = "data-analyses/rt_delay"
# Base path; note the trailing slash, paths below are built by concatenation
GCS_FILE_PATH = f"gs://{BUCKET_NAME}/{BUCKET_DIR}/"
# Default folder where query results are cached as parquet
EXPORT_PATH = f"{GCS_FILE_PATH}cached_views/"

In [4]:
# Datetime formats (strftime patterns)
DATE_WEEKDAY_FMT = "%b %d (%a)"  # Jun 01 (Wed) for 6/1/22
MONTH_DAY_FMT = "%m_%d"  # 06_01 for 6/1/22 (%m is zero-padded)
HOUR_MIN_FMT = "%H:%M"  # 08:00 for 8 am, 13:00 for 1pm
HOUR_MIN_SEC_FMT = "%H:%M:%S"  # 08:15:05 for 8:15 am + 5 sec, 13:15:05 for 1:15pm + 5 sec
FULL_DATE_FMT = "%Y-%m-%d"  # 2022-06-01 for 6/1/22

In [5]:
def check_cached(
    filename: str,
    GCS_FILE_PATH: Union[str, Path] = GCS_FILE_PATH,
    subfolder: Union[str, Path] = "cached_views/",
) -> Union[str, Path, None]:
    """
    Check GCS bucket to see if a file already is there.

    filename: name of the file to look for, e.g. "vp_4_2022-10-17.parquet"
    GCS_FILE_PATH: base path, defaults to
        gs://calitp-analytics-data/data-analyses/rt_delay/
    subfolder: folder under GCS_FILE_PATH to look in, defaults to
        "cached_views/"

    Returns the full path (as a string) if the file exists, otherwise None.
    """
    # Pieces are concatenated as-is, so GCS_FILE_PATH and subfolder must
    # already carry their trailing "/".
    path = f"{GCS_FILE_PATH}{subfolder}{filename}"
    return path if fs.exists(path) else None

In [6]:
def convert_ts(ts: int) -> dt.datetime:
    """
    Convert a POSIX timestamp (seconds since the epoch) to a naive datetime
    in the process's local time zone.

    This notebook sets TZ to America/Los_Angeles before use, so the result
    is Pacific wall-clock time there.
    """
    return dt.datetime.fromtimestamp(ts)

In [7]:
def get_vehicle_positions(
    itp_id: int, analysis_date: dt.date, export_path: Union[str, Path] = EXPORT_PATH
) -> pd.DataFrame:
    """
    itp_id: an itp_id (string or integer)
    analysis_date: datetime.date
    export_path: folder prefix (ending in "/") where the result is cached as
        parquet. The same location is checked for a cached copy first.

    Interim function for getting complete vehicle positions data for a
    single operator on a single date of interest.
    To be replaced as RT views are implemented...

    Currently drops positions for day after analysis date after 2AM,
    temporary fix to balance capturing trips crossing
    midnight with avoiding duplicates...

    Returns a DataFrame of vehicle positions; when freshly queried,
    vehicle_timestamp is converted to naive local datetimes via convert_ts.
    Raises AssertionError if the query yields no rows with a timestamp.
    """
    date_str = analysis_date.strftime(FULL_DATE_FMT)
    filename = f"vp_{itp_id}_{date_str}.parquet"

    # Look for the cached file in the same place this function writes it, so a
    # non-default export_path is read back instead of being re-queried.
    path = check_cached(filename, GCS_FILE_PATH=export_path, subfolder="")
    if path:
        print("found parquet")
        return pd.read_parquet(path)

    # Analysis window: midnight on analysis_date through 2AM the next day,
    # as naive datetimes in the process's local time zone.
    next_date = analysis_date + dt.timedelta(days=1)
    start = dt.datetime.combine(analysis_date, dt.time(0))
    end = dt.datetime.combine(next_date, dt.time(2))
    # .timestamp() on a naive datetime interprets it as local time, giving the
    # epoch seconds to compare against the warehouse's integer timestamps.
    st_ts_utc = int(start.timestamp())
    end_ts_utc = int(end.timestamp())

    df = query_sql(
        f"""
        SELECT calitp_itp_id, calitp_url_number,
        timestamp AS vehicle_timestamp,
        vehicle_label AS entity_id, vehicle_id,
        trip_id, longitude AS vehicle_longitude, latitude AS vehicle_latitude
        FROM `cal-itp-data-infra.staging.stg_rt__vehicle_positions`
        WHERE calitp_itp_id = {itp_id} AND date IN ("{analysis_date}", "{next_date}")
        AND timestamp > {st_ts_utc}
        AND timestamp < {end_ts_utc}
        """
    )

    # One row per trip per timestamp; rows without a timestamp are unusable
    df = df >> distinct(_.trip_id, _.vehicle_timestamp, _keep_all=True)
    df = df.dropna(subset=["vehicle_timestamp"])
    assert not df.empty, f"no vehicle positions data found for {date_str}"
    df.vehicle_timestamp = df.vehicle_timestamp.apply(convert_ts)
    # header timestamp not present in staging, add upstream if desired
    # df.header_timestamp = df.header_timestamp.apply(convert_ts)
    df = df >> filter(_.vehicle_timestamp > start, _.vehicle_timestamp < end)

    # TODO: warn when the data starts after the beginning of the window or
    # ends before the end of analysis_date (previously sketched as asserts).

    df.to_parquet(f"{export_path}{filename}")
    return df

In [8]:
ac_v1 = get_vehicle_positions(4, dt.date(2022, 10, 17))

found parquet


In [9]:
ac_v1 >> head(3)

Unnamed: 0,calitp_itp_id,calitp_url_number,vehicle_timestamp,entity_id,vehicle_id,trip_id,vehicle_longitude,vehicle_latitude
0,4,0,2022-10-17 00:59:37,,1361,783020,-122.27368,37.80517
1,4,0,2022-10-17 00:59:42,,2251,14070010,-122.15236,37.743004
2,4,0,2022-10-17 00:59:40,,2245,13273010,-122.29322,37.838596


In [10]:
provider = (tbls.mart_transit_database.dim_provider_gtfs_data()
            >> collect()
           )

In [11]:
provider.columns

Index(['key', 'guidelines_assessed', 'reports_site_assessed',
       'organization_key', 'organization_name', 'organization_itp_id',
       'organization_hubspot_company_record_id', 'organization_ntd_id',
       'organization_source_record_id', 'service_key', 'service_name',
       'service_source_record_id', 'gtfs_service_data_customer_facing',
       'regional_feed_type', 'associated_schedule_gtfs_dataset_key',
       'schedule_gtfs_dataset_name', 'schedule_source_record_id',
       'service_alerts_gtfs_dataset_name', 'service_alerts_source_record_id',
       'vehicle_positions_gtfs_dataset_name',
       'vehicle_positions_source_record_id', 'trip_updates_gtfs_dataset_name',
       'trip_updates_source_record_id', 'schedule_gtfs_dataset_key',
       'service_alerts_gtfs_dataset_key', 'vehicle_positions_gtfs_dataset_key',
       'trip_updates_gtfs_dataset_key', '_valid_from', '_valid_to',
       '_is_current'],
      dtype='object')

In [8]:
analysis_date = dt.date(2023, 2, 17)

In [14]:
import pytz

In [15]:
# Midnight at the start of analysis_date, made timezone-aware (UTC) so it can be
# compared with the tz-aware _valid_from / _valid_to columns of `provider`.
# NOTE(review): this is midnight UTC, not midnight Pacific -- confirm that is
# the intended reference instant for a service date.
analysis_dt = dt.datetime.combine(analysis_date, dt.time(0,0))
analysis_dt = analysis_dt.replace(tzinfo=pytz.UTC)

In [16]:
## TODO: unexpected regional feed for 10.17? (AC Transit, ITP ID 4)
## may need our own logic here that always ditches the regional feed type, or stop relying on reports_site_assessed?

In [17]:
## is mtc regional feed...
# tbls.mart_transit_database.dim_gtfs_datasets() >> filter(_.key == 'f5670225b34b10c22339915c7bce3707') >> head(3)

In [18]:
# Provider rows for ITP ID 4 that are assessed for the reports site and whose
# validity window contains analysis_dt; the bare
# vehicle_positions_gtfs_dataset_key condition is intended to keep only rows
# with a vehicle positions feed.
# The window is closed on both ends (the displayed rows have _valid_from at
# 00:00:00 and _valid_to at 23:59:59.999999), so the lower bound is inclusive:
# a row that becomes valid exactly at analysis_dt must still match.
one_org = provider >> filter(_.organization_itp_id == 4,
                  _.vehicle_positions_gtfs_dataset_key,
                  _.reports_site_assessed,
                  _._valid_from <= analysis_dt,
                  _._valid_to >= analysis_dt)

In [19]:
one_org

Unnamed: 0,key,guidelines_assessed,reports_site_assessed,organization_key,organization_name,organization_itp_id,organization_hubspot_company_record_id,organization_ntd_id,organization_source_record_id,service_key,...,vehicle_positions_source_record_id,trip_updates_gtfs_dataset_name,trip_updates_source_record_id,schedule_gtfs_dataset_key,service_alerts_gtfs_dataset_key,vehicle_positions_gtfs_dataset_key,trip_updates_gtfs_dataset_key,_valid_from,_valid_to,_is_current
11829,77955416b0ce88db34b68e56f44bc6dd,False,True,87a88b5598e8666c1298092f2cc5df8e,Alameda-Contra Costa Transit District,4.0,1880690867,90014,recOZgevYf7Jimm9L,d1444c8a6478da002d65324246682885,...,recIAk0ojKEvO8yl5,Bay Area 511 AC Transit Trip Updates,recFOyTNrrrVpozMy,444700afe086ed24e3cb888cecd3037c,,4bb240cf480589e58c6e58d06d6fb72c,6aba5c314d297bcedd7ec86fcaf9361c,2023-02-07 00:00:00+00:00,2023-03-13 23:59:59.999999+00:00,False
11832,398354c595b11c671c7fb7913aa93950,False,True,87a88b5598e8666c1298092f2cc5df8e,Alameda-Contra Costa Transit District,4.0,1880690867,90014,recOZgevYf7Jimm9L,b0aad71e4c04e4c647b5d36b07e5cb63,...,recIAk0ojKEvO8yl5,Bay Area 511 AC Transit Trip Updates,recFOyTNrrrVpozMy,444700afe086ed24e3cb888cecd3037c,,4bb240cf480589e58c6e58d06d6fb72c,6aba5c314d297bcedd7ec86fcaf9361c,2023-02-07 00:00:00+00:00,2023-03-13 23:59:59.999999+00:00,False
11838,4e486663e81b6dba84fdf0d9d6f67f64,False,True,87a88b5598e8666c1298092f2cc5df8e,Alameda-Contra Costa Transit District,4.0,1880690867,90014,recOZgevYf7Jimm9L,26963c5207c102bbdca4777e580765fd,...,recIAk0ojKEvO8yl5,Bay Area 511 AC Transit Trip Updates,recFOyTNrrrVpozMy,444700afe086ed24e3cb888cecd3037c,,4bb240cf480589e58c6e58d06d6fb72c,6aba5c314d297bcedd7ec86fcaf9361c,2023-02-07 00:00:00+00:00,2023-03-13 23:59:59.999999+00:00,False
12383,860dc5fe4888c4a3f431dc4dd551159f,False,True,87a88b5598e8666c1298092f2cc5df8e,Alameda-Contra Costa Transit District,4.0,1880690867,90014,recOZgevYf7Jimm9L,4b74478a08534933b97246e56f065742,...,rec31PoLtsVT9kDeb,Bay Area 511 Dumbarton Express TripUpdates,recCsrEnH5Bjgmbfm,d7dcb48dbed269f345db12d1884a9e33,,6d1991ecbdee1e06031cda8761757f03,5c3e65766dda65958cf4da845286c0d5,2023-02-07 00:00:00+00:00,2023-03-13 23:59:59.999999+00:00,False


In [20]:
distinct_vp_feeds = one_org >> distinct(_.vehicle_positions_gtfs_dataset_key, _keep_all=True)
distinct_vp_feeds

Unnamed: 0,key,guidelines_assessed,reports_site_assessed,organization_key,organization_name,organization_itp_id,organization_hubspot_company_record_id,organization_ntd_id,organization_source_record_id,service_key,...,vehicle_positions_source_record_id,trip_updates_gtfs_dataset_name,trip_updates_source_record_id,schedule_gtfs_dataset_key,service_alerts_gtfs_dataset_key,vehicle_positions_gtfs_dataset_key,trip_updates_gtfs_dataset_key,_valid_from,_valid_to,_is_current
0,77955416b0ce88db34b68e56f44bc6dd,False,True,87a88b5598e8666c1298092f2cc5df8e,Alameda-Contra Costa Transit District,4.0,1880690867,90014,recOZgevYf7Jimm9L,d1444c8a6478da002d65324246682885,...,recIAk0ojKEvO8yl5,Bay Area 511 AC Transit Trip Updates,recFOyTNrrrVpozMy,444700afe086ed24e3cb888cecd3037c,,4bb240cf480589e58c6e58d06d6fb72c,6aba5c314d297bcedd7ec86fcaf9361c,2023-02-07 00:00:00+00:00,2023-03-13 23:59:59.999999+00:00,False
1,860dc5fe4888c4a3f431dc4dd551159f,False,True,87a88b5598e8666c1298092f2cc5df8e,Alameda-Contra Costa Transit District,4.0,1880690867,90014,recOZgevYf7Jimm9L,4b74478a08534933b97246e56f065742,...,rec31PoLtsVT9kDeb,Bay Area 511 Dumbarton Express TripUpdates,recCsrEnH5Bjgmbfm,d7dcb48dbed269f345db12d1884a9e33,,6d1991ecbdee1e06031cda8761757f03,5c3e65766dda65958cf4da845286c0d5,2023-02-07 00:00:00+00:00,2023-03-13 23:59:59.999999+00:00,False


In [21]:
st_date = analysis_date
end_date = analysis_date + dt.timedelta(days=1)

In [22]:
ac_v1.columns

Index(['calitp_itp_id', 'calitp_url_number', 'vehicle_timestamp', 'entity_id',
       'vehicle_id', 'trip_id', 'vehicle_longitude', 'vehicle_latitude'],
      dtype='object')

In [23]:
##TODO somehow support unscheduled trips?
# Query vehicle locations whose `dt` is the analysis date or the following
# date, restricted to this org's vehicle positions feeds, keep a subset of
# columns comparable to the v1 result, and pull it into pandas with collect().
# NOTE(review): unlike get_vehicle_positions, no vehicle_timestamp window is
# applied here, and the timestamps returned appear to be UTC -- confirm whether
# the Pacific midnight-to-2AM filter still needs to be added.
org_vl = (tbls.mart_gtfs.fct_vehicle_locations() >> filter((_.dt == st_date) | (_.dt == end_date))
         >> filter(_.gtfs_dataset_key.isin(distinct_vp_feeds.vehicle_positions_gtfs_dataset_key))
         >> select(_.gtfs_dataset_key, _.vehicle_timestamp, _.vehicle_id, _.trip_id,
                  _.position_longitude, _.position_latitude)
         >> collect()
         )



In [24]:
org_vl >> head(3)

Unnamed: 0,gtfs_dataset_key,vehicle_timestamp,vehicle_id,trip_id,position_longitude,position_latitude
0,4bb240cf480589e58c6e58d06d6fb72c,2023-02-16 23:59:39+00:00,1217,12899020,-122.35482,37.963722
1,4bb240cf480589e58c6e58d06d6fb72c,2023-02-16 23:59:39+00:00,2211,6630020,-122.04145,37.5321
2,4bb240cf480589e58c6e58d06d6fb72c,2023-02-16 23:59:39+00:00,7032,948020,-122.26163,37.8108


In [25]:
org_vl.columns

Index(['gtfs_dataset_key', 'vehicle_timestamp', 'vehicle_id', 'trip_id',
       'position_longitude', 'position_latitude'],
      dtype='object')

In [26]:
gdf = gpd.GeoDataFrame(org_vl, geometry=gpd.points_from_xy(org_vl.position_longitude, org_vl.position_latitude),
                crs = shared_utils.geography_utils.WGS84)

In [27]:
## (gdf >> select(_.geometry, _.trip_id) >> head(1000)).explore()

In [28]:
import sys

In [29]:
new_size = sys.getsizeof(org_vl)
new_size

380786665

In [30]:
old_size = sys.getsizeof(ac_v1)
old_size

345707533

In [31]:
## OK after selecting similar subset of columns :) 
new_size / old_size

1.1014705456244716

## New Trips Query

In [32]:
old_trips = shared_utils.rt_utils.get_trips(4, dt.date(2022, 10, 17))

found parquet


In [33]:
old_trips >> head(3)

Unnamed: 0,calitp_itp_id,calitp_url_number,service_date,trip_key,trip_id,route_id,direction_id,shape_id,calitp_extracted_at,calitp_deleted_at,route_type,route_long_name,route_desc,route_short_name
0,4,1,2022-10-17,-1409619756174269082,5909020,658,0,shp-658-56,2022-08-07,2022-12-04,3,Skyline - Bret Harte - MacArthur,,658
1,4,1,2022-10-17,7637808243967000074,13231020,78,1,shp-78-03,2022-08-07,2022-12-04,3,Fruitvale Bart\ Ferry Terminal,,78
2,4,0,2022-10-17,4286204798464335370,10240020,623,0,shp-623-57,2022-08-07,2022-12-04,3,Irvington High - Horner Jr. High,,623


In [34]:
# Scheduled trips on analysis_date for the schedule feeds paired with this
# org's vehicle positions feeds (schedule_gtfs_dataset_key of
# distinct_vp_feeds), with route attributes taken from the same table.
org_trips = (tbls.mart_gtfs.fct_daily_scheduled_trips()
             >> filter(_.activity_date == analysis_date)
             >> filter(_.gtfs_dataset_key.isin(distinct_vp_feeds.schedule_gtfs_dataset_key))
             >> select(_.trip_key, _.gtfs_dataset_key, _.activity_date,
                       _.trip_id, _.route_id, _.route_short_name,
                       _.shape_id, _.direction_id, _.route_type,
                       _.route_long_name, _.route_desc
                      )
             # no longer need to join in routes, thanks v2 warehouse!
             >> collect()
            )

In [35]:
org_trips >> head(3)

Unnamed: 0,trip_key,gtfs_dataset_key,activity_date,trip_id,route_id,route_short_name,shape_id,direction_id,route_type,route_long_name,route_desc
0,75a9da76738e2839c3c4e80c70ffc51e,444700afe086ed24e3cb888cecd3037c,2023-02-17,9986020,34,34,shp-34-07,1,3,Estudillo - Davis - Meekland,
1,f85e951538208c01928a984343fe05f5,444700afe086ed24e3cb888cecd3037c,2023-02-17,13260020,39,39,shp-39-05,1,3,Skyline - Dimond - Fruitvale,
2,06efdd32d7cc558267a1c9ec5ad8c1d8,444700afe086ed24e3cb888cecd3037c,2023-02-17,6663020,41,41,shp-41-55,0,3,Whitman - Huntwood - Union Landg,


In [36]:
new_size = sys.getsizeof(org_trips)
new_size

3667653

In [37]:
old_size = sys.getsizeof(old_trips)
old_size

3452715

In [38]:
## OK after selecting similar subset of columns :) 
new_size / old_size

1.0622518800422276

## New ST Query

In [22]:
old_st = shared_utils.rt_utils.get_stop_times(4, dt.date(2022, 10, 17))

found parquet


In [23]:
old_st >> head(3)

Unnamed: 0,calitp_itp_id,trip_id,stop_id,arrival_time,departure_time,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,...,stop_time_key,calitp_deleted_at,stop_sequence,stop_time_continuous_pickup,stop_time_continuous_drop_off,stop_sequence_rank,arrival_ts,departure_ts,trip_key,departure_hour
0,4,12731020,53832,7:51:34,7:51:34,33 PIEDMONT TO HIGHLAND WAY VIA OAKLAND AVE,0,0,15292.32,0,...,6674480576887928016,2022-12-04,57,,,57,28294,28294,4622753192079563450,7
1,4,2699020,51832,12:17:36,12:17:36,72M JACK LONDON SQ VIA SAN PABLO AVE DOWNTOWN ...,0,0,18080.36,0,...,7067786241977062485,2022-12-04,60,,,60,44256,44256,491570519637676407,12
2,4,3198020,3099,18:21:08,18:21:08,57 FOOTHILL SQUARE VIA MACARTHUR BLVD,0,0,18275.13,0,...,4813443666059748000,2022-12-04,57,,,57,66068,66068,-6354043175372643119,18


In [9]:
analysis_date

datetime.date(2023, 2, 17)

In [None]:
## TODO worked for LADOT but runs out of memory here??

In [48]:
# Feed keys for this organization's schedule feeds on analysis_date: start from
# current, reports-site-assessed provider rows, expose
# schedule_gtfs_dataset_key under the name gtfs_dataset_key, then join to the
# daily feed service summary and keep the distinct feed_key values for that
# date. This stays a lazy query (no collect() here).
# NOTE(review): this uses _is_current rather than the _valid_from/_valid_to
# window used for one_org; for a past analysis_date the two may select
# different provider rows -- confirm which is intended.
org_feed_keys = (tbls.mart_transit_database.dim_provider_gtfs_data()
    >> filter(_._is_current, _.reports_site_assessed,
            _.organization_name == 'Alameda-Contra Costa Transit District')
            ## think more about how to start/persist org level identifiers...
            ## could be an attribute, or in any case leave first index table as sql...
    >> select(_.gtfs_dataset_key == _.schedule_gtfs_dataset_key)
    >> inner_join(_, tbls.mart_gtfs.fct_daily_feed_scheduled_service_summary(), by = 'gtfs_dataset_key')
    >> filter(_.activity_date == analysis_date)
    >> distinct(_.feed_key)
    ## only one in this case
    ## NOTE(review): the displayed result has two feed_keys, possibly
    ## including a regional feed -- confirm
    )

## still required to filter to trips running on date...
trips_day_filtered = (tbls.mart_gtfs.fct_daily_scheduled_trips()
                      >> filter(_.activity_date == analysis_date)
                     )
# Stop times for the feeds above, limited to trips scheduled on analysis_date.
# NOTE(review): dim_stops is joined with stop_name, but stop_name is not in
# the final select -- confirm whether it should be kept or the join dropped.
org_st = (
    org_feed_keys
    >> inner_join(_, tbls.mart_gtfs.dim_stop_times(), on = 'feed_key')
    >> inner_join(_, trips_day_filtered, on = ['feed_key', 'trip_id'])
    >> inner_join(_, (tbls.mart_gtfs.dim_stops() >> select(_.feed_key,
                        _.stop_id, _.stop_name))
                  , on = ['feed_key', 'stop_id'])
    >> select(_.feed_key, _.gtfs_dataset_key, _.trip_id,
             _.stop_id, _.arrival_time, _.departure_time,
             _.timepoint, _.stop_sequence, _.continuous_drop_off,
             _.continuous_pickup)
    )

In [49]:
org_feed_keys ## probably catching regional feed!

Unnamed: 0,feed_key
0,2e0478675d2b2bcd6c93da9354d92755
1,aa047b2e150d2833f6713867b2c4ae71


#### TODO: ask in data office hours (OH) whether there's a good way to get from dataset key to feed key...
https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.fct_daily_scheduled_trips#code

In [50]:
org_st = org_st >> collect()

In [51]:
old_st.columns

Index(['calitp_itp_id', 'trip_id', 'stop_id', 'arrival_time', 'departure_time',
       'stop_headsign', 'pickup_type', 'drop_off_type', 'shape_dist_traveled',
       'timepoint', 'calitp_extracted_at', 'calitp_hash', 'stop_time_key',
       'calitp_deleted_at', 'stop_sequence', 'stop_time_continuous_pickup',
       'stop_time_continuous_drop_off', 'stop_sequence_rank', 'arrival_ts',
       'departure_ts', 'trip_key', 'departure_hour'],
      dtype='object')

In [52]:
org_st.columns

Index(['feed_key', 'gtfs_dataset_key', 'trip_id', 'stop_id', 'arrival_time',
       'departure_time', 'timepoint', 'stop_sequence', 'continuous_drop_off',
       'continuous_pickup'],
      dtype='object')

In [53]:
new_size = sys.getsizeof(org_st)
new_size

120812527

In [54]:
old_size = sys.getsizeof(old_st)
old_size

206354241

In [55]:
## hey look this one's smaller :) 
new_size / old_size

0.585461807882107

## New Shapes Query

In [56]:
old_shp = shared_utils.rt_utils.get_routelines(4, dt.date(2022, 10, 17))

In [57]:
old_shp >> head(3)

Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry
0,4,0,shp-95-53,"LINESTRING (-180326.410 -34738.218, -180345.10..."
1,4,0,shp-65-12,"LINESTRING (-199094.894 -13747.291, -199076.60..."
2,4,0,shp-86-52,"LINESTRING (-183767.650 -36536.552, -183750.70..."


In [80]:
# Shapes for the feeds in org_feed_keys, limited to the (feed_key, shape_id)
# pairs used by trips scheduled on analysis_date. pt_array holds each shape's
# points and is turned into a LineString after collecting.
# Still a lazy query at this point.
org_shp = (
    org_feed_keys
    >> inner_join(_, tbls.mart_gtfs.dim_shapes_arrays(), on = 'feed_key')
    >> inner_join(_, (trips_day_filtered >> distinct(
                        _.feed_key, _.shape_id)),
                  on = ['feed_key', 'shape_id'])
    >> select(_.feed_key, _.shape_id, _.pt_array)
    )



In [81]:
org_shp = org_shp >> collect()

In [82]:
def linestring_from_wkt(wkt_list):
    '''
    Build one shapely LineString from an ordered collection of WKT point
    strings, such as the pt_array values in dim_shapes_arrays.

    wkt_list: list of points in wkt format
    '''
    ## uses the shapely.wkt.loads API; shapely 2.0 has a new top-level from_wkt...
    points = map(shapely.wkt.loads, wkt_list)
    return shapely.geometry.LineString(list(points))

In [83]:
org_shp['geometry'] = org_shp.pt_array.apply(linestring_from_wkt)

In [84]:
org_shp = org_shp >> select(-_.pt_array)

In [85]:
org_shp

Unnamed: 0,feed_key,shape_id,geometry
0,2e0478675d2b2bcd6c93da9354d92755,shp-239-59,"LINESTRING (-121.97549 37.556958, -121.9753 37..."
1,2e0478675d2b2bcd6c93da9354d92755,shp-65-14,"LINESTRING (-122.246738 37.880352, -122.246696..."
2,2e0478675d2b2bcd6c93da9354d92755,shp-200-16,"LINESTRING (-121.976025 37.557496, -121.975895..."
3,2e0478675d2b2bcd6c93da9354d92755,shp-33-17,"LINESTRING (-122.209505 37.825505, -122.209488..."
4,2e0478675d2b2bcd6c93da9354d92755,shp-72-33,"LINESTRING (-122.27785 37.796567, -122.277628 ..."
...,...,...,...
323,aa047b2e150d2833f6713867b2c4ae71,DB10036,"LINESTRING (-122.1502733 37.395688, -122.15039..."
324,aa047b2e150d2833f6713867b2c4ae71,DB10039,"LINESTRING (-122.0172182 37.5902461, -122.0170..."
325,aa047b2e150d2833f6713867b2c4ae71,DB10045,"LINESTRING (-122.1499304 37.3954966, -122.1496..."
326,aa047b2e150d2833f6713867b2c4ae71,DB0084,"LINESTRING (-122.017422 37.5903968, -122.01741..."
