# Debug geography_utils: unexpected behavior, only returning 1 row

In [1]:
import datetime as dt
import geopandas as gpd
import os
import pandas as pd

# Set CALITP_BQ_MAX_BYTES to 130 billion (stored as a string, since environment
# values must be str). Presumably this caps the bytes a BigQuery query may scan,
# and it is assigned ahead of the calitp imports below on the assumption that
# calitp reads it at import time — TODO confirm against the calitp docs.
os.environ["CALITP_BQ_MAX_BYTES"] = str(130_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
from siuba import *

import utils
import shared_utils

E0321 16:27:56.181770062     972 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E0321 16:27:58.964364653     972 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


## Current GTFS shapes

In [2]:
# Build route shapes for ITP ID 182 without supplying an alternate_df.
test = shared_utils.geography_utils.make_routes_shapefile(
    ITP_ID_LIST=[182],
    alternate_df=None,
)

# Bare last expression so the notebook renders the result.
test



Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry
0,182,0,100751_FEB22,"LINESTRING (-118.26517 33.92773, -118.26517 33..."
1,182,0,100755_FEB22,"LINESTRING (-118.26510 34.03349, -118.26539 34..."
2,182,0,100756_FEB22,"LINESTRING (-118.26517 33.92773, -118.26517 33..."
3,182,0,100757_FEB22,"LINESTRING (-118.26802 33.95985, -118.26803 33..."
4,182,0,100759_FEB22,"LINESTRING (-118.26517 33.92773, -118.26517 33..."
...,...,...,...,...
691,182,0,940255_FEB22,"LINESTRING (-118.26461 34.03322, -118.26431 34..."
692,182,0,940256_FEB22,"LINESTRING (-118.37529 34.16866, -118.37453 34..."
693,182,0,940257_FEB22,"LINESTRING (-118.26461 34.03322, -118.26431 34..."
694,182,0,960250_FEB22,"LINESTRING (-118.23760 34.05850, -118.23723 34..."


## Historical shapes with `alternate_df` parameter

In [4]:
# Date used to pick the historical snapshot of the shapes table.
SELECTED_DATE = dt.date(year=2021, month=10, day=22)

# Shape rows for ITP ID 182 that were extracted on or before SELECTED_DATE
# and deleted after it.
la_metro = (
    tbl.gtfs_schedule_type2.shapes()
    >> filter(_.calitp_itp_id == 182)
    >> filter(
        _.calitp_extracted_at <= SELECTED_DATE,
        _.calitp_deleted_at > SELECTED_DATE,
    )
    >> collect()
)

# Pass the collected rows to the helper through alternate_df.
la_metro2 = shared_utils.geography_utils.make_routes_shapefile(
    ITP_ID_LIST=[182],
    CRS="EPSG:4326",
    alternate_df=la_metro,
)

la_metro2

Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry
0,182,0,100705_SEPT21,"LINESTRING (-118.38284 34.08562, -118.38209 34..."
1,182,0,100706_SEPT21,"LINESTRING (-118.26510 34.03349, -118.26539 34..."
2,182,0,100707_SEPT21,"LINESTRING (-118.38354 34.08209, -118.38336 34..."
3,182,0,100708_SEPT21,"LINESTRING (-118.32602 34.08294, -118.32602 34..."
4,182,0,100709_SEPT21,"LINESTRING (-118.26247 34.03373, -118.26201 34..."
...,...,...,...,...
732,182,0,DSE-HG-DS,"LINESTRING (-118.28728 33.86931, -118.28796 33..."
733,182,0,DSE-HG-HG,"LINESTRING (-118.23821 34.07487, -118.23816 34..."
734,182,0,DSE-US-DS,"LINESTRING (-118.23699 34.05551, -118.23673 34..."
735,182,0,DSE-US-US-CF,"LINESTRING (-118.23911 34.07540, -118.23869 34..."


## Debug previous error (ITP ID: 13)?

Take a look at `ITP_ID==13`. It previously raised an error involving a `NoneType` for one of the `shape_id` values.

If the error comes up again, maybe what's needed is a check that `shape_id` is not `None`?

Seems OK, no errors, but let's address the 2 use cases in `geography_utils`:
1. No `shape_id` present at all: create a `shape_id` column and fill it with `route_id`.
2. Operator has mostly valid `shape_id` values but may occasionally have a missing `shape_id`: drop those rows.


In [5]:
# Shape rows for ITP ID 13 that were extracted on or before SELECTED_DATE
# and deleted after it (SELECTED_DATE is whatever the kernel currently holds).
operator = (
    tbl.gtfs_schedule_type2.shapes()
    >> filter(_.calitp_itp_id == 13)
    >> filter(
        _.calitp_extracted_at <= SELECTED_DATE,
        _.calitp_deleted_at > SELECTED_DATE,
    )
    >> collect()
)

# Pass the collected rows to the helper through alternate_df.
operator2 = shared_utils.geography_utils.make_routes_shapefile(
    ITP_ID_LIST=[13],
    CRS="EPSG:4326",
    alternate_df=operator,
)

operator2

Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry
0,13,0,114,"LINESTRING (-74.47105 40.47332, -74.47109 40.4..."
1,13,0,115,"LINESTRING (-80.25776 25.84955, -80.25977 25.8..."
2,13,0,116,"LINESTRING (-73.99446 40.75033, -73.99434 40.7..."
3,13,0,118,"LINESTRING (-118.23678 34.05618, -118.23591 34..."
4,13,0,119,"LINESTRING (-118.23678 34.05618, -118.22940 34..."
...,...,...,...,...
100,13,0,324,"LINESTRING (-71.05530 42.35231, -71.05479 42.3..."
101,13,0,325,"LINESTRING (-73.99446 40.75033, -73.99306 40.7..."
102,13,0,326,"LINESTRING (-73.99446 40.75033, -73.99306 40.7..."
103,13,0,327,"LINESTRING (-77.00608 38.89695, -77.00608 38.8..."


In [6]:
# Same call for ITP ID 13, again without supplying an alternate_df.
test2 = shared_utils.geography_utils.make_routes_shapefile(
    ITP_ID_LIST=[13],
    alternate_df=None,
)

# Bare last expression so the notebook renders the result.
test2

Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry
0,13,0,101,"LINESTRING (-118.23678 34.05618, -118.23591 34..."
1,13,0,102,"LINESTRING (-117.16958 32.71617, -117.17020 32..."
2,13,0,103,"LINESTRING (-73.99446 40.75033, -73.99446 40.7..."
3,13,0,104,"LINESTRING (-81.14826 32.08341, -81.14506 32.0..."
4,13,0,105,"LINESTRING (-73.99446 40.75033, -73.99306 40.7..."
...,...,...,...,...
144,13,0,86,"LINESTRING (-71.05530 42.35231, -71.05479 42.3..."
145,13,0,90,"LINESTRING (-76.45197 37.02273, -76.45310 37.0..."
146,13,0,92,"LINESTRING (-73.99446 40.75033, -73.99434 40.7..."
147,13,0,94,"LINESTRING (-77.49694 37.61769, -77.49670 37.6..."


In [7]:
# Previously, this error came up when it was run for 2/8/22.
# Keep SELECTED_DATE a dt.date, matching how it is first defined for the
# 2021-10-22 run, rather than rebinding it to the non-zero-padded string
# "2022-2-8". That way the filter below always compares against the same type,
# whichever cell last assigned SELECTED_DATE.
SELECTED_DATE = dt.date(2022, 2, 8)

# Shape rows for ITP ID 13 that were extracted on or before SELECTED_DATE
# and deleted after it.
error = (
    tbl.gtfs_schedule_type2.shapes()
    >> filter(_.calitp_itp_id == 13)
    >> filter(
        _.calitp_extracted_at <= SELECTED_DATE,
        _.calitp_deleted_at > SELECTED_DATE,
    )
    >> collect()
)

# Re-run the helper on the 2/8/22 snapshot to see whether the earlier error
# reproduces.
error2 = shared_utils.geography_utils.make_routes_shapefile(
    ITP_ID_LIST=[13],
    CRS="EPSG:4326",
    alternate_df=error,
)

error2

Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry
0,13,0,114,"LINESTRING (-74.47105 40.47332, -74.47109 40.4..."
1,13,0,115,"LINESTRING (-80.25776 25.84955, -80.25977 25.8..."
2,13,0,116,"LINESTRING (-73.99446 40.75033, -73.99434 40.7..."
3,13,0,118,"LINESTRING (-118.23678 34.05618, -118.23591 34..."
4,13,0,119,"LINESTRING (-118.23678 34.05618, -118.22940 34..."
...,...,...,...,...
100,13,0,324,"LINESTRING (-71.05530 42.35231, -71.05479 42.3..."
101,13,0,325,"LINESTRING (-73.99446 40.75033, -73.99306 40.7..."
102,13,0,326,"LINESTRING (-73.99446 40.75033, -73.99306 40.7..."
103,13,0,327,"LINESTRING (-77.00608 38.89695, -77.00608 38.8..."
