## Tiger Census
* https://www2.census.gov/geo/pdfs/maps-data/data/tiger/tgrshp2019/TGRSHP2019_TechDoc.pdf
* S1200 - secondary road
* S1100 - primary road
* S1400 - local roads
* Builds on scripts/cut_road_segments.py


In [2]:
# Notebook display settings: show up to 100 columns, print floats with two
# decimals, and do not truncate the number of rows or the column contents.
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [1]:
import datetime
import os

# USE_PYGEOS has to be set before geopandas (or anything that imports it,
# such as dask_geopandas) is imported for the first time; setting it
# afterwards has no effect and the PyGEOS/Shapely warning is still emitted.
os.environ['USE_PYGEOS'] = '0'

import dask.dataframe as dd
import dask_geopandas as dg
from dask import delayed, compute

import geopandas
import geopandas as gpd
import pandas as pd
from calitp_data_analysis.sql import to_snakecase

from segment_speed_utils import helpers
from segment_speed_utils.project_vars import analysis_date
from shared_utils import dask_utils, geography_utils, utils

# Root of the analytics bucket and the shared_data folder every parquet in
# this notebook is read from / written to.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/"
SHARED_GCS = f"{GCS_FILE_PATH}shared_data/"

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas


### Tiger - Load Roads

In [3]:
def load_roads(road_type_wanted: list, buffer_or_not: bool = False)-> gpd.GeoDataFrame:
    """
    Read the TIGER roads parquet, keeping only the requested road types.

    Args:
        road_type_wanted (list): MTFCC codes to keep (S1100, S1200, S1400...).
        Codes are described in
        https://www2.census.gov/geo/pdfs/maps-data/data/tiger/tgrshp2019/TGRSHP2019_TechDoc.pdf
        buffer_or_not (bool): when True, each geometry is replaced by
        its 200-unit buffer (units of geography_utils.CA_NAD83Albers).

    Returns:
        GDF in geography_utils.CA_NAD83Albers with the linear id,
        full name and geometry columns, passed through to_snakecase.
        As of 4/18/23, returns 953914 nunique linearid
    """
    roads_path = f"{SHARED_GCS}all_roads_2020_state06.parquet"

    # Filter on MTFCC while reading so only the wanted road types are loaded
    roads = gpd.read_parquet(
        roads_path,
        filters=[("MTFCC", "in", road_type_wanted)],
        columns=["LINEARID", "geometry", "FULLNAME"],
    )
    roads = roads.to_crs(geography_utils.CA_NAD83Albers)

    if buffer_or_not:
        buffered_geometry = roads.geometry.buffer(200)
        roads = roads.assign(geometry=buffered_geometry)

    return to_snakecase(roads)

### GTFS Shapes

In [4]:
def gtfs_stops_operators(date) -> gpd.GeoDataFrame:
    """
    Load stops with operator and
    feed key information.

    Args:
        date: date wanted for the datasets to be drawn from. Used for
        both the stops and the trips (operator names) so the two
        tables always describe the same service date.

    Returns:
        GDF whose active geometry is `buffered_geometry` (each stop
        buffered by 50) and whose `name` column holds the operator,
        or the string "None" when no operator matched the feed_key.
    """
    stops = (
        helpers.import_scheduled_stops(
            date, (), ["feed_key", "stop_id", "stop_key", "geometry"]
        )
        .compute()
        .drop_duplicates()
    )

    stops = stops.set_crs(geography_utils.CA_NAD83Albers)

    # Buffer each stop by 50 CRS units.
    # NOTE(review): the original comment said "50 feet"; confirm the units
    # of geography_utils.CA_NAD83Albers.
    stops = stops.assign(buffered_geometry=stops.geometry.buffer(50))

    # Make the buffer the active geometry so spatial joins use it
    stops = stops.set_geometry("buffered_geometry")

    # Merge for operator information. Use the `date` argument here: the
    # previous version read the global analysis_date, which silently mixed
    # dates whenever the function was called with a different date.
    trips = (
        helpers.import_scheduled_trips(date, (), ["name", "feed_key"])
        .compute()
        .drop_duplicates()
    )

    m1 = pd.merge(stops, trips, on=["feed_key"], how="left")

    # Stops without an operator keep a placeholder name
    m1["name"] = m1["name"].fillna("None")

    return m1

In [5]:
# stops = gtfs_stops_operators(analysis_date)

In [6]:
def gtfs_routes_operators(date) -> gpd.GeoDataFrame:
    """
    Load scheduled shapes and attach the operator name of the
    trips that use each shape.

    Args:
        date: date wanted for the datasets to be drawn from

    Returns:
        GeoDataFrame: outer merge of shapes and trips on
        shape_array_key.
    """
    # Shapes for the date, de-duplicated, in CA_NAD83Albers
    shapes = helpers.import_scheduled_shapes(date).compute()
    shapes = shapes.drop_duplicates()
    shapes = shapes.set_crs(geography_utils.CA_NAD83Albers)

    # One row per (operator name, shape_array_key) pair
    operator_shape_pairs = helpers.import_scheduled_trips(
        date, (), ["name", "shape_array_key"]
    ).compute()
    operator_shape_pairs = operator_shape_pairs.drop_duplicates()

    return pd.merge(shapes, operator_shape_pairs, how="outer", on="shape_array_key")

In [7]:
def order_operators(date) -> list:
    """
    Re order a list of operators so the largest
    ones will be at the top of the list.

    Args:
        date: date wanted for the datasets to be drawn from

    Returns:
        List of operator names without repeats: the hard-coded big
        operators first (in the order listed below), followed by every
        other operator found in the scheduled trips, sorted by name.
    """
    operator_list = (
        helpers.import_scheduled_trips(date, (), ["name"]).compute().sort_values("name")
    )
    operator_list = operator_list.name.unique().tolist()

    # Reorder list so the biggest operators are at the beginning
    # based on NTD services data
    big_operators = [
        "LA DOT Schedule",
        "LA Metro Bus Schedule",
        "LA Metro Rail Schedule",
        "Bay Area 511 Muni Schedule",
        "Bay Area 511 AC Transit Schedule",
        "Bay Area 511 Santa Clara Transit Schedule",
        "Bay Area 511 BART Schedule",
        "San Diego Schedule",
        "OCTA Schedule",
        "Sacramento Schedule",
        "Bay Area 511 Sonoma-Marin Area Rail Transit Schedule",
        "Bay Area 511 SFO AirTrain Schedule",
        "Bay Area 511 South San Francisco Shuttle Schedule",
        "Bay Area 511 Marin Schedule",
        "Bay Area 511 County Connection Schedule",
        "Bay Area 511 MVGO Schedule",
        "Bay Area 511 Commute.org Schedule",
        "Bay Area 511 Union City Transit Schedule",
        "Bay Area 511 Caltrain Schedule",
        "Bay Area 511 Fairfield and Suisun Transit Schedule",
        "Bay Area 511 Dumbarton Express Schedule",
        "Bay Area 511 SamTrans Schedule",
        "Bay Area 511 Vine Transit Schedule",
        "Bay Area 511 Tri-Valley Wheels Schedule",
        "Bay Area 511 Sonoma County Transit Schedule",
        "Bay Area 511 Santa Rosa CityBus Schedule",
        "Bay Area 511 Golden Gate Transit Schedule",
        "Bay Area 511 Golden Gate Ferry Schedule",
        "Bay Area 511 San Francisco Bay Ferry Schedule",
        "Bay Area 511 SolTrans Schedule",
        "Bay Area 511 ACE Schedule",
        "Bay Area 511 Emery Go-Round Schedule",
        "Bay Area 511 Tri Delta Schedule",
        "Bay Area 511 Petaluma Schedule",
        "Bay Area 511 Capitol Corridor Schedule",
    ]

    # Guard against an operator being listed twice above (BART used to be),
    # which made callers process that operator twice. dict.fromkeys keeps
    # the first occurrence and preserves order.
    big_operators = list(dict.fromkeys(big_operators))

    # Keep the remaining operators in their sorted order instead of going
    # through a set difference, whose ordering is arbitrary.
    prioritized = set(big_operators)
    other_operators = [
        operator for operator in operator_list if operator not in prioritized
    ]

    # Big operators first, everything else after
    final_list = big_operators + other_operators

    return final_list

### Tiger Local Roads

#### Cut all roads - stops 1st then routes 
* Use some small operators to test.

In [8]:
def loop_sjoin(date, local_roads_gdf, gdf_routes_stops) -> pd.DataFrame:
    """
    By operator, sjoin its routes/stops to the
    local roads gdf. Roads matched by one operator are dropped
    before the next operator is joined to save time/memory.

    Args:
        date: date wanted for the datasets to be drawn from
        local_roads_gdf: local roads gdf (should be buffered roads).
        gdf_routes_stops: stops or routes gdf with a `name` column
        holding the operator.

    Returns:
        DataFrame with a single `linearid` column listing each matched
        road once. When nothing matches, the frame is empty but still
        has the `linearid` column so it can be merged on.
    """
    remaining_roads = local_roads_gdf
    matches_by_operator = []

    # Loop through and sjoin by operator, largest operators first
    for operator in order_operators(date):
        shapes_filtered = gdf_routes_stops.loc[
            gdf_routes_stops.name == operator
        ].reset_index(drop=True)

        # An operator without any shapes/stops cannot match a road
        if shapes_filtered.empty:
            continue

        matched = (
            gpd.sjoin(
                remaining_roads,
                shapes_filtered,
                how="inner",
                predicate="intersects",
            )[["linearid"]]
            .drop_duplicates()
            .reset_index(drop=True)
        )
        matches_by_operator.append(matched)

        # Drop the roads just found. Earlier matches were already removed in
        # previous iterations, so only the new ids need filtering. This
        # replaces the old try/bare-except around an accumulator that had no
        # `linearid` column on the first pass.
        remaining_roads = remaining_roads[
            ~remaining_roads.linearid.isin(matched.linearid)
        ].reset_index(drop=True)

    if not matches_by_operator:
        return pd.DataFrame(columns=["linearid"])

    # Concatenate once at the end rather than growing a frame in the loop
    sjoin_full_results = pd.concat(matches_by_operator, axis=0).drop_duplicates()

    return sjoin_full_results

In [9]:
def sjoin_stops(buffered_roads, original_roads, date):
    """
    Find the local roads that intersect GTFS stops and save them.

    Args:
        buffered_roads: buffered local roads used for the sjoin
        original_roads: unbuffered local roads the matches are
        merged back onto
        date: date wanted for the datasets to be drawn from

    Returns:
        A GDF of the original roads that matched a stop, and the
        list of their unique linear IDs.
    """
    t_start = datetime.datetime.now()

    # Stops (buffered) with their operator name
    stops_with_operator = gtfs_stops_operators(date)

    # linearids of buffered roads that intersect any stop
    matched_ids_df = loop_sjoin(date, buffered_roads, stops_with_operator)

    # Pull the unbuffered geometry for the matched roads
    roads_near_stops = pd.merge(
        original_roads, matched_ids_df, on="linearid", how="inner"
    )
    roads_near_stops["fullname"] = roads_near_stops["fullname"].fillna("None")

    # Save
    roads_near_stops.to_parquet(f"{SHARED_GCS}local_roads_stops_sjoin.parquet")

    # IDs callers can use to skip these roads later
    found_linearids = roads_near_stops.linearid.unique().tolist()

    t_end = datetime.datetime.now()
    print(f"Done with sjoin with stops with local roads. Time lapsed: {t_end-t_start}")

    return roads_near_stops, found_linearids

In [10]:
def sjoin_routes(buffered_roads, original_roads, date, linearid_to_filter:list):
    """
    Find the local roads that intersect GTFS routes and save them,
    ignoring roads whose linear id is in linearid_to_filter.

    Args:
        buffered_roads: buffered local roads used for the sjoin
        original_roads: unbuffered local roads the matches are
        merged back onto
        date: date wanted for the datasets to be drawn from
        linearid_to_filter (list): linear ids to exclude before joining

    Returns:
        GDF of the remaining original roads that matched a route.
    """
    t_start = datetime.datetime.now()

    # Routes with their operator name
    routes_with_operator = gtfs_routes_operators(date)

    # Leave out roads that were already found
    keep_buffered = ~buffered_roads.linearid.isin(linearid_to_filter)
    keep_original = ~original_roads.linearid.isin(linearid_to_filter)
    candidate_buffered = buffered_roads[keep_buffered].reset_index(drop=True)
    candidate_original = original_roads[keep_original].reset_index(drop=True)

    matched_ids_df = loop_sjoin(date, candidate_buffered, routes_with_operator)

    # Pull the unbuffered geometry for the matched roads
    roads_on_routes = pd.merge(
        candidate_original, matched_ids_df, on="linearid", how="inner"
    )
    roads_on_routes["fullname"] = roads_on_routes["fullname"].fillna("None")

    # Save
    roads_on_routes.to_parquet(f"{SHARED_GCS}local_roads_routes_sjoin.parquet")

    t_end = datetime.datetime.now()
    print(f"Done with sjoin with routes and local roads. Time lapsed: {t_end-t_start}")

    return roads_on_routes

In [11]:
def sjoin_local_roads(date):
    """
    Find local (S1400) roads that intersect GTFS stops, then the
    ones that intersect GTFS routes, and stack both results.

    Args:
        date: date wanted for the datasets to be drawn from

    Returns:
        GDF of the stop matches followed by the route matches.
    """
    t_start = datetime.datetime.now()
    print(t_start)

    local_road_types = ["S1400"]

    # Same roads twice: buffered for joining, unbuffered for the output
    roads_buffered = load_roads(local_road_types, True)
    roads_unbuffered = load_roads(local_road_types, False)

    # Stops go first; their linear ids are excluded from the routes join
    roads_near_stops, stop_linearids = sjoin_stops(
        roads_buffered, roads_unbuffered, date
    )
    roads_on_routes = sjoin_routes(
        roads_buffered, roads_unbuffered, date, stop_linearids
    )

    # Stack
    stacked = pd.concat([roads_near_stops, roads_on_routes], axis=0)
    stacked.to_parquet(f"{SHARED_GCS}local_roads_all_routes_stops_sjoin.parquet")

    t_end = datetime.datetime.now()
    print(f"Done with doing an sjoin w/ all local roads. Time lapsed: {t_end-t_start}")

    return stacked

In [12]:
# all_ops = sjoin_local_roads(analysis_date)

In [13]:
# all_ops.shape

In [14]:
#  all_ops.sample()

In [15]:
# all_ops.plot()

In [16]:
def chunk_dask_df(gdf, chunk_row_size:int):
    """
    Slice a dataframe into consecutive pieces of at most
    chunk_row_size rows and wrap each piece in a
    single-partition dask dataframe.

    Args:
        gdf: the local roads that intersect w/ stops and routes
        chunk_row_size(int): maximum number of rows per piece.

    Returns:
        The list of dask dataframes and the length of that list.
    """
    total_rows = gdf.shape[0]

    # Start offset of every piece: 0, n, 2n, ...
    chunk_starts = range(0, total_rows, chunk_row_size)

    my_ddfs = [
        dd.from_pandas(gdf[offset:offset + chunk_row_size], npartitions= 1)
        for offset in chunk_starts
    ]

    return my_ddfs, len(my_ddfs)

In [17]:
# ddfs, length = chunk_dask_df(all_ops, 10000)

In [18]:
# len(ddfs)

In [19]:
# type(ddfs)

In [20]:
# type(ddfs[0]), type(ddfs[15])

In [21]:
def dask_segment(ddf_list:list, ddfs_range: list)-> gpd.GeoDataFrame:
    """
    Cut the roads held in selected dask dataframes into segments.

    Args:
        ddf_list: dask dataframes stored in a list.
        ddf_list[0] will yield a ddf.

        ddfs_range: the positions in ddf_list to process.

    Returns:
        A GDF with the concatenated results of every position.
    """
    # Delayed cut_segments calls, one per requested position
    pending_cuts = []

    for position in ddfs_range:
        chunk = ddf_list[position]
        pending_cuts.append(
            delayed(geography_utils.cut_segments)(chunk, ["linearid", "fullname"], 1_000)
        )
        # Printed when the task is queued; the work itself happens in compute()
        print(f"done with {position}")

    # compute() returns a tuple, so keep its first (only) element.
    # Tasks are computed one after another.
    segmented_chunks = [compute(task)[0] for task in pending_cuts]

    return pd.concat(segmented_chunks)

In [22]:
# test = dask_segment(ddfs, [0,1])

In [23]:
# test.shape

In [24]:
def cut_local_roads(date, chunk_row_size: int)-> gpd.GeoDataFrame:
    """
    Find the local roads that intersect GTFS stops/routes and cut
    them into segments, processing the chunks in two halves.

    Args:
        date: date wanted for the datasets to be drawn from
        chunk_row_size (int): number of rows per chunk handed to dask.

    Returns:
        GDF with the segments of both halves. Each half and the
        combined result are also written to SHARED_GCS.
    """
    start = datetime.datetime.now()
    print(f"Cut: local roads {start}")

    # Find all local roads that intersect with
    # stops and routes.
    local_roads_unsegmented = sjoin_local_roads(date)

    # Divide the gdf into equal sized chunks (roughly)
    # and turn them into dask gdfs
    ddfs, length = chunk_dask_df(local_roads_unsegmented, chunk_row_size)

    # Split the chunk positions into two halves.
    # NOTE(review): with fewer than two chunks the first half is empty and
    # dask_segment is called with no positions; confirm that case is
    # acceptable for the chunk sizes used.
    length_list = [*range(0,length)]
    midpoint = len(length_list)//2
    ddf1 = length_list[:midpoint]
    ddf2 = length_list[midpoint:]

    # Cut geometry
    part1 = dask_segment(ddfs, ddf1)
    part1.to_parquet(f"{SHARED_GCS}segmented_local_rds_first_pt.parquet")
    print("Done with cutting part1")

    part2 = dask_segment(ddfs, ddf2)
    part2.to_parquet(f"{SHARED_GCS}segmented_local_rds_second_pt.parquet")
    print("Done with cutting part2")

    # Combine BOTH halves. This used to concatenate part1 with itself, which
    # duplicated the first half and dropped the second half entirely.
    segmented_local_roads = pd.concat([part1, part2])
    segmented_local_roads.to_parquet(f"{SHARED_GCS}segmented_local_rds.parquet")

    end = datetime.datetime.now()
    print(f"Done cutting local roads in {end-start} minutes")
    return segmented_local_roads

In [25]:
# test = cut_local_roads(analysis_date, 10000)

### Concat local roads and primary/secondary ones

In [26]:
def cut_primary_secondary_roads():
    """
    Segment every primary (S1100) and secondary (S1200) road,
    whether or not it intersects GTFS shapes, and save the result.

    Returns:
        The segments returned by geography_utils.cut_segments.
    """
    t_start = datetime.datetime.now()
    print(f"Cutting primary/secondary roads {t_start}")

    # Unbuffered primary + secondary roads
    roads = load_roads(["S1100", "S1200"])

    # 1 km segments
    segment_length = 1_000
    segments = geography_utils.cut_segments(
        roads, ["linearid", "fullname"], segment_length
    )

    segments.to_parquet(f"{SHARED_GCS}segmented_primary_secondary_roads.parquet")

    t_end = datetime.datetime.now()
    print(f"Done cutting primary & secondary roads: {t_end-t_start}")
    return segments

In [27]:
# primary_secondary = cut_primary_secondary_roads()

In [28]:
def cut_all_roads(date, chunk_row_size):
    """
    Segment local roads that intersect GTFS stops/routes plus all
    primary and secondary roads, and save them as one parquet.

    Takes about 1.5 hours.

    Args:
        date: date wanted for the datasets to be drawn from
        chunk_row_size: number of rows per chunk when cutting local roads

    Returns:
        GDF of primary/secondary segments followed by local road
        segments (the same frame written to segmented_all_roads.parquet).
    """
    start = datetime.datetime.now()
    print(f"Cutting all local roads/primary/secondary roads {start}")

    # Find local roads that intersect  with GTFS shapes, then
    # segment them
    local_roads_gdf = cut_local_roads(date, chunk_row_size)

    # Segment primary and secondary roads
    segmented_primary_secondary_rds = cut_primary_secondary_roads()

    # Concat
    all_roads = pd.concat(
        [segmented_primary_secondary_rds, local_roads_gdf], axis=0
    )
    all_roads.to_parquet(f"{SHARED_GCS}segmented_all_roads.parquet")

    end = datetime.datetime.now()
    print(f"time lapsed for cutting all roads: {end-start}")

    # Hand the result back so callers can assign it, like the other
    # cut_* functions do; previously this function returned None.
    return all_roads

In [30]:
# all_roads = cut_all_roads(analysis_date, 10_000)

In [37]:
all_tiger = gpd.read_parquet("gs://calitp-analytics-data/data-analyses/shared_data/all_roads_2020_state06.parquet")

In [38]:
all_tiger.LINEARID.nunique()

1064498

In [40]:
all_tiger.columns

Index(['LINEARID', 'FULLNAME', 'RTTYP', 'MTFCC', 'geometry'], dtype='object')

In [47]:
all_tiger.shape

(1065311, 5)

In [44]:
# Distinct LINEARID and FULLNAME counts per MTFCC road class (first 3 classes)
all_tiger.groupby(['MTFCC']).agg({'LINEARID':'nunique', 'FULLNAME':'nunique'}).head(3)

Unnamed: 0_level_0,LINEARID,FULLNAME
MTFCC,Unnamed: 1_level_1,Unnamed: 2_level_1
S1100,1247,281
S1200,6116,2008
S1400,946551,264858


In [31]:
# Read back the combined segmented roads parquet that cut_all_roads() writes
all_roads = gpd.read_parquet("gs://calitp-analytics-data/data-analyses/shared_data/segmented_all_roads.parquet")

In [46]:
all_roads.shape

(462112, 4)

In [45]:
all_roads.linearid.nunique(), all_roads.fullname.nunique()

(167326, 70645)

In [39]:
all_roads.linearid.nunique()

167326

In [57]:
# all_roads.head(50).explore('segment_sequence', cmap = 'tab20c', style_kwds = {'weight':10})

In [50]:
# Rows per linearid, i.e. how many segment rows each road has; top 20
all_roads.linearid.value_counts().head(20)

11019653760031    406
11020508601175    302
1104747998929     301
1104262103639     275
11018382472869    274
1105640135361     251
1105640135753     251
1104485962201     250
1104747998956     250
1104262558672     215
1108475766043     215
1106039345433     210
110416069762      183
1106039348204     174
1101917542965     165
11010927752153    155
1104755709990     149
1104755709991     149
110411099535      146
11019640550400    141
Name: linearid, dtype: int64

In [32]:
# Read back the parquet that cut_primary_secondary_roads() writes
primary_secondary_segmented = gpd.read_parquet("gs://calitp-analytics-data/data-analyses/shared_data/segmented_primary_secondary_roads.parquet")

In [34]:
# Segmented local roads.
# NOTE(review): no function in this notebook writes
# "all_segmented_local_rds.parquet" (cut_local_roads writes
# "segmented_local_rds.parquet") - confirm where this file comes from.
local = gpd.read_parquet("gs://calitp-analytics-data/data-analyses/shared_data/all_segmented_local_rds.parquet")

In [36]:
local.columns

Index(['geometry', 'linearid', 'fullname', 'segment_sequence'], dtype='object')

In [55]:
local.linearid.nunique()

159963

### Take #2

In [108]:
# Local roads that were sjoined to stops/routes (output of sjoin_local_roads).
# The full file originally has 322720 roads; keep only the first 100 rows
# as a small test set (head(), not a random sample).
sjoin_local_rds = gpd.read_parquet("gs://calitp-analytics-data/data-analyses/shared_data/local_roads_all_routes_stops_sjoin.parquet").head(100)

In [109]:
sjoin_local_rds.shape

(100, 3)

In [110]:
sjoin_local_rds_ddf = dd.from_pandas(sjoin_local_rds, npartitions= 1)

In [111]:
type(sjoin_local_rds_ddf), len(sjoin_local_rds_ddf)

(dask_geopandas.core.GeoDataFrame, 100)

In [112]:
# Build a delayed cut_segments call on the 100-row dask dataframe
# (same arguments as dask_segment uses); nothing runs until compute().
segmented_ddf = delayed(geography_utils.cut_segments)(sjoin_local_rds_ddf, ["linearid", "fullname"], 1_000)

In [113]:
my_list = [segmented_ddf]

In [114]:
type(my_list)

list

In [115]:
# Run each delayed task; compute() returns a tuple, so take its first element
results2 = [compute(i)[0] for i in my_list]

In [116]:
type(results2)

list

In [117]:
# Stack the computed results row-wise and reset the index
ddf = dd.multi.concat(results2, axis=0).reset_index(drop=True)

In [118]:
# Wrap the concatenated result in a single-partition dask dataframe
ddf = dd.from_pandas(ddf, npartitions= 1)

In [119]:
type(ddf)

dask_geopandas.core.GeoDataFrame

In [120]:
# Repartition with a target partition size of 85MB before exporting
ddf2 = ddf.repartition(partition_size="85MB")

In [121]:
ddf2.to_parquet(f"{SHARED_GCS}daskutilstest")

In [122]:
gddf = dg.read_parquet(f"{SHARED_GCS}daskutilstest/")

In [123]:
# Per the printed output, reads the partitioned "daskutilstest" folder and
# saves it out as a single "daskutilstest.parquet" in SHARED_GCS
dask_utils.concat_and_export(SHARED_GCS, "daskutilstest", filetype="gdf")

Read in gs://calitp-analytics-data/data-analyses/shared_data/daskutilstest
Save out gs://calitp-analytics-data/data-analyses/shared_data/daskutilstest.parquet


In [124]:
test = gpd.read_parquet("gs://calitp-analytics-data/data-analyses/shared_data/daskutilstest.parquet")

In [125]:
type(test)

geopandas.geodataframe.GeoDataFrame

In [126]:
test.shape

(125, 4)

In [127]:
all_tiger.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1065311 entries, 0 to 5216
Data columns (total 5 columns):
 #   Column    Non-Null Count    Dtype   
---  ------    --------------    -----   
 0   LINEARID  1065311 non-null  object  
 1   FULLNAME  653916 non-null   object  
 2   RTTYP     653916 non-null   object  
 3   MTFCC     1065311 non-null  object  
 4   geometry  1065311 non-null  geometry
dtypes: geometry(1), object(4)
memory usage: 48.8+ MB


In [137]:
# Spot check one street in the segmented output.
# The output shows the same row (index 10785) twice.
all_roads[all_roads.fullname == "N Milpitas Blvd"]

Unnamed: 0,geometry,linearid,fullname,segment_sequence
10785,"LINESTRING (-168933.860 -60072.680, -168944.871 -60044.772)",1104485402143,N Milpitas Blvd,0
10785,"LINESTRING (-168933.860 -60072.680, -168944.871 -60044.772)",1104485402143,N Milpitas Blvd,0


In [139]:
# Look for a specific linearid in the segmented output.
# The output is empty: this linearid has no rows in all_roads.
all_roads[all_roads.linearid == "1105598251022"]

Unnamed: 0,geometry,linearid,fullname,segment_sequence


In [148]:
cesar_chavez = all_tiger[all_tiger.FULLNAME == "Cesar Chavez"].to_crs(geography_utils.CA_StatePlane)

In [149]:
cesar_chavez.geometry.length

539    5626.69
1567    235.39
1828   6414.47
2579    807.01
dtype: float64

In [141]:
all_tiger[all_tiger.FULLNAME == "N Milpitas Blvd"]

Unnamed: 0,LINEARID,FULLNAME,RTTYP,MTFCC,geometry
124,1104485402143,N Milpitas Blvd,M,S1400,"LINESTRING (-121.91265 37.46065, -121.91278 37.46090)"
612,1105598251022,N Milpitas Blvd,M,S1400,"LINESTRING (-121.89952 37.43223, -121.89958 37.43238, -121.89978 37.43298, -121.89990 37.43325, -121.89999 37.43339, -121.90038 37.43399, -121.90055 37.43424, -121.90061 37.43437, -121.90068 37.43452, -121.90082 37.43491, -121.90091 37.43547, -121.90099 37.43610, -121.90121 37.43758, -121.90126 37.43795, -121.90137 37.43819, -121.90146 37.43844, -121.90164 37.43878, -121.90173 37.43901, -121.90246 37.43965, -121.90364 37.44021, -121.90423 37.44065, -121.90456 37.44100, -121.90484 37.44144, -121.90503 37.44202, -121.90502 37.44224, -121.90504 37.44252, -121.90506 37.44275, -121.90514 37.44297, -121.90526 37.44319, -121.90543 37.44346, -121.90562 37.44376, -121.90593 37.44408, -121.90631 37.44437, -121.90663 37.44465, -121.90732 37.44525, -121.90781 37.44560, -121.90857 37.44621, -121.90862 37.44626, -121.90893 37.44667, -121.90914 37.44708, -121.90966 37.44920, -121.90986 37.44998, -121.90997 37.45052, -121.90998 37.45060, -121.91004 37.45084, -121.91006 37.45092, -121.91008 37.45103, -121.91014 37.45134, -121.91016 37.45145, -121.91021 37.45166, -121.91034 37.45230, -121.91038 37.45251, -121.91046 37.45293, -121.91048 37.45298, -121.91050 37.45311, -121.91066 37.45376, -121.91084 37.45470, -121.91099 37.45532, -121.91103 37.45536, -121.91118 37.45608, -121.91136 37.45690, -121.91149 37.45749, -121.91151 37.45762, -121.91152 37.45766, -121.91160 37.45801, -121.91163 37.45817, -121.91167 37.45834, -121.91170 37.45847, -121.91174 37.45866, -121.91179 37.45886, -121.91182 37.45899, -121.91185 37.45916, -121.91193 37.45937, -121.91199 37.45954, -121.91206 37.45968, -121.91207 37.45970, -121.91217 37.45985, -121.91225 37.45996, -121.91245 37.46029, -121.91265 37.46065)"


In [146]:
all_tiger[all_tiger.LINEARID == "1104485402143"].explore(style_kwds={'weight':10})

In [130]:
# Try exporting the delayed results through dask_utils.compute_and_export.
# NOTE: in this run the call raised
# "AttributeError: 'GeoDataFrame' object has no attribute 'repartition'".
# Presumably the helper expects results that are dask dataframes, while the
# delayed tasks in my_list compute to plain GeoDataFrames - confirm against
# dask_utils before reusing this cell.
dask_utils.compute_and_export(
    results = my_list, 
    gcs_folder = SHARED_GCS,
    file_name = "utils_test",
    export_single_parquet = True)


AttributeError: 'GeoDataFrame' object has no attribute 'repartition'