# State Highway Network postmiles to highway segment

Outline the approach using a single highway and direction (Route 60 WB) within one district (District 7).

In [1]:
import geopandas as gpd
import pandas as pd
import shapely

from utils import PROCESSED_GCS
from shared_utils.shared_data import GCS_FILE_PATH as SHARED_GCS

In [2]:
# Read one highway-direction only: district 7, route 60, direction "WB".
# The filters are applied while reading, and only the columns needed
# for building segments are kept.
postmiles_path = f"{SHARED_GCS}state_highway_network_postmiles.parquet"
postmiles_cols = ["route", "direction", "odometer", "pm", "geometry"]
one_highway_filter = [[
    ("district", "==", 7),
    ("route", "==", 60),
    ("direction", "==", "WB"),
]]

gdf = gpd.read_parquet(
    postmiles_path,
    columns=postmiles_cols,
    filters=one_highway_filter,
)

In [3]:
# Interactive map of the postmile points, colored by their pm value.
gdf.explore(column="pm", tiles="CartoDB Positron")

In [4]:
# Columns that together identify one highway-direction.
highway_group_cols = ["route", "direction"]

# Order the points along each highway by odometer and renumber the rows.
gdf = gdf.sort_values(by=[*highway_group_cols, "odometer"])
gdf = gdf.reset_index(drop=True)

In [5]:
# Preview the first rows after sorting by odometer.
gdf.head(n=5)

Unnamed: 0,route,direction,odometer,pm,geometry
0,60,WB,0.0,0.0,POINT (-118.22874 34.02871)
1,60,WB,0.1,0.1,POINT (-118.22696 34.02906)
2,60,WB,0.174,0.174,POINT (-118.22564 34.02933)
3,60,WB,0.174,0.0,POINT (-118.22562 34.02934)
4,60,WB,0.274,0.1,POINT (-118.22382 34.02953)


In [6]:
# Rows 2 and 3 above share the same odometer value but have different
# pm values. Pull every column for odometer values between 0.1 and 0.5
# on this highway to see how those rows differ.
odometer_window_filter = [[
    ("district", "==", 7),
    ("route", "==", 60),
    ("direction", "==", "WB"),
    ("odometer", ">=", 0.1),
    ("odometer", "<=", 0.5),
]]

gpd.read_parquet(
    f"{SHARED_GCS}state_highway_network_postmiles.parquet",
    filters=odometer_window_filter,
)

Unnamed: 0,route,rtesuffix,routes,pmrouteid,county,district,pmprefix,pm,pmsuffix,pmc,odometer,pminterval,hwysegment,aligncode,routetype,direction,pmoffset,geometry
0,60,,60,LA.060...L,LA,7,,0.3,,0.3,0.474,0.1,Mid Segment,Left,State,WB,0.3,POINT (-118.22013 34.02944)
1,60,,60,LA.060...L,LA,7,,0.1,,0.1,0.274,0.1,Mid Segment,Left,State,WB,0.1,POINT (-118.22382 34.02953)
2,60,,60,LA.060.L.L,LA,7,L,0.174,,L0.174,0.174,0.001,End Segment,Left,State,WB,0.174,POINT (-118.22564 34.02933)
3,60,,60,LA.060.L.L,LA,7,L,0.1,,L0.1,0.1,0.1,Mid Segment,Left,State,WB,0.1,POINT (-118.22696 34.02906)
4,60,,60,LA.060...L,LA,7,,0.2,,0.2,0.374,0.1,Mid Segment,Left,State,WB,0.2,POINT (-118.22197 34.02948)
5,60,,60,LA.060...L,LA,7,,0.0,,0.0,0.174,10.0,Begin Segment,Left,State,WB,0.001,POINT (-118.22562 34.02934)


In [7]:
# Map the first few rows, one color per odometer value. The points that
# share an odometer value sit very close together, so the next step is
# to sort and keep a single row per odometer value.
first_rows = gdf.head()
first_rows.explore(
    column="odometer",
    tiles="CartoDB Positron",
    categorical=True,
)

In [8]:
# One row per (route, direction, odometer): sort, then keep the first
# row found for each repeated odometer value.
odometer_key = highway_group_cols + ["odometer"]

gdf2 = (
    gdf.sort_values(by=odometer_key)
    .drop_duplicates(subset=odometer_key, keep="first")
    .reset_index(drop=True)
)

In [9]:
# Preview the first rows after dropping repeated odometer values.
gdf2.head(n=5)

Unnamed: 0,route,direction,odometer,pm,geometry
0,60,WB,0.0,0.0,POINT (-118.22874 34.02871)
1,60,WB,0.1,0.1,POINT (-118.22696 34.02906)
2,60,WB,0.174,0.174,POINT (-118.22564 34.02933)
3,60,WB,0.274,0.1,POINT (-118.22382 34.02953)
4,60,WB,0.374,0.2,POINT (-118.22197 34.02948)


In [10]:
# (rows, columns) before and after dropping repeated odometer values.
(gdf.shape, gdf2.shape)

((321, 5), (314, 5))

In [11]:
def draw_line_between_points(
    gdf: gpd.GeoDataFrame,
    group_cols: "list | None" = None
) -> gpd.GeoDataFrame:
    """
    Connect every point to the next point in its group with a
    two-point straight line.

    Points are paired in their existing row order, so gdf should
    already be sorted (for example by odometer) within each group.
    The last point of each group has no following point and is dropped.

    Args:
        gdf: GeoDataFrame of points.
        group_cols: columns that identify one group of points.
            Defaults to ["route", "direction"].

    Returns:
        GeoDataFrame with three added columns:
        - start_geometry: the row's own point
        - end_geometry: the next point within the same group
        - line_geometry: LineString from start_geometry to end_geometry
        This function does not switch the active geometry column;
        use set_geometry("line_geometry") to work with the lines.
    """
    # A None default avoids sharing one mutable list across calls.
    if group_cols is None:
        group_cols = ["route", "direction"]

    # Use the active geometry column by name, so the shift below
    # reads the same column as gdf.geometry.
    geom_col = gdf.geometry.name

    # The lines are built from the points' coordinates, so they carry
    # the points' CRS. EPSG:4326 is only assumed when gdf has no CRS.
    line_crs = gdf.crs if gdf.crs is not None else "EPSG:4326"

    gdf = gdf.assign(
        start_geometry = gdf.geometry,
        # shift(-1) within each group pulls the next row's point up;
        # the last row of a group gets a missing value.
        end_geometry = (gdf.groupby(group_cols, group_keys=False)
                        [geom_col].shift(-1)
                       )
    ).dropna(subset=["end_geometry"])

    # Plain loop instead of a row-wise apply: apply on an empty frame
    # may hand back a DataFrame rather than a Series, which breaks when
    # no point pairs remain.
    line_geometry = gpd.GeoSeries(
        [
            shapely.LineString([start_point, end_point])
            for start_point, end_point
            in zip(gdf.start_geometry, gdf.end_geometry)
        ],
        index = gdf.index,
        crs = line_crs,
    )

    return gdf.assign(line_geometry = line_geometry)
    

In [12]:
# Build a line from each postmile to the next one along the highway.
gdf3 = draw_line_between_points(gdf=gdf2)

In [13]:
# Make the lines the active geometry so the map draws segments instead
# of points, with one color per odometer value.
line_segments = gdf3.set_geometry("line_geometry")
line_segments.explore(
    column="odometer",
    tiles="CartoDB Positron",
    categorical=True,
)