# Calculate direction vectors or primary direction for vehicle positions

* Need to do this at scale, since we have to run through all the vp
* Doing the shift in `A2_sjoin_postprocessing` works, but it could use performance improvements.
* Need to figure out a better approach, since we cannot do a full sjoin of vp against road segments. 
   * Even with just primary/secondary roads, vp joined to road segments creates 13M rows. Every `linearid-mtfcc-fullname` needs to be tested for direction, since we have a segment running in each direction.
* Can do spatial join by each of the 4 directions, and only join vp that also run in that direction with those segments (eastbound running vp spatially joined to eastbound road segments)
* Approach:
   * Split the df into 2: the first vp of each trip, which has no prior vp against which to calculate direction. Set this to `Unknown`.
   * For all other vp, get a primary direction; `Unknown` is also a category here, used when the point doesn't progress.

When doing the sjoin for a particular direction, we will always include the `Unknown` vp together with the vp running in that direction.

In [1]:
import dask.dataframe as dd
import dask_geopandas as dg
import datetime
import geopandas as gpd
import pandas as pd

from calitp_data_analysis.geography_utils import WGS84
from segment_speed_utils import helpers, segment_calcs
from segment_speed_utils.project_vars import (analysis_date, SEGMENT_GCS, 
                                            PROJECT_CRS)
from shared_utils import rt_utils

In [2]:
# Small, fixed sample of trip_instance_key values used to prototype the
# direction calculation. One key per line; order is preserved by split().
test_trip_keys = """
e1314915976e6f1b119daa9c2b2f8750
eb274d224d049e9db8d104e9f90c244f
73799e2a19e2e202dcfc79398f980dcb
ea5309e36e55eea5d950969ac81fd7e3
cb836ecd731326986eb0e0276558ee94
d7a046b8dd30f5a7d0c30a6f7d1f5f39
""".split()

In [3]:
# Load the "road_segments" section of the pipeline config. Every constant
# below is a direct lookup of one key in that section.
dict_inputs = helpers.get_parameters("./scripts/config.yml", "road_segments")

(
    INPUT_FILE,
    SEGMENT_FILE,
    TRIP_GROUPING_COLS,
    GROUPING_COL,
    SEGMENT_IDENTIFIER_COLS,
    EXPORT_FILE,
) = (
    dict_inputs[config_key]
    for config_key in (
        "stage1",
        "segments_file",
        "trip_grouping_cols",
        "grouping_col",
        "segment_identifier_cols",
        "stage2",
    )
)

# NOTE(review): not referenced in the cells visible here; presumably a buffer
# distance in meters for a later spatial join -- confirm.
BUFFER_METERS = 35

In [4]:
# Read vp for the test trips only: the parquet filter restricts rows to
# `test_trip_keys`, and only the columns needed for direction are loaded.
vp = dd.read_parquet(
    f"{SEGMENT_GCS}{INPUT_FILE}_{analysis_date}/",
    filters = [[("trip_instance_key", "in", test_trip_keys)]],
    columns = ["trip_instance_key", "vp_idx", "x", "y"],
)

# Build point geometry from x/y (declared as WGS84), project it to
# PROJECT_CRS, then drop the raw coordinate columns.
vp_gddf = (
    dg.from_dask_dataframe(
        vp,
        geometry = dg.points_from_xy(vp, x="x", y="y", crs=WGS84),
    )
    .set_crs(WGS84)
    .to_crs(PROJECT_CRS)
    .drop(columns = ["x", "y"])
)

# NOTE(review): `vp_gddf.repartition(npartitions=100).persist()` is left
# disabled here; revisit when running against all vp instead of test trips.

In [5]:
# One row of bounds per trip (presumably -- confirm against the helper).
# The result is merged on `trip_instance_key` further down and supplies the
# `min_vp_idx` / `max_vp_idx` columns used to split off each trip's first vp.
usable_bounds = segment_calcs.get_usable_vp_bounds_by_trip(vp)

In [6]:
# Add the index of the immediately preceding vp (vp_idx - 1), then attach
# the per-trip bounds. The inner join drops trips missing from usable_bounds.
vp_gddf2 = vp_gddf.assign(
    prior_vp_idx = vp_gddf["vp_idx"] - 1,
).merge(
    usable_bounds,
    how = "inner",
    on = "trip_instance_key",
)

In [7]:
# Each trip's first vp: the row whose vp_idx equals the trip's min_vp_idx.
# These have no prior vp to compare against, so no direction can be derived.
first_vp = vp_gddf2[
    vp_gddf2["vp_idx"] == vp_gddf2["min_vp_idx"]
].drop(columns = ["min_vp_idx", "max_vp_idx"])

In [8]:
# All remaining vp: every row that is not its trip's min_vp_idx row.
# NOTE(review): `!=` would also keep rows with vp_idx below min_vp_idx, if
# any exist -- confirm min_vp_idx is always the trip's very first vp.
can_get_direction = vp_gddf2[
    vp_gddf2["vp_idx"] != vp_gddf2["min_vp_idx"]
].drop(columns = ["min_vp_idx", "max_vp_idx"])

In [9]:
# Lookup table of "prior" points: the same vp, with columns renamed to
# prior_vp_idx / prior_geometry so they can be joined back as the
# previous position. The active geometry is re-pointed after the rename.
vp_gddf_renamed = vp_gddf[
    ["vp_idx", "geometry"]
].add_prefix("prior_").set_geometry("prior_geometry")

In [10]:
# Attach the prior vp's geometry to each vp. The inner join drops any vp
# whose prior_vp_idx is not present among the loaded vp.
# NOTE(review): the join key is prior_vp_idx alone (no trip key), so this
# relies on vp_idx being unique across trips and consecutive within a trip
# -- confirm.
vp_with_prior = dd.merge(
    can_get_direction,
    vp_gddf_renamed,
    how = "inner",
    on = "prior_vp_idx",
)

In [11]:
# Row-wise direction of travel from the prior point to the current point.
# In the displayed results, identical points come back as "Unknown".
# `meta` tells dask the new column's name and dtype without computing it.
vp_with_prior["vp_primary_direction"] = vp_with_prior.apply(
    lambda row: rt_utils.primary_cardinal_direction(
        row.prior_geometry, row.geometry
    ),
    axis = 1,
    meta = ("vp_primary_direction", "object"),
)

In [12]:
# Execute the lazy dask graph and collect the result in memory
# (only the test trips were read, so this stays small).
results = vp_with_prior.compute()
# Preview the first rows. The index values repeat in the output, likely
# carried over from per-partition indexes -- it is not a unique row key.
results.head()

Unnamed: 0,trip_instance_key,vp_idx,geometry,prior_vp_idx,prior_geometry,vp_primary_direction
0,d7a046b8dd30f5a7d0c30a6f7d1f5f39,17806,POINT (-229532.010 29094.362),17805,POINT (-229532.010 29094.362),Unknown
1,d7a046b8dd30f5a7d0c30a6f7d1f5f39,17813,POINT (-229055.993 29024.804),17812,POINT (-228972.581 28989.803),Westbound
0,73799e2a19e2e202dcfc79398f980dcb,17590,POINT (-229131.026 28935.912),17589,POINT (-229131.900 28935.936),Eastbound
1,73799e2a19e2e202dcfc79398f980dcb,17596,POINT (-229129.134 28941.086),17595,POINT (-229128.697 28941.074),Westbound
2,ea5309e36e55eea5d950969ac81fd7e3,17692,POINT (-229122.120 28941.782),17691,POINT (-229046.090 29035.095),Southbound


In [13]:
# Order by trip, then vp_idx, so consecutive vp of one trip read top to bottom.
# NOTE(review): in the displayed output, vp that move by only a fraction of a
# projected unit still receive a cardinal direction; this may be GPS jitter
# while the vehicle is stopped -- consider a minimum-distance threshold.
results.sort_values(["trip_instance_key", "vp_idx"]).head(10)

Unnamed: 0,trip_instance_key,vp_idx,geometry,prior_vp_idx,prior_geometry,vp_primary_direction
1,73799e2a19e2e202dcfc79398f980dcb,17569,POINT (-229126.093 28937.221),17568,POINT (-229062.287 29018.528),Southbound
2,73799e2a19e2e202dcfc79398f980dcb,17570,POINT (-229126.081 28937.665),17569,POINT (-229126.093 28937.221),Northbound
1,73799e2a19e2e202dcfc79398f980dcb,17571,POINT (-229126.072 28937.999),17570,POINT (-229126.081 28937.665),Northbound
2,73799e2a19e2e202dcfc79398f980dcb,17572,POINT (-229126.572 28938.902),17571,POINT (-229126.072 28937.999),Northbound
1,73799e2a19e2e202dcfc79398f980dcb,17573,POINT (-229126.572 28938.902),17572,POINT (-229126.572 28938.902),Unknown
1,73799e2a19e2e202dcfc79398f980dcb,17574,POINT (-229126.565 28939.124),17573,POINT (-229126.572 28938.902),Northbound
2,73799e2a19e2e202dcfc79398f980dcb,17575,POINT (-229126.565 28939.124),17574,POINT (-229126.565 28939.124),Unknown
0,73799e2a19e2e202dcfc79398f980dcb,17576,POINT (-229127.445 28938.926),17575,POINT (-229126.565 28939.124),Westbound
0,73799e2a19e2e202dcfc79398f980dcb,17577,POINT (-229127.458 28938.482),17576,POINT (-229127.445 28938.926),Southbound
0,73799e2a19e2e202dcfc79398f980dcb,17578,POINT (-229128.344 28938.062),17577,POINT (-229127.458 28938.482),Westbound
