In [None]:
import geopandas as gpd
import osmnx as ox
import pandas as pd

In [None]:
# Base directory for all input data, relative to this notebook (trailing slash required,
# because paths below are built by plain string concatenation).
file_path = "../Data/"
# Sub-directory of the data folder where generated outputs are written.
output_file_path = file_path + "Output/"

# Import human settlement shapefile from GHS UCDB

In [None]:
# Load the GHS UCDB human-settlement polygons (global GeoPackage) into a GeoDataFrame.
UCDB_gdf = gpd.read_file(file_path + "GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg")

In [None]:
# Keep only the settlement polygon named "Los Angeles" and plot it;
# the plot shows that the polygon contains holes.
is_los_angeles = UCDB_gdf["UC_NM_MN"] == "Los Angeles"
LA_UCDB_gdf = UCDB_gdf.loc[is_los_angeles]
LA_UCDB_gdf.plot()

# Import TIGERline census tract shapefile

In [None]:
# Load the 2022 TIGER/Line census-tract shapefile (the "06" file; named CA_ here, i.e. California).
CA_22tract = gpd.read_file(file_path + "tl_2022_06_tract/tl_2022_06_tract.shp")
# Show whether the settlement layer and the tract layer already share a CRS (cell output).
LA_UCDB_gdf.crs == CA_22tract.crs

In [None]:
# Reproject the tracts to EPSG:4326 so they can be overlaid with the settlement layer.
CA_22tract_crs = CA_22tract.to_crs(4326)
# Re-check the CRS match after reprojection (cell output).
LA_UCDB_gdf.crs == CA_22tract_crs.crs

In [None]:
# Keep the tracts whose county FIPS code is "037"; the rest of the notebook treats these as LA County.
LA_22tract = CA_22tract_crs[CA_22tract_crs["COUNTYFP"] == "037"]

# Get the intersection of LA County and the LA human settlement area

In [None]:
# Plot the LA County tracts (green) with the LA settlement area drawn on top (red)
# to see where the two layers differ.
ax = LA_22tract.plot(color="palegreen", edgecolor="green", figsize=(20, 10))
LA_UCDB_gdf.plot(ax=ax, color="red")

In [None]:
# Clip the county tracts to the settlement area, then merge all resulting pieces
# into a single geometry (one row) that serves as the study boundary.
LA_intersections = gpd.overlay(LA_22tract, LA_UCDB_gdf, how="intersection")
LA_intersections_whole = LA_intersections.dissolve()

In [None]:
# Visual check of the dissolved study boundary.
LA_intersections_whole.plot()

# Get the street network for analysis
### Because the polygon of interest has holes, I decided to pull a street network that is larger than the polygon used for sampling OD pairs, by:
#### (1) buffering the polygon, just to be safe, to get a larger street network
#### (2) getting the street network for the extent of the buffered polygon's bounding box

In [None]:
# Buffer the study boundary by 3000 m so the street network pulled from it
# extends past the boundary's edges.
#
# buffer() works in CRS units, so the geometry is first projected to a
# metre-based CRS that is valid for Los Angeles: EPSG:32611 (WGS 84 / UTM zone 11N).
# The previous EPSG:3043 is ETRS89 / UTM zone 31N, a European zone roughly 120 degrees
# of longitude away, where distances around LA are grossly distorted.
LA_intersections_whole_proj = LA_intersections_whole.to_crs(32611)
LA_intersections_whole_proj["geometry"] = LA_intersections_whole_proj["geometry"].buffer(3000)
# Back to EPSG:4326 so the bounds are in degrees, like the other layers.
LA_intersections_whole_proj = LA_intersections_whole_proj.to_crs(4326)

In [None]:
# Draw the buffered boundary (green) underneath the original boundary (red)
# to show the margin added by the buffer.
ax = LA_intersections_whole_proj.plot(color="palegreen", edgecolor="green", figsize=(20, 10))
LA_intersections_whole.plot(ax=ax, color="red")

In [None]:
# Show the bounding box (minx, miny, maxx, maxy) of the buffered boundary.
LA_intersections_whole_proj.bounds

In [None]:
# Download the drivable street network inside the bounding box of the buffered boundary.
# The buffered frame holds the single dissolved geometry, so its total bounds are
# exactly that row's bounds.
west, south, east, north = LA_intersections_whole_proj.total_bounds
# Positional order expected here is north, south, east, west.
G_box = ox.graph_from_bbox(north, south, east, west, network_type="drive")

In [None]:
# Visual check of the downloaded street network.
ox.plot_graph(G_box)

In [None]:
# Save the street network to the output folder in two formats.
# GeoPackage: for inspecting the network in GIS tools.
ox.save_graph_geopackage(G_box, filepath=output_file_path + "LA_intersect_network.gpkg")
# GraphML: for reloading the graph later (e.g. with ox.load_graphml).
ox.save_graphml(G_box, filepath=output_file_path + "LA_intersect_network.graphml")

# Attach information to all the nodes in the bounding box street network

In [None]:
# Split the graph into node and edge tables.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_box)
# Move the node index into a regular column (the index is kept, not dropped),
# so the node id is available as "osmid" in the joins that follow.
gdf = gdf_nodes.reset_index()

In [None]:
# Make sure the nodes are in lat/long, then refresh the x / y columns
# from the point geometry so they match the projected coordinates.
gdf_proj = ox.project_gdf(gdf, to_latlong=True).assign(
    x=lambda nodes: nodes["geometry"].x,
    y=lambda nodes: nodes["geometry"].y,
)

In [None]:
# Los Angeles census-tract zones as used by Uber Movement; this layer supplies
# the MOVEMENT_ID and TRACT columns that are joined onto the nodes.
uber_tract = gpd.read_file(file_path + "los_angeles_censustracts.json")

In [None]:
# Tag every node with the polygon that contains it:
#   1. the 2022 census tract (GEOID), then
#   2. the Uber Movement zone (MOVEMENT_ID, TRACT).
# Left joins keep nodes that fall outside every polygon (their tag columns are NaN).
selected_cols = ["osmid", "y", "x", "highway", "street_count", "ref", "geometry", "GEOID"]
uber_cols = ["MOVEMENT_ID", "TRACT"]

nodes_with_tract = gdf_proj.sjoin(CA_22tract_crs, how="left", predicate="within")
# Selecting columns discards everything else the join added.
gdf_proj_tract = nodes_with_tract[selected_cols]

nodes_with_uber = gdf_proj_tract.sjoin(uber_tract, how="left", predicate="within")
gdf_proj_tract_uber = nodes_with_uber[selected_cols + uber_cols]
gdf_proj_tract_uber

# Sample 100,000 origin nodes and 100,000 destination nodes (100,000 OD pairs) from the candidate nodes

In [None]:
# Keep only the nodes that lie within the intersected study boundary.
# Both inputs have a GEOID column, so the join suffixes them; the node's own
# tract id ends up in "GEOID_left", which the sampling step reads.
nodes_candidate = gpd.sjoin(
    gdf_proj_tract_uber,
    LA_intersections_whole,
    how="inner",
    predicate="within",
)

In [None]:
# Persist the candidate nodes (the frame's index is written as the first CSV column).
nodes_candidate.to_csv(output_file_path + "nodes_candidate.csv")

In [None]:
N_OD_SAMPLES = 100000  # number of origins, and of destinations, to draw
ENDPOINT_COLS = ["osmid", "y", "x", "GEOID_left", "MOVEMENT_ID"]


def _sample_endpoints(nodes, prefix, seed, n=N_OD_SAMPLES):
    """Draw ``n`` nodes with replacement and label them as one side of an OD pair.

    Args:
        nodes: Table of candidate nodes containing the columns in ``ENDPOINT_COLS``.
        prefix: Prefix for the output column names ("o" for origin, "d" for destination).
        seed: ``random_state`` passed to ``sample`` so the draw is repeatable.
        n: Number of rows to draw.

    Returns:
        A frame with columns ``<prefix>id``, ``<prefix>y``, ``<prefix>x``,
        ``<prefix>GEOID`` and ``<prefix>MOVEMENT_ID``, in that order.
    """
    sampled = nodes.sample(n, random_state=seed, replace=True)[ENDPOINT_COLS]
    sampled.columns = [prefix + name for name in ("id", "y", "x", "GEOID", "MOVEMENT_ID")]
    return sampled


# Different seeds give independent draws for the two sides of each pair.
origin = _sample_endpoints(nodes_candidate, prefix="o", seed=123)
destin = _sample_endpoints(nodes_candidate, prefix="d", seed=321)

In [None]:
# Replace the sampled node indices with a fresh 0..n-1 index on both sides,
# so that row i of `origin` lines up with row i of `destin`.
origin, destin = (side.reset_index(drop=True) for side in (origin, destin))

In [None]:
# Put origins and destinations side by side (one OD pair per row), then
# discard the pairs whose origin and destination are the same node.
od_pairs = pd.concat([origin, destin], axis=1, sort=False)
is_distinct_pair = od_pairs["oid"] != od_pairs["did"]
temp_OD = od_pairs[is_distinct_pair]

In [None]:
# Preview the OD table after pairs with identical origin and destination were removed.
temp_OD

In [None]:
# Persist the candidate OD pairs (the frame's index is written as the first CSV column).
temp_OD.to_csv(output_file_path + "potential_OD.csv")