In [None]:
import os

import geopandas as gpd
import osmnx as ox
import pandas as pd

In [None]:
# Relative path of the folder holding the input datasets.
file_path = '../Data/'
# Sub-folder of the data folder that receives the generated files.
output_file_path = f'{file_path}Output/'

# Street network in LA county

In [None]:
# Drivable street network for the whole of Los Angeles County, built by OSMnx
# from the place name.
G = ox.graph_from_place(
    'Los Angeles County, CA, USA',
    network_type='drive',
)

# Street network in the LA urban settlement area (from Dataverse)

In [None]:
# Load the street network of the LA urban settlement area from its GraphML
# file, then draw it.
graphml_path = file_path + 'los_angeles-14.graphml'
G_graphml = ox.io.load_graphml(graphml_path)
fig, ax = ox.plot_graph(G_graphml)

In [None]:
# California census tracts, downloaded from the Census Bureau
# (2022 vintage, going by the file name).
ca_tract_shapefile = file_path + "tl_2022_06_tract/tl_2022_06_tract.shp"
CA_22tract = gpd.read_file(ca_tract_shapefile)

In [None]:
# Los Angeles census tracts as used by Uber Movement.
uber_tract_file = file_path + "los_angeles_censustracts.json"
uber_tract = gpd.read_file(uber_tract_file)

In [None]:
# Display the coordinate reference system of the census tract layer as loaded.
CA_22tract.crs

In [None]:
# Re-project the census tracts to EPSG:4326 and display the resulting CRS
# to confirm the conversion.
CA_22tract_crs = CA_22tract.to_crs(epsg=4326)
CA_22tract_crs.crs

In [None]:
# Display the coordinate reference system of the Uber Movement tract layer,
# for comparison with the re-projected census tracts.
uber_tract.crs

In [None]:
# Split the county graph into node and edge GeoDataFrames, then move the node
# index into a regular column (later cells select it by name as `osmid`).
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
gdf = gdf_nodes.reset_index()

In [None]:
# Project the county nodes to lat/long and overwrite x/y with the coordinates
# of the projected point geometries.
gdf_proj = ox.project_gdf(gdf, to_latlong=True)
node_points = gdf_proj['geometry']
gdf_proj['x'], gdf_proj['y'] = node_points.x, node_points.y

In [None]:
# Display the CRS of the projected county nodes before the spatial joins.
gdf_proj.crs

# attach (spatial join) census tract GEOID and Uber Movement Tract Movement ID to all the nodes in LA county

In [None]:
# Columns kept after the census tract join; the Uber join keeps these plus the
# two Uber Movement columns.
selected_cols = [
    "osmid", "y", "x", "highway", "street_count", "ref", "geometry", "GEOID",
]
uber_cols = selected_cols + ["MOVEMENT_ID", "TRACT"]

# Left joins keep every node, including nodes that fall within no polygon.
gdf_proj_tract = gdf_proj.sjoin(
    CA_22tract_crs, how='left', predicate='within'
)[selected_cols]
gdf_proj_tract_uber = gdf_proj_tract.sjoin(
    uber_tract, how='left', predicate='within'
)[uber_cols]
gdf_proj_tract_uber

# Sample 100,000 nodes as origin and destination (100,000 OD pairs) in Los Angeles County

In [None]:
# Draw origins and destinations independently (with replacement) from the
# county nodes.
#
# The two draws MUST use different seeds: sampling the same frame twice with
# the same random_state returns the same rows in the same order, so every
# origin would equal its destination and the later `oid == did` filter would
# discard every pair.
n_pairs = 100000
origin_seed = 12345  # unchanged, so the origin sample matches earlier runs
destin_seed = 54321  # distinct from origin_seed to decouple the two draws

od_source_cols = ["osmid", "y", "x", "GEOID", "MOVEMENT_ID"]

origin = gdf_proj_tract_uber.sample(n_pairs, random_state=origin_seed,
                                    replace=True)
origin = origin[od_source_cols]
origin.columns = ['oid', 'oy', 'ox', 'oGEOID', 'oMOVEMENT_ID']

destin = gdf_proj_tract_uber.sample(n_pairs, random_state=destin_seed,
                                    replace=True)
destin = destin[od_source_cols]
destin.columns = ['did', 'dy', 'dx', 'dGEOID', 'dMOVEMENT_ID']

In [None]:
# Discard the sampled row labels so both frames get a fresh positional index
# and can be aligned row by row.
origin, destin = (
    frame.reset_index(drop=True) for frame in (origin, destin)
)

In [None]:
# Place each origin next to the destination on the same row, then remove the
# pairs whose origin and destination are the same node.
temp_OD = pd.concat([origin, destin], axis=1, sort=False)
same_node = temp_OD['oid'].eq(temp_OD['did'])
temp_OD = temp_OD.drop(index=temp_OD[same_node].index)

# attach (spatial join) census tract GEOID and Uber Movement Tract Movement ID to all the nodes in LA urban area

In [None]:
# Split the urban-area graph into node and edge GeoDataFrames, then move the
# node index into a regular column (later cells select it by name as `osmid`).
gdfml_nodes, gdfml_edges = ox.graph_to_gdfs(G_graphml)
gdfml = gdfml_nodes.reset_index()

In [None]:
# Project the urban-area nodes to lat/long and overwrite x/y with the
# coordinates of the projected point geometries.
gdfml_proj = ox.project_gdf(gdfml, to_latlong=True)
urban_points = gdfml_proj['geometry']
gdfml_proj['x'], gdfml_proj['y'] = urban_points.x, urban_points.y

In [None]:
# Columns kept after the census tract join, and after the Uber Movement join.
tract_cols = ["osmid", "y", "x", "highway", "street_count", "ref", "geometry",
              "GEOID"]
uber_cols = tract_cols + ["MOVEMENT_ID", "TRACT"]

# Step 1: tag every urban-area node with the census tract that contains it
# (left join, so nodes within no tract are kept).
gdfml_proj_tract = gdfml_proj.sjoin(CA_22tract_crs, how='left',
                                    predicate='within')
gdfml_proj_tract_clean = gdfml_proj_tract[tract_cols]

# Step 2: repeat the join against the Uber Movement tract layer.
gdfml_proj_tract_uber = gdfml_proj_tract_clean.sjoin(uber_tract, how='left',
                                                     predicate='within')
gdfml_proj_tract_uber_clean = gdfml_proj_tract_uber[uber_cols]
gdfml_proj_tract_uber_clean

# Sample 100,000 nodes as origin and destination (100,000 OD pairs) in the LA urban area

In [None]:
# Draw origins and destinations independently (with replacement) from the
# urban-area nodes.
#
# Both draws are seeded so that re-running the notebook regenerates the same
# OD pairs. The seeds differ on purpose: two draws from the same frame with
# the same random_state would return identical rows, making every origin
# equal to its destination.
n_pairs_ml = 100000
originml_seed = 12345
destinml_seed = 54321

od_source_cols_ml = ["osmid", "y", "x", "GEOID", "MOVEMENT_ID"]

originml = gdfml_proj_tract_uber_clean.sample(
    n_pairs_ml, random_state=originml_seed, replace=True)
originml = originml[od_source_cols_ml]
originml.columns = ['oid', 'oy', 'ox', 'oGEOID', 'oMOVEMENT_ID']

destinml = gdfml_proj_tract_uber_clean.sample(
    n_pairs_ml, random_state=destinml_seed, replace=True)
destinml = destinml[od_source_cols_ml]
destinml.columns = ['did', 'dy', 'dx', 'dGEOID', 'dMOVEMENT_ID']

In [None]:
# Discard the sampled row labels so both urban-area frames get a fresh
# positional index and can be aligned row by row.
originml, destinml = (
    sampled.reset_index(drop=True) for sampled in (originml, destinml)
)

In [None]:
# Combine origins and destinations column-wise into OD pairs, then remove the
# rows where both ends are the same node.
tempml_OD = pd.concat([originml, destinml], axis=1, sort=False)
self_pair_rows = tempml_OD[tempml_OD['oid'].eq(tempml_OD['did'])].index
tempml_OD = tempml_OD.drop(index=self_pair_rows)

# Save sampled OD pairs as CSV

In [None]:
# Write both OD samples to CSV. The output folder is created first because
# DataFrame.to_csv raises OSError when the target directory does not exist.
os.makedirs(output_file_path, exist_ok=True)
tempml_OD.to_csv(output_file_path + "tempml_OD.csv")
temp_OD.to_csv(output_file_path + "temp_OD.csv")