Load walk+transit network

In [130]:
import warnings
warnings.filterwarnings('ignore')

In [131]:
import urbanaccess as ua

# Define the file path to your saved network
h5_file_path = "networks/transit_1011.h5"

# Load the network
network = ua.network.load_network(filename=h5_file_path)

Successfully read store: data/networks/transit_1011.h5 with the following keys: ['/edges', '/impedance_names', '/nodes', '/two_way']
Successfully read store: data/networks/transit_1011.h5 with the following keys: ['/edges', '/impedance_names', '/nodes', '/two_way']


In [132]:
# Print all available attributes
print(dir(network))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'net_connector_edges', 'net_edges', 'net_nodes', 'osm_edges', 'osm_nodes', 'transit_edges', 'transit_nodes']


In [133]:
print(network.net_nodes.head())  # Check first rows of nodes
print(network.net_edges.head())  # Check first rows of edges

               x          y
id_int                     
1      -1.899896  43.318994
2      -1.902670  43.314959
3      -1.912339  43.317663
4      -1.917594  43.318983
5      -1.921789  43.320913
   from  to    weight
0     1   2  2.800000
1     2   3  2.500000
2     3   4  1.150000
3     4   5  0.950000
4     5   6  1.183333


In [134]:
node_df = network.net_nodes
print(node_df.head())

               x          y
id_int                     
1      -1.899896  43.318994
2      -1.902670  43.314959
3      -1.912339  43.317663
4      -1.917594  43.318983
5      -1.921789  43.320913


In [135]:
node_df.reset_index(inplace=True)

In [136]:
print(node_df.head())

   id_int         x          y
0       1 -1.899896  43.318994
1       2 -1.902670  43.314959
2       3 -1.912339  43.317663
3       4 -1.917594  43.318983
4       5 -1.921789  43.320913


In [137]:
edge_df = network.net_edges
print(edge_df.head())

   from  to    weight
0     1   2  2.800000
1     2   3  2.500000
2     3   4  1.150000
3     4   5  0.950000
4     5   6  1.183333


In [138]:
print(node_df.columns)

Index(['id_int', 'x', 'y'], dtype='object')


In [139]:
import pandas as pd

# Merge using the updated 'id_int' column instead of 'id'
tmp = pd.merge(edge_df, node_df[['id_int']], left_on='from', right_index=True, sort=False, copy=False, how='left')

# Assign the 'from_int' column based on 'id_int' from node_df
tmp['from_int'] = tmp['id_int']

# Drop the unnecessary columns
tmp.drop(['id_int'], axis=1, inplace=True)

# Check the resulting dataframe
print(tmp.head())

   from  to    weight  from_int
0     1   2  2.800000       2.0
1     2   3  2.500000       3.0
2     3   4  1.150000       4.0
3     4   5  0.950000       5.0
4     5   6  1.183333       6.0


In [140]:
# Merge to get the 'to_int' column
tmp = pd.merge(tmp, node_df[['id_int']], left_on='to', right_index=True, sort=False, copy=False, how='left')

# Assign the 'to_int' column based on 'id_int' from node_df
tmp['to_int'] = tmp['id_int']

# Drop the unnecessary columns
tmp.drop(['id_int'], axis=1, inplace=True)

# Check the resulting dataframe
print(tmp.head())


   from  to    weight  from_int  to_int
0     1   2  2.800000       2.0     3.0
1     2   3  2.500000       3.0     4.0
2     3   4  1.150000       4.0     5.0
3     4   5  0.950000       5.0     6.0
4     5   6  1.183333       6.0     7.0


In [141]:
node_df.rename(columns={'id_int': 'id'}, inplace=True)
print(node_df.head())

   id         x          y
0   1 -1.899896  43.318994
1   2 -1.902670  43.314959
2   3 -1.912339  43.317663
3   4 -1.917594  43.318983
4   5 -1.921789  43.320913


In [142]:
network_formated=ua.network._format_pandana_edges_nodes(edge_df, node_df)

Edge and node tables formatted for Pandana with integer node ids: id_int, to_int, and from_int. Took 0.51 seconds


In [143]:
print(network_formated)

(         from     to    weight  from_int  to_int
0           1      2  2.800000         1       2
1           2      3  2.500000         2       3
2           3      4  1.150000         3       4
3           4      5  0.950000         4       5
4           5      6  1.183333         5       6
...       ...    ...       ...       ...     ...
423704   4202  64640  0.308245      4202   64640
423705  64640   4202  7.808245     64640    4202
423706  64640   4202  7.808245     64640    4202
423707  64640   4202  7.808245     64640    4202
423708  64640   4202  7.808245     64640    4202

[423709 rows x 5 columns],             id         x          y
id_int                             
1            1 -1.899896  43.318994
2            2 -1.902670  43.314959
3            3 -1.912339  43.317663
4            4 -1.917594  43.318983
5            5 -1.921789  43.320913
...        ...       ...        ...
125262  125262 -2.211893  43.298687
125263  125263 -2.213562  43.296225
125264  125264 -2.21444

In [144]:
df = network_formated[0]  # Extract the DataFrame

for i in range(5):
    print(df['from'].iloc[i], df['to'].iloc[i], df['weight'].iloc[i])

1 2 2.8
2 3 2.5
3 4 1.15
4 5 0.95
5 6 1.1833333333333333


In [145]:
edges_df, nodes_df = network_formated

In [146]:
print(nodes_df.columns)  # Should work if nodes_df is a DataFrame

Index(['id', 'x', 'y'], dtype='object')


In [147]:
print(edges_df.columns)  # Should work if edges_df is a DataFrame

Index(['from', 'to', 'weight', 'from_int', 'to_int'], dtype='object')


In [148]:
import pandana

# Extract the 'from_int', 'to_int', and 'weight' columns for the edges
imp = pd.DataFrame(edges_df['weight'])

# Create a Pandana network
pt_network = pandana.Network(nodes_df.x, nodes_df.y, 
                      edges_df.from_int, edges_df.to_int, imp, False)

Use the network (check that it works properly)

In [152]:
O_lat,O_lon=[43.304787562955205, -2.0162325112056965]

O_nodes = pt_network.get_node_ids([O_lon], [O_lat]).values
O_nodes

D_lat,D_lon=[43.313605588539566, -2.0054322679462913]

D_nodes = pt_network.get_node_ids([D_lon], [D_lat]).values
D_nodes

# Convert both node arrays to integers (if they are not already)
O_nodes = O_nodes.astype(int)
D_nodes = D_nodes.astype(int)

# Now calculate the shortest path length
pt_network.shortest_path_length(O_nodes[0], D_nodes[0])

13.876

In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, LineString

# Assuming 'network' is your Pandana network
nodes_gdf = gpd.GeoDataFrame(
    pt_network.nodes_df,
    geometry=[Point(xy) for xy in zip(pt_network.nodes_df['x'], pt_network.nodes_df['y'])],
    crs="EPSG:4326"  # Ensure it's in WGS84 for compatibility with Kepler
)

# Save nodes to GeoJSON
nodes_gdf.to_file("nodes.geojson", driver="GeoJSON")

In [None]:
import geopandas as gpd
from shapely.geometry import LineString, Point
import pandas as pd
import pandana as pdna

# Example data
# Assuming 'network_formated' is your edges DataFrame
# Assuming 'nodes_df' is your nodes DataFrame

# Get the nearest nodes for origin and destination using your logic
O_lat, O_lon = [43.304795980913255, -2.0160877677716376]
D_lat, D_lon = [43.305456412997316, -2.0167954816992952]

O_nodes = pt_network.get_node_ids([O_lon], [O_lat]).values.astype(int)
D_nodes = pt_network.get_node_ids([D_lon], [D_lat]).values.astype(int)

# Calculate the shortest path between the origin and destination
shortest_path_nodes = pt_network.shortest_path(O_nodes[0], D_nodes[0])

# print the shortest path length
print(pt_network.shortest_path_length(O_nodes[0], D_nodes[0]))

# Ensure the shortest path is valid
if len(shortest_path_nodes) == 0:
    raise ValueError("Shortest path calculation failed. The path is empty!")

# Extract coordinates for the nodes involved in the shortest path
# Accessing the 'nodes_df' which holds the x, y coordinates
path_coords = [(nodes_df.loc[node, 'x'], nodes_df.loc[node, 'y']) for node in shortest_path_nodes]

# Ensure we have at least 2 valid points
if len(path_coords) < 2:
    raise ValueError(f"Not enough valid coordinates found! Found only {len(path_coords)} points.")

# Create a LineString geometry from the coordinates
route_geometry = LineString(path_coords)

# Create a list of Point geometries for the nodes
node_geometries = [Point(nodes_df.loc[node, 'x'], nodes_df.loc[node, 'y']) for node in shortest_path_nodes]

# Create a GeoDataFrame to store the route and the nodes
route_gdf = gpd.GeoDataFrame([{
    'origin_node': O_nodes[0],
    'destination_node': D_nodes[0],
    'length': pt_network.shortest_path_length(O_nodes[0], D_nodes[0]),
    'geometry': route_geometry
}], crs="EPSG:4326")

# Add node geometries to the same GeoDataFrame as individual nodes
nodes_gdf = gpd.GeoDataFrame([{
    'node_id': node,
    'geometry': geom
} for node, geom in zip(shortest_path_nodes, node_geometries)], crs="EPSG:4326")

# Combine the route and nodes GeoDataFrames into a single GeoDataFrame
combined_gdf = pd.concat([route_gdf, nodes_gdf], ignore_index=True)

# Save the combined route and nodes to a GeoJSON file
combined_gdf.to_file("shortest_route_with_nodes.geojson", driver="GeoJSON")

print("Route and nodes successfully saved as shortest_route_with_nodes.geojson")

11.231
Route and nodes successfully saved as shortest_route_with_nodes.geojson


Load nodes gdf

In [None]:
import geopandas as gpd

# Load the geojson file and print to check
nodes_gdf = gpd.read_file('./data/geojson/building_nodes_with_poi.geojson')

print(nodes_gdf.head())

KeyboardInterrupt: 

In [None]:
# Fill the time_to_poi column with None
nodes_gdf['nearest_node'] = None
nodes_gdf['time_to_nearest_poi'] = None
nodes_gdf['block_color'] = None
nodes_gdf['nearest_poi'] = None
nodes_gdf['nearest_poi_node'] = None
print(nodes_gdf.head())

  Referencia       lon        lat nearest_node time_to_nearest_poi  \
0    8594095 -1.948183  43.299723         None                None   
1    8594099 -1.948484  43.299120         None                None   
2    8594100 -1.948365  43.298748         None                None   
3    8796114 -1.928046  43.315046         None                None   
4    8796136 -1.928175  43.313904         None                None   

  block_color nearest_poi nearest_poi_node                   geometry  
0        None        None             None  POINT (-1.94818 43.29972)  
1        None        None             None  POINT (-1.94848 43.29912)  
2        None        None             None  POINT (-1.94837 43.29875)  
3        None        None             None  POINT (-1.92805 43.31505)  
4        None        None             None   POINT (-1.92818 43.3139)  


Load POI gdf

In [None]:
poi = gpd.read_file('./data/poi/centros_salud.geojson')
print(poi.head())

                       nombre codigodelcentro     tipodecentro  \
0  Ambulatorio de Amara Berri  entity2C9E9AC2      Ambulatorio   
1  Ambulatorio de Amara Berri  entityD96EC52F  Centro de Salud   
2     Ambulatorio de Azpeitia  entity458B7E4D      Ambulatorio   
3     Ambulatorio de Azpeitia  entity88E353B4  Centro de Salud   
4      Ambulatorio de Beasain  entity24E39531  Centro de Salud   

                   horarioatencionciudadana  \
0  De lunes a viernes de 08:00 h a 20:00 h.   
1                                      None   
2       de lunes a viernes de 08:00 a 20:00   
3                                      None   
4                                      None   

                                     horarioespecial  \
0  Para urgencias leves fuera del horario del Cen...   
1                                               None   
2  para urgencias leves fuera del horario del Cen...   
3                                               None   
4                                       

In [None]:
poi['lon'] = poi['geometry'].x
poi['lat'] = poi['geometry'].y

Create functions to find nearest node

In [None]:
def assign_nearest_nodes(gdf, pt_network):
    # Ensure lon/lat are numeric
    gdf['lon'] = pd.to_numeric(gdf['lon'], errors='coerce')
    gdf['lat'] = pd.to_numeric(gdf['lat'], errors='coerce')

    # Drop rows with missing coordinates
    gdf = gdf.dropna(subset=['lon', 'lat']).copy()
    
    # Convert to numpy arrays
    lon_values = gdf['lon'].to_numpy()
    lat_values = gdf['lat'].to_numpy()

    # Assign nearest nodes
    try:
        gdf['nearest_node'] = pt_network.get_node_ids(lon_values, lat_values)
    except Exception as e:
        print(f"Error assigning nearest nodes: {e}")
        gdf['nearest_node'] = None  # Assign None in case of failure

    return gdf

In [None]:
nodes_gdf = assign_nearest_nodes(nodes_gdf, pt_network)
print(nodes_gdf.head())

  Referencia       lon        lat  nearest_node time_to_nearest_poi  \
0    8594095 -1.948183  43.299723         39049                None   
1    8594099 -1.948484  43.299120         39049                None   
2    8594100 -1.948365  43.298748         39049                None   
3    8796114 -1.928046  43.315046         16476                None   
4    8796136 -1.928175  43.313904         16476                None   

  block_color nearest_poi nearest_poi_node                   geometry  
0        None        None             None  POINT (-1.94818 43.29972)  
1        None        None             None  POINT (-1.94848 43.29912)  
2        None        None             None  POINT (-1.94837 43.29875)  
3        None        None             None  POINT (-1.92805 43.31505)  
4        None        None             None   POINT (-1.92818 43.3139)  


In [None]:
poi = assign_nearest_nodes(poi, pt_network)
print(poi.head())

                       nombre codigodelcentro     tipodecentro  \
0  Ambulatorio de Amara Berri  entity2C9E9AC2      Ambulatorio   
1  Ambulatorio de Amara Berri  entityD96EC52F  Centro de Salud   
2     Ambulatorio de Azpeitia  entity458B7E4D      Ambulatorio   
3     Ambulatorio de Azpeitia  entity88E353B4  Centro de Salud   
4      Ambulatorio de Beasain  entity24E39531  Centro de Salud   

                   horarioatencionciudadana  \
0  De lunes a viernes de 08:00 h a 20:00 h.   
1                                      None   
2       de lunes a viernes de 08:00 a 20:00   
3                                      None   
4                                      None   

                                     horarioespecial  \
0  Para urgencias leves fuera del horario del Cen...   
1                                               None   
2  para urgencias leves fuera del horario del Cen...   
3                                               None   
4                                       

Find nearest POI for each node

In [None]:
from shapely.geometry import Point

def nearest_poi(row, poi_df):
    # Asegúrate de usar la geometría para calcular la distancia
    distances = poi['geometry'].apply(lambda x: row.geometry.distance(x))
    nearest = poi.loc[distances.idxmin()]
    return nearest['nombre']  # O el identificador que quieras retornar

# Aplica la función sobre cada fila del GeoDataFrame
nodes_gdf['nearest_poi'] = nodes_gdf.apply(nearest_poi, poi_df=poi, axis=1)

KeyboardInterrupt: 

Assign the nearest node of the nearest POI

In [None]:
nodes_gdf['nearest_poi_node'] = None

# Get the nearest node of the nearest poi that is already in the poi dataframe
def nearest_poi_node(row):
    nearest_node = poi.loc[row['nearest_poi'] == poi['nombre']]['nearest_node']
    return nearest_node.values[0]

In [None]:
# Apply the function to each row in the GeoDataFrame
nodes_gdf['nearest_poi_node'] = nodes_gdf.apply(nearest_poi_node, axis=1)

In [None]:
print(nodes_gdf.head())

  Referencia       lon        lat  nearest_node time_to_nearest_poi  \
0    8594095 -1.948183  43.299723         39049                None   
1    8594099 -1.948484  43.299120         39049                None   
2    8594100 -1.948365  43.298748         39049                None   
3    8796114 -1.928046  43.315046         16476                None   
4    8796136 -1.928175  43.313904         16476                None   

  block_color                                        nearest_poi  \
0        None  Centro de Salud de Martutene (Centro Penitenci...   
1        None  Centro de Salud de Martutene (Centro Penitenci...   
2        None  Centro de Salud de Martutene (Centro Penitenci...   
3        None                           Centro de Salud de Altza   
4        None                           Centro de Salud de Altza   

   nearest_poi_node                   geometry  
0            115848  POINT (-1.94818 43.29972)  
1            115848  POINT (-1.94848 43.29912)  
2            1158

Find shortest path

In [None]:
# Define a function to compute shortest path length for each row
def compute_shortest_path(row, pt_network):
    # Extract nearest node and nearest POI node
    origin_node = row['nearest_node']
    destination_node = row['nearest_poi_node']
    
    # Calculate the shortest path length between the two nodes
    path_length = pt_network.shortest_path_length(origin_node, destination_node)
    print(f"Shortest path length: {path_length} for row {row.name}")
    
    return path_length

# Apply the function to each row of the GeoDataFrame
nodes_gdf['time_to_nearest_poi'] = nodes_gdf.apply(compute_shortest_path, pt_network=pt_network, axis=1)

Shortest path length: 13.293 for row 0
Shortest path length: 13.293 for row 1
Shortest path length: 13.293 for row 2
Shortest path length: 5.64 for row 3
Shortest path length: 5.64 for row 4
Shortest path length: 3.03 for row 5
Shortest path length: 1.733 for row 6
Shortest path length: 3.411 for row 7
Shortest path length: 3.411 for row 8
Shortest path length: 14.561 for row 9
Shortest path length: 13.606 for row 10
Shortest path length: 24.51 for row 11
Shortest path length: 18.884 for row 12
Shortest path length: 10.676 for row 13
Shortest path length: 10.676 for row 14
Shortest path length: 19.506 for row 15
Shortest path length: 7.469 for row 16
Shortest path length: 27.613 for row 17
Shortest path length: 10.676 for row 18
Shortest path length: 7.523 for row 19
Shortest path length: 9.458 for row 20
Shortest path length: 10.283 for row 21
Shortest path length: 26.601 for row 22
Shortest path length: 8.912 for row 23
Shortest path length: 10.477 for row 24
Shortest path length: 10

KeyboardInterrupt: 

In [None]:
# count rows with inf values
print(nodes_gdf[nodes_gdf['time_to_nearest_poi'] == float('inf')].shape[0])

# print the rows with inf values
print(nodes_gdf[nodes_gdf['time_to_nearest_poi'] == float('inf')])

0
Empty GeoDataFrame
Columns: [Referencia, lon, lat, nearest_node, time_to_nearest_poi, block_color, nearest_poi, nearest_poi_node, geometry]
Index: []


In [None]:
print(nodes_gdf)

      Referencia       lon        lat  nearest_node  time_to_nearest_poi  \
0        8594095 -1.948183  43.299723         29577               14.021   
1        8594099 -1.948484  43.299120         29577               14.021   
2        8594100 -1.948365  43.298748         29577               14.021   
3        8796114 -1.928046  43.315046         64886                4.959   
4        8796136 -1.928175  43.313904         64886                4.959   
...          ...       ...        ...           ...                  ...   
58763    6779028 -2.172469  43.165117         41162              155.734   
58764    6779053 -2.172669  43.165403         20882              154.897   
58765    6779097 -2.172889  43.164341         41166              155.162   
58766    6779001 -2.173457  43.165434         20881              154.120   
58767    6779052 -2.173675  43.165523         20881              154.120   

      block_color                                        nearest_poi  \
0            No

Assign color depending on time

In [None]:
# Assign colors to the blocks based on the time it takes to reach the nearest POI
nodes_gdf.loc[nodes_gdf['time_to_nearest_poi'] < 3, 'block_color'] = '#00572a'  # Dark Green
nodes_gdf.loc[(nodes_gdf['time_to_nearest_poi'] >= 3) & (nodes_gdf['time_to_nearest_poi'] < 6), 'block_color'] = '#7CB342'  # Green
nodes_gdf.loc[(nodes_gdf['time_to_nearest_poi'] >= 6) & (nodes_gdf['time_to_nearest_poi'] < 10), 'block_color'] = '#FFFF00'  # Yellow
nodes_gdf.loc[(nodes_gdf['time_to_nearest_poi'] >= 10) & (nodes_gdf['time_to_nearest_poi'] < 15), 'block_color'] = '#FFA500'  # Orange
nodes_gdf.loc[(nodes_gdf['time_to_nearest_poi'] >= 15) & (nodes_gdf['time_to_nearest_poi'] < 20), 'block_color'] = '#D50000'  # Red
nodes_gdf.loc[(nodes_gdf['time_to_nearest_poi'] >= 20) & (nodes_gdf['time_to_nearest_poi'] < 25), 'block_color'] = '#8f0340'  # Dark red
nodes_gdf.loc[nodes_gdf['time_to_nearest_poi'] >= 25, 'block_color'] = '#6a1717'  # Purple

In [None]:
# save gdf
#nodes_gdf.to_file('./outputs/pt/centros_salud_nodes_times_pt_3.geojson', driver='GeoJSON')