In [1]:
# Imports
import pandas as pd
import geopandas as gpd
import numpy as np
from collections import Counter
import networkx as nx
import igraph as ig
import osmnx as ox
import h3
import shapely.wkt

In [2]:
from shapely.geometry import Point

In [3]:
from scipy.spatial import cKDTree

In [4]:
df = pd.read_csv(r'D:\bike\code\metro\Preprocessing-bike data\metro_area.csv').drop(columns = 'Unnamed: 0')

In [5]:
geometry = df['geometry'].map(shapely.wkt.loads)
df = df.drop('geometry', axis=1)
gdf = gpd.GeoDataFrame(df, geometry=geometry)

In [6]:
gdf = gdf.set_crs("EPSG:4326")

In [7]:
gdf

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (114.11867 22.53208)
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (114.11815 22.53551)
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (114.11891 22.53968)
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (114.11694 22.54423)
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (114.10781 22.54180)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (113.80608 22.67356)
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (113.81366 22.62403)
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (113.85609 22.57507)
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (113.88040 22.55459)


In [16]:
def distance_cbd(gdf, gdf_loc):
    """  
    Returns a DataFrame with an additional line that contains the distance to a given point
    
    Calculates the following:
        
        Features:
        ---------
        - Distance to CBD
 
    Args:
        - gdf: geodataframe with trip origin waypoint
        - gdf_loc: location of Point of Interest (format: shapely.geometry.point.Point)  
    Returns:
        - gdf: a DataFrame of shape (number of columns(gdf)+1, len_df) with the 
          computed features
    Last update: 2/12/21. By Felix.
    """
    
    # create numpy array
    np_geom = gdf.geometry.values
    # 1.create new column in dataframe to assign distance to CBD array to
    gdf['feature_distance_cbd'] = np_geom[:].distance(gdf_loc.geometry.iloc[0])
   
    return gdf

In [17]:
def get_shortest_dist(graph_ig, osmids, orig_osmid, dest_osmid, weight='length'):
    # calculate shortest distance using igraph
    return graph_ig.shortest_paths(
        source=osmids.index(orig_osmid),
        target=osmids.index(dest_osmid),
        weights=weight)[0][0]



def nearest_neighbour(gdA, gdB):
    """
    Function to calculate for every entry in gdA, the nearest neighbour
    among the points in gdB

    taken from https://gis.stackexchange.com/questions/222315/geopandas-find-nearest-point-in-other-dataframe

    Args:
    - gdA: geodataframe with points in geometry column
    - gdB: geodataframe with points in geometry column

    Returns:
        - gdf_out: geodataframe wich is gdA + 2 columns containing
        the name of the closest point and the distance

    Last update: 13/04/21. By Felix.
    """
    nA = np.array(list(gdA.geometry.apply(lambda x: (x.x, x.y))))
    nB = np.array(list(gdB.geometry.apply(lambda x: (x.x, x.y))))
    btree = cKDTree(nB)
    dist, idx = btree.query(nA, k=1)
    gdB_nearest = gdB.iloc[idx].drop(columns="geometry").reset_index(drop=False)
    gdf_out = pd.concat(
        [
            gdA.reset_index(drop=True),
            gdB_nearest,
            pd.Series(dist, name='distance')
        ],
        axis=1)
     
    return gdf_out


def convert_to_igraph(graph_nx, weight='length'):
    """
    Function to convert networkx (or osmnx) graph element to igraph

    Args:
    - graph_nx (networkx graph): multigraph object
    - weight (string) = 'length': attribute of the graph

    Returns:
        - G_ig (igraph element): converted graph
        - osmids (list): list with osm IDs of nodes

    Last update: 29/06/21. By Felix.
    """
    # retrieve list of osmid id's and relabel
    G_nx = graph_nx
    osmids = list(G_nx.nodes)
    G_nx = nx.relabel.convert_node_labels_to_integers(G_nx)
    # give each node its original osmid as attribute since we relabeled them
    osmid_values = {k: v for k, v in zip(G_nx.nodes, osmids)}
    nx.set_node_attributes(G_nx, osmid_values, "osmid")
    # convert networkx graph to igraph
    G_ig = ig.Graph(directed=True)
    G_ig.add_vertices(G_nx.nodes)
    G_ig.add_edges(G_nx.edges())
    G_ig.vs["osmid"] = osmids
    G_ig.es[weight] = list(nx.get_edge_attributes(G_nx, weight).values())
    return G_ig, osmids

def distance_cbd_shortest_dist(gdf, gdf_loc, graph):
    """  
    Returns a DataFrame with an additional line that contains the distance to a given point
    based on the shortest path calculated with igraph's shortest_path function.
    We convert to igraph in order to save 100ms per shortest_path calculation.
    For more info refer to the notebook shortest_path.ipynb or
    https://github.com/gboeing/osmnx-examples/blob/main/notebooks/14-osmnx-to-igraph.ipynb 
    
    Calculates the following:
        
        Features:
        ---------
        - Distance to CBD (based on graph network)
 
    Args:
        - gdf: geodataframe with trip origin waypoint
        - gdf_loc: location of Point of Interest (format: shapely.geometry.point.Point)
        - graph: Multigraph Object downloaded from osm  
    Returns:
        - gdf: a DataFrame of shape (number of columns(gdf)+1, len_gdf) with the 
          computed features
    Last update: 29/06/21. By Felix.
    """
    # then we have to convert the multigraph object to a dataframe
    gdf_nodes_4326, gdf_edges_4326 = ox.utils_graph.graph_to_gdfs(graph)
    
    gdf_4326 = gdf.to_crs(4326)
    gdf_loc_4326 = gdf_loc.to_crs(4326)

    # call nearest neighbour function
    gdf_orig_4326 = nearest_neighbour(gdf_4326, gdf_nodes_4326)
    gdf_dest_4326  = nearest_neighbour(gdf_loc_4326, gdf_nodes_4326)

    graph_ig, list_osmids = convert_to_igraph(graph)
    gdf['feature_distance_cbd'] = gdf_orig_4326.apply(lambda x: get_shortest_dist(graph_ig,
                                                                                     list_osmids, 
                                                                                     x.osmid, 
                                                                                     gdf_dest_4326.osmid.iloc[0], 
                                                                                     'length'),
                                                                                     axis=1)
    
    # add distance from hex center to nearest node (only for nodes where distance != inf)
    dist_start = gdf_orig_4326['distance'][gdf.feature_distance_cbd != np.inf]
    dist_end = gdf_dest_4326['distance'][0]
    gdf.feature_distance_cbd[gdf.feature_distance_cbd != np.inf] += dist_start + dist_end

    # check for nodes that could not be connected
    # create numpy array 
    np_geom = gdf.geometry[gdf.feature_distance_cbd == np.inf].values
    #assign distance to cbd array
    gdf.feature_distance_cbd[gdf.feature_distance_cbd == np.inf] = np_geom[:].distance(gdf_loc.geometry.iloc[0])

    print('Calculated distance to cbd based on shortest path')
    return gdf  



# distance_main_cbd

深圳中心坐标 Point(114.057891,22.54353)

In [18]:
point = Point(114.057891,22.54353)

In [19]:
points = gpd.GeoSeries(point)

In [20]:
gdf_loc = gpd.GeoDataFrame({'geometry':points})

In [21]:
gdf_loc = gdf_loc.set_crs('EPSG:4326')

In [22]:
gdf_loc = gdf_loc.to_crs('EPSG:32649')

In [23]:
gdf_loc

Unnamed: 0,geometry
0,POINT (814527.303 2496210.039)


In [24]:
gdf = gdf.to_crs('EPSG:32649')

In [25]:
distance_cbd(gdf, gdf_loc)

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry,feature_distance_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993),6384.437604
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082),6267.608756
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339),6296.435712
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511),6079.537111
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436),5142.905857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183),29646.887310
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877),26669.343897
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417),21062.909443
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139),18311.929207


In [26]:
gdf = gdf.rename(columns={'feature_distance_cbd':'distance_main_cbd'})

In [27]:
gdf

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry,distance_main_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993),6384.437604
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082),6267.608756
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339),6296.435712
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511),6079.537111
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436),5142.905857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183),29646.887310
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877),26669.343897
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417),21062.909443
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139),18311.929207


In [28]:
GHP = ox.graph_from_place("Shenzhen, Guangdong, China", network_type="bike")

In [29]:
gdf1 = distance_cbd_shortest_dist(gdf, gdf_loc, GHP)

  return graph_ig.shortest_paths(


Calculated distance to cbd based on shortest path


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf.feature_distance_cbd[gdf.feature_distance_cbd != np.inf] += dist_start + dist_end
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf.feature_distance_cbd[gdf.feature_distance_cbd == np.inf] = np_geom[:].distance(gdf_loc.geometry.iloc[0])


In [30]:
gdf1

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry,distance_main_cbd,feature_distance_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993),6384.437604,8669.184330
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082),6267.608756,8608.449089
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339),6296.435712,8160.483862
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511),6079.537111,7906.856278
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436),5142.905857,7117.979070
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183),29646.887310,35404.262163
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877),26669.343897,32640.468095
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417),21062.909443,24665.157649
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139),18311.929207,20860.532836


In [33]:
gdf1.to_csv(r'D:\bike\code\metro\Preprocessing-bike data\code\day_buffer\variable\distance_main_cbd.csv',encoding = 'utf-8-sig')

# distance_local_cbd

In [34]:
def get_shortest_dist(graph_ig, osmids, orig_osmid, dest_osmid, weight='length'):
    # calculate shortest distance using igraph
    return graph_ig.shortest_paths(
        source=osmids.index(orig_osmid),
        target=osmids.index(dest_osmid),
        weights=weight)[0][0]



def nearest_neighbour(gdA, gdB):
    """
    Function to calculate for every entry in gdA, the nearest neighbour
    among the points in gdB

    taken from https://gis.stackexchange.com/questions/222315/geopandas-find-nearest-point-in-other-dataframe

    Args:
    - gdA: geodataframe with points in geometry column
    - gdB: geodataframe with points in geometry column

    Returns:
        - gdf_out: geodataframe wich is gdA + 2 columns containing
        the name of the closest point and the distance

    Last update: 13/04/21. By Felix.
    """
    nA = np.array(list(gdA.geometry.apply(lambda x: (x.x, x.y))))
    nB = np.array(list(gdB.geometry.apply(lambda x: (x.x, x.y))))
    btree = cKDTree(nB)
    dist, idx = btree.query(nA, k=1)
    gdB_nearest = gdB.iloc[idx].drop(columns="geometry").reset_index(drop=False)
    gdf_out = pd.concat(
        [
            gdA.reset_index(drop=True),
            gdB_nearest,
            pd.Series(dist, name='distance')
        ],
        axis=1)
     
    return gdf_out


def convert_to_igraph(graph_nx, weight='length'):
    """
    Function to convert networkx (or osmnx) graph element to igraph

    Args:
    - graph_nx (networkx graph): multigraph object
    - weight (string) = 'length': attribute of the graph

    Returns:
        - G_ig (igraph element): converted graph
        - osmids (list): list with osm IDs of nodes

    Last update: 29/06/21. By Felix.
    """
    # retrieve list of osmid id's and relabel
    G_nx = graph_nx
    osmids = list(G_nx.nodes)
    G_nx = nx.relabel.convert_node_labels_to_integers(G_nx)
    # give each node its original osmid as attribute since we relabeled them
    osmid_values = {k: v for k, v in zip(G_nx.nodes, osmids)}
    nx.set_node_attributes(G_nx, osmid_values, "osmid")
    # convert networkx graph to igraph
    G_ig = ig.Graph(directed=True)
    G_ig.add_vertices(G_nx.nodes)
    G_ig.add_edges(G_nx.edges())
    G_ig.vs["osmid"] = osmids
    G_ig.es[weight] = list(nx.get_edge_attributes(G_nx, weight).values())
    return G_ig, osmids



In [35]:
def distance_local_cbd(gdf, gdf_loc_local):
    """
    Function to caluclate location of closest local city center for each point. 
    
    Args:
    - gdf: geodataframe with points in geometry column
    - gdf_loc_local: geodataframe with points in geometry column
    Returns:
        - gdf_out: geodataframe with trips only on either weekdays or weekends
    Last update: 13/04/21. By Felix.
    """  
    # call nearest neighbour function
    gdf_out = nearest_neighbour(gdf, gdf_loc_local)
    # rename columns and drop unneccessary ones
    gdf_out = gdf_out.rename(columns={"distance": "distance_local_cbd"})
    return gdf_out


def distance_local_cbd_shortest_dist(gdf, gdf_loc_local, graph):
    """  
    Returns a DataFrame with an additional line that contains the distance to points in gdf_loc_local
    based on the shortest path calculated with igraph's shortest_path function.
    We convert to igraph in order to save 100ms per shortest_path calculation.
    For more info refer to the notebook shortest_path.ipynb or
    https://github.com/gboeing/osmnx-examples/blob/main/notebooks/14-osmnx-to-igraph.ipynb 
    Calculates the following:
        
        Features:
        ---------
        - Distance to local cbd (based on graph network)
    Args:
        - gdf: geodataframe with trip origin waypoint
        - gdf_loc: location of Points of Interest (format: shapely.geometry.point.Point)
        - graph: Multigraph Object downloaded from osm  
    Returns:
        - gdf: a DataFrame of shape (number of columns(gdf)+1, len_gdf) with the 
            computed features
    Last update: 01/07/21. By Felix.
    """


    # call nearest neighbour to find nearest local center
    gdf_out = nearest_neighbour(gdf, gdf_loc_local)
    # rename distance column
    gdf_out = gdf_out.rename(columns={'distance':'distance_crow'})
    # remove unnecessary columns

    # convert input gdf to crs
    gdf_4326 = gdf_out.to_crs(4326)
    gdf_loc_local_4326 = gdf_loc_local.to_crs(4326)

    # then we have to convert the multigraph object to a dataframe
    gdf_nodes_4326, gdf_edges_4326 = ox.utils_graph.graph_to_gdfs(graph)
    # call nearest neighbour function to find nearest node
    gdf_orig_4326 = nearest_neighbour(gdf_4326, gdf_nodes_4326)
    gdf_dest_4326  = nearest_neighbour(gdf_loc_local_4326, gdf_nodes_4326)

    # merge on node ID 
    gdf_merge_4326 =  gdf_orig_4326.merge(gdf_dest_4326,how='left',on=['osmid'])

    # convert to igraph
    graph_ig, list_osmids = convert_to_igraph(graph)
    
    # call get shortest dist func, where gdf_merge_3426.osmid_x is nearest node from starting point and osmid_y is 
    # nearest node from end destination (one of the neighbourhood centers)
    gdf['feature_distance_local_cbd'] = gdf_merge_4326.apply(lambda x: get_shortest_dist(graph_ig,
                                                                                    list_osmids, 
                                                                                     x.osmid,
                                                                                      gdf_dest_4326.osmid.iloc[0],
                                                                                    'length'),
                                                                                    axis=1)

    # add distance from hex center to nearest node (only for nodes where distance != inf)
    dist_start = gdf_orig_4326['distance'][gdf.feature_distance_local_cbd != np.inf]
    dist_end = gdf_dest_4326['distance'][0]
    gdf.feature_distance_local_cbd[gdf.feature_distance_local_cbd != np.inf] += dist_start + dist_end


    # check for nodes that could not be connected
    # create numpy array 
    np_geom = gdf.geometry[gdf.feature_distance_local_cbd == np.inf].values
    #assign distance to cbd array
    gdf.feature_distance_local_cbd[gdf.feature_distance_local_cbd == np.inf] = np_geom[:].distance(gdf_loc.geometry.iloc[0])
    
    
    print('Calculated distance to local cbd based on shortest path')
    return gdf 

In [36]:
df = pd.read_csv(r'D:\bike\code\metro\Preprocessing-bike data\metro_area.csv').drop(columns = 'Unnamed: 0')
geometry = df['geometry'].map(shapely.wkt.loads)
df = df.drop('geometry', axis=1)
gdf = gpd.GeoDataFrame(df, geometry=geometry)
gdf = gdf.set_crs("EPSG:4326")
gdf_1 = gdf.to_crs('EPSG:32649')
gdf_1

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993)
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082)
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339)
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511)
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183)
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877)
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417)
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139)


In [66]:
df = pd.read_csv(r"D:\bike\code\metro\Preprocessing-bike data\code\day_buffer\variable\distance\gdf_local_cbd_food.csv").drop(columns = 'Unnamed: 0')

geometry = df['geometry'].map(shapely.wkt.loads)
df = df.drop('geometry', axis=1)
gdf_loc_local = gpd.GeoDataFrame(df, geometry=geometry)

gdf_loc_local = gdf_loc_local.set_crs("EPSG:4326")
gdf_loc_local_1 = gdf_loc_local.to_crs("EPSG:32649")

In [67]:
result1 = distance_local_cbd_shortest_dist(gdf_1, gdf_loc_local_1, GHP)

  return graph_ig.shortest_paths(


Calculated distance to local cbd based on shortest path


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf.feature_distance_local_cbd[gdf.feature_distance_local_cbd != np.inf] += dist_start + dist_end
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf.feature_distance_local_cbd[gdf.feature_distance_local_cbd == np.inf] = np_geom[:].distance(gdf_loc.geometry.iloc[0])


In [68]:
result1

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry,feature_distance_local_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993),24093.959707
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082),23471.107466
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339),23023.142239
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511),23337.880655
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436),22226.237447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183),22028.693540
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877),19250.308472
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417),10930.880026
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139),7070.737213


In [69]:
result2 = distance_local_cbd(gdf_1, gdf_loc_local_1)

In [70]:
result2

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry,feature_distance_local_cbd,index,distance_local_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993),24093.959707,3,4269.031710
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082),23471.107466,3,4111.040712
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339),23023.142239,3,4101.471906
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511),23337.880655,3,3864.058875
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436),22226.237447,3,2939.266068
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183),22028.693540,2,1050.225873
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877),19250.308472,2,6570.756560
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417),10930.880026,1,1833.446347
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139),7070.737213,1,3067.819501


In [71]:
result2.describe()

Unnamed: 0,lg,lon,lat,index_right,hex_id,cluster_la,function_1,feature_distance_local_cbd,index,distance_local_cbd
count,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0
mean,3.051282,114.025704,22.59205,1297.247863,16389.589744,1.726496,2.444444,19796.814206,3.384615,4071.124341
std,2.178105,0.107286,0.074927,765.580802,9759.425288,1.049209,0.98873,11072.298815,2.703824,2354.729887
min,0.0,113.806076,22.476626,12.0,362.0,0.0,1.0,73.010348,0.0,26.583847
25%,2.0,113.93763,22.539066,644.0,8253.0,1.0,2.0,10083.705137,1.0,2257.312644
50%,3.0,114.0421,22.563708,1355.0,18011.0,2.0,2.0,20608.330607,3.0,3746.038558
75%,5.0,114.104562,22.638668,1972.75,24844.75,3.0,3.0,26901.15616,5.0,5379.131015
max,7.0,114.277422,22.784486,2731.0,36496.0,3.0,4.0,50011.669536,10.0,11132.853304


In [72]:
result2.to_csv(r'D:\bike\code\metro\Preprocessing-bike data\code\day_buffer\variable\distance_local_cbd_food.csv',encoding = 'utf-8-sig')

# distance_bus_station


In [73]:
df = pd.read_csv(r'D:\bike\code\metro\Preprocessing-bike data\metro_area.csv').drop(columns = 'Unnamed: 0')
geometry = df['geometry'].map(shapely.wkt.loads)
df = df.drop('geometry', axis=1)
gdf = gpd.GeoDataFrame(df, geometry=geometry)
gdf = gdf.set_crs("EPSG:4326")
gdf_1 = gdf.to_crs('EPSG:32649')
gdf_1

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993)
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082)
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339)
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511)
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183)
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877)
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417)
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139)


In [74]:
gdf_loc_local = gpd.read_file(r"D:\bike\data\bus_station_shp\bus_station_point.shp")

gdf_loc_local = gdf_loc_local.set_crs("EPSG:4326")
gdf_loc_local_1 = gdf_loc_local.to_crs("EPSG:32649")

In [75]:
gdf_loc_local_1

Unnamed: 0,match,id_station,location,name,sequence,id,busstops,lng,lat,geometry
0,0,BV10242435,"114.118955,22.531607",火车站,1,440300014163,"[{'id': 'BV10242435', 'location': '114.118955,...",114.118955,22.531607,POINT (820840.162 2495018.869)
1,1,BV11042896,"114.118993,22.534839",人民南地铁站,2,440300014163,"[{'id': 'BV10242435', 'location': '114.118955,...",114.118993,22.534839,POINT (820836.595 2495377.088)
2,2,BV11161011,"114.118797,22.537426",罗湖小学2,3,440300014163,"[{'id': 'BV10242435', 'location': '114.118955,...",114.118797,22.537426,POINT (820810.433 2495663.332)
3,3,BV11354437,"114.118689,22.540595",国贸1,4,440300014163,"[{'id': 'BV10242435', 'location': '114.118955,...",114.118689,22.540595,POINT (820791.982 2496014.257)
4,4,BV10382979,"114.121866,22.547661",东门3,5,440300014163,"[{'id': 'BV10242435', 'location': '114.118955,...",114.121866,22.547661,POINT (821102.628 2496804.076)
...,...,...,...,...,...,...,...,...,...,...
26401,7,BV10245462,"113.962624,22.5196",滨海沙河东立交,8,440300065771,"[{'id': 'BV10243592', 'location': '113.93169,2...",113.962624,22.5196,POINT (804774.642 2493360.943)
26402,8,BV10245463,"113.98391,22.519501",滨海深湾立交,9,440300065771,"[{'id': 'BV10243592', 'location': '113.93169,2...",113.98391,22.519501,POINT (806966.024 2493393.563)
26403,9,BV11451848,"113.995796,22.522141",深圳湾公园地铁站,10,440300065771,"[{'id': 'BV10243592', 'location': '113.93169,2...",113.995796,22.522141,POINT (808183.709 2493710.553)
26404,10,BV10244085,"113.999962,22.524563",红树林,11,440300065771,"[{'id': 'BV10243592', 'location': '113.93169,2...",113.999962,22.524563,POINT (808607.165 2493987.516)


In [76]:
result1 = distance_local_cbd_shortest_dist(gdf_1, gdf_loc_local_1, GHP)

  return graph_ig.shortest_paths(


Calculated distance to local cbd based on shortest path


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf.feature_distance_local_cbd[gdf.feature_distance_local_cbd != np.inf] += dist_start + dist_end
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf.feature_distance_local_cbd[gdf.feature_distance_local_cbd == np.inf] = np_geom[:].distance(gdf_loc.geometry.iloc[0])


In [77]:
result1

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,cluster_la,functional,function_1,geometry,feature_distance_local_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,0,Rural,4,POINT (820809.311 2495070.993),0.007544
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,0,Rural,4,POINT (820748.669 2495450.082),0.005303
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,1,Center,1,POINT (820816.745 2495913.339),0.004076
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,1,Center,1,POINT (820603.435 2496413.511),0.004491
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,1,Center,1,POINT (819669.497 2496124.436),0.004284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,2,Sub-center,2,POINT (788339.380 2510107.183),29646.887310
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,3,Outskirt,3,POINT (789223.295 2504633.877),26669.343897
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,1,Center,1,POINT (793691.303 2499293.417),21062.909443
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,1,Center,1,POINT (796235.679 2497072.139),18311.929207


In [78]:
result2 = distance_local_cbd(gdf_1, gdf_loc_local_1)

In [79]:
result2

Unnamed: 0,linename,stationnam,sl,poiid,lg,sp,lon,lat,index_right,hex_id,...,match,id_station,location,name,sequence,id,busstops,lng,lat.1,distance_local_cbd
0,地铁1号线(罗宝线),罗湖,"114.118666,22.532083",BV10249978,2,luo hu,114.118666,22.532083,2414,30444,...,0,BV10249978,"114.118666,22.532083",罗湖,1,440300024064,"[{'id': 'BV10249978', 'location': '114.118666,...",114.118666,22.532083,0.000000
1,地铁9号线,人民南,"114.118154,22.535514",BV10447615,0,ren min nan,114.118154,22.535514,2414,30444,...,29,BV10447615,"114.118154,22.535514",人民南,30,440300024055,"[{'id': 'BV10724147', 'location': '113.895455,...",114.118154,22.535514,0.000000
2,地铁1号线(罗宝线),国贸,"114.118909,22.539680",BV10243546,2,guo mao,114.118909,22.539680,2415,30445,...,1,BV10243546,"114.118826,22.539671",国贸,2,440300024064,"[{'id': 'BV10249978', 'location': '114.118666,...",114.118826,22.539671,8.603417
3,地铁1号线(罗宝线),老街,"114.116939,22.544232",BV10249964,2,lao jie,114.116939,22.544232,2411,30441,...,2,BV10249964,"114.116241,22.544301",老街,3,440300024064,"[{'id': 'BV10249978', 'location': '114.118666,...",114.116241,22.544301,72.266946
4,地铁1号线(罗宝线),大剧院,"114.107811,22.541800",BV10246013,3,DaJuYuan,114.107811,22.541800,2410,30440,...,3,BV10246013,"114.107811,22.5418",大剧院,4,440300024064,"[{'id': 'BV10249978', 'location': '114.118666,...",114.107811,22.5418,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,地铁11号线,福永,"113.806076,22.673562",BV10249452,0,FuYong,113.806076,22.673562,1433,18097,...,7,BV10249452,"113.806076,22.673562",福永,8,440300024057,"[{'id': 'BV10527861', 'location': '113.819959,...",113.806076,22.673562,0.000000
230,地铁11号线,机场,"113.813663,22.624026",BV10527864,6,ji chang,113.813663,22.624026,944,10976,...,9,BV10527864,"113.813663,22.624026",机场,10,440300024057,"[{'id': 'BV10527861', 'location': '113.819959,...",113.813663,22.624026,0.000000
231,地铁11号线,碧海湾,"113.856093,22.575073",BV10527859,6,bi hai wan,113.856093,22.575073,1960,24808,...,10,BV10527859,"113.856093,22.575073",碧海湾,11,440300024057,"[{'id': 'BV10527861', 'location': '113.819959,...",113.856093,22.575073,0.000000
232,地铁11号线,宝安,"113.880396,22.554590",BV10527865,6,bao an,113.880396,22.554590,2574,33438,...,11,BV10527865,"113.880396,22.55459",宝安,12,440300024057,"[{'id': 'BV10527861', 'location': '113.819959,...",113.880396,22.55459,0.000000


In [80]:
result2.describe()

Unnamed: 0,lg,lon,lat,index_right,hex_id,cluster_la,function_1,feature_distance_local_cbd,index,match,sequence,id,distance_local_cbd
count,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0
mean,3.051282,114.025704,22.59205,1297.247863,16389.589744,1.726496,2.444444,14070.853391,25385.57265,14.641026,15.641026,475661600000.0,5.813486
std,2.178105,0.107286,0.074927,765.580802,9759.425288,1.049209,0.98873,11658.706679,2681.36399,9.630924,9.630924,122758600000.0,21.960551
min,0.0,113.806076,22.476626,12.0,362.0,0.0,1.0,0.004045,942.0,0.0,1.0,440300000000.0,0.0
25%,2.0,113.93763,22.539066,644.0,8253.0,1.0,2.0,2714.061644,25754.25,6.25,7.25,440300000000.0,0.0
50%,3.0,114.0421,22.563708,1355.0,18011.0,2.0,2.0,11443.767498,25824.5,13.0,14.0,440300000000.0,0.0
75%,5.0,114.104562,22.638668,1972.75,24844.75,3.0,3.0,25756.302559,25899.75,22.0,23.0,440300000000.0,0.0
max,7.0,114.277422,22.784486,2731.0,36496.0,3.0,4.0,38167.196805,25977.0,52.0,53.0,900000100000.0,218.110339


In [81]:
result2.to_csv(r'D:\bike\code\metro\Preprocessing-bike data\code\day_buffer\variable\distance_local_cbd_bus.csv',encoding = 'utf-8-sig')