In [None]:
import pandas as pd

In [None]:
# Load the raw trajectory points, keeping only the columns used below.
traj = pd.read_csv('./input/synthetic_data_100000.csv', usecols=['lon', 'lat', 'uid', 'time'])
# Restrict to the single user uid == 0. .copy() makes the subset an independent
# frame, so the column assignments below never write into a slice of the raw read.
traj = traj[traj['uid'] == 0].copy()
traj = traj.dropna()
# Parse the timestamps before sorting so rows are ordered chronologically
# rather than by the lexical order of the raw strings.
start_time = pd.to_datetime('2023-07-12')
traj['time'] = pd.to_datetime(traj['time'])
traj = traj.sort_values(by=['uid', 'time'])
# Discard everything recorded before start_time.
traj = traj[traj['time'] >= start_time].copy()
# Minutes elapsed since start_time.
traj['timestamp'] = (traj['time'] - start_time).dt.total_seconds() / 60
# Drop points whose timestamp is a positive whole multiple of one day (24*60 minutes),
# i.e. points lying exactly on a day boundary after start_time itself.
traj = traj[~((traj['timestamp'] % (24 * 60) == 0) & (traj['timestamp'] / (24 * 60) > 0))].copy()
# Pair every point with the next point of the same user: (lon, lat) -> (elon, elat).
traj['elat'] = traj.groupby('uid')['lat'].shift(-1)
traj['elon'] = traj.groupby('uid')['lon'].shift(-1)
# The last point of each user has no successor (NaN elat/elon) and is removed here.
traj = traj.dropna()
# Keep the first occurrence of every distinct origin/destination pair per user.
# The coordinate columns are compared directly: gluing their string forms together
# without a delimiter lets different pairs collide (e.g. "1.1"+"21.5" == "1.12"+"1.5")
# and would silently drop valid rows.
traj = traj.drop_duplicates(subset=['lon', 'lat', 'elon', 'elat', 'uid'], keep='first')
traj

In [None]:
import hashlib
def generate_unique_key(slon, slat, elon, elat):
    """Return the MD5 hex digest of an origin/destination coordinate pair.

    The four values are formatted, joined with commas in the order
    ``slon,slat,elon,elat``, UTF-8 encoded and hashed.

    Args:
        slon: Longitude of the start point.
        slat: Latitude of the start point.
        elon: Longitude of the end point.
        elat: Latitude of the end point.

    Returns:
        The 32-character hexadecimal MD5 digest of the joined string.
    """
    joined = ",".join(f"{value}" for value in (slon, slat, elon, elat))
    digest = hashlib.md5(joined.encode('utf-8'))
    return digest.hexdigest()
# Tag every origin/destination pair with its MD5 key. The coordinate columns are
# walked as plain Python lists, which avoids building one Series per row the way a
# row-wise DataFrame.apply does and also works when traj is empty.
traj['md5'] = [
    generate_unique_key(slon, slat, elon, elat)
    for slon, slat, elon, elat in zip(
        traj['lon'].tolist(), traj['lat'].tolist(),
        traj['elon'].tolist(), traj['elat'].tolist())
]
# Positional slot (0..n-1) of each row inside route_list.
traj['index'] = list(range(len(traj)))
# One placeholder (0) per row; each slot is later overwritten with the computed route.
route_list = [0] * len(traj)
# Maps md5 key -> slot in route_list. .tolist() yields plain Python ints, so the
# mapping stays JSON-serialisable; for a repeated key the last row wins.
route_index_dict = dict(zip(traj['md5'].tolist(), traj['index'].tolist()))


In [None]:
import osmnx as ox
import warnings
# Silence every warning raised from here on.
warnings.filterwarnings('ignore')

# Load the road network stored as GraphML.
G = ox.load_graphml('./data/shanghai_road.graphml')

# Speed assigned to each highway type; handed to ox.add_edge_speeds below.
hwy_speeds = {
    "residential": 35,
    "secondary": 50,
    'primary': 50,
    "tertiary": 60,
}

# Convert the network into node and edge GeoDataFrames.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)

# Keep a single edge for every (u, v) node pair and give it key 0.
gdf_edges = gdf_edges.reset_index()
gdf_edges = gdf_edges.drop_duplicates(subset=['u', 'v'], keep='first')
gdf_edges['key'] = 0
gdf_edges = gdf_edges.set_index(['u', 'v', 'key'])

# Rebuild the graph from the frames, reusing the graph-level attributes
# of the network that was loaded above.
G = ox.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs=G.graph)

# Restrict the network to its largest strongly connected component.
G = ox.utils_graph.get_largest_component(G, strongly=True)

# Attach a speed, and from it a travel time, to every edge.
G = ox.add_edge_speeds(G, hwy_speeds)
G = ox.add_edge_travel_times(G)

import numpy as np
def find_travel_path(G, slon, slat, elon, elat, md5 , shortest=1, weight='travel_time'):
    """Compute a route between two coordinates and store it in ``route_list``.

    Both coordinates are snapped to their nearest node of ``G``. With
    ``shortest == 1`` the single shortest path is used; otherwise one of the
    ``shortest`` best paths is picked at random with ``np.random.choice``.

    Args:
        G: Road network graph passed to the osmnx routing helpers.
        slon: Longitude of the origin (passed as ``X`` to ``nearest_nodes``).
        slat: Latitude of the origin (passed as ``Y`` to ``nearest_nodes``).
        elon: Longitude of the destination.
        elat: Latitude of the destination.
        md5: Key looked up in the module-level ``route_index_dict`` to find the
            slot of ``route_list`` that receives the result.
        shortest: Number of candidate shortest paths to choose from; must be >= 1.
        weight: Edge attribute minimised by the path search.

    Returns:
        None. The result is written to ``route_list[route_index_dict[md5]]`` as a
        dict with keys ``travel_route``, ``travel_time``, ``length``,
        ``has_path`` and ``md5``.

    Raises:
        ValueError: If ``shortest`` is smaller than 1.
        KeyError: If ``md5`` is not a key of ``route_index_dict``.
    """
    # Reject an unusable request up front; previously this left travel_route
    # unassigned and failed later with an UnboundLocalError.
    if shortest < 1:
        raise ValueError(f"shortest must be >= 1, got {shortest!r}")

    # Snap both coordinates to their nearest network nodes.
    orig = ox.distance.nearest_nodes(G, X=slon, Y=slat)
    dest = ox.distance.nearest_nodes(G, X=elon, Y=elat)

    if shortest == 1:
        # Single shortest path. OSMnx documents that this may be None when no
        # path can be found.
        travel_route = ox.shortest_path(
            G, orig, dest, weight=weight)
    else:
        # Pick one of the k shortest paths at random.
        routes = list(ox.k_shortest_paths(
            G, orig, dest, k=shortest, weight=weight))
        travel_route = routes[np.random.choice(len(routes))] if routes else None

    if travel_route is None:
        # No route available: record an explicit "no path" entry instead of
        # crashing on len(None) / an empty candidate list.
        travel_route, travel_time, length = [], [], []
    else:
        # Per-edge travel time and length along the chosen route.
        travel_time = ox.utils_graph.get_route_edge_attributes(
            G, travel_route, attribute='travel_time')
        length = ox.utils_graph.get_route_edge_attributes(
            G, travel_route, attribute='length')

    # Bundle the route and its per-edge attributes.
    route = {
        'travel_route': travel_route,
        'travel_time': travel_time,
        'length': length,
        # A route made of a single node (or none) has no edge to travel.
        'has_path': len(travel_route) > 1,
        'md5': md5}
    route_list[route_index_dict[md5]] = route
    

In [None]:
# Compute and store the route of every origin/destination pair.
# find_travel_path is called only for its side effect on route_list, so a plain
# loop is used: DataFrame.apply would build (and display as the cell output) a
# throwaway Series of None values.
for slon, slat, elon, elat, md5_key in zip(
        traj['lon'].tolist(), traj['lat'].tolist(),
        traj['elon'].tolist(), traj['elat'].tolist(),
        traj['md5'].tolist()):
    find_travel_path(G, slon, slat, elon, elat, md5_key)

In [None]:
import json
import pickle
def save_cache_file(cache_dir, route_list, route_index_dict):
    """Write the route cache and its index into ``cache_dir``.

    Each object is serialised to a JSON string, and that string is then pickled,
    producing ``route_cache.pkl`` and ``route_index_cache.pkl``. The on-disk
    format is kept as is, so a reader has to ``pickle.load`` and then
    ``json.loads`` each file.

    Args:
        cache_dir: Directory receiving the two files. It is created when
            missing, and a trailing path separator is optional.
        route_list: JSON-serialisable list of routes.
        route_index_dict: JSON-serialisable mapping of route key to list index.

    Raises:
        TypeError: If either object is not JSON-serialisable (from ``json.dumps``).
        OSError: If the directory or files cannot be created or written.
    """
    import os  # local import: keeps this function self-contained

    # Serialise both objects first so a serialisation failure leaves no
    # half-written cache behind.
    route_json = json.dumps(route_list)
    route_index_json = json.dumps(route_index_dict)

    # An empty cache_dir means the current directory, which needs no creation.
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    # os.path.join inserts the separator when cache_dir lacks a trailing one;
    # plain string concatenation would write e.g. "./cacheroute_cache.pkl".
    with open(os.path.join(cache_dir, 'route_cache.pkl'), 'wb') as file:
        pickle.dump(route_json, file)
    with open(os.path.join(cache_dir, 'route_index_cache.pkl'), 'wb') as file:
        pickle.dump(route_index_json, file)
# Persist the computed routes and their md5 -> index lookup under ./cache/.
save_cache_file(
    cache_dir='./cache/',
    route_list=route_list,
    route_index_dict=route_index_dict,
)

267195120