In [1]:
#加载数据

def load_data(edge_file_path, node_file_path):
    """Load road-network edges and nodes from two tab-separated text files.

    Every non-blank line of the edge file is parsed as
    ``eid, start_node, end_node, n, a_0, b_0, ..., a_(n-1), b_(n-1)``: three
    integer ids, a point count ``n`` and ``n`` coordinate pairs describing the
    polyline of the edge. Each pair is handed to ``geopy.distance.geodesic``
    in file order, so the pairs are presumably ``(latitude, longitude)``
    -- TODO confirm against the data source.

    For each edge a "dense" polyline is also built: whenever two consecutive
    points are more than 100 m apart, evenly spaced points are linearly
    interpolated between them so that no gap exceeds 100 m. Consecutive
    duplicate points (distance 0) are collapsed in the dense polyline.

    Parameters
    ----------
    edge_file_path : str or path-like
        Path of the edge file.
    node_file_path : str or path-like
        Path of the node file; read as tab-separated columns
        ``nid, lat, lon`` without a header row.

    Returns
    -------
    edges : pandas.DataFrame
        One row per edge with columns ``eid``, ``start_node``, ``end_node``,
        ``total_distance`` (sum of the segment lengths in metres),
        ``locations`` and ``dense_locations`` (both JSON-encoded lists of
        ``[a, b]`` pairs).
    nodes : pandas.DataFrame
        The node table with columns ``nid``, ``lat``, ``lon``.
    """
    import json
    import math

    import pandas as pd
    from geopy.distance import geodesic

    max_gap_m = 100  # largest allowed spacing between consecutive dense points
    columns = ['eid', 'start_node', 'end_node',
               'total_distance', 'locations', 'dense_locations']

    # Collect plain records and build the frame once; growing a DataFrame
    # with pd.concat inside the loop is quadratic in the number of edges.
    records = []
    # The context manager closes the file even if a line fails to parse.
    with open(edge_file_path, "r") as edge_file:
        for raw_line in edge_file:
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            tokens = line.split('\t')
            eid = int(tokens[0])
            start_node = int(tokens[1])
            end_node = int(tokens[2])
            point_count = int(tokens[3])

            # Vertices of the polyline, in file order.
            locations = [
                [float(tokens[4 + 2 * k]), float(tokens[5 + 2 * k])]
                for k in range(point_count)
            ]

            # Single pass: accumulate the edge length and build the dense
            # polyline, so each segment distance is computed only once.
            dense_locations = []
            total_distance = 0
            previous = locations[0]
            for point in locations:
                distance = geodesic((previous[0], previous[1]),
                                    (point[0], point[1])).m
                total_distance = total_distance + distance

                if distance > 0:
                    # Start of a non-degenerate segment.
                    dense_locations.append(previous)

                if distance > max_gap_m:
                    split_count = math.ceil(distance / max_gap_m)
                    step_a = (point[0] - previous[0]) / split_count
                    step_b = (point[1] - previous[1]) / split_count
                    for j in range(1, split_count):
                        dense_locations.append(
                            [previous[0] + j * step_a, previous[1] + j * step_b])

                previous = point
            # The final vertex is never a segment start, so add it explicitly.
            dense_locations.append(locations[-1])

            records.append({
                'eid': eid,
                'start_node': start_node,
                'end_node': end_node,
                'total_distance': total_distance,
                'locations': json.dumps(locations),
                'dense_locations': json.dumps(dense_locations),
            })

    edges = pd.DataFrame(records, columns=columns)

    # Load nodes
    nodes = pd.read_csv(node_file_path, sep='\t', names=['nid', 'lat', 'lon'])

    return edges, nodes



# Load the edge and node tables from the two text files (paths are relative to the working directory).
edges, nodes = load_data("edgeOSM.txt","nodeOSM.txt")

In [2]:
# 生成图

def create_networkx_graph(edges, nodes):
    '''
    Creates a NetworkX DiGraph from a dataframe of edges and a dataframe of nodes.

    Parameters:
    edges (pandas dataframe): one row per directed edge, with columns
        'start_node', 'end_node', 'total_distance', 'eid', 'locations'
        and 'dense_locations'
    nodes (pandas dataframe): one row per node, with columns 'nid', 'lat', 'lon'

    Returns:
    G (NetworkX DiGraph): graph in which each edge start_node -> end_node
        carries the attributes weight (total_distance), edge_id, locations
        and dense_locations, and each node carries node_id, lat and lon
    '''
    import networkx as nx

    G = nx.DiGraph()

    # Iterate over per-row dicts instead of `iloc[i, :]`: extracting a row as
    # a Series forces one common dtype, which turns an integer 'nid' into a
    # float whenever it sits next to float lat/lon columns. Records keep each
    # column's own type, and building them once avoids per-row indexing cost.
    for edge in edges.to_dict('records'):
        G.add_edge(edge['start_node'], edge['end_node'],
                   weight=edge['total_distance'],
                   edge_id=int(edge['eid']),
                   locations=edge['locations'],
                   dense_locations=edge['dense_locations'])

    # Nodes are added after the edges so that nodes already created by
    # add_edge receive their attributes, and isolated nodes are included too.
    for node in nodes.to_dict('records'):
        G.add_node(node['nid'],
                   node_id=int(node['nid']),
                   lat=node['lat'],
                   lon=node['lon'])

    return G


# Build the directed graph from the edge and node tables loaded above.
G = create_networkx_graph(edges, nodes)

In [34]:
# Random path: generate a simple path by random walk over the graph

import random

def generate_path(graph):
    '''
    Generate a simple path by a random walk from a randomly chosen node.

    At every step one successor of the current node that is not yet on the
    path is picked uniformly at random. The walk stops when the current node
    has no successors at all or when every successor was already visited;
    a short message is printed in either case.

    Parameters:
    graph: object exposing nodes() and neighbors(node), e.g. a NetworkX
        DiGraph; nodes must be hashable

    Returns:
    path (list): visited nodes in walk order, without repetitions;
        an empty list if the graph has no nodes
    '''
    candidates = list(graph.nodes())
    if not candidates:
        # Nothing to walk on; avoid random.choice failing on an empty list.
        return []

    current_node = random.choice(candidates)
    path = [current_node]
    # Set mirror of `path` so the "already visited" test is O(1) per
    # neighbour instead of a linear scan of the path.
    visited = {current_node}
    while True:
        neighbors = list(graph.neighbors(current_node))
        if not neighbors:
            print('走到路尽头')
            break
        unvisited_neighbors = [n for n in neighbors if n not in visited]
        if not unvisited_neighbors:
            print('没有没走过的点了')
            break
        current_node = random.choice(unvisited_neighbors)
        path.append(current_node)
        visited.add(current_node)
    return path

# Generate one random simple path and show its length and node sequence.
path = generate_path(G)#node ids visited by the walk, in order

print('节点数：{}，路径: {}'.format(len(path), path))

没有没走过的点了
节点数：22，路径: [902, 904, 905, 906, 907, 908, 909, 910, 917, 860, 858, 916, 1641, 1497, 1496, 915, 1639, 1623, 1624, 1640, 1643, 1644]


In [35]:
# 生成轨迹

import json
from geopy.distance import geodesic

def generate_traj(G, path):
    """Expand a node path into a per-point trajectory along the graph edges.

    For every consecutive node pair ``(path[i-1], path[i])`` the edge's
    JSON-encoded ``dense_locations`` are decoded and appended to the output.
    For every edge after the first contribution, the first point is dropped
    because it duplicates the last point of the previous edge.

    Parameters
    ----------
    G : mapping-of-mappings graph (e.g. a NetworkX DiGraph) where
        ``G[u][v]`` holds the attributes ``'dense_locations'`` (JSON string)
        and ``'edge_id'``.
    path : list of nodes; each consecutive pair must be an edge of ``G``.

    Returns
    -------
    traj : list
        The edge id of each output point (same length as ``coord``).
    coord : list
        The decoded points, in travel order.
    dis : number
        Sum of the geodesic distances (metres) between consecutive points of
        ``coord``; ``0`` when there are fewer than two points.
    """
    traj = []
    coord = []
    for start, end in zip(path, path[1:]):
        edge = G[start][end]
        # Decode the dense polyline once per edge.
        locations = json.loads(edge['dense_locations'])
        # One edge id per point so traj and coord stay index-aligned.
        edge_ids = [edge['edge_id']] * len(locations)
        if traj:
            # Drop the first point: it is the last point of the previous edge.
            edge_ids = edge_ids[1:]
            locations = locations[1:]
        # Extend in place rather than rebuilding the lists on every edge.
        traj.extend(edge_ids)
        coord.extend(locations)

    dis = 0
    for n1, n2 in zip(coord, coord[1:]):
        dis += geodesic((n1[0], n1[1]), (n2[0], n2[1])).m
    return traj, coord, dis




# Expand the node path into per-point edge ids and coordinates, plus the total length.
traj, coord, dis = generate_traj(G, path)

# Show the total distance, the number of points, and both lists as JSON.
print('距离：',dis)
print('坐标点个数：',len(traj))
import json
print(json.dumps(traj))
print(json.dumps(coord))

距离： 290.2461614965205
坐标点个数： 22
[1909, 1909, 821, 1898, 822, 1900, 823, 2942, 2867, 833, 785, 831, 1753, 4356, 1539, 1749, 1747, 1751, 1724, 1750, 1903, 1756]
[[40.5476783, -3.5503876], [40.5472896, -3.5504187], [40.5471567, -3.5504306], [40.5471102, -3.5504348], [40.5469994, -3.5504624], [40.5469594, -3.5504723], [40.546888, -3.5504923], [40.5467922, -3.5505438], [40.5468139, -3.5504678], [40.5468356, -3.5503918], [40.5468319, -3.5502282], [40.5468007, -3.5501506], [40.5467654, -3.5500999], [40.5466955, -3.5500325], [40.5466266, -3.5499957], [40.5465617, -3.5499841], [40.5467131, -3.5498365], [40.5469782, -3.5494188], [40.5468056, -3.5498249], [40.546778, -3.5499064], [40.546767, -3.549945], [40.5467602, -3.5500266]]


In [5]:
import random

def sample_lists(list1, list2, k):
    """
    Sample the same random positions from two equal-length lists, keeping
    the original order.

    :param list1: List, the first list.
    :param list2: List, the second list (expected to be as long as list1).
    :param k: int, the number of elements to sample.
    :return: Tuple of two lists. When k <= 2 the two input lists are
        returned as they are (no sampling, no copy). Otherwise each returned
        list holds the elements at the same min(k, len(list1)) randomly
        chosen indices, in increasing index order.
    """
    if k <= 2:
        return list1, list2

    n = len(list1)
    # random.sample raises ValueError when asked for more items than exist;
    # clamp so an oversized k simply yields every element.
    k = min(k, n)

    # Draw k distinct indices, then sort them to preserve the original order.
    sorted_indices = sorted(random.sample(range(n), k=k))

    # Pick the same positions from both lists so the pairs stay aligned.
    sampled1 = [list1[i] for i in sorted_indices]
    sampled2 = [list2[i] for i in sorted_indices]

    return sampled1, sampled2

# Small demonstration of sample_lists on two aligned lists.
list1 = [1, 2, 3, 4, 5]
list2 = ['a', 'b', 'c', 'd', 'e']

sampled1, sampled2 = sample_lists(list1, list2, k=3)

print(sampled1)  # example output: [2, 4, 5] (random, varies between runs)
print(sampled2)  # example output: ['b', 'd', 'e'] (same positions as sampled1)

[2, 3, 4]
['b', 'c', 'd']


In [6]:
# Path sampling: request 60% of the points, capped at 256.
# NOTE(review): this rebinds traj/coord to the sampled result, so running the
# cell again samples from the already-sampled lists rather than the originals.
traj, coord = sample_lists(traj, coord, min(256,int(len(traj)*0.6)))

# Show the sampled length and both sampled lists as JSON.
print("长度",len(traj))
print(json.dumps(traj))
print(json.dumps(coord))

长度 27
[4181, 2836, 2638, 1951, 2800, 2800, 2781, 2766, 2647, 1957, 2306, 1891, 2709, 2697, 2645, 2683, 2683, 2671, 2953, 2643, 2629, 2617, 2603, 2570, 2947, 2943, 2545]
[[40.5745115, -3.5094918], [40.5754143, -3.5092835], [40.5755938, -3.5092351], [40.5762532, -3.5090701], [40.5777559, -3.5086624], [40.5784176, -3.508495], [40.5795258, -3.5082032], [40.5796489, -3.5081725], [40.5798394, -3.5081345], [40.5804517, -3.5079711], [40.5804988, -3.5079592], [40.5814554, -3.507714], [40.5822709, -3.5075635], [40.5822896, -3.5075602], [40.58237, -3.5075494], [40.58385745, -3.5073406], [40.5843366, -3.5072762], [40.5853783, -3.5071154], [40.5858032, -3.5070702], [40.5859552, -3.5070635], [40.5862493, -3.5071127], [40.5866636, -3.5072934], [40.5870873, -3.5075509], [40.587507, -3.5077585], [40.5876703, -3.5078037], [40.5880063, -3.5078427], [40.5881369, -3.5078298]]


In [7]:
#噪声

import numpy as np

def add_gaussian_noise(pos, sigma):
    """
    Perturb a coordinate pair with zero-mean Gaussian noise.

    pos: two-element sequence of coordinates in degrees. Both components are
        treated the same way.
        NOTE(review): elsewhere in this notebook the same pairs are passed to
        geodesic() and folium, which suggests [latitude, longitude] order
        -- confirm.
    sigma: standard deviation of the noise, roughly the displacement in
        metres under the fixed 111000 metres-per-degree approximation used
        for both components.

    Returns a new two-element list [pos[0] + offset_0, pos[1] + offset_1]
    expressed in degrees; the input is not modified.
    """
    # Rough conversion factor between degrees and metres (same for both axes).
    metres_per_degree = 111000

    # One independent N(0, sigma) draw per component.
    offset_first, offset_second = np.random.normal(0, sigma, 2)

    # Convert to metres, shift by the noise, convert back to degrees.
    shifted_first = (pos[0] * metres_per_degree + offset_first) / metres_per_degree
    shifted_second = (pos[1] * metres_per_degree + offset_second) / metres_per_degree

    return [shifted_first, shifted_second]


# Add noise to every coordinate; each point gets its own sigma drawn by
# np.random.randint(10, 25), i.e. an integer from 10 to 24.
noisy_coord = [add_gaussian_noise(c, np.random.randint(10, 25)) for c in coord]
print(json.dumps(noisy_coord))

[[40.57473427090424, -3.509334310599005], [40.57536818067149, -3.5092866508727], [40.57545962894158, -3.509165178621186], [40.57635891656375, -3.5089058377658113], [40.577889813213474, -3.5089184772610613], [40.5783516710605, -3.508536598043174], [40.579907264726785, -3.5081753049362856], [40.57969977844731, -3.5083692047493185], [40.57965453177172, -3.5079773652907713], [40.58037455167919, -3.507629917998255], [40.580472828045515, -3.507820676002099], [40.58140510110583, -3.5080268752436967], [40.58233302713317, -3.507440528869762], [40.58197339655893, -3.5075011020958384], [40.582405093540885, -3.5078371397152837], [40.58381047210786, -3.5075286458005803], [40.584272445869075, -3.5073646738091058], [40.58553877067066, -3.5072944489894105], [40.58594230916487, -3.5071870760591], [40.585819551538535, -3.5070593095835476], [40.58629989839638, -3.507234243667552], [40.586793983385945, -3.5073113298197467], [40.58698111136293, -3.5074274604878175], [40.58799711920431, -3.50778989601342], 

In [8]:
# All in one step: generate a path plus its trajectory and coordinate data

path = generate_path(G)  # random simple path (node ids)
traj, coord, dis = generate_traj(G, path) # per-point edge ids, coordinates, total length
sampled_traj, sampled_coord = sample_lists(traj, coord, min(256,int(len(traj)*0.6))) # sample 60% of the points, at most 256
noisy_coord = [add_gaussian_noise(c, np.random.randint(10, 25)) for c in sampled_coord]  # add noise to the sampled points

# Report the distance, the point counts before/after sampling, and the data as JSON.
print('距离：',dis)
print('坐标点个数：',len(traj))
print("采样后个数：",len(sampled_traj))
print(json.dumps(traj))
print(json.dumps(sampled_traj))
print(json.dumps(sampled_coord))
print(json.dumps(noisy_coord))

距离： 2515.0043096784184
坐标点个数： 63
采样后个数： 37
[3604, 3604, 2036, 2036, 2034, 4031, 4029, 1376, 4032, 1375, 1374, 1371, 4033, 1368, 4034, 1364, 1362, 1362, 4035, 3510, 1357, 224, 217, 4023, 3502, 226, 3507, 3500, 1352, 3498, 1348, 220, 222, 1340, 4004, 3496, 3494, 1337, 4005, 3987, 1323, 3988, 3989, 4036, 4000, 1311, 1308, 4017, 1309, 1312, 4015, 1314, 3601, 1317, 3996, 1324, 3998, 212, 214, 214, 1365, 1365, 4027]
[3604, 3604, 2036, 4031, 4029, 1376, 4032, 1375, 1374, 4033, 4034, 1362, 1357, 224, 217, 4023, 226, 3507, 3500, 3498, 220, 222, 4004, 3496, 3494, 3988, 3989, 4036, 1311, 1308, 4017, 1312, 4015, 1317, 3996, 1324, 1365]
[[40.5462305, -3.4737381], [40.5454283, -3.4734806], [40.5443901, -3.473137], [40.5434371, -3.4728082], [40.5426414, -3.4726064], [40.5421806, -3.472469], [40.542032, -3.4724386], [40.5418667, -3.4724254], [40.5417214, -3.4724179], [40.5413399, -3.4723226], [40.5408501, -3.4722148], [40.5391683, -3.4718771], [40.5392901, -3.4716041], [40.5393191, -3.4714826], [40.53

In [23]:
# Choose the coordinates to visualise.
# Reading them from "data5.txt" is disabled (commented out below); the noisy
# coordinates produced by the pipeline cell are used instead.
locations=[]
import json
from geopy.distance import geodesic

# file = open("data5.txt","r")
# while True:
#     line = file.readline()
#     if not line:
#         break
#     tokens = line.strip().split(' ')
#     locations.append([float(tokens[0]),float(tokens[1])])
# file.close()

coords = noisy_coord

# Disabled: map_mapping, transformer and gen_full_edges are not defined in this part of the notebook.
#predict = map_mapping(transformer, coords)
#full_predict = gen_full_edges(predict)

#print(json.dumps(full_predict))
#print('预测路径边数 -> ',len(full_predict))

# Quick sanity check of the selected coordinates: type, length, first point.
print(type(coords))
print(len(coords))
print(coords[0])


<class 'list'>
37
[40.54653225487297, -3.4736978538962284]


In [20]:
import folium
# Create the map, centred on the first trajectory point.
world_map = folium.Map(location=coords[0], zoom_start=13)

# Draw the trajectory as one red segment per pair of consecutive points.
for segment_start, segment_end in zip(coords, coords[1:]):
    segment = folium.PolyLine([segment_start, segment_end], color='red', weight=2)
    segment.add_to(world_map)

# Last expression of the cell: renders the map inline.
world_map

In [21]:
# Write the rendered map to an HTML file in the working directory.
world_map.save('test_01.html')