In [38]:
import pandas as pd
import numpy as np
from math import radians, atan2, degrees, sin, cos, sqrt

# 地球半径，单位：km
EARTH_RADIUS = 6371.0


def calculate_bearing(dep_lat, dep_lng, arr_lat, arr_lng):
    # 将经纬度转换为弧度
    dep_lat = radians(dep_lat)
    dep_lng = radians(dep_lng)
    arr_lat = radians(arr_lat)
    arr_lng = radians(arr_lng)

    # 计算经纬度差
    d_lng = arr_lng - dep_lng

    # 计算目标点在北极轴上的投影
    x = cos(arr_lat) * sin(d_lng)
    y = cos(dep_lat) * sin(arr_lat) - sin(dep_lat) * cos(arr_lat) * cos(d_lng)

    # 计算方位角
    bearing = atan2(x, y)

    # 将弧度转换为角度
    bearing = degrees(bearing)

    # 确保方位角在0到360度之间
    bearing = (bearing + 360) % 360

    return bearing


# 读取 CSV 文件
df = pd.read_csv("data\\sh_price_info_with_distance.csv")

# 计算每行的运行方向
#df['direction'] = np.arctan2(df['dep_lng'] - df['arr_lng'], df['arr_lat'] - df['dep_lat']) * (180 / np.pi)

# 计算每行的运行方向
directions = []
for index, row in df.iterrows():
    direction = calculate_bearing(
        row['dep_lat'], row['arr_lng'], row['arr_lat'], row['dep_lng'])
    directions.append(direction)

# 将运行方向添加到DataFrame中
df['direction'] = directions


# 将方向值四舍五入到最近的45度
df['direction'] = np.round(df['direction'] / 45) * 45

# 将运行方向进行调整，使得范围在0到360度之间，且正北方向是0度，东方向是90度
df['direction'] = (df['direction'] + 360) % 360

# 处理360度，将其视为0度
df['direction'] = df['direction'] % 360

# 如果方向是360度，则将其转换为0度
df.loc[df['direction'] == 360, 'direction'] = 0

# 将运行方向进行取整处理
# df['direction'] = (np.round(df['direction'] / 45) * 45).astype(int)
#df = df[df['arrival_station'] == "沈阳"]


df['distance_km'] = np.round(df['distance_km'] / 200) 

#df = df[df['distance_km'] >5]
#df = df[df['direction'] ==0]

# 显示结果
df = df[['direction', 'distance_km']]

df

Unnamed: 0,direction,distance_km
0,45.0,2.0
1,45.0,5.0
2,45.0,2.0
3,45.0,1.0
4,45.0,4.0
...,...,...
6131,90.0,1.0
6132,45.0,3.0
6133,45.0,3.0
6134,135.0,6.0


In [39]:
# 对DataFrame进行分组计数
import json


# 将距离大于8的值设为8
df.loc[df['distance_km'] > 8, 'distance_km'] = 8

grouped = df.groupby(['distance_km', 'direction']).size(
).reset_index(name='count')

# 构建JSON数据
json_data = {}
for distance in range(int(df['distance_km'].max()) + 1):
    json_data[distance] = {}
    for direction in [0, 45, 90, 135,180,225,270,315]:
        count = grouped[(grouped['distance_km'] == distance) & (
            grouped['direction'] == direction)]['count'].sum()
        json_data[distance][direction] = count



json_data

{0: {0: 0, 45: 124, 90: 365, 135: 288, 180: 16, 225: 0, 270: 0, 315: 0},
 1: {0: 8, 45: 761, 90: 527, 135: 538, 180: 156, 225: 0, 270: 0, 315: 0},
 2: {0: 26, 45: 198, 90: 256, 135: 221, 180: 185, 225: 0, 270: 0, 315: 0},
 3: {0: 43, 45: 295, 90: 176, 135: 137, 180: 79, 225: 0, 270: 0, 315: 0},
 4: {0: 5, 45: 256, 90: 102, 135: 146, 180: 20, 225: 0, 270: 0, 315: 0},
 5: {0: 87, 45: 176, 90: 74, 135: 80, 180: 0, 225: 0, 270: 0, 315: 0},
 6: {0: 35, 45: 32, 90: 128, 135: 77, 180: 0, 225: 0, 270: 0, 315: 0},
 7: {0: 21, 45: 15, 90: 109, 135: 18, 180: 0, 225: 0, 270: 0, 315: 0},
 8: {0: 10, 45: 42, 90: 100, 135: 14, 180: 0, 225: 0, 270: 0, 315: 0}}

In [3]:
import json
import pandas as pd

# 读取CSV文件
df = pd.read_csv('data/sh_price_info_with_distance.csv')

# 初始化JSON结构的字典
train_data = {}

# 遍历每行数据
for index, row in df.iterrows():
    dep_station = row['departure_station']
    arr_station = row['arrival_station']

    # 如果departure_station还未在字典中，添加它
    if dep_station not in train_data:
        train_data[dep_station] = {}

    # 如果arrival_station还未在当前departure_station的字典中，添加它
    if arr_station not in train_data[dep_station]:
        train_data[dep_station][arr_station] = {
             'count': 0}

    # 递增计数
    train_data[dep_station][arr_station]['count'] += 1

# 将字典转换为JSON格式
json_output = json.dumps(train_data, ensure_ascii=False,)

json_output

'{"上海": {"蚌埠": {"count": 21}, "北京": {"count": 3}, "滁州北": {"count": 9}, "常州": {"count": 91}, "德州": {"count": 10}, "静海": {"count": 2}, "济南": {"count": 4}, "昆山": {"count": 13}, "广阳": {"count": 4}, "明光": {"count": 3}, "南京": {"count": 69}, "宿州": {"count": 8}, "青县": {"count": 2}, "苏州": {"count": 105}, "滕州": {"count": 3}, "天津西": {"count": 4}, "无锡": {"count": 100}, "徐州": {"count": 23}, "杨村": {"count": 1}, "南通": {"count": 2}, "海安": {"count": 11}, "上海西": {"count": 11}, "姜堰": {"count": 4}, "泰州": {"count": 8}, "扬州": {"count": 5}, "南通西": {"count": 9}, "江都": {"count": 3}, "如皋": {"count": 4}, "盐城": {"count": 9}, "东台": {"count": 4}, "淮安东": {"count": 18}, "宿迁": {"count": 8}, "徐州东": {"count": 13}, "建湖": {"count": 4}, "泗阳": {"count": 6}, "北京南": {"count": 6}, "开封": {"count": 9}, "洛阳": {"count": 9}, "商丘": {"count": 9}, "西安": {"count": 10}, "郑州": {"count": 15}, "宝鸡": {"count": 7}, "甘谷": {"count": 3}, "陇西": {"count": 3}, "兰州": {"count": 7}, "天水": {"count": 6}, "咸阳": {"count": 3}, "杨陵": {"count": 3}, "董家口": {