# 为什么选择单车停放热点区域？

在共享单车热点区域识别中，共识别了三类热点区域，包括：取车热点、还车热点和单车停放热点。

取车热点、还车热点仅在早晚高峰存在，而单车停放热点则在24小时均有分布，满足连续观测的条件。

7天×每天18个小时时段（06:00–23:00）×每个时段的n个热点，合计得到23605个单车停放热点区域。其中有些区域在多个时段均出现，有些区域仅出现在个别时段，还有些热点区域的形态会随时间扩张或收缩……

此文档将解决如何将这23605个热点区域Polygon进行聚合的问题。

# 数据读取

In [1]:
import pandas as pd
import geopandas
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os

# Directory holding the parking-hotspot files; built with os.path.join so the
# path is valid on every OS, not only where "\" is the separator.
hotspot_dir = os.path.join('result', 'parkinghotspot')

# Collect the file names. Sorted so the concatenation order (and therefore the
# row order seen by later cells) does not depend on the filesystem.
files = sorted(os.listdir(hotspot_dir))

# Read every file and stack the results into one table
data = []
for i in files:
    print(i)
    tmp = geopandas.read_file(os.path.join(hotspot_dir, i))
    tmp['from'] = i     # keep the source file name so rows can be grouped by file later
    data.append(tmp)

data = pd.concat(data)

ph0510.json
ph0511.json
ph0512.json
ph056.json
ph057.json
ph058.json
ph059.json


In [3]:
# Keep only the attribute columns. .copy() makes `datas` an independent frame,
# so the column assignments below never write into a slice of `data`.
datas = data[['from', 'time', 'group', 'radius', 'x', 'y']].copy()

# Rebuild the geometry as points from the x / y columns
datas['geometry'] = geopandas.points_from_xy(datas['x'], datas['y'])
datas = geopandas.GeoDataFrame(datas)

# The x / y values read from the files are declared to be EPSG:4326 coordinates
datas = datas.set_crs("EPSG:4326")

# Reproject once and reuse the result for both columns; x / y now hold the
# EPSG:2416 coordinates while the geometry column stays in EPSG:4326.
projected = datas.to_crs("EPSG:2416")['geometry']
datas['x'] = projected.x
datas['y'] = projected.y

In [4]:
# Preview the first rows of the hotspot table built above
datas.head()

Unnamed: 0,from,time,group,radius,x,y,geometry
0,ph0510.json,2019-05-10T06:00:00,0,520.13,40641380.0,3460555.0,POINT (121.48495 31.25787)
1,ph0510.json,2019-05-10T06:00:00,1,379.71,40632860.0,3451007.0,POINT (121.39357 31.17259)
2,ph0510.json,2019-05-10T06:00:00,2,286.73,40631780.0,3461918.0,POINT (121.38436 31.27128)
3,ph0510.json,2019-05-10T06:00:00,3,2156.79,40636130.0,3457851.0,POINT (121.42951 31.23411)
4,ph0510.json,2019-05-10T06:00:00,4,884.73,40644340.0,3456942.0,POINT (121.51485 31.22473)


In [10]:
# Total number of hotspot rows across all input files
len(datas)

23605

# 聚合

## DBSCAN

In [6]:
# Work on a copy so the changes made to `test` in later cells leave `datas` untouched
test = datas.copy()

In [7]:
from sklearn.cluster import DBSCAN

# Cluster the hotspot centres on their x / y columns with DBSCAN
# (eps and min_samples are kept exactly as chosen by the author).
model = DBSCAN(eps=50, min_samples=3)
coords = test[['x', 'y']].values
model.fit(coords)

# Cluster label that DBSCAN assigned to each row
labels = model.labels_

# Number of distinct clusters; the noise label -1 is left out of the count
n_clusters_ = len(set(labels) - {-1})

# Replace the 'group' column with the DBSCAN cluster label
test['group'] = labels

print('number of clusters: %d' % n_clusters_)

number of clusters: 922


In [8]:
# Discard the rows whose cluster label is -1
test = test.loc[test['group'] != -1]

# Overwrite x / y with the coordinates of each hotspot's centre-point geometry
# (author's note: same as test.centroid.x, test.centroid.y)
test = test.assign(x=test['geometry'].x, y=test['geometry'].y)

In [9]:
# Display the clustered hotspot table
test

Unnamed: 0,from,time,group,radius,x,y,geometry
0,ph0510.json,2019-05-10T06:00:00,0,520.13,121.484954,31.257873,POINT (121.48495 31.25787)
1,ph0510.json,2019-05-10T06:00:00,1,379.71,121.393573,31.172592,POINT (121.39357 31.17259)
2,ph0510.json,2019-05-10T06:00:00,2,286.73,121.384359,31.271281,POINT (121.38436 31.27128)
4,ph0510.json,2019-05-10T06:00:00,3,884.73,121.514846,31.224733,POINT (121.51485 31.22473)
5,ph0510.json,2019-05-10T06:00:00,4,401.05,121.433303,31.164949,POINT (121.43330 31.16495)
...,...,...,...,...,...,...,...
3394,ph059.json,2019-05-09T23:00:00,155,152.29,121.381150,31.173798,POINT (121.38115 31.17380)
3395,ph059.json,2019-05-09T23:00:00,150,178.41,121.424136,31.161319,POINT (121.42414 31.16132)
3396,ph059.json,2019-05-09T23:00:00,105,152.62,121.426305,31.163320,POINT (121.42630 31.16332)
3397,ph059.json,2019-05-09T23:00:00,864,167.17,121.437512,31.179207,POINT (121.43751 31.17921)


In [11]:
# Save the clustered centre points as GeoJSON. A forward slash is used as the
# separator so the path resolves into the result directory on every OS.
test.to_file('result/CenterPoint.json', driver='GeoJSON')

## 用各簇的中心点、当量半径绘制圆形区域

In [14]:
# Median (0.5 quantile) of radius within each cluster
test.groupby(['group'])['radius'].quantile(0.5).reset_index()

Unnamed: 0,group,radius
0,0,537.15
1,1,354.80
2,2,350.95
3,3,886.77
4,4,379.39
...,...,...
917,917,159.02
918,918,1480.56
919,919,181.00
920,920,285.31


In [15]:
# Mean of radius within each cluster
test.groupby(['group'])['radius'].mean().reset_index()

Unnamed: 0,group,radius
0,0,533.825714
1,1,325.498171
2,2,344.281681
3,3,891.441333
4,4,369.523077
...,...,...
917,917,162.506000
918,918,1472.403333
919,919,180.420000
920,920,305.910000


In [13]:
# 各簇的中心点
cenx = test.groupby(['group'])['x'].mean().reset_index()
ceny = test.groupby(['group'])['y'].mean().reset_index()

# 各簇的平均当量半径
# 这里是否要使用中位数而不是平均值？平均值受极值影响较大
radius = test.groupby(['group'])['radius'].mean().reset_index()

In [14]:
from shapely.geometry import Point,Polygon,shape


# One row per cluster: group id, centre x / y and mean radius
res = pd.concat([cenx, ceny[['y']], radius[['radius']]], axis=1)
res = geopandas.GeoDataFrame(res)
res['geometry'] = geopandas.points_from_xy(res['x'], res['y'])
res = res.set_crs('EPSG:4326')
# Reproject so the buffer distance below is applied in EPSG:2416 coordinates
res = res.to_crs('EPSG:2416')

# res is already in EPSG:2416 at this point, so the projected coordinates are
# read straight from the geometry (no second reprojection needed)
res['x'] = res['geometry'].x
res['y'] = res['geometry'].y

# Circle around each cluster centre, using that row's radius as the buffer
# distance (vectorized instead of a row-by-row apply)
geometry = res['geometry'].buffer(res['radius'])

# Swap the centre points for the circles, then convert the circles back to
# EPSG:4326; the x / y columns keep the EPSG:2416 values assigned above
res['geometry'] = geometry
res = res.to_crs('EPSG:4326')

In [15]:
# Save the per-cluster circles as GeoJSON. A forward slash is used as the
# separator so the path resolves into the result directory on every OS.
res.to_file('result/researchObject_overlap.json', driver='GeoJSON')

## 去重

输入：researchObject_overlap.json --> 去重后输出：researchObject.geojson