# 这个文件用来生成dataset的坐标和target部分

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point


In [2]:
df1 = pd.read_csv('dataset/02_NSW_DC_fast_chargers_2023_08-22.csv')
df2 = pd.read_csv('dataset/02_NSW_destination_chargers_2023_08_16.csv',encoding='ISO-8859-1')

In [3]:
df3 = pd.read_csv('dataset/002_neg_point.csv')

In [4]:
df4 = pd.read_csv('dataset/02_approved_Charger_list_v1.csv')
df5 = pd.read_csv('dataset/002_Type5.csv')

In [5]:
boundary_df = gpd.read_file('dataset/boundary.geojson')

###### step1. 整合正类数据，即存在于02_NSW_DC_fast_chargers_2023_08-22.csv(Type1)和02_NSW_destination_chargers_2023_08_16.csv(Type2)的坐标点

In [6]:
df1 = df1[['Latitude', 'Longitude']]
df1['Type'] = 1

df2 = df2[['Latitude', 'Longitude']]
df2['Type'] = 2

result1 = pd.concat([df1, df2], ignore_index=True)

In [7]:
result1

Unnamed: 0,Latitude,Longitude,Type
0,-34.508903,144.842785,1
1,-34.746497,146.550076,1
2,-35.354863,145.725113,1
3,-34.639541,143.564708,1
4,-33.923735,147.203554,1
...,...,...,...
1043,-35.537269,144.966563,2
1044,-31.960975,141.462880,2
1045,-34.451028,150.447443,2
1046,-33.688731,149.564345,2


###### step2. 整合正类数据和负类数据，负类数据存在于05_neg_point.csv(Type3)

In [8]:
result2 = pd.concat([result1, df3], ignore_index=True)

In [9]:
result2

Unnamed: 0,Latitude,Longitude,Type
0,-34.508903,144.842785,1
1,-34.746497,146.550076,1
2,-35.354863,145.725113,1
3,-34.639541,143.564708,1
4,-33.923735,147.203554,1
...,...,...,...
1558,-30.484001,144.869469,3
1559,-30.378665,144.918849,3
1560,-30.376385,144.968866,3
1561,-30.298122,144.945973,3


###### step3. 加上需要预测的坐标点，Type记为4(Approved)，5(自己找的)

In [10]:
df_temp = df4[['lat', 'lng']]
df_temp['Type'] = 4
df_temp = df_temp.rename(columns={'lat': 'Latitude', 'lng': 'Longitude'})

df_temp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['Type'] = 4


Unnamed: 0,Latitude,Longitude,Type
0,-32.924818,151.751885,4
1,-33.278848,151.405538,4
2,-33.376148,151.473669,4
3,-33.796629,150.848800,4
4,-33.957864,151.139995,4
...,...,...,...
81,-31.909129,152.460275,4
82,-29.056524,152.018188,4
83,-35.130380,147.341831,4
84,-34.843652,148.914140,4


In [11]:
result3 = pd.concat([result2, df_temp], ignore_index=True)
result3

Unnamed: 0,Latitude,Longitude,Type
0,-34.508903,144.842785,1
1,-34.746497,146.550076,1
2,-35.354863,145.725113,1
3,-34.639541,143.564708,1
4,-33.923735,147.203554,1
...,...,...,...
1644,-31.909129,152.460275,4
1645,-29.056524,152.018188,4
1646,-35.130380,147.341831,4
1647,-34.843652,148.914140,4


In [None]:
result3 = pd.concat([result3,df5], ignore_index=True)

###### step3. 对坐标点进行筛选，过滤掉不属于NSW州的坐标点

In [17]:
# 转换DataFrame到GeoDataFrame
gdf = gpd.GeoDataFrame(result3, geometry=gpd.points_from_xy(result3.Longitude, result3.Latitude))

# 使用within方法来检查点是否在NSW边界内
gdf['is_in_nsw'] = gdf.apply(lambda row: boundary_df.contains(row.geometry).any(), axis=1)


In [18]:
gdf

Unnamed: 0,Latitude,Longitude,Type,geometry,is_in_nsw
0,-34.508903,144.842785,1,POINT (144.84278 -34.50890),True
1,-34.746497,146.550076,1,POINT (146.55008 -34.74650),True
2,-35.354863,145.725113,1,POINT (145.72511 -35.35486),True
3,-34.639541,143.564708,1,POINT (143.56471 -34.63954),True
4,-33.923735,147.203554,1,POINT (147.20355 -33.92374),True
...,...,...,...,...,...
1644,-31.909129,152.460275,4,POINT (152.46028 -31.90913),True
1645,-29.056524,152.018188,4,POINT (152.01819 -29.05652),True
1646,-35.130380,147.341831,4,POINT (147.34183 -35.13038),True
1647,-34.843652,148.914140,4,POINT (148.91414 -34.84365),True


In [19]:
gdf2= gdf[gdf['is_in_nsw']!=True]
gdf2

Unnamed: 0,Latitude,Longitude,Type,geometry,is_in_nsw
1073,-35.472848,148.954759,3,POINT (148.95476 -35.47285),False
1092,-35.83432,149.005869,3,POINT (149.00587 -35.83432),False
1093,-35.155038,149.102642,3,POINT (149.10264 -35.15504),False
1094,-35.203094,149.004652,3,POINT (149.00465 -35.20309),False
1095,-35.235115,148.950757,3,POINT (148.95076 -35.23512),False
1096,-35.275124,148.885839,3,POINT (148.88584 -35.27512),False
1097,-35.302602,148.864424,3,POINT (148.86442 -35.30260),False
1098,-35.313378,148.925184,3,POINT (148.92518 -35.31338),False
1099,-35.322869,148.913968,3,POINT (148.91397 -35.32287),False
1351,-36.875812,48.398102,3,POINT (48.39810 -36.87581),False


In [20]:
# 筛选在NSW州内的点
gdf_in_nsw = gdf[gdf['is_in_nsw']]

# 删除不需要的'is_in_nsw'列
gdf_in_nsw = gdf_in_nsw.drop(columns=['is_in_nsw','geometry'])

# 现在 gdf_in_nsw 只包含在NSW边界内的点
gdf_in_nsw

Unnamed: 0,Latitude,Longitude,Type
0,-34.508903,144.842785,1
1,-34.746497,146.550076,1
2,-35.354863,145.725113,1
3,-34.639541,143.564708,1
4,-33.923735,147.203554,1
...,...,...,...
1644,-31.909129,152.460275,4
1645,-29.056524,152.018188,4
1646,-35.130380,147.341831,4
1647,-34.843652,148.914140,4


In [21]:
gdf_in_nsw.to_csv('dataset/902_coordinate.csv', index=False)