In [None]:
import geopandas as gpd
import pandas as pd
import folium
import pyproj
from shapely.geometry import Point
import shapely
import matplotlib.pyplot as plt
import re
from tqdm.notebook import tqdm
import numpy as np
import seaborn as sns

In [None]:
import matplotlib.font_manager as fm
import matplotlib as mpl

# Register the bundled NanumSquare font file and make it the default
# sans-serif face (presumably so the Korean labels in this notebook render).
font_path = 'config/NanumSquareR.ttf'
fm.fontManager.addfont(font_path)
prop = fm.FontProperties(fname=font_path)

plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': prop.get_name(),
    # Use the ASCII hyphen for negative tick labels instead of the Unicode minus.
    'axes.unicode_minus': False,
})

# Keyword-argument presets for plotting helpers (not used by the cells visible here).
box_kwargs = {'boxprops': {'alpha': 0.5}, 'fliersize': 0}
strip_kwargs = {'size': 10}
bar_kwargs = {'capsize': 0.1}
cormap_kwargs = {'annot': True, 'cmap': 'coolwarm', 'vmin': -1, 'vmax': 1}

In [None]:
# Coordinate reference systems referenced by this notebook.
xy4326 = pyproj.CRS('epsg:4326')
xy5179 = pyproj.CRS('epsg:5179')
xy5186 = pyproj.CRS('epsg:5186')

# Transformers between EPSG:5186 and EPSG:4326 (forward and inverse).
# NOTE(review): both are built without `always_xy`, so they follow each CRS's
# declared axis order -- confirm the (x, y) values passed in match that order.
transformer = pyproj.Transformer.from_crs(crs_from=xy5186, crs_to=xy4326)
# transformer_86 = pyproj.Transformer.from_crs(xy5186, xy5186)
transformer_inv = pyproj.Transformer.from_crs(crs_from=xy4326, crs_to=xy5186)

In [None]:
# Load the input records; later cells read their `x` / `y` coordinate columns.
processing_data_path = 'data/test_data.csv'
processing_data = pd.read_csv(processing_data_path)

In [None]:
# Sanity check: run the first record's (x, y) pair through the transformer.
first_x, first_y = processing_data.loc[0, ['x', 'y']].values
transformer.transform(first_x, first_y)

In [None]:
# Convert every (x, y) pair in a single vectorized call instead of invoking the
# transformer twice per row through DataFrame.apply.
lat_values, lon_values = transformer.transform(
    processing_data['x'].to_numpy(dtype=float),
    processing_data['y'].to_numpy(dtype=float),
)
processing_final = processing_data.assign(
    latitude=lat_values,
    # NOTE(review): "logitude" looks like a typo for "longitude"; the column
    # name is kept unchanged so any code reading it keeps working.
    logitude=lon_values,
)

# Point geometries are built from the original (untransformed) x / y columns.
processing_geo = gpd.GeoDataFrame(
    processing_final.assign(
        geometry=[
            shapely.geometry.Point((px, py))
            for px, py in zip(processing_final['x'], processing_final['y'])
        ]
    )
)


def _sgg_label(sido, sigungu):
    """Build the "<시도> <district>" label used to filter the boundary shapefiles.

    The district is the last whitespace-separated token of `sigungu`, except
    when `sigungu` contains "부천": then every token after the first is kept.
    """
    tokens = sigungu.split()
    district = ' '.join(tokens[1:]) if '부천' in sigungu else tokens[-1]
    return f"{sido} {district}"


# A plain comprehension (rather than apply(axis=1).tolist()) also works when
# the frame has no rows.
sgg_pairs = processing_geo.filter(regex='시도|시군구').drop_duplicates()
sgg_select = [
    _sgg_label(sido, sigungu)
    for sido, sigungu in zip(sgg_pairs['시도'], sgg_pairs['시군구'])
]

In [None]:
# Load the district boundary shapefiles for each region and keep only the
# districts whose SGG_NM value is listed in `sgg_select`.
sgg_filter = "SGG_NM in @sgg_select"
shp_encoding = 'cp949'

seoul_geo = gpd.read_file(
    'data/seoul_geo/LARD_ADM_SECT_SGG_11_202405.shx', encoding=shp_encoding
).query(sgg_filter)
gyeonggi_geo = gpd.read_file(
    'data/gyeonggi_geo/LARD_ADM_SECT_SGG_41_202405.shx', encoding=shp_encoding
).query(sgg_filter)
incheon_geo = gpd.read_file(
    'data/incheon_geo/LARD_ADM_SECT_SGG_28_202405.shx', encoding=shp_encoding
).query(sgg_filter)

In [None]:
# Merge the selected districts of all three regions into one geometry:
# first a union per region, then a union of those three results.
region_unions = [
    shapely.union_all(region.geometry.values)
    for region in (seoul_geo, incheon_geo, gyeonggi_geo)
]
total_geo = shapely.union_all(region_unions)

In [None]:
# Display the "<시도> <district>" labels used to filter the boundary shapefiles.
sgg_select

In [None]:
def find_coordinates(address, search_type='road', api_key=None, timeout=10):
    """Look up EPSG:5186 coordinates for an address via the VWorld address API.

    Parameters
    ----------
    address : str
        Address text sent as the API's ``address`` parameter.
    search_type : str, default 'road'
        Value for the API's ``type`` parameter (the notebook uses 'road' and
        'parcel').
    api_key : str, optional
        VWorld API key. When omitted, the ``VWORLD_API_KEY`` environment
        variable is used, falling back to the key embedded below.
    timeout : float, default 10
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    tuple
        ``(x, y)`` exactly as found under ``response.result.point`` in the
        JSON payload, or the sentinel ``(0, 0)`` when the request fails, the
        status is not 200, or the payload holds no usable result. Failures are
        reported with ``print`` rather than raised, so one bad address does not
        abort a batch lookup.
    """
    import os
    import requests

    apiurl = "https://api.vworld.kr/req/address?"
    # NOTE(review): a credential is embedded in source; prefer setting
    # VWORLD_API_KEY (or passing api_key) and rotate/remove this fallback.
    key = api_key or os.environ.get(
        "VWORLD_API_KEY", "9D2B20D9-1DAC-36BA-9D42-01A1AD1F49A4"
    )
    params = {
        "service": "address",
        "request": "getcoord",
        "crs": "epsg:5186",
        "address": address,
        "format": "json",
        "type": search_type,
        "key": key,
    }

    # Without a timeout a stalled connection would hang the notebook forever.
    try:
        response = requests.get(apiurl, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"{type(exc).__name__}: {exc}")
        print(address)
        return 0, 0

    if response.status_code != 200:
        print(response.status_code)
        print(address)
        return 0, 0

    # A non-JSON body or a payload without result/point is treated the same as
    # "address not found".
    try:
        point = response.json()['response']['result']['point']
        return point['x'], point['y']
    except (KeyError, TypeError, ValueError):
        print(address)
        return 0, 0

In [None]:
# Smoke test: geocode one known road address with the default search type.
sample_address = '서울특별시 동대문구 왕산로 지하205(전농동)'
find_coordinates(sample_address)

In [None]:
# Preview the address regex: one "<region>.*<district>" alternative per selected
# Seoul / Incheon district, plus a bare "부천" alternative.
seoul_patterns = ['서울.*' + label.split()[-1] for label in sgg_select if '서울' in label]
incheon_patterns = ['인천.*' + label.split()[-1] for label in sgg_select if '인천' in label]
'|'.join(seoul_patterns + incheon_patterns + ['부천'])

In [None]:
# Regex selecting station road addresses that fall in the selected Seoul /
# Incheon districts, or that mention "부천".
search_regex = '|'.join(
    ['서울.*' + label.split()[-1] for label in sgg_select if '서울' in label]
    + ['인천.*' + label.split()[-1] for label in sgg_select if '인천' in label]
    + ['부천']
)
# Line names containing any of these city names are excluded.
excluded_line_regex = '|'.join(['대구', '부산', '광주', '대전'])

station_data = (
    pd.read_excel('data/전체_도시철도역사정보_20240630.xlsx')
    .filter(regex='^역사명$|^노선명$|역사도로명주소')
    .assign(
        # Strip the parenthesised suffix from the road address. A raw string
        # avoids the invalid "\(" escape, and the .str accessor leaves missing
        # addresses as NaN instead of raising.
        address=lambda df: df['역사도로명주소'].str.replace(r'\(.*\)', '', regex=True),
        # Rows with a missing address or line name count as "no match".
        YN=lambda df: df['역사도로명주소'].str.contains(search_regex, na=False),
        YN2=lambda df: ~df['노선명'].str.contains(excluded_line_regex, na=False),
    )
    .query('YN & YN2')
    # Keep one row per station name: the first after sorting by line name.
    .sort_values('노선명')
    .groupby('역사명')
    .first()
    .reset_index()
)

In [None]:
# Geocode every cleaned station address (one API request each, with a progress bar).
coords = list(map(find_coordinates, tqdm(station_data['address'].values)))

In [None]:
# Road addresses singled out as error cases; parcel-based lookups are supplied
# for them separately.
error_case = [
    '인천광역시 남동구 고잔동 970-14',
    '인천광역시 계양구 게양대로 73',
    '인천광역시 서구 에코로 163',
]
# +"""
# """.split('\n') # stations that additionally hit a 502 error

In [None]:
# Show the station rows whose raw road address is one of the error cases.
station_data[station_data['역사도로명주소'].isin(error_case)]

In [None]:

# Manual fallback: geocode three stations by parcel address and index the
# resulting x / y values by station name.
parcel_address_by_station = {
    '남동인더스파크역': '인천광역시 남동구 고잔동 970-14',
    '작전': '인천 계양구 작전동 878',
    '청라국제도시': '인천 서구 청라동 7-5',
}
parcel_coords = [
    find_coordinates(parcel_address, 'parcel')
    for parcel_address in parcel_address_by_station.values()
]
add_coordis = pd.DataFrame(
    {
        '역사명': list(parcel_address_by_station),
        'x': [pair[0] for pair in parcel_coords],
        'y': [pair[1] for pair in parcel_coords],
    }
).set_index('역사명')

In [None]:
# Display the manually geocoded fallback coordinates (indexed by station name).
add_coordis

In [None]:
# Tabulate the geocoding results per station. A lookup that returned the
# (0, 0) failure sentinel becomes NaN in both columns; the x value alone
# decides whether a pair counts as failed.
coord_x = [np.nan if pair[0] == 0 else pair[0] for pair in coords]
coord_y = [np.nan if pair[0] == 0 else pair[1] for pair in coords]
df_coordi = pd.DataFrame(
    {'역사명': station_data['역사명'], 'x': coord_x, 'y': coord_y}
).set_index('역사명')


In [None]:
# Fill missing API coordinates from the manual fallback table, then attach the
# coordinates to the station records by station name.
coords_filled = df_coordi.combine_first(add_coordis).reset_index()
station_final = station_data.merge(coords_filled, on='역사명')

In [None]:
# Preview the first rows of the station table with coordinates attached.
station_final.head()

In [None]:
# Build a point geometry per station, flag whether it lies within the merged
# study-area geometry, and keep only the stations inside it.
stations_with_points = station_final.assign(
    geometry=lambda df: df.apply(lambda rec: Point((rec.x, rec.y)), axis=1)
)
inside_area = stations_with_points.geometry.apply(
    lambda pt: shapely.within(pt, total_geo)
)
station_geo = gpd.GeoDataFrame(
    stations_with_points.assign(within=inside_area).query('within')
)

In [None]:
# One colour per subway line, in sorted line-name order.
line_list = sorted(station_geo['노선명'].unique())
# Request exactly len(line_list) colours. Slicing the default 20-colour
# palette would leave lines beyond the 20th without a colour, so zip() in the
# plotting cell would silently skip them; with n_colors the palette cycles.
colors = sns.color_palette('tab20', n_colors=len(line_list))

In [None]:
fig, ax = plt.subplots()
# Base layer: the selected districts of each region in light gray.
for region_geo in (seoul_geo, gyeonggi_geo, incheon_geo):
    region_geo.plot(ax=ax, color='lightgray')
# Input records in red.
processing_geo.plot(ax=ax, color='r')
# Stations, coloured per line.
for line_name, line_color in zip(line_list, colors):
    station_geo[station_geo['노선명'] == line_name].plot(ax=ax, color=line_color, alpha=0.4)

In [None]:
# Pair every input record with every station (cross join) and compute the
# distance between the two geometries of each pair.
record_side = processing_geo.filter(regex='번호|geometry')
station_side = station_geo.filter(regex='역사명|geometry')
all_pairs = record_side.merge(station_side, how='cross')
pair_distance = all_pairs.apply(
    lambda pair: shapely.distance(pair.geometry_x, pair.geometry_y), axis=1
)
cross_tab = all_pairs.assign(distance=pair_distance).reset_index(drop=True)

In [None]:
# For each record number, show the pair with the smallest distance.
nearest_rows = cross_tab.groupby('번호')['distance'].idxmin()
cross_tab.loc[nearest_rows, ['번호', '역사명', 'distance']]

In [None]:
# Attach each record's nearest station (name and distance) and preview the result.
nearest_station = cross_tab.loc[
    cross_tab.groupby('번호')['distance'].idxmin(),
    ['번호', '역사명', 'distance'],
]
processing_geo.merge(nearest_station, on='번호').head()

# 이상 케이스 체크

In [None]:
# 이상케이스 체크
cross_tab.query('번호==51').query('distance<3000').drop(columns = 'geometry_x').rename(columns = {'geometry_x':'geometry'})

In [None]:
fig, ax = plt.subplots()
# Background: the district whose ADM_SECT_C code is "11500"; record 51 in red.
seoul_geo.query('ADM_SECT_C=="11500"').plot(ax=ax, color='lightgray')
processing_geo.query('번호==51').plot(ax=ax, color='r')
# gpd.GeoSeries({'geometry': Point(find_coordinates('화곡동 1089-54', 'parcel'))}).plot(ax=ax)
# train_geo.query('역사명=="화곡"').plot(ax=ax, color='g')

# Stations paired with record 51 at a distance below 3000, with the station
# geometry promoted to the `geometry` column so the frame can be plotted.
nearby_stations = (
    cross_tab.query('번호==51 & distance<3000')
    .drop(columns='geometry_x')
    .rename(columns={'geometry_y': 'geometry'})
)
nearby_stations.plot(ax=ax, color='r')
# Label each nearby station with its name.
for _, station_row in nearby_stations.iterrows():
    ax.annotate(
        text=station_row['역사명'],
        xy=station_row.geometry.centroid.coords[0],
        ha='center',
    )
plt.show()