* 전철 목록
    * https://ko.wikipedia.org/wiki/%EC%88%98%EB%8F%84%EA%B6%8C_%EC%A0%84%EC%B2%A0%EC%97%AD_%EB%AA%A9%EB%A1%9D
* 버스 정류장 데이터
    * https://data.seoul.go.kr/dataList/OA-15067/S/1/datasetView.do?utm_source=chatgpt.com
* 자치구별 고령인구 데이터
    * https://data.seoul.go.kr/dataList/10821/S/2/datasetView.do

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rc
import warnings
import os
import re
import time

if os.name == 'nt':
    print("This is a Windows environment.")
    rc('font', family='Malgun Gothic')
elif os.name == 'posix':
    print("This is a Mac or Linux environment.")
    rc('font', family='AppleGothic')
plt.rcParams['axes.unicode_minus'] = False

warnings.filterwarnings("ignore", category=UserWarning)

This is a Mac or Linux environment.


In [2]:
driver = webdriver.Chrome()
base_url = 'https://ko.wikipedia.org/wiki/%EC%88%98%EB%8F%84%EA%B6%8C_%EC%A0%84%EC%B2%A0%EC%97%AD_%EB%AA%A9%EB%A1%9D'

driver.get(base_url)

In [3]:
station_list = []

for i in range(4, 33, 2):
    results = driver.find_elements(By.CSS_SELECTOR, f'#mw-content-text > div.mw-content-ltr.mw-parser-output > table:nth-child({i}) > tbody > tr')
    
    for result in results:
        text = result.text
        if re.search(r'[a-zA-Z]', text) == None:
            pass
        else:
            station_name = text.split()[0]
            station_list.append(station_name)
    time.sleep(2)

index = station_list.index("신촌(2)")
station_list[index] = '신촌'
station_list.remove("신촌(경)")
index = station_list.index("양평(중)")
station_list[index] = '양평'
station_list.remove("양평(5)")
index = station_list.index("운동장·송담대")
station_list[index] = '용인중앙시장'
        
print(len(station_list))

649


In [4]:
station_list[:5]

['가능', '가락시장', '가산디지털단지', '가양', '가오리']

In [5]:
import requests
import json

def convert_name(name):
    
    path = '/v2/local/search/keyword.json'
    api_host = 'https://dapi.kakao.com'
    api_key = '275fa9c9b83625900bee1dc000fd7dcb'
    url = api_host+path
    
    header = {
        "Authorization" : f'KakaoAK {api_key}'
    }
    
    body = {
    "format" : "json",
    'query' : name,
    }

    try:
        response = requests.get(url=url, params=body, headers=header, timeout=10)  # 10초 타임아웃
        response.raise_for_status()  # HTTP 오류가 발생하면 예외 발생
    except requests.exceptions.Timeout:
        print("Request timed out")
        return {}
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return {}

    parsed_data = json.loads(response.text)

    return parsed_data

In [6]:
addr_list = []
name_list = []
x_list = []
y_list = []

for station in station_list:
    data = convert_name(station + "역")
    count = data['meta']['total_count']
    documents = data['documents']
    for doc in documents:
        if doc['category_group_code'] == 'SW8':
            address = doc['road_address_name']
            name = doc['place_name']
            x = doc['x']
            y = doc['y']
            addr_list.append(address)
            name_list.append(name)
            x_list.append(x)
            y_list.append(y)
            break
        elif '기차역' in doc['category_name']:
            address = doc['address_name']
            name = doc['place_name']
            x = doc['x']
            y = doc['y']
            addr_list.append(address)
            name_list.append(name)
            x_list.append(x)
            y_list.append(y)
            break

print(len(addr_list), len(name_list), len(x_list), len(y_list))

649 649 649 649


In [7]:
raw_subway = pd.DataFrame(data={
    "역이름" : name_list,
    '주소' : addr_list,
    '위도' : x_list,
    '경도' : y_list
})

In [8]:
raw_subway.head()

Unnamed: 0,역이름,주소,위도,경도
0,가능역 1호선,경기 의정부시 평화로 633,127.044288264841,37.7483963196456
1,가락시장역 8호선,서울 송파구 송파대로 지하 257,127.118262745146,37.4930992522183
2,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,126.882661758356,37.4803959660982
3,가양역 9호선,서울 강서구 양천로 지하 485,126.85442142615852,37.56143311719883
4,가오리역 우이신설선,서울 강북구 삼양로 426,127.01676896839822,37.6415440572233


In [9]:
subway = raw_subway[raw_subway['주소'].str.contains("서울")]
subway.reset_index(drop=True, inplace=True)
subway

Unnamed: 0,역이름,주소,위도,경도
0,가락시장역 8호선,서울 송파구 송파대로 지하 257,127.118262745146,37.4930992522183
1,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,126.882661758356,37.4803959660982
2,가양역 9호선,서울 강서구 양천로 지하 485,126.85442142615852,37.56143311719883
3,가오리역 우이신설선,서울 강북구 삼양로 426,127.01676896839822,37.6415440572233
4,가좌역,서울 서대문구 남가좌동 296-10,126.914824171209,37.5687541714336
...,...,...,...,...
301,회기역 1호선,서울 동대문구 회기로 196,127.058048369273,37.5897962196601
302,회현역 4호선,서울 중구 퇴계로 지하 54,126.9784372569283,37.55876114587941
303,효창공원앞역 6호선,서울 용산구 백범로 지하 287,126.96139810075,37.5393087503306
304,흑석역 9호선,서울 동작구 현충로 지하 90,126.963463063008,37.5091654719404


In [11]:
raw_bus = pd.read_csv('../data/서울시 버스정류소 위치정보.csv', encoding="cp949")
raw_bus.head()

Unnamed: 0,노드 ID,정류소번호,정류소명,X좌표,Y좌표,정류소 타입
0,100000001,1001,종로2가사거리,126.987752,37.569808,중앙차로
1,100000002,1002,창경궁.서울대학교병원,126.996522,37.579433,중앙차로
2,100000003,1003,명륜3가.성대입구,126.998251,37.582581,중앙차로
3,100000004,1004,종로2가.삼일교,126.987613,37.568579,중앙차로
4,100000005,1005,혜화동로터리.여운형활동터,127.001744,37.586243,중앙차로


In [12]:
raw_bus.shape

(11290, 6)

In [13]:
bus = raw_bus[['정류소번호', '정류소명', "X좌표", "Y좌표"]]
bus.shape

(11290, 4)

In [14]:
bus.head()

Unnamed: 0,정류소번호,정류소명,X좌표,Y좌표
0,1001,종로2가사거리,126.987752,37.569808
1,1002,창경궁.서울대학교병원,126.996522,37.579433
2,1003,명륜3가.성대입구,126.998251,37.582581
3,1004,종로2가.삼일교,126.987613,37.568579
4,1005,혜화동로터리.여운형활동터,127.001744,37.586243


In [15]:
subway.head()

Unnamed: 0,역이름,주소,위도,경도
0,가락시장역 8호선,서울 송파구 송파대로 지하 257,127.118262745146,37.4930992522183
1,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,126.882661758356,37.4803959660982
2,가양역 9호선,서울 강서구 양천로 지하 485,126.85442142615852,37.56143311719883
3,가오리역 우이신설선,서울 강북구 삼양로 426,127.01676896839822,37.6415440572233
4,가좌역,서울 서대문구 남가좌동 296-10,126.914824171209,37.5687541714336


In [29]:
gdf_subway = gpd.GeoDataFrame(subway[['역이름', '주소']], 
                 geometry=[Point(val['위도'], val['경도']) for _, val in subway.iterrows()],
                 crs="EPSG:4326"
)
gdf_subway.head()

Unnamed: 0,역이름,주소,geometry
0,가락시장역 8호선,서울 송파구 송파대로 지하 257,POINT (127.11826 37.4931)
1,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,POINT (126.88266 37.4804)
2,가양역 9호선,서울 강서구 양천로 지하 485,POINT (126.85442 37.56143)
3,가오리역 우이신설선,서울 강북구 삼양로 426,POINT (127.01677 37.64154)
4,가좌역,서울 서대문구 남가좌동 296-10,POINT (126.91482 37.56875)


In [30]:
bus.head()

Unnamed: 0,정류소번호,정류소명,X좌표,Y좌표
0,1001,종로2가사거리,126.987752,37.569808
1,1002,창경궁.서울대학교병원,126.996522,37.579433
2,1003,명륜3가.성대입구,126.998251,37.582581
3,1004,종로2가.삼일교,126.987613,37.568579
4,1005,혜화동로터리.여운형활동터,127.001744,37.586243


In [31]:
gdf_bus = gpd.GeoDataFrame(bus[['정류소번호', '정류소명']], 
                 geometry=[Point(val['X좌표'], val['Y좌표']) for _, val in bus.iterrows()],
                 crs="EPSG:4326"
)
gdf_bus.head()

Unnamed: 0,정류소번호,정류소명,geometry
0,1001,종로2가사거리,POINT (126.98775 37.56981)
1,1002,창경궁.서울대학교병원,POINT (126.99652 37.57943)
2,1003,명륜3가.성대입구,POINT (126.99825 37.58258)
3,1004,종로2가.삼일교,POINT (126.98761 37.56858)
4,1005,혜화동로터리.여운형활동터,POINT (127.00174 37.58624)


In [34]:
gdf_subway['buffer'] = gdf_subway['geometry'].to_crs(epsg=3857).buffer(1000)
gdf_subway.head()

Unnamed: 0,역이름,주소,geometry,buffer
0,가락시장역 8호선,서울 송파구 송파대로 지하 257,POINT (127.11826 37.4931),"POLYGON ((14151740.279 4508063.158, 14151735.4..."
1,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,POINT (126.88266 37.4804),"POLYGON ((14125513.297 4506281.011, 14125508.4..."
2,가양역 9호선,서울 강서구 양천로 지하 485,POINT (126.85442 37.56143),"POLYGON ((14122369.598 4517654.94, 14122364.78..."
3,가오리역 우이신설선,서울 강북구 삼양로 426,POINT (127.01677 37.64154),"POLYGON ((14140442.044 4528911.037, 14140437.2..."
4,가좌역,서울 서대문구 남가좌동 296-10,POINT (126.91482 37.56875),"POLYGON ((14129093.601 4518683.091, 14129088.7..."


In [35]:
gdf_bus['geometry'] = gdf_bus['geometry'].to_crs(epsg=3857)
gdf_bus.head()

Unnamed: 0,정류소번호,정류소명,geometry
0,1001,종로2가사거리,POINT (14136211.89 4518831.097)
1,1002,창경궁.서울대학교병원,POINT (14137188.162 4520182.982)
2,1003,명륜3가.성대입구,POINT (14137380.633 4520625.175)
3,1004,종로2가.삼일교,POINT (14136196.427 4518658.528)
4,1005,혜화동로터리.여운형활동터,POINT (14137769.472 4521139.591)


In [36]:
gdf_subway

Unnamed: 0,역이름,주소,geometry,buffer
0,가락시장역 8호선,서울 송파구 송파대로 지하 257,POINT (127.11826 37.4931),"POLYGON ((14151740.279 4508063.158, 14151735.4..."
1,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,POINT (126.88266 37.4804),"POLYGON ((14125513.297 4506281.011, 14125508.4..."
2,가양역 9호선,서울 강서구 양천로 지하 485,POINT (126.85442 37.56143),"POLYGON ((14122369.598 4517654.94, 14122364.78..."
3,가오리역 우이신설선,서울 강북구 삼양로 426,POINT (127.01677 37.64154),"POLYGON ((14140442.044 4528911.037, 14140437.2..."
4,가좌역,서울 서대문구 남가좌동 296-10,POINT (126.91482 37.56875),"POLYGON ((14129093.601 4518683.091, 14129088.7..."
...,...,...,...,...
301,회기역 1호선,서울 동대문구 회기로 196,POINT (127.05805 37.5898),"POLYGON ((14145037.246 4521638.75, 14145032.43..."
302,회현역 4호선,서울 중구 퇴계로 지하 54,POINT (126.97844 37.55876),"POLYGON ((14136174.977 4517279.719, 14136170.1..."
303,효창공원앞역 6호선,서울 용산구 백범로 지하 287,POINT (126.9614 37.53931),"POLYGON ((14134278.187 4514548.459, 14134273.3..."
304,흑석역 9호선,서울 동작구 현충로 지하 90,POINT (126.96346 37.50917),"POLYGON ((14134508.058 4510317.527, 14134503.2..."


In [37]:
result = []

for idx, station in gdf_subway.iterrows():
    station_name = station['역이름']
    buffer = station['buffer']
    within_buffer = gdf_bus[gdf_bus.geometry.within(buffer)]
    
    result.append(len(within_buffer))

In [39]:
gdf_subway['반경내정류장개수'] = result
gdf_subway.head()

Unnamed: 0,역이름,주소,geometry,buffer,반경내정류장개수
0,가락시장역 8호선,서울 송파구 송파대로 지하 257,POINT (127.11826 37.4931),"POLYGON ((14151740.279 4508063.158, 14151735.4...",34
1,가산디지털단지역 7호선,서울 금천구 벚꽃로 309,POINT (126.88266 37.4804),"POLYGON ((14125513.297 4506281.011, 14125508.4...",70
2,가양역 9호선,서울 강서구 양천로 지하 485,POINT (126.85442 37.56143),"POLYGON ((14122369.598 4517654.94, 14122364.78...",65
3,가오리역 우이신설선,서울 강북구 삼양로 426,POINT (127.01677 37.64154),"POLYGON ((14140442.044 4528911.037, 14140437.2...",60
4,가좌역,서울 서대문구 남가좌동 296-10,POINT (126.91482 37.56875),"POLYGON ((14129093.601 4518683.091, 14129088.7...",61


In [41]:
gu_list = []
for i, v in gdf_subway.iterrows():
    addr = v['주소']
    gu = addr.split()[1]
    gu_list.append(gu)

In [42]:
gdf_subway['구별'] = gu_list

In [45]:
bus_stop = gdf_subway.groupby(["구별"])['반경내정류장개수'].sum().reset_index()

In [46]:
bus_stop.head()

Unnamed: 0,구별,반경내정류장개수
0,강남구,993
1,강동구,590
2,강북구,631
3,강서구,777
4,관악구,435


In [48]:
raw_pop = pd.read_excel('../data/자치구별+고령인구(추계인구)_20241019165325.xlsx')
raw_pop.head()

Unnamed: 0,자치구별(1),자치구별(2),2023,2023.1,2023.2
0,자치구별(1),자치구별(2),고령인구,고령인구,고령인구
1,자치구별(1),자치구별(2),소계,남자,여자
2,합계,소계,1691853,749118,942735
3,,종로구,27187,11986,15201
4,,중구,24374,10637,13737


In [49]:
pop = raw_pop.drop(index=[0, 1, 2], columns=['자치구별(1)'])
pop.columns = ["구별", "노령인구", "노령인구_남자", "노령인구_여자"]
pop.reset_index(drop=True, inplace=True)

In [50]:
pop.head()

Unnamed: 0,구별,노령인구,노령인구_남자,노령인구_여자
0,종로구,27187,11986,15201
1,중구,24374,10637,13737
2,용산구,37740,16395,21345
3,성동구,47907,21034,26873
4,광진구,55697,24952,30745


In [51]:
bus_stop = bus_stop.merge(right=pop, right_on="구별", left_on="구별")
bus_stop.drop(columns=['노령인구_남자', '노령인구_여자'], inplace=True)
bus_stop.head()

Unnamed: 0,구별,반경내정류장개수,노령인구
0,강남구,993,81561
1,강동구,590,80131
2,강북구,631,65980
3,강서구,777,98380
4,관악구,435,84596


In [53]:
bus_stop['버스정류장개수(10K)'] = bus_stop['반경내정류장개수'] / bus_stop['노령인구'] * 10000
bus_stop.head()

Unnamed: 0,구별,반경내정류장개수,노령인구,버스정류장개수(10K)
0,강남구,993,81561,121.749366
1,강동구,590,80131,73.629432
2,강북구,631,65980,95.635041
3,강서구,777,98380,78.979467
4,관악구,435,84596,51.420871


In [55]:
bus_stop.to_csv('../data/output/traffic_f.csv', index=False)