In [1]:
# visualization
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
fe = fm.FontEntry(
    fname=r'/usr/share/fonts/truetype/nanum/NanumGothic.ttf', # ttf 파일이 저장되어 있는 경로
    name='NanumBarunGothic')                        # 이 폰트의 원하는 이름 설정
fm.fontManager.ttflist.insert(0, fe)              # Matplotlib에 폰트 추가
plt.rcParams.update({'font.size': 10, 'font.family': 'NanumBarunGothic'}) # 폰트 설정
plt.rc('font', family='NanumBarunGothic')
import seaborn as sns

# utils
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
import warnings;warnings.filterwarnings('ignore')

# Model
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

import eli5
from eli5.sklearn import PermutationImportance
from sklearn.preprocessing import MinMaxScaler

In [2]:
bus_path = '/data/ephemeral/home/data/bus_feature.csv'
subway_path = '/data/ephemeral/home/data/subway_feature.csv'

In [3]:
bus = pd.read_csv(bus_path)
subway = pd.read_csv(subway_path)


In [4]:
bus.head(5)

Unnamed: 0,노드 ID,정류소번호,정류소명,X좌표,Y좌표,정류소 타입
0,100000001,1001,종로2가사거리,126.987752,37.569808,중앙차로
1,100000002,1002,창경궁.서울대학교병원,126.996566,37.579183,중앙차로
2,100000003,1003,명륜3가.성대입구,126.998251,37.582581,중앙차로
3,100000004,1004,종로2가.삼일교,126.987613,37.568579,중앙차로
4,100000005,1005,혜화동로터리.여운형활동터,127.001744,37.586243,중앙차로


In [5]:
subway.head(5)

Unnamed: 0,역사_ID,역사명,호선,위도,경도
0,9996,미사,5호선,37.560927,127.193877
1,9995,강일,5호선,37.55749,127.17593
2,4929,김포공항,김포골드라인,37.56236,126.801868
3,4928,고촌,김포골드라인,37.601243,126.770345
4,4927,풍무,김포골드라인,37.612488,126.732387


In [6]:
bus.shape, subway.shape

((12584, 6), (768, 5))

In [13]:
api_key = '971ef584c17f920176f1b00084b63111'

In [30]:
import requests

def get_district_from_coordinates(x, y, api_key):
    url = "https://dapi.kakao.com/v2/local/geo/coord2regioncode.json"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    params = {"x": x, "y": y}

    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        result = response.json()
        print(result)
        if result['documents']:
            for document in result['documents']:
                if document['region_type'] == 'H':
                    return [document['region_1depth_name'], document['region_2depth_name'], document['region_3depth_name']]
    return None

In [31]:
subway['위도'][0]

37.560927

In [34]:
result1 = []
result2 = []
result3 = []
for i in range(10):
    r1, r2, r3 = get_district_from_coordinates(subway['경도'][i], subway['위도'][i], api_key=api_key)
    result1.append(r1)
    result2.append(r2)
    result3.append(r3)
    
print(result1)
print(result2)
print(result3)

{'meta': {'total_count': 2}, 'documents': [{'region_type': 'B', 'code': '4145010900', 'address_name': '경기도 하남시 망월동', 'region_1depth_name': '경기도', 'region_2depth_name': '하남시', 'region_3depth_name': '망월동', 'region_4depth_name': '', 'x': 127.1894750978691, 'y': 37.56809946143883}, {'region_type': 'H', 'code': '4145061000', 'address_name': '경기도 하남시 미사1동', 'region_1depth_name': '경기도', 'region_2depth_name': '하남시', 'region_3depth_name': '미사1동', 'region_4depth_name': '', 'x': 127.18880799273204, 'y': 37.56031593309407}]}
{'meta': {'total_count': 2}, 'documents': [{'region_type': 'B', 'code': '1174011000', 'address_name': '서울특별시 강동구 강일동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '강동구', 'region_3depth_name': '강일동', 'region_4depth_name': '', 'x': 127.17391966164308, 'y': 37.564944692060216}, {'region_type': 'H', 'code': '1174052600', 'address_name': '서울특별시 강동구 상일2동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '강동구', 'region_3depth_name': '상일2동', 'region_4depth_name': '', 'x'

In [35]:
subway[['시도', '구', '동']] = subway.apply(
    lambda row: pd.Series(get_district_from_coordinates(row['경도'], row['위도'], api_key)), 
    axis=1
)

{'meta': {'total_count': 2}, 'documents': [{'region_type': 'B', 'code': '4145010900', 'address_name': '경기도 하남시 망월동', 'region_1depth_name': '경기도', 'region_2depth_name': '하남시', 'region_3depth_name': '망월동', 'region_4depth_name': '', 'x': 127.1894750978691, 'y': 37.56809946143883}, {'region_type': 'H', 'code': '4145061000', 'address_name': '경기도 하남시 미사1동', 'region_1depth_name': '경기도', 'region_2depth_name': '하남시', 'region_3depth_name': '미사1동', 'region_4depth_name': '', 'x': 127.18880799273204, 'y': 37.56031593309407}]}
{'meta': {'total_count': 2}, 'documents': [{'region_type': 'B', 'code': '1174011000', 'address_name': '서울특별시 강동구 강일동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '강동구', 'region_3depth_name': '강일동', 'region_4depth_name': '', 'x': 127.17391966164308, 'y': 37.564944692060216}, {'region_type': 'H', 'code': '1174052600', 'address_name': '서울특별시 강동구 상일2동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '강동구', 'region_3depth_name': '상일2동', 'region_4depth_name': '', 'x'

In [37]:
subway.sample(10)

Unnamed: 0,역사_ID,역사명,호선,위도,경도,시도,구,동
251,2742,장승배기,7호선,37.504898,126.93915,서울특별시,동작구,상도2동
659,420,혜화,4호선,37.582336,127.001844,서울특별시,종로구,혜화동
685,331,남부터미널(예술의전당),3호선,37.485013,127.016189,서울특별시,서초구,서초3동
443,1855,서현,분당선,37.385126,127.123592,경기도,성남시 분당구,서현1동
244,2749,철산,7호선,37.47605,126.867911,경기도,광명시,철산3동
465,1808,동암,경인선,37.471408,126.702896,인천광역시,부평구,십정2동
54,4514,보평,에버라인선,37.258965,127.218457,경기도,용인시 처인구,유림동
239,2754,부천종합운동장,7호선,37.50538,126.797337,경기도,부천시 원미구,춘의동
693,323,약수,3호선,37.554867,127.010541,서울특별시,중구,다산동
66,4501,기흥,에버라인선,37.275449,127.116665,경기도,용인시 기흥구,구갈동


In [39]:
bus[['시도', '구', '동']] = bus.apply(
    lambda row: pd.Series(get_district_from_coordinates(row['X좌표'], row['Y좌표'], api_key)), 
    axis=1
)

{'meta': {'total_count': 2}, 'documents': [{'region_type': 'B', 'code': '1111013800', 'address_name': '서울특별시 종로구 종로2가', 'region_1depth_name': '서울특별시', 'region_2depth_name': '종로구', 'region_3depth_name': '종로2가', 'region_4depth_name': '', 'x': 126.98895203098539, 'y': 37.570680075003054}, {'region_type': 'H', 'code': '1111061500', 'address_name': '서울특별시 종로구 종로1.2.3.4가동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '종로구', 'region_3depth_name': '종로1.2.3.4가동', 'region_4depth_name': '', 'x': 126.99028726084354, 'y': 37.574435539666744}]}
{'meta': {'total_count': 2}, 'documents': [{'region_type': 'B', 'code': '1111016600', 'address_name': '서울특별시 종로구 연건동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '종로구', 'region_3depth_name': '연건동', 'region_4depth_name': '', 'x': 126.99966943116526, 'y': 37.578674216494214}, {'region_type': 'H', 'code': '1111061500', 'address_name': '서울특별시 종로구 종로1.2.3.4가동', 'region_1depth_name': '서울특별시', 'region_2depth_name': '종로구', 'region_3depth_name': '종로

In [40]:
bus.sample(10)

Unnamed: 0,노드 ID,정류소번호,정류소명,X좌표,Y좌표,정류소 타입,시도,구,동
3196,107900322,8845,중앙하이츠빌아파트,127.010595,37.602535,마을버스,서울특별시,성북구,정릉2동
12460,124900005,25550,성내주유소,127.122264,37.532709,마을버스,서울특별시,강동구,성내1동
2157,105000558,6311,한천로입구,127.067432,37.599049,일반차로,서울특별시,동대문구,이문1동
863,102000155,3249,용산전자상가입구,126.962222,37.53457,일반차로,서울특별시,용산구,원효로1동
10794,121000965,22641,서초IC(가상),127.026605,37.482359,가상정류장,서울특별시,서초구,서초2동
7475,115000641,16705,마곡엠밸리4단지,126.824981,37.568932,일반차로,서울특별시,강서구,가양1동
8633,117900178,18967,금천폭포공원,126.903705,37.447212,마을버스,서울특별시,금천구,시흥3동
1124,103000024,4123,마장동금호아파트,127.041859,37.563071,일반차로,서울특별시,성동구,마장동
12525,124900048,25675,브라운스톤,127.130148,37.548872,마을버스,서울특별시,강동구,천호2동
688,101000270,2693,남산밀레니엄힐튼호텔,126.975975,37.555467,일반차로,서울특별시,중구,회현동


In [42]:
bus.to_csv('/data/ephemeral/home/code/code/data/bus.csv', index = False)
subway.to_csv('/data/ephemeral/home/code/code/data/subway.csv', index = False)

In [43]:
bus = pd.read_csv('/data/ephemeral/home/code/code/data/bus.csv')
subway = pd.read_csv('/data/ephemeral/home/code/code/data/subway.csv')

In [64]:
bus.head(5)

Unnamed: 0,노드 ID,정류소번호,정류소명,X좌표,Y좌표,정류소 타입,시도,구,동
0,100000001,1001,종로2가사거리,126.987752,37.569808,중앙차로,서울특별시,종로구,종로1.2.3.4가동
1,100000002,1002,창경궁.서울대학교병원,126.996566,37.579183,중앙차로,서울특별시,종로구,종로1.2.3.4가동
2,100000003,1003,명륜3가.성대입구,126.998251,37.582581,중앙차로,서울특별시,종로구,혜화동
3,100000004,1004,종로2가.삼일교,126.987613,37.568579,중앙차로,서울특별시,종로구,종로1.2.3.4가동
4,100000005,1005,혜화동로터리.여운형활동터,127.001744,37.586243,중앙차로,서울특별시,종로구,혜화동


In [45]:
subway.head(5)

Unnamed: 0,역사_ID,역사명,호선,위도,경도,시도,구,동
0,9996,미사,5호선,37.560927,127.193877,경기도,하남시,미사1동
1,9995,강일,5호선,37.55749,127.17593,서울특별시,강동구,상일2동
2,4929,김포공항,김포골드라인,37.56236,126.801868,서울특별시,강서구,방화2동
3,4928,고촌,김포골드라인,37.601243,126.770345,경기도,김포시,고촌읍
4,4927,풍무,김포골드라인,37.612488,126.732387,경기도,김포시,사우동


In [67]:
bus_count = bus[bus['시도'] == '서울특별시'].groupby(['구']).size()

In [68]:
subway_count = subway[subway['시도'] == '서울특별시'].groupby(['구']).size()

In [70]:
print(len(bus_count), len(subway_count))

25 25


In [75]:
subway_count.index

Index(['강남구', '강동구', '강북구', '강서구', '관악구', '광진구', '구로구', '금천구', '노원구', '도봉구',
       '동대문구', '동작구', '마포구', '서대문구', '서초구', '성동구', '성북구', '송파구', '양천구', '영등포구',
       '용산구', '은평구', '종로구', '중구', '중랑구'],
      dtype='object', name='구')

In [76]:
subway_dict = {}
for i in range(len(subway_count)):
    subway_dict[subway_count.index[i]] = subway_count[i]
print(subway_dict)

{'강남구': 35, '강동구': 14, '강북구': 11, '강서구': 21, '관악구': 10, '광진구': 11, '구로구': 13, '금천구': 4, '노원구': 14, '도봉구': 8, '동대문구': 11, '동작구': 18, '마포구': 24, '서대문구': 9, '서초구': 20, '성동구': 19, '성북구': 14, '송파구': 30, '양천구': 8, '영등포구': 23, '용산구': 18, '은평구': 15, '종로구': 14, '중구': 25, '중랑구': 13}


In [77]:
bus_dict = {}
for i in range(len(subway_count)):
    bus_dict[bus_count.index[i]] = bus_count[i]
print(bus_dict)

{'강남구': 619, '강동구': 440, '강북구': 487, '강서구': 696, '관악구': 508, '광진구': 322, '구로구': 619, '금천구': 394, '노원구': 586, '도봉구': 429, '동대문구': 347, '동작구': 481, '마포구': 610, '서대문구': 527, '서초구': 726, '성동구': 514, '성북구': 655, '송파구': 519, '양천구': 407, '영등포구': 611, '용산구': 379, '은평구': 567, '종로구': 475, '중구': 247, '중랑구': 397}
