In [4]:
import pandas as pd
import numpy as np
import selenium
import sqlalchemy
from urllib import parse
import requests
from bs4 import BeautifulSoup as BS
import os
import json
from tqdm import tqdm
import pickle

In [5]:
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

def ev_cs_get_by_metro_code(result_file_name:str, API_KEY:str, metro_code:str, city_code=None)->bool:
    ev_api_url = "https://bigdata.kepco.co.kr/openapi/v1/EVcharge.do?{}{}apiKey={}&returnType=json"
    metroCd = f"metroCd={metro_code}&"
    cityCd = "" if city_code==None else f"cityCd={city_code}&"
    try:
        req = requests.get(ev_api_url.format(metroCd, cityCd, API_KEY))
        soup = BS(req.text)
        data = json.loads(soup.text)
        result_path = "./results"
        if not os.path.isdir(result_path):
            os.mkdir(result_path)
        with open(f"{result_file_name}", "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
            
    except Exception as e:
        print(f"[debug] > exception : {e}")
        return False
    return True
    

In [6]:
metro_codes =  {
    "서울특별시" : "11",
    "부산광역시" : "21",
    "대구광역시" : "22",
    "인천광역시" : "23",
    "광주광역시" : "24",
    "대전광역시" : "25",
    "울산광역시" : "26",
    "경기도" : "31",
    "강원도" : "32",
    "충청북도" : "33",
    "충청남도" : "34",
    "전라북도" : "35",
    "전라남도" : "36",
    "경상북도" : "37",
    "경상남도" : "38",
    "제주특별자치도" : "39"
}

In [7]:
with open("api_key.json", "r") as f:
    key_json = json.load(f)
API_KEY = key_json['API_KEY']

In [8]:

result_file_name_template = "ev_cs_{}.json"

result_path = "./results"
if not os.path.isdir(result_path):
    os.mkdir(result_path)
for key in metro_codes.keys():
    assert ev_cs_get_by_metro_code(f"{result_path}/"+result_file_name_template.format(key), API_KEY, metro_code=metro_codes[key]), print(f"[debug] > {key} extraction failed")
    print(f"[debug] > {key} extraction success")

[debug] > 서울특별시 extraction success
[debug] > 부산광역시 extraction success
[debug] > 대구광역시 extraction success
[debug] > 인천광역시 extraction success
[debug] > 광주광역시 extraction success
[debug] > 대전광역시 extraction success
[debug] > 울산광역시 extraction success
[debug] > 경기도 extraction success
[debug] > 강원도 extraction success
[debug] > 충청북도 extraction success
[debug] > 충청남도 extraction success
[debug] > 전라북도 extraction success
[debug] > 전라남도 extraction success
[debug] > 경상북도 extraction success
[debug] > 경상남도 extraction success
[debug] > 제주특별자치도 extraction success


In [9]:
with open("db_info.json", "r") as f:
    db_info = json.load(f)
user = db_info['user']
password = db_info['password']
host = db_info['host']
port = db_info['port']
database = db_info['database']
password = parse.quote_plus(password)
engine = sqlalchemy.create_engine(f"mysql://{user}:{password}@{host}:{port}/{database}?charset=utf8mb4")

In [10]:

listdata = []
for key in tqdm(metro_codes.keys()):
    filename = result_file_name_template.format(key)
    with open(f"{result_path}/{filename}", "r") as f:
        file = json.load(f)
    for info in file["data"]:
        listdata.append(info)

  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [00:00<00:00, 1298.09it/s]


In [11]:
data = pd.DataFrame(listdata)
with open("./results/EV_CS_DATA.pkl", "wb") as f:
    pickle.dump(data, f)

In [25]:
data.rename(columns={"rapidCnt": "rapidChargerCount", "slowCnt":"slowChargerCount"}, inplace=True)
data.head(5)

Unnamed: 0,metro,city,stnPlace,stnAddr,rapidChargerCount,slowChargerCount,carType
0,서울특별시,강남구,LH강남힐스테이트,서울특별시 강남구 자곡동 자곡로3길 21,1,4,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
1,서울특별시,강남구,LH서울지사,서울특별시 강남구 선릉로 121길 12,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
2,서울특별시,강남구,가람아파트,서울특별시 강남구 일원동 일원로 127,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
3,서울특별시,강남구,강남구청 공영주차장,서울특별시 강남구 삼성동 16-1,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
4,서울특별시,강남구,강남데시앙파크,서울특별시 강남구 세곡동 헌릉로590길 63,1,1,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."


In [43]:
testdf = data.head(50)

In [49]:
row_num = 10
testaddr = testdf.loc[row_num, 'stnAddr']

In [None]:
with open("naver_geocoding_api.json", "r") as f:
    naver_api_keys = json.load(f)

def geocoding(address:str):
    try:
        with requests.Session() as nsession:
            nv_gc_api_url_template = "https://naveropenapi.apigw.ntruss.com/map-geocode/v2/geocode{}"
            nv_gc_api_headers = {
                "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                "X-NCP-APIGW-API-KEY-ID" : naver_api_keys['clientID'],
                "X-NCP-APIGW-API-KEY" : naver_api_keys['clientSecret']
            }
            nv_gc_req = nsession.get(nv_gc_api_url_template)
        lat_lng = [geo.latitude, geo.longitude]
        return lat_lng
    except Exception as e:
        print(f"[debug] > Exception: {e}")
        return [0,0]

In [48]:


import time
for idx, row in tqdm(testdf.iterrows()):
    addr = f"{row.loc['stnAddr'].strip()} {row.loc['stnPlace']}"
    lat_lng = geocoding(addr)
    print(f"{addr} : [{lat_lng[0]}, {lat_lng[1]}]")
    time.sleep(1)
    break

0it [00:00, ?it/s]

[debug] > Exception: 'NoneType' object has no attribute 'latitude'
서울특별시 강남구 자곡동 자곡로3길 21 LH강남힐스테이트 : [0, 0]





In [33]:
len(data['stnAddr'])-len(data['stnAddr'].drop_duplicates())

124

In [13]:
carTypeKeys = data.carType.value_counts().keys().tolist()
carTypeList = []
for types in carTypeKeys:
    car_type_list = types.split(",")
    carTypeList += list(set(car_type_list) - set(carTypeList))
carTypeList.remove('')

In [14]:
def get_restarea_info()->pd.DataFrame:
    with requests.Session() as session:
        restarea_api_url_template = "https://data.ex.co.kr/openapi/restinfo/hiwaySvarInfoList?key={}&type={}"
        restarea_api_key = key_json['RESTAREA_API_KEY']
        restarea_api_type = "json"
        restarea_req = session.get(restarea_api_url_template.format(restarea_api_key, restarea_api_type))
        with open("./results/rest_areas.json", "w", encoding='utf-8') as f:
            json.dump(restarea_req.json(), f, ensure_ascii=False)
    return pd.DataFrame(restarea_req.json()['list']) 

In [15]:
with open("./results/rest_areas.json", "r", encoding='utf-8') as f:
    restarea_json = json.load(f)
restarea_df = pd.DataFrame(restarea_json['list'])

In [16]:
restarea_df.head(3)

Unnamed: 0,svarAddr,routeCd,routeNm,svarCd,svarNm,hdqrCd,hdqrNm,mtnofCd,mtnofNm,svarGsstClssCd,svarGsstClssNm,gudClssCd,gudClssNm,pstnoCd,cocrPrkgTrcn,fscarPrkgTrcn,dspnPrkgTrcn,bsopAdtnlFcltCd,rprsTelNo
0,경북 경주시 건천읍 경부고속도로 77,10,경부선,485,건천(부산)주유소,600000,대구경북본부,610200,대구,1,주유소,1,하행,780900,0,0,0,B00200,547514600
1,경북 경주시 건천읍 방내리 14,10,경부선,54,건천(부산)휴게소,600000,대구경북본부,610200,대구,0,휴게소,1,하행,780900,73,43,4,A00078,547516890
2,경북 경주시 건천읍 방내리 28-1,10,경부선,53,건천(서울)주유소,600000,대구경북본부,610200,대구,1,주유소,0,상행,780900,0,0,0,B00053,547518256


In [17]:
restarea_df.pstnoCd.head(20)

0     780900
1     780900
2     780900
3     780900
4     702801
5     702800
6     712830
7     712830
8     780851
9     38196 
10    373830
11    373830
12    449900
13    17100 
14    449902
15    740880
16    740882
17    740882
18    740882
19    31053 
Name: pstnoCd, dtype: object

In [18]:
from geopy.geocoders import Nominatim
geo_local = Nominatim(user_agent='South Korea')

def geocoding(address:str):
    try:
        geo = geo_local.geocode(address)
        lat_lng = [geo.latitude, geo.longitude]
        return lat_lng
    except:
        return [0,0]

In [19]:
row_num = 25
testpost = restarea_df.loc[row_num, 'pstnoCd']
testaddr = restarea_df.loc[row_num, 'svarAddr']

In [22]:
restarea_df.shape

(573, 19)

In [20]:
test_df = restarea_df.head(50)

14.469696969696969

In [21]:
test_df.svarAddr.apply(geocoding)

0     [35.8399871, 129.0885073]
1                        [0, 0]
2                        [0, 0]
3                        [0, 0]
4                        [0, 0]
5                        [0, 0]
6                        [0, 0]
7                        [0, 0]
8                        [0, 0]
9                        [0, 0]
10                       [0, 0]
11                       [0, 0]
12                       [0, 0]
13                       [0, 0]
14                       [0, 0]
15                       [0, 0]
16                       [0, 0]
17                       [0, 0]
18                       [0, 0]
19                       [0, 0]
20                       [0, 0]
21                       [0, 0]
22                       [0, 0]
23        [37.44499, 127.04933]
24        [37.44499, 127.04933]
25                       [0, 0]
26                       [0, 0]
27                       [0, 0]
28                       [0, 0]
29                       [0, 0]
30    [36.4238767, 127.4242613]
31    [3

In [143]:
lat, lng = geocoding(testpost)
print(f"{testpost} : {lat}, {lng}")

463470 : 0, 0


In [102]:
restarea_df.columns.tolist()

['routeCd',
 'svarAddr',
 'routeNm',
 'hdqrNm',
 'mtnofNm',
 'svarCd',
 'svarNm',
 'hdqrCd',
 'mtnofCd',
 'svarGsstClssCd',
 'svarGsstClssNm',
 'gudClssCd',
 'gudClssNm',
 'pstnoCd',
 'cocrPrkgTrcn',
 'fscarPrkgTrcn',
 'dspnPrkgTrcn',
 'bsopAdtnlFcltCd',
 'rprsTelNo']

In [107]:
unitCodes = restarea_df.svarCd.tolist()

In [114]:
# rest area location infos

# API keeps getting status code 500: back-end server error
test_unitCode = unitCodes[5]

restarea_loc_api_url_template = "https://www.bigdata-transportation.kr/api?apiKey={}{}{}"
restarea_loc_api_key = key_json['RESTAREA_LOC_API_KEY']
restarea_loc_api_productId = f"&productId={key_json['RESTAREA_LOC_API_PRODUCT_ID']}"
restarea_loc_api_unitCode = f"&unitCode={test_unitCode}"
with requests.Session() as session2:
    restarea_loc_req = session2.get(restarea_loc_api_url_template.format(restarea_loc_api_key, restarea_loc_api_productId, restarea_loc_api_unitCode))
    assert restarea_loc_req.status_code==200, print("[debug] > connection error")
    try:
        restarea_loc_api_json = restarea_loc_req.json()
        assert restarea_loc_api_json['resultType']=="200", print(f"[debug] > result error: {restarea_loc_api_json['msg']}")
    except Exception as e:
        print(f"[debug] > Exception: {e}")
    finally:
        with open("./results/rest_area_locs.json", "w", encoding='utf-8') as f:
            json.dump(restarea_loc_api_json, f, ensure_ascii=False)

In [95]:
restarea_df.shape

(573, 19)