In [126]:
import pandas as pd
import numpy as np
import selenium
import sqlalchemy
from urllib import parse
import requests
from bs4 import BeautifulSoup as BS
import os
import json
from tqdm import tqdm
import pickle
from geopy.geocoders import Nominatim
import subprocess

In [8]:
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

def ev_cs_get_by_metro_code(result_file_name:str, API_KEY:str, metro_code:str, city_code=None)->bool:
    ev_api_url = "https://bigdata.kepco.co.kr/openapi/v1/EVcharge.do?{}{}apiKey={}&returnType=json"
    metroCd = f"metroCd={metro_code}&"
    cityCd = "" if city_code==None else f"cityCd={city_code}&"
    try:
        req = requests.get(ev_api_url.format(metroCd, cityCd, API_KEY))
        soup = BS(req.text)
        data = json.loads(soup.text)
        result_path = "./results"
        if not os.path.isdir(result_path):
            os.mkdir(result_path)
        with open(f"{result_file_name}", "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
            
    except Exception as e:
        print(f"[debug] > exception : {e}")
        return False
    return True
    

In [9]:
metro_codes =  {
    "서울특별시" : "11",
    "부산광역시" : "21",
    "대구광역시" : "22",
    "인천광역시" : "23",
    "광주광역시" : "24",
    "대전광역시" : "25",
    "울산광역시" : "26",
    "경기도" : "31",
    "강원도" : "32",
    "충청북도" : "33",
    "충청남도" : "34",
    "전라북도" : "35",
    "전라남도" : "36",
    "경상북도" : "37",
    "경상남도" : "38",
    "제주특별자치도" : "39"
}

In [10]:
with open("api_key.json", "r") as f:
    key_json = json.load(f)

In [11]:

result_file_name_template = "ev_cs_{}.json"

API_KEY = key_json['API_KEY']

result_path = "./results"
if not os.path.isdir(result_path):
    os.mkdir(result_path)
for key in metro_codes.keys():
    assert ev_cs_get_by_metro_code(f"{result_path}/"+result_file_name_template.format(key), API_KEY, metro_code=metro_codes[key]), print(f"[debug] > {key} extraction failed")
    print(f"[debug] > {key} extraction success")

[debug] > 서울특별시 extraction success
[debug] > 부산광역시 extraction success
[debug] > 대구광역시 extraction success
[debug] > 인천광역시 extraction success
[debug] > 광주광역시 extraction success
[debug] > 대전광역시 extraction success
[debug] > 울산광역시 extraction success
[debug] > 경기도 extraction success
[debug] > 강원도 extraction success
[debug] > 충청북도 extraction success
[debug] > 충청남도 extraction success
[debug] > 전라북도 extraction success
[debug] > 전라남도 extraction success
[debug] > 경상북도 extraction success
[debug] > 경상남도 extraction success
[debug] > 제주특별자치도 extraction success


In [12]:
with open("db_info.json", "r") as f:
    db_info = json.load(f)
user = db_info['user']
password = db_info['password']
host = db_info['host']
port = db_info['port']
database = db_info['database']
password = parse.quote_plus(password)
engine = sqlalchemy.create_engine(f"mysql://{user}:{password}@{host}:{port}/{database}?charset=utf8mb4")

In [13]:

listdata = []
for key in tqdm(metro_codes.keys()):
    filename = result_file_name_template.format(key)
    with open(f"{result_path}/{filename}", "r") as f:
        file = json.load(f)
    for info in file["data"]:
        listdata.append(info)

  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [00:00<00:00, 1506.85it/s]


In [14]:
data = pd.DataFrame(listdata)
with open("./results/EV_CS_DATA.pkl", "wb") as f:
    pickle.dump(data, f)

In [15]:
with open("./results/EV_CS_DATA.pkl", "rb") as f:
    data = pickle.load(f)

In [16]:
data.rename(columns={"rapidCnt": "rapidChargerCount", "slowCnt":"slowChargerCount"}, inplace=True)
data.head(5)

Unnamed: 0,metro,city,stnPlace,stnAddr,rapidChargerCount,slowChargerCount,carType
0,서울특별시,강남구,LH강남힐스테이트,서울특별시 강남구 자곡동 자곡로3길 21,1,4,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
1,서울특별시,강남구,LH서울지사,서울특별시 강남구 선릉로 121길 12,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
2,서울특별시,강남구,가람아파트,서울특별시 강남구 일원동 일원로 127,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
3,서울특별시,강남구,강남구청 공영주차장,서울특별시 강남구 삼성동 16-1,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."
4,서울특별시,강남구,강남데시앙파크,서울특별시 강남구 세곡동 헌릉로590길 63,1,1,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E..."


In [52]:
testdf = data.head(50)

In [114]:
row_num = 2
testaddr = testdf.loc[row_num, 'stnAddr']
print(testaddr)

서울특별시 강남구 일원동 일원로 127


In [103]:
testaddr = "서울특별시 강남구 일원로 127 (일원동, 가람아파트)"

In [152]:
with open("naver_geocoding_api.json", "r") as f:
    naver_api_keys = json.load(f)

def log(msg, flag=None):
    if flag==None:
        flag = 0
    head = ["debug", "error", "status"]
    from time import gmtime, strftime
    now = strftime("%H:%M:%S", gmtime())
    if not os.path.isfile("./debug.log"):
        assert subprocess.call(f"echo \"[{now}][{head[flag]}] > {msg}\" > debug.log", shell=True)==0, print(f"[error] > shell command failed to execute")
    else: assert subprocess.call(f"echo \"[{now}][{head[flag]}] > {msg}\" >> debug.log", shell=True)==0, print(f"[error] > shell command failed to execute")

def convert_address(address)->str:
    url = "https://www.juso.go.kr/support/AddressMainSearch.do?searchKeyword={}"
    try:
        with requests.Session() as session:
            header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}
            juso_req = session.get(url.format(parse.quote(address)), headers=header)
            assert juso_req.status_code==200, log(f"status code exited with {juso_req.status_code}")
            juso_soup = BS(juso_req.text)
            converted_address = juso_soup.find("div", class_="search_list").find("span", class_="roadNameText").text.strip().replace("\xa0", " ")
        return converted_address
    except Exception as e:
        log(f"exception occured: {e}")
        return None

def get_ngeocoding(address:str):
    try:
        with requests.Session() as nsession:
            nv_gc_api_url_template = "https://naveropenapi.apigw.ntruss.com/map-geocode/v2/geocode?query={}"
            nv_gc_api_headers = {
                "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                "X-NCP-APIGW-API-KEY-ID" : naver_api_keys['clientID'],
                "X-NCP-APIGW-API-KEY" : naver_api_keys['clientSecret'],
                "Accept" : "application/json"
            }
            caddress = convert_address(address)
            caddress = caddress if caddress != None else address
            nv_gc_api_url = nv_gc_api_url_template.format(parse.quote(caddress))
            nv_gc_req = nsession.get(nv_gc_api_url, headers=nv_gc_api_headers)
            assert nv_gc_req.status_code==200, log(f"connection error with status code {nv_gc_req.status_code}")
        naver_json = nv_gc_req.json()
        assert naver_json['status']=="OK", log(f"status error")
        return naver_json
    except Exception as e:
        log(f"Exception: {e}")
        return None

def geocoding(address):
    try:
        rjson = get_ngeocoding(address)
        assert rjson!=None, log(f"naver json fetch failed")
        with open("./results/naver_geocoding_result.json", "w", encoding="utf-8") as f:
            json.dump(rjson, f, ensure_ascii=False)
        assert rjson['meta']['totalCount'] > 0, log(f"naver json fetched {rjson['meta']['totalCount']} address")
        lat, lng = rjson['addresses'][0]['x'], rjson['addresses'][0]['y']
        return [lat, lng]
    except Exception as e:
        log(f"Exception: {e}")
        return [0, 0]

In [None]:
tqdm.pandas()
data['coordinates'] = data.progress_apply(lambda x: geocoding(x['stnAddr']), axis=1)

In [160]:
data.shape

(4594, 8)

In [161]:
emptydf = data[data['coordinates'].apply(lambda x: x[0]==0 and x[1]==0)]

In [162]:
emptydf.shape

(195, 8)

In [163]:
emptydf['coordinates'] = emptydf.progress_apply(lambda x: geocoding(x['stnAddr']), axis=1)

100%|██████████| 195/195 [02:02<00:00,  1.59it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emptydf['coordinates'] = emptydf.progress_apply(lambda x: geocoding(x['stnAddr']), axis=1)


In [165]:
emptydf.head(5)

Unnamed: 0,metro,city,stnPlace,stnAddr,rapidChargerCount,slowChargerCount,carType,coordinates
202,서울특별시,노원구,인재개발원,서울특별시 노원구 노원길 130,1,1,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[0, 0]"
322,서울특별시,서대문구,연희동 홍연2교 옆 노상주차장,서울특별시 서대문구 연희동 736,1,0,"SM3 Z.E,테슬라","[126.9293529, 37.5765416]"
327,서울특별시,서초구,LH서초5단지,서울특별시 서초구 우면동 368-1,1,2,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[127.0241635, 37.4584617]"
330,서울특별시,서초구,국립 중앙도서관,서울특별시 서초구 반포동 산 94-1,2,0,"SM3 Z.E,테슬라","[127.0023096, 37.4967079]"
341,서울특별시,서초구,방배1차 e편한세상,서울특별시 서초구 방배동 884-2,0,2,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[126.9996167, 37.4883031]"


In [164]:
emptydf2 = emptydf[emptydf['coordinates'].apply(lambda x: x[0]==0 and x[1]==0)]
print(emptydf2.shape)

(35, 8)


In [131]:
with open("./results/evca_dataframe_with_coor.pkl", "wb") as f:
    pickle.dump(data, f)

In [130]:
data['coordinates'].head(5)

0    [127.0884362, 37.4711594]
1    [127.0409418, 37.5145517]
2    [127.0852629, 37.4825117]
3    [127.0475020, 37.5173050]
4    [127.1012224, 37.4603217]
Name: coordinates, dtype: object

In [None]:
testdf

In [48]:
import time
for idx, row in tqdm(testdf.iterrows()):
    addr = f"{row.loc['stnAddr'].strip()} {row.loc['stnPlace']}"
    lat_lng = geocoding(addr)
    print(f"{addr} : [{lat_lng[0]}, {lat_lng[1]}]")
    time.sleep(1)
    break

0it [00:00, ?it/s]

[debug] > Exception: 'NoneType' object has no attribute 'latitude'
서울특별시 강남구 자곡동 자곡로3길 21 LH강남힐스테이트 : [0, 0]





In [33]:
len(data['stnAddr'])-len(data['stnAddr'].drop_duplicates())

124

In [13]:
carTypeKeys = data.carType.value_counts().keys().tolist()
carTypeList = []
for types in carTypeKeys:
    car_type_list = types.split(",")
    carTypeList += list(set(car_type_list) - set(carTypeList))
carTypeList.remove('')

Unnamed: 0,metro,city,stnPlace,stnAddr,rapidChargerCount,slowChargerCount,carType,coordinates
0,서울특별시,강남구,LH강남힐스테이트,서울특별시 강남구 자곡동 자곡로3길 21,1,4,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[127.0884362, 37.4711594]"
1,서울특별시,강남구,LH서울지사,서울특별시 강남구 선릉로 121길 12,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[127.0409418, 37.5145517]"
2,서울특별시,강남구,가람아파트,서울특별시 강남구 일원동 일원로 127,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[127.0852629, 37.4825117]"
3,서울특별시,강남구,강남구청 공영주차장,서울특별시 강남구 삼성동 16-1,1,0,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[127.0475020, 37.5173050]"
4,서울특별시,강남구,강남데시앙파크,서울특별시 강남구 세곡동 헌릉로590길 63,1,1,"SM3 Z.E,레이EV,소울EV,닛산리프,아이오닉EV,BMW i3,스파크EV,볼트E...","[127.1012224, 37.4603217]"
