# Import

In [6]:
import socket
from urllib.parse import urlencode, quote_plus

import pandas as pd
import requests

from db.mongo import MyMongo
# from api.kakao_geocode import get_geocode_from_address

# 함수

In [None]:
def get_distance_from_coords(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two points.

    Uses the haversine formula on a sphere with a radius of 6373 km.

    Args:
        lat1: Latitude of the first point in decimal degrees.
        lon1: Longitude of the first point in decimal degrees.
        lat2: Latitude of the second point in decimal degrees.
        lon2: Longitude of the second point in decimal degrees.
            All four accept numbers or numeric strings.

    Returns:
        The distance between the two points in meters, as a float.

    Raises:
        ValueError: If a coordinate string cannot be parsed as a float.
    """
    # Imported locally: the file's import section does not bring these in.
    from math import atan2, cos, radians, sin, sqrt

    # approximate radius of earth in km
    R = 6373.0

    # float() lets coordinates stored as strings be passed in directly.
    lat1 = radians(float(lat1))
    lon1 = radians(float(lon1))
    lat2 = radians(float(lat2))
    lon2 = radians(float(lon2))

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # km -> m
    distance = R * c * 1000

    return distance


def get_kakao_api_key():
    """Return the Kakao API key stored on the current machine.

    The key file location is selected by the machine's hostname; only the
    hosts listed in ``key_files`` are supported.

    Returns:
        The contents of the key file with surrounding whitespace removed.

    Raises:
        RuntimeError: If no key file is configured for the current hostname.
        OSError: If the key file cannot be opened or read.
    """
    key_files = {
        'ideapad': '/home/jake/Private/kakao_api_key.txt',
        'danbi-mac.local': '/Users/jake/Private/kakao_api_key.txt',
    }

    hname = socket.gethostname()
    file_path = key_files.get(hname)
    if file_path is None:
        # Fail with an explicit message instead of an UnboundLocalError
        # further down when the host is not one of the known machines.
        raise RuntimeError(
            f'No Kakao API key file configured for host {hname!r}'
        )

    with open(file_path) as f:
        return f.read().strip()


def fetch_geo_response_from_kakao(addr, url_type='address'):
    """Query the Kakao Local search API and return the first match.

    Args:
        addr: Query text sent as the ``query`` parameter (an address for
            ``url_type='address'``, a search keyword for ``'keyword'``).
        url_type: Which endpoint to call: ``'address'`` or ``'keyword'``.

    Returns:
        ``None`` when the search has no results. For ``'address'``, the
        first document's ``road_address`` entry, or its ``address`` entry
        when ``road_address`` is empty. For ``'keyword'``, the first
        document itself.

    Raises:
        KeyError: If ``url_type`` is not a known endpoint.
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    urls = {
        'address': 'https://dapi.kakao.com/v2/local/search/address.json',
        'keyword': 'https://dapi.kakao.com/v2/local/search/keyword.json'
    }

    def get_response_by_type(url_type, response):
        # An empty result set comes back with total_count == 0 and no
        # documents; guard on both so indexing [0] can never fail.
        if not response or response['meta']['total_count'] == 0:
            return None

        documents = response.get('documents')
        if not documents:
            return None

        doc = documents[0]
        if url_type == 'address':
            return doc['road_address'] or doc['address']

        if url_type == 'keyword':
            return doc

        return None

    # Resolve the endpoint first so a bad url_type fails before any I/O.
    url = urls[url_type]
    key = get_kakao_api_key()
    headers = {'Authorization': f'KakaoAK {key}'}

    # `params` lets requests do the URL encoding; the timeout keeps a
    # stalled connection from hanging the caller indefinitely.
    res = requests.get(
        url,
        headers=headers,
        params={'query': addr},
        timeout=10,
    )
    # Error bodies have no 'meta'/'documents' keys; surface the HTTP error
    # itself rather than a KeyError from the parser above.
    res.raise_for_status()

    return get_response_by_type(url_type, res.json())


# 데이터 가져오기

In [7]:
# Load the 'cvs' table into a DataFrame.
# NOTE(review): the two 'cvs' arguments are presumably the database and
# collection names — confirm against MyMongo.get_df_from_table.
with MyMongo() as mongo:
    cvs_tobacco = mongo.get_df_from_table('cvs', 'cvs')

# Alternative source kept for reference: TSV dump, de-duplicated on '관리번호'.
# cvs_tobacco = pd.read_csv('cvs_all_lat_lng.tsv', sep='\t', dtype=object).drop_duplicates(['관리번호'])

# Row counts observed — before: 67602, after: 65541
print(f'cvs_tobacco: {len(cvs_tobacco)}')

<--Mongo Connected.
Mongo Connection Closed.-->
cvs_tobacco: 65541


In [8]:
# Earlier, wider column selection (kept for reference):
# cols = ['영업상태명', '상세영업상태명', '폐업일자', '휴업시작일자', '휴업종료일자', '재개업일자', '소재지전화', '사업장명', '최종수정시점', '데이터갱신구분', '데이터갱신일자', '업태구분명',
#         '좌표정보(X)', '좌표정보(Y)', '위생업태명', '남성종사자수', '여성종사자수', '영업장주변구분명', '등급구분명', '급수시설구분명', '총종업원수', '본사종업원수',
#         '건물소유구분명', '보증액', '월세액', '다중이용업소여부', '시설총규모', '전통업소지정번호', '전통업소주된음식', '홈페이지']

# Columns of interest for the inspection cells.
cols = [
    '관리번호',
    '사업장명',
    '상세영업상태명',
    '좌표정보(X)',
    '좌표정보(Y)',
    '도로명전체주소',
    '지정일자',
    '폐업일자',
    '최종수정시점',
    '데이터갱신일자',
    '업태구분명',
    'lat',
    'lng',
]

# Show every column available in the loaded frame.
cvs_tobacco.columns

Index(['_id', 'lat', 'lng', '개방서비스ID', '개방서비스명', '개방자치단체코드', '관리번호', '데이터갱신구분',
       '데이터갱신일자', '도로명우편번호', '도로명전체주소', '민원종류명', '번호', '사업장명', '상세영업상태명',
       '상세영업상태코드', '소재지면적', '소재지우편번호', '소재지전체주소', '소재지전화', '업태구분명', '영업상태구분코드',
       '영업상태명', '인허가일자', '인허가취소일자', '재개업일자', '좌표정보(X)', '좌표정보(Y)', '지정일자',
       '최종수정시점', '폐업일자', '휴업시작일자', '휴업종료일자'],
      dtype='object')

In [9]:
# Number of rows that already have a latitude value.
has_lat = cvs_tobacco['lat'].notna()
len(cvs_tobacco.loc[has_lat, cols])
# cvs_tobacco.loc[::, cols]

9822

In [None]:
i = 0

# Rows still lacking a latitude; only the two lookup columns are needed.
missing_coords = cvs_tobacco.loc[
    cvs_tobacco['lat'].isna(), ['도로명전체주소', '사업장명']
]

# iterrows() yields (index label, row) pairs. Iterating the DataFrame
# itself would yield column labels only and break the tuple unpacking.
for idx, row in missing_coords.iterrows():
    addr = row['도로명전체주소']
    name = row['사업장명']
    doc = None

    # Missing values may arrive as NaN, which is truthy, so test for it
    # explicitly before sending the value to the geocoder.
    if pd.notna(addr) and addr:
        doc = fetch_geo_response_from_kakao(addr)
    elif pd.notna(name) and name:
        doc = fetch_geo_response_from_kakao(name, url_type='keyword')
    if doc:
        # Kakao documents carry longitude in 'x' and latitude in 'y'.
        cvs_tobacco.at[idx, 'lat'] = doc['y']
        cvs_tobacco.at[idx, 'lng'] = doc['x']
    # NOTE(review): within the lines visible here `i` is never incremented,
    # so this branch cannot fire, and update_one_bulk is called without any
    # payload. Left untouched — confirm the intended batching and the
    # MyMongo.update_one_bulk signature before changing it.
    if i == 20:
        with MyMongo() as db:
            db.update_one_bulk('cvs', 'cvs',)