# Import

In [35]:
import socket
from urllib.parse import urlencode, quote_plus

import pandas as pd
import requests

from db.mongo import MyMongo
# from api.kakao_geocode import get_geocode_from_address

# 함수

In [29]:
def get_distance_from_coords(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in metres between two coordinates.

    Uses the haversine formula on a sphere of radius 6373 km.

    Args:
        lat1, lon1: Latitude / longitude of the first point in decimal
            degrees (numbers or numeric strings).
        lat2, lon2: Latitude / longitude of the second point in decimal
            degrees (numbers or numeric strings).

    Returns:
        float: Distance between the two points in metres.

    Raises:
        ValueError: If an argument is a string that does not parse as a number.
        TypeError: If an argument cannot be converted to float.
    """
    # Imported locally: the notebook's import cell never brings these names
    # into scope, so the bare radians/sin/cos/atan2/sqrt calls raised NameError.
    from math import atan2, cos, radians, sin, sqrt

    # approximate radius of earth in km
    R = 6373.0

    # float() lets callers pass coordinates that are stored as strings.
    lat1 = radians(float(lat1))
    lon1 = radians(float(lon1))
    lat2 = radians(float(lat2))
    lon2 = radians(float(lon2))

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) fail with a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # km -> m
    distance = R * c * 1000

    return distance


def get_kakao_api_key():
    """Return the Kakao REST API key with surrounding whitespace stripped.

    Lookup order:
      1. The ``KAKAO_API_KEY`` environment variable, when set and non-blank.
      2. A key file whose path is selected by this machine's hostname.

    Returns:
        str: The API key.

    Raises:
        RuntimeError: If the environment variable is not set and the current
            hostname has no key file configured.
        OSError: If the configured key file cannot be opened.
    """
    import os  # local import: `os` is not part of the notebook's import cell

    env_key = os.environ.get('KAKAO_API_KEY', '').strip()
    if env_key:
        return env_key

    # hostname -> location of the private key file on that machine
    key_files = {
        'ideapad': '/home/jake/Private/kakao_api_key.txt',
        'danbi-mac.local': '/Users/jake/Private/kakao_api_key.txt',
    }

    hname = socket.gethostname()
    file_path = key_files.get(hname)
    if file_path is None:
        # Previously an unknown host fell through both branches and crashed
        # with UnboundLocalError on `file_path`.
        raise RuntimeError(
            f'No Kakao API key configured for host {hname!r}; '
            'set the KAKAO_API_KEY environment variable.'
        )

    with open(file_path) as f:
        return f.read().strip()


def fetch_geo_response_from_kakao(addr, url_type='address', timeout=10):
    """Query the Kakao local-search API and return the first matching record.

    Args:
        addr: Search text sent as the ``query`` parameter (an address for
            ``url_type='address'``, free text for ``url_type='keyword'``).
        url_type: ``'address'`` or ``'keyword'``; selects the endpoint.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict | None: For ``'address'``, the first document's ``road_address``
        entry, or its ``address`` entry when the former is empty. For
        ``'keyword'``, the first document itself. ``None`` when the search
        returned no documents.

    Raises:
        KeyError: If ``url_type`` is not one of the supported endpoints.
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    urls = {
        'address': 'https://dapi.kakao.com/v2/local/search/address.json',
        'keyword': 'https://dapi.kakao.com/v2/local/search/keyword.json'
    }

    def get_response_by_type(url_type, response):
        # Pick the relevant part of the first document, or None if no hit.
        if not response:
            return None

        documents = response.get('documents') or []
        total_count = response.get('meta', {}).get('total_count', 0)
        if total_count == 0 or not documents:
            return None

        doc = documents[0]
        if url_type == 'address':
            return doc.get('road_address') or doc.get('address')

        if url_type == 'keyword':
            return doc

        return None

    url = urls[url_type]
    key = get_kakao_api_key()
    headers = {'Authorization': f'KakaoAK {key}'}
    # Let requests encode the query string; the timeout keeps one stalled
    # connection from hanging a long batch of lookups.
    res = requests.get(url, headers=headers, params={'query': addr}, timeout=timeout)
    # Surface auth/quota failures as HTTPError instead of an opaque KeyError
    # on the missing 'meta' field of an error body.
    res.raise_for_status()

    return get_response_by_type(url_type, res.json())


# 데이터 가져오기

In [2]:
# Read the TSV with every column left as object dtype (no type inference),
# then keep only the first row for each 관리번호.
tobacco_path = 'cvs_from_tobacco.tsv'
cvs_tobacco = (
    pd.read_csv(tobacco_path, sep='\t', dtype=object)
    .drop_duplicates(subset=['관리번호'])
)

print(f'cvs_tobacco: {len(cvs_tobacco)}')  # before: 67602, after: 65541

cvs_tobacco: 65541


# Columns

In [25]:
# Alternative column list (disabled):
# cols = ['영업상태명', '상세영업상태명', '폐업일자', '휴업시작일자', '휴업종료일자', '재개업일자', '소재지전화', '사업장명', '최종수정시점', '데이터갱신구분', '데이터갱신일자', '업태구분명',
#         '좌표정보(X)', '좌표정보(Y)', '위생업태명', '남성종사자수', '여성종사자수', '영업장주변구분명', '등급구분명', '급수시설구분명', '총종업원수', '본사종업원수',
#         '건물소유구분명', '보증액', '월세액', '다중이용업소여부', '시설총규모', '전통업소지정번호', '전통업소주된음식', '홈페이지']

# Column subset used when displaying frames in the cells below.
cols = [
    '관리번호', '사업장명', '상세영업상태명',
    '좌표정보(X)', '좌표정보(Y)', '도로명전체주소',
    '지정일자', '폐업일자', '최종수정시점', '데이터갱신일자', '업태구분명',
    'lat', 'lng',
]

# Show every column the loaded frame actually has.
cvs_tobacco.columns

Index(['개방서비스ID', '개방서비스명', '개방자치단체코드', '관리번호', '데이터갱신구분', '데이터갱신일자',
       '도로명우편번호', '도로명전체주소', '민원종류명', '번호', '사업장명', '상세영업상태명', '상세영업상태코드',
       '소재지면적', '소재지우편번호', '소재지전체주소', '소재지전화', '업태구분명', '영업상태구분코드', '영업상태명',
       '인허가일자', '인허가취소일자', '재개업일자', '좌표정보(X)', '좌표정보(Y)', '지정일자', '최종수정시점',
       '폐업일자', '휴업시작일자', '휴업종료일자', 'lat', 'lng'],
      dtype='object')

# 상세영업상태명:

## 정상영업: 36684개
## 폐업처리: 27710개

In [4]:
# Tally rows per value of 상세영업상태명 (detailed business status).
status_counts = cvs_tobacco['상세영업상태명'].value_counts()
status_counts

정상영업        36684
폐업처리        27710
지정취소          694
직권취소          320
영업정지           85
휴업처리           38
임시소매기간만료       10
Name: 상세영업상태명, dtype: int64

# 영업상태별 DF 생성

In [10]:
# Rows whose detailed status contains '정상영업'.
# na=False counts a missing status as "no match" instead of leaving NaN in
# the boolean mask, which .loc would reject.
cvs_active = cvs_tobacco.loc[cvs_tobacco['상세영업상태명'].str.contains('정상영업', na=False)]

# reindex (rather than .loc[:, cols]) so the preview still renders when some
# of `cols` are not in the frame yet -- 'lat' / 'lng' are only written by the
# geocoding cell further down. Absent columns show as NaN instead of raising
# KeyError.
cvs_active.reindex(columns=cols)

Unnamed: 0,관리번호,사업장명,상세영업상태명,지정일자,폐업일자,최종수정시점,데이터갱신일자,업태구분명,좌표정보(X),좌표정보(Y)
0,2006300010105600037,세븐일레븐 종로허브점,정상영업,,,20080221000000.0,2018-08-31 23:59:59.0,,198658.943470079,452604.254902527
1,1999300007605600150,GS25동숭점,정상영업,19990628,,20120404135825.0,2018-08-31 23:59:59.0,,200156.610752518,453486.14839667897
2,2007300010105600043,이마트24 광화문시대점,정상영업,20070625,,20180622133108.0,2018-08-31 23:59:59.0,,197559.791065754,452388.315239125
3,2007300012905600006,세븐일레븐 독립문역점,정상영업,20070807,,20130502163622.0,2018-08-31 23:59:59.0,,196230.717211362,452476.18106458295
4,2008300012905600082,세븐일레븐 종로배화점,정상영업,20081007,,20081007145235.0,2018-08-31 23:59:59.0,,197231.292851585,452764.45729597396
5,2003300007605600130,세븐일레븐 종로향기점,정상영업,20031216,,20150731160736.0,2018-08-31 23:59:59.0,,201843.179937827,452443.27937024704
6,2007300012905600047,GS25 명륜아남점,정상영업,20071119,,20071119102124.0,2018-08-31 23:59:59.0,,199944.16729832898,453791.442084652
7,2007300012905600048,GS25 혜화대명점,정상영업,20071120,,20090430153802.0,2018-08-31 23:59:59.0,,200031.226482075,453557.400004758
8,2007300012905600055,GS25 평창문화로점,정상영업,20071204,,20180503150542.0,2018-08-31 23:59:59.0,,197865.966333333,456429.34466666705
9,2004300007605600095,세븐일레븐 종로숭인점,정상영업,20040830,,20090831141431.0,2018-08-31 23:59:59.0,,201923.40606958,452887.220420862


In [11]:
# Rows whose detailed status contains '폐업처리'.
# na=False counts a missing status as "no match" instead of leaving NaN in
# the boolean mask, which .loc would reject.
cvs_quit = cvs_tobacco.loc[cvs_tobacco['상세영업상태명'].str.contains('폐업처리', na=False)]

# reindex (rather than .loc[:, cols]) so the preview still renders when some
# of `cols` are not in the frame yet -- 'lat' / 'lng' are only written by the
# geocoding cell further down. Absent columns show as NaN instead of raising
# KeyError.
cvs_quit.reindex(columns=cols)

Unnamed: 0,관리번호,사업장명,상세영업상태명,지정일자,폐업일자,최종수정시점,데이터갱신일자,업태구분명,좌표정보(X),좌표정보(Y)
225,2003300007605600035,미니스톱 혜화점,폐업처리,,20060605.0,20080221000000.0,2018-08-31 23:59:59.0,,199947.810511547,453528.91270822205
226,2006300007605600007,미니스톱 종각역점,폐업처리,20060127,20080325.0,20080325174729.0,2018-08-31 23:59:59.0,,198572.839145325,452022.373156033
227,2006300007605600008,GS25 종로용비점,폐업처리,20060203,20100225.0,20100225173905.0,2018-08-31 23:59:59.0,,197567.747824202,452347.973860449
228,2006300007605600009,GS25 종로숭인점,폐업처리,,20061201.0,20080221000000.0,2018-08-31 23:59:59.0,,201960.951300031,452506.17859595397
229,2008300012905600064,GS25종로리버,폐업처리,20080709,20140502.0,20140502153724.0,2018-08-31 23:59:59.0,,198614.61995563,451842.501561085
243,2004300007605600079,미니스톱경희궁점,폐업처리,20040721,20151012.0,20151012163236.0,2018-08-31 23:59:59.0,,197465.223921125,452366.61389989796
244,1999300007605600163,씨유 종로효제점,폐업처리,19990723,20171101.0,20171101160936.0,2018-08-31 23:59:59.0,,,
245,1999300007605600143,GS25 종각점,폐업처리,19990624,20080229.0,20080229172350.0,2018-08-31 23:59:59.0,,,
246,2006300010105600019,미니스톱 창신역점,폐업처리,,20061201.0,20080221000000.0,2018-08-31 23:59:59.0,,201304.223298708,452882.754347238
247,2006300010105600022,GS25 대학로점,폐업처리,20060824,20090116.0,20090116172116.0,2018-08-31 23:59:59.0,,200242.66173302103,453287.29409741296


# 구분선----------

# 카카오에서 좌표 가져오기

In [30]:
# Fill 'lat' / 'lng' for each row from the Kakao local-search API: look up the
# road address when there is one, otherwise search by store name.
for idx, row in cvs_tobacco.iterrows():
    # Rows that already carry coordinates are skipped so the cell can be
    # re-run after an interruption without repeating finished lookups.
    if pd.notna(row.get('lat')) and pd.notna(row.get('lng')):
        continue

    addr = row['도로명전체주소']
    name = row['사업장명']
    doc = None

    # Empty TSV fields are read as NaN, and NaN is truthy: a bare `if addr:`
    # sent NaN to the API and never reached the name fallback. Require a real,
    # non-blank string instead.
    if isinstance(addr, str) and addr.strip():
        doc = fetch_geo_response_from_kakao(addr)
    elif isinstance(name, str) and name.strip():
        doc = fetch_geo_response_from_kakao(name, url_type='keyword')
    if doc:
        # The response's 'y' is stored as latitude and 'x' as longitude.
        cvs_tobacco.at[idx, 'lat'] = doc['y']
        cvs_tobacco.at[idx, 'lng'] = doc['x']

KeyboardInterrupt: 

In [32]:
# Number of rows that have a 'lat' value.
has_lat = cvs_tobacco['lat'].notna()
len(cvs_tobacco.loc[has_lat, cols])

9822

In [33]:
# Preview the first rows that have a 'lat' value.
geocoded = cvs_tobacco.loc[cvs_tobacco['lat'].notna(), cols]
geocoded.head()

Unnamed: 0,관리번호,사업장명,상세영업상태명,좌표정보(X),좌표정보(Y),도로명전체주소,지정일자,폐업일자,최종수정시점,데이터갱신일자,업태구분명,lat,lng
0,2006300010105600037,세븐일레븐 종로허브점,정상영업,198658.943470079,452604.254902527,"서울특별시 종로구 삼일대로 461 (경운동,운현궁SK허브 101동 102호)",,,20080221000000.0,2018-08-31 23:59:59.0,,37.57559556952654,126.98582687874092
1,1999300007605600150,GS25동숭점,정상영업,200156.610752518,453486.14839667897,서울특별시 종로구 동숭길 140 (혜화동),19990628.0,,20120404135825.0,2018-08-31 23:59:59.0,,37.58366929825265,127.0025632117959
2,2007300010105600043,이마트24 광화문시대점,정상영업,197559.791065754,452388.315239125,"서울특별시 종로구 사직로8길 42, 102-2호 (내수동,광화문시대 상가1층)",20070625.0,,20180622133108.0,2018-08-31 23:59:59.0,,37.5736728979999,126.97294992549394
3,2007300012905600006,세븐일레븐 독립문역점,정상영업,196230.717211362,452476.18106458295,서울특별시 종로구 통일로 248 (무악동),20070807.0,,20130502163622.0,2018-08-31 23:59:59.0,,37.57456546325696,126.95812421174286
4,2008300012905600082,세븐일레븐 종로배화점,정상영업,197231.292851585,452764.457295974,서울특별시 종로구 필운대로 12 (필운동),20081007.0,,20081007145235.0,2018-08-31 23:59:59.0,,37.57716379872892,126.96943884968032


# 파일 저장

In [39]:
# Disabled TSV exports of the `cols` subset of each frame:
# cvs_active.loc[::, cols].to_csv('cvs_active.tsv', sep='\t', index=False)
# cvs_quit.loc[::, cols].to_csv('cvs_quit.tsv', sep='\t', index=False)
# cvs_tobacco.loc[::, cols].to_csv('cvs_all.tsv', sep='\t', index=False)
# cvs_tobacco.loc[::, cols].to_csv('cvs_all_lat_lng.tsv', sep='\t', index=False)

# Hand the full frame (all columns, not just `cols`) to the project's Mongo helper.
# NOTE(review): judging by its name, delete_and_insert_df replaces the existing
# documents with the frame's rows, and the two 'cvs' arguments are presumably
# database and collection names -- confirm against db.mongo.MyMongo.
with MyMongo() as db:
    db.delete_and_insert_df('cvs', 'cvs', cvs_tobacco)

<--Mongo Connected.
{}
Deleted rows: 0
Inserted rows: 65541
Mongo Connection Closed.-->


# 영업중인 지에스: 12643개

In [6]:
# Masks over the store name: Hangul spelling and Latin spelling of the brand.
store_names = cvs_tobacco['사업장명']
idx_tobacco_gs0 = store_names.str.contains('지에스')
idx_tobacco_gs1 = store_names.str.contains('GS')

# Mask for rows whose detailed status contains '정상영업'.
idx_tobacco_active = cvs_tobacco['상세영업상태명'].str.contains('정상영업')

# Either spelling matches AND the status matches.
is_gs = idx_tobacco_gs0 | idx_tobacco_gs1
cvs_tobacco.loc[is_gs & idx_tobacco_active, cols]

Unnamed: 0,관리번호,사업장명,상세영업상태명,지정일자,폐업일자,최종수정시점,데이터갱신일자,업태구분명,좌표정보(X),좌표정보(Y)
1,1999300007605600150,GS25동숭점,정상영업,19990628,,20120404135825.0,2018-08-31 23:59:59.0,,200156.610752518,453486.14839667897
6,2007300012905600047,GS25 명륜아남점,정상영업,20071119,,20071119102124.0,2018-08-31 23:59:59.0,,199944.16729832898,453791.442084652
7,2007300012905600048,GS25 혜화대명점,정상영업,20071120,,20090430153802.0,2018-08-31 23:59:59.0,,200031.226482075,453557.400004758
8,2007300012905600055,GS25 평창문화로점,정상영업,20071204,,20180503150542.0,2018-08-31 23:59:59.0,,197865.966333333,456429.34466666705
10,2010300012905600067,GS25 종로방통대점,정상영업,20100806,,20100830164822.0,2018-08-31 23:59:59.0,,200309.35812063699,453044.212129777
12,2008300012905600071,GS25종로관훈점,정상영업,20080812,,20080812195838.0,2018-08-31 23:59:59.0,,198500.44755914,452243.43365493603
13,2004300007605600116,GS25 동묘역점,정상영업,20041126,,20091120113929.0,2018-08-31 23:59:59.0,,201350.47337553801,452279.74426029995
16,2008300012905600029,GS25 종로안국점,정상영업,20080408,,20080408170518.0,2018-08-31 23:59:59.0,,198685.005097689,452737.39991695795
20,2018300016905600075,지에스25종로영풍점,정상영업,20181129,,20181129111120.0,2018-12-01 02:20:16.0,,198351.81873748198,451921.60302604
28,2006300010105600061,GS25 창신역점,정상영업,20061221,,20150210172221.0,2018-08-31 23:59:59.0,,201304.223298708,452882.754347238


# 영업중인 씨유: 12301개

In [7]:
# Masks over the store name: Hangul spelling and Latin spelling of the brand.
store_names = cvs_tobacco['사업장명']
idx_tobacco_cu0 = store_names.str.contains('씨유')
idx_tobacco_cu1 = store_names.str.contains('CU')
# Other name patterns that were tried and disabled:
# idx_tobacco_cu2 = cvs_tobacco['사업장명'].str.contains('훼미리마트')  # search results: 0
# idx_tobacco_cu3 = cvs_tobacco['사업장명'].str.contains('패밀리마트')  # search results: 0

# Mask for rows whose detailed status contains '정상영업'.
idx_tobacco_active = cvs_tobacco['상세영업상태명'].str.contains('정상영업')

# Either spelling matches AND the status matches.
is_cu = idx_tobacco_cu0 | idx_tobacco_cu1
cvs_tobacco.loc[is_cu & idx_tobacco_active, cols]

Unnamed: 0,관리번호,사업장명,상세영업상태명,지정일자,폐업일자,최종수정시점,데이터갱신일자,업태구분명,좌표정보(X),좌표정보(Y)
14,2008300012905600022,씨유 종로5가역점,정상영업,20080320,,20120727105320.0,2018-08-31 23:59:59.0,,199936.591919229,452096.36720458604
17,2010300012905600034,씨유 종로성대점,정상영업,20100503,,20120730111835.0,2018-08-31 23:59:59.0,,199667.34921991,453570.52997439104
19,2002300007605600033,씨유 종로제일점,정상영업,20020325,,20170321100321.0,2018-08-31 23:59:59.0,,198979.900757892,452173.28417043
21,2012300012905600086,씨유 성균관대 인문관점,정상영업,20121004,,20121005090436.0,2018-08-31 23:59:59.0,,199231.345644103,454019.36089002
30,2002300007605600051,씨유 마로니에점,정상영업,20020604,,20120730112725.0,2018-08-31 23:59:59.0,,200099.913137334,452901.85254681896
32,2009300012905600079,씨유 종로구청점,정상영업,20091009,,20120723125130.0,2018-08-31 23:59:59.0,,198205.621520751,452254.21760316705
33,2009300012905600080,씨유 종로신영점,정상영업,20091019,,20120720113731.0,2018-08-31 23:59:59.0,,196557.46211867602,455479.388346619
34,2006300010105600004,씨유 종로3가점,정상영업,20060718,,20120718125451.0,2018-08-31 23:59:59.0,,198984.839771292,452270.384433732
42,2018300016905600054,씨유 종로동숭점,정상영업,20180914,,20180917092231.0,2018-09-17 23:59:59.0,,200163.533936465,453385.896458574
44,2018300016905600058,씨유 동대문종합시장점,정상영업,20180927,,20180928090131.0,2018-09-28 23:59:59.0,,200542.25824337898,452057.12685958296


# 영업중인 이마트24: 2570개

In [8]:
# Masks over the store name: Hangul spelling and Latin spelling of the brand.
store_names = cvs_tobacco['사업장명']
idx_tobacco_emart0 = store_names.str.contains('이마트24')
idx_tobacco_emart1 = store_names.str.contains('emart24')

# Mask for rows whose detailed status contains '정상영업'.
idx_tobacco_active = cvs_tobacco['상세영업상태명'].str.contains('정상영업')

# Either spelling matches AND the status matches.
is_emart24 = idx_tobacco_emart0 | idx_tobacco_emart1
cvs_tobacco.loc[is_emart24 & idx_tobacco_active, cols]

Unnamed: 0,관리번호,사업장명,상세영업상태명,지정일자,폐업일자,최종수정시점,데이터갱신일자,업태구분명,좌표정보(X),좌표정보(Y)
2,2007300010105600043,이마트24 광화문시대점,정상영업,20070625,,20180622133108.0,2018-08-31 23:59:59.0,,197559.791065754,452388.315239125
47,2018300016905600064,이마트24 행촌독립문점,정상영업,20181029,,20181029173631.0,2018-10-31 02:38:21.0,,196550.626191895,452473.82908988796
75,2017300016905600049,이마트24정독천수점,정상영업,20170628,,20181023174801.0,2018-10-25 02:37:49.0,,198353.454278001,453041.478323415
105,2016300016905600015,이마트24 이화사거리점,정상영업,20160219,,20171204133345.0,2018-08-31 23:59:59.0,,200158.82002238,452691.97984761
134,2017300016905600072,이마트24 삼청동점,정상영업,20170925,,20170925134551.0,2018-08-31 23:59:59.0,,198286.187157832,453260.691369535
152,2018300016905600016,이마트24 광화문 포스탈점,정상영업,20180227,,20180227133500.0,2018-08-31 23:59:59.0,,197995.21879340897,451945.047247204
180,2017300016905600095,(주)이마트24 성대후문점,정상영업,20171129,,20171129172313.0,2018-08-31 23:59:59.0,,199428.158333717,454084.366462445
192,2018300016905600029,이마트24 종로창신점,정상영업,20180601,,20180601182141.0,2018-08-31 23:59:59.0,,201056.87717041897,452038.303804553
198,2018300016905600024,이마트24 종로낙원점,정상영업,20180420,,20180516110409.0,2018-08-31 23:59:59.0,,198966.78218634,452232.345396899
204,2018300016905600006,이마트24 종로3가점,정상영업,20180130,,20180130180947.0,2018-08-31 23:59:59.0,,199216.03021164399,452287.079932058


# 구분선------------