# In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from urllib.parse import urlparse
import requests
import json
import pandas as pd
import pyproj
import warnings
import time 

# HTTP headers passed to the requests calls in this file.
# NOTE(review): advertising 'br' presumably requires Brotli decoding support
# in the HTTP client -- confirm it is installed where this runs.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
    'Content-Type': "application/json",
    "accept": "*/*",
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': "ko",
}

def parse_url(dest):
    """
    Search Naver Map for ``dest`` in Chrome and read coordinates from the URL.

    A Chrome session is started, the Naver Map search page for ``dest`` is
    opened, and after a two second pause the browser's current URL is read.
    The query string of that URL, minus its first two characters (presumably
    the ``c=`` prefix -- TODO confirm), is split on commas and the first two
    values are returned.

    Args:
        dest: Search keyword inserted into the Naver Map search URL.

    Returns:
        A two-element list of strings ``[x, y]`` taken from the URL query.
        NOTE(review): the caller feeds these to ``coordinate_transform``, so
        they are presumably EPSG:3857 coordinates -- confirm.

    Raises:
        ValueError: If the final URL does not contain two non-empty values.
    """

    # NOTE: this suppresses every warning for the rest of the process, not
    # only the ones raised inside this function.
    warnings.filterwarnings('ignore')

    driver = webdriver.Chrome("./chromedriver.exe")
    try:
        driver.get(f"https://map.naver.com/v5/search/{dest}?")

        # Fixed pause before reading the URL; presumably lets the page finish
        # redirecting to the address that carries the coordinates.
        time.sleep(2)

        curr_url = driver.current_url
    finally:
        # Always end the session, even when navigation fails, so no browser
        # or chromedriver process is left behind.
        driver.quit()

    coords = urlparse(curr_url).query[2:].split(",")[:2]
    if len(coords) != 2 or not all(coords):
        raise ValueError(f"no coordinates found in URL: {curr_url}")

    return coords


def coordinate_transform(x,y):
    """
    Convert a point from EPSG:3857 to EPSG:4326.

    Uses ``pyproj.Transformer`` instead of the deprecated
    ``pyproj.Proj(init=...)`` / ``pyproj.transform`` pair.

    Args:
        x: X coordinate in EPSG:3857. A string is converted with ``float``.
        y: Y coordinate in EPSG:3857. A string is converted with ``float``.

    Returns:
        The ``(x, y)`` pair in EPSG:4326 as returned by
        ``Transformer.transform``.

    Raises:
        ValueError: If a string argument cannot be parsed as a number.
    """
    # Coordinates may arrive as text; convert them explicitly rather than
    # relying on pyproj's implicit coercion.
    if isinstance(x, str):
        x = float(x)
    if isinstance(y, str):
        y = float(y)

    # always_xy=True keeps the x/y (longitude, latitude) axis order that the
    # old Proj(init='epsg:...') definitions used.
    transformer = pyproj.Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)

    return transformer.transform(x, y)


def get_biz_code(transformed_x,transformed_y):
    """
    Collect place ids from the Naver Map search API around a coordinate.

    The search endpoint is queried page by page (200 results per page, fixed
    URL-encoded query term) with ``searchCoord`` set to the given coordinate.
    Paging stops when a response contains an ``error`` key or carries no
    places.

    Args:
        transformed_x: First component of ``searchCoord``.
        transformed_y: Second component of ``searchCoord``.

    Returns:
        A list with the ``id`` of every place found, in response order.

    Raises:
        requests.RequestException: On network failure or timeout.
    """

    page = 1
    biz_code = []
    while True:
        # A timeout keeps a stalled connection from hanging the script.
        r = requests.get(url=f'https://map.naver.com/v5/api/search?caller=pcweb&query=%EC%9D%8C%EC%8B%9D%EC%A0%90&type=all&searchCoord={transformed_x};{transformed_y}&page={page}&displayCount=200&isPlaceRecommendationReplace=true&lang=ko',headers=headers,timeout=10).json()

        if 'error' in r:
            break

        # Any of these levels may be absent or null; treat that as "no
        # places" rather than crashing.
        place_section = (r.get('result') or {}).get('place') or {}
        places = place_section.get('list') or []

        # An empty page also ends the paging, so the loop cannot spin
        # forever when the API never reports an error.
        if not places:
            break

        biz_code.extend(value['id'] for value in places)

        page += 1

    return biz_code
    
def parsing_profile(place_profile):
    """
    Build the basic restaurant profile table.

    Selects the profile columns from ``place_profile``, renames some of them
    (``id`` -> ``busId``, ``name`` -> ``restaurantName``, ``category`` ->
    ``restaurantCategory``, ``phone`` -> ``tellNumber``, ``bizhourInfo`` ->
    ``businessHourInfo``) and adds ``siCode``, ``guCode`` and ``dongCode``
    from the first three space-separated tokens of ``fullAddress``.

    Args:
        place_profile: DataFrame containing at least the selected columns.

    Returns:
        A new DataFrame; ``place_profile`` itself is not modified. An address
        token that does not exist (short or non-string address) is ``None``.

    Raises:
        KeyError: If a required column is missing from ``place_profile``.
    """

    def address_part(address, position):
        # Short or missing addresses yield None instead of raising.
        if not isinstance(address, str):
            return None
        tokens = address.split(" ")
        return tokens[position] if position < len(tokens) else None

    # Basic restaurant details
    place_default_profile = place_profile[['id','name','category','x','y','fullAddress','fullRoadAddress','phone','description','imageURL','bizhourInfo','reviewCount']].copy()

    place_default_profile = place_default_profile.rename(columns={'id':'busId','name':'restaurantName','category':'restaurantCategory','phone':'tellNumber','bizhourInfo':'businessHourInfo'})

    for position, column in enumerate(('siCode', 'guCode', 'dongCode')):
        place_default_profile[column] = place_default_profile['fullAddress'].apply(address_part, args=(position,))

    return place_default_profile

def parsing_menus(place_profile):
    """
    Flatten the per-restaurant menu lists into a single table.

    Each entry of the ``menus`` column is turned into a DataFrame and tagged
    with the restaurant ``id`` of the same row as ``bus_id``.

    Args:
        place_profile: DataFrame with an ``id`` column and a ``menus`` column.
            Each ``menus`` entry is presumably a list of dicts with ``name``
            and ``price`` keys -- verify against the API response.

    Returns:
        A DataFrame with columns ``bus_id``, ``name`` and ``price`` and a
        fresh 0..n-1 index. It is empty when no restaurant has menu entries;
        a column that no menu entry provides is filled with missing values.
    """
    columns = ['bus_id', 'name', 'price']

    # menus
    frames = []
    # Pair ids and menus by position so a non-default index cannot break it.
    for bus_id, menu_items in zip(place_profile['id'], place_profile['menus']):
        # Skip restaurants with no menu data (empty list, None, NaN, ...).
        if not isinstance(menu_items, (list, tuple)) or not menu_items:
            continue
        frame = pd.DataFrame(menu_items)
        frame['bus_id'] = bus_id
        frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=columns)

    # One concat at the end avoids re-copying the growing table every step.
    return pd.concat(frames, ignore_index=True).reindex(columns=columns)


def parsing_options(place_info):
    """
    Flatten the per-restaurant option lists into a single table.

    Each place's ``options`` value is turned into a DataFrame and tagged with
    that place's ``id`` as ``bus_id``.

    Args:
        place_info: Sequence of dicts, each with an ``id`` key and an
            optional ``options`` key.

    Returns:
        A DataFrame holding the option fields plus ``bus_id``, with a fresh
        0..n-1 index. It is empty (only a ``bus_id`` column) when no place
        has options.
    """
    frames = []
    for place in place_info:
        option_items = place.get('options')
        # Places without options contribute no rows.
        if not option_items:
            continue
        frame = pd.DataFrame(option_items)
        frame['bus_id'] = place['id']
        frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=['bus_id'])

    # ignore_index renumbers the rows. The previous
    # reset_index(..., inplace=True) returned None, so callers never
    # received the table.
    return pd.concat(frames, ignore_index=True)

def parsing_images(place_info):
    """
    Flatten the per-restaurant image lists into a single table.

    Each place's ``images`` value is turned into a DataFrame and tagged with
    that place's ``id`` as ``bus_id``. The dicts in ``place_info`` are left
    untouched.

    Args:
        place_info: Sequence of dicts, each with an ``id`` key and an
            optional ``images`` key holding a list of image dicts.

    Returns:
        A DataFrame with columns ``bus_id``, ``number``, ``url`` and
        ``modDate`` and a fresh 0..n-1 index. It is empty when no place has
        images; a column that no image provides is filled with missing
        values.
    """
    columns = ['bus_id', 'number', 'url', 'modDate']

    # images table
    frames = []
    for place in place_info:
        image_items = place.get('images')
        # Places without images contribute no rows.
        if not image_items:
            continue
        # Tag the new frame rather than the source dicts, so the caller's
        # data is not modified as a side effect.
        frame = pd.DataFrame(image_items)
        frame['bus_id'] = place['id']
        frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=columns)

    # One concat at the end avoids re-copying the growing table every step.
    return pd.concat(frames, ignore_index=True).reindex(columns=columns)
    

def get_place_info(biz_code):
    """
    Download the summary of every place id and split it into tables.

    For each code the Naver Map ``sites/summary`` endpoint is requested and
    its JSON body kept. The responses are then passed to ``parsing_profile``,
    ``parsing_menus``, ``parsing_options`` and ``parsing_images``.

    Args:
        biz_code: Sequence of place ids.

    Returns:
        A tuple ``(profile, menus, options, images)`` holding the results of
        the four parsing functions, in that order. When ``biz_code`` is
        empty, four empty DataFrames are returned and no request is made.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError: If none of the responses provides a required field.
    """

    # With nothing to fetch the column selection below would fail on an
    # empty frame, so return early.
    if len(biz_code) == 0:
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # A timeout keeps a stalled connection from hanging the whole run.
    place_info = [requests.get(url=f'https://map.naver.com/v5/api/sites/summary/{code}?lang=ko',
    headers=headers, timeout=10).json() for code in biz_code]

    place_profile = pd.DataFrame(place_info)[['id','name','category' ,'x','y','fullAddress','fullRoadAddress','phone','description','images','imageURL','categories','bizhourInfo','menus','reviewCount']]

    profile = parsing_profile(place_profile)
    menus = parsing_menus(place_profile)
    options = parsing_options(place_info)
    images = parsing_images(place_info)

    return profile,menus,options,images


def save(profile,menus,options,images):
    """
    Upload the collected restaurant profiles.

    Only ``profile`` is sent, to the ``createRestaurantInfo`` API, and only
    when it contains at least one entry. ``menus``, ``options`` and
    ``images`` are accepted but currently not uploaded.
    """

    # Nothing to send when no profile rows were collected.
    if len(profile) == 0:
        return

    post_method(profile,'createRestaurantInfo')

    # The remaining uploads are disabled for now:
    #   menus   -> 'createRestaurantMenus'
    #   options -> 'createRestaurantOptions'
    #   images  -> 'createRestaurantImages'
    # Each would be posted with post_method when its table is non-empty.

        
def post_method(df, api, base_url="http://localhost:8080/restaurant/", timeout=10):
    """
    POST every row of ``df`` as a JSON object to ``base_url + api``.

    Each row becomes a dict whose keys and values are all converted to
    strings, serialized as UTF-8 JSON (non-ASCII characters kept as-is) and
    sent in its own request with the module-level ``headers``.

    Args:
        df: DataFrame whose rows are uploaded one request per row.
        api: Endpoint name appended to ``base_url``.
        base_url: Prefix of the target URL; defaults to the local server
            this script was written against.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        None. Responses are not inspected, so a non-2xx status is not
        reported.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    endpoint = base_url + api

    # Every value is stringified, so missing values are sent as text such as
    # "nan" or "None".
    payload = [{f"{k}":f"{v}" for k, v in row.items()} for _,row in df.iterrows()]

    for row in payload:
        requests.post(headers=headers, url=endpoint, data=json.dumps(row, indent=4, ensure_ascii=False).encode('utf-8'), timeout=timeout)

def get_restaurant(dest):
    """
    Run the whole pipeline for one search keyword.

    Steps, each feeding the next: ``parse_url`` -> ``coordinate_transform``
    -> ``get_biz_code`` -> ``get_place_info`` -> ``save``.
    """
    # Coordinates read from the map URL for the keyword.
    map_x, map_y = parse_url(dest)

    # Convert them before they are used as the search coordinate.
    search_x, search_y = coordinate_transform(map_x, map_y)

    # Ids of the places found around that coordinate.
    place_ids = get_biz_code(search_x, search_y)

    # Fetch the details and hand the resulting tables to the uploader.
    profile_table, menu_table, option_table, image_table = get_place_info(place_ids)
    save(profile_table, menu_table, option_table, image_table)




# Script entry point: run the pipeline for the hard-coded search keyword.
if __name__ == "__main__":

    get_restaurant("연신내역")

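# In [6]:
# pip install warnings
# (`warnings` is part of the Python standard library, so it does not need to be installed.)
#
# Note: you may need to restart the kernel to use updated packages.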


