# In [6]:
# !pip install selenium requests pandas pyproj
from selenium import webdriver
from selenium.webdriver.common.by import By
from urllib.parse import urlparse
import requests
import json
import pandas as pd
import pyproj
import warnings
import time 
import platform

# Default HTTP headers passed as `headers=` by the request helpers below.
# NOTE(review): 'br' is advertised in accept-encoding; presumably the runtime
# can decode brotli responses — confirm, otherwise drop 'br'.
headers = {
    "Content-Type": "application/json",
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "ko",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
}

# Start a module-level Chrome session, choosing the chromedriver binary by OS.
# Only Windows uses the ".exe" suffix; the previous check
# (`"mac" in platform.platform()`) sent every non-mac system, including Linux,
# to "chromedriver.exe" and missed macOS on interpreters that report "Darwin-...".
# NOTE(review): nothing in the visible code uses or closes this session
# (parse_url opens its own) — confirm whether it is still needed.
if platform.system() == "Windows":
    driver = webdriver.Chrome("./chromedriver.exe")
else:
    driver = webdriver.Chrome("./chromedriver")

def parse_url(dest):
    """
        Resolve a Naver Map search term to the x, y values shown in the page URL.

        Opens ``https://map.naver.com/v5/search/{dest}?`` in a fresh Chrome
        session, waits 8 seconds, then reads the browser's current URL.

        Args:
            dest: search text inserted into the Naver Map search URL.

        Returns:
            A list with (at most) the first two comma-separated values of the
            URL query string, as strings, after dropping its first two
            characters (presumably the ``c=`` prefix — verify against a live URL).
    """

    # Suppresses all warnings process-wide from this point on (unchanged behavior).
    warnings.filterwarnings('ignore')

    # Only Windows ships the driver binary with an ".exe" suffix.
    driver_path = "./chromedriver.exe" if platform.system() == "Windows" else "./chromedriver"
    driver = webdriver.Chrome(driver_path)

    try:
        driver.get(f"https://map.naver.com/v5/search/{dest}?")

        # Fixed wait before reading the URL (presumably to let the page
        # rewrite it with the coordinates — confirm).
        time.sleep(8)

        curr_url = driver.current_url
    finally:
        # quit() ends the whole WebDriver session (browser and driver process)
        # and runs even when navigation fails, so no browser is left behind.
        driver.quit()

    return urlparse(curr_url).query[2:].split(",")[:2]


def coordinate_transform(x,y):
    """
        Convert Naver Map coordinates from EPSG:3857 to EPSG:4326 (WGS84).

        Args:
            x: easting in EPSG:3857; a numeric string (as returned by
               parse_url) is converted to float first.
            y: northing in EPSG:3857; same handling as ``x``.

        Returns:
            The transformed pair in (x, y) = (longitude, latitude) order.
    """
    # parse_url returns strings; convert them explicitly instead of relying on
    # pyproj to coerce them. Non-string inputs are passed through untouched.
    if isinstance(x, str):
        x = float(x)
    if isinstance(y, str):
        y = float(y)

    # Transformer replaces the deprecated Proj(init=...) / pyproj.transform pair;
    # always_xy=True keeps the (lon, lat) axis order that the old init= syntax used.
    transformer = pyproj.Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)

    return transformer.transform(x, y)


def get_biz_code(transformed_x,transformed_y):
    """
        Collect restaurant business ids from the Naver Map search API.

        Requests result pages 1 through 19 for the fixed, URL-encoded query
        around the given search coordinate and gathers the ``id`` of every
        listed place. Stops at the first response that contains an ``error`` key.

        Args:
            transformed_x: first component of the ``searchCoord`` parameter.
            transformed_y: second component of the ``searchCoord`` parameter.

        Returns:
            List of place ids in the order they were returned.
    """
    collected_ids = []

    for page in range(1, 20):
        search_url = f'https://map.naver.com/v5/api/search?caller=pcweb&query=%EC%9D%8C%EC%8B%9D%EC%A0%90&type=all&searchCoord={transformed_x};{transformed_y}&page={page}&displayCount=200&isPlaceRecommendationReplace=true&lang=ko'
        response_body = requests.get(url=search_url, headers=headers).json()

        # An 'error' key ends the crawl.
        if 'error' in response_body.keys():
            break

        listed_places = response_body['result']['place']['list']
        for listed_place in listed_places:
            collected_ids.append(listed_place['id'])

    return collected_ids
    
##########################################################################################################################

def parsing_profile(place_profile,avgRating,authorCount):
    """
        Build the basic restaurant profile table.

        Selects the basic detail columns, renames them to the output names,
        derives siCode / guCode / dongCode from the first three space-separated
        tokens of ``fullAddress`` and attaches the rating columns.

        Args:
            place_profile: DataFrame holding at least the selected columns.
            avgRating: values assigned to the ``avgRating`` column.
            authorCount: values assigned to the ``authorCount`` column.

        Returns:
            A new DataFrame; ``place_profile`` itself is not modified.
    """

    # Basic restaurant detail columns and their output names.
    source_columns = ['id','name','category','x','y','fullAddress','fullRoadAddress','phone','description','imageURL','bizhourInfo','reviewCount']
    column_aliases = {
        'id': 'busId',
        'name': 'restaurantName',
        'category': 'restaurantCategory',
        'phone': 'tellNumber',
        'bizhourInfo': 'businessHourInfo',
    }
    basic_profile = place_profile[source_columns].rename(columns=column_aliases)

    # Split each address once, then pick tokens 0, 1 and 2.
    address_tokens = basic_profile['fullAddress'].apply(lambda address: address.split(" "))
    for token_position, code_column in enumerate(('siCode', 'guCode', 'dongCode')):
        basic_profile[code_column] = address_tokens.apply(
            lambda tokens, pick=token_position: tokens[pick]
        )

    basic_profile['avgRating'] = avgRating
    basic_profile['authorCount'] = authorCount

    return basic_profile

def parsing_menus(place_profile):
    """
        Flatten the per-restaurant menu lists into one table.

        Args:
            place_profile: DataFrame with an ``id`` column and a ``menus``
                column; each ``menus`` cell is expected to be a list of dicts
                with ``name`` and ``price`` keys. Cells that are not a
                non-empty list/tuple (e.g. None) are skipped.

        Returns:
            DataFrame with columns ``busId``, ``menuName``, ``menuPrice`` and a
            fresh 0..n-1 index. The "원" currency suffix is removed from string
            prices; non-string prices (e.g. missing values) are left as they
            are. An empty table with the same columns is returned when there
            is nothing to flatten.
    """

    # Collect one frame per restaurant and concatenate once at the end
    # (avoids re-copying the growing table on every iteration).
    frames = []
    for bus_id, menu_items in zip(place_profile['id'], place_profile['menus']):
        if not isinstance(menu_items, (list, tuple)) or len(menu_items) == 0:
            continue
        frame = pd.DataFrame(menu_items)
        frame['busId'] = bus_id
        frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=['busId', 'menuName', 'menuPrice'])

    # reindex (rather than plain selection) tolerates menus that lack a
    # 'name' or 'price' key everywhere: the column is then filled with NaN.
    menus = pd.concat(frames, ignore_index=True).reindex(columns=['busId', 'name', 'price'])
    menus['price'] = menus['price'].apply(
        lambda price: price.replace("원", "") if isinstance(price, str) else price
    )
    return menus.rename(columns={'name': 'menuName', 'price': 'menuPrice'})


def parsing_options(place_info):
    """
        Flatten the per-restaurant option lists into one table.

        Args:
            place_info: list of place dicts, each with an ``id`` key and an
                ``options`` entry (a list of option dicts). Places whose
                options are missing, None or empty are skipped.

        Returns:
            DataFrame of all options with a fresh 0..n-1 index. Columns are
            renamed (id -> optionId, name -> optionName, order -> orderCount,
            iconURL -> iconUrl) and the owning place id is stored in ``busId``;
            ``optionId`` and ``orderCount`` are converted to int. When no place
            has options, an empty table with those renamed columns is returned.
    """

    # Collect one frame per place and concatenate once at the end.
    frames = []
    for place in place_info:
        option_items = place.get('options')
        if not option_items:
            continue
        frame = pd.DataFrame(option_items)
        frame['bus_id'] = place['id']
        frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=['optionId', 'optionName', 'orderCount', 'iconUrl', 'busId'])

    options = pd.concat(frames, ignore_index=True).rename(
        columns={'id':'optionId','name':'optionName','order':'orderCount','bus_id':'busId','iconURL':'iconUrl'}
    )

    options['optionId'] = options['optionId'].astype(int)
    options['orderCount'] = options['orderCount'].astype(int)

    return options

def parsing_images(place_info):
    """
        Flatten the per-restaurant image lists into one table.

        Args:
            place_info: list of place dicts, each with an ``id`` key and an
                ``images`` entry (a list of image dicts). Places whose images
                are missing, None or empty contribute no rows. The input dicts
                are not modified.

        Returns:
            DataFrame with columns ``bus_id``, ``number``, ``url``, ``modDate``
            and a 0..n-1 index; empty (same columns) when there are no images.
    """

    # Images table: one row per image, tagged with the owning place id.
    image_columns = ['bus_id', 'number', 'url', 'modDate']

    rows = []
    for place in place_info:
        for image in place.get('images') or []:
            # Copy the dict instead of writing 'bus_id' into the caller's data.
            rows.append({**image, 'bus_id': place['id']})

    return pd.DataFrame(rows, columns=image_columns)
    
###########################################################################################################

def get_place_review(biz_code):
    """
        Crawl visitor-review statistics for each restaurant.

        For every id in ``biz_code`` a ``getVisitorReviewStats`` GraphQL request
        is posted to the Naver place API. Voted keywords are collected into one
        table; the average rating and author count are collected per id.

        Args:
            biz_code: iterable of business ids.

        Returns:
            Tuple ``(votedKeywords, avgRating, authorCount)``:
              * votedKeywords: DataFrame with columns ``keywordsCode``,
                ``keywordsName``, ``keywordsCount`` (int) and ``busId``; rows
                with a missing value in any of these are dropped. Empty (same
                columns) when nothing was collected.
              * avgRating: list with exactly one entry per id (float, or 0 when
                the lookup failed).
              * authorCount: list with exactly one entry per id (int, or 0 when
                the lookup failed).
    """
    graph_url = "https://pcmap-api.place.naver.com/graphql"

    # The GraphQL document is the same for every id, so build it once.
    stats_query = "query getVisitorReviewStats($id: String, $itemId: String, $businessType: String = \"place\") {\n  visitorReviewStats(input: {businessId: $id, itemId: $itemId, businessType: $businessType}) {\n    id\n    name\n    apolloCacheId\n    review {\n      avgRating\n      totalCount\n      scores {\n        count\n        score\n        __typename\n      }\n      starDistribution {\n        count\n        score\n        __typename\n      }\n      imageReviewCount\n      authorCount\n      maxSingleReviewScoreCount\n      maxScoreWithMaxCount\n      __typename\n    }\n    analysis {\n      themes {\n        code\n        label\n        count\n        __typename\n      }\n      menus {\n        label\n        count\n        __typename\n      }\n      votedKeyword {\n        totalCount\n        reviewCount\n        userCount\n        details {\n          category\n          code\n          iconUrl\n          iconCode\n          displayName\n          count\n          previousRank\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    visitorReviewsTotal\n    ratingReviewsTotal\n    __typename\n  }\n}\n"

    places = []
    avgRating = []
    authorCount = []
    for bus_id in biz_code:
        # Defaults used when the request or any part of the parsing fails.
        rating, authors = 0, 0
        try:
            payload = {
                "operationName" : "getVisitorReviewStats",
                "query": stats_query,
                "variables": {"businessType": "restaurant", "id": f"{bus_id}"},
                "businessType": "restaurant",
                "id": f"{bus_id}"
            }

            res = json.loads(requests.post(headers=headers, url=graph_url, data=json.dumps(payload)).text)['data']['visitorReviewStats']
            time.sleep(0.5)

            for detail in res['analysis']['votedKeyword']['details']:
                detail['busId'] = bus_id
                places.append(detail)

            review = res['review']
            # Both values are parsed before either is stored, so a failure on
            # one can never leave the two result lists with different lengths.
            rating, authors = float(review['avgRating']), int(review['authorCount'])
        except Exception:
            # Best effort: a failing restaurant keeps the 0 defaults and the
            # crawl continues. Unlike a bare `except`, KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        avgRating.append(rating)
        authorCount.append(authors)

    # Build the table once, after the loop. Passing `columns=` guarantees the
    # expected columns exist even when no keyword was collected.
    votedKeywords = (
        pd.DataFrame(places, columns=['code', 'displayName', 'count', 'busId'])
        .dropna(axis=0)
        .astype({'count': int})
        .rename(columns={'code':'keywordsCode','displayName':'keywordsName','count':'keywordsCount'})
    )

    return votedKeywords, avgRating, authorCount

def get_place_info(biz_code,avgRating,authorCount):
    """
        Crawl restaurant detail summaries and split them into tables.

        Fetches the Naver Map site summary for every id in ``biz_code`` and
        passes the results to the parsing helpers.

        Args:
            biz_code: iterable of business ids.
            avgRating: rating values forwarded to ``parsing_profile``.
            authorCount: author counts forwarded to ``parsing_profile``.

        Returns:
            Tuple ``(profile, menus, options, images)`` of DataFrames.
    """
    profile_columns = [
        'id', 'name', 'category', 'x', 'y', 'fullAddress', 'fullRoadAddress',
        'phone', 'description', 'images', 'imageURL', 'categories',
        'bizhourInfo', 'menus', 'reviewCount',
    ]

    # One summary request per business id, kept in input order.
    place_info = []
    for code in biz_code:
        summary_response = requests.get(
            url=f'https://map.naver.com/v5/api/sites/summary/{code}?lang=ko',
            headers=headers,
        )
        place_info.append(summary_response.json())

    place_profile = pd.DataFrame(place_info)[profile_columns]

    return (
        parsing_profile(place_profile, avgRating, authorCount),
        parsing_menus(place_profile),
        parsing_options(place_info),
        parsing_images(place_info),
    )

###########################################################################################################

def save(profile,menus,options,images,votedKeywords):
    """
        Send the restaurant tables to the backend.

        Each non-empty table is handed to ``post_method`` with its endpoint,
        in the order: profile, menus, options, voted keywords.

        Args:
            profile: restaurant profile table.
            menus: menu table.
            options: option table.
            images: image table; currently not sent (the upload to
                'admin/createRestaurantImages' is disabled).
            votedKeywords: voted-keyword table.
    """
    uploads = (
        (profile, 'admin/createRestaurantInfo'),
        (menus, 'admin/createMenus'),
        (options, 'admin/createOptions'),
        (votedKeywords, 'admin/createKeywords'),
    )

    for table, endpoint in uploads:
        if len(table) == 0:
            continue
        post_method(table, endpoint)

        
def post_method(df, api, base_url="http://localhost:8080/"):
    """
        Send restaurant data to the backend, one POST request per row.

        Every row is converted to a dict whose keys and values are all
        formatted as strings, serialized as UTF-8 JSON and posted to
        ``base_url + api``. Responses are not inspected.

        Args:
            df: DataFrame whose rows are sent.
            api: endpoint path appended to ``base_url``
                (e.g. 'admin/createMenus').
            base_url: backend base URL ending with '/'. Defaults to the local
                server; pass another address (the original kept
                "http://43.200.202.164:8080/" as a commented-out alternative)
                instead of editing this function.
    """
    endpoint = base_url + api

    # Build every payload first so a formatting error surfaces before any
    # request has been sent.
    payload = [{f"{k}":f"{v}" for k, v in row.items()} for _,row in df.iterrows()]

    for row in payload:
        body = json.dumps(row, indent=4, ensure_ascii=False).encode('utf-8')
        requests.post(headers=headers, url=endpoint, data=body)


###########################################################################################################

def get_restaurant(dest):
    """
        Run the restaurant pipeline: load ids, crawl details, send to backend.

        Args:
            dest: place name for the live-crawl path. That path is currently
                disabled, so the argument is not used; business ids are read
                from './busId_list.csv' instead.
    """

    # Disabled live-crawl path, kept for reference:
    #   x, y = parse_url(dest)                                      # x, y parsed from the map URL
    #   transformed_x, transformed_y = coordinate_transform(x, y)   # EPSG:3857 -> EPSG:4326
    #   biz_code = get_biz_code(transformed_x, transformed_y)       # ids of nearby restaurants

    # Business ids are taken from the second column of the CSV file.
    id_table = pd.read_csv('./busId_list.csv')
    biz_code = id_table.iloc[:, 1]

    # Call the APIs with the business ids.
    keywords, avgRating, authorCount = get_place_review(biz_code)
    profile, menus, options, images = get_place_info(biz_code, avgRating, authorCount)

    # Send the preprocessed tables to the backend server.
    save(profile, menus, options, images, keywords)




if __name__ == "__main__":

    # Script entry point: run the pipeline for the search term below.
    # NOTE: get_restaurant currently ignores this argument (its crawl steps are commented out).
    get_restaurant("합정역")