In [9]:
import os
import re
import time
import urllib.parse

import googlemaps
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm

# Search seeds: each entry is suffixed with ' 놀거리' and searched on Naver Map.
station_list = [
    '뚝섬역', '서촌', '강남역', '서울역', '해방촌', '수유역', '경복궁', '암사동 유적',
]

# KAKAO_API_KEY = "1a6063efba1d306ac8725b784b11fa13"
# SECURITY: API keys are committed in this file. Prefer supplying the Google key
# through the GOOGLE_API_KEY environment variable; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
GOOGLE_API_KEY = (
    os.environ.get("GOOGLE_API_KEY")
    or "AIzaSyCtPF92it_bqVkfPKeKbKHmmo9ig2RyaPE"
)

# Accumulates one dict per scraped place; exported to CSV at the end of the run.
results = []

def switch_left():
    """Focus the driver on the ``searchIframe`` frame (the search-result list).

    The driver is first reset to the top-level document so the iframe lookup
    works no matter which frame was active before the call.
    """
    driver.switch_to.default_content()
    driver.switch_to.frame(
        driver.find_element(By.XPATH, '//*[@id="searchIframe"]')
    )

def switch_right():
    """Focus the driver on the ``entryIframe`` frame (the place detail view).

    The driver is first reset to the top-level document so the iframe lookup
    works no matter which frame was active before the call.
    """
    driver.switch_to.default_content()
    driver.switch_to.frame(
        driver.find_element(By.XPATH, '//*[@id="entryIframe"]')
    )

def clean_address(address):
    """Return *address* with leading and trailing whitespace removed.

    Heavier normalisation steps are currently disabled; they are kept below
    as comments for reference and have no effect at runtime.
    """
    # Disabled: remove the "(우)" postal-code suffix.
    # address = re.sub(r'\s*\(우\)\d{5}.*$', '', address)

    # Disabled: remove floor ("층") / unit ("호") details separated by a comma,
    # slash or hyphen.
    # address = re.sub(r'[,/-]?\s*\d+[\-,\d]*\s*층.*$', '', address)  # e.g. ", 1층" / "1,2층" / "1-2층"
    # address = re.sub(r'[,/-]?\s*\d+\s*호.*$', '', address)          # e.g. ", 101호"

    # Disabled: keep only the road-name part of the address ("길" allowed).
    # match = re.search(r'([가-힣\d\s\-]*로\d*(길\s?\d+)?\s?\d*)', address)
    # address = match.group(0) if match else address.strip()

    trimmed = address.strip()
    return trimmed

def extract_number(text):
    """Return the first run of digits found in *text* as an int, or 0 if none.

    Commas are stripped before searching, so a thousands-separated figure
    such as "1,234" is read as 1234.
    """
    without_commas = text.replace(',', '')
    found = re.search(r'\d+', without_commas)
    if found is None:
        return 0
    return int(found.group(0))

def get_lat_lon(address):
    """Geocode *address* with the Google Maps Geocoding API.

    Args:
        address: Free-form address string to look up.

    Returns:
        A ``(lat, lon)`` tuple read from the first geocoding result, or
        ``(None, None)`` when the address is blank, the service returns no
        results, or the first result carries no location.

    Note:
        Errors raised by the ``googlemaps`` client itself (network, quota,
        invalid key) are not caught here and propagate to the caller.
    """
    # Nothing to look up; avoid spending an API call on a blank query.
    if not address or not address.strip():
        return None, None

    gmaps = googlemaps.Client(key=GOOGLE_API_KEY)
    geocode = gmaps.geocode(address=address)

    # An unknown address yields an empty result list; indexing it would raise
    # IndexError, so report "no coordinates" instead.
    if not geocode:
        return None, None

    location = (geocode[0].get("geometry") or {}).get("location")
    if not location:
        return None, None

    # NOTE: an earlier Kakao Local API implementation (address search with a
    # 60-second back-off on HTTP 429) was removed from this function; it also
    # returned (None, None) when no match was found.
    return location["lat"], location["lng"]


# Selenium setup: Chrome started with a desktop user-agent and a fixed window size.
options = webdriver.ChromeOptions()
for chrome_argument in (
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'window-size=1380,900',
):
    options.add_argument(chrome_argument)

driver = webdriver.Chrome(options=options)
# Implicit wait of 3 seconds applied to the driver's element lookups.
driver.implicitly_wait(3)

# Scrape Naver Map "<place> 놀거리" search results for every entry in
# station_list, collect name / category / review count / address / coordinates
# for each non-excluded place into `results`, then export them to CSV.

# Placeholder name used when the title cannot be read; such entries are skipped.
exclude_names = frozenset({"놀거리명 없음"})

# Categories that are skipped (placeholder plus food, cafe and bar categories).
exclude_categories = frozenset({
    "카테고리 없음", "카페,디저트", "일식당", "양식", "한식", "샤브샤브", "중식당", "이자카야", "베이커리", "요리주점", "베트남음식",
    "브런치", "카페", "햄버거", "양꼬치", "스파게티,파스타전문", "우동,소바", "멕시코,남미음식", "오니기리", "피자", "치킨,닭강정", "고기뷔페",
    "스터디카페", "초콜릿전문점", "소고기구이", "곱창,막창,양", "백숙,삼계탕", "막국수", "이탈리아음식", "아이스크림", "일본식라면",
})

# Scrolling parameters for loading the full result list.
scroll_pause_time = 1
max_scroll_attempts = 30

try:
    for place in station_list:
        search_url = f"https://map.naver.com/p/search/{urllib.parse.quote(place + ' 놀거리')}"
        driver.get(search_url)
        time.sleep(3)

        # Wait for the search iframe; without it there is nothing to scrape.
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="searchIframe"]')))
            switch_left()
        except Exception:
            print(f"[{place}] 검색결과 없음 - 스킵")
            continue

        try:
            scrollable_element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "_pcmap_list_scroll_container"))
            )
        except Exception as e:
            print(f"[{place}] 스크롤 요소 탐색 실패: {e} - 스킵")
            continue

        # Scroll down until the container height stops growing (or the attempt
        # limit is reached) so that all list items are present in the DOM.
        last_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)

        for _ in range(max_scroll_attempts):
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable_element)
            time.sleep(scroll_pause_time)
            new_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)
            if new_height == last_height:
                break
            last_height = new_height

        elements = driver.find_elements(By.XPATH, '//*[@id="_pcmap_list_scroll_container"]//li')

        for index, element in enumerate(elements):
            # Open the detail view of this list entry.
            try:
                switch_left()
                clickable = element.find_element(By.CLASS_NAME, 'YgcU0')
                clickable.click()
                time.sleep(2)
            except Exception:
                print(f"{index+1}번째 장소 클릭 실패")
                continue

            # Parse the detail view.
            try:
                switch_right()
                time.sleep(2)

                soup = BeautifulSoup(driver.page_source, "html.parser")

                title = soup.find("div", class_="LylZZ v8v5j")

                # Fall back to the placeholder when the title block or the
                # name span is missing; placeholder names are skipped.
                name_element = title.find("span", class_="GHAhO") if title is not None else None
                store_name = name_element.get_text(strip=True) if name_element is not None else "놀거리명 없음"

                if store_name in exclude_names:
                    continue

                # Same fallback for the category span; the placeholder is part
                # of exclude_categories, so such entries are skipped as well.
                category_element = title.find("span", class_="lnJFt") if title is not None else None
                category = category_element.get_text(strip=True) if category_element is not None else "카테고리 없음"

                if category in exclude_categories:
                    continue

                visitor_review_element = soup.find("a", string=lambda text: text and "방문자 리뷰" in text)
                visitor_review = extract_number(visitor_review_element.get_text(strip=True)) if visitor_review_element else 0

                blog_review_element = soup.find("a", string=lambda text: text and "블로그 리뷰" in text)
                blog_review = extract_number(blog_review_element.get_text(strip=True)) if blog_review_element else 0

                total_reviews = visitor_review + blog_review

                address_element = soup.find("span", class_="LDgIH")
                if address_element is not None:
                    address = clean_address(address_element.get_text(strip=True))
                    lat, lon = get_lat_lon(address)
                else:
                    # Do not send the placeholder text to the geocoder: it is
                    # not an address and any hit would be a false coordinate.
                    address = "주소 없음"
                    lat, lon = None, None

                print(f"\n[지역: {place}] {index+1}. {store_name} · {category}")
                print(f"리뷰 수: {total_reviews}")
                print(f"주소: {address}")
                print(f"위도: {lat}, 경도: {lon}")
                print("-" * 50)

                results.append({
                    "name": store_name,
                    "station": place,
                    "category": category,
                    "review": total_reviews,
                    "address": address,
                    "latitude": lat,
                    "longitude": lon,
                })

            except Exception as ex:
                print(f"{index+1}번째 장소 정보 수집 실패: {ex}")
                print("-" * 50)
                continue
finally:
    # Always close the browser, even when the run is interrupted or fails.
    driver.quit()

df = pd.DataFrame(results)
df.to_csv('sample_enjoy_results.csv', index=False, encoding='utf-8-sig')
print("CSV 저장 완료: sample_enjoy_results.csv")



[지역: 뚝섬역] 1. 뚝섬미술관 · 미술관
리뷰 수: 8608
주소: 서울 성동구 아차산로 33 지하 1층
위도: 37.547351, 경도: 127.0484429
--------------------------------------------------

[지역: 뚝섬역] 4. 섬세이 테라리움 · 전시관
리뷰 수: 5077
주소: 서울 성동구 서울숲2길 44-1 지하1층 섬세이 테라리움
위도: 37.5462075, 경도: 127.0435608
--------------------------------------------------

[지역: 뚝섬역] 6. 성수아트홀 · 공연장
리뷰 수: 1001
주소: 서울 성동구 뚝섬로1길 43
위도: 37.5454156, 경도: 127.0469729
--------------------------------------------------

[지역: 뚝섬역] 9. 무비랜드 · 영화관
리뷰 수: 293
주소: 서울 성동구 연무장길 5-5
위도: 37.5441644, 경도: 127.0502997
--------------------------------------------------

[지역: 뚝섬역] 11. 인더무드성수 셀프사진관 · 셀프,대여스튜디오
리뷰 수: 146
주소: 서울 성동구 왕십리로16가길 10 지하1층
위도: 37.5503473, 경도: 127.0448514
--------------------------------------------------

[지역: 뚝섬역] 12. 라쿵 베이커리 · 공방
리뷰 수: 71
주소: 서울 성동구 아차산로 54 4층
위도: 37.54612729999999, 경도: 127.0500548
--------------------------------------------------

[지역: 뚝섬역] 13. 컬러온스튜디오 · 공방
리뷰 수: 736
주소: 서울 성동구 왕십리로10길 20 2층
위도: 37.54647380000001, 경도: 127.0466892
-------