In [37]:
# 필요한 패키지 설치
!pip install selenium webdriver-manager pandas
!pip install selenium pandas webdriver-manager



In [38]:
import time
from urllib.parse import quote

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Chrome option setup.
# Headless mode is temporarily left out for more stable crawling.
_CHROME_FLAGS = (
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
    "--start-maximized",
)

chrome_options = webdriver.ChromeOptions()
for _flag in _CHROME_FLAGS:
    chrome_options.add_argument(_flag)

In [39]:
# Create the driver: resolve a chromedriver binary via webdriver-manager,
# start Chrome with the options configured above, and build a shared
# explicit-wait helper with a 15 second timeout.
driver_path = ChromeDriverManager().install()
service = Service(driver_path)
driver = webdriver.Chrome(options=chrome_options, service=service)
wait = WebDriverWait(driver, timeout=15)

In [43]:
# Crawling function: collects name, category, address, rating, phone,
# blog review count, visitor review count, menus, business hours and facilities.

# Link labels found among the menu anchors that are UI controls, not menu items.
_MENU_NOISE = ("더보기", "메뉴판 이미지로 보기")


def _scroll_to_trigger_lazy_load():
    """Scroll the current frame down in four steps and back to the top (best effort)."""
    try:
        for _ in range(4):
            driver.execute_script("window.scrollBy(0, 800);")
            time.sleep(0.3)  # very short pause between scroll steps
        driver.execute_script("window.scrollTo(0, 0);")
        time.sleep(0.2)
    except WebDriverException:
        # Scrolling is only a loading aid; a failure here must not abort the crawl.
        pass


def _open_detail_frame():
    """Switch the driver into the ``entryIframe`` detail frame.

    Returns True when the driver ends up inside ``entryIframe``, and False when
    the first search result could not be opened. A missing ``searchIframe``
    raises (e.g. a wait timeout) and is left for the caller to report.
    """
    try:
        entry = wait.until(EC.presence_of_element_located((By.ID, "entryIframe")))
        driver.switch_to.frame(entry)
        _scroll_to_trigger_lazy_load()
        return True
    except WebDriverException:
        # No detail frame yet: fall back to clicking the first search result.
        pass

    search_frame = wait.until(EC.presence_of_element_located((By.ID, "searchIframe")))
    driver.switch_to.frame(search_frame)
    try:
        first_result = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "a.place_bluelink"))
        )
        driver.execute_script("arguments[0].click();", first_result)
        driver.switch_to.default_content()
        time.sleep(2)
        entry = wait.until(EC.presence_of_element_located((By.ID, "entryIframe")))
        driver.switch_to.frame(entry)
    except WebDriverException:
        return False

    _scroll_to_trigger_lazy_load()
    return True


def _text_or_none(css_selector):
    """Return the stripped text of the first element matching the selector, or None."""
    try:
        return driver.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except WebDriverException:
        return None


def _review_count(css_selector):
    """Return the digits in the matched element's text as an int, or None.

    None is returned both when the element is missing and when its text
    contains nothing that parses as an integer.
    """
    text = _text_or_none(css_selector)
    if text is None:
        return None
    try:
        return int("".join(filter(str.isdigit, text)))
    except ValueError:
        return None


def _business_hours():
    """Return the business hours as ``"day: hours, ..."``, or None if none were found."""
    # 1) Click the expand button when present so every row is rendered.
    try:
        expand_btn = driver.find_element(By.CSS_SELECTOR, "div.vV_z_ a.gKP9i.RMgN0")
        driver.execute_script("arguments[0].click();", expand_btn)
        time.sleep(1.5)  # wait after the click
    except WebDriverException:
        pass

    try:
        rows = driver.find_elements(By.CSS_SELECTOR, "div.vV_z_ div.w9QyJ")
    except WebDriverException:
        return None

    entries = []
    for row in rows:
        try:
            day = row.find_element(By.CSS_SELECTOR, "span.i8cJw").text.strip()
            time_info = row.find_element(By.CSS_SELECTOR, "div.H3ua4").text.strip()
        except WebDriverException:
            continue  # row without both parts: skip it
        if day and time_info:
            entries.append(f"{day}: {time_info}")
    return ", ".join(entries) if entries else None


def _menu_names():
    """Return the de-duplicated menu names joined by ``", "``, or None if none were found."""
    names = []

    # 1) List layout: a.ihmWt anchors.
    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.ihmWt")))
        anchors = driver.find_elements(By.CSS_SELECTOR, "a.ihmWt")
        # Read .text once per anchor; each access is a round trip to the browser.
        labels = [anchor.text.strip() for anchor in anchors]
        names.extend(label for label in labels if label)
    except WebDriverException:
        pass

    # 2) Card layout: span.JfbYN inside the list items.
    try:
        cards = driver.find_elements(By.CSS_SELECTOR, "ul.YYh8o li.BLsus")
    except WebDriverException:
        cards = []
    for card in cards:
        try:
            label = card.find_element(By.CSS_SELECTOR, "span.JfbYN").text.strip()
        except WebDriverException:
            continue
        if label:
            names.append(label)

    # Drop duplicates (keeping first-seen order) and the UI-control labels.
    unique = [name for name in dict.fromkeys(names) if name not in _MENU_NOISE]
    return ", ".join(unique) if unique else None


def crawl_basic_info(place_name):
    """Search Naver Map for ``place_name`` and scrape the detail panel.

    Uses the module-level ``driver`` and ``wait`` objects. Every field is
    extracted independently, so one missing element leaves only that field
    as None instead of aborting the rest.

    Args:
        place_name: Search query; it is converted to ``str`` and URL-escaped.

    Returns:
        dict with the keys ``name``, ``category``, ``address``, ``rating``,
        ``phone``, ``blog_reviews``, ``visitor_reviews``, ``menus``, ``hours``,
        ``facilites`` (spelling kept because it is part of the output schema)
        and ``error``. ``name`` falls back to ``place_name`` when the page has
        no usable name. ``error`` is ``"검색 결과 없음"`` when no result could be
        opened, the exception text for an unexpected failure, otherwise None.
    """
    data = {
        "name": None,
        "category": None,
        "address": None,
        "rating": None,
        "phone": None,
        "blog_reviews": None,
        "visitor_reviews": None,
        "menus": None,
        "hours": None,
        "facilites": None,
        "error": None,
    }

    try:
        # Escape the query so characters such as '?', '#' or spaces cannot
        # change the structure of the URL.
        driver.get(f"https://map.naver.com/v5/search/{quote(str(place_name))}")
        time.sleep(3)  # wait for the page to render

        if not _open_detail_frame():
            data["error"] = "검색 결과 없음"
            return data

        data["name"] = _text_or_none("span.GHAhO") or place_name
        data["category"] = _text_or_none("span.lnJFt")
        data["address"] = _text_or_none("span.LDgIH")
        data["rating"] = _text_or_none("span.PXMot.LXIwF")
        data["hours"] = _business_hours()
        data["menus"] = _menu_names()
        data["phone"] = _text_or_none("span.xlx7Q")
        data["visitor_reviews"] = _review_count('span.PXMot a[href*="visitor"]')
        data["blog_reviews"] = _review_count('span.PXMot a[href*="ugc"]')
        data["facilites"] = _text_or_none("div.xPvPE")

    except Exception as e:
        # Top-level boundary: report the failure in the row instead of
        # stopping the surrounding crawl loop.
        data["error"] = str(e)

    return data


# Load the CSV of places to look up (read with cp949 encoding).
CSV_PATH = "창동식당.csv"
MAX_PLACES = 35             # crawl the first N rows (a later batch was 534:650)
REQUEST_INTERVAL_SEC = 1.5  # pause between requests

df = pd.read_csv(CSV_PATH, encoding="cp949")
results = []

# Example run: crawl the first MAX_PLACES business names.
for place in df["사업장명"].head(MAX_PLACES):
    print("검색중:", place)
    info = crawl_basic_info(place)
    print(info)
    results.append(info)
    time.sleep(REQUEST_INTERVAL_SEC)

# Saving the results is currently disabled:
# df_result = pd.DataFrame(results)
# df_result.to_csv("창동음식점_결과1.csv", index=False, encoding="utf-8-sig")
print("크롤링 완료! ^^")


검색중: 훼미리
{'name': '훼미리', 'category': '치킨,닭강정', 'address': '경기 성남시 분당구 판교로 519 경남아너스빌 정문 상가 108호', 'rating': None, 'phone': '031-706-6784', 'blog_reviews': 2, 'visitor_reviews': 121, 'menus': '후라이드치킨, 골뱅이, 치킨양념, 치킨반반', 'hours': '월: 16:00 - 24:00\n23:00 라스트오더, 화: 16:00 - 24:00\n23:00 라스트오더, 수: 16:00 - 24:00\n23:00 라스트오더, 목: 16:00 - 24:00\n23:00 라스트오더, 금: 16:00 - 24:00\n23:00 라스트오더, 토: 16:00 - 24:00\n23:00 라스트오더, 일(10/5): 추석 연휴 휴무', 'facilites': '예약, 단체 이용 가능, 주차, 포장, 배달, 무선 인터넷, 남/녀 화장실 구분', 'error': None}
검색중: 돼지제국
{'name': '돼지제국', 'category': '한식', 'address': '서울 도봉구 해등로16길 7 1층 돼지제국', 'rating': None, 'phone': '02-900-1392', 'blog_reviews': 70, 'visitor_reviews': 432, 'menus': '생삼겹살, 꽃목살, 항정살, 김치전골', 'hours': '월: 정기휴무 (매주 월요일), 화: 11:00 - 23:00\n22:00 라스트오더, 수: 11:00 - 23:00\n22:00 라스트오더, 목: 11:00 - 23:00\n22:00 라스트오더, 금: 11:00 - 23:00\n22:00 라스트오더, 토: 11:00 - 23:00\n22:00 라스트오더, 일: 11:00 - 23:00\n22:00 라스트오더', 'facilites': '단체 이용 가능, 포장, 유아의자, 남/녀 화장실 구분, 주차, 발렛파킹', 'error': None}
검색중