In [14]:
import time
import warnings
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup

warnings.filterwarnings('ignore')

# 브라우저 꺼짐 방지 옵션 설정
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])  # 불필요한 에러 메시지 없애기

# 서비스 및 드라이버 객체 생성
service = Service(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)


In [16]:
# 카카오 맵으로 이동
url = "https://map.kakao.com/"
driver.get(url)

# 검색어 입력 및 검색 실행
searchloc = '청주 카페'
search_area = driver.find_element(By.XPATH, r'//*[@id="search.keyword.query"]')
search_area.send_keys(searchloc)
driver.find_element(By.XPATH, r'//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)
time.sleep(5)
driver.find_element(By.XPATH, r'//*[@id="info.main.options"]/li[2]/a').send_keys(Keys.ENTER)


In [18]:
# 숙소 정보 리스트 초기화
room_list = []

def roomNamePrint():
    time.sleep(0.2)
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    room_lists = soup.select('.placelist > .PlaceItem')
    
    for i, room in enumerate(room_lists):
        temp = []
        name = room.select('.head_item > .tit_name > .link_name')[0].text
        score = room.select('.rating > .score > em')[0].text
        addr = room.select('.addr > p')[0].text
        
        # 상세정보 탭으로 이동
        driver.find_element(By.XPATH, r'//*[@id="info.search.place.list"]/li['+str(i+1)+']/div[5]/div[4]/a[1]').send_keys(Keys.ENTER)
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(2)
        rev = extract_review()  # 리뷰 추출
        
        # 하나의 리스트로 만들어 room_list에 추가
        temp.append(name)
        temp.append(score)
        temp.append(addr[3:])
        temp.append(rev)
        
        room_list.append(temp)

def extract_review():
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    review_lists = soup.select('.list_evaluation > li')
    
    rev = []
    if len(review_lists) != 0:
        for review in review_lists:
            comment = review.select('.txt_comment > span')[0].text  # 리뷰
            if len(comment) != 0:
                rev.append(comment)
    else:
        rev.append(' ')
    
    # 다시 검색 탭으로 전환
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
    time.sleep(2)
    
    return rev

In [19]:
# 페이지별 크롤링
page = 1
page2 = 1
for i in range(1, 9):
    try:
        page2 += 1
        print(page, 'page')
        if i > 5:
            xpath = '/html/body/div[5]/div[2]/div[1]/div[7]/div[6]/div/a['+str(i-5)+']'
        else:
            xpath = '/html/body/div[5]/div[2]/div[1]/div[7]/div[6]/div/a['+str(i)+']'
        driver.find_element(By.XPATH, xpath).send_keys(Keys.ENTER)
        roomNamePrint()
        if page2 > 5:
            page2 = 1
            driver.find_element(By.XPATH, r'//*[@id="info.search.page.next"]').send_keys(Keys.ENTER)
        page += 1
    except:
        break
        
print('크롤링 완료')
driver.quit()


1 page
2 page
크롤링 완료


In [31]:
import csv

# room_list 데이터를 CSV 파일로 저장
csv_filename = 'hanok_stay_data.csv'

# CSV 파일을 쓰기 모드로 열고 데이터 저장
with open(csv_filename, mode='w', newline='', encoding='utf-8-sig') as file:
    writer = csv.writer(file)
    
    # 헤더 작성
    writer.writerow(["Name", "Score", "Location", "Reviews"])
    
    # 데이터 작성
    for room in room_list:
        # Reviews 리스트를 문자열로 변환하여 저장
        writer.writerow([room[0], room[1], room[2], "; ".join(room[3])])

print(f'데이터가 CSV 파일로 저장되었습니다: {csv_filename}')

데이터가 CSV 파일로 저장되었습니다: hanok_stay_data.csv
