In [1]:
# !pip install selenium

In [2]:
import time
import requests
import re

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
import pandas as pd

## 페이지 가져오기 및 정보 저장하기

In [3]:
channel_link = 'https://www.youtube.com/@RawFishEater/videos'
driver = webdriver.Chrome()
driver.get(channel_link)

# 영상 링크 배열에 저장하기
video_links = set()
scroll_count = 0

for _ in range(19):  # 20으로 설정해야함
    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.END)
    time.sleep(1)
    scroll_count += 1

    videos = driver.find_elements(By.CSS_SELECTOR, '#thumbnail')
    for video in videos:
        href = video.get_attribute('href')
        if href:
            video_links.add(href)

print(len(video_links))

for i, link in enumerate(video_links):
    if i < 10:
        print(link)
    else:
        break

443
https://www.youtube.com/watch?v=6NI1OZxlP-o
https://www.youtube.com/watch?v=_gQgOxvxW-Y
https://www.youtube.com/watch?v=2-cjK_sG_RA
https://www.youtube.com/watch?v=Y8h8FkZhlkA
https://www.youtube.com/watch?v=ctNMSm27B-g
https://www.youtube.com/watch?v=-GWZ8EC424w
https://www.youtube.com/watch?v=KQf3kXV5GI0
https://www.youtube.com/watch?v=CT1tnchz2hA
https://www.youtube.com/watch?v=Ra6K03SKwus
https://www.youtube.com/watch?v=D7-cAHgrApU


In [4]:
english_columns = {
    "식당명": "Restaurant Name",
    "식당위치": "Location",
    "전화번호": "Phone Number",
    "영업시간": "Business Hours",
    "리뷰 메뉴": "Review Menu"
}

df = pd.DataFrame(columns=english_columns.values())

In [5]:
def add_restaurant_info_to_df(youtube_link, df):
    tmp_driver = webdriver.Chrome()
    tmp_driver.get(youtube_link)
    tmp_driver.implicitly_wait(3)

    try:
        expand_button = tmp_driver.find_element(By.ID, "expand")
        expand_button.click()

        description = tmp_driver.find_element(By.CSS_SELECTOR, "#description-inline-expander")
        text = description.text

        pattern = re.compile(r"\*식당정보\n(?:- .*\n)+")
        match = pattern.search(text)

        if match:
            extracted_text = match.group()
            extracted_lines = extracted_text.split('\n')
            extracted_lines = [line for line in extracted_lines if line]

            parsed_data = {}
            for line in extracted_lines[1:]:
                if " : " in line:
                    key, value = line.split(" : ", maxsplit=1)
                    key = key.strip('- ').strip()
                    if key in english_columns:
                        parsed_data[english_columns[key]] = value.strip()

            new_df = pd.DataFrame([parsed_data])
            df = pd.concat([df, new_df], ignore_index=True)

        else:
            print(f"No restaurant info found in video: {youtube_link}")

    except Exception as e:
        print(f"Error processing video: {youtube_link}, Error: {e}")

    tmp_driver.quit()
    return df

In [6]:
for link in video_links:
    df = add_restaurant_info_to_df(link, df)

df.head()

No restaurant info found in video: https://www.youtube.com/watch?v=6NI1OZxlP-o
No restaurant info found in video: https://www.youtube.com/watch?v=ctNMSm27B-g
No restaurant info found in video: https://www.youtube.com/watch?v=KQf3kXV5GI0
No restaurant info found in video: https://www.youtube.com/watch?v=CT1tnchz2hA
No restaurant info found in video: https://www.youtube.com/watch?v=D7-cAHgrApU
No restaurant info found in video: https://www.youtube.com/watch?v=S4ixeFnc_DU
No restaurant info found in video: https://www.youtube.com/watch?v=73VM-JmXYfE
No restaurant info found in video: https://www.youtube.com/watch?v=9EFt6u5wHfw
No restaurant info found in video: https://www.youtube.com/watch?v=Ilbqfq6m8a0
No restaurant info found in video: https://www.youtube.com/watch?v=CtT3GjGbqSs
No restaurant info found in video: https://www.youtube.com/watch?v=rVqgrjq_mL0
No restaurant info found in video: https://www.youtube.com/watch?v=kakGzYALikA
No restaurant info found in video: https://www.youtu

Unnamed: 0,Restaurant Name,Location,Phone Number,Business Hours,Review Menu
0,신선물회,부산 영도구 절영로35번길 16,0507-1313-1479,11:30-21:00(월요일 휴무),"모둠회(2인/50,000원)"
1,삼미식당 롯데백화점본점,서울 중구 남대문로 81 지하 1층 푸드코트,없음,백화점 영업시간과 동일,"대왕연어초밥(3개/9900원), 갑오징어초밥(4개/9900원), 연어뱃살(3개/15..."
2,고쉐프의 신선한 초밥,인천 서구 청라에메랄드로41번길 20 101호,0507-1392-1848,11:00-22:00(14:30-16:30 브레이크 타임) 일요일 휴무,
3,진미식당,서울 마포구 마포대로 186-6,02-3211-4468,12:00-20:00 (15:30-17:00 브레이크타임/일요일 휴무),간장게장2인분 (1인분/41000원)
4,묘수,서울 영등포구 선유서로25길 9-2 1층,0507-1328-7639,매일 17:00-01:00,"레드아이(6,000원), 묘수스시(27,000원), 어묵우동(7,000원), 타마고..."


In [7]:
df.shape

(239, 5)

In [8]:
df_sorted_by_location = df.sort_values(by="Location")

In [9]:
df_sorted_by_location.head()

Unnamed: 0,Restaurant Name,Location,Phone Number,Business Hours,Review Menu
74,그루비그루비(Groobgroobb),"44 ถนน เจริญราษฎร์ Bang Khlo, Bang Kho Laem, B...",+66982414265,15:30-00:00 (월요일 휴무),"무늬바리회 (반마리/31000원), 타이거새우회(1마리/16500원),"
173,시드니피쉬마켓,"Corner Pyrmont Bridge Rd &, Bank St, Pyrmont N...",+61290041100,07:00-16:00,
37,주문진항샵20,강원 강릉시 하평길 68,033-651-1144,"17:00-22:00 (일, 월 휴무)","자연산 막회(소/45,000원), 매운탕(8,000원), 면사리(1,000원)"
150,부흥횟집,강원 동해시 일출로 93 부흥횟집,033-531-5209(바쁠 때 전화 잘 안 받는 것 같았음),"10:30-22:00(14:30-17:00 브레이크 타임), 첫 째주 일요일 휴무","회덮밥(15,000원), 물회(15,000원)"
22,봉포머구리집 본점,강원 속초시 영랑해안길 223 봉포머구리집,0507-1404-2026,매일 10:00-21:30,"전복물회(22,000원)"


In [10]:
df_sorted_by_location.to_csv('restaurants.csv')