In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup as bs
import pandas as pd
import os
import time

# Notebook-wide pandas display settings: show up to 1000 rows/columns before
# truncating, and render floats with two decimal places.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.set_option("display.float_format", '{:.2f}'.format)

In [7]:
def _build_episode_url(lang, titleId, episode_no):
    """Return the viewer URL of one episode for the given site language.

    Raises:
        ValueError: if ``lang`` is neither ``'kr'`` nor ``'en'``.
    """
    if lang == 'kr':
        base_url = 'https://comic.naver.com/webtoon/detail?'
        return base_url + 'titleId=' + str(titleId) + '&no=' + str(episode_no)
    if lang == 'en':
        # NOTE(review): the path segment is fixed to "episode-109"; presumably the
        # site resolves the episode from the query string -- confirm.
        base_url = 'https://www.webtoons.com/en/action/omniscient-reader/episode-109/viewer?'
        return base_url + 'title_no=' + str(titleId) + '&episode_no=' + str(episode_no)
    raise ValueError("lang must be 'kr' or 'en', got {!r}".format(lang))


def _parse_comment_page(html, episode_no):
    """Extract the comment dates and texts of one rendered comment page.

    Returns a DataFrame with columns ``Episode``, ``Date`` and ``Review``.
    pandas raises ``ValueError`` if the page yields a different number of
    date spans and content spans.
    """
    soup = bs(html, "html.parser")
    dates = [tag.text for tag in soup.find_all("span", {"class": ["u_cbox_date"]})]
    reviews = [tag.get_text() for tag in soup.find_all("span", {"class": ["u_cbox_contents"]})]
    return pd.DataFrame({'Episode': int(episode_no), 'Date': dates, 'Review': reviews})


def get_comments(service, df, lang = 'kr', titleId = 747269, episode_no = 1):
    """Scrape every comment page of one webtoon episode and append it to ``df``.

    Args:
        service: selenium ``Service`` passed to ``webdriver.Chrome``.
        df: DataFrame with columns ``Episode``, ``Date``, ``Review``; it is not
            modified, the scraped rows are appended to a copy.
        lang: ``'kr'`` (comic.naver.com) or ``'en'`` (webtoons.com).
        titleId: numeric title id used in the episode URL.
        episode_no: episode number used in the URL and stored in ``Episode``.

    Returns:
        A new DataFrame: ``df`` followed by one row per scraped comment. Index
        labels are kept as produced per page (they repeat across pages); call
        ``reset_index(drop=True)`` if unique labels are needed.

    Raises:
        ValueError: if ``lang`` is not supported (raised before a browser starts).
        Selenium exceptions propagate if an expected page element is missing.
    """
    TIMEOUT = 1

    # Build (and thereby validate) the URL first so a bad `lang` never launches Chrome.
    episode_url = _build_episode_url(lang, titleId, episode_no)

    driver = webdriver.Chrome(service=service)
    page_frames = []
    try:
        print(episode_url)
        driver.get(episode_url)
        driver.implicitly_wait(TIMEOUT)

        if lang == 'kr':
            # Select the iframe that hosts the comment widget.
            driver.switch_to.frame('commentIframe')

            # Turn off Cleanbot filtering so filtered comments are collected as
            # text instead of the "Cleanbot detected an inappropriate expression"
            # placeholder.
            driver.find_element(By.CLASS_NAME,'u_cbox_cleanbot_setbutton').click()
            time.sleep(0.2)
            driver.find_element(By.CLASS_NAME,'u_cbox_layer_cleanbot2_checkbox').click()
            time.sleep(0.2)
            driver.find_element(By.CLASS_NAME,'u_cbox_layer_cleanbot2_extrabtn').click()
            time.sleep(0.2)

            # Click "all comments" to switch to the full comment list.
            driver.find_element(By.CSS_SELECTOR,'#cbox_module > div > div.u_cbox_view_comment > a').click()
            time.sleep(0.5)
        # lang == 'en': no iframe, no Cleanbot setting, no "all comments" click needed.

        # Jump to the last page to read the total number of pages.
        driver.find_element(By.CSS_SELECTOR,'#cbox_module > div > div.u_cbox_paginate > div > a.u_cbox_next.u_cbox_next_end').click()
        time.sleep(0.5)
        strong = driver.find_element(By.CSS_SELECTOR,'#cbox_module > div > div.u_cbox_paginate > div > strong')
        num_pages = int(strong.get_attribute('data-param'))
        print("num_pages:", num_pages)

        # Go back to the first page before iterating.
        driver.find_element(By.CSS_SELECTOR,'#cbox_module > div > div.u_cbox_paginate > div > a.u_cbox_pre.u_cbox_pre_end').click()
        time.sleep(0.5)

        for page in range(num_pages):
            if lang == 'kr':
                # Re-select the most recent window and re-enter the comment iframe.
                driver.switch_to.window(driver.window_handles[-1])
                driver.switch_to.frame("commentIframe")

            page_frames.append(_parse_comment_page(driver.page_source, episode_no))
            print('.', end='')

            # Advance unless this was the last page: the link right after the
            # current-page <strong> marker is the next page.
            if page + 1 < num_pages:
                driver.find_element(By.CSS_SELECTOR, "#cbox_module > div > div.u_cbox_paginate > div > strong + a").click()
                time.sleep(0.2)
    finally:
        # Always shut the browser session down, including on errors and after
        # the final page (the previous early return skipped the close call).
        driver.quit()

    if not page_frames:
        return df
    # Concatenate once instead of re-copying the growing frame on every page.
    return pd.concat([df, *page_frames])

In [8]:
# Create the empty frame that get_comments() appends the scraped rows to.
df = pd.DataFrame(data=[], columns=['Episode','Date','Review'])

# Target series: "Omniscient Reader's Viewpoint" (kr) / omniscient-reader (en).
# Language setting: 'kr' or 'en'.
lang = 'kr'

# titleId / episode_no for the selected site.
if lang == 'kr':
    titleId = 747269
    episode_no = 24
elif lang == 'en':
    titleId = 2154
    episode_no = 1
else:
    # Fail here with a clear message instead of a NameError on titleId below.
    raise ValueError("lang must be 'kr' or 'en', got {!r}".format(lang))

# Get all comments, e.g.
# https://comic.naver.com/webtoon/detail?titleId=747269&no=24

# Raw string: '\T' and '\c' are not valid escape sequences, so the plain
# literal only worked by accident (and warns on newer Python versions).
service = Service(executable_path=r'c:\Temp\chromedriver.exe')
df = get_comments(service, df, lang, titleId, episode_no)

# Save the dataframe as CSV under DATA_PATH (created if missing).
DATA_PATH = './data/'
os.makedirs(DATA_PATH, exist_ok=True)
file_path = os.path.join(
    DATA_PATH,
    'omniscient-reader_Episode_{lang}_{episode_no}.csv'.format(lang=lang, episode_no=episode_no),
)
df.to_csv(file_path,index = False)
print("Save Done.")
df

https://comic.naver.com/webtoon/detail?titleId=747269&no=24
num_pages: 294
......................................................................................................................................................................................................................................................................................................Save Done.


Unnamed: 0,Episode,Date,Review
0,24,19시간 전,저 파브르는 훗날 강형욱을 만나게 된다.이 정도는 스포 아니지?
1,24,2022-10-03 12:53,그러네 미궁 실로 탈출하기 그리스로마신화네
2,24,2022-09-28 03:31,뭐야 왜저래
3,24,2022-09-27 03:19,아리아드네나 테세우스가 후원자인가보군
4,24,2022-09-18 15:59,미로를 틸출하는 실? 이거 그리스 로마 신화에 나오는 내용이잖아 ㅋㅋㅋㅋㅋㅋㅋㅋ
...,...,...,...
11,24,2020-09-22 23:05,나왔다
12,24,2020-09-22 23:05,빨랐다
13,24,2020-09-22 23:05,ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ
14,24,2020-09-22 23:05,1
