# YouTube Ranking

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import pandas as pd

In [2]:
# Open the ranking board in a Selenium-controlled Chrome window.
url = 'https://youtube-rank.com/board/bbs/board.php?bo_table=youtube'
# NOTE(review): the driver path is passed positionally; whether this signature is
# accepted depends on the installed Selenium version — confirm on the target environment.
driver = webdriver.Chrome('chromedriver')
driver.get(url)
# Fixed 2-second pause before the page source is read in the next cell;
# presumably this lets the page finish rendering.
time.sleep(2)

- Step 1, 2

In [3]:
# Parse the HTML currently loaded in the browser and collect the ranking rows.
page_html = driver.page_source
soup = BeautifulSoup(page_html, 'html.parser')
trs = soup.select('tr.aos-init')
# Number of rows matched on this page.
len(trs)

100

- Step 3

In [4]:
# Use the first matched row as a sample while working out the selectors.
tr = trs[0]

# Rank
tr.select_one('.rank').get_text().strip()

'1'

In [5]:
# Category: the [1:-1] slice drops the first and last character of the cell text
# (presumably enclosing brackets — confirm against the page markup).
tr.select_one('.category').get_text().strip()[1:-1]

'음악/댄스/가수'

In [6]:
# Channel name: text of the link inside the .subject cell
tr.select_one('.subject a').get_text().strip()

'BLACKPINK'

In [7]:
# Subscriber count, view count and video count, read in that order from the sample row.
subscriber, view, video = (
    tr.select_one(selector).get_text().strip()
    for selector in ('.subscriber_cnt', '.view_cnt', '.video_cnt')
)
subscriber, view, video

('7350만', '233억1663만', '394개')

- Step 4

In [8]:
def parse_row(tr):
    """Turn one ranking row into [rank, category, channel, subscriber, view, video].

    The rank is converted to int; the category text loses its first and last
    character; every other field is kept as the stripped cell text.
    """
    return [
        int(tr.select_one('.rank').get_text().strip()),
        tr.select_one('.category').get_text().strip()[1:-1],
        tr.select_one('.subject a').get_text().strip(),
        tr.select_one('.subscriber_cnt').get_text().strip(),
        tr.select_one('.view_cnt').get_text().strip(),
        tr.select_one('.video_cnt').get_text().strip(),
    ]


# One record per row matched on the first page.
lines = [parse_row(tr) for tr in trs]

- Step 5

In [9]:
# Column labels, in the same order as the fields of each record in `lines`.
column_names = ['순위','카테고리','채널','구독자수','조회수','비디오수']
df = pd.DataFrame(lines, columns=column_names)
df.head()

Unnamed: 0,순위,카테고리,채널,구독자수,조회수,비디오수
0,1,음악/댄스/가수,BLACKPINK,7350만,233억1663만,394개
1,2,음악/댄스/가수,BANGTANTV,6550만,157억9463만,"1,686개"
2,3,음악/댄스/가수,HYBE LABELS,6500만,221억9069만,818개
3,4,음악/댄스/가수,SMTOWN,3030만,243억1077만,"3,895개"
4,5,키즈/어린이,Boram Tube Vlog [보람튜브 브이로그],2650만,110억5288만,223개


In [10]:
# Preview the last rows of the first-page table.
df.tail()

Unnamed: 0,순위,카테고리,채널,구독자수,조회수,비디오수
95,96,키즈/어린이,미니팡TV,419만,28억3759만,734개
96,97,음식/요리/레시피,[Dorothy]도로시,417만,10억1456만,916개
97,98,미분류,MayTree,410만,7억5549만,173개
98,99,취미/라이프,JaeYeol ASMR 재열,409만,11억6453만,"1,085개"
99,100,TV/방송,채널 십오야,404만,8억8278만,547개


In [11]:
# Helper that converts a Korean-unit count string into a number
def convert_unit(s):
    """Convert a count such as '233억1663만' or '1,686개' to a comma-grouped digit string.

    The '억' (x100,000,000) and '만' (x10,000) parts are parsed separately and
    summed, so the result is correct even when the '만' part is missing or has
    fewer than four digits (e.g. '3억' or '1억5만'). The counter suffix '개' and
    thousands separators are ignored.

    Args:
        s: Count text, e.g. '7350만', '233억1663만', '394개', '1,686개'.

    Returns:
        The value formatted with thousands separators, e.g. '23,316,630,000'.

    Raises:
        ValueError: If the text is empty or a numeric part is not an integer.
    """
    text = s.replace('개', '').replace(',', '').strip()
    if not text:
        raise ValueError(f'empty count string: {s!r}')

    total = 0
    # Peel units off from largest to smallest; whatever follows a unit is the
    # remainder still to be parsed.
    for unit, multiplier in (('억', 100_000_000), ('만', 10_000)):
        if unit in text:
            head, text = text.split(unit, 1)
            total += int(head) * multiplier
    # Any trailing digits without a unit are plain ones.
    if text:
        total += int(text)
    return f'{total:,d}'

In [12]:
# Check the converter on the sample view count scraped above.
convert_unit('233억1663만')

'23,316,630,000'

In [13]:
# Every column from the fourth onward holds a unit-suffixed count; convert each value.
count_columns = df.columns[3:]
for name in count_columns:
    df[name] = df[name].map(convert_unit)
df.head()

Unnamed: 0,순위,카테고리,채널,구독자수,조회수,비디오수
0,1,음악/댄스/가수,BLACKPINK,73500000,23316630000,394
1,2,음악/댄스/가수,BANGTANTV,65500000,15794630000,1686
2,3,음악/댄스/가수,HYBE LABELS,65000000,22190690000,818
3,4,음악/댄스/가수,SMTOWN,30300000,24310770000,3895
4,5,키즈/어린이,Boram Tube Vlog [보람튜브 브이로그],26500000,11052880000,223


- Step 6: 모든 페이지

In [19]:
from tqdm.notebook import tqdm

In [22]:
def _cell_text(row, selector):
    """Return the stripped text of the first element inside `row` matching `selector`."""
    return row.select_one(selector).get_text().strip()


# Walk result pages 1 through 10 and gather every ranking row into `lines`.
lines = []
for page in tqdm(range(1, 11)):
    url = f'https://youtube-rank.com/board/bbs/board.php?bo_table=youtube&page={page}'
    driver.get(url)
    time.sleep(2)  # fixed pause before the page source is read
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    trs = soup.select('tr.aos-init')

    # Fields are read in the order: rank, category, channel, subscribers, views, videos.
    lines.extend(
        [
            int(_cell_text(tr, '.rank')),
            _cell_text(tr, '.category')[1:-1],
            _cell_text(tr, '.subject a'),
            _cell_text(tr, '.subscriber_cnt'),
            _cell_text(tr, '.view_cnt'),
            _cell_text(tr, '.video_cnt'),
        ]
        for tr in trs
    )

  0%|          | 0/10 [00:00<?, ?it/s]

In [23]:
# Build the table from all scraped pages, then convert the count columns
# (fourth column onward) with convert_unit.
header = ['순위','카테고리','채널','구독자수','조회수','비디오수']
df = pd.DataFrame(lines, columns=header)
for name in header[3:]:
    df[name] = df[name].map(convert_unit)
df.head()

Unnamed: 0,순위,카테고리,채널,구독자수,조회수,비디오수
0,1,음악/댄스/가수,BLACKPINK,73500000,23316630000,394
1,2,음악/댄스/가수,BANGTANTV,65500000,15794630000,1686
2,3,음악/댄스/가수,HYBE LABELS,65000000,22190690000,818
3,4,음악/댄스/가수,SMTOWN,30300000,24310770000,3895
4,5,키즈/어린이,Boram Tube Vlog [보람튜브 브이로그],26500000,11052880000,223


In [24]:
# Preview the last rows of the full table.
df.tail()

Unnamed: 0,순위,카테고리,채널,구독자수,조회수,비디오수
995,994,애완/반려동물,김메주와 고양이들,580000,209780000,1060
996,997,미분류,일주어터,580000,118380000,194
997,1001,미분류,쏘대장,580000,131650000,352
998,1000,영화/만화/애니,네이버 웹툰,580000,432110000,833
999,999,음악/댄스/가수,DanalEntertainment,580000,316800000,10089


- Step 7

In [25]:
# End the whole WebDriver session. close() would only close the current window
# and can leave the chromedriver process running in the background.
driver.quit()

In [26]:
# Write the table to a CSV file, leaving out the DataFrame index column.
df.to_csv('YouTube랭킹.csv', index=False)