In [3]:
import time
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

# YouTube NLP

This class allows you to do any of the following:

- Get a DataFrame of up to the top 100 comments of a video and their respective sentiments
- Get the overall sentiment of a video based on the top 100 comments
- Get a list of the top n videos of a channel sorted by popularity, newest releases, or oldest releases



In [76]:
class youtube():
    '''
    Summary: Scrapes YouTube pages with Selenium (Chrome) and scores comments with VADER.
    Public methods:
        get_top_videos: titles of the first num_videos videos of a channel
        sentiment_df:   dataframe of the top comments of a video and their sentiments
        get_sentiment:  mean sentiment of the top comments of a video
        print_pretty:   prints every item of an iterable on its own line
    '''
    # Maximum number of comments kept per video
    _MAX_COMMENTS = 100
    # How many times the page is scrolled to the end so more comments get loaded
    _SCROLL_PASSES = 6
    # Value of the `sort` query parameter for each supported sorting_method
    _SORT_CODES = {1: 'p', 2: 'dd', 3: 'da'}
    # VADER analyzer, created on first use and then reused for every comment
    __analyzer = None

    def __init__(self):
        pass

    def get_top_videos(self, url, sorting_method, num_videos):
        ''' 
        Summary: Returns list of the titles of top num_videos videos
        Parameters:
            sorting_method(int): Determines how the videos will be sorted
                sorting_method = 1 for sorting by most popular videos
                sorting_method = 2 for sorting by new videos
                sorting_method = 3 for sorting by old videos
            url(string): Channel URL - ensure there is the default url with no query string
            num_videos(int): The number of videos to be returned
        Returns: list of the titles of top num_videos videos
        Raises: ValueError if sorting_method is not 1, 2 or 3
        '''
        # Private helpers must be reached through self: a bare __name inside a
        # class body is name-mangled and looked up as a (missing) global.
        url = self.__create_link(url, sorting_method)
        # The context manager quits the browser even if loading the page fails
        with webdriver.Chrome() as driver:
            driver.get(url)
            content = driver.page_source.encode('utf-8').strip()
        soup = BeautifulSoup(content, 'lxml')
        titles = soup.find_all('a', id='video-title')
        return [title.text for title in titles[0:num_videos]]

    def sentiment_df(self, link):
        ''' 
        Summary: Returns full dataframe of top 100 comments and sentiments
        Parameters:
            link(string): link of youtube video
        Returns: dataframe with a 'comments' column and a float 'sentiment' column
        '''
        comments = self.__get_comments(link)
        df = pd.DataFrame(data=comments, columns=['comments'])
        # Element-wise map over the single column; the float cast keeps the
        # column numeric even when no comments were scraped.
        df['sentiment'] = df['comments'].map(self.__vader_sentiment).astype(float)
        return df

    def get_sentiment(self, link):
        ''' 
        Summary: Returns overall video sentiment
        Parameters:
            link(string): link of youtube video
        Returns: mean of the comment sentiments (NaN when no comments were found)
        '''
        df = self.sentiment_df(link)
        return df['sentiment'].mean()

    def print_pretty(self, list):
        ''' 
        Summary: Prints every item of the given iterable on its own line
        Parameters:
            list(iterable): items to print (name kept for backward compatibility
                even though it shadows the builtin inside this method)
        '''
        for l in list:
            print(l)

    #Sub methods
    def __create_link(self, url, sorting_method):
        ''' 
        This is a sub method and should not be called by the user
        Summary: Returns appropriate channel link based on sorting parameters
        Parameters:
            sorting_method(int): Determines how the videos will be sorted
                sorting_method = 1 for sorting by most popular videos
                sorting_method = 2 for sorting by new videos
                sorting_method = 3 for sorting by old videos
            url(string): Channel URL - ensure there is the default url with no query string
        Returns: Channel string with appropriate sort query string
        Raises: ValueError if sorting_method is not 1, 2 or 3
        '''
        sort_code = self._SORT_CODES.get(sorting_method)
        if sort_code is None:
            # Fail early instead of returning None and crashing later in driver.get
            raise ValueError(
                f'sorting_method must be 1, 2 or 3, got {sorting_method!r}'
            )
        # Drop trailing slashes so the result never contains '//videos'
        base = url.rstrip('/')
        return f'{base}/videos?view=0&sort={sort_code}&flow=grid'

    def __get_comments(self, link):
        ''' 
        This is a sub method and should not be called by the user
        Summary: Returns list of top 100 comments
        Parameters:
            link(string): link of youtube video
        Returns: list of at most 100 comment strings
        '''
        comments = []
        with Chrome() as driver:
            wait = WebDriverWait(driver, 12)
            driver.get(link)
            time.sleep(5)
            # Scroll a little so the comment section starts loading
            driver.execute_script("window.scrollTo(0, 500)")
            time.sleep(5)

            # Each END key press loads another batch of comments;
            # raise _SCROLL_PASSES to collect more content
            for _ in range(self._SCROLL_PASSES):
                wait.until(EC.visibility_of_element_located((By.TAG_NAME, "body"))).send_keys(Keys.END)
                time.sleep(3)

            # Read the text while the browser is still open
            for comment in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#comment #content-text"))):
                comments.append(comment.text)

        return comments[0:self._MAX_COMMENTS]

    def __vader_sentiment(self, comment):
        ''' 
        This is a sub method and should not be called by the user
        Summary: Predicts sentiment with VADER
        Parameters:
            comment(string): a string/ youtube comment
        Returns: the VADER 'compound' score of the comment
        '''
        # Build the analyzer once instead of once per comment
        if self.__analyzer is None:
            self.__analyzer = SentimentIntensityAnalyzer()
        sentiment_dict = self.__analyzer.polarity_scores(comment)
        return sentiment_dict.get('compound')

    


In [63]:
# Instantiate the scraper class defined above; __init__ does no work, so no browser opens yet
yt = youtube()

In [72]:
# Print the 5 most popular video titles of the channel (sorting_method=1).
# get_top_videos is a method, so it must be called on the instance.
yt.print_pretty(yt.get_top_videos('https://www.youtube.com/channel/UCktiIeImOu4QzS6fM8gQKPA',1,5))

Basic beatbox patterns for beginners | Tutorial
Basic beatbox patterns for beginners 2 | Tutorial
Basic beatbox patterns for beginners 3 | Tutorial
Basic beatbox patterns for beginners 6 | Tutorial
Basic beatbox patterns for beginners 4 | Tutorial


In [75]:

# Build the comments/sentiment DataFrame for one video via yt.sentiment_df
df = yt.sentiment_df('https://www.youtube.com/watch?v=YbJOTdZBX1g&ab_channel=YouTube')
# Bare last expression so the notebook renders the DataFrame
df

Unnamed: 0,comments,sentiment
0,Let’s all take a moment of silence for the 19M...,-0.4019
1,"What a great video, I am sure it doesn't have ...",0.6784
2,"Rest in Peace My dear dislike Buttom, you Will...",0.3182
3,Rewind: *gets millions of dislikes and hate co...,-0.5994
4,To everyone else who participate in those 19MI...,0.2654
...,...,...
95,This is the best like to dislike ratio i've ev...,0.6801
96,Wow! Three million likes! This must be of the ...,0.8164
97,"""i like this society""",0.3612
98,I wouldn't be surprised if this surpassed 20M ...,-0.7923


In [48]:
# Overall video sentiment: the mean of the per-comment sentiment column
print(yt.get_sentiment('https://www.youtube.com/watch?v=YbJOTdZBX1g&ab_channel=YouTube'))

-0.048117
