In [6]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax
import googleapiclient.discovery
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

sia = SentimentIntensityAnalyzer()
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "AIzaSyCZ9Q_lGbaO9IG2mXoMEVQkHu4zwoP_KEI"

youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=DEVELOPER_KEY)

MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

### Sentiment analyzer engine

In [9]:
# Function to fetch comments and perform sentiment analysis
def sentiment_engine(video_id):
    comments = []
    
    # Fetch comments
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100
    )
    response = request.execute()

    while response:
        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            comments.append([
                comment['authorDisplayName'],
                comment['publishedAt'],
                comment.get('updatedAt', comment['publishedAt']),  # Use publishedAt if updatedAt not present
                comment['likeCount'],
                comment['textDisplay'],
                item['id']  # Add unique comment ID from the API response
            ])

        # Check for next page of comments
        if 'nextPageToken' in response:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=100,
                pageToken=response['nextPageToken']
            )
            response = request.execute()
        else:
            break
    
    # Create DataFrame
    df = pd.DataFrame(comments, columns=['author', 'published_at', 'updated_at', 'like_count', 'text', 'comment_id'])

    def polarity_scores_roberta(text):
        # Encode text and get model output
        encoded_text = tokenizer(text, return_tensors='pt')
        output = model(**encoded_text)
        scores = output[0][0].detach().numpy()
        scores = softmax(scores)
        scores_dict = {
            'roberta_neg': scores[0],
            'roberta_neu': scores[1],
            'roberta_pos': scores[2]
        }
        return scores_dict
    
    res = {}
    
    # Loop through comments and perform sentiment analysis
    for i, row in tqdm(df.iterrows(), total=len(df)):
        try:
            text = row['text']
            comment_id = row['comment_id']  # Use comment ID as a unique key
            
            # Use VADER for sentiment analysis
            vader_result = sia.polarity_scores(text)
            vader_result_rename = {f"vader_{key}": value for key, value in vader_result.items()}
            
            # Use RoBERTa for sentiment analysis
            roberta_result = polarity_scores_roberta(text)
            
            # Combine both results
            both = {**vader_result_rename, **roberta_result}
            res[comment_id] = both
            
        except Exception as e:
            print(f"Error for comment ID {comment_id}: {e}")
    
    # Convert results to DataFrame and merge with original DataFrame
    results_df = pd.DataFrame(res).T.reset_index().rename(columns={'index': 'comment_id'})
    results_df = results_df.merge(df, how='left', on='comment_id')
    
    return results_df

# Fetch comments for a video and perform sentiment analysis
df = sentiment_engine("OzV0tnMahGI")
df.head()

100%|███████████████████████████████████████████| 18/18 [00:00<00:00, 18.56it/s]


Unnamed: 0,comment_id,vader_neg,vader_neu,vader_pos,vader_compound,roberta_neg,roberta_neu,roberta_pos,author,published_at,updated_at,like_count,text
0,Ugzbv4pLF2CIr3WSbjl4AaABAg,0.0,1.0,0.0,0.0,0.001087,0.032337,0.966576,@thelightknight4811,2024-10-19T21:11:01Z,2024-10-19T21:11:01Z,0,Golazooooo!! Siiuuuuu!!! 😎❤️⚽🔥👍👍
1,UgyMshI0QDOREfPAq2p4AaABAg,0.0,0.734,0.266,0.4404,0.004731,0.197537,0.797732,@ManuChourasia2029,2024-10-19T20:58:46Z,2024-10-19T20:58:46Z,0,Furts good goal fro real madrid by kylian mbape
2,Ugy3bjrruw6mXrKsYWp4AaABAg,0.474,0.526,0.0,-0.4019,0.444784,0.380854,0.174362,@Juiccy_777,2024-10-19T20:55:38Z,2024-10-19T20:55:38Z,2,The glazing is insane
3,UgwrZJhufkS2583kOyd4AaABAg,0.178,0.822,0.0,-0.3818,0.463953,0.500175,0.035872,@LeastSlutty,2024-10-19T20:51:34Z,2024-10-19T20:51:34Z,0,Don&#39;t sleep on Milan or Liverpool to upset...
4,UgwmXaS9nuDmLm_47Kl4AaABAg,0.0,0.891,0.109,0.2382,0.281003,0.501992,0.217005,@LeastSlutty,2024-10-19T20:50:57Z,2024-10-19T20:50:57Z,1,This is a good team but a CL is NOT GURANTEED....


In [3]:
comments

[['@MrBeast',
  '2024-10-12T15:59:31Z',
  '2024-10-12T15:59:31Z',
  89298,
  'The chaos in this video is unreal lol'],
 ['@Daisy_Bailey',
  '2024-10-19T20:23:01Z',
  '2024-10-19T20:23:01Z',
  0,
  'STACKED GROUP'],
 ['@Sandeep_7999',
  '2024-10-19T20:22:56Z',
  '2024-10-19T20:22:56Z',
  0,
  'Welcome indiA🎉'],
 ['@YahirRegalado-d6m',
  '2024-10-19T20:22:23Z',
  '2024-10-19T20:22:23Z',
  0,
  'MR BEAST I FIXED WHEN I MEAN FIXED I MEAN MAKE A NEW ACCOUNT TO WACH YOUR VIDS AND I SUBBED'],
 ['@SVT_Sonic',
  '2024-10-19T20:17:23Z',
  '2024-10-19T20:17:23Z',
  1,
  'roblox doors floor 2 vibe'],
 ['@KaelaBlackfox-t9c',
  '2024-10-19T20:15:56Z',
  '2024-10-19T20:15:56Z',
  0,
  '10k likes he buys me a now laptop'],
 ['@SimpleCannon',
  '2024-10-19T20:14:25Z',
  '2024-10-19T20:14:25Z',
  0,
  'Love the video!'],
 ['@Mr.Butterscotch-i1k',
  '2024-10-19T20:05:57Z',
  '2024-10-19T20:05:57Z',
  0,
  'Why does everything look new'],
 ['@funrider28',
  '2024-10-19T20:05:32Z',
  '2024-10-19T20:05:32Z'