## Import and Initialize the model

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import torch
import pandas as pd



roberta = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(roberta)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(roberta).to(device)



Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Analyze Text and output results as a Dataframe

In [None]:

# Initialize a dictionary to store overall sentiment counts
overall_sentiments = {'negative': 0, 'neutral': 0, 'positive': 0}

def AnalyzeSentiment(sentences):
    results = []
    global overall_sentiments

    for sentence in sentences:
        if len(sentence) >514:
          continue
        encoded_text = tokenizer(sentence, return_tensors='pt').to(device)
        output = model(**encoded_text)
        scores = output.logits[0].cpu().detach().numpy()
        scores = softmax(scores)
        max_index = scores.argmax()
        label = ['negative', 'neutral', 'positive'][max_index]

        # Update overall sentiment counts
        overall_sentiments[label] += 1

        result = {
            'text': sentence,
            'negative': scores[0],
            'neutral': scores[1],
            'positive': scores[2],
            'overall_sentiment': label
        }
        results.append(result)

    return pd.DataFrame(results)



## Using the Youtube Data API to fetch comments from a Video

In [2]:
import os
import googleapiclient.discovery

import googleapiclient.errors

from dotenv import load_dotenv

load_dotenv()

api_service_name = "youtube"
api_version = "v3"
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")


youtube = googleapiclient.discovery.build(

    api_service_name, api_version, developerKey=YOUTUBE_API_KEY

)


def fetchAllComments(video_id, pageToken=None):

    items = []

    maxCount = 100


    while True:

        request = youtube.commentThreads().list(

            part="snippet",

            videoId=video_id,

            maxResults=100,

            pageToken=pageToken

        )


        response = request.execute()

        items.extend(response['items'])

        # if len(items) >= maxCount:

        #     break

        if 'nextPageToken' in response:

            pageToken = response['nextPageToken']

        else:

            break

    return items


#full link https://www.youtube.com/watch?v=4_UDm-nCjeA

video_id = "ddTV12hErTc"

items = fetchAllComments(video_id)

KeyboardInterrupt: 

## Output as DataFrame

In [None]:
comments = []

for item in items:
    comment = item['snippet']['topLevelComment']['snippet']
    comments.append([
        comment['authorDisplayName'],
        comment['publishedAt'],
        comment['updatedAt'],
        comment['likeCount'],
        comment['textDisplay']
    ])

df = pd.DataFrame(comments, columns=['author', 'published_at', 'updated_at', 'like_count', 'text'])

display(df)


Unnamed: 0,author,published_at,updated_at,like_count,text
0,@simoninkin9090,2024-05-29T17:12:30Z,2024-05-29T17:12:30Z,0,I had a better view on teenage engineering bef...
1,@nichtnennenswert2894,2024-05-29T15:47:35Z,2024-05-29T15:47:35Z,0,"To this Point, i only knew the Pocket synthesi..."
2,@This_Is_Skooba,2024-05-29T14:34:22Z,2024-05-29T14:34:22Z,0,I&#39;ll never understand why this device exists
3,@ALT-vz3jn,2024-05-29T14:28:20Z,2024-05-29T14:28:20Z,0,I don’t want another device to be chained to.....
4,@nuttynancy96,2024-05-29T09:01:22Z,2024-05-29T09:01:22Z,0,I feel like technology has slowed down in term...
...,...,...,...,...,...
18062,@stevennyang9304,2024-04-30T00:21:38Z,2024-04-30T00:21:38Z,0,!
18063,@creelfo,2024-04-30T00:21:38Z,2024-04-30T00:21:38Z,1,Oh no
18064,@chungyxmoo9963,2024-04-30T00:21:37Z,2024-04-30T00:21:37Z,2,First
18065,@mrk37k,2024-04-30T00:21:36Z,2024-04-30T00:21:36Z,3,First


## Analyze youtube comments

In [None]:
# print(df['text'])
youtubeAnalysisResults = AnalyzeSentiment(df['text'])


In [None]:
display(youtubeAnalysisResults)

Unnamed: 0,text,negative,neutral,positive,overall_sentiment
0,I had a better view on teenage engineering bef...,0.187516,0.658548,0.153936,neutral
1,"To this Point, i only knew the Pocket synthesi...",0.032795,0.153608,0.813597,positive
2,I&#39;ll never understand why this device exists,0.928738,0.063043,0.008219,negative
3,I don’t want another device to be chained to.....,0.875495,0.113198,0.011307,negative
4,I feel like technology has slowed down in term...,0.734678,0.243103,0.022219,negative
...,...,...,...,...,...
17424,!,0.038152,0.272419,0.689429,positive
17425,Oh no,0.359477,0.489131,0.151392,neutral
17426,First,0.089203,0.480305,0.430492,neutral
17427,First,0.089203,0.480305,0.430492,neutral


In [None]:
print(overall_sentiments)

{'negative': 9685, 'neutral': 6761, 'positive': 3396}
