In [None]:
#Problem Statement
"""
*Sentiment Analysis of Movie Trailer Comments using NLP

As an AI service provider, conduct a comprehensive sentiment analysis of the YouTube comments on a movie trailer, quantifying positive and negative reactions to help predict the trailer's potential box office performance. The analysis will:

- Classify comment sentiments (positive or negative)
- Generate an overall sentiment score
- Provide insights into audience reception
- Assess the trailer's market appeal

"""

In [None]:
#Tools used in this project:
"""
PyTorch - torch
HuggingFace - transformers
NLTK - nltk
VADER - vaderSentiment
"""

In [None]:
!pip install torch

In [None]:
!pip install transformers

In [None]:
!pip install nltk

In [None]:
!pip install vaderSentiment

In [None]:
import nltk

# Fetch the NLTK data packages this notebook relies on.
# NOTE(review): the sentiment analyzer used later is imported from the
# vaderSentiment package rather than nltk, so 'vader_lexicon' may be
# unnecessary — confirm before removing.
nltk.download('vader_lexicon')
# English stop-word list, read later through stopwords.words('english').
nltk.download('stopwords')
# Tokenizer data; presumably required by word_tokenize — verify against the
# installed NLTK version.
nltk.download('punkt_tab')

In [None]:
from transformers import pipeline

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Shared, notebook-wide resources reused by the helper functions below.

# VADER analyzer: its 'compound' score supplies the sentiment magnitude.
sia = SentimentIntensityAnalyzer()
# Kept as a set so per-token membership checks are constant time.
stop_words = set(stopwords.words('english'))
# DistilBERT fine-tuned on SST-2: supplies the polarity label of a comment.
classifier = pipeline("sentiment-analysis", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")

In [None]:
import os

import pandas as pd
from google.colab import files

# Location of the comments workbook inside the Colab runtime.
file_path = '/content/SnowWhite Comments YT.xlsx'

# Ask for an upload only when the workbook is not already present.
if not os.path.exists(file_path):
  uploaded = files.upload()
  if 'SnowWhite Comments YT.xlsx' not in uploaded:
    # Stop here with a clear message instead of printing and then failing
    # later inside pd.read_excel with a less helpful traceback.
    raise FileNotFoundError("Error: 'SnowWhite Comments YT.xlsx' was not uploaded. Please ensure you upload the correct file.")
  print("File 'SnowWhite Comments YT.xlsx' uploaded successfully.")

df = pd.read_excel(file_path)

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): no visible cell reads from /content/drive (the workbook is
# loaded from /content) — confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Excel cells can be blank (NaN) or numeric, but the tokenizer downstream
# needs str input: drop missing rows and coerce everything else to text.
comments = df['Comments'].dropna().astype(str).tolist()

In [None]:
def remove_stopwords(raw_comment):
  """Return the comment with English stop words filtered out.

  The text is split with ``word_tokenize``; a token is dropped when its
  lower-cased form is in the notebook-level ``stop_words`` collection. The
  surviving tokens keep their original casing and are re-joined with single
  spaces.
  """
  kept_tokens = []
  for token in word_tokenize(raw_comment):
    if token.lower() in stop_words:
      continue
    kept_tokens.append(token)
  return ' '.join(kept_tokens)

In [None]:
def get_comment_sentiment_details(raw_comment):
  """Score one comment and collect its sentiment-bearing words.

  The transformer classifier decides the polarity label, while VADER's
  'compound' score supplies the magnitude. A comment whose compound score is
  exactly 0 is reported as "NEUTRAL" regardless of the classifier.

  Args:
    raw_comment: The comment text as a string.

  Returns:
    A tuple (positive_words, negative_words, sentence_score, comment_sentiment):
      positive_words: words with a positive VADER score, each followed by a
        single space; only filled in for "POSITIVE" comments, otherwise "".
      negative_words: words with a negative VADER score, each followed by a
        single space; only filled in for "NEGATIVE" comments, otherwise "".
      sentence_score: magnitude of the VADER compound score, negated when the
        label is "NEGATIVE".
      comment_sentiment: "NEUTRAL", or the label returned by the classifier.
  """
  processed_comment = remove_stopwords(raw_comment)
  words = processed_comment.split()

  # Only the magnitude comes from VADER; the sign follows the classifier label.
  abs_sentence_score = abs(sia.polarity_scores(processed_comment)['compound'])

  if abs_sentence_score == 0:
    # No lexicon signal: the comment is neutral whatever the classifier says,
    # so skip the (comparatively expensive) model call altogether.
    return "", "", abs_sentence_score, "NEUTRAL"

  # truncation=True keeps very long comments within the model's maximum input
  # length instead of failing on them.
  comment_sentiment = classifier(processed_comment, truncation=True)[0]['label']

  positive_words = ""
  negative_words = ""

  if comment_sentiment == "NEGATIVE":
    sentence_score = -abs_sentence_score
    # Each word is followed by a space so consecutive words stay separable.
    negative_words = "".join(
        word + " " for word in words
        if sia.polarity_scores(word)['compound'] < 0)
  elif comment_sentiment == "POSITIVE":
    sentence_score = abs_sentence_score
    positive_words = "".join(
        word + " " for word in words
        if sia.polarity_scores(word)['compound'] > 0)
  else:
    # Any other label: keep the unsigned magnitude and collect no words.
    sentence_score = abs_sentence_score

  return positive_words, negative_words, sentence_score, comment_sentiment


In [None]:
# Aggregate the per-comment results into corpus-level word lists and scores.
positive_chunks = []
negative_chunks = []

pos_values_list = []  # signed scores of comments labelled POSITIVE
neg_values_list = []  # signed scores of comments labelled NEGATIVE
neu_count = 0         # comments with any other label

for comment in comments:
  pw, nw, ss, cs = get_comment_sentiment_details(comment)
  # Collect the pieces and join once after the loop instead of growing a
  # string with += on every iteration.
  positive_chunks.append(pw + " ")
  negative_chunks.append(nw + " ")

  if cs == "NEGATIVE":
    neg_values_list.append(ss)
  elif cs == "POSITIVE":
    pos_values_list.append(ss)
  else:
    neu_count += 1

positive_words = "".join(positive_chunks)
negative_words = "".join(negative_chunks)

# The averages are computed once, after the loop, and independently of each
# other: an empty list on one side must not zero out the other side's average.
avg_pos_score = sum(pos_values_list) / len(pos_values_list) if pos_values_list else 0
avg_neg_score = sum(neg_values_list) / len(neg_values_list) if neg_values_list else 0

scored_count = len(pos_values_list) + len(neg_values_list)
# Guard the division so a run with no positive/negative comments yields 0
# instead of raising ZeroDivisionError.
# NOTE(review): the formula is kept as written, but dividing the sum of two
# averages by the comment count looks unintended — the mean of all signed
# scores, (sum(pos) + sum(neg)) / scored_count, may be what was meant. Confirm.
final_score = (avg_pos_score + avg_neg_score) / scored_count if scored_count else 0


In [None]:
# Overall sentiment score produced by the aggregation cell above.
print(final_score)

In [None]:
# Space-separated words with a positive VADER score, gathered from comments labelled POSITIVE.
positive_words

In [None]:
# Words with a negative VADER score, gathered from comments labelled NEGATIVE.
negative_words

In [None]:
# Average score over the comments labelled POSITIVE.
avg_pos_score

In [None]:
# Average score over the comments labelled NEGATIVE.
avg_neg_score

In [None]:
!pip install wordcloud

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Word cloud of the positive words collected across all comments.
print("positives")

# WordCloud.generate raises ValueError when the text contains no words, so
# only plot when at least one positive word was collected.
if positive_words.strip():
  wordcloud_positive = WordCloud(width=800, height=400, background_color='purple').generate(positive_words)

  fig, ax = plt.subplots(figsize=(10, 5))
  ax.imshow(wordcloud_positive, interpolation='bilinear')
  ax.axis('off')  # the image has no meaningful axes
  plt.show()
else:
  print("No positive words were collected; skipping the word cloud.")


In [None]:
# Word cloud of the negative words collected across all comments.
print("negatives")

# WordCloud.generate raises ValueError when the text contains no words, so
# only plot when at least one negative word was collected.
if negative_words.strip():
  wordcloud_negative = WordCloud(width=800, height=400, background_color='red').generate(negative_words)

  fig, ax = plt.subplots(figsize=(10, 5))
  ax.imshow(wordcloud_negative, interpolation='bilinear')
  ax.axis('off')  # the image has no meaningful axes
  plt.show()
else:
  print("No negative words were collected; skipping the word cloud.")