In [None]:
# Import the necessary libraries
import pandas as pd
from textblob import TextBlob

# Load the reviews CSV (read as ISO-8859-1) from its direct path into a DataFrame
df = pd.read_csv('/content/ringsofpowerreviews_csv.csv', encoding='iso-8859-1')

# Series holding the raw review bodies from the 'text' column
text_field = df['text']

# Run TextBlob over every review and collect each review's `.sentiment` value, in row order
sentiment_list = [TextBlob(review).sentiment for review in text_field]

# Store the collected sentiment values in a new 'sentiment' column
df['sentiment'] = sentiment_list

In [None]:
# Preview the first rows to confirm the new 'sentiment' column is now in the dataframe
df.head()

Unnamed: 0,rating-other-user-rating,point-scale,review_favorability_score,title,review-date,text,found_helpful,total_voted,voted_not_helpful,helpful_percentage,sentiment
0,5.0,10.0,0.5,I expected much better after all the hype,3 September 2022,Ok So where to begin. I gave it a 2.5 stars or...,3018,3967,949,0.76,"(0.09324168255986438, 0.5137649051285416)"
1,1.0,10.0,0.1,First Season - Disconnected Bits & Pieces With...,4 September 2022,"To be fair, like the late Christopher Lee, I r...",2339,3486,1147,0.67,"(-0.0717948717948718, 0.5282051282051283)"
2,4.0,10.0,0.4,"2 Episodes in: ""It's okay.""",2 September 2022,I honestly didn't really have any expectations...,360,1061,701,0.34,"(0.1542929292929293, 0.5340548340548339)"
3,6.0,10.0,0.6,i will be honest,2 September 2022,I was expecting a total fiasco but I must admi...,1256,3430,2174,0.37,"(0.03888888888888889, 0.5092592592592591)"
4,7.0,10.0,0.7,Surprisingly good,2 September 2022,"A pleasant surprise, the cinematography is imp...",1486,5535,4049,0.27,"(0.17435897435897438, 0.5038461538461539)"


In [None]:
import nltk
# Download the 'vader_lexicon' NLTK data package.
# NOTE(review): presumably required by SentimentIntensityAnalyzer, which is imported
# below and instantiated in a later cell — confirm.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [None]:
# Shared analyzer instance; later cells reuse it through this module-level name
sentiment_analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment(review):
  """Return the analyzer's polarity-score mapping for a single review text."""
  scores = sentiment_analyzer.polarity_scores(review)
  return scores

# One score mapping per review, aligned with the dataframe's rows
sentiment_values = df['text'].apply(analyze_sentiment)

# Copy each entry of the score mapping into a dataframe column of the same name
for score_name in ('neg', 'neu', 'pos', 'compound'):
  # `key=score_name` freezes the current name inside the lambda
  df[score_name] = sentiment_values.apply(lambda scores, key=score_name: scores[key])

In [None]:
# Preview the first rows: the 'neg', 'neu', 'pos' and 'compound' polarity columns are now present
df.head()

Unnamed: 0,rating-other-user-rating,point-scale,review_favorability_score,title,review-date,text,found_helpful,total_voted,voted_not_helpful,helpful_percentage,sentiment,neg,neu,pos,compound
0,5.0,10.0,0.5,I expected much better after all the hype,3 September 2022,Ok So where to begin. I gave it a 2.5 stars or...,3018,3967,949,0.76,"(0.09324168255986438, 0.5137649051285416)",0.087,0.739,0.174,0.9977
1,1.0,10.0,0.1,First Season - Disconnected Bits & Pieces With...,4 September 2022,"To be fair, like the late Christopher Lee, I r...",2339,3486,1147,0.67,"(-0.0717948717948718, 0.5282051282051283)",0.057,0.814,0.129,0.9281
2,4.0,10.0,0.4,"2 Episodes in: ""It's okay.""",2 September 2022,I honestly didn't really have any expectations...,360,1061,701,0.34,"(0.1542929292929293, 0.5340548340548339)",0.096,0.728,0.176,0.9722
3,6.0,10.0,0.6,i will be honest,2 September 2022,I was expecting a total fiasco but I must admi...,1256,3430,2174,0.37,"(0.03888888888888889, 0.5092592592592591)",0.093,0.83,0.077,-0.2846
4,7.0,10.0,0.7,Surprisingly good,2 September 2022,"A pleasant surprise, the cinematography is imp...",1486,5535,4049,0.27,"(0.17435897435897438, 0.5038461538461539)",0.0,0.808,0.192,0.9665


In [None]:
import nltk

# Download the 'punkt' NLTK data package. This step must be run before the next
# cell, which tokenizes the reviews with sent_tokenize() / word_tokenize().
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import sent_tokenize, word_tokenize


# Define a custom function to identify the most negative or positive phrase in the review text
def find_most_negative_positive_phrase(review):
  """Find the most negative and the most positive sentence of a review.

  The review is split into sentences with sent_tokenize(). Each sentence is
  word-tokenized and re-joined with single spaces, and that string (the
  "phrase") is scored with the module-level `sentiment_analyzer`. Only the
  'compound' score is compared.

  Args:
    review: Review text to analyze.

  Returns:
    A 4-tuple `(most_negative_phrase, most_negative_score,
    most_positive_phrase, most_positive_score)`. A phrase stays "" with a
    score of 0.0 when no sentence scores strictly below (respectively
    strictly above) zero.
  """
  most_negative_phrase = ""
  most_negative_score = 0.0
  most_positive_phrase = ""
  most_positive_score = 0.0

  # Score every sentence exactly once. Wrapping this in a second loop over the
  # sentences would only repeat identical work (quadratic in the sentence
  # count) without changing the result.
  for sentence in sent_tokenize(review):
    phrase = " ".join(word_tokenize(sentence))
    compound = sentiment_analyzer.polarity_scores(phrase)['compound']
    # Strict comparisons: when several sentences tie, the earliest one is kept
    if compound < most_negative_score:
      most_negative_phrase = phrase
      most_negative_score = compound
    if compound > most_positive_score:
      most_positive_phrase = phrase
      most_positive_score = compound

  # Return the most negative and positive phrases and their corresponding scores
  return most_negative_phrase, most_negative_score, most_positive_phrase, most_positive_score

# Run the phrase finder on every review; each element of the result is a 4-tuple
most_negative_positive_phrases = df['text'].apply(find_most_negative_positive_phrase)

# Fan the tuple positions out into their own dataframe columns, in tuple order
for position, column_name in enumerate((
    'most_negative_phrase',
    'most_negative_score',
    'most_positive_phrase',
    'most_positive_score',
)):
  # `i=position` freezes the current tuple index inside the lambda
  df[column_name] = most_negative_positive_phrases.apply(lambda result, i=position: result[i])

In [None]:
# Preview the first rows, now including the most negative / most positive phrase and score columns
df.head()

Unnamed: 0,rating-other-user-rating,point-scale,review_favorability_score,title,review-date,text,found_helpful,total_voted,voted_not_helpful,helpful_percentage,sentiment,neg,neu,pos,compound,most_negative_phrase,most_negative_score,most_positive_phrase,most_positive_score
0,5.0,10.0,0.5,I expected much better after all the hype,3 September 2022,Ok So where to begin. I gave it a 2.5 stars or...,3018,3967,949,0.76,"(0.09324168255986438, 0.5137649051285416)",0.087,0.739,0.174,0.9977,"No dis to the actor , just wildly miscast and ...",-0.6486,Yes the Cinematography is great and most of th...,0.9299
1,1.0,10.0,0.1,First Season - Disconnected Bits & Pieces With...,4 September 2022,"To be fair, like the late Christopher Lee, I r...",2339,3486,1147,0.67,"(-0.0717948717948718, 0.5282051282051283)",0.057,0.814,0.129,0.9281,"It is boring , annoying , and makes no sense o...",-0.6369,"In order to make this happen , the writer stru...",0.8348
2,4.0,10.0,0.4,"2 Episodes in: ""It's okay.""",2 September 2022,I honestly didn't really have any expectations...,360,1061,701,0.34,"(0.1542929292929293, 0.5340548340548339)",0.096,0.728,0.176,0.9722,"It did n't leave a strong impression at all , ...",-0.4973,"Respect for the source material , respect for ...",0.8494
3,6.0,10.0,0.6,i will be honest,2 September 2022,I was expecting a total fiasco but I must admi...,1256,3430,2174,0.37,"(0.03888888888888889, 0.5092592592592591)",0.093,0.83,0.077,-0.2846,"But , Galadriel disappoints me , she is very b...",-0.7469,I was expecting a total fiasco but I must admi...,0.7269
4,7.0,10.0,0.7,Surprisingly good,2 September 2022,"A pleasant surprise, the cinematography is imp...",1486,5535,4049,0.27,"(0.17435897435897438, 0.5038461538461539)",0.0,0.808,0.192,0.9665,,0.0,"A pleasant surprise , the cinematography is im...",0.9117


In [None]:
def identify_overall_sentiment(review):
  """Return the 'compound' polarity score the shared analyzer assigns to `review`."""
  return sentiment_analyzer.polarity_scores(review)['compound']

# Score the text of every review with the function above
overall_sentiment_scores = df['text'].apply(identify_overall_sentiment)

# Keep the per-review scores in the 'overall_sentiment_score' column
df['overall_sentiment_score'] = overall_sentiment_scores

In [None]:
# Display the 'overall_sentiment_score' column (one compound score per review)
df['overall_sentiment_score']

0      0.9977
1      0.9281
2      0.9722
3     -0.2846
4      0.9665
        ...  
770    0.6545
771   -0.9795
772   -0.9704
773    0.9750
774    0.9606
Name: overall_sentiment_score, Length: 775, dtype: float64

In [None]:
# Overall sentiment of a review = the analyzer's 'compound' polarity score
def identify_overall_sentiment(review):
  scores_by_kind = sentiment_analyzer.polarity_scores(review)
  compound = scores_by_kind['compound']
  return compound

# Compute the score for each entry of the 'text' column and store it in the
# 'overall_sentiment_score' column in a single statement
df['overall_sentiment_score'] = overall_sentiment_scores = df['text'].apply(identify_overall_sentiment)

# Define a custom function to translate the overall sentiment score into a binary text value
def translate_overall_sentiment_score(overall_sentiment_score, threshold=0.78):
  """Map a numeric sentiment score to the label "Positive" or "Negative".

  Args:
    overall_sentiment_score: Numeric score to classify.
    threshold: Cutoff for the "Positive" label. Defaults to 0.78, the value
      that was previously hard-coded here.

  Returns:
    "Positive" when the score is strictly greater than `threshold`,
    otherwise "Negative" (a score exactly equal to the threshold is "Negative").
  """
  if overall_sentiment_score > threshold:
    return "Positive"
  return "Negative"

# Translate every value of the 'overall_sentiment_score' column into its text label
# and store the labels in the 'overall_sentiment_text' column
df['overall_sentiment_text'] = overall_sentiment_text = (
    df['overall_sentiment_score'].apply(translate_overall_sentiment_score)
)

In [None]:
def identify_overall_sentiment(review):
  """Label a review as "Positive", "Negative" or "Neutral".

  Compares the 'pos' and 'neg' polarity scores that the module-level
  `sentiment_analyzer` returns for the review: whichever is larger decides
  the label, and equal scores give "Neutral".

  Note: this redefines the earlier function of the same name, which returned
  the numeric 'compound' score instead of a text label.

  Args:
    review: Review text to classify.

  Returns:
    One of the strings "Positive", "Negative" or "Neutral".
  """
  polarity_scores = sentiment_analyzer.polarity_scores(review)
  # The larger score wins; the labels used to be swapped here
  if polarity_scores['pos'] > polarity_scores['neg']:
    return "Positive"
  elif polarity_scores['neg'] > polarity_scores['pos']:
    return "Negative"
  else:
    return "Neutral"

# Label every review's text and store the labels in the 'overall_sentiment_text' column
df['overall_sentiment_text'] = overall_sentiment_text = df['text'].apply(identify_overall_sentiment)



In [None]:
# Count how many reviews received each overall sentiment label
df['overall_sentiment_text'].value_counts()

Negative    482
Positive    290
Neutral       3
Name: overall_sentiment_text, dtype: int64

In [None]:
# Write the updated dataframe to a CSV file.
# index=False keeps the positional row index out of the file; writing it would add an
# extra unnamed leading column that comes back as "Unnamed: 0" when the CSV is re-read.
df.to_csv('ringsofpowerreviews_with_sentiment.csv', index=False)