In [None]:
!pip install pandas nltk



In [None]:
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk import pos_tag
from nltk.tokenize import word_tokenize

# Download necessary NLTK data.
# Recent NLTK releases (3.9+) look up the tokenizer and tagger models under the
# 'punkt_tab' / 'averaged_perceptron_tagger_eng' ids, while older releases use
# 'punkt' / 'averaged_perceptron_tagger'. Fetch both so the notebook runs with
# whichever version the unpinned install provides.
for resource in (
    'vader_lexicon',
    'sentiwordnet',
    'wordnet',
    'omw-1.4',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/sentiwordnet.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [None]:
from google.colab import files

# Opens Colab's upload widget; the returned dict is keyed by file name
uploaded = files.upload()

# Load whichever file was just uploaded instead of requiring it to be named
# "test.csv"; fall back to "test.csv" when nothing new was uploaded (e.g. the
# file is already on disk from an earlier run)
file_name = next(iter(uploaded), "test.csv")
df = pd.read_csv(file_name)

df.head()  # Verify the data is loaded correctly


Saving test.csv to test.csv
                                              review sentiment
0  Steven Rea plays a forensic scientist thrust o...  positive
1  As the first of the TV specials offered on the...  positive
2  There may something poetically right in seeing...  negative
3  all i can say about this film is to read the b...  negative
4  I thought it was a pretty good movie and shoul...  positive


In [None]:
# Function to map NLTK's POS tags to WordNet's POS tags
def get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank POS tag into the matching WordNet POS constant.

    Args:
        treebank_tag: Tag string as produced by ``pos_tag`` (e.g. 'JJ', 'VBD').

    Returns:
        ``wn.ADJ``, ``wn.VERB``, ``wn.NOUN`` or ``wn.ADV`` depending on the
        tag's leading letter, or ``None`` when the tag starts with anything else.
    """
    prefix_table = (
        ('J', wn.ADJ),
        ('V', wn.VERB),
        ('N', wn.NOUN),
        ('R', wn.ADV),
    )
    for prefix, wordnet_pos in prefix_table:
        if treebank_tag.startswith(prefix):
            return wordnet_pos
    return None

# Function for sentiment analysis using SentiWordNet
def analyze_sentiment_sentiwordnet(review):
    """Classify a review as "positive" or "negative" using SentiWordNet.

    The review is tokenized and POS-tagged; every noun, adjective and adverb
    that WordNet can lemmatize contributes the (positive - negative) score of
    its first synset to a running total.

    Args:
        review: Review text to classify.

    Returns:
        "positive" when the summed score is >= 0 (this includes reviews with
        no scorable words), otherwise "negative".
    """
    sentiment = 0.0

    tagged_words = pos_tag(word_tokenize(review))

    for word, tag in tagged_words:
        wn_tag = get_wordnet_pos(tag)
        # Only nouns, adjectives and adverbs are scored; verbs and all other
        # tags are skipped.
        if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
            continue

        lemma = wn.morphy(word, wn_tag)
        if not lemma:
            continue

        synsets = wn.synsets(lemma, pos=wn_tag)
        if not synsets:
            continue

        # Take the first sense, the most common
        swn_synset = swn.senti_synset(synsets[0].name())
        # senti_synset() returns None when SentiWordNet has no entry for the
        # synset; skip the word instead of failing on None.pos_score().
        if swn_synset is None:
            continue
        sentiment += swn_synset.pos_score() - swn_synset.neg_score()

    # Judgement call: a total of exactly 0 defaults to positive
    return "positive" if sentiment >= 0 else "negative"


In [None]:
import nltk
# Punkt tokenizer models (presumably what word_tokenize() relies on); fetched
# here, before the reviews are first tokenized
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Label every review with the SentiWordNet-based classifier
df['SentiWordNet_Sentiment'] = df['review'].map(analyze_sentiment_sentiwordnet)


In [None]:
# Apply sentiment analysis using VADER
analyzer = SentimentIntensityAnalyzer()


def _vader_label(review):
    """Map VADER's compound score to "positive" (>= 0) or "negative"."""
    compound = analyzer.polarity_scores(review)['compound']
    if compound >= 0:
        return "positive"
    return "negative"


df['VADER_Sentiment'] = df['review'].apply(_vader_label)


In [None]:
!pip install afinn

Collecting afinn
  Downloading afinn-0.1.tar.gz (52 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: afinn
  Building wheel for afinn (setup.py) ... [?25l[?25hdone
  Created wheel for afinn: filename=afinn-0.1-py3-none-any.whl size=53429 sha256=1e583b556eb10b34ffb032f4d922f9ecc24076c457cbbcbd8823f30dee160a5a
  Stored in directory: /root/.cache/pip/wheels/b0/05/90/43f79196199a138fb486902fceca30a2d1b5228e6d2db8eb90
Successfully built afinn
Installing collected packages: afinn
Successfully installed afinn-0.1


In [None]:
from afinn import Afinn
# Single shared AFINN scorer; analyze_sentiment_afinn() calls afinn.score() on it
afinn = Afinn()


In [None]:
def analyze_sentiment_afinn(review):
    """Classify a review as "positive" or "negative" from its AFINN score.

    Args:
        review: Review text passed to ``afinn.score``.

    Returns:
        "positive" when the score is greater than or equal to zero,
        otherwise "negative".
    """
    score = afinn.score(review)
    # Neutral reviews (score == 0) default to "positive", the same tie-break
    # the SentiWordNet and VADER classifiers in this notebook use (>= 0), so
    # all three lexicons are compared under one rule.
    return "positive" if score >= 0 else "negative"


In [None]:
# Label every review with the AFINN-based classifier
df['AFINN_Sentiment'] = df['review'].map(analyze_sentiment_afinn)


In [None]:
# Display a preview of the results: the frame holds thousands of reviews, so
# show only the first rows, using the notebook's rich table rendering
df[['review', 'SentiWordNet_Sentiment', 'VADER_Sentiment', 'AFINN_Sentiment']].head(10)

                                                  review  \
0      Steven Rea plays a forensic scientist thrust o...   
1      As the first of the TV specials offered on the...   
2      There may something poetically right in seeing...   
3      all i can say about this film is to read the b...   
4      I thought it was a pretty good movie and shoul...   
...                                                  ...   
19995  Well-done ghost story that will give you the c...   
19996  I'm at a loss for words. This movie is beyond ...   
19997  First off, I had my doubts just looking at the...   
19998  In an early scene, Luca (David Pasquesi) and J...   
19999  I have no idea why people are so crazy about t...   

      SentiWordNet_Sentiment VADER_Sentiment AFINN_Sentiment  
0                   negative        positive        negative  
1                   positive        positive        positive  
2                   negative        positive        negative  
3                   positiv

In [None]:
# Accuracy = fraction of reviews whose predicted label equals the labelled sentiment
true_labels = df['sentiment']

sentiwordnet_accuracy = df['SentiWordNet_Sentiment'].eq(true_labels).mean()
vader_accuracy = df['VADER_Sentiment'].eq(true_labels).mean()
afinn_accuracy = df['AFINN_Sentiment'].eq(true_labels).mean()

# Report one line per lexicon
print(f"SentiWordNet Accuracy: {sentiwordnet_accuracy}")
print(f"VADER Accuracy: {vader_accuracy}")
print(f"AFINN Accuracy: {afinn_accuracy}")

SentiWordNet Accuracy: 0.66775
VADER Accuracy: 0.7016
AFINN Accuracy: 0.72
