# Importing Libraries

In [None]:
!pip install afinn

In [None]:
!pip install NRCLex

In [None]:
import pandas as pd
import numpy as np

# Lexicons
from afinn import Afinn
from nrclex import NRCLex
from textblob import TextBlob

import nltk
nltk.download('vader_lexicon')
nltk.download('punkt')

from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Although we do not use a classification algorithm, we can use the
# classification report to evaluate the lexicon's performance against the
# "ground truth": the values in the 'positive' column
from sklearn.metrics import classification_report

# Citirea Datelor

In [None]:
# Read the reviews CSV from its raw GitHub URL into a DataFrame
url = 'https://raw.githubusercontent.com/DanielaManate/SentimentAnalysis-TopicModeling/master/Data/Input/3.input_data_prepped_bow.csv'
reviews = pd.read_csv(url)
# Preview the first two rows
reviews.head(2)

In [None]:
# Make sure each text column holds Python strings; the scoring cells further
# down pass these values directly to the lexicon scorers.
for column in ('text', 'text_prep', 'text_prep_lim'):
    reviews[column] = reviews[column].astype(str)

# AFINN

## Exemple

In [None]:
# Score a sentence with an intensifier ("utterly"), a positive word and "!"
Afinn().score('This is utterly excellent!')

In [None]:
# Baseline sentence, reused in the next two cells
Afinn().score('This restaurant has OK food')

In [None]:
# Same sentence with "!!!" appended: compare with the previous score to see
# whether punctuation affects the AFINN result
Afinn().score('This restaurant has OK food!!!')

In [None]:
# Baseline sentence followed by a second sentence containing "best"
Afinn().score('This restaurant has OK food. It is the best in the neighbourhood.')

In [None]:
# Another sentence with an intensifier ("absolutely") and a positive word
Afinn().score('The pizza here is absolutely amazing!')

In [None]:
# Negative sentence with lowercase "worst"
Afinn().score('This restaurant has the worst food!')

In [None]:
# Same sentence with uppercase "WORST": compare with the previous score to
# see whether capitalisation affects the AFINN result
Afinn().score('This restaurant has the WORST food!')

## Implementari pe textul pre-procesat

In [None]:
# Build the AFINN scorer once and reuse it for every review: Afinn() loads
# its word list on construction, so instantiating it inside the lambda
# repeated that work for each row.
afinn_scorer = Afinn()
reviews['afinn_score_tp'] = reviews['text_prep'].apply(afinn_scorer.score)

In [None]:
# Create the afinn_class_tp column: 1 when the AFINN score is >= 0 (so a
# score of exactly 0 is counted as positive), 0 when the score is negative
reviews['afinn_class_tp'] = np.where(reviews['afinn_score_tp']>=0, 1, 0)

In [None]:
# Compare the lexicon-based labels with the 'positive' ground-truth column
print(classification_report(reviews['positive'], reviews['afinn_class_tp']))

* Tema 2: Implementati lexiconul Afinn pe textul original (reviews['text']), pentru a calcula scorurile fiecarui review si pentru a determina daca un review este pozitiv / negativ. Apoi, veti crea un classification report. Descrieti performanta lexiconului.
* Deadline: 26 Mar.

# TextBlob
* Scorul (polaritatea) documentului este intre [-1, 1]
* Subiectivitatea documentului este intre [0,1]; 0=fapt, realitate; 1=parere

## Exemple

In [None]:
# Full sentiment result for the baseline sentence; the next two cells read
# its polarity and subjectivity attributes individually
TextBlob('This restaurant has OK food').sentiment

In [None]:
# Polarity component only
TextBlob('This restaurant has OK food').sentiment.polarity

In [None]:
# Subjectivity component only
TextBlob('This restaurant has OK food').sentiment.subjectivity

In [None]:
# Baseline sentence followed by a second sentence containing "best"
TextBlob('This restaurant has OK food. It is the best in the neighbourhood').sentiment

In [None]:
# Baseline sentence with "!!!" appended: compare with the first cell to see
# whether punctuation affects the result
TextBlob('This restaurant has OK food!!!').sentiment

In [None]:
# A plain factual statement
TextBlob('Python is a programming language').sentiment

In [None]:
# An emoticon on its own
TextBlob(':D').sentiment

In [None]:
# Negative sentence with lowercase "bad"
TextBlob('This restaurant has bad food').sentiment

In [None]:
# Same sentence with uppercase "BAD": compare with the previous cell to see
# whether capitalisation affects the result
TextBlob('This restaurant has BAD food').sentiment

## Implementare pe textul original

In [None]:
reviews['textblob_score'] = reviews['text'].apply(lambda x: TextBlob(x).sentiment.polarity)

In [None]:
reviews['textblob_score'].describe()

In [None]:
reviews['textblob_class'] = np.where(reviews['textblob_score']>=0, 1, 0)

In [None]:
print(classification_report(reviews['positive'], reviews['textblob_class']))

## Implementare pe textul pre-procesat

In [None]:
reviews['textblob_score_tp'] = reviews['text_prep'].apply(lambda x: 
                                                          TextBlob(x).sentiment.polarity)

In [None]:
reviews['textblob_score_tp'].describe()

In [None]:
reviews['textblob_class_tp'] = np.where(reviews['textblob_score_tp']>=0, 1, 0)

In [None]:
print(classification_report(reviews['positive'], reviews['textblob_class_tp']))

# VADER
* scorul compus (compound) este intre [-1, 1]
* valorile pentru neg, neu, pos indica proportia (intre 0 si 1) din document care se incadreaza in acea categorie

## Exemple

In [None]:
# Baseline sentence
SentimentIntensityAnalyzer().polarity_scores('This restaurant has OK food')

In [None]:
# Baseline sentence with "!!!" appended: compare with the previous cell to
# see whether punctuation affects the scores
SentimentIntensityAnalyzer().polarity_scores('This restaurant has OK food!!!')

In [None]:
# A happy emoticon on its own
SentimentIntensityAnalyzer().polarity_scores(':D')

In [None]:
# A sad emoticon on its own
SentimentIntensityAnalyzer().polarity_scores(':(')

In [None]:
# Negative sentence with lowercase "worst"
SentimentIntensityAnalyzer().polarity_scores('This restaurant has the worst food')

In [None]:
# Same sentence with uppercase "WORST": compare with the previous cell to
# see whether capitalisation affects the scores
SentimentIntensityAnalyzer().polarity_scores('This restaurant has the WORST food')

In [None]:
# Keep only the 'compound' entry of the result, which is the value used for
# classification in the cells below
SentimentIntensityAnalyzer().polarity_scores('This restaurant has the WORST food')['compound']

## Implementare pe textul original

In [None]:
# Create the VADER analyzer once and reuse it for every review:
# SentimentIntensityAnalyzer() loads its lexicon on construction, so building
# it inside the lambda repeated that work for each row.
vader_analyzer = SentimentIntensityAnalyzer()
reviews['vader_score'] = reviews['text'].apply(
    lambda x: vader_analyzer.polarity_scores(x)['compound'])

In [None]:
# Binary label: 1 when the compound score is >= 0 (so a score of exactly 0
# is counted as positive), 0 when it is negative
reviews['vader_class'] = np.where(reviews['vader_score']>=0, 1, 0)

In [None]:
# Summary statistics of the score, the predicted class and the ground truth
reviews[['vader_score', 'vader_class', 'positive']].describe()

In [None]:
# Compare the lexicon-based labels with the 'positive' ground-truth column
print(classification_report(reviews['positive'], reviews['vader_class']))

In [None]:
# Isolate the reviews whose VADER compound score is exactly 0, then report how
# many there are and what share of them is actually labelled positive
zero_score_mask = reviews['vader_score'] == 0
recenzii_vaderscore0 = reviews.loc[zero_score_mask].copy()

zero_score_count = len(recenzii_vaderscore0)
print('Numarul de recenzii pentru care scorul VADER este exact 0:',
      zero_score_count)

zero_score_positive_share = recenzii_vaderscore0['positive'].mean()
print('Procentul de recenzii pozitive din acele recenzii cu scor VADER 0',
      zero_score_positive_share)

## Implementare pe textul pre-procesat

In [None]:
# Create the VADER analyzer once and reuse it for every review:
# SentimentIntensityAnalyzer() loads its lexicon on construction, so building
# it inside the lambda repeated that work for each row.
vader_analyzer = SentimentIntensityAnalyzer()
reviews['vader_score_tp'] = reviews['text_prep'].apply(
    lambda x: vader_analyzer.polarity_scores(x)['compound'])

In [None]:
# Binary label: 1 when the compound score is >= 0 (so a score of exactly 0
# is counted as positive), 0 when it is negative
reviews['vader_class_tp'] = np.where(reviews['vader_score_tp']>=0, 1, 0)

In [None]:
# Summary statistics of the score, the predicted class and the ground truth
reviews[['vader_score_tp', 'vader_class_tp', 'positive']].describe()

In [None]:
# Compare the lexicon-based labels with the 'positive' ground-truth column
print(classification_report(reviews['positive'], reviews['vader_class_tp']))

# EmoLex
https://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm

In [None]:
# Look up the top emotions NRCLex reports for the single word 'person'
emotion = NRCLex('person')
emotion.top_emotions

In [None]:
# Same lookup for the word 'lovely'
emotion = NRCLex('lovely')
emotion.top_emotions

In [None]:
# Same lookup for the word 'hate'
emotion = NRCLex('hate')
emotion.top_emotions