### Import Libraries

In [1]:
# Standard library
import re

# Third-party
import pandas as pd
import nltk
from nltk import pos_tag
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from afinn import Afinn
from textblob import TextBlob

# NLTK resources needed by the cells below. 'sentiwordnet' backs the `swn`
# corpus reader and 'vader_lexicon' backs SentimentIntensityAnalyzer; without
# them a fresh environment fails with LookupError at first use.
# NOTE(review): recent NLTK releases may additionally require 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' — confirm against the installed version.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('sentiwordnet')
nltk.download('vader_lexicon')

# Shared lemmatizer used by the scoring and preprocessing functions.
wordnet_lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Faegheh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Faegheh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Faegheh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Faegheh\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


### Define Sentiment Analysis methods

In [4]:
def AFINN(text):
    """Return the AFINN sentiment score of ``text``.

    Parameters
    ----------
    text : str
        Text to score.

    Returns
    -------
    float
        Whatever ``Afinn().score(text)`` returns for the text.
    """
    # Build the Afinn scorer once and reuse it: this function is called once
    # per row via ``Series.apply``, and constructing a new scorer every time
    # repeats the same set-up work for each document.
    scorer = getattr(AFINN, "_scorer", None)
    if scorer is None:
        scorer = AFINN._scorer = Afinn()
    return scorer.score(text)



def SentiWordNet(pos_data):
    """Score a POS-tagged document with SentiWordNet.

    Parameters
    ----------
    pos_data : iterable of (word, pos)
        ``pos`` is the WordNet part-of-speech of ``word``; pairs whose ``pos``
        is falsy (``None``, ``""``) are skipped.

    Returns
    -------
    tuple
        ``(label, score)``. ``score`` is the sum, over every word that has at
        least one WordNet synset, of ``pos_score() - neg_score()`` of its
        first synset. ``label`` is ``"Positive"`` for a score above zero,
        ``"Negative"`` below zero and ``"Neutral"`` at exactly zero.
        When no word can be scored the result is ``("Neutral", 0.0)``, so the
        return value always has the same two-element shape (callers unpack it
        into a polarity column and a score column).
    """
    sentiment = 0.0
    for word, pos in pos_data:
        if not pos:
            continue
        lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
        if not lemma:
            continue

        synsets = wordnet.synsets(lemma, pos=pos)
        if not synsets:
            continue
        # Only the first synset returned for the lemma is taken into account.
        swn_synset = swn.senti_synset(synsets[0].name())
        sentiment += swn_synset.pos_score() - swn_synset.neg_score()

    # With nothing scored, ``sentiment`` is still 0.0 and falls into the
    # "Neutral" branch, so no separate scalar return is needed.
    if sentiment > 0:
        return "Positive", sentiment
    if sentiment == 0:
        return "Neutral", sentiment
    return "Negative", sentiment



def VADER(text, threshold=0.5):
    """Classify ``text`` with VADER.

    Parameters
    ----------
    text : str
        Text to score.
    threshold : float, optional
        Cut-off applied to VADER's ``compound`` score. ``compound >= threshold``
        is labelled positive and ``compound <= -threshold`` negative; anything
        in between is neutral. Defaults to 0.5.
        NOTE(review): the VADER authors suggest +/-0.05 as the typical
        cut-off — confirm whether 0.5 is intentional.

    Returns
    -------
    tuple
        ``(label, score)`` where ``label`` is ``'Positive'``, ``'Negative'``
        or ``'Neutral'`` and ``score`` is the matching ``'pos'``, ``'neg'`` or
        ``'neu'`` entry of ``polarity_scores`` (not the compound score).
    """
    # Reuse a single analyzer across calls instead of constructing one per
    # document; this function is applied row by row.
    analyzer = getattr(VADER, "_analyzer", None)
    if analyzer is None:
        analyzer = VADER._analyzer = SentimentIntensityAnalyzer()

    # Score the text once and read every needed field from the same result.
    scores = analyzer.polarity_scores(text)
    compound = scores['compound']
    if compound >= threshold:
        return 'Positive', scores['pos']
    if compound <= -threshold:
        return 'Negative', scores['neg']
    return 'Neutral', scores['neu']
                                                         


def Textblob(text):
    """Classify ``text`` by its TextBlob polarity.

    Returns a ``(label, polarity)`` pair: ``'Negative'`` when the polarity is
    below zero, ``'Neutral'`` when it equals zero and ``'Positive'`` otherwise.
    """
    score = TextBlob(text).sentiment.polarity
    if score < 0:
        return 'Negative', score
    if score == 0:
        return 'Neutral', score
    return 'Positive', score

### Define a function to calculate sentiment analysis scores with the above methods

In [5]:
def Sentiment_Analysis_lex(dataset, text_name, methods):
    """Score every document of a dataset with lexicon-based sentiment methods.

    Parameters
    ----------
    dataset : str, path-like or pandas.DataFrame
        Path of the CSV file (passed to ``pd.read_csv``). A DataFrame is also
        accepted; it is copied, so the caller's frame is never modified.
    text_name : str
        Name of the column holding the texts.
    methods : str or iterable of str
        ``'all'`` to run every method, a single method name, or a list of
        method names among ``'AFINN'``, ``'SentiWordNet'``, ``'VADER'`` and
        ``'TextBlob'``. Methods run in the order given.

    Returns
    -------
    pandas.DataFrame
        The input data plus the output columns of each requested method:
        ``AFINN_Score``; ``SentiWordNet_polarity`` / ``SentiWordNet_Score``;
        ``Vader_polarity`` / ``vader_Score``; ``TextBlob_polarity`` /
        ``TextBlob_Score``. The intermediate preprocessing columns are dropped.

    Raises
    ------
    ValueError
        If ``methods`` contains a name that is not a known method (such a
        name would otherwise be ignored without any column being added).
    """
    # method name -> (preprocessed column it reads, scoring function, output columns)
    method_table = {
        'AFINN': ('Cleaned_Doc', AFINN, ['AFINN_Score']),
        'SentiWordNet': ('POS_tagged', SentiWordNet,
                         ['SentiWordNet_polarity', 'SentiWordNet_Score']),
        'VADER': ('Lemma', VADER, ['Vader_polarity', 'vader_Score']),
        'TextBlob': ('Lemma', Textblob, ['TextBlob_polarity', 'TextBlob_Score']),
    }

    # Resolve the request up front so that a typo fails before any expensive
    # preprocessing. A bare string is one method name (or 'all'), not a
    # sequence of characters.
    if isinstance(methods, str):
        requested = list(method_table) if methods == 'all' else [methods]
    else:
        requested = list(methods)
    unknown = [name for name in requested if name not in method_table]
    if unknown:
        raise ValueError(
            "Unknown sentiment method(s) %r; expected 'all' or any of %r"
            % (unknown, list(method_table))
        )

    # Create the dataframe
    if isinstance(dataset, pd.DataFrame):
        my_data = dataset.copy()
    else:
        my_data = pd.read_csv(dataset)

    # Cleaning the text: every run of non-letter characters becomes one space
    def clean_text(text):
        return re.sub('[^A-Za-z]+', ' ', text)

    # Built once per call rather than once per token.
    stop_words = set(stopwords.words('english'))
    # POS tagger dictionary: first letter of the tagger's tag -> WordNet POS
    pos_dict = {'J': wordnet.ADJ, 'V': wordnet.VERB, 'N': wordnet.NOUN, 'R': wordnet.ADV}

    # Tokenization, POS tagging, stopwords removal
    def Tokenization_POS_stopwords(text):
        return [
            (word, pos_dict.get(tag[0]))
            for word, tag in pos_tag(word_tokenize(text))
            if word.lower() not in stop_words
        ]

    # Lemmatization: words without a WordNet POS are kept unchanged
    def lemmatiz(pos_data):
        return " ".join(
            wordnet_lemmatizer.lemmatize(word, pos=pos) if pos else word
            for word, pos in pos_data
        )

    # Create the required intermediate columns. Missing texts are treated as
    # empty documents so that the regex cleaning does not fail on NaN.
    raw_texts = my_data[text_name].fillna('').astype(str)
    my_data['Cleaned_Doc'] = raw_texts.apply(clean_text)
    my_data['POS_tagged'] = my_data['Cleaned_Doc'].apply(Tokenization_POS_stopwords)
    my_data['Lemma'] = my_data['POS_tagged'].apply(lemmatiz)

    for name in requested:
        source_col, scorer, out_cols = method_table[name]
        if len(out_cols) == 1:
            # The scorer returns a single number.
            my_data[out_cols[0]] = my_data[source_col].apply(scorer)
        else:
            # The scorer returns (label, score); expand it into two columns.
            my_data[out_cols] = my_data[source_col].apply(lambda x: pd.Series(scorer(x)))

    final_data = my_data.drop(columns=['Cleaned_Doc', 'POS_tagged', 'Lemma'])
    return final_data

#### Example - 1 : Use AFINN and TextBlob methods

In [6]:
# NOTE(review): absolute, machine-specific path — point it at the local copy of the CSV before running.
Sentiment_Analysis_lex(r'D:\lexicon_example.csv','DOCUMENT',['AFINN','TextBlob'])

Unnamed: 0,DOCUMENT_INDEX,DOCUMENT,TRUE_SENTIMENT,AFINN_Score,TextBlob_polarity,TextBlob_Score
0,3360,In 2006 Benjamin Koellmann bought a condomini...,Neutral,0.0,Positive,0.100000
1,3361,Lugo a former Catholic bishop who assumed off...,Positive,0.0,Positive,0.012500
2,3362,Spanish Wimbledon winner Rafael Nadal said Sun...,Positive,7.0,Positive,0.250000
3,3363,In a letter posted on the White House web site...,Positive,0.0,Positive,0.033333
4,3364,TAMPA At least Raheem Morris finally has the ...,Positive,0.0,Negative,-0.100000
...,...,...,...,...,...,...
573,3933,In the space of four days Harvey Weinstein ...,Neutral,-9.0,Positive,0.147917
574,3934,Weâll get to the merits of the charges and c...,Negative,0.0,Positive,0.276190
575,3935,Russia âs president Vladimir Putin wanted t...,Negative,8.0,Neutral,0.000000
576,3936,All five living former US presidents are teami...,Negative,2.0,Positive,0.068182


#### Example - 2 : Use all of the methods

In [7]:
# NOTE(review): absolute, machine-specific path — point it at the local copy of the CSV before running.
Sentiment_Analysis_lex(r'D:\lexicon_example.csv','DOCUMENT','all')

Unnamed: 0,DOCUMENT_INDEX,DOCUMENT,TRUE_SENTIMENT,AFINN_Score,SentiWordNet_polarity,SentiWordNet_Score,Vader_polarity,vader_Score,TextBlob_polarity,TextBlob_Score
0,3360,In 2006 Benjamin Koellmann bought a condomini...,Neutral,0.0,Positive,0.125,Neutral,0.896,Positive,0.100000
1,3361,Lugo a former Catholic bishop who assumed off...,Positive,0.0,Negative,-0.875,Neutral,0.913,Positive,0.012500
2,3362,Spanish Wimbledon winner Rafael Nadal said Sun...,Positive,7.0,Positive,0.500,Positive,0.348,Positive,0.250000
3,3363,In a letter posted on the White House web site...,Positive,0.0,Positive,1.500,Positive,0.299,Positive,0.033333
4,3364,TAMPA At least Raheem Morris finally has the ...,Positive,0.0,Positive,0.125,Neutral,1.000,Negative,-0.100000
...,...,...,...,...,...,...,...,...,...,...
573,3933,In the space of four days Harvey Weinstein ...,Neutral,-9.0,Negative,-0.875,Negative,0.273,Positive,0.147917
574,3934,Weâll get to the merits of the charges and c...,Negative,0.0,Positive,2.000,Positive,0.322,Positive,0.276190
575,3935,Russia âs president Vladimir Putin wanted t...,Negative,8.0,Negative,-0.125,Positive,0.376,Neutral,0.000000
576,3936,All five living former US presidents are teami...,Negative,2.0,Positive,0.625,Neutral,0.819,Positive,0.068182
