# Applying VADER for Sentence-Level Sentiment Detection in Social Media Text

In [None]:
!pip install transformers
!pip install simpletransformers
!pip install vaderSentiment
!pip install scikit-learn

In [None]:
import spacy
from transformers import pipeline 
from pandas import np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datasets import load_dataset

# spaCy's small English pipeline; run_vader_spacy uses it for tokens, lemmas and POS tags.
nlp = spacy.load("en_core_web_sm")
# Single VADER analyzer instance, reused for every text that gets scored.
vader = SentimentIntensityAnalyzer()

In [None]:
# Load the tweet sentiment dataset through the Hugging Face `datasets` library.
dataset = load_dataset("mteb/tweet_sentiment_extraction")
# Convert the training split into a pandas DataFrame.
train_dataset = dataset['train'].to_pandas()

In [3]:
def vader_output_to_label(vader_output, threshold=0.0):
    """Convert a VADER score dictionary into a categorical sentiment label.

    Args:
        vader_output: Mapping with a 'compound' entry, such as the result of
            ``vader.polarity_scores``.
        threshold: Non-negative half-width of the neutral band around zero.
            Compound scores strictly below ``-threshold`` are negative,
            scores strictly above ``threshold`` are positive, and everything
            in between is neutral. The default of 0.0 treats only an exact
            zero as neutral; the VADER documentation suggests a cutoff of
            about 0.05 for a wider neutral band.

    Returns:
        One of 'negative', 'neutral' or 'positive'.

    Raises:
        ValueError: If ``threshold`` is negative.
        KeyError: If ``vader_output`` has no 'compound' entry.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    compound = vader_output['compound']
    if compound < -threshold:
        return 'negative'
    if compound > threshold:
        return 'positive'
    # Scores inside the band (including an undefined NaN score) count as neutral.
    return 'neutral'
    
def run_vader_spacy(textual_unit, lemmatize=False, pos_to_include=None, verbose=0):
    """Score a text with VADER after passing it through the spaCy pipeline.

    The text is tokenised with the module-level ``nlp`` pipeline, optionally
    filtered by part of speech and/or lemmatised, re-joined with single
    spaces and handed to the module-level ``vader`` analyzer.

    Args:
        textual_unit: The text to analyse.
        lemmatize: If true, use each token's lemma instead of its surface form.
        pos_to_include: Collection of spaCy POS tags to keep; ``None`` keeps
            every token.
        verbose: If truthy, print the text given to VADER and the scores.

    Returns:
        The value returned by ``vader.polarity_scores`` for the processed text.
    """
    keep_all = pos_to_include is None
    kept = [
        (tok.lemma_ if lemmatize else tok.text)
        for tok in nlp(textual_unit)
        if keep_all or tok.pos_ in pos_to_include
    ]

    vader_input = " ".join(kept)
    result = vader.polarity_scores(vader_input)

    if verbose:
        print("Input to VADER:", vader_input)
        print("Scores:", result)

    return result

# Translate the dataset's integer labels into sentiment names so they share
# the vocabulary of the labels produced by vader_output_to_label.
label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
# Values of 'label' that are missing from label_map end up as NaN in 'gold_label'.
train_dataset['gold_label'] = train_dataset['label'].map(label_map)

In [None]:
# Applying the VADER method to a single tweet (one row) of the dataset
def get_vader_label(row):
    """Return the VADER sentiment label for the 'text' entry of ``row``.

    The text is lemmatised before scoring; the resulting scores are mapped
    to a label by ``vader_output_to_label``.
    """
    return vader_output_to_label(run_vader_spacy(row['text'], lemmatize=True))


# Label every row with VADER; axis=1 passes each row to get_vader_label.
train_dataset['vader_label'] = train_dataset.apply(get_vader_label, axis=1)

# Save text, gold label and VADER label side by side (without the index)
# for qualitative and quantitative analysis.
train_dataset[['text', 'gold_label', 'vader_label']].to_csv("vader_sentiment_analysis.csv", index=False)