# VADER Model

In [2]:
# Imports to create the model
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np

# Imports to evaluate the model
from sklearn.metrics import accuracy_score 
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# Create a single VADER SentimentIntensityAnalyzer instance; the same object is reused
# to score every tweet in the scoring cell below.
analyzer = SentimentIntensityAnalyzer()

In [4]:
# Load the processed, concatenated tweets and append placeholder columns (all NaN) that
# will later hold the VADER scores ('neg', 'neu', 'pos', 'compound') and the predicted
# cyberbullying label ('pred_cb_label').
ConcatData_df = pd.read_csv('processed_concat_data.csv').assign(
    **dict.fromkeys(['neg', 'neu', 'pos', 'compound', 'pred_cb_label'], np.nan)
)

In [5]:
# Preview the first rows to confirm the placeholder score columns were added (still NaN here).
ConcatData_df.head()

Unnamed: 0,cyberbullying_type,processed_tweet_text,neg,neu,pos,compound,pred_cb_label
0,0,words katandandre food crapilicious mkr,,,,,
1,0,aussietv white mkr theblock imacelebrityau tod...,,,,,
2,0,xochitlsuckks classy whore red velvet cupcakes,,,,,
3,0,meh p thanks heads concerned another angry dud...,,,,,
4,0,rudhoeenglish isis account pretending kurdish ...,,,,,


In [6]:
# Score every tweet with VADER, store the four scores on the dataframe, and derive a
# binary cyberbullying prediction from the compound score.
#
# The scores are computed in a single pass and assigned column-wise instead of writing
# cell-by-cell with `.loc` inside `iterrows()`, which is needlessly slow on a large frame.

# Tweets whose compound score is at or below this value are predicted as cyberbullying (1).
COMPOUND_CB_THRESHOLD = -0.5
VADER_SCORE_COLUMNS = ['neg', 'neu', 'pos', 'compound']

# str() keeps the handling of missing tweets unchanged: a NaN is scored as the text 'nan'.
vader_scores = pd.DataFrame(
    [analyzer.polarity_scores(str(tweet)) for tweet in ConcatData_df['processed_tweet_text']],
    columns=VADER_SCORE_COLUMNS,
    dtype=float,
)

# Assign by position (to_numpy) so the result does not depend on the frame's index labels.
for score_column in VADER_SCORE_COLUMNS:
    ConcatData_df[score_column] = vader_scores[score_column].to_numpy()

ConcatData_df['pred_cb_label'] = (
    ConcatData_df['compound'] <= COMPOUND_CB_THRESHOLD
).astype(int)

In [89]:
# Inspect the first 15 rows now that the VADER scores and predicted labels are filled in.
ConcatData_df.head(15)

Unnamed: 0,cyberbullying_type,processed_tweet_text,neg,neu,pos,compound,pred_cb_label
0,0,words katandandre food crapilicious mkr,0.0,1.0,0.0,0.0,0
1,0,aussietv white mkr theblock imacelebrityau tod...,0.0,1.0,0.0,0.0,0
2,0,xochitlsuckks classy whore red velvet cupcakes,0.384,0.357,0.259,-0.34,0
3,0,meh p thanks heads concerned another angry dud...,0.341,0.444,0.215,-0.1779,0
4,0,rudhoeenglish isis account pretending kurdish ...,0.22,0.472,0.307,0.0258,0
5,0,raja5aab quickieleaks yes test god good bad in...,0.323,0.323,0.355,0.1779,0
6,0,itu sekolah ya bukan tempat bully ga jauh kaya...,0.262,0.738,0.0,-0.4939,0
7,0,karma hope bites kat butt nasty mkr,0.313,0.435,0.252,-0.1779,0
8,0,stockputout everything mostly priest,0.0,1.0,0.0,0.0,0
9,0,rebecca black drops school due bullying,0.438,0.562,0.0,-0.5994,1


In [93]:
# Evaluate the VADER predictions against the true labels using accuracy, F1 score, and
# sklearn's per-class classification report.
y_true = ConcatData_df['cyberbullying_type']
y_pred = ConcatData_df['pred_cb_label']

acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
# Keep the report text under its own name: rebinding `classification_report` would shadow
# the imported sklearn function and make this cell raise TypeError when it is run again.
report_text = classification_report(y_true, y_pred)

print('Accuracy:', acc)
print('F1:', f1)
print('Classification Report:\n', report_text)

Accuracy: 0.7860559047537045
F1: 0.5018953200174954
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.91      0.86    191084
           1       0.61      0.43      0.50     64422

    accuracy                           0.79    255506
   macro avg       0.72      0.67      0.68    255506
weighted avg       0.77      0.79      0.77    255506

