In [1]:
import pandas as pd 
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Shared VADER analyzer instance; get_compound_score() calls
# sid.polarity_scores() on it.
# NOTE(review): presumably requires the NLTK 'vader_lexicon' resource to be
# installed locally - confirm before running on a fresh environment.
sid = SentimentIntensityAnalyzer()

## Loading data

In [2]:
# Parse the hand-labelled workbook once and give every pipeline its own
# independent copy, instead of reading the same Excel file four times.
handlabeled_raw = pd.read_excel('data/output/clean_handlabeled_data.xlsx')

# df_pos / df_neg hold the hand labels; df_*_vader receive VADER predictions.
df_pos = handlabeled_raw.copy()
df_neg = handlabeled_raw.copy()
df_pos_vader = handlabeled_raw.copy()
df_neg_vader = handlabeled_raw.copy()

## Preparing data for calculations

In [3]:
def not_null_and_mapping(df, map_to, df_type):
    """Prepare a labelled frame for the prediction or the ground-truth side.

    The input frame is never modified; a new DataFrame is always returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``LABEL`` column (and a ``COMMENT`` column when
        ``df_type == 'vader'``).
    map_to : str
        Only consulted when ``df_type == 'hl'``. ``'POS'`` relabels ``NEU``
        as ``POS``; ``'NEG'`` relabels ``NEU`` as ``NEG``; any other value
        leaves ``LABEL`` as it is.
    df_type : str
        ``'vader'``: drop rows whose ``COMMENT`` or ``LABEL`` is null.
        ``'hl'``: apply the ``map_to`` relabelling and add a ``TRUE_LABEL``
        column (``POS`` -> 1, ``NEG`` -> 0).

    Returns
    -------
    pandas.DataFrame
        The filtered frame (``'vader'``) or the relabelled frame with
        ``TRUE_LABEL`` added (``'hl'``).

    Raises
    ------
    ValueError
        If ``df_type`` is neither ``'vader'`` nor ``'hl'``.
    """
    if df_type == 'vader':
        keep = df['COMMENT'].notna() & df['LABEL'].notna()
        # .copy() detaches the result from the input so that adding columns
        # to it later does not raise SettingWithCopyWarning.
        return df[keep].copy()

    if df_type == 'hl':
        # Work on a copy so the caller's frame is left untouched.
        labelled = df.copy()

        if map_to == 'POS':
            labelled['LABEL'] = labelled['LABEL'].map(
                {'NEU': 'POS', 'POS': 'POS', 'NEG': 'NEG'})
        elif map_to == 'NEG':
            labelled['LABEL'] = labelled['LABEL'].map(
                {'NEU': 'NEG', 'POS': 'POS', 'NEG': 'NEG'})

        # Any label that is not POS/NEG at this point (including nulls)
        # becomes NaN in TRUE_LABEL.
        labelled['TRUE_LABEL'] = labelled['LABEL'].map({'POS': 1, 'NEG': 0})
        return labelled

    raise ValueError(
        "df_type must be 'vader' or 'hl', got {!r}".format(df_type))
        

In [4]:
# Ground-truth frames: NEU hand labels are folded into POS / NEG respectively.
df_pos = not_null_and_mapping(df=df_pos, map_to='POS', df_type='hl')
df_neg = not_null_and_mapping(df=df_neg, map_to='NEG', df_type='hl')

# Prediction frames: the 'vader' mode only filters null rows and does not
# look at map_to.
df_pos_vader = not_null_and_mapping(df=df_pos_vader, map_to='POS', df_type='vader')
df_neg_vader = not_null_and_mapping(df=df_neg_vader, map_to='NEG', df_type='vader')

## Assigning VADER labels

In [5]:
def get_compound_score(comment):
    """Return the ``compound`` entry of VADER's polarity scores for a comment.

    The comment is passed through ``str()`` before scoring, so non-string
    values are scored on their string representation.
    """
    return sid.polarity_scores(str(comment))['compound']

def get_sentiment(compound_score):
    """Binarise a compound score: 1 if strictly greater than 0, otherwise 0."""
    return 1 if compound_score > 0 else 0


def vader(df):
    """Attach VADER scores and binary predictions to a frame of comments.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``COMMENT`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``df`` plus ``compound_score``
        (from ``get_compound_score``) and ``PRED_LABEL`` (from
        ``get_sentiment`` applied to ``compound_score``). ``df`` itself is
        not modified.
    """
    # Each comment is scored exactly once, through get_compound_score, which
    # str()-coerces the value before handing it to the analyzer.
    compound = df['COMMENT'].apply(get_compound_score)

    # assign() builds a new frame, so this is also safe when ``df`` is a
    # filtered slice of another frame.
    return df.assign(
        compound_score=compound,
        PRED_LABEL=compound.apply(get_sentiment),
    )

In [6]:
# Add the VADER-derived columns to the frame paired with the NEU->POS labels.
df_pos_vader = vader(df=df_pos_vader)

In [7]:
# Add the VADER-derived columns to the frame paired with the NEU->NEG labels.
df_neg_vader = vader(df=df_neg_vader)

## Joining VADER labels and true labels on index

In [8]:
# Pair each VADER prediction with its hand label (NEU->POS variant) by row
# index; the inner join keeps only index values present in both frames.
result_pos = (
    df_pos_vader.loc[:, ['COMMENT', 'PRED_LABEL']]
    .merge(
        df_pos.loc[:, ['TRUE_LABEL']],
        how="inner",
        left_index=True,
        right_index=True,
    )
)

In [9]:
# Pair each VADER prediction with its hand label (NEU->NEG variant) by row
# index; the inner join keeps only index values present in both frames.
result_neg = (
    df_neg_vader.loc[:, ['COMMENT', 'PRED_LABEL']]
    .merge(
        df_neg.loc[:, ['TRUE_LABEL']],
        how="inner",
        left_index=True,
        right_index=True,
    )
)

## Calculating the weighted F1 score with NEU mapped to POS and to NEG, respectively

In [13]:
from sklearn.metrics import f1_score

# Weighted F1 of the VADER predictions against the hand labels in result_pos
# (the NEU->POS variant).
y_true, y_pred = result_pos['TRUE_LABEL'], result_pos['PRED_LABEL']

f1_score(y_true=y_true, y_pred=y_pred, average='weighted')

0.5084140831155031

In [12]:
from sklearn.metrics import f1_score

# Weighted F1 of the VADER predictions against the hand labels in result_neg
# (the NEU->NEG variant).
y_true, y_pred = result_neg['TRUE_LABEL'], result_neg['PRED_LABEL']

f1_score(y_true=y_true, y_pred=y_pred, average='weighted')

0.6347994135826863