### VADER

In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [2]:
# Names assigned to the CSV columns; they replace the header row stored in the file.
columns = ['id', 'text_final', 'polarity', 'VADER_score']

# header=0 consumes the file's own header line so that `names` can override it;
# usecols=[1, 2, 3] keeps columns 1-3 by position, i.e. everything except 'id'.
df_vader_train = pd.read_csv(
    'VADER_train.csv',
    names=columns,
    usecols=[1, 2, 3],
    header=0,
    encoding='ISO-8859-1',
)

In [3]:
# Sanity-check the load by previewing the first five rows.
df_vader_train.head(n=5)

Unnamed: 0,text_final,polarity,VADER_score
0,"@USER @URL - aw , that ' s a bummer . you sho...",0,-0.3818
1,is upset that he can not update his facebook b...,0,-0.75
2,@USER i dived many times for the ball . manage...,0,0.4939
3,my whole body feels itchy and like its on fire,0,-0.25
4,"@USER no , it ' s not behaving at all . i am m...",0,-0.6597


### Create column VADER_binary: 0 if VADER_score is lower than or equal to 0, 1 otherwise

In [4]:
# Binarise the VADER score: strictly positive -> 1, zero or negative -> 0.
df_vader_train['VADER_binary'] = df_vader_train['VADER_score'].gt(0).astype(int)

In [5]:
# Preview the first five rows again to check the new VADER_binary column.
df_vader_train.head(n=5)

Unnamed: 0,text_final,polarity,VADER_score,VADER_binary
0,"@USER @URL - aw , that ' s a bummer . you sho...",0,-0.3818,0
1,is upset that he can not update his facebook b...,0,-0.75,0
2,@USER i dived many times for the ball . manage...,0,0.4939,1
3,my whole body feels itchy and like its on fire,0,-0.25,0
4,"@USER no , it ' s not behaving at all . i am m...",0,-0.6597,0


In [8]:
# Threshold 0: discard only the tweets whose VADER score is exactly 0.
# Row labels of the remaining tweets are left unchanged.
vader_0 = df_vader_train.drop(
    index=df_vader_train.index[df_vader_train['VADER_score'] == 0]
)

### Threshold 0

In [9]:
# Ground-truth polarity labels for the threshold-0 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_true0 = vader_0['polarity']
y_true0.values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [11]:
# VADER binary predictions for the threshold-0 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_pred0 = vader_0['VADER_binary']
y_pred0.values

array([0, 0, 1, ..., 1, 1, 1])

### Metrics 0

In [12]:
# Score the VADER predictions against the polarity labels on the threshold-0 subset
# (tweets with a score of exactly 0 are excluded from the evaluation).
# average='binary' reports precision/recall/F1 for the positive class (label 1 by default).
print("Accuracy score: ", accuracy_score(y_true0, y_pred0))
print("Precision score: ", precision_score(y_true0, y_pred0, average='binary'))
print("Recall score: ", recall_score(y_true0, y_pred0, average='binary'))
print("F-measure score: ", f1_score(y_true0, y_pred0, average='binary'))

Accuracy sore:  0.7150829088247717
Precision score:  0.6623838340739507
Recall score:  0.8586482343676488
F-measure score:  0.7478536730445129


### Threshold 0.25

In [13]:
# Threshold 0.25: discard tweets whose score lies strictly between -0.25 and 0.25.
# Scores of exactly -0.25 or 0.25 are kept.
vader_025 = df_vader_train.drop(
    index=df_vader_train.index[
        df_vader_train['VADER_score'].gt(-0.25) & df_vader_train['VADER_score'].lt(0.25)
    ]
)

In [14]:
# Preview the first five rows that survive the 0.25 threshold.
vader_025.head(n=5)

Unnamed: 0,text_final,polarity,VADER_score,VADER_binary
0,"@USER @URL - aw , that ' s a bummer . you sho...",0,-0.3818,0
1,is upset that he can not update his facebook b...,0,-0.75,0
2,@USER i dived many times for the ball . manage...,0,0.4939,1
3,my whole body feels itchy and like its on fire,0,-0.25,0
4,"@USER no , it ' s not behaving at all . i am m...",0,-0.6597,0


In [15]:
# VADER binary predictions for the threshold-0.25 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_pred025 = vader_025['VADER_binary']
y_pred025.values

array([0, 0, 1, ..., 1, 1, 1])

In [16]:
# Ground-truth polarity labels for the threshold-0.25 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_true025 = vader_025['polarity']
y_true025.values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

### Metrics 0.25

In [17]:
# Score the VADER predictions against the polarity labels on the threshold-0.25 subset
# (tweets with a score strictly between -0.25 and 0.25 are excluded from the evaluation).
# average='binary' reports precision/recall/F1 for the positive class (label 1 by default).
print("Accuracy score: ", accuracy_score(y_true025, y_pred025))
print("Precision score: ", precision_score(y_true025, y_pred025, average='binary'))
print("Recall score: ", recall_score(y_true025, y_pred025, average='binary'))
print("F-measure score: ", f1_score(y_true025, y_pred025, average='binary'))

Accuracy sore:  0.7413869116404634
Precision score:  0.699157032050344
Recall score:  0.887189867775962
F-measure score:  0.7820294979456204


### Threshold 0.35


In [18]:
# Threshold 0.35: discard tweets whose score lies strictly between -0.35 and 0.35.
# Scores of exactly -0.35 or 0.35 are kept.
vader_035 = df_vader_train.drop(
    index=df_vader_train.index[
        df_vader_train['VADER_score'].gt(-0.35) & df_vader_train['VADER_score'].lt(0.35)
    ]
)

In [19]:
# Preview the first five rows that survive the 0.35 threshold.
vader_035.head(n=5)

Unnamed: 0,text_final,polarity,VADER_score,VADER_binary
0,"@USER @URL - aw , that ' s a bummer . you sho...",0,-0.3818,0
1,is upset that he can not update his facebook b...,0,-0.75,0
2,@USER i dived many times for the ball . manage...,0,0.4939,1
4,"@USER no , it ' s not behaving at all . i am m...",0,-0.6597,0
6,need a hug,0,0.4767,1


In [20]:
# Ground-truth polarity labels for the threshold-0.35 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_true035 = vader_035['polarity']
y_true035.values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [21]:
# VADER binary predictions for the threshold-0.35 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_pred035 = vader_035['VADER_binary']
y_pred035.values

array([0, 0, 1, ..., 1, 1, 1])

### Metrics 0.35

In [22]:
# Score the VADER predictions against the polarity labels on the threshold-0.35 subset
# (tweets with a score strictly between -0.35 and 0.35 are excluded from the evaluation).
# average='binary' reports precision/recall/F1 for the positive class (label 1 by default).
print("Accuracy score: ", accuracy_score(y_true035, y_pred035))
print("Precision score: ", precision_score(y_true035, y_pred035, average='binary'))
print("Recall score: ", recall_score(y_true035, y_pred035, average='binary'))
print("F-measure score: ", f1_score(y_true035, y_pred035, average='binary'))

Accuracy sore:  0.7549302243656203
Precision score:  0.7177450397736149
Recall score:  0.9090556746623475
F-measure score:  0.8021513582789134


### Threshold 0.5

In [23]:
# Threshold 0.5: discard tweets whose score lies strictly between -0.50 and 0.50.
# Scores of exactly -0.50 or 0.50 are kept.
vader_05 = df_vader_train.drop(
    index=df_vader_train.index[
        df_vader_train['VADER_score'].gt(-0.50) & df_vader_train['VADER_score'].lt(0.50)
    ]
)

In [24]:
# Row count of the full frame before any threshold filtering
# (label is Italian for "cleaned dataset").
print("Dataset ripulito: ", df_vader_train.shape[0])

Dataset ripulito:  1596375


In [25]:
# Row count of the subset kept by the 0.5 threshold
# (label is Italian for "final dataset").
print("Dataset finale: ", vader_05.shape[0])

Dataset finale:  602766


In [26]:
# Number of tweets removed by the 0.5 threshold, i.e. full frame minus kept subset
# (label is Italian for "neutral tweets").
print("Tweet neutri: ", df_vader_train.shape[0] - vader_05.shape[0])

Tweet neutri:  993609


In [27]:
# Preview the first five rows that survive the 0.5 threshold.
vader_05.head(n=5)

Unnamed: 0,text_final,polarity,VADER_score,VADER_binary
1,is upset that he can not update his facebook b...,0,-0.75,0
4,"@USER no , it ' s not behaving at all . i am m...",0,-0.6597,0
7,@USER hey long time no see ! yes . rains a bi...,0,0.807,1
12,@USER i could not bear to watch it . and i tho...,0,-0.5994,0
16,hollis ' death scene will hurt me severely to ...,0,-0.9081,0


In [28]:
# Ground-truth polarity labels for the threshold-0.5 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_true05 = vader_05['polarity']
y_true05.values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [29]:
# VADER binary predictions for the threshold-0.5 subset.
# The Series is kept as-is (it is what the metric functions receive); the cell
# output is its underlying NumPy array, shown as a quick visual check.
y_pred05 = vader_05['VADER_binary']
y_pred05.values

array([0, 0, 1, ..., 1, 1, 1])

### Metrics 0.5

In [30]:
# Score the VADER predictions against the polarity labels on the threshold-0.5 subset
# (tweets with a score strictly between -0.50 and 0.50 are excluded from the evaluation).
# average='binary' reports precision/recall/F1 for the positive class (label 1 by default).
print("Accuracy score: ", accuracy_score(y_true05, y_pred05))
print("Precision score: ", precision_score(y_true05, y_pred05, average='binary'))
print("Recall score: ", recall_score(y_true05, y_pred05, average='binary'))
print("F-measure score: ", f1_score(y_true05, y_pred05, average='binary'))

Accuracy sore:  0.7805732241035493
Precision score:  0.7511891880620115
Recall score:  0.9287371665597214
F-measure score:  0.830580836060639


In [58]:
# Optional export of the threshold-0.5 subset to CSV; currently commented out
# (presumably to avoid rewriting the file on every run - confirm before enabling).
#vader_05.to_csv("dataset_05.csv")