In [3]:
#Imports
import json
import nltk
import numpy as np
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
import pandas as pd
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


In [4]:
#Load models Sentiment
nltk.download('vader_lexicon')
nltk.download('punkt')
sentimentIntensityAnalyzer = SentimentIntensityAnalyzer()


pretrained_LM_path = "kornosk/bert-election2020-twitter-stance-biden-KE-MLM"

#Load models Stance
tokenizer = AutoTokenizer.from_pretrained(pretrained_LM_path)
stanceDetectionmodel = AutoModelForSequenceClassification.from_pretrained(pretrained_LM_path)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\steem\AppData\Roaming\nltk_data...
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\steem\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


In [5]:
#Settings
pd.set_option('display.width', 1000)

In [6]:
#Load data
twitter_data = pd.read_csv('dataset/train.csv', encoding="unicode_escape", lineterminator='\r')

twitter_data.head(10)

Unnamed: 0,Tweet,Target,Stance,Opinion Towards,Sentiment
0,"@tedcruz And, #HandOverTheServer she wiped cle...",Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg
1,Hillary is our best choice if we truly want to...,Hillary Clinton,FAVOR,1. The tweet explicitly expresses opinion abo...,pos
2,@TheView I think our country is ready for a fe...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg
3,I just gave an unhealthy amount of my hard-ear...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg
4,@PortiaABoulger Thank you for adding me to you...,Hillary Clinton,NONE,3. The tweet is not explicitly expressing opi...,pos
5,Hillary can not win. Here's hoping the Dems of...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg
6,Respect FOR the law and respect BY the law Yes...,Hillary Clinton,NONE,2. The tweet does NOT expresses opinion about ...,pos
7,I don't want to be appointed to an Ambassador ...,Hillary Clinton,NONE,2. The tweet does NOT expresses opinion about ...,neg
8,#StopHillary2016 @HillaryClinton if there was ...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg
9,@HillaryClinton End lawless #ClintonFoundation...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg


In [7]:
#Calculate Sentiment
def calculateSentiment(text):
    return sentimentIntensityAnalyzer.polarity_scores(text)['compound']

twitter_data['sentimentVader'] = twitter_data['Tweet'].apply(calculateSentiment)
display(twitter_data)

Unnamed: 0,Tweet,Target,Stance,Opinion Towards,Sentiment,sentimentVader
0,"@tedcruz And, #HandOverTheServer she wiped cle...",Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,0.4019
1,Hillary is our best choice if we truly want to...,Hillary Clinton,FAVOR,1. The tweet explicitly expresses opinion abo...,pos,0.8126
2,@TheView I think our country is ready for a fe...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,0.3612
3,I just gave an unhealthy amount of my hard-ear...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,-0.5267
4,@PortiaABoulger Thank you for adding me to you...,Hillary Clinton,NONE,3. The tweet is not explicitly expressing opi...,pos,0.3612
...,...,...,...,...,...,...
2909,"There's a law protecting unborn eagles, but no...",Legalization of Abortion,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,0.1139
2910,I am 1 in 3... I have had an abortion #Abortio...,Legalization of Abortion,AGAINST,2. The tweet does NOT expresses opinion about ...,other,0.0000
2911,How dare you say my sexual preference is a cho...,Legalization of Abortion,AGAINST,2. The tweet does NOT expresses opinion about ...,neg,0.0000
2912,"Equal rights for those 'born that way', no rig...",Legalization of Abortion,AGAINST,2. The tweet does NOT expresses opinion about ...,neg,-0.2960


In [8]:
# Calculate Stance Detection

id2label = {
    0: "AGAINST",
    1: "FAVOR",
    2: "NONE"
}

def calculateStance(text):
    inputs = tokenizer(text, return_tensors="pt")
    outputs = stanceDetectionmodel(**inputs)
    print(inputs)
    prediction = torch.softmax(outputs[0], dim=1)[0].tolist()
    return id2label[np.argmax(prediction)]


twitter_data['stancePrediction'] = twitter_data['Tweet'].apply(calculateStance)

{'input_ids': tensor([[  101,  1030,  6945, 26775, 17040,  1998,  1010,  1001,  2192,  7840,
         24138,  2121,  6299,  2016,  8342,  4550,  1009,  2382,  2243, 17159,
         22028,  1010,  7607, 28839,  3258,  1997,  4611,  1013,  3658,  2128,
          1001,  3841,  5603, 16103,  1010,  4385,  1001, 22975,  4140,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
{'input_ids': tensor([[  101, 18520,  2003,  2256,  2190,  3601,  2065,  2057,  5621,  2215,
          2000,  3613,  2108,  1037,  6555,  3842,  1012,  1001,  4058,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [9]:
display(twitter_data)

Unnamed: 0,Tweet,Target,Stance,Opinion Towards,Sentiment,sentimentVader,stancePrediction
0,"@tedcruz And, #HandOverTheServer she wiped cle...",Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,0.4019,NONE
1,Hillary is our best choice if we truly want to...,Hillary Clinton,FAVOR,1. The tweet explicitly expresses opinion abo...,pos,0.8126,NONE
2,@TheView I think our country is ready for a fe...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,0.3612,NONE
3,I just gave an unhealthy amount of my hard-ear...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,-0.5267,NONE
4,@PortiaABoulger Thank you for adding me to you...,Hillary Clinton,NONE,3. The tweet is not explicitly expressing opi...,pos,0.3612,NONE
...,...,...,...,...,...,...,...
2909,"There's a law protecting unborn eagles, but no...",Legalization of Abortion,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,0.1139,NONE
2910,I am 1 in 3... I have had an abortion #Abortio...,Legalization of Abortion,AGAINST,2. The tweet does NOT expresses opinion about ...,other,0.0000,NONE
2911,How dare you say my sexual preference is a cho...,Legalization of Abortion,AGAINST,2. The tweet does NOT expresses opinion about ...,neg,0.0000,NONE
2912,"Equal rights for those 'born that way', no rig...",Legalization of Abortion,AGAINST,2. The tweet does NOT expresses opinion about ...,neg,-0.2960,NONE


In [10]:
twitter_data.to_csv('output.csv')
print("saved")

saved
