# Sentiment analysis using pre-trained models on English Twitter data

## Environment

- google-cloud-language
- pip:
   - vaderSentiment
   - textblob

In [1]:
# Change to the root directory of the project so that relative paths used
# later in the notebook resolve from there.
# The location can be overridden with the TWITTER_ANALYSIS_ROOT environment
# variable; when it is unset, the original hard-coded checkout path is used.
import os
PROJECT_ROOT = os.environ.get('TWITTER_ANALYSIS_ROOT', '/home/tm/sciebo/corona/twitter_analysis/')
os.chdir(PROJECT_ROOT)

## Create data

In [2]:
import pandas as pd
# Allow up to 100 characters per column when a frame is displayed.
# The fully qualified option name is used: abbreviated names are resolved by
# pattern matching and can become ambiguous if pandas adds similar options.
pd.set_option('display.max_colwidth', 100)

In [9]:
# Hand-labelled example sentences, written as (text, expected sentiment) pairs
# so that every sentence sits right next to its label.
labelled_examples = [
    ("Today is the best day ever, I love it so much!", 'pos'),
    ("I like that dress a lot, it fits you very well, good job!", 'pos'),
    ("Teslas stockprice rose to a new high today. Will this be the end of GM?", 'neu'),
    ("Experts have compared the ongoing Corona epidemic with the spanish flu.", 'neu'),
    ("President Trump can go ** himself, I **** hate that bitch", 'neg'),
    ("President Trump can go fuck himself, I fucking hate that bitch", 'neg'),
    ("President Trump can go ** himself, I **** hate him", 'neg'),
    ("President Trump can go fuck himself, I fucking hate him", 'neg'),
]

# Split the pairs into the two parallel lists used by the rest of the notebook.
sentences = [text for text, _ in labelled_examples]
sentiments = [label for _, label in labelled_examples]

In [10]:
# Combine the sentences and their hand-assigned labels into a two-column frame.
data = dict(text=sentences, sentiment=sentiments)
df = pd.DataFrame(data)

# Last expression of the cell: renders the frame.
df

Unnamed: 0,text,sentiment
0,"Today is the best day ever, I love it so much!",pos
1,"I like that dress a lot, it fits you very well, good job!",pos
2,Teslas stockprice rose to a new high today. Will this be the end of GM?,neu
3,Experts have compared the ongoing Corona epidemic with the spanish flu.,neu
4,"President Trump can go ** himself, I **** hate that bitch",neg
5,"President Trump can go fuck himself, I fucking hate that bitch",neg
6,"President Trump can go ** himself, I **** hate him",neg
7,"President Trump can go fuck himself, I fucking hate him",neg


## Comparison of various packages

## TextBlob

In [11]:
from textblob import TextBlob

In [12]:
# TextBlob polarity for every sentence, in the same order as the rows of df.
sentiment_textblob = [TextBlob(text).sentiment.polarity for text in df.text]

## Vader

In [13]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Single VADER analyzer instance, reused for every sentence that is scored.
analyzer = SentimentIntensityAnalyzer()

In [14]:
# VADER 'compound' score for every sentence, in the same order as the rows of df.
sentiment_vader = [analyzer.polarity_scores(text)['compound'] for text in df.text]

## Stanford NLP (not working)

## Google NLP

In [15]:
import requests
import json
import argparse

In [16]:
from google.cloud import language
from google.oauth2 import service_account
from google.cloud.language import enums
from google.cloud.language import types

In [17]:
# Service-account key file for the Google Cloud client; the path is relative
# to the current working directory.
google_key_path = 'src/keys/ose-twitter-analysis-8508806b2efb.json'
client = language.LanguageServiceClient.from_service_account_json(google_key_path)

In [18]:
# Document-level sentiment score from the Google Natural Language client for
# every sentence, in the same order as the rows of df. One request is made per
# sentence. The response's document_sentiment also has a magnitude field,
# which is not collected here.
sentiment_google = []
for text in df.text:
    doc = types.Document(content=text, type=enums.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=doc)
    sentiment_google.append(response.document_sentiment.score)

## Comparison

In [19]:
# Attach each package's scores to df as its own column, in this order.
score_columns = {
    'google': sentiment_google,
    'textblob': sentiment_textblob,
    'vader': sentiment_vader,
}
for column_name, scores in score_columns.items():
    df[column_name] = scores

In [20]:
# Render the frame: hand-assigned label next to the score from each package.
df

Unnamed: 0,text,sentiment,google,textblob,vader
0,"Today is the best day ever, I love it so much!",pos,0.9,0.583333,0.8655
1,"I like that dress a lot, it fits you very well, good job!",pos,0.9,0.5375,0.8107
2,Teslas stockprice rose to a new high today. Will this be the end of GM?,neu,0.0,0.298788,0.0
3,Experts have compared the ongoing Corona epidemic with the spanish flu.,neu,0.0,0.0,-0.3818
4,"President Trump can go ** himself, I **** hate that bitch",neg,-0.3,-0.8,-0.8176
5,"President Trump can go fuck himself, I fucking hate that bitch",neg,-0.6,-0.6,-0.911
6,"President Trump can go ** himself, I **** hate him",neg,-0.5,-0.8,-0.5719
7,"President Trump can go fuck himself, I fucking hate him",neg,-0.6,-0.6,-0.8173
