## Sentiment analysis using additional models

In [1]:
# Clone the project repository (shell command run via the `!` escape)
!git clone https://github.com/alexdseo/Sentiment-analysis-on-scraped-news-article

Cloning into 'Sentiment-analysis-on-scraped-news-article'...
remote: Enumerating objects: 8, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 8 (delta 0), reused 8 (delta 0), pack-reused 0[K
Unpacking objects: 100% (8/8), done.


In [2]:
# Change the working directory to the cloned repository, so that the relative
# paths used in the following cells are resolved from inside it
%cd Sentiment-analysis-on-scraped-news-article

/content/Sentiment-analysis-on-scraped-news-article


In [3]:
# Install requirments
!pip install -r requirements.txt
# Additional models
!pip install flair
!pip install happytransformer

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting requests==2.27.1
  Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
Collecting vaderSentiment==3.3.2
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 19.5 MB/s 
Collecting pyyaml==5.4.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 41.0 MB/s 
Installing collected packages: requests, vaderSentiment, pyyaml
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
[31mERROR: pip's dependency resolver does not currently take int

In [4]:
import pandas as pd
import json
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from flair.models import TextClassifier
from flair.data import Sentence
from happytransformer import HappyTextClassification
import plotly.express as px


def textblob(text):
    """Return the TextBlob polarity score of ``text``.

    The value is read from ``TextBlob(text).sentiment.polarity``.
    """
    # TextBlob additionally reports the subjectivity of the text:
    # subjectivity = TextBlob(text).sentiment.subjectivity
    return TextBlob(text).sentiment.polarity


# Shared analyzer instance, created on the first call to ``vader`` and reused
# afterwards so it is not rebuilt for every text.
_VADER_ANALYZER = None


def vader(text):
    """Return the VADER ``compound`` sentiment score of ``text``.

    The ``SentimentIntensityAnalyzer`` is constructed once, on first use, and
    cached in ``_VADER_ANALYZER`` instead of being re-created on every call.

    Args:
        text: The text to score.

    Returns:
        The value stored under the ``'compound'`` key of
        ``polarity_scores(text)``.
    """
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()

    scores = _VADER_ANALYZER.polarity_scores(text)
    # Additional parameters Vader provides in the same dictionary:
    # negativity = scores['neg']
    # neutrality = scores['neu']
    # positivity = scores['pos']

    return scores['compound']


# Shared classifier, loaded on the first call to ``flair`` and reused afterwards.
# Loading it inside every call re-read the model file once per article.
_FLAIR_CLASSIFIER = None


def flair(text):
    """Return a signed Flair sentiment score for ``text``.

    The ``'en-sentiment'`` classifier is loaded once, on first use, and cached
    in ``_FLAIR_CLASSIFIER`` instead of being re-loaded on every call.

    Args:
        text: The text to score.

    Returns:
        The predicted score, negated when the predicted tag is ``'NEGATIVE'``
        and returned unchanged for any other tag.
    """
    global _FLAIR_CLASSIFIER
    if _FLAIR_CLASSIFIER is None:
        _FLAIR_CLASSIFIER = TextClassifier.load('en-sentiment')

    sentence = Sentence(text)
    # predict() annotates the sentence in place; tag and score are read from it
    _FLAIR_CLASSIFIER.predict(sentence)

    if sentence.tag == 'NEGATIVE':
        return sentence.score * (-1)
    return sentence.score


# Shared classifier, built on the first call to ``distilbert`` and reused
# afterwards so the model is not re-initialised for every text.
_DISTILBERT_CLASSIFIER = None


def distilbert(text):
    """Return a signed DistilBERT (SST-2) sentiment score for ``text``.

    The ``HappyTextClassification`` model is constructed once, on first use,
    and cached in ``_DISTILBERT_CLASSIFIER`` instead of being re-created on
    every call.

    Args:
        text: The text to classify.

    Returns:
        The classification score, negated when the predicted label is
        ``'NEGATIVE'`` and returned unchanged for any other label.
    """
    global _DISTILBERT_CLASSIFIER
    if _DISTILBERT_CLASSIFIER is None:
        _DISTILBERT_CLASSIFIER = HappyTextClassification(
            model_type='DISTILBERT',
            model_name='distilbert-base-uncased-finetuned-sst-2-english',
            num_labels=2)

    result = _DISTILBERT_CLASSIFIER.classify_text(text)
    if result.label == 'NEGATIVE':
        return result.score * (-1)
    return result.score


def write_sentiment(json_file):
    """Score every article of a JSON-lines file with the four sentiment models.

    Each non-blank line of ``json_file`` is parsed as one JSON object that must
    provide the keys ``title``, ``sub_title``, ``main_article`` and
    ``publish_date``. Vader and DistilBERT score the header (title and
    sub-title); TextBlob and Flair score the header followed by the main
    article.

    Args:
        json_file: Path of the file to read, one JSON object per line.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date``, ``Model`` and
        ``Sentiment`` holding four rows per article, in file order. When the
        file contains no articles the frame is empty but still carries these
        three columns.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an article lacks one of the required keys.
    """
    col_list = ['Date', 'Model', 'Sentiment']
    # Rows are collected in a plain list and turned into a DataFrame once,
    # instead of concatenating a new frame for every article.
    records = []
    # The context manager closes the file even if parsing or scoring fails
    with open(json_file) as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline at the end of file)
            if not line.strip():
                continue
            article = json.loads(line)
            # Text components from the article
            header = str(article['title'] + '. ' + article['sub_title'])
            # NOTE(review): no separator is inserted between the sub-title and
            # the main article - confirm this is intended.
            texts = str(header + article['main_article'])
            date = str(article['publish_date'])
            # Sentiment scores +1: positive, -1: negative
            score_v = vader(header)
            score_t = textblob(texts)
            score_f = flair(texts)
            score_d = distilbert(header)
            records.extend([
                [date, 'Vader', score_v],
                [date, 'TextBlob', score_t],
                [date, 'Flair', score_f],
                [date, 'DistilBERT', score_d],
            ])

    return pd.DataFrame(records, columns=col_list)

  defaults = yaml.load(f)


In [5]:
# Input: the JSON file produced by scrape_news.py
articles = 'recent_articles.json'
# Score every article with each of the sentiment models
senti_gpu = write_sentiment(articles)
# Save the scores as a UTF-8 encoded CSV file, without the index column
senti_gpu.to_csv('sentiments_gpu.csv', index=False, encoding='utf-8')

2022-06-09 03:52:18,905 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpmvi3661v


100%|██████████| 265512723/265512723 [00:15<00:00, 17401526.30B/s]

2022-06-09 03:52:34,663 copying /tmp/tmpmvi3661v to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2022-06-09 03:52:35,085 removing temp file /tmp/tmpmvi3661v
2022-06-09 03:52:35,118 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

06/09/2022 03:53:10 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:10,741 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:53:18 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:18,592 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:53:26 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:26,477 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:53:34 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:35,056 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:53:42 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:43,036 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:53:50 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:50,940 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:53:58 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:53:58,808 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:54:06 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:54:06,923 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:54:14 - INFO - happytransformer.happy_transformer -   Using model: cuda


2022-06-09 03:54:14,855 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


06/09/2022 03:54:22 - INFO - happytransformer.happy_transformer -   Using model: cuda


## Simple visualization using plotly

In [6]:
# Load the exported sentiment scores back from the CSV file
df = pd.read_csv(filepath_or_buffer='sentiments_gpu.csv')
# The bare name as last expression makes the notebook display the frame
df

Unnamed: 0,Date,Model,Sentiment
0,2022-5-23,Vader,-0.6705
1,2022-5-23,TextBlob,0.057986
2,2022-5-23,Flair,-0.991747
3,2022-5-23,DistilBERT,-0.700244
4,2022-3-18,Vader,-0.9081
5,2022-3-18,TextBlob,-0.157955
6,2022-3-18,Flair,-0.991663
7,2022-3-18,DistilBERT,-0.874174
8,2022-3-4,Vader,0.0
9,2022-3-4,TextBlob,0.100909


In [7]:
# Plot the sentiment score against the date, one line per model, using plotly.
# The title lets the figure be understood on its own.
fig = px.line(df, x="Date", y="Sentiment", color="Model",
              title="Sentiment score of news articles by date and model")
fig.show()