In [61]:
import sys, os

import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import nltk

# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer


In [86]:
# If VADER sentiment analysis fails because the lexicon is missing, uncomment and run the line below.
# nltk.download('vader_lexicon')

In [87]:
# Load the raw news articles; the first CSV column is used as the row index.
df = pd.read_csv(
    "../../data/raw/news/newsapiorg_v2.csv",
    index_col=0,
)

In [88]:
# Display the loaded frame; the rendered output is truncated to the leading and trailing rows.
df

Unnamed: 0,title,publishedAt,url,content
1,Key remarks from Russian c.bank governor after...,2022-02-11T12:26:00Z,https://www.reuters.com/markets/currencies/key...,"MOSCOW, Feb 11 (Reuters) - The Russian Central..."
2,LIVE MARKETS Forex: worrying about an inverted...,2022-02-18T12:33:00Z,https://www.reuters.com/markets/stocks/live-ma...,Feb 18 - Welcome to the home for real-time cov...
3,Dollar takes a break from this week's slide ah...,2022-02-03T02:18:00Z,https://www.reuters.com/markets/europe/dollar-...,"SINGAPORE, Feb 3 (Reuters) - The dollar found ..."
4,Euro bounce pauses ahead of U.S. inflation - R...,2022-02-08T01:16:00Z,https://www.reuters.com/business/euro-bounce-p...,"SINGAPORE, Feb 8 (Reuters) - A resurgent euro ..."
5,Dollar near two-week high amid jitters over ha...,2022-01-25T01:52:00Z,https://www.reuters.com/markets/europe/dollar-...,"TOKYO, Jan 25 (Reuters) - The safe-haven U.S. ..."
...,...,...,...,...
2171,Is First Trust Mid Cap Growth AlphaDEX ETF (FN...,2022-02-28T11:20:07Z,http://www.zacks.com/stock/news/1874180/is-fir...,"A smart beta exchange traded fund, the First T..."
2172,Should First Trust Mid Cap Growth AlphaDEX ETF...,2022-02-15T11:20:08Z,http://www.zacks.com/stock/news/1867522/should...,If you're interested in broad exposure to the ...
2173,Should iShares Morningstar SmallCap Growth ETF...,2022-02-28T11:20:09Z,http://www.zacks.com/stock/news/1874172/should...,"Launched on 06/28/2004, the iShares Morningsta..."
2174,Comcast vs. DISH Network: Which Stock is a Bet...,2022-03-02T20:28:25Z,https://stocknews.com/news/cmcsa-dish-comcast-...,"With more people choosing to cut the cord, cab..."


In [89]:
# Count missing values per column to decide how the frame must be cleaned.
df.isnull().sum()

title           2
publishedAt     0
url             0
content        20
dtype: int64

In [90]:
# Keep only rows where every column is populated; `df` itself is left untouched.
df_clean = df.dropna(axis=0, how="any")

In [91]:
# Smoke-test the VADER analyzer on a single headline before scoring the dataset.
analyzer = SentimentIntensityAnalyzer()
analyzer.polarity_scores(
    "Bitcoin bounces off six month lows as buyers step in - Reuters"
)

{'neg': 0.153, 'neu': 0.847, 'pos': 0.0, 'compound': -0.2023}

In [94]:
def get_vader_sentiment(df, col, analyzer=None, threshold=0.05):
    """Score a text column with VADER and attach the results as new columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; a copy with extra columns is returned.
    col : str
        Name of the column holding the text to score. Every value is passed
        unchanged to ``analyzer.polarity_scores``.
    analyzer : object, optional
        Any object exposing ``polarity_scores(text)`` that returns a mapping
        with the keys ``"neg"``, ``"neu"``, ``"pos"`` and ``"compound"``.
        When omitted, a new ``SentimentIntensityAnalyzer`` is created. Pass an
        existing instance to avoid rebuilding it on every call.
    threshold : float, optional
        Non-negative cutoff applied to the compound score when deriving the
        label (default ``0.05``).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with five additional columns: ``vader_neg``,
        ``vader_neu``, ``vader_pos``, ``vader_comp`` (the raw scores) and
        ``vader_label``, which is ``1`` when compound >= ``threshold``,
        ``-1`` when compound <= ``-threshold`` and ``0`` otherwise.

    Raises
    ------
    ValueError
        If ``threshold`` is negative.
    KeyError
        If ``col`` is not a column of ``df``.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()

    def _to_label(compound):
        # The positive check comes first, so a threshold of 0 labels a
        # compound score of exactly 0 as positive.
        if compound >= threshold:
            return 1
        if compound <= -threshold:
            return -1
        return 0

    # Score each text exactly once, in row order.
    scores = [analyzer.polarity_scores(text) for text in df[col]]

    # Plain lists are assigned positionally (not index-aligned), so frames
    # with duplicate index labels are handled correctly.
    return df.assign(
        vader_neg=[s["neg"] for s in scores],
        vader_neu=[s["neu"] for s in scores],
        vader_pos=[s["pos"] for s in scores],
        vader_comp=[s["compound"] for s in scores],
        vader_label=[_to_label(s["compound"]) for s in scores],
    )
    

In [96]:
# Label every cleaned article by the VADER sentiment of its headline.
final_df = get_vader_sentiment(df=df_clean, col="title")

In [97]:
# Display the labelled frame, including the added vader_* score and label columns.
final_df

Unnamed: 0,title,publishedAt,url,content,vader_neg,vader_neu,vader_pos,vader_comp,vader_label
1,Key remarks from Russian c.bank governor after...,2022-02-11T12:26:00Z,https://www.reuters.com/markets/currencies/key...,"MOSCOW, Feb 11 (Reuters) - The Russian Central...",0.000,1.000,0.000,0.0000,0
2,LIVE MARKETS Forex: worrying about an inverted...,2022-02-18T12:33:00Z,https://www.reuters.com/markets/stocks/live-ma...,Feb 18 - Welcome to the home for real-time cov...,0.211,0.789,0.000,-0.3400,-1
3,Dollar takes a break from this week's slide ah...,2022-02-03T02:18:00Z,https://www.reuters.com/markets/europe/dollar-...,"SINGAPORE, Feb 3 (Reuters) - The dollar found ...",0.000,1.000,0.000,0.0000,0
4,Euro bounce pauses ahead of U.S. inflation - R...,2022-02-08T01:16:00Z,https://www.reuters.com/business/euro-bounce-p...,"SINGAPORE, Feb 8 (Reuters) - A resurgent euro ...",0.000,1.000,0.000,0.0000,0
5,Dollar near two-week high amid jitters over ha...,2022-01-25T01:52:00Z,https://www.reuters.com/markets/europe/dollar-...,"TOKYO, Jan 25 (Reuters) - The safe-haven U.S. ...",0.197,0.803,0.000,-0.4019,-1
...,...,...,...,...,...,...,...,...,...
2171,Is First Trust Mid Cap Growth AlphaDEX ETF (FN...,2022-02-28T11:20:07Z,http://www.zacks.com/stock/news/1874180/is-fir...,"A smart beta exchange traded fund, the First T...",0.000,0.521,0.479,0.8481,1
2172,Should First Trust Mid Cap Growth AlphaDEX ETF...,2022-02-15T11:20:08Z,http://www.zacks.com/stock/news/1867522/should...,If you're interested in broad exposure to the ...,0.000,0.670,0.330,0.7096,1
2173,Should iShares Morningstar SmallCap Growth ETF...,2022-02-28T11:20:09Z,http://www.zacks.com/stock/news/1874172/should...,"Launched on 06/28/2004, the iShares Morningsta...",0.000,0.809,0.191,0.3818,1
2174,Comcast vs. DISH Network: Which Stock is a Bet...,2022-03-02T20:28:25Z,https://stocknews.com/news/cmcsa-dish-comcast-...,"With more people choosing to cut the cord, cab...",0.000,0.734,0.266,0.4404,1


## Export of labelled dataset

In [99]:
# Write the labelled articles (original columns plus the vader_* columns) to an Excel workbook.
final_df.to_excel(
    excel_writer="../../data/raw/news/newsapiorg_labelling_v2.xlsx",
)