# Instructor Do: Intro to VADER Sentiment

In [1]:
# Initial imports
from dotenv import load_dotenv
import os
from path import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer



In [2]:
# Make sure the VADER lexicon is available locally (downloads or updates it)
nltk.download("vader_lexicon")

# Build the sentiment analyzer that later cells use to score article text
analyzer = SentimentIntensityAnalyzer()



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/Andrew/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
# Load environment variables and read the News API key environment variable
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")

# Fail fast with a clear message: os.getenv returns None when the variable is
# unset, and the problem would otherwise only show up further down, away from
# its cause, once the key is actually used.
if not api_key:
    raise RuntimeError(
        "NEWS_API_KEY is not set; add it to your .env file or environment."
    )



In [4]:
# Create a newsapi client authenticated with the key read from NEWS_API_KEY
newsapi = NewsApiClient(api_key=api_key)



In [5]:
# Search parameters: English-language articles mentioning both "facebook"
# and "libra", sorted by relevancy, with a page size of 100
libra_query = {
    "q": "facebook AND libra",
    "language": "en",
    "page_size": 100,
    "sort_by": "relevancy",
}
libra_headlines = newsapi.get_everything(**libra_query)

# Report how many matching articles the API says exist
total_results = libra_headlines["totalResults"]
print(f"Total articles about Facebook Libra: {total_results}")

# Display the first returned article as a sample
libra_headlines["articles"][0]



Total articles about Facebook Libra: 98


{'source': {'id': 'reuters', 'name': 'Reuters'},
 'author': 'Huw Jones',
 'title': 'Regulators plot path for cross-border payments to counter Facebook - Reuters',
 'description': 'Streamlining anti-money laundering checks, longer central bank opening hours and linking national systems would cut cross-border payment costs, global regulators said on Monday.',
 'url': 'https://www.reuters.com/article/us-g20-payments-idUSKCN24E2NQ',
 'urlToImage': 'https://s3.reutersmedia.net/resources/r/?m=02&d=20200713&t=2&i=1525571576&w=1200&r=LYNXNPEG6C1JO',
 'publishedAt': '2020-07-13T19:00:00Z',
 'content': 'LONDON (Reuters) - Streamlining anti-money laundering checks, longer central bank opening hours and linking national systems would cut cross-border payment costs, global regulators said on Monday. \r\n… [+1934 chars]'}

In [6]:
# Create the Facebook Libra sentiment scores DataFrame:
# one row per article with its publication date, content text and the
# VADER compound / positive / negative / neutral scores.
libra_sentiments = []

for article in libra_headlines["articles"]:
    text = article["content"]

    # Skip articles without string content (e.g. `content` is None)
    # explicitly, instead of relying on a swallowed AttributeError from the
    # analyzer, which could also hide unrelated bugs.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)

    libra_sentiments.append({
        "text": text,
        # Keep only the first 10 characters of the timestamp (the YYYY-MM-DD
        # part in the sample output, e.g. '2020-07-13T19:00:00Z')
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Create DataFrame with the columns in display order.
# Passing `columns` to the constructor selects/orders the columns and also
# keeps the expected schema when no article was scored; indexing an empty
# frame with `libra_df[cols]` would raise a KeyError instead.
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
libra_df = pd.DataFrame(libra_sentiments, columns=cols)

libra_df.head()



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-07-13,LONDON (Reuters) - Streamlining anti-money lau...,-0.2732,0.0,0.075,0.925
1,2020-07-02,VILNIUS (Reuters) - Lithuania is about to issu...,0.0,0.0,0.0,1.0
2,2020-06-24,SAO PAULO/BRASILIA (Reuters) - Brazil’s centra...,-0.0516,0.091,0.097,0.812
3,2020-06-24,SAO PAULO/BRASILIA (Reuters) - Brazil’s centra...,-0.0516,0.091,0.097,0.812
4,2020-07-09,BRUSSELS (Reuters) - Global powers should coop...,-0.296,0.081,0.114,0.806


In [7]:
# Get descriptive stats (count, mean, std, min, quartiles, max) for the
# numeric sentiment score columns of the DataFrame
libra_df.describe()



Unnamed: 0,compound,positive,negative,neutral
count,98.0,98.0,98.0,98.0
mean,0.120195,0.076031,0.047378,0.876612
std,0.429226,0.070121,0.065451,0.087655
min,-0.8779,0.0,0.0,0.629
25%,-0.0516,0.0,0.0,0.812
50%,0.0769,0.0795,0.0,0.8775
75%,0.472225,0.1085,0.094,0.9395
max,0.8718,0.308,0.291,1.0
