In [1]:
# Initial Imports
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Read the News API key from the "news_api" environment variable
# (load_dotenv() was called in the imports cell). The value is None when the
# variable is not set.
api_key = os.environ.get("news_api")

In [3]:
# Create a newsapi client
# It is built with the key read from the environment in the previous cell.
# NOTE(review): api_key is None if the "news_api" variable is unset — presumably
# API requests would then fail to authenticate; confirm.
newsapi = NewsApiClient(api_key=api_key)

In [4]:
# Fetch English-language news matching the ticker(s) typed in by the user.
# Multiple tickers passed as "ticker1 AND ticker2"
# Surrounding whitespace is stripped so a stray space does not end up in the query.
ticker = input("Input ticker(s) here").strip()
if not ticker:
    # Fail here with a clear message rather than sending an empty query to the API.
    raise ValueError("No ticker entered; provide at least one ticker symbol.")

ticker_headlines = newsapi.get_everything(
    q=ticker,
    language="en",
    page_size=100,  # ask for up to 100 articles in this single request
    sort_by="relevancy",
)

Input ticker(s) here MSFT


In [5]:
# Report the 'totalResults' count carried in the API response
total_results = ticker_headlines["totalResults"]
print(f"Total articles about this ticker: {total_results}")

Total articles about this ticker: 503


In [6]:
# Show sample article
# Displays the first article dict of the response so the available fields
# (source, author, title, description, url, publishedAt, content, ...) can be inspected.
# NOTE(review): raises IndexError if the query returned no articles.
ticker_headlines["articles"][0]

{'source': {'id': None, 'name': 'StockNews.com'},
 'author': 'Jaimini Desai',
 'title': '3 Dividend Growth Stocks to Buy During This Market Correction',
 'description': 'One characteristic of a high-quality stock is a consistent track record of hiking dividends. This indicates a strong business that can thrive in all types of economic conditions, and a...',
 'url': 'https://stocknews.com/news/msft-cost-abbv-3-dividend-growth-stocks-to-buy-during-this-market-correction/',
 'urlToImage': 'https://assets.entrepreneur.com/providers/stocknews/hero-image-stocknews-422729.jpeg',
 'publishedAt': '2022-03-21T13:14:05Z',
 'content': 'One characteristic of a high-quality stock is a consistent track record of hiking dividends. This indicates a strong business that can thrive in all types of economic conditions, and a management tea… [+7647 chars]'}

In [7]:
# Download/Update the VADER Lexicon
# (the recorded output shows nltk reporting the package as already up-to-date
# when it is present locally)
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
# `analyzer` is used by the scoring loop in the next cell.
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/joseph/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [8]:
# Build the list of sentiment records: one dict per article whose content
# could be scored with VADER.
ticker_sentiments = []

for article in ticker_headlines["articles"]:
    text = article["content"]

    # Articles whose content is not a string (e.g. None) cannot be scored.
    # Skip them explicitly instead of wrapping the whole body in a silent
    # `except AttributeError: pass`, which could also hide unrelated bugs.
    if not isinstance(text, str):
        continue

    date = article["publishedAt"][:10]  # keep only the YYYY-MM-DD prefix
    sentiment = analyzer.polarity_scores(text)

    ticker_sentiments.append({
        "text": text,
        "date": date,
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

In [9]:
# Create DataFrame
# One row per scored article; the columns come from the keys of the record
# dicts (text, date, compound, positive, negative, neutral).
ticker_df = pd.DataFrame(ticker_sentiments)

In [10]:
# Reorder DataFrame columns
# reindex(columns=...) is used instead of ticker_df[cols] so that an empty
# frame (no articles were scored, hence no columns) still ends up with the
# expected columns instead of raising KeyError. When every column is present
# the result is the same as ticker_df[cols].
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
ticker_df = ticker_df.reindex(columns=cols)

In [13]:
# Order the articles chronologically. 'date' holds YYYY-MM-DD strings, so the
# lexical sort is also a date sort. The sorted frame is reassigned rather than
# mutated in place (inplace=True), which keeps the cell chain-friendly.
ticker_df = ticker_df.sort_values('date')

In [14]:
# Preview the first five rows (the earliest dates, since the frame was sorted by 'date')
ticker_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral
59,2022-02-28,Microsoft(NASDAQ:MSFT) has turned out to be a ...,0.891,0.318,0.0,0.682
63,2022-02-28,"In this clip from ""The Future of Fintech"" on M...",-0.7003,0.0,0.146,0.854
71,2022-03-01,LONDON--(BUSINESS WIRE)--The Future Markets Re...,0.5994,0.149,0.0,0.851
24,2022-03-01,Apple (AAPL) said Tuesday that it has stopped ...,-0.2263,0.0,0.053,0.947
17,2022-03-01,As Zoom Video Inc.s skyrocketing growth rate s...,0.3818,0.075,0.0,0.925


In [30]:
# Count how many articles were published on each date (most frequent first)
ticker_df["date"].value_counts()

2022-03-09    8
2022-03-08    8
2022-03-03    7
2022-03-18    6
2022-03-04    6
2022-03-23    6
2022-03-10    5
2022-03-26    5
2022-03-21    5
2022-03-02    4
2022-03-07    4
2022-03-28    3
2022-03-17    3
2022-03-22    3
2022-03-13    3
2022-03-24    3
2022-03-14    3
2022-03-15    3
2022-03-01    3
2022-03-05    2
2022-03-06    2
2022-03-12    2
2022-03-20    2
2022-02-28    2
2022-03-19    1
2022-03-11    1
Name: date, dtype: int64

In [None]:
def daily_agg(df):
    # NOTE(review): the name suggests a per-day aggregation of `df`, but the
    # visible body neither aggregates nor returns anything — confirm the intent.
    # NOTE(review): `row` is not a parameter and is not defined in any visible
    # cell; unless it exists elsewhere, calling this raises NameError.
    for date in row:
        # The filter below does not use the loop variable `date`; it keeps rows
        # whose 'date' value is >= '2020-09-01' and < '2020-09-15'.
        # NOTE(review): the recorded outputs show 2022 dates, so this fixed
        # window would presumably select no rows for that data — verify the range.
        # NOTE(review): `filtered_df` is not used in the visible portion of the body.
        filtered_df = df.loc[(df['date'] >= '2020-09-01')
                     & (df['date'] < '2020-09-15')]