In [1]:
# Initial Imports
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into the environment.
# As the last expression in the cell, the call's return value is what the cell displays.
load_dotenv()

True

In [2]:
# Look up the News API key in the "news_api" environment variable
# (the result is None when that variable is not set).
api_key = os.environ.get("news_api")

In [3]:
# Build the News API client, authenticated with the key held in `api_key`
newsapi = NewsApiClient(api_key)

In [4]:
# Fetch English-language news coverage for the ticker symbol below.
# To search several tickers at once, combine them as "ticker1 AND ticker2".
# Only one page of at most 100 articles, ranked by relevancy, is requested,
# so fewer articles may be returned than the total the API reports.
ticker = 'TSLA'
ticker_headlines = newsapi.get_everything(
    q=ticker,
    language="en",
    page_size=100,
    sort_by="relevancy"
)

In [5]:
# Report how many matching articles the API response says exist for this query
total_results = ticker_headlines["totalResults"]
print(f"Total articles about this ticker: {total_results}")

Total articles about this ticker: 758


In [6]:
# Display the first returned article to inspect the fields each record carries
# (source, author, title, description, url, publishedAt, content, ...).
ticker_headlines["articles"][0]

{'source': {'id': None, 'name': 'MarketBeat'},
 'author': 'Sam Quirke',
 'title': 'Tesla (NASDAQ: TSLA) Sets Off On A Fresh Rally',
 'description': 'They say you can’t keep a good stock down, and while the debate is still ongoing with regards to if Tesla (NASDAQ: TSLA) is a good stock or not, one...',
 'url': 'https://www.marketbeat.com/originals/tesla-nasdaq-tsla-sets-off-on-a-fresh-rally/',
 'urlToImage': 'https://assets.entrepreneur.com/providers/marketbeat/hero-image-marketbeat-422925.jpeg',
 'publishedAt': '2022-03-23T14:00:00Z',
 'content': 'They say you cant keep a good stock down, and while the debate is still ongoing with regards to if Tesla (NASDAQ: TSLA) is a good stock or not, one thing is for sure; it certainly cant be kept down l… [+4075 chars]'}

In [7]:
# Download the VADER lexicon, or confirm the local copy is up to date
nltk.download('vader_lexicon')

# Create the VADER sentiment analyzer used to score the article text
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/emilioacubero/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [8]:
# Score every fetched article with VADER and collect one record per article.
# Each record holds the article text, its publication date (YYYY-MM-DD, taken
# from the first 10 characters of "publishedAt") and the four VADER scores.
ticker_sentiments = []

for article in ticker_headlines["articles"]:
    text = article["content"]

    # Articles without usable text cannot be scored; skip them explicitly
    # instead of relying on an exception raised inside the analyzer.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)
    ticker_sentiments.append({
        "text": text,
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

In [9]:
# Build the per-article DataFrame. Naming the columns explicitly keeps them
# present, and in a fixed order, even when no article could be scored
# (an empty record list would otherwise produce a frame with no columns).
ticker_df = pd.DataFrame(
    ticker_sentiments,
    columns=["date", "text", "compound", "positive", "negative", "neutral"],
)

In [10]:
# Put the date first, followed by the text and the four sentiment scores
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
ticker_df = ticker_df.loc[:, cols]

In [11]:
# Order the articles by date (ISO YYYY-MM-DD strings sort chronologically).
# The sorted copy is rebound rather than sorting in place, so the cell does
# not mutate a frame that earlier cells produced.
ticker_df = ticker_df.sort_values("date")

In [12]:
# Preview the first five rows of the date-sorted article frame
ticker_df.head(5)

Unnamed: 0,date,text,compound,positive,negative,neutral
19,2022-03-01,"Cryptocurrency like bitcoin \r\n BTCUSD,\r\n +...",0.7964,0.224,0.0,0.776
80,2022-03-02,The Biden administration is treading carefully...,-0.2263,0.044,0.071,0.885
25,2022-03-02,Theo Wargo/Getty Images Entertainment\r\nOpeni...,0.4215,0.088,0.0,0.912
71,2022-03-02,What happened\r\nShares of electric car pionee...,0.6367,0.126,0.0,0.874
13,2022-03-02,"U.S. stocks posted sharp gains on Wednesday, a...",-0.128,0.066,0.08,0.854


In [13]:
# Tally how many articles were published on each date
ticker_df["date"].value_counts()

2022-03-28    10
2022-03-10    10
2022-03-14     7
2022-03-03     6
2022-03-07     6
2022-03-15     4
2022-03-23     4
2022-03-22     4
2022-03-02     4
2022-03-30     4
2022-03-29     4
2022-04-01     4
2022-03-31     3
2022-03-11     3
2022-03-21     3
2022-03-25     3
2022-03-04     3
2022-03-17     2
2022-03-09     2
2022-03-08     2
2022-03-24     2
2022-03-26     2
2022-03-27     2
2022-03-18     2
2022-03-01     1
2022-03-13     1
2022-03-12     1
2022-04-02     1
Name: date, dtype: int64

In [14]:
# Aggregate scores per day by summing each sentiment column over that day's
# articles. numeric_only=True restricts the sum to the numeric score columns,
# so the free-text "text" column is never aggregated, whatever the pandas
# version's default is.
# NOTE(review): sums grow with the number of articles per day; confirm that a
# daily total (rather than a mean) is the intended measure.
sentiment_analysis_df = ticker_df.groupby("date").sum(numeric_only=True)

In [15]:
# Label the index "Date" by rebinding a renamed copy instead of mutating the
# existing index object in place, then display the daily totals.
sentiment_analysis_df = sentiment_analysis_df.rename_axis("Date")
sentiment_analysis_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-01,0.7964,0.224,0.0,0.776
2022-03-02,0.7039,0.324,0.151,3.525
2022-03-03,1.9527,0.595,0.075,5.328
2022-03-04,0.3236,0.094,0.04,2.866
2022-03-07,-0.5034,0.549,0.465,4.986
2022-03-08,-0.1807,0.16,0.154,1.686
2022-03-09,1.1932,0.308,0.0,1.692
2022-03-10,3.0842,1.207,0.345,8.448
2022-03-11,-1.1942,0.0,0.222,2.778
2022-03-12,-0.6705,0.0,0.2,0.8


In [16]:
# Write the daily sentiment totals to a CSV file in the current working directory
csvpath = Path(".") / "sentiment_analysis_df.csv"
sentiment_analysis_df.to_csv(csvpath)