In [12]:
# Initial Imports
# Installs the two third-party packages used below (no versions pinned), then
# imports everything the later cells rely on.

!pip install newsapi-python
!pip3 install python-dotenv 
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
# NOTE(review): this call runs before the .env upload further down in this
# cell, so on a fresh runtime there may be no .env file to read yet — confirm
# that the API key is actually available afterwards.
load_dotenv()

# Mount Google Drive for working with files
from google.colab import drive
drive.mount('/content/drive')

# Upload data to Colab
from google.colab import files

# Upload .env file (Must enable viewing of hidden files on your laptop)
# The variable keeps its historical name even though the upload is a .env file.
csv_file = files.upload()

# The load_dotenv() call at the top of this cell ran before the upload existed,
# and Colab stores a repeated upload under a new name (e.g. ".env (1)"), which
# a plain load_dotenv() would not pick up. Load the variables directly from the
# uploaded bytes so the freshly uploaded key is always the one in effect.
import io
for _uploaded_bytes in csv_file.values():
    load_dotenv(stream=io.StringIO(_uploaded_bytes.decode("utf-8")), override=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Saving .env to .env (1)


In [13]:
# Look up the News API key in the process environment (variable "news_api");
# the result is None when the variable is not set.
api_key = os.environ.get("news_api")

In [14]:
# Create a newsapi client
# Uses the key read from the "news_api" environment variable in the previous cell.
newsapi = NewsApiClient(api_key=api_key)

In [15]:
# Query News API for English-language articles matching the ticker symbol,
# ordered by relevancy, requesting up to 100 articles in one page.
# Several tickers can be combined in a single query as "ticker1 AND ticker2".
ticker = 'TSLA'
search_params = {
    "q": ticker,
    "language": "en",
    "page_size": 100,
    "sort_by": "relevancy",
}
ticker_headlines = newsapi.get_everything(**search_params)

In [16]:
# Report the total number of matches the API reports for this query
total_articles = ticker_headlines['totalResults']
print(f"Total articles about this ticker: {total_articles}")

Total articles about this ticker: 770


In [17]:
# Show sample article
# Displays the first article record from the response so its fields
# (content, publishedAt, title, ...) can be inspected.
ticker_headlines["articles"][0]

{'author': 'Sam Quirke',
 'content': 'They say you cant keep a good stock down, and while the debate is still ongoing with regards to if Tesla (NASDAQ: TSLA) is a good stock or not, one thing is for sure; it certainly cant be kept down l… [+4075 chars]',
 'description': 'They say you can’t keep a good stock down, and while the debate is still ongoing with regards to if Tesla (NASDAQ: TSLA) is a good stock or not, one...',
 'publishedAt': '2022-03-23T14:00:00Z',
 'source': {'id': None, 'name': 'MarketBeat'},
 'title': 'Tesla (NASDAQ: TSLA) Sets Off On A Fresh Rally',
 'url': 'https://www.marketbeat.com/originals/tesla-nasdaq-tsla-sets-off-on-a-fresh-rally/',
 'urlToImage': 'https://assets.entrepreneur.com/providers/marketbeat/hero-image-marketbeat-422925.jpeg'}

In [18]:
# Download/Update the VADER Lexicon
# The lexicon is fetched before the analyzer is constructed below.
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
# A single analyzer instance is reused for every article scored later on.
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [19]:
# Create the ticker sentiment scores DataFrame
# Score each article's content with VADER and collect one record per article:
# the text, its publication date, and the compound/positive/negative/neutral
# scores returned by the analyzer.
ticker_sentiments = []

for article in ticker_headlines["articles"]:
    text = article["content"]
    # Articles whose content is not a string (e.g. None) cannot be scored.
    # Previously these were skipped by catching the AttributeError raised
    # during scoring; checking explicitly skips the same articles without
    # hiding unrelated AttributeErrors.
    if not isinstance(text, str):
        continue

    # publishedAt looks like '2022-03-23T14:00:00Z'; keep only the date part.
    date = article["publishedAt"][:10]
    sentiment = analyzer.polarity_scores(text)

    ticker_sentiments.append({
        "text": text,
        "date": date,
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

In [20]:
# Create DataFrame
# Naming the columns up front keeps the frame's schema intact even when no
# article produced a score, so later column selection and sorting by date do
# not fail with a KeyError on an empty result. The order matches the key order
# of the records, so non-empty results are unchanged.
sentiment_columns = ["text", "date", "compound", "positive", "negative", "neutral"]
ticker_df = pd.DataFrame(ticker_sentiments, columns=sentiment_columns)

In [21]:
# Put the date first, followed by the text and the four sentiment scores
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
ticker_df = ticker_df.loc[:, cols]

In [22]:
# Order the rows chronologically by their date string
ticker_df = ticker_df.sort_values(by='date')

In [23]:
# Show df
# Displays the first rows of the date-sorted frame as a quick sanity check.
ticker_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral
69,2022-03-04,"At long last, Tesla Giga Berlin has received e...",0.4767,0.094,0.0,0.906
35,2022-03-04,It was always going to be tricky for Rivian Au...,-0.1531,0.0,0.04,0.96
33,2022-03-04,To say this has been an eventful week has been...,0.0,0.0,0.0,1.0
55,2022-03-07,Tesla is the fifth most valuable stock in the ...,0.9513,0.359,0.0,0.641
75,2022-03-07,Theres nothing like sticker shock to make cons...,0.4721,0.147,0.06,0.793


In [24]:
# Number of scored articles per publication date, most frequent first
ticker_df["date"].value_counts()

2022-03-10    10
2022-03-28    10
2022-03-14     8
2022-03-07     6
2022-04-04     5
2022-04-01     5
2022-03-31     4
2022-03-30     4
2022-03-29     4
2022-03-23     4
2022-03-22     4
2022-03-15     4
2022-03-04     3
2022-03-11     3
2022-03-21     3
2022-03-25     3
2022-04-03     2
2022-03-08     2
2022-03-09     2
2022-03-17     2
2022-03-18     2
2022-03-26     2
2022-03-24     2
2022-03-27     2
2022-03-13     1
2022-03-12     1
2022-04-02     1
2022-03-16     1
Name: date, dtype: int64

In [25]:
# Aggregate scores per day
# Sum only the numeric score columns for each date. Selecting them explicitly
# keeps the free-text column out of the aggregation: newer pandas versions no
# longer drop non-numeric columns in sum() and would concatenate the article
# texts into the result instead.
# NOTE(review): these are daily sums, so days with more articles get larger
# magnitudes — confirm a sum (rather than a mean) is what is wanted.
score_columns = ["compound", "positive", "negative", "neutral"]
sentiment_analysis_df = ticker_df.groupby('date')[score_columns].sum()

In [27]:
# Label the index "Date" and display the aggregated frame
sentiment_analysis_df.index.name = 'Date'
sentiment_analysis_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-04,0.3236,0.094,0.04,2.866
2022-03-07,-0.5224,0.506,0.465,5.029
2022-03-08,-0.1807,0.16,0.154,1.686
2022-03-09,1.1932,0.308,0.0,1.692
2022-03-10,3.0842,1.207,0.345,8.448
2022-03-11,-1.1942,0.0,0.222,2.778
2022-03-12,-0.6705,0.0,0.2,0.8
2022-03-13,-0.4019,0.082,0.153,0.765
2022-03-14,2.0231,0.693,0.109,7.198
2022-03-15,1.1417,0.319,0.083,3.598


In [28]:
# Write the per-day sentiment totals to a CSV file in the current working
# directory; the "Date" index is written as the first column.
csvpath = Path("./sentiment_analysis_df.csv")
sentiment_analysis_df.to_csv(csvpath)