In [None]:
!pip install gnews
from gnews import GNews

Collecting gnews
  Downloading gnews-0.3.6-py3-none-any.whl (16 kB)
Collecting feedparser~=6.0.2 (from gnews)
  Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bs4~=0.0.1 (from gnews)
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting beautifulsoup4~=4.9.3 (from gnews)
  Downloading beautifulsoup4-4.9.3-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.8/115.8 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymongo~=3.12.0 (from gnews)
  Downloading pymongo-3.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (517 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.1/517.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dnspython~=1.16.0 (from gnews)
  Downloading dnspython-1.16.0-py2.py



Functions to Get News Data

In [None]:
def get_news(month, day, year=2022):
  """Fetch Google News results about Tesla for a single calendar day.

  The query window runs from ``(year, month, day)`` to the following calendar
  day. The end date is computed with ``datetime`` so that month and year
  boundaries roll over correctly (e.g. Jan 31 -> Feb 1, Apr 30 -> May 1,
  Dec 31 -> Jan 1 of the next year).

  Args:
    month: Month number, 1-12.
    day: Day of the month.
    year: Calendar year of the query; defaults to 2022.

  Returns:
    The response of ``GNews.get_news('Tesla')`` unchanged. ``parse_news`` in
    this notebook reads it as a sequence of dicts with 'title' and
    'published date' keys.

  Raises:
    ValueError: If ``(year, month, day)`` is not a valid calendar date.
  """
  import datetime  # local import so this cell does not depend on another cell's imports

  start = datetime.date(year, month, day)
  # Let the calendar do the roll-over instead of special-casing day 31.
  end = start + datetime.timedelta(days=1)

  google_news = GNews()
  google_news.max_results = 10
  # `period` is intentionally not set: the notebook's recorded output shows
  # GNews warning that it is ignored in favour of the start and end dates.
  google_news.country = 'United States'
  google_news.language = 'english'
  google_news.start_date = (start.year, start.month, start.day)
  google_news.end_date = (end.year, end.month, end.day)
  return google_news.get_news('Tesla')


def parse_news(response):
  """Reduce a GNews response to a list of ``{"Title", "Date"}`` dicts.

  Each item's 'published date' string is split on spaces; field 1 is used as
  the day, field 2 as the English month abbreviation and field 3 as the year
  (the layout is presumably 'Sat, 01 Jan 2022 08:00:00 GMT' - confirm against
  the feed). If field 3 is missing or not numeric the year falls back to 2022,
  which is what this function always emitted before.

  Args:
    response: Sequence of dicts with 'title' and 'published date' keys, as
      returned by ``get_news``. ``None`` or an empty sequence yields ``[]``.

  Returns:
    A list of dicts with "Title" (the headline) and "Date" formatted as
    'MM/DD/YYYY'.

  Raises:
    KeyError: If an item lacks a required key or carries an unknown month
      abbreviation.
  """
  months = {"Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05", "Jun": "06",
            "Jul": "07", "Aug": "08", "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12"}

  info = []
  for item in response or []:
    parts = item['published date'].split(" ")
    month, day = months[parts[2]], parts[1]
    # Use the article's own year rather than assuming every result is from 2022.
    year = parts[3] if len(parts) > 3 and parts[3].isdigit() else "2022"
    info.append({"Title": item['title'], "Date": f"{month}/{day}/{year}"})
  return info



In [None]:
# Accumulator for scraped headlines: the fetch loop appends one list of parsed
# articles per queried day. Re-running this cell discards everything collected so far.
news_2022 = []

In [None]:
# Number of days in each month of 2022 (not a leap year, so February has 28).
days_in_month = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
                 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}

# This run only fetches December (month 12). The results recorded further down
# cover the whole year, so earlier months were presumably collected by
# re-running this cell with a different month range - confirm before relying on it.
for month in range(12, 13):
  # range() excludes its stop value, so +1 is required to include the last day
  # of the month; without it that day's headlines are never fetched.
  for day in range(1, days_in_month[month] + 1):
    response = get_news(month, day)
    news = parse_news(response)
    news_2022.append(news)


Period (12h) will be ignored in favour of the start and end dates
  json_resp = google_news.get_news('Tesla')


In [None]:
# news_2022

Convert News to Dataframe and Sort

In [None]:
import pandas as pd

# Collapse the per-day lists held in news_2022 into one flat list of article dicts.
news_2022_flat = [entry for daily_batch in news_2022 for entry in daily_batch]

# Build the frame and turn the 'MM/DD/YYYY' strings into timestamps in one chain.
df_2022 = pd.DataFrame(news_2022_flat).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%m/%d/%Y')
)

# Chronological view used by the rest of the notebook; the original row labels are kept.
df_2022_sorted = df_2022.sort_values(by='Date')
print(df_2022_sorted)


                                                  Title       Date
1     First Tesla electric car supercharger station ... 2022-01-01
2     Lucid and Rivian are future of sexy electric c... 2022-01-01
3     Tesla Model 3, Y prices hiked in China for rea... 2022-01-01
4     How to Fix Tesla Model 3/Y Heater Not Working ... 2022-01-01
5     Tesla Model Q Looks Like a More Practical Mode... 2022-01-01
...                                                 ...        ...
3360  Why one green-focused investor has 'no interes... 2022-12-30
3361  Opinion | Elon Musk’s Antics Hurt Tesla’s Demo... 2022-12-30
3362  Tesla Offering $7500 Off Model S and Model X U... 2022-12-30
3363  Do You Really Need a Single-Stock Tesla ETF? -... 2022-12-30
3365  Tesla supplier Panasonic seeks to balance US a... 2022-12-30

[3366 rows x 2 columns]


In [None]:
# Persist the date-sorted headlines to the working directory. No index option is
# passed, so pandas also writes the row labels as the first CSV column.
df_2022_sorted.to_csv("Headlines_2022.csv")

NLP Sentiment Analysis

In [None]:
!pip install transformers



In [None]:
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline
import nltk

In [None]:
# Fetch the lexicon that VADER scores against; the recorded output shows this is
# a no-op once the package is already up to date.
nltk.download('vader_lexicon')
# Single analyzer instance, reused for every headline scored in this cell.
vader_analyzer = SentimentIntensityAnalyzer()

def get_sentiment_score_textblob(headline):
    """Return the TextBlob sentiment polarity of a single headline string."""
    return TextBlob(headline).sentiment.polarity

# Score every headline with each analyzer; assign() works on a fresh copy, so
# df_2022_sorted itself is left untouched.
df_vader = df_2022_sorted.assign(
    Sentiment_VADER=df_2022_sorted['Title'].apply(
        lambda title: vader_analyzer.polarity_scores(title)['compound']
    )
)
df_textblob = df_2022_sorted.assign(
    Sentiment_TextBlob=df_2022_sorted['Title'].apply(get_sentiment_score_textblob)
)

# Gather headline, date and both scores into one frame (rows line up on the shared index).
df_sentiment_2022 = df_2022_sorted[['Title', 'Date']].copy()
df_sentiment_2022['Sentiment_VADER'] = df_vader['Sentiment_VADER']
df_sentiment_2022['Sentiment_TextBlob'] = df_textblob['Sentiment_TextBlob']

# Row-wise mean of the two analyzer scores.
df_sentiment_2022['Sentiment_Average'] = (
    df_sentiment_2022[['Sentiment_VADER', 'Sentiment_TextBlob']].mean(axis=1)
)

print(df_sentiment_2022)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                                                  Title       Date  \
1     First Tesla electric car supercharger station ... 2022-01-01   
2     Lucid and Rivian are future of sexy electric c... 2022-01-01   
3     Tesla Model 3, Y prices hiked in China for rea... 2022-01-01   
4     How to Fix Tesla Model 3/Y Heater Not Working ... 2022-01-01   
5     Tesla Model Q Looks Like a More Practical Mode... 2022-01-01   
...                                                 ...        ...   
3360  Why one green-focused investor has 'no interes... 2022-12-30   
3361  Opinion | Elon Musk’s Antics Hurt Tesla’s Demo... 2022-12-30   
3362  Tesla Offering $7500 Off Model S and Model X U... 2022-12-30   
3363  Do You Really Need a Single-Stock Tesla ETF? -... 2022-12-30   
3365  Tesla supplier Panasonic seeks to balance US a... 2022-12-30   

      Sentiment_VADER  Sentiment_TextBlob  Sentiment_Average  
1              0.0000            0.250000           0.125000  
2              0.5267            

In [None]:
# Inspect the scored headlines. 5000 exceeds the 3366 rows reported earlier in the
# notebook, so this effectively requests the whole frame.
# Needs df_sentiment_2022 from the sentiment cell in the current kernel; the
# recorded NameError output suggests that cell had not been run beforehand.
df_sentiment_2022.head(5000)

NameError: ignored

In [None]:
# Save the per-headline sentiment scores to the working directory. No index option
# is passed, so pandas also writes the row labels as the first CSV column.
df_sentiment_2022.to_csv("Headlines2022_Sentiment_Analysis.csv")