In [4]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
from newsapi.newsapi_client import NewsApiClient

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\TribThapa\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [5]:
# Read your api key environment variable
news_api_key = os.getenv("NEWS_API_KEY")

print(f"News API Key type: {type(news_api_key)}")

News API Key type: <class 'str'>


In [7]:
# Create a newsapi client
newsapi = NewsApiClient(api_key=news_api_key)

newsapi

<newsapi.newsapi_client.NewsApiClient at 0x1abccf5ce48>

In [8]:
# Fetch news about Canada and Blockchain in 2020 in the English language
query = ["AMZN", "AAPL", "COKE"]

query_articles_en = []

for i in query:
    
    news_query = newsapi.get_everything(q=i,
                                        language='en',
                                        sort_by='relevancy')
    query_articles_en.append(news_query)
    

# Show the total number of news
print(f"Total number of news articles for {query[0]} is {query_articles_en[0]['totalResults']}")
print()
print(f"Total number of news articles for {query[1]} is {query_articles_en[1]['totalResults']}")
print()
print(f"Total number of news articles for {query[2]} is {query_articles_en[2]['totalResults']}")

Total number of news articles for AMZN is 1022

Total number of news articles for AAPL is 902

Total number of news articles for COKE is 1013


In [9]:
# Function to create a dataframe for english news and french news
def create_df(news, language):

    df = pd.json_normalize(news["articles"])

    df_new = df[["title", "description", "content", "publishedAt"]]

    df_new.columns = ["Title", "Description", "Text", "Date"]

    language_list = []

    for x in range(len(df_new)):

        language_list.append(language)

    df_new["Language"] = language_list
    
    return df_new

In [10]:
# Create a DataFrame with the news in English
en_df = create_df(query_articles_en[1], 'en') 

en_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


Unnamed: 0,Title,Description,Text,Date,Language
0,What’s Next for Apple as it Forms a Key Resist...,A resistance line has formed in the chart of A...,This story originally appeared on StockNews\r\...,2021-07-29T13:50:23Z,en
1,Are Shares of Apple a Buy Under $150?,Tech giant Apple (AAPL) achieved double-digit ...,"August\r\n24, 2021\r\n5 min read\r\nThis story...",2021-08-24T13:01:42Z,en
2,"Breaking: Apple (AAPL) reports Q3 earnings, EP...",<ol><li>Breaking: Apple (AAPL) reports Q3 earn...,Information on these pages contains forward-lo...,2021-07-27T20:34:26Z,en
3,Amazon is delaying its return to office until ...,Amazon on Thursday pushed back its office retu...,(CNN Business)Amazon on Thursday pushed back i...,2021-08-05T19:20:19Z,en
4,New AAPL all-time high as it finally breaks $150,Yesterday’s close saw a new AAPL all-time high...,Yesterday’s close saw a new AAPL all-time high...,2021-08-17T12:01:43Z,en


In [13]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [14]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    
    all_dates = []    
    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    
    print(f"Fetching news about '{keyword}'")
    
    print("*" * 30)
    
    while date > end_date:
        
        print(f"retrieving news from: {date}")
        
        articles = newsapi.get_everything(q=keyword,
                                          from_param=str(date)[:10],
                                          to=str(date)[:10],
                                          language="en",
                                          sort_by="relevancy",
                                          page=1)
        headlines = []
        
        for i in range(0, len(articles["articles"])):
            
            headlines.append(articles["articles"][i]["title"])
            
        all_headlines.append(headlines)
        
        all_dates.append(date)
        
        date = date - timedelta(days=1)
        
    return all_headlines, all_dates

In [17]:
# Get first topic
current_date = '2021-08-26 00:00:00'
past_date = '2016-01-01 00:00:00'

apple_news = get_headlines('apple')

Fetching news about 'apple'
******************************
retrieving news from: 2021-08-26 00:00:00
retrieving news from: 2021-08-25 00:00:00
retrieving news from: 2021-08-24 00:00:00
retrieving news from: 2021-08-23 00:00:00
retrieving news from: 2021-08-22 00:00:00
retrieving news from: 2021-08-21 00:00:00
retrieving news from: 2021-08-20 00:00:00
retrieving news from: 2021-08-19 00:00:00
retrieving news from: 2021-08-18 00:00:00
retrieving news from: 2021-08-17 00:00:00
retrieving news from: 2021-08-16 00:00:00
retrieving news from: 2021-08-15 00:00:00
retrieving news from: 2021-08-14 00:00:00
retrieving news from: 2021-08-13 00:00:00
retrieving news from: 2021-08-12 00:00:00
retrieving news from: 2021-08-11 00:00:00
retrieving news from: 2021-08-10 00:00:00
retrieving news from: 2021-08-09 00:00:00
retrieving news from: 2021-08-08 00:00:00
retrieving news from: 2021-08-07 00:00:00
retrieving news from: 2021-08-06 00:00:00
retrieving news from: 2021-08-05 00:00:00
retrieving news f

NewsAPIException: {'status': 'error', 'code': 'parameterInvalid', 'message': 'You are trying to request results too far in the past. Your plan permits you to request articles as far back as 2021-07-25, but you have requested 2021-07-25. You may need to upgrade to a paid plan.'}