In [19]:
import requests
import json
import os
from dotenv import load_dotenv
import pandas as pd
import datetime as dt
from tqdm import trange
from IPython.display import clear_output
import time

# Load environment variables via python-dotenv so that the API token can be
# read with os.getenv('TOKEN') further down instead of being hard-coded.
# NOTE(review): presumably reads a local .env file — confirm it is git-ignored.
load_dotenv()

True

In [20]:
# --- Run configuration ------------------------------------------------------
# API key comes from the environment; this is None when TOKEN is not set.
TOKEN = os.environ.get('TOKEN')

# Endpoint, fixed query-string fragment and the tickers to download.
API_site = "https://api.marketaux.com/v1/news/all"
others_setting = "filter_entities=true&language=en"
symbols = ["META", "AMZN", "AAPL", "NFLX", "GOOGL"]

# Most recent decoded API payload per symbol (filled by the download loop).
stocknews = {}

# Continue from the articles saved by a previous run; when no CSV exists yet,
# start from an empty table that already has the expected columns.
try:
    news = pd.read_csv("../data/News/news.csv")
except FileNotFoundError:
    news = pd.DataFrame(
        columns=["symbol", "date", "time", "uuid", "match", "sentiment", "content"]
    )

In [21]:
# `last_day` is the age, in days, of the OLDEST article stored so far.
# The download loop walks backwards in time, so this is where it resumes.
if len(news) == 0:
    # Nothing stored yet: start from today.
    last_day = 0
else:
    oldest_stored = dt.datetime.strptime(
        news["date"].sort_values(ascending=True).iloc[0], "%Y-%m-%d"
    ).date()
    last_day = (dt.date.today() - oldest_stored).days
print(last_day)

675


In [22]:
def update_news(news, stocknews):
    """Merge freshly downloaded API payloads into the accumulated news table.

    Parameters
    ----------
    news : pandas.DataFrame
        Articles collected so far. Must contain a ``uuid`` column; the frame
        passed in is not modified in place.
    stocknews : dict
        Maps a ticker symbol to the decoded JSON payload returned for it.
        A payload holding an ``"error"`` key stops processing; otherwise its
        ``"data"`` list is read, one entry per article.

    Returns
    -------
    tuple of (bool, pandas.DataFrame)
        ``(ok, news)``. ``ok`` is ``False`` as soon as an error payload is
        met; rows gathered from the payloads that preceded it are still
        included in the returned frame.

    Notes
    -----
    * An article is stored once, under the first symbol it is seen for
      (de-duplication is by ``uuid`` only).
    * Articles whose ``entities`` list is empty are skipped, because they
      carry no match/sentiment score to record.
    """
    # Set lookup instead of a regex scan over the whole column per article.
    seen_uuids = set(news["uuid"])
    records = []
    ok = True

    for symbol, stocknew in stocknews.items():
        if "error" in stocknew:
            ok = False
            break
        for new_info in stocknew["data"]:
            uuid = new_info["uuid"]
            if uuid in seen_uuids:
                continue
            entities = new_info["entities"]
            if not entities:
                # No scored entity: indexing [0] would raise IndexError.
                continue
            # %f accepts any fractional-second value, not only ".000000".
            published = dt.datetime.strptime(
                new_info["published_at"], '%Y-%m-%dT%H:%M:%S.%fZ'
            )
            score = entities[0]
            records.append({
                "symbol": symbol,
                "uuid": uuid,
                "content": new_info["title"],
                "date": published.strftime("%Y-%m-%d"),
                "time": published.strftime("%H:%M:%S"),
                "match": score["match_score"],
                "sentiment": score["sentiment_score"],
            })
            seen_uuids.add(uuid)

    if records:
        # Build the new rows once instead of concatenating row by row.
        fresh = pd.DataFrame(records)
        if len(news) == 0:
            # Concatenating onto an empty frame is deprecated in recent
            # pandas; keep the existing column order and use the new rows.
            ordered = list(news.columns) + [
                col for col in fresh.columns if col not in news.columns
            ]
            news = fresh.reindex(columns=ordered)
        else:
            news = pd.concat([news, fresh], ignore_index=True)
    return ok, news

In [27]:
# Download loop: walk backwards in time one day per iteration, starting at the
# oldest date already stored (`last_day` days ago), and merge every round of
# payloads into `news`.
#
# The loop ends when either
#   * update_news reports an error payload (e.g. the API rate limit), or
#   * every symbol returned an empty "data" list. `published_before` only moves
#     further into the past, so an all-empty round means the history is
#     exhausted; without this check the loop kept issuing empty requests until
#     the rate limit was reached.
day = last_day
while True:
    yesterday = dt.datetime.now() - dt.timedelta(day)
    yesterday = dt.datetime.strftime(yesterday, '%Y-%m-%d')
    clear_output(wait=True)
    for symbol in symbols:
        reqstr = f'{API_site}?symbols={symbol}&{others_setting}&published_before={yesterday}&api_token={TOKEN}'
        data = requests.get(reqstr, timeout=5)
        stocknews[symbol] = json.loads(data.text)
    flag, news = update_news(news, stocknews)
    print(yesterday)
    print(stocknews)
    day += 1
    if not flag:
        break
    if all(not payload.get("data") for payload in stocknews.values()):
        # Nothing published before this date for any symbol: done.
        break
    # Small pause between rounds; skipped once the loop is about to stop.
    time.sleep(0.5)

2021-01-01
{'META': {'meta': {'found': 0, 'returned': 0, 'limit': 20, 'page': 1}, 'data': []}, 'AMZN': {'meta': {'found': 0, 'returned': 0, 'limit': 20, 'page': 1}, 'data': []}, 'AAPL': {'meta': {'found': 0, 'returned': 0, 'limit': 20, 'page': 1}, 'data': []}, 'NFLX': {'meta': {'found': 0, 'returned': 0, 'limit': 20, 'page': 1}, 'data': []}, 'GOOGL': {'meta': {'found': 0, 'returned': 0, 'limit': 20, 'page': 1}, 'data': []}}


In [24]:
# Debug aid: show the payloads from the last download round (this is where an
# API error such as a rate-limit message becomes visible).
print(stocknews)

{'META': {'error': {'code': 'rate_limit_reached', 'message': 'The rate limit for this account has been reached.'}}, 'AMZN': {'error': {'code': 'rate_limit_reached', 'message': 'The rate limit for this account has been reached.'}}, 'AAPL': {'error': {'code': 'rate_limit_reached', 'message': 'The rate limit for this account has been reached.'}}, 'NFLX': {'error': {'code': 'rate_limit_reached', 'message': 'The rate limit for this account has been reached.'}}, 'GOOGL': {'error': {'code': 'rate_limit_reached', 'message': 'The rate limit for this account has been reached.'}}}


In [25]:
# Show the accumulated table, then persist it so the next run can resume.
print(news)
# On a first run the target directory may not exist yet; create it so the
# downloaded rows are not lost to an OSError from to_csv.
os.makedirs("../data/News", exist_ok=True)
# index=False (rather than the falsy 0) keeps the row index out of the CSV.
news.to_csv("../data/News/news.csv", index=False)

      symbol        date      time                                  uuid  \
0       META  2022-11-05  20:13:24  d5b8c1b1-a352-488a-99a0-98084f6a425f   
1       META  2022-11-05  18:02:00  186d471c-27a8-4e3d-a905-ab6982b32996   
2       META  2022-11-05  18:00:00  a6e7fdb2-80ff-46ea-bd07-53f5c6416111   
3       META  2022-11-05  16:49:01  5ff55fac-7139-46f1-bc32-30ca00379954   
4       META  2022-11-05  11:58:00  3291b5cf-5c58-4a63-a1c0-db633b9ba218   
...      ...         ...       ...                                   ...   
25351   AAPL  2021-01-03  19:01:04  1010b8bc-0105-4ed9-b56b-31533c81fdd0   
25352   AAPL  2021-01-02  16:25:30  6f6996d1-a498-4e8f-bed0-62572a15c2cc   
25353   AAPL  2021-01-02  13:47:42  766027ce-309e-475d-83d2-1966cdad2c86   
25354   AAPL  2021-01-01  15:09:03  184fc4c7-906d-4b99-a79a-2b64135dadf4   
25355   AAPL  2021-01-01  11:35:35  6bb734eb-11ef-40f0-b839-022622434f4c   

           match  sentiment                                            content  
0     