In [29]:
import requests
import json
import os
from dotenv import load_dotenv
import pandas as pd
import datetime as dt
from tqdm import trange
from IPython.display import clear_output

load_dotenv()

True

In [30]:
# Marketaux "all news" endpoint and the fixed query-string fragment that is
# appended to every request built in this notebook.
API_site = "https://api.marketaux.com/v1/news/all"
others_setting = "filter_entities=true&language=en"

# Ticker symbols that are queried one at a time.
symbols = ["META", "AMZN", "AAPL", "NFLX", "GOOGL"]

# Scratch mapping: ticker symbol -> most recently decoded API payload.
stocknews = {}

# API token taken from the environment (populated by load_dotenv()).
TOKEN = os.environ.get('TOKEN')

# Resume from the previously saved table when it exists; otherwise start
# from an empty frame that already carries the expected columns.
try:
    news = pd.read_csv("../data/News/news.csv")
except FileNotFoundError:
    news = pd.DataFrame(
        columns=["symbol", "date", "time", "uuid", "match", "sentiment", "content"]
    )

In [31]:
# `last_day` is the age, in whole days, of the oldest article already stored
# in `news`; it is 0 when nothing has been stored yet.
if len(news) > 0:
    # The dates are "%Y-%m-%d" strings, so a plain sort puts the oldest first.
    oldest_text = news["date"].sort_values().iloc[0]
    oldest_date = dt.datetime.strptime(oldest_text, "%Y-%m-%d").date()
    last_day = (dt.date.today() - oldest_date).days
else:
    last_day = 0
print(last_day)

641


In [32]:
def update_news(news, stocknews):
    """Merge freshly fetched API payloads into the accumulated news table.

    Parameters
    ----------
    news : pandas.DataFrame
        Articles collected so far. Must contain a ``uuid`` column; new rows
        are appended using the keys ``symbol``, ``uuid``, ``content``,
        ``date``, ``time``, ``match`` and ``sentiment``.
    stocknews : dict
        Maps a ticker symbol to the decoded JSON payload fetched for it.
        A payload either contains an ``"error"`` key or a ``"data"`` list of
        article dicts with ``uuid``, ``title``, ``published_at`` and
        ``entities`` entries.

    Returns
    -------
    tuple
        ``(ok, news)``. ``ok`` is ``False`` when at least one payload carried
        an ``"error"`` key, otherwise ``True``. ``news`` is the input table
        with every not-yet-seen article appended (the very same object when
        nothing new was found).

    Notes
    -----
    * Articles are de-duplicated on ``uuid`` alone, so an article returned
      for several symbols is stored once, under the first symbol seen.
    * Payloads of the remaining symbols are still merged when one symbol
      reports an error, so data that was fetched successfully is not lost.
    """
    # Exact-match lookup replaces a regex substring scan of the whole column
    # for every single article.
    known_uuids = set(news["uuid"])
    records = []
    ok = True

    for symbol, stocknew in stocknews.items():
        if "error" in stocknew:
            # Remember the failure but keep merging the other symbols.
            ok = False
            continue
        for new_info in stocknew["data"]:
            uuid = new_info["uuid"]
            if uuid in known_uuids:
                continue
            known_uuids.add(uuid)

            # %f accepts any fractional-second part, not only ".000000".
            published = dt.datetime.strptime(
                new_info["published_at"], "%Y-%m-%dT%H:%M:%S.%fZ"
            )
            # Only the first entity is used; an article without entities is
            # kept with missing scores instead of raising IndexError.
            entities = new_info.get("entities") or [{}]
            score = entities[0]

            records.append(
                {
                    "symbol": symbol,
                    "uuid": uuid,
                    "content": new_info["title"],
                    "date": published.strftime("%Y-%m-%d"),
                    "time": published.strftime("%H:%M:%S"),
                    "match": score.get("match_score"),
                    "sentiment": score.get("sentiment_score"),
                }
            )

    if records:
        # One concatenation per call instead of one per article.
        news = pd.concat([news, pd.DataFrame(records)], ignore_index=True)
    return ok, news

In [33]:
# Walk backwards in time, one day per pass, starting from the age of the
# oldest stored article, until the API answers with an error payload or a
# request fails.
day = last_day
while True:
    yesterday = dt.datetime.now() - dt.timedelta(day)
    yesterday = dt.datetime.strftime(yesterday, '%Y-%m-%d')
    clear_output(wait=True)
    print(yesterday)
    for symbol in symbols:
        # Let requests encode the variable parameters (including the token)
        # instead of splicing them into the URL by hand; they are appended to
        # the fixed query string already present in the URL.
        query = {
            "symbols": symbol,
            "published_before": yesterday,
            "api_token": TOKEN,
        }
        try:
            data = requests.get(f'{API_site}?{others_setting}', params=query, timeout=5)
            stocknews[symbol] = json.loads(data.text)
        except (requests.RequestException, ValueError) as exc:
            # A timeout, connection failure or non-JSON body is recorded as an
            # error payload so that update_news() reports failure and the loop
            # ends cleanly. Only the exception type is stored, because the
            # message may embed the request URL and with it the API token.
            stocknews[symbol] = {
                "error": {"code": "request_failed", "message": type(exc).__name__}
            }
    flag, news = update_news(news, stocknews)
    day += 1
    if not flag:
        break

In [34]:
print(news)
# Write back to the same file the notebook loads `news` from, so that the
# next run resumes from this result instead of from a stale copy.
news_csv_path = "../data/News/news.csv"
os.makedirs(os.path.dirname(news_csv_path), exist_ok=True)
news.to_csv(news_csv_path, index=False)

      symbol        date      time                                  uuid  \
0       META  2022-11-05  20:13:24  d5b8c1b1-a352-488a-99a0-98084f6a425f   
1       META  2022-11-05  18:02:00  186d471c-27a8-4e3d-a905-ab6982b32996   
2       META  2022-11-05  18:00:00  a6e7fdb2-80ff-46ea-bd07-53f5c6416111   
3       META  2022-11-05  16:49:01  5ff55fac-7139-46f1-bc32-30ca00379954   
4       META  2022-11-05  11:58:00  3291b5cf-5c58-4a63-a1c0-db633b9ba218   
...      ...         ...       ...                                   ...   
25273   NFLX  2021-01-22  14:55:08  2e8daa5d-b22e-43de-93b1-03ef99e57857   
25274   NFLX  2021-01-22  14:48:47  df63c55b-0ed3-432c-aa66-69911d9986e7   
25275   NFLX  2021-01-22  14:42:59  a3db3a06-5a21-4dd7-b42b-63fbfbd48a49   
25276   NFLX  2021-01-22  13:55:04  e9447691-d953-4025-8d04-2cdf8aaa0aa7   
25277   NFLX  2021-01-22  13:30:00  68dfe285-a462-4765-97f9-e261b177c24a   

           match  sentiment                                            content  
0     