In [None]:
import eikon as ek
import pandas as pd

import json
import os
import sys
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) before they are read below.
load_dotenv()
ek.set_app_key(os.getenv('EIKON_API_KEY'))

# os.getenv returns None for an unset variable. None would be formatted as the
# literal text "None" into sys.path and into every data path built from
# REPO_PATH, so stop here with a clear message instead of failing later.
REPO_PATH: str = os.getenv('REPO_PATH')
if REPO_PATH is None:
    raise RuntimeError('REPO_PATH is not set; define it in the environment or in the .env file')

# os.path.join yields the same entry as plain concatenation when REPO_PATH ends
# with a separator, and still produces a valid path when it does not.
sys.path.insert(0, os.path.join(REPO_PATH, 'src_HF'))
# Wildcard import kept as-is; the cells below call load_previous_stories and
# extract_stories, which are presumably provided by this module (verify in utils).
from utils import *


### Gather data with Eikon, building on the existing file

In [None]:
TOPIC: str = 'CWP'  # the topic to download the news for

# Build the file locations with os.path.join so they do not depend on
# Windows-style backslashes or on REPO_PATH ending with a path separator.
headline_path: str = os.path.join(REPO_PATH, 'data', 'raw_news_headlines', f'EIKON_{TOPIC}_NEWS.csv')
stories_path: str = os.path.join(REPO_PATH, 'data', 'raw_news_stories', f'EIKON_{TOPIC}_NEWS_FULL.json')

headline_df: pd.DataFrame = pd.read_csv(headline_path)  # load the headlines

previous_stories: dict[str, str] = load_previous_stories(TOPIC)  # load the existing stories

existing_ids = previous_stories.keys()  # every story id that already has a stored entry

# Keep only the ids without a stored entry. Duplicated storyId rows are collapsed
# so the same story is not requested more than once.
storie_ids: pd.Series = headline_df.loc[~headline_df['storyId'].isin(existing_ids), 'storyId'].drop_duplicates()

print(f'Number of stories alredy downloaded/failed: ({len(existing_ids)}/{len(headline_df)})')

new_dict: dict[str, str] = extract_stories(storie_ids)  # download the stories

previous_stories.update(new_dict)  # merge the new results into the stored ones

# Opening the target with 'w' truncates it before anything is written, so an
# interrupted dump would destroy all earlier downloads. Write the full content to
# a sibling temp file first and swap it in with os.replace once it is complete.
tmp_path: str = stories_path + '.tmp'
with open(tmp_path, 'w') as file:
    json.dump(previous_stories, file, indent=2)
os.replace(tmp_path, stories_path)


### Retry failed requests

In [None]:
previous_stories: dict[str, str] = load_previous_stories(TOPIC)  # reload the stored stories

# Entries whose stored value is the literal string 'error' are the ones retried here.
error_ids: list[str] = [k for k, v in previous_stories.items() if v == 'error']

print(f'Number of stories with errors: {len(error_ids)}')

new_dict: dict[str, str] = extract_stories(error_ids)  # download the stories

previous_stories.update(new_dict)  # overwrite the retried entries with the new results

# Build the location with os.path.join so it does not depend on Windows-style
# backslashes or on REPO_PATH ending with a path separator.
stories_path: str = os.path.join(REPO_PATH, 'data', 'raw_news_stories', f'EIKON_{TOPIC}_NEWS_FULL.json')

# Opening the target with 'w' truncates it before anything is written, so an
# interrupted dump would destroy all earlier downloads. Write the full content to
# a sibling temp file first and swap it in with os.replace once it is complete.
tmp_path: str = stories_path + '.tmp'
with open(tmp_path, 'w') as file:
    json.dump(previous_stories, file, indent=2)
os.replace(tmp_path, stories_path)