## Notebook 00 – Data Pull

This notebook pulls and exports the data required for this project from EODHD. 

**Note:** To run this notebook, you must set your EODHD API key in `src/config.py`.

In `config.py`, insert your key between the quotes on the following line:

    API_KEY = ""  # Add your actual key here

Alternatively, you can store the key as an environment variable and update `config.py` like this:

    import os
    API_KEY = os.getenv("EODHD_API_KEY")

### Step 0 - Import packages and functions

In [None]:
import sys, os
sys.path.append("../src") 
sys.path.append("../") 
import pandas as pd
import requests
import time

# Helper functions to pull data
from data_fetcher import fetch_and_save_price

# Import global variables
from config import API_KEY, BASE_URL, DATE_FROM, DATE_TO, OUTPUT_DIR

### Step 1 - Assemble global parameters from config

In [None]:
# Bundle the settings imported from config.py into the single dictionary
# that is handed to fetch_and_save_price for every request
config = dict(
    API_KEY=API_KEY,
    BASE_URL=BASE_URL,
    DATE_FROM=DATE_FROM,
    DATE_TO=DATE_TO,
    OUTPUT_DIR=OUTPUT_DIR,
)

### Step 2 - Create suffix mapping

In [None]:
# Map each index constituent file name to the exchange suffix that EODHD
# expects on its tickers. ".US" covers both NASDAQ and NYSE listings.
suffix_map = {"sp500.csv": ".US"}

### Step 3 - Create data folder if needed

In [None]:
# Create the output folder configured as OUTPUT_DIR (noted as /data/raw_prices)
# when it is missing; exist_ok=True makes this a no-op if it already exists
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

### Step 4 - Get indexes

In [None]:
# Load every index constituent file listed in suffix_map and derive the
# EODHD-formatted ticker for each row.

# Holds one DataFrame per index file, keyed by file name
index_dfs = {}

# Directory containing the index constituent CSV files
path = '../data'

# os.listdir returns entries in arbitrary order, so sort them to keep the
# resulting ticker order reproducible between runs
for file in sorted(os.listdir(path)):
    # Only process CSV files that have an entry in suffix_map
    if not (file.endswith('.csv') and file in suffix_map):
        continue

    index_df = pd.read_csv(
        os.path.join(path, file),
        encoding='utf-8-sig',      # Strips a leading BOM if the file has one
        dtype={'ticker': str},     # Keep tickers as text
        keep_default_na=False,     # Do not turn blanks or strings like "NA" into NaN
    )

    # Exchange suffix to append for tickers from this file
    suffix = suffix_map[file]

    # Remove surrounding whitespace from the tickers
    index_df['ticker'] = index_df['ticker'].str.strip()

    # Blanks are preserved by the read above, so a row without a ticker would
    # otherwise turn into a bare suffix such as ".US"; drop those rows.
    # The copy avoids pandas' SettingWithCopyWarning on the assignment below.
    index_df = index_df[index_df['ticker'] != ''].copy()

    # Append the exchange suffix to match the EODHD ticker format,
    # unless the ticker already ends with it
    index_df['eodhd_ticker'] = [
        ticker if ticker.endswith(suffix) else ticker + suffix
        for ticker in index_df['ticker']
    ]

    # Store the processed DataFrame under its file name
    index_dfs[file] = index_df

### Step 5 - Get tickers

In [None]:
# Collect the EODHD tickers from every loaded index into one flat list.
# dict.fromkeys drops duplicates while keeping first-seen order, so a ticker
# that appears more than once is only requested once.
tickers = list(dict.fromkeys(
    ticker
    for index_df in index_dfs.values()
    for ticker in index_df['eodhd_ticker']
))

### Step 6 - Pull and export data

In [None]:
# Download and save historical price data for every ticker, one call at a time
PAUSE_BETWEEN_REQUESTS = 1.2  # Seconds to wait after each call to respect rate limits
for ticker in tickers:
    fetch_and_save_price(ticker, config)  # Helper imported from data_fetcher
    time.sleep(PAUSE_BETWEEN_REQUESTS)