In [1]:
import datetime as dt
import glob
import math
import os
import re
import shutil
import time
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import pytz
import requests
from eodhd import APIClient


In [2]:
###############################################################################
# 1 · helper – split the full span into equal-length chunks                   #
###############################################################################
def date_chunks(start: dt.date, end: dt.date, chunk_len: int):
    """
    Yield consecutive, non-overlapping (chunk_start, chunk_end) date tuples.

    Both ends of every tuple are inclusive. Each chunk spans at most
    ``chunk_len`` days; the final chunk is clipped to ``end``. Nothing is
    yielded when ``start`` is after ``end``.

    Parameters
    ----------
    start : first day of the overall span (inclusive).
    end : last day of the overall span (inclusive).
    chunk_len : maximum number of days per chunk; must be at least 1.

    Raises
    ------
    ValueError
        If ``chunk_len`` is smaller than 1. Because this is a generator, the
        error surfaces when iteration starts, not when the function is called.
    """
    # A non-positive length would never advance the cursor and loop forever.
    if chunk_len < 1:
        raise ValueError(f"chunk_len must be >= 1, got {chunk_len}")

    one_day = dt.timedelta(days=1)
    span = dt.timedelta(days=chunk_len - 1)  # inclusive range -> length minus one

    cur = start
    while cur <= end:
        last = min(cur + span, end)
        yield (cur, last)
        cur = last + one_day


In [3]:
def safe_get(url, tries=5, pause=2):
    """
    Perform a GET request, retrying on any ``requests`` error.

    Parameters
    ----------
    url : full URL to fetch.
    tries : total number of attempts; must be at least 1.
    pause : base delay in seconds; the wait after attempt ``k`` (1-based) is
        ``pause * k``, i.e. a linear back-off.

    Returns
    -------
    The response object of the first attempt that succeeds (a non-error HTTP
    status, as checked by ``raise_for_status``).

    Raises
    ------
    ValueError
        If ``tries`` is smaller than 1 (otherwise no request would be made and
        ``None`` would be returned silently).
    requests.RequestException
        The error from the last attempt, once every attempt has failed.
    """
    if tries < 1:
        raise ValueError(f"tries must be >= 1, got {tries}")

    for k in range(tries):
        try:
            r = requests.get(url, timeout=60)
            r.raise_for_status()
            return r
        except requests.RequestException as e:
            # The exception text contains the request URL, query string
            # included; mask the token so it never ends up in saved output.
            shown = re.sub(r"(api_token=)[^&\s]+", r"\1***", str(e))
            print(f"  ⚠️  Error: {shown} (retry {k+1}/{tries})")
            if k == tries - 1:
                raise
            time.sleep(pause * (k + 1))  # linear back-off: pause, 2*pause, ...


In [4]:
###############################################################################
# 2 · one call = one CSV                                                      #
###############################################################################
def fetch_chunk(stock, d0, d1, api_key, out_dir):
    """
    Fetch one chunk of 1-minute bars and save it to disk as a CSV.

    The requested window runs from midnight US/Eastern on ``d0`` to midnight
    US/Eastern on the day after ``d1``; both bounds are sent as UTC epoch
    seconds. The ticker is queried with a ``.US`` suffix.

    Parameters
    ----------
    stock : ticker symbol without exchange suffix (e.g. ``"AAPL"``).
    d0, d1 : first and last calendar day of the chunk (inclusive).
    api_key : EODHD API token.
    out_dir : existing directory that receives the CSV.

    Returns
    -------
    Path of the written file, ``<out_dir>/<stock>_<d0>_<d1>.csv`` with both
    dates formatted as ``YYYYMMDD``.

    Raises
    ------
    ValueError
        If ``api_key`` is empty or ``None``.
    requests.RequestException
        If the request still fails after the retries done by ``safe_get``.
    """
    # Fail before any I/O instead of sending the literal "api_token=None".
    if not api_key:
        raise ValueError("api_key is missing; set the EODHD API token before downloading")

    et = pytz.timezone("US/Eastern")
    start_et = et.localize(dt.datetime.combine(d0, dt.time()))
    # Upper bound is the midnight that closes d1, so d1 is fully covered.
    end_et = et.localize(dt.datetime.combine(d1 + dt.timedelta(days=1), dt.time()))
    url = (
        f"https://eodhd.com/api/intraday/{stock}.US"
        f"?interval=1m&api_token={api_key}&fmt=json"
        f"&from={int(start_et.astimezone(pytz.utc).timestamp())}"
        f"&to={int(end_et.astimezone(pytz.utc).timestamp())}"
    )
    fname = f"{stock}_{d0:%Y%m%d}_{d1:%Y%m%d}.csv"
    path = Path(out_dir) / fname
    print(f"  → downloading chunk {fname}")

    # Go through the shared retry helper so a transient network or server
    # error does not cost the whole chunk.
    response = safe_get(url)
    pd.DataFrame(response.json()).to_csv(path, index=False)
    return path

In [5]:
###############################################################################
# 3 · wrapper for ONE ticker                                                  #
###############################################################################
def download_and_merge(stock, api_key, start_date, chunk_days, output_dir):
    """
    Download 1-minute history for one ticker in chunks and merge it into one CSV.

    The span from ``start_date`` to today is split with ``date_chunks``; each
    chunk is downloaded with ``fetch_chunk``. The chunk CSVs are then
    concatenated, sorted by their ``datetime`` column, de-duplicated on that
    timestamp and written to ``<output_dir>/<stock>_<start>_<today>.csv``
    (dates as ``YYYYMMDD``). Finally the chunk files are moved to
    ``<output_dir>/old``.

    Nothing is downloaded when the merged file for today already exists or
    when ``api_key`` is empty. A chunk that fails is reported and skipped; the
    merge still runs on the remaining chunks and a warning lists the gaps.

    Parameters
    ----------
    stock : ticker symbol passed through to ``fetch_chunk``.
    api_key : API token passed through to ``fetch_chunk``.
    start_date : first day of the history as a ``"YYYY-MM-DD"`` string.
    chunk_days : maximum number of days per download chunk.
    output_dir : directory for chunk and merged files; created if missing.

    Returns
    -------
    None. Progress and problems are reported with ``print``.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    archive_dir = output_dir / "old"
    archive_dir.mkdir(exist_ok=True)

    start = dt.datetime.strptime(start_date, "%Y-%m-%d").date()
    today = dt.date.today()

    # Construct final merged file name.
    final_name = f"{stock}_{start:%Y%m%d}_{today:%Y%m%d}.csv"
    final_path = output_dir / final_name
    if final_path.exists():
        print(f"Merged file already exists for {stock} at {final_path}. Skipping.")
        return

    # Without a token every request would be rejected; stop before trying.
    if not api_key:
        print(f"No API key provided; skipping download for {stock}.")
        return

    # --- Step A · download every chunk --------------------------------------
    csv_paths = []
    failed_ranges = []
    for d0, d1 in date_chunks(start, today, chunk_days):
        try:
            p = fetch_chunk(stock, d0, d1, api_key, output_dir)
            csv_paths.append(p)
            time.sleep(1)  # small pause between calls if needed
        except Exception as e:
            # The error text can contain the request URL; keep the token out
            # of the notebook output.
            reason = str(e).replace(str(api_key), "***")
            print(f"Chunk {d0}–{d1} failed:", reason)
            failed_ranges.append((d0, d1))

    if not csv_paths:
        print("No data downloaded for", stock)
        return

    # --- Step B · merge chronologically -------------------------------------
    dfs = []
    for p in sorted(csv_paths):
        try:
            df = pd.read_csv(p, parse_dates=["datetime"]).set_index("datetime")
            dfs.append(df)
        except Exception as e:
            print(f"Error reading {p}: {e}")
    if not dfs:
        print("No chunk files parsed for", stock)
        return

    merged = pd.concat(dfs).sort_index()
    # One chunk's upper bound equals the next chunk's lower bound, so a bar
    # stamped exactly on the boundary can arrive twice; keep one copy.
    merged = merged[~merged.index.duplicated(keep="first")]
    merged.to_csv(final_path)
    print("Merged file saved to", final_path)

    if failed_ranges:
        gaps = ", ".join(f"{a}–{b}" for a, b in failed_ranges)
        print(
            f"⚠️  {len(failed_ranges)} chunk(s) failed for {stock}; merged file has gaps: {gaps}. "
            f"Delete {final_path} and re-run to retry."
        )

    # --- Step C · archive chunk files ---------------------------------------
    archived = 0
    for p in csv_paths:
        # When one chunk covers the whole span its file name is the merged
        # file's name; moving it would archive the result just written.
        if p == final_path:
            continue
        shutil.move(str(p), str(archive_dir / p.name))
        archived += 1
    print(f"Archived {archived} chunk files for {stock}.")

In [6]:
# API token is read from the environment so it is never stored in the notebook;
# the value is None when EODHD_API_KEY is not set.
api_key = os.environ.get("EODHD_API_KEY")

# Tickers to process, in order.
stocks = ["SPY", "AAPL", "MSFT", "NVDA", "TSLA", "QQQ"]

# First day of the requested history (YYYY-MM-DD).
start_date = "2019-01-01"

# Maximum number of days per download chunk (passed as chunk_days).
max_allow_days = 120

# Folder that receives the downloaded and merged CSV files.
output_dir = "Intraday stocks"

In [None]:
###############################################################################
# =============================== run ========================================#
###############################################################################

# Process the tickers one after another; each call prints its own progress.
for s in stocks:
    print(f"\n=== {s} ===")
    download_and_merge(
        s,
        api_key=api_key,
        start_date=start_date,
        chunk_days=max_allow_days,
        output_dir=output_dir,
    )




=== SPY ===
Merged file already exists for SPY at Intraday stocks/SPY_20190101_20250619.csv. Skipping.

=== AAPL ===
  → downloading chunk AAPL_20190101_20190430.csv
  → downloading chunk AAPL_20190501_20190828.csv
  → downloading chunk AAPL_20190829_20191226.csv
  → downloading chunk AAPL_20191227_20200424.csv
