In [1]:
import ccxt
import pandas as pd
from dateutil import parser
from datetime import datetime, timezone, timedelta
from time import sleep
from tqdm import tqdm
import os

# ---------- CONFIG ----------
# ccxt exchange id to pull from, e.g. binance, coinbase, kraken, bybit (spot)
EXCHANGE = "binance"
# Markets to download; append further symbols as needed
SYMBOLS = ["BTC/USDT", "ETH/USDT"]
# Candle width: '1m','5m','15m','1h','4h','1d'
TIMEFRAME = "1h"
# Earliest timestamp to fetch (UTC ISO8601)
SINCE = "2019-01-01T00:00:00Z"
# Directory that receives the per-symbol CSV / parquet files
SAVE_DIR = "ohlcv_raw"
# Pause (seconds) inserted between consecutive page requests
RATELIMIT_SLEEP = 0.2
# ----------------------------

# Ensure the output directory is present before anything is written to it
os.makedirs(SAVE_DIR, exist_ok=True)

# Build the exchange client by looking the class up on the ccxt module by id.
# Public OHLCV endpoints need no API key.
_exchange_cls = getattr(ccxt, EXCHANGE)
ex = _exchange_cls({
    "enableRateLimit": True,
    "options": {"adjustForTimeDifference": True}
})

def iso_to_ms(iso: str) -> int:
    """Convert an ISO 8601 timestamp string to integer milliseconds since the Unix epoch.

    Args:
        iso: ISO 8601 date/time string. If it carries no UTC offset it is
            interpreted as UTC (not as the machine's local timezone), so the
            result is the same on every host.

    Returns:
        Whole milliseconds elapsed since 1970-01-01T00:00:00Z.

    Raises:
        ValueError: propagated from ``parser.isoparse`` when `iso` is not a
            valid ISO 8601 string.
    """
    dt = parser.isoparse(iso)
    if dt.tzinfo is None:
        # A naive datetime would be read as *local* time by `.timestamp()`;
        # this pipeline works in UTC throughout, so pin it explicitly.
        dt = dt.replace(tzinfo=timezone.utc)

    # Integer timedelta arithmetic avoids the float rounding of
    # `timestamp() * 1000`, which can truncate one millisecond low.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    return (dt - epoch) // timedelta(milliseconds=1)

def fetch_ohlcv_all(symbol: str, timeframe: str, since_iso: str, ex: ccxt.Exchange) -> pd.DataFrame:
    """
    Paginate OHLCV candles for one market from `since_iso` up to the present.

    Pages are requested with an advancing `since` cursor. Pagination stops when
    the exchange returns an empty page, when a page fails to move past the
    newest candle already collected, or when the cursor has passed the current
    time. A short page is NOT treated as the end of data, because an exchange
    may cap the page size below the requested `limit`.

    Args:
        symbol: Market symbol passed to `ex.fetch_ohlcv`, e.g. "BTC/USDT".
        timeframe: Candle width string understood by `ex.parse_timeframe`, e.g. "1h".
        since_iso: ISO 8601 start timestamp, converted with `iso_to_ms`.
        ex: Exchange client providing `fetch_ohlcv`, `parse_timeframe` and `id`.

    Returns:
        DataFrame indexed by a UTC `timestamp` DatetimeIndex, sorted ascending
        with duplicate timestamps removed, and columns:
        open, high, low, close, volume, symbol, exchange.
        If nothing was fetched, an empty frame with the same columns and index type.

    Raises:
        Whatever `ex.fetch_ohlcv` / `ex.parse_timeframe` / `iso_to_ms` raise;
        no exception is caught here.
    """
    out_cols = ["open", "high", "low", "close", "volume", "symbol", "exchange"]

    since_ms = iso_to_ms(since_iso)
    # Candle duration in milliseconds; loop-invariant, so computed once.
    step_ms = ex.parse_timeframe(timeframe) * 1000
    limit = ex.fetch_ohlcv_limit(timeframe) if hasattr(ex, "fetch_ohlcv_limit") else 1000
    if not limit:
        limit = 1000

    rows = []
    pbar_desc = f"{symbol} {timeframe}"
    last_ts = None

    with tqdm(total=0, desc=pbar_desc) as pbar:
        while True:
            data = ex.fetch_ohlcv(symbol, timeframe=timeframe, since=since_ms, limit=limit)
            if not data:
                break

            page_last_ts = data[-1][0]
            if last_ts is not None and page_last_ts <= last_ts:
                # The page did not advance beyond what we already hold
                # (e.g. `since` was ignored); stop instead of looping forever.
                break

            # Append and advance `since` to the candle after the last one received
            rows.extend(data)
            last_ts = page_last_ts
            since_ms = last_ts + step_ms

            # progress feedback
            pbar.set_postfix_str(datetime.fromtimestamp(last_ts / 1000, tz=timezone.utc).isoformat())
            pbar.update(1)

            # Caught up: the next candle would open in the future.
            now_ms = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
            if since_ms > now_ms:
                break

            sleep(RATELIMIT_SLEEP)

    if not rows:
        empty_index = pd.DatetimeIndex([], tz="UTC", name="timestamp")
        return pd.DataFrame(columns=out_cols, index=empty_index)

    df = pd.DataFrame(rows, columns=["ts", "open", "high", "low", "close", "volume"])
    df["timestamp"] = pd.to_datetime(df["ts"], unit="ms", utc=True)
    df = (
        # Overlapping pages can repeat a candle; keep the most recently fetched copy.
        df.drop_duplicates(subset="ts", keep="last")
        .drop(columns=["ts"])
        .set_index("timestamp")
        .sort_index()
    )
    df["symbol"] = symbol
    df["exchange"] = ex.id
    return df[out_cols]

# Download every configured market, persisting each one as CSV and parquet.
# A failure for one symbol is reported and does not abort the remaining symbols.
all_dfs = []
for sym in SYMBOLS:
    try:
        df = fetch_ohlcv_all(sym, TIMEFRAME, SINCE, ex)
        if not df.empty:
            # "/" is not usable in a file name, so swap it for "_"
            base = sym.replace("/","_")
            csv_path = os.path.join(SAVE_DIR, f"{EXCHANGE}_{base}_{TIMEFRAME}.csv")
            pq_path  = os.path.join(SAVE_DIR, f"{EXCHANGE}_{base}_{TIMEFRAME}.parquet")

            # Write both formats before the frame joins the combined set
            df.to_csv(csv_path)
            df.to_parquet(pq_path)
            print(f"Saved: {csv_path}  ({len(df)} rows)")
            all_dfs.append(df)
        else:
            print(f"[WARN] No data for {sym}")
    except Exception as e:
        print(f"[ERROR] {sym}: {e}")

# Stack the per-symbol frames into one time-ordered frame and preview its tail;
# fall back to an empty frame when nothing was downloaded.
if not all_dfs:
    mkt_df = pd.DataFrame()
else:
    mkt_df = pd.concat(all_dfs).sort_index()
    display(mkt_df.tail())

BTC/USDT 1h: 59it [00:41,  1.43it/s, 2025-08-17T19:00:00+00:00]


Saved: ohlcv_raw/binance_BTC_USDT_1h.csv  (58040 rows)


ETH/USDT 1h: 59it [00:28,  2.05it/s, 2025-08-17T19:00:00+00:00]


Saved: ohlcv_raw/binance_ETH_USDT_1h.csv  (58040 rows)


timestamp,open,high,low,close,volume,symbol,exchange
2025-08-17 17:00:00+00:00,117834.55,117934.82,117645.04,117900.0,212.98829,BTC/USDT,binance
2025-08-17 18:00:00+00:00,4523.91,4532.29,4427.44,4469.0,31555.654,ETH/USDT,binance
2025-08-17 18:00:00+00:00,117900.01,117947.98,117321.53,117542.01,360.32583,BTC/USDT,binance
2025-08-17 19:00:00+00:00,117542.01,117730.56,117468.0,117687.76,96.25389,BTC/USDT,binance
2025-08-17 19:00:00+00:00,4469.0,4481.28,4450.56,4479.64,9729.1464,ETH/USDT,binance
