In [1]:
import requests
import time
import datetime
import pandas as pd
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment, then
# read the CoinDesk API key from it (None when the variable is not set).
load_dotenv()
COINDESKAPIKEY = os.environ.get("COINDESKAPIKEY")


In [17]:
import requests 

# Smoke test: request a small sample (10 hourly candles) from the endpoint.
# The API key comes from the environment (COINDESKAPIKEY, set in the first
# cell) rather than being hardcoded, so it is not stored in the notebook.
# NOTE: the key that used to be written inline here should be treated as
# compromised and rotated.
response = requests.get(
    'https://data-api.coindesk.com/spot/v1/historical/hours',
    params={
        "market": "binance",
        "instrument": "BTC-USDT",
        "limit": 10,
        "aggregate": 1,
        "fill": "true",
        "apply_mapping": "true",
        "response_format": "JSON",
        "to_ts": 1740990845,
        "api_key": COINDESKAPIKEY,
    },
    headers={"Content-type": "application/json; charset=UTF-8"},
    timeout=30,  # without a timeout a stalled connection blocks the kernel
)
response.raise_for_status()  # fail loudly on HTTP errors, like fetch_hourly_data_batch

json_response = response.json()

In [2]:
def get_unix_timestamp(dt):
    """Return the Unix timestamp of ``dt`` in whole seconds.

    The value comes from ``dt.timestamp()`` and is converted with ``int``,
    which drops the fractional part.
    """
    epoch_seconds = dt.timestamp()
    return int(epoch_seconds)

def fetch_hourly_data_batch(to_ts, limit=1500, market="binance", instrument="BTC-USDT", timeout=30):
    """
    Fetch one batch of hourly records from the Coindesk spot API.

    Sends a GET request to the ``/spot/v1/historical/hours`` endpoint and
    returns the content of the "Data" key of the JSON response.

    Parameters
    ----------
    to_ts : int
        Value sent as the ``to_ts`` query parameter (Unix seconds in the
        callers visible in this notebook).
    limit : int, default 1500
        Value sent as the ``limit`` query parameter.
    market : str, default "binance"
        Value sent as the ``market`` query parameter.
    instrument : str, default "BTC-USDT"
        Value sent as the ``instrument`` query parameter.
    timeout : float, default 30
        Seconds passed to ``requests.get`` as the timeout, so a stalled
        connection raises instead of hanging indefinitely.

    Returns
    -------
    list
        The records under "Data"; an empty list when the key is missing
        or its value is null/empty.

    Raises
    ------
    requests.HTTPError
        If the response has an HTTP error status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    url = "https://data-api.coindesk.com/spot/v1/historical/hours"
    params = {
        "market": market,
        "instrument": instrument,
        "limit": limit,
        "aggregate": 1,           # hourly intervals
        "fill": "true",
        "apply_mapping": "true",
        "response_format": "JSON",
        "to_ts": to_ts,
        "api_key": COINDESKAPIKEY
    }
    headers = {"Content-type": "application/json; charset=UTF-8"}

    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()  # Raise an exception for HTTP errors
    data = response.json()
    # "Data" may be absent or null; normalise both to an empty list so callers
    # can always test truthiness and extend() with the result.
    return data.get("Data") or []

def fetch_hourly_data_npages(max_pages=9, end_dt=None):
    """
    Paginate backwards through hourly records using the ``to_ts`` parameter.

    Starting at ``end_dt``, calls ``fetch_hourly_data_batch`` up to
    ``max_pages`` times. After each batch the next request ends one hour
    before the smallest "TIMESTAMP" of that batch, so consecutive pages do
    not overlap. Pagination stops early when a batch comes back empty.

    Parameters
    ----------
    max_pages : int, default 9
        Maximum number of API calls to make.
    end_dt : datetime.datetime, optional
        Upper bound of the first page. Defaults to the current time as a
        timezone-aware UTC datetime. A naive datetime is interpreted as
        local time by ``datetime.timestamp()``.

    Returns
    -------
    pandas.DataFrame
        All fetched records. When a "TIMESTAMP" column is present, a
        timezone-aware ``datetime`` column is added and rows are sorted by
        it (the original index labels are kept). The frame is empty when
        nothing was retrieved.
    """
    if end_dt is None:
        # Use an aware UTC "now": a naive utcnow() value would be treated as
        # local time by .timestamp(), shifting the result by the UTC offset.
        end_dt = datetime.datetime.now(datetime.timezone.utc)
    end_ts = get_unix_timestamp(end_dt)
    print("End timestamp:", end_ts)

    all_records = []
    current_to_ts = end_ts

    for page_count in range(1, max_pages + 1):
        batch_data = fetch_hourly_data_batch(to_ts=current_to_ts)

        if not batch_data:
            print("No more data returned. Stopping pagination.")
            break

        all_records.extend(batch_data)

        # Do not rely on the ordering of the batch: pick the record with the
        # smallest TIMESTAMP explicitly.
        earliest_record = min(batch_data, key=lambda record: int(record["TIMESTAMP"]))
        print("Earliest record on page:", earliest_record)
        earliest_timestamp = int(earliest_record["TIMESTAMP"])

        print(f"Page {page_count} retrieved, earliest timestamp in batch: {earliest_timestamp}")

        # Set the new to_ts to one hour before the earliest timestamp to avoid duplicates
        current_to_ts = earliest_timestamp - 3600
        if page_count < max_pages:
            time.sleep(1)  # Pause between calls to avoid rate limiting

    df = pd.DataFrame(all_records)

    if df.empty:
        print("No data retrieved.")
        return df

    if "TIMESTAMP" not in df.columns:
        print("ERROR: The DataFrame does not have a 'TIMESTAMP' column. Columns are:", df.columns)
        return df

    # Convert the TIMESTAMP column to a datetime for easier interpretation
    df["datetime"] = pd.to_datetime(df["TIMESTAMP"], unit='s', utc=True)
    return df.sort_values(by="datetime")




In [3]:
# Run the paginated download, then show the shape and a preview of the result
df_prices = fetch_hourly_data_npages()
if df_prices.empty:
    print("No data retrieved.")
else:
    print("Fetched data shape:", df_prices.shape)
    display(df_prices.head())

End timestamp: 1743102654
Earliest record on page: {'UNIT': 'HOUR', 'TIMESTAMP': 1737705600, 'TYPE': '954', 'MARKET': 'binance', 'INSTRUMENT': 'BTCUSDT', 'MAPPED_INSTRUMENT': 'BTC-USDT', 'BASE': 'BTC', 'QUOTE': 'USDT', 'BASE_ID': 1, 'QUOTE_ID': 7, 'TRANSFORM_FUNCTION': '', 'OPEN': 105046.8, 'HIGH': 105650, 'LOW': 104922.41, 'CLOSE': 105500.76, 'FIRST_TRADE_TIMESTAMP': 1737705600, 'LAST_TRADE_TIMESTAMP': 1737709199, 'FIRST_TRADE_PRICE': 105046.79, 'HIGH_TRADE_PRICE': 105650, 'HIGH_TRADE_TIMESTAMP': 1737707419, 'LOW_TRADE_PRICE': 104922.41, 'LOW_TRADE_TIMESTAMP': 1737705928, 'LAST_TRADE_PRICE': 105500.76, 'TOTAL_TRADES': 167520, 'TOTAL_TRADES_BUY': 80877, 'TOTAL_TRADES_SELL': 86643, 'TOTAL_TRADES_UNKNOWN': 0, 'VOLUME': 938.64874, 'QUOTE_VOLUME': 98896263.6909673, 'VOLUME_BUY': 485.69586, 'QUOTE_VOLUME_BUY': 51176193.8757169, 'VOLUME_SELL': 452.95288, 'QUOTE_VOLUME_SELL': 47720069.8152504, 'VOLUME_UNKNOWN': 0, 'QUOTE_VOLUME_UNKNOWN': 0}
Page 1 retrieved, earliest timestamp in batch: 17377

Unnamed: 0,UNIT,TIMESTAMP,TYPE,MARKET,INSTRUMENT,MAPPED_INSTRUMENT,BASE,QUOTE,BASE_ID,QUOTE_ID,...,TOTAL_TRADES_UNKNOWN,VOLUME,QUOTE_VOLUME,VOLUME_BUY,QUOTE_VOLUME_BUY,VOLUME_SELL,QUOTE_VOLUME_SELL,VOLUME_UNKNOWN,QUOTE_VOLUME_UNKNOWN,datetime
12000,HOUR,1694505600,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1893.94969,48877650.0,846.27631,21845190.0,1047.67338,27032460.0,0,0,2023-09-12 08:00:00+00:00
12001,HOUR,1694509200,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1455.35759,37557830.0,719.08619,18558340.0,736.2714,18999490.0,0,0,2023-09-12 09:00:00+00:00
12002,HOUR,1694512800,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,6661.53232,174111900.0,3444.38486,90020640.0,3217.14746,84091240.0,0,0,2023-09-12 10:00:00+00:00
12003,HOUR,1694516400,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,2116.39977,55240470.0,923.61832,24109640.0,1192.78145,31130830.0,0,0,2023-09-12 11:00:00+00:00
12004,HOUR,1694520000,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1686.56984,44109990.0,753.80994,19717770.0,932.7599,24392210.0,0,0,2023-09-12 12:00:00+00:00


In [4]:
# Save to CSV if desired
output_path = "../data/raw/btc_usdt_hourly.csv"
if df_prices.empty:
    # Do not overwrite an earlier export with an empty file.
    print("No data to save; skipping CSV export.")
else:
    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df_prices.to_csv(output_path, index=False)