In [3]:
import requests
import time
import datetime
import pandas as pd
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment, then
# read the CoinDesk API key from it (None when the variable is not set).
load_dotenv()
COINDESKAPIKEY = os.environ.get("COINDESKAPIKEY")


In [4]:
# Smoke test: fetch the 10 hourly BTC-USDT candles on Binance ending at a
# fixed timestamp, to inspect the response shape before paginating.
#
# SECURITY: the API key must never be written into the notebook. It is read
# from the environment via COINDESKAPIKEY (set in the first cell). Any key that
# was previously committed in this cell should be revoked and rotated.
response = requests.get(
    'https://data-api.coindesk.com/spot/v1/historical/hours',
    params={
        "market": "binance",
        "instrument": "BTC-USDT",
        "limit": 10,
        "aggregate": 1,
        "fill": "true",
        "apply_mapping": "true",
        "response_format": "JSON",
        "to_ts": 1740990845,
        "api_key": COINDESKAPIKEY,
    },
    headers={"Content-type": "application/json; charset=UTF-8"},
    timeout=30,  # never let a stalled connection hang the kernel
)
response.raise_for_status()  # surface HTTP errors (e.g. a bad key) immediately

json_response = response.json()

In [5]:
def get_unix_timestamp(dt):
    """
    Return the POSIX timestamp of ``dt`` as whole seconds.

    The fractional part is dropped by ``int()`` (truncation toward zero).
    Note that ``datetime.timestamp()`` interprets a naive datetime (one without
    tzinfo) as local time, so pass a timezone-aware value when UTC is intended.
    """
    epoch_seconds = dt.timestamp()
    return int(epoch_seconds)

def fetch_hourly_data_batch(to_ts, limit=1500, market="binance", instrument="BTC-USDT", timeout=30):
    """
    Fetch one batch of hourly records from the Coindesk spot historical API.

    Parameters
    ----------
    to_ts : int
        Unix timestamp (seconds) sent as the ``to_ts`` query parameter.
    limit : int, default 1500
        Value sent as the ``limit`` query parameter.
    market : str, default "binance"
        Value sent as the ``market`` query parameter.
    instrument : str, default "BTC-USDT"
        Value sent as the ``instrument`` query parameter.
    timeout : float or tuple, default 30
        Seconds passed to ``requests.get`` so a stalled connection raises
        instead of blocking the kernel forever.

    Returns
    -------
    list
        The content of the ``"Data"`` key of the JSON response, or an empty
        list when that key is absent or null.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = "https://data-api.coindesk.com/spot/v1/historical/hours"
    params = {
        "market": market,
        "instrument": instrument,
        "limit": limit,
        "aggregate": 1,           # hourly intervals
        "fill": "true",
        "apply_mapping": "true",
        "response_format": "JSON",
        "to_ts": to_ts,
        "api_key": COINDESKAPIKEY
    }
    headers = {"Content-type": "application/json; charset=UTF-8"}

    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()  # Raise an exception for HTTP errors
    payload = response.json()
    # "Data" may be missing or explicitly null; callers always get a list.
    return payload.get("Data") or []

def fetch_hourly_data_npages(max_pages=10, pause_seconds=1):
    """
    Page backwards through hourly data, starting from the current UTC time.

    Each iteration calls ``fetch_hourly_data_batch`` with the current ``to_ts``,
    collects the returned records, then moves ``to_ts`` to one hour before the
    oldest ``"TIMESTAMP"`` seen in that batch. Pagination stops after
    ``max_pages`` batches or as soon as a batch comes back empty.

    Parameters
    ----------
    max_pages : int, default 10
        Maximum number of API calls (pages) to make.
    pause_seconds : float, default 1
        Delay between consecutive API calls, to avoid rate limiting.

    Returns
    -------
    pandas.DataFrame
        All collected records. When non-empty, a timezone-aware (UTC)
        ``datetime`` column derived from ``TIMESTAMP`` is added and rows are
        sorted by it in ascending order. Empty when nothing was retrieved.

    Raises
    ------
    KeyError
        If a returned record has no ``"TIMESTAMP"`` field.
    """
    # Use a timezone-aware "now". datetime.utcnow() returns a naive datetime,
    # and .timestamp() interprets naive values as *local* time, which shifts
    # the end timestamp by the machine's UTC offset on non-UTC hosts.
    end_dt = datetime.datetime.now(datetime.timezone.utc)
    end_ts = get_unix_timestamp(end_dt)
    print("End timestamp:", end_ts)

    all_records = []
    current_to_ts = end_ts
    page_count = 0

    while page_count < max_pages:
        batch_data = fetch_hourly_data_batch(to_ts=current_to_ts)

        if not batch_data:
            print("No more data returned. Stopping pagination.")
            break

        all_records.extend(batch_data)
        page_count += 1

        # Find the oldest record explicitly instead of trusting that the API
        # returns the batch in ascending order.
        earliest_record = min(batch_data, key=lambda record: int(record["TIMESTAMP"]))
        print("Earliest record on page:", earliest_record)
        earliest_timestamp = int(earliest_record["TIMESTAMP"])

        print(f"Page {page_count} retrieved, earliest timestamp in batch: {earliest_timestamp}")

        # Set the new to_ts to one hour before the earliest timestamp to avoid duplicates
        current_to_ts = earliest_timestamp - 3600

        # Only pause when another request will actually follow.
        if page_count < max_pages:
            time.sleep(pause_seconds)

    df = pd.DataFrame(all_records)

    if df.empty:
        print("No data retrieved.")
        return df

    # Every record was required to carry "TIMESTAMP" above, so the column is
    # guaranteed to exist here.
    df["datetime"] = pd.to_datetime(df["TIMESTAMP"], unit='s', utc=True)
    # Non-inplace sort; the original index labels are kept, as before.
    df = df.sort_values(by="datetime")

    return df




In [6]:
# Run the paginated download, then preview the result when rows came back.
df_prices = fetch_hourly_data_npages()
if df_prices.empty:
    print("No data retrieved.")
else:
    print("Fetched data shape:", df_prices.shape)
    display(df_prices.head())

End timestamp: 1743657464
Earliest record on page: {'UNIT': 'HOUR', 'TIMESTAMP': 1738260000, 'TYPE': '954', 'MARKET': 'binance', 'INSTRUMENT': 'BTCUSDT', 'MAPPED_INSTRUMENT': 'BTC-USDT', 'BASE': 'BTC', 'QUOTE': 'USDT', 'BASE_ID': 1, 'QUOTE_ID': 7, 'TRANSFORM_FUNCTION': '', 'OPEN': 105300.01, 'HIGH': 105583.39, 'LOW': 104762.59, 'CLOSE': 105578.95, 'FIRST_TRADE_TIMESTAMP': 1738260000, 'LAST_TRADE_TIMESTAMP': 1738263599, 'FIRST_TRADE_PRICE': 105300.01, 'HIGH_TRADE_PRICE': 105583.39, 'HIGH_TRADE_TIMESTAMP': 1738263591, 'LOW_TRADE_PRICE': 104762.59, 'LOW_TRADE_TIMESTAMP': 1738261741, 'LAST_TRADE_PRICE': 105578.95, 'TOTAL_TRADES': 196049, 'TOTAL_TRADES_BUY': 94803, 'TOTAL_TRADES_SELL': 101246, 'TOTAL_TRADES_UNKNOWN': 0, 'VOLUME': 669.46961, 'QUOTE_VOLUME': 70447605.273754, 'VOLUME_BUY': 341.90979, 'QUOTE_VOLUME_BUY': 35981051.4448385, 'VOLUME_SELL': 327.55982, 'QUOTE_VOLUME_SELL': 34466553.8289155, 'VOLUME_UNKNOWN': 0, 'QUOTE_VOLUME_UNKNOWN': 0}
Page 1 retrieved, earliest timestamp in batch

Unnamed: 0,UNIT,TIMESTAMP,TYPE,MARKET,INSTRUMENT,MAPPED_INSTRUMENT,BASE,QUOTE,BASE_ID,QUOTE_ID,...,TOTAL_TRADES_UNKNOWN,VOLUME,QUOTE_VOLUME,VOLUME_BUY,QUOTE_VOLUME_BUY,VOLUME_SELL,QUOTE_VOLUME_SELL,VOLUME_UNKNOWN,QUOTE_VOLUME_UNKNOWN,datetime
13500,HOUR,1689660000,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1388.9038,41675610.0,610.27999,18312440.0,778.62381,23363170.0,0,0,2023-07-18 06:00:00+00:00
13501,HOUR,1689663600,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1151.28365,34538280.0,589.87374,17698080.0,561.40991,16840200.0,0,0,2023-07-18 07:00:00+00:00
13502,HOUR,1689667200,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1610.96253,48321420.0,733.31672,21997630.0,877.64581,26323790.0,0,0,2023-07-18 08:00:00+00:00
13503,HOUR,1689670800,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,784.77762,23539370.0,341.89704,10255090.0,442.88058,13284280.0,0,0,2023-07-18 09:00:00+00:00
13504,HOUR,1689674400,954,binance,BTCUSDT,BTC-USDT,BTC,USDT,1,7,...,0,1128.41237,33811840.0,530.21864,15886810.0,598.19373,17925030.0,0,0,2023-07-18 10:00:00+00:00


In [7]:
# Save to CSV if desired.
output_path = "../data/raw/btc_usdt_hourly.csv"
if df_prices.empty:
    # Do not overwrite a previous export with an empty file.
    print("No data retrieved; skipping CSV export.")
else:
    # Create the target directory on first run so to_csv does not fail.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df_prices.to_csv(output_path, index=False)