Why Coinapi?
- Delisted symbols
- Long history (back to the origins)
- Cleaned data
- Pricing is fair and usage-based (CoinMarketCap charges $700/month for the same data)

In [1]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
from datetime import datetime, date
import os
from matplotlib import pyplot as plt
import coinapi_fetcher
import time

# Relative path of the folder that the CSV files of this notebook are written to.
output_folder = '../coinapi_data'

In [2]:
# btc = coinapi_fetcher.get_history('COINBASE_SPOT_BTC_USD', datetime(2010, 1, 1))
# btc['close'].plot(figsize=(15, 6))
# plt.show()
# btc.to_csv(os.path.join(output_folder, 'COINBASE_SPOT_BTC_USD.csv'), index=True)

In [3]:
# Binance doesn't quote against USD; we would have to use BTC as base currency and re-calculate
# (Binance has a market share of approx. 40%).
# Problem: There is no BTC-BTC pair, so we wouldn't get the BTC volume. Use Coinbase instead,
# as they quote directly in USD.
exchange_id = 'COINBASE'
active = coinapi_fetcher.get_active_symbols(exchange_id)
historical = coinapi_fetcher.get_historical_symbols(exchange_id)

print(f'Got {len(active)} active and {len(historical)} historical symbols')
# The conversion has to happen inside the braces; a trailing '.to_list()' outside of them is
# just literal text that ends up in the printed line.
print(f'Active symbols: {list(active)}')
print(f'Historical symbols: {list(historical)}')

Get active symbols for COINBASE
Get historical symbols for COINBASE, adding to 0 existing
Out of 814 historical, 401 are USD-based
Got 330 active and 401 historical symbols
Active symbols: ['COINBASE_SPOT_AVNT_USD', 'COINBASE_SPOT_ACH_USD', 'COINBASE_SPOT_DASH_USD', 'COINBASE_SPOT_ARKM_USD', 'COINBASE_SPOT_AKT_USD', 'COINBASE_SPOT_IO_USD', 'COINBASE_SPOT_SEAM_USD', 'COINBASE_SPOT_VTHO_USD', 'COINBASE_SPOT_BCH_USD', 'COINBASE_SPOT_APE_USD', 'COINBASE_SPOT_RENDER_USD', 'COINBASE_SPOT_API3_USD', 'COINBASE_SPOT_AST_USD', 'COINBASE_SPOT_FLOKI_USD', 'COINBASE_SPOT_FLR_USD', 'COINBASE_SPOT_IOTX_USD', 'COINBASE_SPOT_CRO_USD', 'COINBASE_SPOT_WLD_USD', 'COINBASE_SPOT_ETHFI_USD', 'COINBASE_SPOT_SUKU_USD', 'COINBASE_SPOT_FORT_USD', 'COINBASE_SPOT_ZEN_USD', 'COINBASE_SPOT_RED_USD', 'COINBASE_SPOT_USDS_USD', 'COINBASE_SPOT_RAD_USD', 'COINBASE_SPOT_AVAX_USD', 'COINBASE_SPOT_AERO_USD', 'COINBASE_SPOT_ICP_USD', 'COINBASE_SPOT_DOGINME_USD', 'COINBASE_SPOT_XCN_USD', 'COINBASE_SPOT_GNO_USD', 'COINBASE_SPO

In [None]:
def write_file(data, file_path):
    '''
    Writes data to file_path as CSV (index included) and prints a short confirmation with the
    number of rows written.

    The parent folder of file_path is created first if it does not exist yet, so that a run
    against a fresh output folder does not fail.
    '''
    parent_folder = os.path.dirname(file_path)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    if parent_folder:
        os.makedirs(parent_folder, exist_ok=True)
    data.to_csv(file_path, index=True)
    print(f'Wrote file {file_path} with {len(data)} rows')

def get_existing_content(file_path):
    '''
    Loads the CSV at file_path and returns a tuple (content, last_date).

    content is the file's data indexed by its 'date' column (parsed as datetimes), without the
    last two rows; it is needed to append new data to later. last_date is the date of the last
    remaining row, or None if no rows remain.

    If the file does not exist, is completely empty or has no 'date' column,
    (empty DataFrame, None) is returned.
    '''
    if not os.path.exists(file_path):
        return (pd.DataFrame(), None)

    print(f'File {file_path} exists, get last row')
    try:
        content = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header line at all; read_csv raises instead of returning an
        # empty frame. Treat it like a file without data.
        print('File is empty, return empty DataFrame')
        return (pd.DataFrame(), None)

    # When the file holds no data, there's no 'date' column that we can use as index; return an
    # empty df so that all data is fetched from the crypto's start of existence.
    if ('date' not in content.columns):
        print('File is missing date column, return empty DataFrame')
        return (pd.DataFrame(), None)

    # Remove the last row; it may contain intraday data; make it 2 to be sure.
    # assign() builds a new frame, so nothing is written into a slice of the original.
    content = (
        content.iloc[:-2]
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .set_index('date')
    )
    print(f'Existing file has {len(content)} rows')
    if (len(content) == 0):
        return (content, None)

    last_date = content.index[-1].date()
    print(f'Last date in existing file is {last_date}')
    return (content, last_date)

# Same value as the output folder set in the first cell.
output_folder = '../coinapi_data'
# Start date used for symbols without usable local data.
first_date = date(2010, 1, 1)

# Number of symbols for which history was actually requested; skipped symbols don't count.
fetched_count = 0
# Historical contains *all* cryptos, delisted as well as active. Delisted ones just won't return
# any new data.
for current_index, symbol_id in enumerate(historical, start=1):
    print('------')
    print(f'Get {current_index}/{len(historical)}')
    file_path = f'{output_folder}/historical/{symbol_id}.csv'
    (existing_content, start_date) = get_existing_content(file_path)
    if (start_date):
        print(f'File {symbol_id} exists, start date is {start_date}')
    else:
        start_date = first_date
        print(f'File {symbol_id} does not exist')
    end_date = date.today()
    if (start_date >= end_date):
        print(f'Skip {symbol_id}, start is on or after end')
        continue
    print(f'Get {symbol_id} from {start_date} to {end_date}')
    new_content = coinapi_fetcher.get_history(symbol_id, start_date)
    fetched_count += 1
    # Make sure that content was returned before we concat an empty DF (which would fail)
    if (not new_content.empty):
        data = pd.concat([df for df in [existing_content, new_content] if not df.empty])
        # start_date is the last date that is already part of existing_content, so the fetched
        # range may start with a bar for that same date (depends on get_history, which is not
        # visible here -- TODO confirm). Keep only the freshly fetched row for any index value
        # that occurs twice.
        data = data[~data.index.duplicated(keep='last')]
        write_file(data, file_path)
    # Pause between requests (presumably to stay below the API's rate limit).
    time.sleep(0.5)
print(f'Out of {len(historical)} cryptos, {fetched_count} were fetched')

------
Get 1/401
File ../coinapi_data/historical/COINBASE_SPOT_BTC_USD.csv exists, get last row
Existing file has 3863 rows
Last date in existing file is 2025-08-24
File COINBASE_SPOT_BTC_USD exists, start date is 2025-08-24
Get COINBASE_SPOT_BTC_USD from 2025-08-24 to 2025-10-13
Fetching COINBASE_SPOT_BTC_USD from 2025-08-24 to 2025-10-13 …
Fetched 50 bars
Wrote file ../coinapi_data/historical/COINBASE_SPOT_BTC_USD.csv with 3913 rows
------
Get 2/401
File ../coinapi_data/historical/COINBASE_SPOT_ETH_USD.csv exists, get last row
Existing file has 3333 rows
Last date in existing file is 2025-08-24
File COINBASE_SPOT_ETH_USD exists, start date is 2025-08-24
Get COINBASE_SPOT_ETH_USD from 2025-08-24 to 2025-10-13
Fetching COINBASE_SPOT_ETH_USD from 2025-08-24 to 2025-10-13 …
Fetched 50 bars
Wrote file ../coinapi_data/historical/COINBASE_SPOT_ETH_USD.csv with 3383 rows
------
Get 3/401
File ../coinapi_data/historical/COINBASE_SPOT_LTC_USD.csv exists, get last row
Existing file has 3242 row

In [6]:
# Turns out: /history returns active *and* delisted data
# Build the lookup once; testing against a set avoids rescanning historical for every active
# symbol.
historical_ids = set(historical)
for symbol_id in active:
    if (symbol_id not in historical_ids):
        print(f'!!!!! Active symbol {symbol_id} not in historical list')
#     file_path = f'{output_folder}/active/{symbol_id}.csv'
#     start_date = get_latest_entry_date(file_path) or first_date
#     end_date = date.today()
#     if (start_date >= end_date):
#         print(f'Skip {symbol_id}, start is on or after end')
#         continue
#     print(f'Get {symbol_id} from {start_date}')
#     data = coinapi_fetcher.get_history(symbol_id, start_date)
#     write_file(data, file_path)
#     time.sleep(1)

# print('Done')