# Fetch historical data

A Python script that fetches historical OHLCV candle data from Binance using the ccxt library.

In [5]:
!pip install openpyxl

Collecting openpyxl
  Downloading https://files.pythonhosted.org/packages/6f/af/88ff9eef0b8f665aee1111ac6cede5ad12190c5bd726242bd2b26fc21b32/openpyxl-3.0.0.tar.gz (172kB)
Collecting jdcal (from openpyxl)
  Downloading https://files.pythonhosted.org/packages/f0/da/572cbc0bc582390480bbd7c4e93d14dc46079778ed915b505dc494b37c57/jdcal-1.4.1-py2.py3-none-any.whl
Collecting et_xmlfile (from openpyxl)
  Downloading https://files.pythonhosted.org/packages/22/28/a99c42aea746e18382ad9fb36f64c1c1f04216f41797f2f0fa567da11388/et_xmlfile-1.0.1.tar.gz
Building wheels for collected packages: openpyxl, et-xmlfile
  Building wheel for openpyxl (setup.py): started
  Building wheel for openpyxl (setup.py): finished with status 'done'
  Created wheel for openpyxl: filename=openpyxl-3.0.0-py2.py3-none-any.whl size=241193 sha256=8bc5909533679c0c1e54d9164a2c38f6848177764b3e3c9e8a5c742ebe9a69e7
  Stored in directory: C:\Users\codeninja\AppData\Local\pip\Cache\wheels\34\ee\6c\1279f7b70ea72432c2cef15dd3d915477cb37

In [33]:
# -*- coding: utf-8 -*-

import os
from pathlib import Path

import sys
import csv

# -----------------------------------------------------------------------------

# Start from the current working directory (what os.path.abspath('') resolves
# to) and climb three directory levels, one step at a time.
root = os.path.abspath('')
root = os.path.dirname(root)
root = os.path.dirname(root)
root = os.path.dirname(root)
# Make the `python` folder found at that level importable.
sys.path.append(root + '/python')

import ccxt  # noqa: E402


# -----------------------------------------------------------------------------

def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Fetch one batch of OHLCV candles, retrying when the request raises.

    The request is attempted once and, if it raises, repeated until it either
    succeeds or more than ``max_retries`` attempts have failed. That gives at
    most ``max_retries + 1`` attempts in total.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since, limit)``.
        max_retries: Number of failed attempts tolerated before giving up.
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Candle timeframe forwarded to ``fetch_ohlcv``.
        since: Start timestamp forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles forwarded to ``fetch_ohlcv``.

    Returns:
        Whatever ``exchange.fetch_ohlcv`` returns on the first successful attempt.

    Raises:
        Exception: The error of the last attempt, re-raised unchanged once the
            number of failed attempts exceeds ``max_retries``.
    """
    num_retries = 0
    # Loop so that a failed attempt is really followed by another one; a single
    # try/except would swallow the error and fall through returning None.
    while True:
        num_retries += 1
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit)
        except Exception:
            if num_retries > max_retries:
                raise  # retry budget exhausted: surface the original error


def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Collect OHLCV candles by paging backwards in time from the present.

    Starting at ``exchange.milliseconds()``, each request asks for ``limit``
    candles beginning one window (``limit`` timeframes) before the oldest
    candle collected so far. Paging stops when a request yields no candle
    older than what is already held, or once a window starting before
    ``since`` has been collected.

    Args:
        exchange: Object exposing ``milliseconds()``, ``parse_timeframe()``
            and ``iso8601()``; it is also forwarded to ``retry_fetch_ohlcv``.
        max_retries: Forwarded to ``retry_fetch_ohlcv``.
        symbol: Market symbol to fetch.
        timeframe: Timeframe string understood by ``exchange.parse_timeframe``.
        since: Checkpoint timestamp in milliseconds; paging stops after the
            first window that starts before it.
        limit: Number of candles requested per call.

    Returns:
        A list of candles (each indexable, with the timestamp at position 0)
        ordered oldest first. The last window is not trimmed, so the list may
        begin before ``since``. The list is empty when nothing was fetched.
    """
    earliest_timestamp = exchange.milliseconds()
    timeframe_duration_in_seconds = exchange.parse_timeframe(timeframe)
    timeframe_duration_in_ms = timeframe_duration_in_seconds * 1000
    timedelta = limit * timeframe_duration_in_ms
    all_ohlcv = []
    while True:
        fetch_since = earliest_timestamp - timedelta
        batch = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit)
        # Keep only candles strictly older than what we already hold. When the
        # requested window has gaps, the exchange's `limit` candles can run past
        # earliest_timestamp and would otherwise be stored twice. A missing or
        # empty batch simply yields no older candles.
        older = [candle for candle in (batch or []) if candle[0] < earliest_timestamp]
        # nothing older came back: we have reached the beginning of history
        if not older:
            break
        earliest_timestamp = older[0][0]
        all_ohlcv = older + all_ohlcv
        print(len(all_ohlcv), symbol, 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        # if we have reached the checkpoint
        if fetch_since < since:
            break
    return all_ohlcv


def write_to_csv(filename, exchange, data):
    """Write ``data`` as comma-separated rows under ``./data/raw/<exchange>/``.

    The sub-directory name is ``str(exchange)``. It is created, together with
    any missing parents, when it does not exist yet. An existing file of the
    same name is overwritten.

    Args:
        filename: Name of the file to create inside the exchange directory.
        exchange: Value whose ``str()`` form names the sub-directory.
        data: Iterable of rows, each row an iterable of cell values.
    """
    target_dir = Path("./data/raw/") / str(exchange)
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir.joinpath(str(filename))
    # newline='' leaves line endings entirely to the csv writer
    with destination.open('w+', newline='') as handle:
        writer = csv.writer(handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in data:
            writer.writerow(row)

def scrape_candles_to_csv(filename, exchange_id, max_retries, symbol, timeframe, since, limit):
    """Download candles for one market and save them through ``write_to_csv``.

    Args:
        filename: File name handed to ``write_to_csv``.
        exchange_id: Name of the exchange class to look up on the ``ccxt`` module.
        max_retries: Forwarded to ``scrape_ohlcv``.
        symbol: Market symbol to download.
        timeframe: Candle timeframe string.
        since: Checkpoint, either as a millisecond timestamp or as a string
            that is converted with ``exchange.parse8601``.
        limit: Number of candles requested per call.

    Returns:
        None. The candles are written to disk and a summary line is printed.
    """
    # instantiate the exchange by id
    exchange_class = getattr(ccxt, exchange_id)
    exchange = exchange_class({
        'enableRateLimit': True,  # required by the Manual
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since = exchange.parse8601(since)
    # preload all markets from the exchange
    exchange.load_markets()
    # fetch all candles
    ohlcv = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
    # save them to csv file
    write_to_csv(filename, exchange, ohlcv)
    if not ohlcv:
        # No candles collected: there is no first/last timestamp to report, and
        # indexing ohlcv[0] would raise.
        print('Saved', 0, 'candles to', filename)
        return
    print('Saved', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]), 'to', filename)


# -----------------------------------------------------------------------------

# Download 1-minute BTC/USDT candles from Binance, paging back to at least
# 2019-10-01T00:00:00Z (max_retries=3, 1000 candles per request).
scrape_candles_to_csv('btc_usdt_1m.csv', 'binance', 3, 'BTC/USDT', '1m', '2019-10-01T00:00:00Z', 1000)
# Further pairs can be downloaded the same way. Pass only the bare file name:
# write_to_csv already places the file under ./data/raw/<exchange>/.
# scrape_candles_to_csv('eth_btc_1m.csv', 'binance', 3, 'ETH/BTC', '1m', '2018-01-01T00:00:00Z', 1000)
# scrape_candles_to_csv('ltc_btc_1m.csv', 'binance', 3, 'LTC/BTC', '1m', '2018-01-01T00:00:00Z', 1000)
# scrape_candles_to_csv('xlm_btc_1m.csv', 'binance', 3, 'XLM/BTC', '1m', '2018-01-01T00:00:00Z', 1000)

1000 BTC/USDT candles in total from 2019-10-11T10:20:00.000Z to 2019-10-12T02:59:00.000Z
2000 BTC/USDT candles in total from 2019-10-10T17:40:00.000Z to 2019-10-12T02:59:00.000Z
3000 BTC/USDT candles in total from 2019-10-10T01:00:00.000Z to 2019-10-12T02:59:00.000Z
4000 BTC/USDT candles in total from 2019-10-09T08:20:00.000Z to 2019-10-12T02:59:00.000Z
5000 BTC/USDT candles in total from 2019-10-08T15:40:00.000Z to 2019-10-12T02:59:00.000Z
6000 BTC/USDT candles in total from 2019-10-07T23:00:00.000Z to 2019-10-12T02:59:00.000Z
7000 BTC/USDT candles in total from 2019-10-07T06:20:00.000Z to 2019-10-12T02:59:00.000Z
8000 BTC/USDT candles in total from 2019-10-06T13:40:00.000Z to 2019-10-12T02:59:00.000Z
9000 BTC/USDT candles in total from 2019-10-05T21:00:00.000Z to 2019-10-12T02:59:00.000Z
10000 BTC/USDT candles in total from 2019-10-05T04:20:00.000Z to 2019-10-12T02:59:00.000Z
11000 BTC/USDT candles in total from 2019-10-04T11:40:00.000Z to 2019-10-12T02:59:00.000Z
12000 BTC/USDT cand