In [184]:
import os
import time
import ccxt
import requests

from dotenv import load_dotenv
from datetime import datetime, timezone
from pandas import DataFrame, to_datetime, concat, merge


# Load variables from the local .env file into the process environment;
# the CoinGlass API key is read from os.environ further down.
load_dotenv('.env')

True

In [185]:
class DataCollector:
    """Collect price, open-interest and funding-rate candles for an instrument.

    Price candles (OHLCV) are fetched from exchanges through ccxt; open-interest
    (OI) and funding-rate (FR) candles are fetched from the CoinGlass futures
    REST API. Every ``start``/``end`` argument is a ``'DD.MM.YYYY'`` string that
    is interpreted as midnight UTC.
    """

    # Seconds per unit for each interval suffix accepted by convert_interval.
    _UNIT_SECONDS = {
        'm': 60,
        'h': 60 * 60,
        'd': 24 * 60 * 60,
        'w': 7 * 24 * 60 * 60,
    }

    # Exchange name -> (ccxt class name when futures=False, when futures=True).
    _CCXT_EXCHANGES = {
        'OKX': ('okx', 'okx'),
        'Kraken': ('kraken', 'krakenfutures'),
        'Huobi': ('huobi', 'huobi'),
        'Deribit': ('deribit', 'deribit'),
        'Bybit': ('bybit', 'bybit'),
        'Binance': ('binance', 'binancecoinm'),
        'Bitget': ('bitget', 'bitget'),
        'Bitmex': ('bitmex', 'bitmex'),
    }

    # Value sent as the CoinGlass `limit` parameter; also sizes each page window.
    _COINGLASS_PAGE_LIMIT = 4500

    # Seconds to wait for a CoinGlass response before giving up.
    _REQUEST_TIMEOUT = 30

    def __init__(self, coinglass_api_key):
        """Store the CoinGlass credentials and download the supported-pairs table.

        Args:
            coinglass_api_key: Value sent in the ``CG-API-KEY`` request header.

        Note:
            Construction performs one HTTP request (see get_validation_data).
        """
        self.baseURL = 'https://open-api-v3.coinglass.com/api/futures/'
        self.headers = {
            'accept': 'application/json',
            'CG-API-KEY': coinglass_api_key
        }
        self.validation_data = self.get_validation_data()

    def _request_data(self, endpoint, params=None):
        """GET ``baseURL + endpoint`` and return the ``data`` field of the JSON reply.

        Raises:
            requests.HTTPError: On a non-2xx HTTP status.
            RuntimeError: If the reply has no usable ``data`` field; the full
                payload is included so the API's own error message is visible.
        """
        response = requests.get(
            self.baseURL + endpoint,
            headers=self.headers,
            params=params,
            # Without a timeout a stalled connection would block forever.
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, dict) or payload.get('data') is None:
            raise RuntimeError(f'CoinGlass request to {endpoint!r} returned no data: {payload!r}')
        return payload['data']

    @staticmethod
    def _to_epoch_seconds(date_string):
        """Convert a ``'DD.MM.YYYY'`` string to epoch seconds at midnight UTC."""
        parsed = datetime.strptime(date_string, '%d.%m.%Y').replace(tzinfo=timezone.utc)
        return int(parsed.timestamp())

    def get_validation_data(self):
        """Return the ``data`` payload of CoinGlass ``supported-exchange-pairs``.

        The rest of the class treats it as a mapping of exchange name to a list
        of instrument dicts that carry an ``instrumentId`` key.
        """
        return self._request_data('supported-exchange-pairs')

    def validate(self, exchange, symbol):
        """Return True if ``symbol`` is an instrumentId listed under ``exchange``."""
        if exchange not in self.validation_data:
            return False

        return any(
            instrument['instrumentId'] == symbol
            for instrument in self.validation_data[exchange]
        )

    def get_symbol_tickers_coin_glass(self, exchange, symbol):
        """Print every CoinGlass instrument of ``exchange`` whose id contains ``symbol``.

        Raises:
            KeyError: If ``exchange`` is not present in the validation data.
        """
        for ticker in self.validation_data[exchange]:
            if symbol in ticker['instrumentId']:
                print(ticker)

    def get_symbol_tickers_ccxt(self, exchange_instance, symbol):
        """Print every ccxt market of ``exchange_instance`` whose symbol contains ``symbol``."""
        tickers = exchange_instance.fetchMarkets()
        for ticker in tickers:
            if symbol in ticker['symbol']:
                print(ticker)

    @staticmethod
    def convert_interval(interval):
        """Convert an interval such as ``'15m'``, ``'4h'``, ``'3d'`` or ``'1w'`` to seconds.

        Args:
            interval: A positive integer followed by one of ``m``, ``h``, ``d``, ``w``.

        Returns:
            The interval length in seconds.

        Raises:
            ValueError: If the suffix is unknown or the count is missing,
                non-numeric or not positive.
        """
        unit_seconds = DataCollector._UNIT_SECONDS.get(interval[-1:])
        if unit_seconds is None:
            raise ValueError('invalid interval')

        try:
            count = int(interval[:-1])
        except ValueError:
            raise ValueError('invalid interval') from None

        # A zero or negative step would stall the pagination loops.
        if count <= 0:
            raise ValueError('invalid interval')

        # The count applies to every unit, so '3d' is three days, not one.
        return count * unit_seconds

    def fetch_OHLCV(self, exchange_instance, symbol, interval, start, end,
                    additional_params=None, hold=5, max_retries=5):
        """Download OHLCV candles in ``[start, end)`` from a ccxt exchange.

        Args:
            exchange_instance: ccxt exchange object exposing ``has`` and ``fetch_ohlcv``.
            symbol: Market symbol passed to ``fetch_ohlcv``.
            interval: Timeframe string, e.g. ``'1d'`` (see convert_interval).
            start: First day to fetch, ``'DD.MM.YYYY'`` (UTC, inclusive).
            end: Day to stop at, ``'DD.MM.YYYY'`` (UTC, exclusive).
            additional_params: Optional dict forwarded as ``params``.
            hold: Seconds to sleep before retrying after a ccxt error.
            max_retries: Consecutive failed requests tolerated before the error
                is re-raised; ``None`` retries indefinitely.

        Returns:
            DataFrame with columns ``timestamp, Open, High, Low, Close, Volume``,
            sorted by timestamp with duplicate timestamps removed.

        Raises:
            AssertionError: If the exchange does not support ``fetchOHLCV``.
            ccxt.BaseError: The last error once ``max_retries`` is exceeded.
        """
        assert exchange_instance.has['fetchOHLCV'], 'exchange fetch OHLCV method is not supported in ccxt'

        # Copy so a caller's dict (or a shared default) is never mutated downstream.
        params = dict(additional_params) if additional_params else {}

        start_timestamp = self._to_epoch_seconds(start) * 1000
        end_timestamp = self._to_epoch_seconds(end) * 1000
        step_ms = 1000 * self.convert_interval(interval)

        ohlcvs = []
        failures = 0

        while start_timestamp < end_timestamp:
            try:
                ohlcv = exchange_instance.fetch_ohlcv(
                    symbol=symbol,
                    timeframe=interval,
                    since=start_timestamp,
                    params=params
                )
            except (ccxt.ExchangeError, ccxt.AuthenticationError, ccxt.ExchangeNotAvailable, ccxt.RequestTimeout) as error:
                failures += 1
                # Permanent errors (bad credentials, bad symbol) would otherwise loop forever.
                if max_retries is not None and failures > max_retries:
                    raise
                print('Got an error', type(error).__name__, error.args, ', retrying in', hold, 'seconds...')
                time.sleep(hold)
                continue

            failures = 0

            if ohlcv:
                ohlcvs.extend(ohlcv)
                # Resume right after the newest candle received. Advancing by
                # len(page) * step would re-fetch rows whenever the page has gaps.
                newest = max(row[0] for row in ohlcv)
                start_timestamp = max(newest + step_ms, start_timestamp + step_ms)
            else:
                # Nothing at this position: move one interval ahead and try again.
                start_timestamp += step_ms

        candles = DataFrame(ohlcvs, columns=['timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
        candles = candles[candles['timestamp'] < end_timestamp]
        candles = candles.drop_duplicates(subset='timestamp').sort_values('timestamp')
        # assign() builds a new frame, avoiding a write into a filtered slice.
        candles = candles.assign(timestamp=to_datetime(candles['timestamp'], unit='ms'))

        return candles.reset_index(drop=True)

    def get_ohlcv(self, exchange, symbol, interval, start, end, futures=False):
        """Fetch OHLCV candles for ``exchange`` by name using a fresh ccxt instance.

        Args:
            exchange: One of the keys of ``_CCXT_EXCHANGES`` (e.g. ``'Bitmex'``).
            symbol, interval, start, end: Forwarded to fetch_OHLCV.
            futures: Selects the futures ccxt class where a separate one is
                mapped (Kraken, Binance); otherwise has no effect.

        Raises:
            ValueError: If ``exchange`` is not in the supported list.
        """
        if exchange not in self._CCXT_EXCHANGES:
            raise ValueError('OI/FR data would not be available for this exchange')

        spot_name, futures_name = self._CCXT_EXCHANGES[exchange]
        exchange_class = getattr(ccxt, futures_name if futures else spot_name)
        return self.fetch_OHLCV(exchange_class(), symbol, interval, start, end)

    def _fetch_coinglass_ohlc(self, endpoint, prefix, exchange, symbol, interval, start, end):
        """Page through a CoinGlass ``ohlc-history`` endpoint and build one frame.

        Args:
            endpoint: Path below ``baseURL``, e.g. ``'openInterest/ohlc-history'``.
            prefix: Column prefix for the value columns (``'OI'`` or ``'FR'``).
            exchange, symbol, interval, start, end: As in get_oi_ohlc.

        Returns:
            DataFrame with ``timestamp`` plus ``'<prefix> Open/High/Low/Close'``
            as floats, sorted by timestamp without duplicates. Empty (columns
            only) when the API returns no rows.
        """
        assert self.validate(exchange, symbol), 'unsupported exchange or symbol'

        value_columns = [f'{prefix} Open', f'{prefix} High', f'{prefix} Low', f'{prefix} Close']
        columns = ['timestamp'] + value_columns

        start_timestamp = self._to_epoch_seconds(start)
        end_timestamp = self._to_epoch_seconds(end)
        if start_timestamp > end_timestamp:
            return DataFrame(columns=columns)

        limit = self._COINGLASS_PAGE_LIMIT
        # A window spanning (limit - 1) intervals holds at most `limit` candles
        # even when both ends are inclusive, so no page can be truncated.
        page_span = (limit - 1) * self.convert_interval(interval)

        pages = []
        cursor = start_timestamp
        while True:
            window_end = min(cursor + page_span, end_timestamp)
            data = self._request_data(
                endpoint,
                params={
                    'exchange': exchange,
                    'symbol': symbol,
                    'interval': interval,
                    'startTime': cursor,
                    'endTime': window_end,
                    'limit': limit
                }
            )
            if data:
                pages.append(DataFrame(data))

            if window_end >= end_timestamp:
                break
            # Start the next window on the boundary candle; the duplicate this
            # may produce is dropped below, and nothing is skipped if the API
            # treats endTime as exclusive.
            cursor = window_end

        if not pages:
            return DataFrame(columns=columns)

        # Concatenate once instead of growing a frame inside the loop.
        frame = concat(pages, ignore_index=True)
        frame = frame.rename(columns=dict(zip('ohlc', value_columns)))
        frame['timestamp'] = to_datetime(frame['t'], unit='s')
        frame[value_columns] = frame[value_columns].astype(float)

        return (
            frame[columns]
            .drop_duplicates(subset='timestamp')
            .sort_values('timestamp')
            .reset_index(drop=True)
        )

    def get_oi_ohlc(self, exchange, symbol, interval, start, end):
        """Fetch open-interest OHLC candles from CoinGlass.

        Args:
            exchange: CoinGlass exchange name; must be in the validation data.
            symbol: CoinGlass instrumentId listed for that exchange.
            interval: Interval string, e.g. ``'1d'`` or ``'8h'``.
            start, end: ``'DD.MM.YYYY'`` bounds (UTC) sent as startTime/endTime.

        Returns:
            DataFrame with columns ``timestamp, OI Open, OI High, OI Low, OI Close``.

        Raises:
            AssertionError: If the exchange/symbol pair fails validate().
        """
        return self._fetch_coinglass_ohlc(
            'openInterest/ohlc-history', 'OI', exchange, symbol, interval, start, end
        )

    def get_fr_ohlc(self, exchange, symbol, interval, start, end):
        """Fetch funding-rate OHLC candles from CoinGlass.

        Args:
            exchange: CoinGlass exchange name; must be in the validation data.
            symbol: CoinGlass instrumentId listed for that exchange.
            interval: Interval string, e.g. ``'1d'`` or ``'8h'``.
            start, end: ``'DD.MM.YYYY'`` bounds (UTC) sent as startTime/endTime.

        Returns:
            DataFrame with columns ``timestamp, FR Open, FR High, FR Low, FR Close``.

        Raises:
            AssertionError: If the exchange/symbol pair fails validate().
        """
        return self._fetch_coinglass_ohlc(
            'fundingRate/ohlc-history', 'FR', exchange, symbol, interval, start, end
        )

    def get_all(self, exchange, symbol, interval, start, end, futures=False):
        """Fetch OHLCV, OI and FR candles and inner-join them on ``timestamp``.

        Args:
            exchange, symbol, interval, start, end: Forwarded unchanged to
                get_oi_ohlc, get_fr_ohlc and get_ohlcv.
            futures: Forwarded to get_ohlcv to pick the futures ccxt class.

        Returns:
            DataFrame holding only timestamps present in all three sources,
            with rows containing NaN dropped.
        """
        oi_data = self.get_oi_ohlc(exchange, symbol, interval, start, end)
        fr_data = self.get_fr_ohlc(exchange, symbol, interval, start, end)
        ohlcv_data = self.get_ohlcv(exchange, symbol, interval, start, end, futures=futures)

        all_data = merge(ohlcv_data, oi_data, on='timestamp')
        all_data = merge(all_data, fr_data, on='timestamp')
        return all_data.dropna()

In [186]:
# The CoinGlass key comes from the environment (populated from .env by load_dotenv).
api_key = os.environ.get('CG_API_KEY')
if not api_key:
    # Fail here with a clear message rather than later inside an API call.
    raise RuntimeError('CG_API_KEY is not set; add it to .env or export it before running this notebook')

# Constructing the collector performs one request to download the supported pairs.
instance = DataCollector(api_key)

In [187]:
# Smoke test: daily BitMEX XBTUSD price candles via ccxt, 01.01.2022 up to (not including) 01.01.2024.
ohlcv_test = instance.get_ohlcv('Bitmex', 'XBTUSD', '1d', '01.01.2022', '01.01.2024')
ohlcv_test

Unnamed: 0,timestamp,Open,High,Low,Close,Volume
0,2022-01-01,46200.0,47925.5,46192.5,47728.5,5.436385e+08
1,2022-01-02,47728.5,47992.0,46647.5,47300.0,5.390720e+08
2,2022-01-03,47300.0,47577.0,45668.5,46425.0,7.605455e+08
3,2022-01-04,46425.0,47535.5,45500.0,45796.0,7.990866e+08
4,2022-01-05,45796.0,47067.5,42428.0,43395.0,1.753918e+09
...,...,...,...,...,...,...
725,2023-12-27,42533.0,43728.0,42130.0,43475.0,3.578441e+08
726,2023-12-28,43475.0,43893.5,42307.5,42610.0,3.358834e+08
727,2023-12-29,42610.0,43177.5,41284.0,42080.0,4.314916e+08
728,2023-12-30,42080.0,42639.5,41552.5,42187.0,2.451278e+08


In [188]:
# Sanity check: the OHLCV timestamps should be in ascending order.
ohlcv_test['timestamp'].is_monotonic_increasing

True

Fetching 2 years of data for a single ticker takes roughly 18 seconds on a local machine.

In [189]:
# Smoke test: daily open-interest candles for BitMEX XBTUSD from CoinGlass over the same date range.
oi_ohlc_test = instance.get_oi_ohlc('Bitmex', 'XBTUSD', '1d', '01.01.2022', '01.01.2024')
oi_ohlc_test

Unnamed: 0,timestamp,OI Open,OI High,OI Low,OI Close
0,2022-01-01,444722595.0,445874008.0,425712829.0,438938657.0
1,2022-01-02,438938657.0,449397149.0,428613465.0,441863565.0
2,2022-01-03,441863565.0,464784996.0,430903488.0,463022565.0
3,2022-01-04,463022565.0,464096077.0,427570437.0,441975519.0
4,2022-01-05,441975519.0,466338114.0,407119656.0,440711693.0
...,...,...,...,...,...
725,2023-12-27,286706940.0,290433084.0,278894286.0,284021968.0
726,2023-12-28,284021968.0,284191966.0,259734713.0,263685308.0
727,2023-12-29,263685308.0,275180992.0,260100598.0,274839373.0
728,2023-12-30,274839373.0,275549171.0,267190365.0,267238878.0


In [190]:
# Sanity check: the open-interest timestamps should be in ascending order.
oi_ohlc_test['timestamp'].is_monotonic_increasing

True

In [191]:
# Smoke test: daily funding-rate candles for BitMEX XBTUSD from CoinGlass over the same date range.
fr_ohlc_test = instance.get_fr_ohlc('Bitmex', 'XBTUSD', '1d', '01.01.2022', '01.01.2024')
fr_ohlc_test

Unnamed: 0,timestamp,FR Open,FR High,FR Low,FR Close
0,2022-01-01,0.0068,0.0085,-0.0181,-0.0181
1,2022-01-02,-0.0181,0.0058,-0.0181,0.0012
2,2022-01-03,0.0012,0.0100,-0.0099,-0.0099
3,2022-01-04,-0.0099,0.0100,-0.0099,0.0099
4,2022-01-05,0.0099,0.0100,0.0072,0.0072
...,...,...,...,...,...
725,2023-12-27,0.0146,0.0426,0.0100,0.0426
726,2023-12-28,0.0426,0.0426,0.0253,0.0253
727,2023-12-29,0.0253,0.0253,0.0100,0.0100
728,2023-12-30,0.0100,0.0105,0.0100,0.0105


In [192]:
# Same funding-rate query at 8-hour resolution over a short window (01.01.2024 to 05.01.2024).
# NOTE: this rebinds fr_ohlc_test, replacing the daily frame assigned in the previous cell.
fr_ohlc_test = instance.get_fr_ohlc('Bitmex', 'XBTUSD', '8h', '01.01.2024', '05.01.2024')
fr_ohlc_test

Unnamed: 0,timestamp,FR Open,FR High,FR Low,FR Close
0,2024-01-01 00:00:00,0.0185,0.027,0.0185,0.027
1,2024-01-01 08:00:00,0.027,0.027,0.019,0.019
2,2024-01-01 16:00:00,0.019,0.033,0.019,0.033
3,2024-01-02 00:00:00,0.033,0.0803,0.033,0.0803
4,2024-01-02 08:00:00,0.0803,0.0803,0.0503,0.0503
5,2024-01-02 16:00:00,0.0503,0.0503,0.0255,0.0255
6,2024-01-03 00:00:00,0.0255,0.0255,0.01,0.01
7,2024-01-03 08:00:00,0.01,0.01,0.01,0.01
8,2024-01-03 16:00:00,0.01,0.01,0.01,0.01
9,2024-01-04 00:00:00,0.01,0.01,0.01,0.01


In [193]:
# Sanity check on whatever fr_ohlc_test currently holds (the 8h frame when cells run in order).
fr_ohlc_test['timestamp'].is_monotonic_increasing

True

In [194]:
# End-to-end test: price, open-interest and funding-rate candles joined on timestamp.
all_test = instance.get_all('Bitmex', 'XBTUSD', '1d', '01.01.2022', '01.01.2024')
all_test

Unnamed: 0,timestamp,Open,High,Low,Close,Volume,OI Open,OI High,OI Low,OI Close,FR Open,FR High,FR Low,FR Close
0,2022-01-01,46200.0,47925.5,46192.5,47728.5,5.436385e+08,444722595.0,445874008.0,425712829.0,438938657.0,0.0068,0.0085,-0.0181,-0.0181
1,2022-01-02,47728.5,47992.0,46647.5,47300.0,5.390720e+08,438938657.0,449397149.0,428613465.0,441863565.0,-0.0181,0.0058,-0.0181,0.0012
2,2022-01-03,47300.0,47577.0,45668.5,46425.0,7.605455e+08,441863565.0,464784996.0,430903488.0,463022565.0,0.0012,0.0100,-0.0099,-0.0099
3,2022-01-04,46425.0,47535.5,45500.0,45796.0,7.990866e+08,463022565.0,464096077.0,427570437.0,441975519.0,-0.0099,0.0100,-0.0099,0.0099
4,2022-01-05,45796.0,47067.5,42428.0,43395.0,1.753918e+09,441975519.0,466338114.0,407119656.0,440711693.0,0.0099,0.0100,0.0072,0.0072
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2023-12-27,42533.0,43728.0,42130.0,43475.0,3.578441e+08,286706940.0,290433084.0,278894286.0,284021968.0,0.0146,0.0426,0.0100,0.0426
726,2023-12-28,43475.0,43893.5,42307.5,42610.0,3.358834e+08,284021968.0,284191966.0,259734713.0,263685308.0,0.0426,0.0426,0.0253,0.0253
727,2023-12-29,42610.0,43177.5,41284.0,42080.0,4.314916e+08,263685308.0,275180992.0,260100598.0,274839373.0,0.0253,0.0253,0.0100,0.0100
728,2023-12-30,42080.0,42639.5,41552.5,42187.0,2.451278e+08,274839373.0,275549171.0,267190365.0,267238878.0,0.0100,0.0105,0.0100,0.0105


In [195]:
# Sanity check: the merged frame should still be ordered by timestamp.
all_test['timestamp'].is_monotonic_increasing

True

Fetching 2 years of data for a single ticker takes roughly 20 seconds on a local machine.