# Description

This notebook computes data statistics per exchange id and currency pair for a given vendor universe.

# Imports

In [1]:
import logging
import os

import core.config.config_ as ccocon
import helpers.dbg as hdbg
import helpers.env as henv
import helpers.printing as hprintin
import helpers.s3 as hs3
import im.data.universe as imdauni
import research.cc.statistics as rccsta

In [2]:
# Set up logging at INFO verbosity before creating the notebook logger.
hdbg.init_logger(verbosity=logging.INFO)

_LOG = logging.getLogger(__name__)

# Log the first element of the system signature returned by `henv`.
_LOG.info("%s", henv.get_system_signature()[0])

# Presumably applies notebook-wide display settings -- TODO confirm against `helpers.printing`.
hprintin.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-6c7d9735-b0bf-4807-82a0-ad8dc686ee45.json'
>>ENV<<: is_inside_container=True: code_version=None, container_version=cmamp-1.0.0, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CI=''
>>ENV<<: AM_AWS_PROFILE=True AM_ECR_BASE_PATH=True AM_S3_BUCKET=True AM_TELEGRAM_TOKEN=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True
[31m-----------------------------------------------------------------------------
This code is not in sync with the container:
code_version='None' != container_version='cmamp-1.0.0'
-----------------------------------------------------------------------------
You need to:
- merge origin/master into your branch with `invoke git_merge_master`
- pull the latest container with `invoke docker_pull`[0m
# Git
    branch_name='CmTask336_Refactor_research_lib_and_join_notebooks'
    h

# Config

In [3]:
def get_cmtask232_config() -> ccocon.Config:
    """
    Build the config used by the CmTask232 statistics notebook.

    The config has three sub-configs:
    - `load`: AWS profile and the S3 data directory
    - `data`: data type, target frequency, universe version and vendor
    - `column_names`: names of the close price, currency pair and exchange id
      columns

    :return: populated config
    """
    # Section name -> parameters; insertion order defines the config layout.
    sections = {
        "load": {
            "aws_profile": "am",
            "data_dir": os.path.join(hs3.get_path(), "data"),
        },
        "data": {
            "data_type": "OHLCV",
            "target_frequency": "T",
            "universe_version": "v0_1",
            "vendor": "CCXT",
        },
        "column_names": {
            "close_price": "close",
            "currency_pair": "currency_pair",
            "exchange_id": "exchange_id",
        },
    }
    config = ccocon.Config()
    for section_name, params in sections.items():
        # Create the sub-config first, then fill in its parameters.
        config.add_subconfig(section_name)
        for param_name, param_value in params.items():
            config[section_name][param_name] = param_value
    return config


# Build the notebook-level config and print it so the parameters used for this
# run are recorded in the output.
config = get_cmtask232_config()
print(config)

load:
  aws_profile: am
  data_dir: s3://alphamatic-data/data
data:
  data_type: OHLCV
  target_frequency: T
  universe_version: v0_1
  vendor: CCXT
column_names:
  close_price: close
  currency_pair: currency_pair
  exchange_id: exchange_id


# Compute start-end table

## Per exchange id and currency pair for a specified vendor

In [4]:
# Look up the universe for the universe version and vendor set in the config.
data_config = config["data"]
vendor_universe = imdauni.get_vendor_universe_as_tuples(
    data_config["universe_version"], data_config["vendor"]
)
vendor_universe

[ExchangeCurrencyTuple(exchange_id='binance', currency_pair='ADA/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='AVAX/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='BNB/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='BTC/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='DOGE/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='EOS/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='ETH/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='LINK/USDT'),
 ExchangeCurrencyTuple(exchange_id='binance', currency_pair='SOL/USDT'),
 ExchangeCurrencyTuple(exchange_id='bitfinex', currency_pair='ADA/USDT'),
 ExchangeCurrencyTuple(exchange_id='bitfinex', currency_pair='AVAX/USDT'),
 ExchangeCurrencyTuple(exchange_id='bitfinex', currency_pair='BTC/USDT'),
 ExchangeCurrencyTuple(exchange_id='bitfinex', currency_pair='DOGE/USDT'),
 ExchangeCurrencyTuple(exchange_id='bitfin

In [5]:
def compute_start_end_stats(data):
    """
    Compute start-end statistics for `data` using the notebook-level `config`.

    Thin adapter that binds `config`, so that it can be passed as the stats
    function to `rccsta.compute_stats_for_universe()`.

    :param data: data to compute the statistics on, forwarded unchanged
    :return: the result of `rccsta.compute_start_end_stats()`
    """
    return rccsta.compute_start_end_stats(data, config)


# Apply the stats function across the whole vendor universe.
start_end_table = rccsta.compute_stats_for_universe(
    vendor_universe, config, compute_start_end_stats
)

Reading CCXT data for exchange id='binance', currencies='ADA/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/ADA_USDT.csv.gz'...
Processing CCXT data for exchange id='binance', currencies='ADA/USDT'...
Index length increased by 4520 = 1622000 - 1617480
Reading CCXT data for exchange id='binance', currencies='AVAX/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/AVAX_USDT.csv.gz'...
Processing CCXT data for exchange id='binance', currencies='AVAX/USDT'...
Index length increased by 1224 = 517498 - 516274
Reading CCXT data for exchange id='binance', currencies='BNB/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/BNB_USDT.csv.gz'...
Processing CCXT data for exchange id='binance', currencies='BNB/USDT'...
Index length increased by 4520 = 1622295 - 1617775
Reading CCXT data for exchange id='binance', currencies='BTC/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/BTC_USDT.csv.gz'...
Processing CCXT data for exchange id='binan

Processing CCXT data for exchange id='gateio', currencies='BNB/USDT'...
Index length=129250 has not changed
Reading CCXT data for exchange id='gateio', currencies='BTC/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/gateio/BTC_USDT.csv.gz'...
Processing CCXT data for exchange id='gateio', currencies='BTC/USDT'...
Index length=129606 has not changed
Reading CCXT data for exchange id='gateio', currencies='DOGE/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/gateio/DOGE_USDT.csv.gz'...
Processing CCXT data for exchange id='gateio', currencies='DOGE/USDT'...
Index length=129422 has not changed
Reading CCXT data for exchange id='gateio', currencies='EOS/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/gateio/EOS_USDT.csv.gz'...
Processing CCXT data for exchange id='gateio', currencies='EOS/USDT'...
Index length=129192 has not changed
Reading CCXT data for exchange id='gateio', currencies='ETH/USDT' from file='s3://alphamatic-data/data/ccxt/20210924/gateio/ETH_USDT.

Looking at the results, we can see that all the exchanges except for Bitfinex have a long longest not-NaN sequence (at least >13% of the data) combined with high data coverage (>85%). Bitfinex has very low data coverage, and its longest not-NaN sequences span at most 1 full day (`longest_not_nan_seq_days` <= 1) and comprise less than 1% of the original data. This means that Bitfinex data is too sparse and fragmented, and we should exclude it from our analysis until we get cleaner data for it.

In [6]:
# Report the row count of the table as the number of exchange / currency pair
# combinations.
_LOG.info(
    "The number of unique exchange and currency pair combinations=%s",
    start_end_table.shape[0],
)
# Display the table as the cell output.
start_end_table

The number of unique exchange and currency pair combinations=48


Unnamed: 0,exchange_id,currency_pair,min_timestamp,max_timestamp,n_data_points,coverage,days_available,avg_data_points_per_day,longest_not_nan_seq_days,longest_not_nan_seq_perc,longest_not_nan_seq_start_date,longest_not_nan_seq_end_date,vendor
13,bitfinex,EOS/USDT,2019-04-12 06:02:00-04:00,2021-09-20 06:00:00-04:00,177355,13.81,891,199.05,0,0.01,2021-05-11 08:50:00-04:00,2021-05-11 11:14:00-04:00,CCXT
15,bitfinex,FIL/USDT,2020-10-15 14:54:00-04:00,2021-09-19 21:48:00-04:00,114422,23.42,339,337.53,0,0.02,2021-05-13 04:42:00-04:00,2021-05-13 06:13:00-04:00,CCXT
10,bitfinex,AVAX/USDT,2020-09-23 08:03:00-04:00,2021-09-20 07:02:00-04:00,124075,23.8,361,343.7,0,0.14,2021-09-07 15:54:00-04:00,2021-09-08 03:51:00-04:00,CCXT
9,bitfinex,ADA/USDT,2020-08-06 06:11:00-04:00,2021-09-19 23:24:00-04:00,166401,28.2,409,406.85,0,0.03,2021-03-18 10:51:00-04:00,2021-03-18 13:21:00-04:00,CCXT
16,bitfinex,LINK/USDT,2020-08-21 05:04:00-04:00,2021-09-20 00:21:00-04:00,165794,29.16,394,420.8,0,0.03,2021-09-11 09:43:00-04:00,2021-09-11 12:53:00-04:00,CCXT
18,bitfinex,XRP/USDT,2020-12-18 04:53:00-05:00,2021-09-19 20:57:00-04:00,152880,38.52,275,555.93,0,0.06,2021-04-13 11:32:00-04:00,2021-04-13 15:17:00-04:00,CCXT
14,bitfinex,ETH/USDT,2019-03-11 06:03:00-04:00,2021-09-19 19:13:00-04:00,622353,46.8,923,674.27,1,0.14,2021-05-19 09:59:00-04:00,2021-05-20 15:55:00-04:00,CCXT
12,bitfinex,DOGE/USDT,2021-04-21 07:03:00-04:00,2021-09-19 22:39:00-04:00,106829,48.92,151,707.48,0,0.13,2021-09-11 09:42:00-04:00,2021-09-11 14:20:00-04:00,CCXT
17,bitfinex,SOL/USDT,2021-02-25 06:10:00-05:00,2021-09-19 20:03:00-04:00,154215,51.85,206,748.62,0,0.14,2021-09-08 12:33:00-04:00,2021-09-08 19:27:00-04:00,CCXT
11,bitfinex,BTC/USDT,2019-03-11 06:05:00-04:00,2021-09-19 18:22:00-04:00,715022,53.77,923,774.67,1,0.17,2021-05-18 18:35:00-04:00,2021-05-20 07:16:00-04:00,CCXT


## Per currency pair

In [7]:
# Derive the per-currency-pair table from the per-exchange start-end table
# computed above, then display it.
currency_start_end_table = rccsta.compute_start_end_table_by_currency(
    start_end_table
)
currency_start_end_table

The number of unique currency pairs=11


Unnamed: 0,currency_pair,min_timestamp,max_timestamp,exchange_id,days_available
0,BNB/USDT,2018-08-16 20:00:00-04:00,2021-09-21 21:13:00-04:00,"[binance, ftx, gateio, kucoin]",1132
1,BTC/USDT,2018-08-16 20:00:00-04:00,2021-09-21 17:13:00-04:00,"[bitfinex, kucoin, binance, ftx, gateio]",1131
2,ETH/USDT,2018-08-16 20:00:00-04:00,2021-09-21 17:54:00-04:00,"[bitfinex, kucoin, binance, ftx, gateio]",1131
3,ADA/USDT,2018-08-16 20:00:00-04:00,2021-09-20 12:39:00-04:00,"[bitfinex, binance, gateio, kucoin]",1130
4,EOS/USDT,2018-08-16 20:00:00-04:00,2021-09-20 12:39:00-04:00,"[bitfinex, kucoin, binance, gateio]",1130
5,XRP/USDT,2018-12-03 11:58:00-05:00,2021-09-21 19:13:00-04:00,"[bitfinex, kucoin, ftx, gateio]",1023
6,LINK/USDT,2019-01-16 05:00:00-05:00,2021-09-21 20:32:00-04:00,"[bitfinex, binance, ftx, gateio, kucoin]",979
7,DOGE/USDT,2019-07-05 08:00:00-04:00,2021-09-21 19:51:00-04:00,"[bitfinex, binance, ftx, gateio, kucoin]",809
8,SOL/USDT,2020-07-26 20:13:00-04:00,2021-09-21 18:33:00-04:00,"[bitfinex, binance, ftx, gateio, kucoin]",421
9,AVAX/USDT,2020-09-22 02:30:00-04:00,2021-09-20 12:39:00-04:00,"[bitfinex, binance, gateio, kucoin]",363
