# Imports

In [1]:
import logging
import os

import pandas as pd

import core.config.config_ as cconconf
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint
import helpers.hs3 as hs3
import im_v2.ccxt.data.client as icdcl
import im_v2.ccxt.universe.universe as imvccunun
import im_v2.common.data.client as icdc
import im_v2.cryptodatadownload.data.client.cdd_client as imcdaclcd
import research_amp.cc.statistics as ramccsta

In [2]:
# Set up logging for the notebook at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

_LOG = logging.getLogger(__name__)

# Log the first element of the system signature returned by `henv`.
_LOG.info("%s", henv.get_system_signature()[0])

# NOTE(review): presumably applies notebook-wide display settings; the helper
# is not visible from this notebook - confirm.
hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-c3f495ba-b3df-4a39-9aad-c953717109dd.json'
>>ENV<<: is_inside_container=True: code_version=1.0.6, container_version=cmamp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CI=''
>>ENV<<: AM_AWS_PROFILE=True AM_ECR_BASE_PATH=True AM_S3_BUCKET=True AM_TELEGRAM_TOKEN=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True
[31m-----------------------------------------------------------------------------
This code is not in sync with the container:
code_version='1.0.6' != container_version='cmamp-1.0.3'
-----------------------------------------------------------------------------
You need to:
- merge origin/master into your branch with `invoke git_merge_master`
- pull the latest container with `invoke docker_pull`[0m
# Git
    branch_name='CMTask878_Clean_up_CCXT_vs_CDD_notebook'
    hash='d622

# Configs

In [3]:
# Generate configs for `CDD` and `CCXT`.

In [4]:
def get_cmtask324_config_ccxt() -> cconconf.Config:
    """
    Build the CMTask324 config for the `CCXT` vendor.

    :return: config with the `load`, `data` and `column_names` sections
    """
    # Section name -> parameters, listed in the order they are added.
    sections = {
        # Load parameters.
        "load": {
            "aws_profile": "am",
            "data_dir": os.path.join(hs3.get_path(), "data"),
        },
        # Data parameters.
        "data": {
            "data_type": "OHLCV",
            "target_frequency": "T",
            "universe_version": "v03",
            "vendor": "CCXT",
        },
        # Column names.
        "column_names": {
            "close_price": "close",
            "currency_pair": "currency_pair",
            "exchange_id": "exchange_id",
        },
    }
    config = cconconf.Config()
    for section_name, params in sections.items():
        config.add_subconfig(section_name)
        for key, value in params.items():
            config[section_name][key] = value
    return config

In [5]:
# Build the `CCXT` config and print it for inspection.
config_ccxt = get_cmtask324_config_ccxt()
print(config_ccxt)

load:
  aws_profile: am
  data_dir: s3://alphamatic-data/data
data:
  data_type: OHLCV
  target_frequency: T
  universe_version: v03
  vendor: CCXT
column_names:
  close_price: close
  currency_pair: currency_pair
  exchange_id: exchange_id


In [6]:
def get_cmtask324_config_cdd() -> cconconf.Config:
    """
    Build the CMTask324 config for the `CDD` vendor.

    :return: config with the `load`, `data` and `column_names` sections
    """
    # Section name -> parameters, listed in the order they are added.
    sections = {
        # Load parameters.
        "load": {
            "aws_profile": "am",
            "data_dir": os.path.join(hs3.get_path(), "data"),
        },
        # Data parameters.
        "data": {
            "data_type": "OHLCV",
            "target_frequency": "T",
            "universe_version": "v01",
            "vendor": "CDD",
        },
        # Column names.
        "column_names": {
            "close_price": "close",
            "currency_pair": "currency_pair",
            "exchange_id": "exchange_id",
        },
    }
    config = cconconf.Config()
    for section_name, params in sections.items():
        config.add_subconfig(section_name)
        for key, value in params.items():
            config[section_name][key] = value
    return config

In [7]:
# Build the `CDD` config and print it for inspection.
config_cdd = get_cmtask324_config_cdd()
print(config_cdd)

load:
  aws_profile: am
  data_dir: s3://alphamatic-data/data
data:
  data_type: OHLCV
  target_frequency: T
  universe_version: v01
  vendor: CDD
column_names:
  close_price: close
  currency_pair: currency_pair
  exchange_id: exchange_id


# Load the data universe

## CCXT

In [8]:
# Load universe version `v03`; no vendor is passed, so the helper's default
# vendor is used.
ccxt_universe = imvccunun.get_vendor_universe(version="v03")

## CDD

In [9]:
# Load the `CDD` universe `v01` and keep only the full symbols quoted in USDT,
# since the other ones are not of interest here.
cdd_universe = [
    full_symbol
    for full_symbol in imvccunun.get_vendor_universe(
        version="v01", vendor="CDD"
    )
    if full_symbol.endswith("USDT")
]

# Compare universes

In [17]:
# Report the size of each vendor universe.
_LOG.info("Number of full symbols in 'CCXT': %s", len(ccxt_universe))
_LOG.info("Number of full symbols in 'CDD': %s", len(cdd_universe))

Number of full symbols in 'CCXT': 38
Number of full symbols in 'CDD': 36


In [11]:
# Full symbols that are present in both vendor universes.
currency_pair_intersection = set(ccxt_universe) & set(cdd_universe)
_LOG.info("Number of similar full symbols: %s", len(currency_pair_intersection))
display(currency_pair_intersection)

Number of similar full symbols: 18


{'binance::ADA_USDT',
 'binance::BNB_USDT',
 'binance::BTC_USDT',
 'binance::EOS_USDT',
 'binance::ETH_USDT',
 'binance::LINK_USDT',
 'binance::SOL_USDT',
 'ftx::BNB_USDT',
 'ftx::BTC_USDT',
 'ftx::ETH_USDT',
 'ftx::LINK_USDT',
 'ftx::XRP_USDT',
 'kucoin::ADA_USDT',
 'kucoin::BNB_USDT',
 'kucoin::BTC_USDT',
 'kucoin::EOS_USDT',
 'kucoin::ETH_USDT',
 'kucoin::XRP_USDT'}

In [12]:
# Full symbols that belong to the `CCXT` universe only.
ccxt_and_not_cdd = set(ccxt_universe) - set(cdd_universe)
_LOG.info(
    "Number of full symbols that are included in 'CCXT' but not in 'CDD': %s",
    len(ccxt_and_not_cdd),
)
display(ccxt_and_not_cdd)

Number of full symbols that are included in 'CCXT' but not in 'CDD': 20


{'binance::AVAX_USDT',
 'binance::DOGE_USDT',
 'ftx::DOGE_USDT',
 'ftx::SOL_USDT',
 'gateio::ADA_USDT',
 'gateio::AVAX_USDT',
 'gateio::BNB_USDT',
 'gateio::BTC_USDT',
 'gateio::DOGE_USDT',
 'gateio::EOS_USDT',
 'gateio::ETH_USDT',
 'gateio::FIL_USDT',
 'gateio::LINK_USDT',
 'gateio::SOL_USDT',
 'gateio::XRP_USDT',
 'kucoin::AVAX_USDT',
 'kucoin::DOGE_USDT',
 'kucoin::FIL_USDT',
 'kucoin::LINK_USDT',
 'kucoin::SOL_USDT'}

In [13]:
# Full symbols that belong to the `CDD` universe only.
cdd_and_not_ccxt = set(cdd_universe) - set(ccxt_universe)
_LOG.info(
    "Number of full symbols that are included in 'CDD' but not in 'CCXT': %s",
    len(cdd_and_not_ccxt),
)
display(cdd_and_not_ccxt)

Number of full symbols that are included in 'CDD' but not in 'CCXT': 40


{'binance::AAVE_USDT',
 'binance::BAT_USDT',
 'binance::BTT_USDT',
 'binance::CELR_USDT',
 'binance::CVC_USDT',
 'binance::DAI_USDT',
 'binance::DASH_USDT',
 'binance::DOT_USDT',
 'binance::ETC_USDT',
 'binance::FIL_USDT',
 'binance::ICP_USDT',
 'binance::ICX_USDT',
 'binance::LRC_USDT',
 'binance::LTC_USDT',
 'binance::MATIC_USDT',
 'binance::MKR_USDT',
 'binance::NEO_USDT',
 'binance::ONE_USDT',
 'binance::PAX_USDT',
 'binance::QTUM_USDT',
 'binance::SCU_USDT',
 'binance::TRX_USDT',
 'binance::TUSD_USDT',
 'binance::UNI_USDT',
 'binance::USDC_USDT',
 'binance::VET_USDT',
 'binance::XLM_USDT',
 'binance::XMR_USDT',
 'binance::XRP_USDT',
 'binance::ZEC_USDT',
 'ftx::BCH_USDT',
 'ftx::LTC_USDT',
 'ftx::TRX_USDT',
 'kucoin::BCH_USDT',
 'kucoin::DASH_USDT',
 'kucoin::LTC_USDT',
 'kucoin::NEO_USDT',
 'kucoin::TRX_USDT',
 'kucoin::XTZ_USDT',
 'kucoin::ZEC_USDT'}

# Compare close prices / returns from Binance

## Load the data

The code below can be used to load all the existing data from the two vendors, 'CDD' and 'CCXT'. The current version is restricted to Binance only; however, even for one exchange there is too much data to operate on. That is why the loaded data is limited to the intersection of currency pairs between the two universes, since only currency pairs that are present in both vendors can be compared.

In [14]:
# Binance-specific universe for `CCXT`.
ccxt_binance_universe = [
    element for element in ccxt_universe if element.startswith("binance")
]
# Binance-specific universe for `CDD`.
cdd_binance_universe_initial = [
    element for element in cdd_universe if element.startswith("binance")
]
# SCU_USDT has incorrect columns, so can not be downloaded.
# See CMTask244 - Cannot load CDD - binance - SCU/USDT from s3 for the reference.
# The symbol is filtered out rather than `.remove()`-d so that the cell does
# not raise when the symbol is absent from the universe.
cdd_binance_universe = [
    element
    for element in cdd_binance_universe_initial
    if element != "binance::SCU_USDT"
]
# The intersection of Binance currency pairs from two universes. The cleaned
# `CDD` universe is used so that a symbol that can not be loaded never ends up
# in the set of symbols to read.
currency_pair_intersection_binance = set(ccxt_binance_universe).intersection(
    cdd_binance_universe
)

In [16]:
# Loader parameters are taken from the `CDD` config.
aws_profile_cdd = config_cdd["load"]["aws_profile"]
root_dir_cdd = config_cdd["load"]["data_dir"]
data_type_cdd = config_cdd["data"]["data_type"]
cdd_loader = imcdaclcd.CddClient(
    data_type_cdd, root_dir_cdd, aws_profile=aws_profile_cdd
)
# Read the data for every full symbol shared by both vendors and stack the
# per-symbol results into one dataframe.
cdd_data = [
    cdd_loader.read_data(full_symbol)
    for full_symbol in currency_pair_intersection_binance
]
cdd_binance_df = pd.concat(cdd_data)

Reading CDD data for exchange id='binance', currencies='ETH_USDT', from file='s3://alphamatic-data/data/cryptodatadownload/20210924/binance/ETH_USDT.csv.gz'...
Processing CDD data for exchange id='binance', currencies='ETH_USDT'...
Index length increased by 5288 = 948591 - 943303
Reading CDD data for exchange id='binance', currencies='EOS_USDT', from file='s3://alphamatic-data/data/cryptodatadownload/20210924/binance/EOS_USDT.csv.gz'...
Processing CDD data for exchange id='binance', currencies='EOS_USDT'...
Index length increased by 5289 = 888096 - 882807
Reading CDD data for exchange id='binance', currencies='ADA_USDT', from file='s3://alphamatic-data/data/cryptodatadownload/20210924/binance/ADA_USDT.csv.gz'...
Processing CDD data for exchange id='binance', currencies='ADA_USDT'...
Index length increased by 5288 = 854976 - 849688
Reading CDD data for exchange id='binance', currencies='BTC_USDT', from file='s3://alphamatic-data/data/cryptodatadownload/20210924/binance/BTC_USDT.csv.gz'.

In [17]:
# Show the first rows and the shape of the loaded `CDD` data.
display(cdd_binance_df.head(3), cdd_binance_df.shape)

Unnamed: 0_level_0,open,high,low,close,volume,epoch,currency_pair,exchange_id
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-11-27 07:45:00+00:00,146.0,146.0,146.0,146.0,0.01,1574841000000.0,ETH/USDT,binance
2019-11-27 07:46:00+00:00,146.0,146.0,146.0,146.0,0.0,1574841000000.0,ETH/USDT,binance
2019-11-27 07:47:00+00:00,146.0,146.0,146.0,146.0,0.0,1574841000000.0,ETH/USDT,binance


(5599808, 8)

In [18]:
# Client parameters: the root dir and the AWS profile come from the `CCXT` config.
aws_profile_ccxt = config_ccxt["load"]["aws_profile"]
root_dir_ccxt = config_ccxt["load"]["data_dir"]
extension = "csv.gz"
ccxt_csv_client = icdcl.CcxtCsvParquetByAssetClient(
    root_dir_ccxt, extension, aws_profile=aws_profile_ccxt
)
# Both time bounds are passed as `None`.
start_ts = end_ts = None
ccxt_binance_df = ccxt_csv_client.read_data(
    list(currency_pair_intersection_binance), start_ts, end_ts
)

Reading CCXT data for exchange id='binance', currencies='ADA_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/ADA_USDT.csv.gz'...
Reading CCXT data for exchange id='binance', currencies='BNB_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/BNB_USDT.csv.gz'...
Reading CCXT data for exchange id='binance', currencies='BTC_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/BTC_USDT.csv.gz'...
Reading CCXT data for exchange id='binance', currencies='EOS_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/EOS_USDT.csv.gz'...
Reading CCXT data for exchange id='binance', currencies='ETH_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/ETH_USDT.csv.gz'...
Reading CCXT data for exchange id='binance', currencies='LINK_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/LINK_USDT.csv.gz'...
Reading CCXT data for exchange id='binance', currencies='SOL_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/bina

In [19]:
# Show the first rows and the shape of the loaded `CCXT` data.
display(ccxt_binance_df.head(3), ccxt_binance_df.shape)

Unnamed: 0_level_0,full_symbol,open,high,low,close,volume,currency_pair,exchange_id
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-08-17 00:00:00+00:00,binance::ADA_USDT,0.0946,0.0948,0.09442,0.09479,41334.2,ADA_USDT,binance
2018-08-17 00:00:00+00:00,binance::BNB_USDT,9.7779,9.7791,9.7538,9.7778,520.66,BNB_USDT,binance
2018-08-17 00:00:00+00:00,binance::BTC_USDT,6316.0,6319.04,6310.32,6311.64,9.967395,BTC_USDT,binance


(10084929, 8)

## Calculate returns and correlation

In [20]:
# `CDD` names cleaning.
cdd_binance_df["currency_pair"] = cdd_binance_df["currency_pair"].str.replace(
    "/", "_"
)

In [21]:
def resample_close_price(df: pd.DataFrame, resampling_freq: str) -> pd.Series:
    """
    Resample close price on the currency level to the specified frequency using
    the last close price.

    :param df: OHLCV data with `currency_pair` and `close` columns; the
        timestamps are stored either in the index (named `timestamp` or
        unnamed) or in a `timestamp` column
    :param resampling_freq: frequency from `pd.date_range()` to resample to
    :return: resampled close price per currency
    """
    # Move the DateTime index to a column, since `pd.Grouper(key=...)` refers
    # to columns.
    df = df.reset_index()
    if "timestamp" not in df.columns:
        # An unnamed index is materialized by `reset_index()` as `index`: give
        # it the name that the grouper below expects.
        df = df.rename(columns={"index": "timestamp"})
    # Group by currency pairs and simultaneously resample to the desired frequency.
    resampler = df.groupby(
        ["currency_pair", pd.Grouper(key="timestamp", freq=resampling_freq)]
    )
    # Take the last close value from each resampling period.
    close_series = resampler["close"].last()
    return close_series

In [22]:
def calculate_correlations(
    ccxt_close_price: pd.Series, cdd_close_price: pd.Series, compute_returns: bool
) -> pd.DataFrame:
    """
    Take CCXT and CDD close prices and calculate the correlations for each
    specific currency pair.

    Both series are expected to be indexed by `currency_pair` (first index
    level) and timestamp, as returned by `resample_close_price()`.

    :param ccxt_close_price: resampled close price per currency for CCXT
    :param cdd_close_price: resampled close price per currency for CDD
    :param compute_returns: if True - compare returns, if False - compare close prices
    :return: correlation matrix per currency
    """
    if compute_returns:
        # Group by currency pairs in order to calculate the percentage returns.
        cdd_values = cdd_close_price.groupby(level="currency_pair").pct_change()
        ccxt_values = ccxt_close_price.groupby(level="currency_pair").pct_change()
        cdd_label, ccxt_label = "cdd_returns", "ccxt_returns"
    else:
        cdd_values = cdd_close_price
        ccxt_values = ccxt_close_price
        cdd_label, ccxt_label = "cdd_close", "ccxt_close"
    # Name each series before merging so that every column label is tied to the
    # vendor the data comes from, regardless of the merge order.
    combined = pd.merge(
        cdd_values.rename(cdd_label),
        ccxt_values.rename(ccxt_label),
        left_index=True,
        right_index=True,
    )
    # Group by again to calculate the correlation for each currency pair.
    corr_matrix = combined.groupby(level=0).corr()
    return corr_matrix

In [23]:
# Corresponding resampled Series.
# Daily close prices per currency pair for each vendor.
ccxt_binance_series_1d = resample_close_price(ccxt_binance_df, "1D")
cdd_binance_series_1d = resample_close_price(cdd_binance_df, "1D")

# 5-minute close prices per currency pair for each vendor.
ccxt_binance_series_5min = resample_close_price(ccxt_binance_df, "5min")
cdd_binance_series_5min = resample_close_price(cdd_binance_df, "5min")

### 1-day returns

In [24]:
# Correlation of 1-day returns between `CCXT` and `CDD` per currency pair.
returns_corr_1day = calculate_correlations(
    ccxt_binance_series_1d, cdd_binance_series_1d, compute_returns=True
)
display(returns_corr_1day)

Unnamed: 0_level_0,Unnamed: 1_level_0,ccxt_returns,cdd_returns
currency_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ADA_USDT,ccxt_returns,1.0,0.997807
ADA_USDT,cdd_returns,0.997807,1.0
BNB_USDT,ccxt_returns,1.0,0.99849
BNB_USDT,cdd_returns,0.99849,1.0
BTC_USDT,ccxt_returns,1.0,0.997763
BTC_USDT,cdd_returns,0.997763,1.0
EOS_USDT,ccxt_returns,1.0,0.998294
EOS_USDT,cdd_returns,0.998294,1.0
ETH_USDT,ccxt_returns,1.0,0.995563
ETH_USDT,cdd_returns,0.995563,1.0


### 5-min returns

In [25]:
# Correlation of 5-minute returns between `CCXT` and `CDD` per currency pair.
returns_corr_5min = calculate_correlations(
    ccxt_binance_series_5min, cdd_binance_series_5min, compute_returns=True
)
display(returns_corr_5min)

Unnamed: 0_level_0,Unnamed: 1_level_0,ccxt_returns,cdd_returns
currency_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ADA_USDT,ccxt_returns,1.0,0.986913
ADA_USDT,cdd_returns,0.986913,1.0
BNB_USDT,ccxt_returns,1.0,0.987292
BNB_USDT,cdd_returns,0.987292,1.0
BTC_USDT,ccxt_returns,1.0,0.990746
BTC_USDT,cdd_returns,0.990746,1.0
EOS_USDT,ccxt_returns,1.0,0.993959
EOS_USDT,cdd_returns,0.993959,1.0
ETH_USDT,ccxt_returns,1.0,0.974821
ETH_USDT,cdd_returns,0.974821,1.0


## Compare close prices

### 1-day close prices

In [26]:
# Correlation of 1-day close prices between `CCXT` and `CDD` per currency pair.
close_corr_1day = calculate_correlations(
    ccxt_binance_series_1d, cdd_binance_series_1d, compute_returns=False
)
display(close_corr_1day)

Unnamed: 0_level_0,Unnamed: 1_level_0,cdd_close,ccxt_close
currency_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ADA_USDT,cdd_close,1.0,0.999995
ADA_USDT,ccxt_close,0.999995,1.0
BNB_USDT,cdd_close,1.0,0.999999
BNB_USDT,ccxt_close,0.999999,1.0
BTC_USDT,cdd_close,1.0,1.0
BTC_USDT,ccxt_close,1.0,1.0
EOS_USDT,cdd_close,1.0,0.999906
EOS_USDT,ccxt_close,0.999906,1.0
ETH_USDT,cdd_close,1.0,0.999994
ETH_USDT,ccxt_close,0.999994,1.0


### 5-min close prices

In [27]:
# Correlation of 5-minute close prices between `CCXT` and `CDD` per currency pair.
close_corr_5min = calculate_correlations(
    ccxt_binance_series_5min, cdd_binance_series_5min, compute_returns=False
)
display(close_corr_5min)

Unnamed: 0_level_0,Unnamed: 1_level_0,cdd_close,ccxt_close
currency_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ADA_USDT,cdd_close,1.0,1.0
ADA_USDT,ccxt_close,1.0,1.0
BNB_USDT,cdd_close,1.0,1.0
BNB_USDT,ccxt_close,1.0,1.0
BTC_USDT,cdd_close,1.0,1.0
BTC_USDT,ccxt_close,1.0,1.0
EOS_USDT,cdd_close,1.0,0.999999
EOS_USDT,ccxt_close,0.999999,1.0
ETH_USDT,cdd_close,1.0,1.0
ETH_USDT,ccxt_close,1.0,1.0


# Statistical properties of a full symbol in CDD

In [14]:
# Clearing `CDD` currency pairs that are incorrect.

# Binance: SCU_USDT has incorrect columns, so can not be loaded (see CMTask244).
cdd_symbols_to_drop = {"binance::SCU_USDT"}

# FTX has some critical mistakes in the downloading process, so can not continue analysis with them.
# See CMTask801 - Downloading issues of FTX exchange from 'CDD' universe for further reference.
cdd_ftx_universe = [
    element for element in cdd_universe if element.startswith("ftx")
]
cdd_symbols_to_drop.update(cdd_ftx_universe)

# Kucoin exchange: the timestamps are obviously wrong and with too short time period.
# See CMTask253 - Fix timestamp for CDD - kucoin for reference.
cdd_kucoin_universe = [
    element for element in cdd_universe if element.startswith("kucoin")
]
cdd_symbols_to_drop.update(cdd_kucoin_universe)

# Rebuild the universe instead of calling `.remove()` on it: the filter is
# idempotent, so re-running the cell does not raise `ValueError` for symbols
# that are already gone.
cdd_universe = [
    element for element in cdd_universe if element not in cdd_symbols_to_drop
]

## Comparison of intersection of full symbols between 'CCXT' and 'CDD'

In [15]:
# Full symbols that are included in both `CCXT` and `CDD` (the `CDD` universe is
# cleaned from unavailable full symbols).
cdd_and_ccxt_cleaned = set(ccxt_universe).intersection(cdd_universe)
len(cdd_and_ccxt_cleaned)

7

### Load the intersection of full symbols for 'CDD' and 'CCXT'

#### CDD

In [16]:
# After fixing `CCXT` loader below, the structural mistake appears with `CDD` loader.
# TODO(Max): Fix the code, once the vendor universe will be unified.
# See CMTask985 - Fix compute_start_end_stats in CCXT-CDD comparison notebook.
# NOTE: in the captured run this cell fails with
# `TypeError: read_data() takes 2 positional arguments but 4 were given`.
# Bind the `CDD` config so that the stats function takes only the data.
compute_start_end_stats = lambda data: ramccsta.compute_start_end_stats(
    data, config_cdd
)

# Compute start-end stats for the full symbols shared by both vendors.
cdd_start_end_table = ramccsta.compute_stats_for_universe(
    cdd_and_ccxt_cleaned, config_cdd, compute_start_end_stats
)

TypeError: read_data() takes 2 positional arguments but 4 were given

In [None]:
# `CDD` names cleaning: use `_` as the separator inside currency pairs.
# `regex=False` makes the literal replacement explicit, so the result does not
# depend on the pandas-version-specific default of the `regex` parameter.
cdd_start_end_table["currency_pair"] = cdd_start_end_table[
    "currency_pair"
].str.replace("/", "_", regex=False)

In [None]:
# Preview the first rows of the `CDD` start-end table.
cdd_start_end_table.head(3)

#### CCXT

In [31]:
# TODO(Max): Fix the code, once the vendor universe will be unified.
# See CMTask985 - Fix compute_start_end_stats in CCXT-CDD comparison notebook.
# NOTE: in the captured run this cell fails with an `AssertionError`
# comparing 'None' with 'T'.
# Bind the `CCXT` config so that the stats function takes only the data.
compute_start_end_stats = lambda data: ramccsta.compute_start_end_stats(
    data, config_ccxt
)
# Compute start-end stats for the full symbols shared by both vendors.
ccxt_start_end_table = ramccsta.compute_stats_for_universe(
    list(cdd_and_ccxt_cleaned),
    config_ccxt,
    compute_start_end_stats,
)

Reading CCXT data for exchange id='binance', currencies='ETH_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/ETH_USDT.csv.gz'...
Removed 215 / 1615369 = 0.01% rows
Index length increased by 4735 = 1619889 - 1615154


AssertionError: 
################################################################################
* Failed assertion *
'None'
==
'T'
################################################################################


In [None]:
# Preview the first rows of the `CCXT` start-end table.
ccxt_start_end_table.head(3)

### Display the union results

In [None]:
def unify_start_end_tables(
    cdd_df: pd.DataFrame, ccxt_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Join the 'CDD' and 'CCXT' start-end stats tables side by side.

    Both tables are indexed by (`exchange_id`, `currency_pair`); the stats
    columns get a vendor suffix (`_cdd` / `_ccxt`) and are sorted by name.

    :param cdd_df: start-end table for 'CDD'
    :param ccxt_df: start-end table for 'CCXT'
    :return: unified start-end table
    """
    index_cols = ["exchange_id", "currency_pair"]
    # Index each table by full symbol parts and tag its columns with the vendor.
    cdd_stats = cdd_df.set_index(index_cols).add_suffix("_cdd")
    ccxt_stats = ccxt_df.set_index(index_cols).add_suffix("_ccxt")
    # Align the two vendors on the common index.
    unified = pd.concat([cdd_stats, ccxt_stats], axis=1)
    # Order the columns alphabetically so that the same stat of both vendors
    # ends up in neighbouring columns.
    ordered_cols = sorted(unified.columns.to_list())
    return unified[ordered_cols]

In [None]:
# Combine the per-vendor start-end tables into one table and show it.
union_cdd_ccxt_stats = unify_start_end_tables(
    cdd_start_end_table, ccxt_start_end_table
)
display(union_cdd_ccxt_stats)

## Comparison of full symbols that are included in 'CDD' but not available in 'CCXT'

In [None]:
# Full symbols that belong to the cleaned `CDD` universe but are missing from `CCXT`.
cdd_and_not_ccxt_cleaned = set(cdd_universe) - set(ccxt_universe)
len(cdd_and_not_ccxt_cleaned)

In [None]:
# For 'avg_data_points_per_day' the amount of "days_available" is equal to 0, so it crashes the calculations.
# `discard()` is used instead of `remove()` so that re-running the cell does
# not raise `KeyError` once the symbol is already gone.
cdd_and_not_ccxt_cleaned.discard("binance::DAI_USDT")

In [None]:
# Bind the `CDD` config so that the stats function takes only the data.
compute_start_end_stats = lambda data: ramccsta.compute_start_end_stats(
    data, config_cdd
)

# Compute start-end stats for the full symbols that are available only in `CDD`.
cdd_unique_start_end_table = ramccsta.compute_stats_for_universe(
    cdd_and_not_ccxt_cleaned, config_cdd, compute_start_end_stats
)

In [None]:
# Show the start-end stats for the `CDD`-only full symbols.
display(cdd_unique_start_end_table)