# Description

This notebook is used to find currencies that are exactly the same in our universe.

# Imports

In [None]:
import logging
import os

import seaborn as sns

import core.config.config_ as cconconf
import core.plotting as coplotti
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint
import helpers.hs3 as hs3
import im_v2.common.universe as ivcu
import research_amp.cc.statistics as ramccsta

In [None]:
# Set up logging for the notebook at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

_LOG = logging.getLogger(__name__)

# Log the first element of the system signature returned by `henv`.
_LOG.info("%s", henv.get_system_signature()[0])

hprint.config_notebook()

# AWS profile name; `get_config()` stores it in the config and uses it to
# build the S3 data directory path.
AM_AWS_PROFILE = "am"

# Config

In [None]:
def get_config() -> cconconf.Config:
    """
    Build the config holding the loading and data parameters of the notebook.

    :return: config with a "load" section (AWS profile, data dir) and a
        "data" section (universe version, data type, vendor, price column)
    """
    cfg = cconconf.Config()
    # Section with the loading parameters.
    cfg.add_subconfig("load")
    cfg["load"]["aws_profile"] = AM_AWS_PROFILE
    s3_bucket_path = hs3.get_s3_bucket_path(AM_AWS_PROFILE)
    cfg["load"]["data_dir"] = os.path.join(s3_bucket_path, "data")
    # Section with the data parameters, filled in the order listed below.
    cfg.add_subconfig("data")
    data_params = {
        "universe_version": "v03",
        "data_type": "OHLCV",
        "vendor": "CCXT",
        "price_column": "close",
    }
    for param_name, param_value in data_params.items():
        cfg["data"][param_name] = param_value
    return cfg


# Build the config once; the cells below read their parameters from `config`.
config = get_config()
print(config)

# Get price data for a given universe

In [None]:
# Get the universe for the vendor and universe version set in `config`.
# NOTE(review): `as_full_symbol=True` presumably returns the entries as full
# symbols (exchange + currency pair) — TODO confirm.
vendor_universe = ivcu.get_vendor_universe(
    config["data"]["vendor"],
    version=config["data"]["universe_version"],
    as_full_symbol=True,
)
vendor_universe

In [None]:
# Load price data for the universe using the parameters in `config`.
df_price = ramccsta.get_universe_price_data(vendor_universe, config)
df_price.head(3)

In [None]:
# Summary statistics of the price data, rounded to 2 decimals.
df_price.describe().round(2)

In [None]:
# Preview the first rows of the price data.
df_price.head()

# Find same currencies

In [None]:
# Compute returns as the percentage change between consecutive rows.
# NOTE(review): depending on the pandas version, `pct_change()` may forward-fill
# missing prices before computing the change — TODO confirm that gaps in the
# price data do not distort the correlations computed below.
df_returns = df_price.pct_change()
df_returns.head(3)

In [None]:
# Pairwise correlation of returns between all columns, plotted as a heatmap.
corr_matrix = df_returns.corr()
_ = coplotti.plot_heatmap(corr_matrix)

`cluster_and_select()` distinguishes clusters, but some very highly correlated stablecoins are clustered together, so it seems that we cannot rely on the dendrogram and clustering alone.

In [None]:
# Cluster the return series; the return value is discarded (assigned to `_`).
# NOTE(review): the meaning of `11` is not visible here — presumably the
# number of clusters; TODO confirm and consider naming it.
_ = coplotti.cluster_and_select(df_returns, 11)

In [None]:
# Plot the correlation matrix as a hierarchically-clustered heatmap.
_ = sns.clustermap(corr_matrix, figsize=(20, 20))

In [None]:
# Display the 10 most correlated *other* series for each currency pair.
for colname in corr_matrix.columns:
    # Drop the correlation of the series with itself: it is trivially the
    # maximum and would otherwise take the first slot of every top-10 list,
    # leaving only 9 real candidates.
    corr_srs = corr_matrix[colname].drop(colname)
    # Rank the remaining series from most to least correlated.
    corr_srs_sorted = corr_srs.sort_values(ascending=False)
    display(corr_srs_sorted.head(10))

# Calculations on data resampled to 1 day

In [None]:
# Resample prices to daily bins by averaging; bins are closed and labeled on
# their right edge.
df_price_1day = df_price.resample("D", closed="right", label="right").mean()
df_price_1day.head(3)

In [None]:
# Compute daily returns as the percentage change between consecutive days.
# NOTE(review): depending on the pandas version, `pct_change()` may forward-fill
# missing values before computing the change — TODO confirm.
df_returns_1day = df_price_1day.pct_change()
df_returns_1day.head(3)

In [None]:
# Pairwise correlation of daily returns between all columns, plotted as a
# heatmap.
corr_matrix_1day = df_returns_1day.corr()
_ = coplotti.plot_heatmap(corr_matrix_1day)

Resampling to 1 day makes clusters much more visible. <br>
If we look at the correlation numbers, we can see that the same currency on different exchanges has a correlation above ~0.94, while different currencies are much less correlated.

Therefore, it seems better to use 1-day frequency for detecting similar currencies.

In [None]:
# Cluster the daily return series; the return value is discarded.
# NOTE(review): the meaning of `11` is not visible here — presumably the
# number of clusters; TODO confirm and consider naming it.
_ = coplotti.cluster_and_select(df_returns_1day, 11)

In [None]:
# Plot the daily correlation matrix as a hierarchically-clustered heatmap.
_ = sns.clustermap(corr_matrix_1day, figsize=(20, 20))

In [None]:
# Display the 10 most correlated *other* series for each currency pair, using
# the correlations of daily returns.
for colname in corr_matrix_1day.columns:
    # Drop the correlation of the series with itself: it is trivially the
    # maximum and would otherwise take the first slot of every top-10 list,
    # leaving only 9 real candidates.
    corr_srs = corr_matrix_1day[colname].drop(colname)
    # Rank the remaining series from most to least correlated.
    corr_srs_sorted = corr_srs.sort_values(ascending=False)
    display(corr_srs_sorted.head(10))