# Imports

In [1]:
import logging
import os

import pandas as pd

import core.config.config_ as cconconf
import helpers.hdbg as hdbg
import helpers.hprint as hprint
import helpers.hsql as hsql
import im_v2.ccxt.data.client as icdcl
import im_v2.im_lib_tasks as imvimlita

  from tqdm.autonotebook import tqdm


[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-88154955-7272-4aff-b8b6-e82108025a09.json'


In [2]:
# Initialize logging through the project helper at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

# Logger used by this notebook.
_LOG = logging.getLogger(__name__)

# Apply the project's notebook configuration helper.
hprint.config_notebook()



  File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/venv/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/venv/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/venv/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
    self.io_loop.start()
  File "/venv/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
    self._run_once()
  File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
    handle._run()
  File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/ve



# Config

In [3]:
def get_cmtask1704_config_ccxt() -> cconconf.Config:
    """
    Get the config specific to CmTask1704 (CCXT data).

    The config consists of two sub-configs:
    - `load`: DB connection built from the "dev" env file, AWS profile, root
      dir of the historical data on S3, data snapshot and partition mode
    - `data`: vendor name and the UTC start / end timestamps of the period

    :return: populated config
    """
    config = cconconf.Config()
    # Load parameters.
    config.add_subconfig("load")
    env_file = imvimlita.get_db_env_path("dev")
    connection_params = hsql.get_connection_info_from_env_file(env_file)
    config["load"]["connection"] = hsql.get_connection(*connection_params)
    config["load"]["aws_profile"] = "ck"
    # S3 URIs always use "/" as separator, so join explicitly instead of relying
    # on `os.path.join()`, which uses the separator of the local OS.
    config["load"]["data_dir_hist"] = "/".join(
        ["s3://cryptokaizen-data", "historical"]
    )
    config["load"]["data_snapshot"] = "latest"
    config["load"]["partition_mode"] = "by_year_month"
    # Data parameters.
    config.add_subconfig("data")
    config["data"]["vendor"] = "CCXT"
    config["data"]["start_date"] = pd.Timestamp("2022-04-01", tz="UTC")
    config["data"]["end_date"] = pd.Timestamp("2022-04-15", tz="UTC")
    return config

In [4]:
# Build the notebook config and print it for inspection.
# NOTE(review): the printed config includes the repr of the DB connection
# (DSN with host / user) — make sure the output is scrubbed before sharing.
config = get_cmtask1704_config_ccxt()
print(config)

load:
  connection: <connection object; dsn: 'user=postgres password=xxx dbname=im_data_db host=dev-im-db.cpox8ul7pzan.eu-north-1.rds.amazonaws.com port=5432', closed: 0>
  aws_profile: ck
  data_dir_hist: s3://cryptokaizen-data/historical
  data_snapshot: latest
  partition_mode: by_year_month
data:
  vendor: CCXT
  start_date: 2022-04-01 00:00:00+00:00
  end_date: 2022-04-15 00:00:00+00:00


# Load the data

## Real-time

In [5]:
# Gather the client settings: DB connection and vendor come from the config.
connection = config["load"]["connection"]
vendor = config["data"]["vendor"]
resample_1min = True
# Build the real-time client on top of the DB connection.
ccxt_rt_client = icdcl.CcxtCddDbClient(
    vendor,
    resample_1min,
    connection,
)

### Universe

In [6]:
# Get the universe exposed by the real-time client and show how many
# entries it contains.
rt_universe = ccxt_rt_client.get_universe()
len(rt_universe)

38

In [7]:
# Keep only the first two symbols of the real-time universe for the analysis.
full_symbols = rt_universe[:2]
full_symbols

['binance::ADA_USDT', 'binance::AVAX_USDT']

### Data Loader

In [8]:
# Time period to load, as stored in the `data` section of the config.
start_date, end_date = (
    config["data"]["start_date"],
    config["data"]["end_date"],
)

# Read the data for the selected symbols and show its shape and first rows.
data = ccxt_rt_client.read_data(full_symbols, start_date, end_date)
display(data.shape, data.head(3))



(40322, 6)

Unnamed: 0_level_0,full_symbol,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-01 00:00:00+00:00,binance::ADA_USDT,1.141,1.144,1.141,1.141,258996.4
2022-04-01 00:00:00+00:00,binance::AVAX_USDT,97.43,97.75,97.3,97.38,8115.09
2022-04-01 00:01:00+00:00,binance::ADA_USDT,1.14,1.142,1.137,1.138,96911.5


## Historical

In [9]:
# Gather the client settings from the `load` section of the config.
aws_profile = config["load"]["aws_profile"]
data_snapshot = config["load"]["data_snapshot"]
partition_mode = config["load"]["partition_mode"]
root_dir = config["load"]["data_dir_hist"]
resample_1min = True

# Build the client that reads the historical data.
historical_client = icdcl.CcxtHistoricalPqByTileClient(
    resample_1min, root_dir, partition_mode,
    data_snapshot=data_snapshot, aws_profile=aws_profile,
)

### Universe

In [10]:
# Get the universe exposed by the historical client and show how many
# entries it contains.
historical_universe = historical_client.get_universe()
len(historical_universe)

38

In [11]:
# Keep only the first two symbols of the historical universe for the analysis.
full_symbols = historical_universe[:2]
full_symbols

['binance::ADA_USDT', 'binance::AVAX_USDT']

### Data Loader

In [12]:
# Time period to load.
# NOTE(review): these bounds are hard-coded rather than taken from
# `config["data"]` — confirm this is intentional.
start_date, end_date = (
    pd.Timestamp("2021-09-01", tz="UTC"),
    pd.Timestamp("2021-09-15", tz="UTC"),
)

# Read the historical data for the selected symbols and show its shape and
# first rows.
data_hist = historical_client.read_data(full_symbols, start_date, end_date)
display(data_hist.shape, data_hist.head(3))

(40322, 6)

Unnamed: 0_level_0,full_symbol,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-01 00:00:00+00:00,binance::ADA_USDT,2.768,2.77,2.762,2.762,307378.2
2021-09-01 00:00:00+00:00,binance::AVAX_USDT,39.51,39.54,39.3,39.32,2483.93
2021-09-01 00:01:00+00:00,binance::ADA_USDT,2.763,2.765,2.761,2.764,74199.3
