## Imports

In [1]:
import logging

import pandas as pd

import helpers.hdatetime as hdateti
import helpers.hdbg as hdbg
import helpers.hprint as hprint
import im_v2.ccxt.data.client.ccxt_clients as imvcdccccl
import im_v2.talos.data.client.talos_clients as imvtdctacl
import im_v2.talos.data.extract.exchange_class as imvtdeexcl

In [2]:
# Initialize logging with INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

# Logger for this notebook.
_LOG = logging.getLogger(__name__)

# Apply the notebook configuration provided by `hprint`.
hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-7a92dd50-cb00-4cc1-ae02-cc5679c0d2f3.json'


## Functions

In [3]:
def get_data_from_talos_db(start_time, end_time):
    """
    Download binance BTC-USD OHLCV data through the Talos extractor.

    Relies on the notebook-level `talos_extract` object, which must be
    created before this function is called.

    NOTE(review): this queries "BTC-USD", whereas the client helpers in this
    notebook read "binance::BTC_USDT" - confirm the difference is intended.

    :param start_time: start of the query window, in any form accepted by
        `pd.Timestamp`
    :param end_time: end of the query window, in any form accepted by
        `pd.Timestamp`
    :return: downloaded data with the `timestamp` column converted via
        `hdateti.convert_unix_epoch_to_timestamp`
    """
    # Collect the request parameters in one place.
    download_kwargs = {
        "currency_pair": "BTC-USD",
        "exchange": "binance",
        "start_timestamp": pd.Timestamp(start_time),
        "end_timestamp": pd.Timestamp(end_time),
        "bar_per_iteration": 100,
    }
    ohlcv = talos_extract.download_ohlcv_data(**download_kwargs)
    # Convert every value of the `timestamp` column.
    ohlcv["timestamp"] = ohlcv["timestamp"].apply(
        hdateti.convert_unix_epoch_to_timestamp
    )
    return ohlcv


def get_data_from_ccxt_client(start_time, end_time):
    """
    Read "binance::BTC_USDT" data for a time window with the CCXT client.

    Relies on the notebook-level `ccxt_client` object, which must be created
    before this function is called.

    :param start_time: start of the window, in any form accepted by
        `pd.to_datetime`
    :param end_time: end of the window, in any form accepted by
        `pd.to_datetime`
    :return: whatever `ccxt_client._read_data_for_one_symbol` returns for the
        window
    """
    window_start = pd.to_datetime(start_time)
    window_end = pd.to_datetime(end_time)
    # The symbol is fixed; only the time window varies between calls.
    return ccxt_client._read_data_for_one_symbol(
        "binance::BTC_USDT", window_start, window_end
    )


def get_data_from_talos_client(start_time, end_time):
    """
    Read "binance::BTC_USDT" data for a time window with the Talos client.

    Relies on the notebook-level `talos_client` object, which must be created
    before this function is called.

    :param start_time: start of the window, in any form accepted by
        `pd.to_datetime`
    :param end_time: end of the window, in any form accepted by
        `pd.to_datetime`
    :return: whatever `talos_client._read_data_for_one_symbol` returns for the
        window
    """
    window_start = pd.to_datetime(start_time)
    window_end = pd.to_datetime(end_time)
    # The symbol is fixed; only the time window varies between calls.
    return talos_client._read_data_for_one_symbol(
        "binance::BTC_USDT", window_start, window_end
    )

# Talos DB

In [4]:
# Initialize the extractor used by `get_data_from_talos_db()`.
# NOTE(review): "sandbox" presumably selects the Talos sandbox environment,
# which may be why the OHLCV values in the output below are all zeros - confirm.
talos_extract = imvtdeexcl.TalosExchange("sandbox")

In [5]:
# Query with a mid-minute start (10:00:24) and an on-the-minute end (10:08:00)
# to see which bars the boundaries map to.
data_talos_db = get_data_from_talos_db(
    "2022-01-01T10:00:24.000000Z", "2022-01-01T10:08:00.000000Z"
)
# Show the first and the last returned rows.
display(data_talos_db.head(3))
display(data_talos_db.tail(3))

Unnamed: 0,timestamp,open,high,low,close,volume,ticks,end_download_timestamp
0,2022-01-01 10:01:00+00:00,0,0,0,0,0,0,2022-03-19 16:39:56.363467+00:00
1,2022-01-01 10:02:00+00:00,0,0,0,0,0,0,2022-03-19 16:39:56.363467+00:00
2,2022-01-01 10:03:00+00:00,0,0,0,0,0,0,2022-03-19 16:39:56.363467+00:00


Unnamed: 0,timestamp,open,high,low,close,volume,ticks,end_download_timestamp
4,2022-01-01 10:05:00+00:00,0,0,0,0,0,0,2022-03-19 16:39:56.363467+00:00
5,2022-01-01 10:06:00+00:00,0,0,0,0,0,0,2022-03-19 16:39:56.363467+00:00
6,2022-01-01 10:07:00+00:00,0,0,0,0,0,0,2022-03-19 16:39:56.363467+00:00


### Talos query summary

Beginning
- If proposing query for __a complete minute__ (e.g., __10:00:00__) - it starts with __exactly mentioned timestamp__ (i.e., __10:00:00__).
- If proposing query for __an incomplete minute__ (e.g., __10:00:36 or 10:00:24__) - it starts with __mentioned timestamp + 1min__ (i.e., __10:01:00__).
   - Since the ohlcv output is blank (equal to zero), it's hard to understand whether volume or prices data changes during incomplete minute query.
   
End
- If the query ends at __a complete minute__ (e.g., __10:07:00__) - the output ends with __exactly mentioned timestamp - 1min__ (i.e., __10:06:00__).
- If the query ends at __an incomplete minute__ (e.g., __10:07:36 or 10:07:24__) - the output ends with __the mentioned timestamp rounded down to the minute__ (i.e., __10:07:00__).
- If the query ends at __previous minute + 1min__ (e.g., __10:08:00__) - the output ends with __exactly mentioned timestamp - 1min__ (i.e., __10:07:00__).

# Current CCXT client

In [6]:
# Specify the params for the CCXT client: vendor name, S3 root directory,
# file extension of the stored data and the AWS profile used for access.
vendor = "CCXT"
root_dir = "s3://alphamatic-data/data"
extension = "csv.gz"
aws_profile_ccxt = "am"
# Initialize the CCXT client used by `get_data_from_ccxt_client()`.
ccxt_client = imvcdccccl.CcxtCddCsvParquetByAssetClient(
    vendor, root_dir, extension, aws_profile=aws_profile_ccxt
)

In [7]:
# Query with a mid-minute start (10:00:02) and an on-the-minute end (10:08:00).
# NOTE(review): the date is in 2020 while the Talos queries use 2022,
# presumably because the CCXT file read here is a 20210924 snapshot - confirm.
data_ccxt_client = get_data_from_ccxt_client(
    "2020-01-01 10:00:02", "2020-01-01 10:08:00"
)

INFO  Reading data for vendor=`CCXT`, exchange id='binance', currencies='BTC_USDT' from file='s3://alphamatic-data/data/ccxt/20210924/binance/BTC_USDT.csv.gz'...


In [8]:
# Show the first and the last returned rows to inspect the boundary bars.
display(data_ccxt_client.head(3))
display(data_ccxt_client.tail(3))

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 10:01:00+00:00,7187.12,7190.61,7185.93,7188.86,14.830744
2020-01-01 10:02:00+00:00,7190.41,7191.88,7188.1,7191.47,10.590978
2020-01-01 10:03:00+00:00,7191.92,7195.66,7191.59,7194.09,9.57639


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 10:06:00+00:00,7194.0,7194.99,7193.63,7194.98,8.034727
2020-01-01 10:07:00+00:00,7194.99,7196.27,7194.44,7196.17,15.91628
2020-01-01 10:08:00+00:00,7196.2,7198.08,7195.63,7197.45,7.849214


### Current CCXT client summary

Beginning
- If proposing query for __a complete minute__ (e.g., __10:00:00__) - it starts with __exactly mentioned timestamp__ (i.e., __10:00:00+00:00__).
- If proposing query for __an incomplete minute__ (e.g., __10:00:36 or 10:00:24__) - it starts with __mentioned timestamp + 1min__ (i.e., __10:01:00__).
   - Since the ohlcv output is available, one can check through volume or prices data that changing the query within a minute (e.g., 10:00:02 or 10:00:45) doesn't affect the numbers, so it means that the timestamp indicates the end of the time period.
   
End
- If the query ends at __a complete minute__ (e.g., __10:07:00__) - the output ends with __exactly mentioned timestamp__ (i.e., __10:07:00__).
- If the query ends at __an incomplete minute__ (e.g., __10:07:36 or 10:07:24__) - the output ends with __the mentioned timestamp rounded down to the minute__ (i.e., __10:07:00__).
- If the query ends at __previous minute + 1min__ (e.g., __10:08:00__) - the output ends with __exactly mentioned timestamp__ (i.e., __10:08:00__).

# Current implemented Talos client

In [9]:
# Initialize the Talos client used by `get_data_from_talos_client()`, given
# the S3 root directory of the data and the AWS profile used for access.
root_dir_talos = "s3://cryptokaizen-data/historical"
aws_profile_talos = "ck"
talos_client = imvtdctacl.TalosParquetByTileClient(
    root_dir_talos, aws_profile=aws_profile_talos
)

In [10]:
def get_data_from_talos_client(start_time, end_time):
    """
    Read "binance::BTC_USDT" data for a time window with the Talos client.

    Relies on the notebook-level `talos_client` object.

    NOTE(review): a function with the same name and behavior is already
    defined in the "Functions" section; this cell redefines it - consider
    keeping a single definition.

    :param start_time: start of the window, in any form accepted by
        `pd.to_datetime`
    :param end_time: end of the window, in any form accepted by
        `pd.to_datetime`
    :return: whatever `talos_client._read_data_for_one_symbol` returns for the
        window
    """
    symbol = "binance::BTC_USDT"
    # Normalize both boundaries, start first and then end.
    start_time, end_time = (
        pd.to_datetime(boundary) for boundary in (start_time, end_time)
    )
    result = talos_client._read_data_for_one_symbol(symbol, start_time, end_time)
    return result

In [11]:
# Query with an on-the-minute start (10:00:00) and a mid-minute end (10:07:45)
# to see which bars the boundaries map to.
data_talos_client = get_data_from_talos_client(
    "2022-01-01 10:00:00", "2022-01-01 10:07:45"
)
# Show the first and the last returned rows.
display(data_talos_client.head(3))
display(data_talos_client.tail(3))

INFO  Reading data for `Talos`, exchange id='binance', currencies='BTC_USDT'...


Unnamed: 0,open,high,low,close,volume
2022-01-01 10:00:00+00:00,47143.98,47222.22,47135.53,47215.05,14.66732
2022-01-01 10:01:00+00:00,47215.05,47260.0,47175.25,47232.95,26.69466
2022-01-01 10:02:00+00:00,47232.95,47252.45,47213.43,47233.58,5.24489


Unnamed: 0,open,high,low,close,volume
2022-01-01 10:05:00+00:00,47221.55,47221.55,47173.78,47178.01,4.7045
2022-01-01 10:06:00+00:00,47178.01,47184.39,47126.53,47168.61,8.06666
2022-01-01 10:07:00+00:00,47168.61,47189.46,47151.22,47180.7,7.32318


### Talos client summary

Beginning
- If proposing query for __a complete minute__ (e.g., __10:00:00__) - it starts with __exactly mentioned timestamp__ (i.e., __10:00:00__).
- If proposing query for __an incomplete minute__ (e.g., __10:00:36 or 10:00:24__) - it starts with __mentioned timestamp + 1min__ (i.e., __10:01:00__).
   - Since the ohlcv output is available, one can check through volume or prices data that changing the query within a minute (e.g., 10:00:02 or 10:00:45) doesn't affect the numbers, so it means that the timestamp indicates the end of the time period.

End
- If the query ends at __a complete minute__ (e.g., __10:07:00__) - the output ends with __exactly mentioned timestamp__ (i.e., __10:07:00__).
- If the query ends at __an incomplete minute__ (e.g., __10:07:36 or 10:07:24__) - the output ends with __the mentioned timestamp rounded down to the minute__ (i.e., __10:07:00__).
- If the query ends at __previous minute + 1min__ (e.g., __10:08:00__) - the output ends with __exactly mentioned timestamp__ (i.e., __10:08:00__).