In [21]:
import pandas as pd
import json
from tqdm.notebook import tqdm
import requests

In [29]:
# Module-level lookup tables, filled in by gather_symbology().
map_symbols = {}  # websocket pair name -> index of the pair in the symbology list
prc_dgts = {}     # symbol index -> number of price decimals ('pair_decimals')
sz_dgts = {}      # symbol index -> number of size decimals ('lot_decimals')

In [30]:
def gather_symbology(list_pairs):
    """Build the symbology for the requested Kraken pairs.

    Downloads the public pair catalogue from
    https://api.kraken.com/0/public/AssetPairs, re-keys it by websocket name
    (``wsname``) and, for every entry of ``list_pairs`` in order, records its
    index and decimal precision in the module-level ``map_symbols``,
    ``prc_dgts`` and ``sz_dgts`` tables. The tables are reset on every call so
    they always describe exactly the returned list.

    Args:
        list_pairs: iterable of websocket pair names, e.g. ``['XBT/USD']``.

    Returns:
        A list with one dict per requested pair, in request order, holding
        ``symbol`` (the websocket name), ``price_tick``
        (``10 ** pair_decimals``) and ``qty_tick`` (``10 ** lot_decimals``).

    Raises:
        RuntimeError: if the API response reports errors or has no result.
        KeyError: if a requested pair has no entry (by websocket name) in the
            catalogue; the module-level tables are left untouched.
        requests.HTTPError: if the HTTP status is an error
            (raised by ``raise_for_status``).
    """
    global map_symbols, prc_dgts, sz_dgts

    # A timeout keeps a stalled connection from hanging the kernel forever.
    req = requests.get('https://api.kraken.com/0/public/AssetPairs', timeout=30)
    req.raise_for_status()
    payload = req.json()
    if payload.get('error') or 'result' not in payload:
        raise RuntimeError(f"Kraken AssetPairs request failed: {payload.get('error')}")
    dict_products_api = payload['result']

    # Re-key the catalogue from Kraken's REST name to the websocket name;
    # entries without a websocket name are skipped.
    dict_products = {
        specs['wsname']: specs
        for specs in dict_products_api.values()
        if 'wsname' in specs
    }

    # Validate the whole request before touching the shared tables, so a bad
    # pair name cannot leave them half-populated.
    list_pairs = list(list_pairs)
    missing = [sec for sec in list_pairs if sec not in dict_products]
    if missing:
        raise KeyError(f'pairs not found in Kraken AssetPairs: {missing}')

    # Indices restart at 0 on every call, so entries from a previous call
    # would collide with the new ones if they were kept.
    map_symbols.clear()
    prc_dgts.clear()
    sz_dgts.clear()

    symbology = []
    for n, sec in enumerate(list_pairs):
        pair_specs = dict_products[sec]
        wsname = pair_specs['wsname']  # websocket name
        map_symbols[wsname] = n

        prc_dgts[n] = int(pair_specs['pair_decimals'])
        sz_dgts[n] = int(pair_specs['lot_decimals'])

        symbology.append({
            'symbol': wsname,
            'price_tick': 10 ** prc_dgts[n],
            'qty_tick': 10 ** sz_dgts[n],
        })

    return symbology

In [33]:
def str_to_intfield(s, ndigits):
    """Convert a decimal string into an integer scaled by ``10 ** ndigits``.

    The conversion works on the characters of ``s`` and never goes through a
    float, so ``'11646.90000'`` with ``ndigits=1`` gives exactly ``116469``.

    Args:
        s: decimal number as text, with or without a ``'.'``; surrounding
            whitespace is ignored.
        ndigits: non-negative number of fractional digits kept in the result.

    Returns:
        The integer ``value * 10 ** ndigits``.

    Raises:
        AssertionError: if ``ndigits`` is negative, or if ``s`` has more
            significant (non-trailing-zero) fractional digits than
            ``ndigits``; the message is ``'<fractional digits>,<ndigits>'``.
        ValueError: if ``s`` is not a plain decimal number (e.g. ``'1.2.3'``).
    """
    assert ndigits >= 0

    s = s.strip()
    whole, dot, frac = s.partition('.')
    if not dot:
        return int(s) * int(10 ** ndigits)

    # Trailing zeros carry no information and must not count against ndigits.
    frac_sig = frac.rstrip('0')
    assert len(frac_sig) <= ndigits, f'{len(frac_sig)},{ndigits}'

    if frac and not frac_sig and whole in ('', '+', '-'):
        # Inputs such as '.0' or '-.00' have no digit left once the zeros are
        # stripped; they are still a valid spelling of zero.
        return 0

    return int(whole + frac_sig) * int(10 ** (ndigits - len(frac_sig)))

In [55]:
# Build the symbology for the single pair analysed in this notebook; the call
# also fills the module-level map_symbols / prc_dgts / sz_dgts tables.
pair = 'XBT/USD'
symbology = gather_symbology([pair])
symbology

[{'symbol': 'XBT/USD', 'price_tick': 10, 'qty_tick': 100000000}]

In [7]:
# Directory holding the recorded files read below (connector_0.txt and the snapshot files).
dir_name = './60min'

In [61]:
# Collect the payload (second element) of every message in the connector recording.
list_connector = []

with open(f'{dir_name}/connector_0.txt', 'r') as fin:
    for line in tqdm(fin):
        msg = json.loads(line)
        # The last two elements of a record are checked as channel name and
        # pair: only 'trade' and 'book-100' messages for `pair` are accepted.
        assert msg[-1] == pair and msg[-2] in ('trade', 'book-100'), f'{msg}'
        list_connector.append(msg[1])

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [62]:
def book_from_snapshot(snapshot, symbol_idx=0):
    """Build an integer-keyed order book from a book snapshot payload.

    Args:
        snapshot: mapping with an ``'as'`` (asks) and/or ``'bs'`` (bids) list;
            each entry is a sequence whose first two items are the price and
            the quantity as decimal strings.
        symbol_idx: key of the instrument in the module-level ``prc_dgts`` /
            ``sz_dgts`` precision tables. Defaults to 0, the first pair
            registered by ``gather_symbology``.

    Returns:
        ``{'ask': {price: qty}, 'bid': {price: qty}}`` where prices and
        quantities are the integers produced by ``str_to_intfield``.

    Raises:
        AssertionError: if the payload has neither ``'as'`` nor ``'bs'``.
        KeyError: if ``symbol_idx`` is not in the precision tables.
    """
    assert 'as' in snapshot or 'bs' in snapshot, f'{snapshot}'

    book = {'ask': {}, 'bid': {}}
    for key, side in (('as', 'ask'), ('bs', 'bid')):
        if key not in snapshot:
            continue
        levels = book[side]
        for level in snapshot[key]:
            prc = str_to_intfield(level[0], prc_dgts[symbol_idx])
            qty = str_to_intfield(level[1], sz_dgts[symbol_idx])
            levels[prc] = qty
    return book

In [63]:
# The first payload of the connector recording is used as the initial book.
# NOTE(review): this assumes the recording starts with a snapshot ('as'/'bs')
# message; book_from_snapshot asserts it.
snap = list_connector[0]
book = book_from_snapshot(snap)

**Given the snapshot, I align it with the connector stream and cross-check that the snapshot is correct.**

In [41]:
import os

In [47]:
# Snapshot files in the data directory, ordered by the integer between the last
# '_' and the extension (so snapshot_2 comes before snapshot_10).
list_snapshots = sorted(
    (fname for fname in os.listdir(f'{dir_name}') if 'snapshot' in fname),
    key=lambda fname: int(fname.rsplit('_', 1)[-1].partition('.')[0]),
)
list_snapshots

['snapshot_0.txt',
 'snapshot_1.txt',
 'snapshot_2.txt',
 'snapshot_3.txt',
 'snapshot_4.txt',
 'snapshot_5.txt',
 'snapshot_6.txt',
 'snapshot_7.txt',
 'snapshot_8.txt',
 'snapshot_9.txt',
 'snapshot_10.txt',
 'snapshot_11.txt']

In [57]:
# Read every snapshot recording: the first line carries the book snapshot
# ('as'/'bs' payload), the remaining lines are the messages recorded after it.
# NOTE(review): snap and l_messages are rebound on each iteration, so after the
# loop they only describe the last file in list_snapshots.
for fname in list_snapshots:

    l_messages = []
    with open(f'{dir_name}/{fname}', 'r') as fin:
        snap = json.loads(fin.readline())[1]
        assert 'as' in snap or 'bs' in snap, f'{snap}'

        for line in fin:
            msg = json.loads(line)
            # Same validation as for the connector recording: channel name and
            # pair are the last two elements of each record.
            assert msg[-1] == pair and msg[-2] in ('trade', 'book-100'), f'{msg}'
            l_messages.append(msg[1])


In [64]:
def align_snapshot(connector_stream, snapshot_messages):
    """Align the messages recorded after a snapshot with the connector stream.

    NOTE(review): this function is unfinished. The draft ended in a dangling
    ``for`` inside the loop over ``snapshot_messages``, which is a SyntaxError
    and stops the notebook from running past this cell. The matching rule and
    the return value were never written, so this version validates its inputs
    and raises ``NotImplementedError`` instead of guessing them.

    Args:
        connector_stream: list of connector messages
            (presumably ``list_connector`` — TODO confirm).
        snapshot_messages: list of messages read from a snapshot recording
            (presumably ``l_messages`` — TODO confirm).

    Raises:
        AssertionError: if either argument is not a list.
        NotImplementedError: always, until the alignment logic is written.
    """
    assert isinstance(connector_stream, list)
    assert isinstance(snapshot_messages, list)

    # TODO: finish the alignment. The draft initialised two counters,
    # n_algnd = 0 and starting_el = 0, and began iterating over
    # snapshot_messages with an unwritten inner loop.
    raise NotImplementedError('align_snapshot: alignment logic is not implemented yet')

[{'a': [['11646.90000', '0.00000000', '1596666698.661190'],
   ['11706.00000', '0.17085000', '1596666669.832774', 'r']],
  'c': '4131924453'},
 {'a': [['11659.60000', '0.10348648', '1596666698.681813']],
  'c': '4131924453'},
 {'a': [['11661.90000', '0.72426246', '1596666698.686038']],
  'c': '4131924453'},
 {'a': [['11664.20000', '3.53657250', '1596666698.751976']],
  'c': '4131924453'},
 {'a': [['11664.20000', '4.03412406', '1596666698.767277']],
  'c': '4131924453'},
 {'b': [['11640.00000', '1.57942622', '1596666698.790302']],
  'c': '3435430246'},
 {'b': [['11639.60000', '0.10000000', '1596666698.800300']], 'c': '38027548'},
 {'a': [['11648.10000', '0.00000000', '1596666698.805125'],
   ['11704.20000', '1.06000000', '1596666647.223134', 'r']],
  'c': '1047363772'},
 {'a': [['11648.70000', '1.50000000', '1596666698.816706']], 'c': '716383003'},
 {'a': [['11648.00000', '0.00000000', '1596666698.852017'],
   ['11704.20000', '1.06000000', '1596666647.223134', 'r']],
  'c': '1533875763'

In [58]:
# Inspect the messages read from the snapshot recording.
# NOTE(review): this displays the whole list; consider a slice such as
# l_messages[:10] to keep the saved output small.
l_messages

[{'a': [['11646.90000', '0.00000000', '1596666698.661190'],
   ['11706.00000', '0.17085000', '1596666669.832774', 'r']],
  'c': '4131924453'},
 {'a': [['11659.60000', '0.10348648', '1596666698.681813']],
  'c': '4131924453'},
 {'a': [['11661.90000', '0.72426246', '1596666698.686038']],
  'c': '4131924453'},
 {'a': [['11664.20000', '3.53657250', '1596666698.751976']],
  'c': '4131924453'},
 {'a': [['11664.20000', '4.03412406', '1596666698.767277']],
  'c': '4131924453'},
 {'b': [['11640.00000', '1.57942622', '1596666698.790302']],
  'c': '3435430246'},
 {'b': [['11639.60000', '0.10000000', '1596666698.800300']], 'c': '38027548'},
 {'a': [['11648.10000', '0.00000000', '1596666698.805125'],
   ['11704.20000', '1.06000000', '1596666647.223134', 'r']],
  'c': '1047363772'},
 {'a': [['11648.70000', '1.50000000', '1596666698.816706']], 'c': '716383003'},
 {'a': [['11648.00000', '0.00000000', '1596666698.852017'],
   ['11704.20000', '1.06000000', '1596666647.223134', 'r']],
  'c': '1533875763'