In [2]:
%load_ext autoreload
%autoreload 2

import requests
import msgpack
import zlib
import traceback
import time
import os
import boto3
from concurrent.futures import ThreadPoolExecutor 
import concurrent.futures 
import datetime 
import pandas as pd 
from tqdm.notebook import tqdm
import numpy as np 
import pickle 
from src.model import OrderBookChunksCollection, OrderBook, S3OrderBookDataSource, OrderBooksDataSequenceBase, ACCEPTABLE_PRICE_DIFF


In [3]:
# Enumerate the order-book chunk keys stored in the bucket for the date range
# covered by this analysis.
order_book_col = OrderBookChunksCollection(bucket_name="btc-order-book")

all_keys = order_book_col.get_all_keys(
    start_date=datetime.datetime(2019, 10, 17),
    end_date=datetime.datetime(2021, 12, 30),
)

import random

# Shuffle in place so that a later strided slice of `all_keys` is a random
# sample. A dedicated, seeded generator keeps that sample (and every artifact
# derived from it) reproducible across kernel restarts without touching the
# global `random` state.
# NOTE(review): reproducibility also assumes get_all_keys returns the keys in
# a stable order — confirm.
random.Random(42).shuffle(all_keys)

In [4]:
# Download a sample of order books: every 300th key of the (already shuffled)
# key list is fetched from the bucket through the S3-backed data source.
s3_client = boto3.client("s3")
source = S3OrderBookDataSource(
    s3_client=s3_client,
    bucket_name="btc-order-book",
)
order_books_sample = source.get_all_order_books(keys=all_keys[0::300])

In [84]:
# Walk every sampled order book and collect, for each bid/ask whose relative
# distance from the book's current price is at most ACCEPTABLE_PRICE_DIFF:
#   * price_diff   - the relative distance itself
#   * amounts_usds - the order's amount_usd
# Both lists are appended to in lock-step, so index i describes one order.
amounts_usds, price_diff = [], []
for order_book in order_books_sample:
    reference_price = order_book.current_price

    # Bids: distance is measured downwards from the reference price, so a bid
    # priced above it yields a negative value (and always passes the filter).
    for bid in order_book.bids:
        bid_distance = 1 - bid.price / reference_price
        if bid_distance <= ACCEPTABLE_PRICE_DIFF:
            price_diff.append(bid_distance)
            amounts_usds.append(bid.amount_usd)

    # Asks: distance is measured upwards from the reference price.
    for ask in order_book.asks:
        ask_distance = ask.price / reference_price - 1.0
        if ask_distance <= ACCEPTABLE_PRICE_DIFF:
            price_diff.append(ask_distance)
            amounts_usds.append(ask.amount_usd)
    


In [35]:
# Equal-frequency binning of the sampled order amounts (USD).
# The original line was `pd.Series( + [0, np.inf])`: the left operand was lost,
# and unary `+` on a list raises TypeError. 0 and inf are appended so that the
# outermost bin edges reach 0 and +inf.
# NOTE(review): the `amounts_usds` operand and the 256-quantile `pd.qcut` step
# are reconstructed from how `s` is consumed later (`s.cat.categories` zipped
# with `range(256)`) — confirm this matches the bins that were originally saved.
s = pd.qcut(pd.Series(amounts_usds + [0, np.inf]), q=256)

In [93]:
def glue(interval_a, interval_b):
    """Join two interval indexes, fusing the bins that meet at the seam.

    The last interval of ``interval_a`` and the first interval of
    ``interval_b`` are replaced by a single interval running from the left
    edge of the former to the right edge of the latter. All other intervals
    are carried over unchanged, so the result has
    ``len(interval_a) + len(interval_b) - 1`` entries.

    The result is rebuilt with ``pd.IntervalIndex.from_tuples`` using its
    default closure; the ``closed`` setting of the inputs is not forwarded.

    Raises ``IndexError`` if either input is empty.
    """
    head = interval_a.to_tuples().to_list()
    tail = interval_b.to_tuples().to_list()

    # Bridge interval: left edge from the end of `a`, right edge from the
    # start of `b`. The two edges in between are dropped.
    seam = (head[-1][0], tail[0][1])

    merged = head[:-1]
    merged.append(seam)
    merged.extend(tail[1:])
    return pd.IntervalIndex.from_tuples(merged)
        
def get_bins(a, r, prepend_with=None, append_with=None):
    """Build one contiguous set of bins from per-range quantile cuts of ``a``.

    For every ``(t_min, t_max, n)`` in ``r`` the values of ``a`` lying in
    ``[t_min, t_max)`` are split into ``n`` equal-frequency bins with
    ``pd.qcut``. The per-range bins are then chained with ``glue``, which
    fuses the last bin of one range with the first bin of the next. Optional
    hand-written bins can be glued in front of and behind the result.

    Parameters
    ----------
    a : iterable of numbers
        Sample values. Any iterable is accepted (list, tuple, NumPy array,
        Series); it is materialised into a list once.
    r : sequence of (t_min, t_max, n)
        Value ranges and the number of quantile bins to cut in each.
    prepend_with : list of (left, right) tuples, optional
        Bins glued before the first range. Ignored when empty or ``None``.
    append_with : list of (left, right) tuples, optional
        Bins glued after the last range. Ignored when empty or ``None``.

    Returns
    -------
    pd.IntervalIndex
        The glued bins.

    Raises
    ------
    IndexError
        If ``r`` is empty.
    ValueError
        Propagated from ``pd.qcut``, e.g. when the quantile edges of a range
        are not unique.
    """
    # Materialise once: `a + [...]` on a NumPy array or Series would broadcast
    # (or fail) instead of concatenating, and the copy is loop-invariant.
    values = list(a)

    intervals = []
    for t_min, t_max, n in r:
        # t_min is added so the lowest quantile edge starts at the range
        # boundary. t_max is added too, but the strict `< t_max` filter below
        # removes it again; the gap up to t_max is closed later by `glue`.
        s = pd.Series(values + [t_min, t_max])
        s_cut = pd.qcut(s[(s < t_max) & (s >= t_min)], q=n)
        intervals.append(s_cut.cat.categories)

    if prepend_with:
        interval = glue(pd.IntervalIndex.from_tuples(prepend_with), intervals[0])
    else:
        interval = intervals[0]

    for i in range(1, len(intervals)):
        interval = glue(interval, intervals[i])

    if append_with:
        interval = glue(interval, pd.IntervalIndex.from_tuples(append_with))

    return interval
    

In [97]:
# Price-distance bins: quantile bins inside three positive ranges
# (180 + 53 + 22), preceded by four hand-written negative bins and followed by
# an open-ended tail. Each glue step fuses one pair of bins, so this yields
# 256 intervals provided every qcut returns its requested number of bins.
# `np.inf` is used instead of the `np.Inf` alias, which NumPy 2.0 removed.
price_diff_bins = get_bins(
    price_diff,
    [(0, 0.01, 180), (0.01, 0.02, 53), (0.02, 0.03, 22)],
    prepend_with=[(-np.inf, -0.05), (-0.05, -0.02), (-0.02, -0.01), (-0.01, 0)],
    append_with=[(0.03, np.inf)],
)

In [None]:
# Bin intervals for order amounts, plus a lookup from each interval to its
# position. `range(256)` caps the lookup at the first 256 intervals.
# NOTE(review): `.cat` only exists on category-dtype Series, so this assumes
# `s` is categorical (e.g. the result of pd.qcut) — confirm.
amount_best_bins = s.cat.categories
amount_usd_indices = dict(zip(s.cat.categories, range(256)))

In [99]:
import pickle

# Persist both bin definitions. The `with` blocks guarantee each file handle
# is flushed and closed, even if pickling raises part-way through.
with open("data/amount_usd_best_bins_256_001.bin", "wb") as amount_bins_file:
    pickle.dump(obj=amount_best_bins, file=amount_bins_file)

with open("data/price_diff_best_bins_256_003_handcrafted_including_negative.bin", "wb") as price_diff_bins_file:
    pickle.dump(obj=price_diff_bins, file=price_diff_bins_file)