# Digest minute crypto data (prices) and coinmarketcap (market)

- https://www.bitfinex.com/
- https://medium.com/coinmonks/how-to-get-historical-crypto-currency-data-954062d40d2d
- https://www.kaggle.com/tencars/392-crypto-currency-pairs-at-minute-resolution


In [29]:
from glob import glob

from tqdm import tqdm

from utils import reduce_footprint
%matplotlib inline

from pathlib import Path
import pandas as pd

idx = pd.IndexSlice

In [30]:

# https://stackoverflow.com/questions/16466670/fill-nan-in-candlestick-ohlcv-data
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
def nans_to_prev_close_method1(df):
    df['volume'] = df['volume'].fillna(0.0)  # volume should always be 0 (if there were no trades in this interval)
    df['close'] = df.fillna(method='pad')  # ie pull the last close into this close
    # now copy the close that was pulled down from the last timestep into this row, across into o/h/l
    df['open'] = df['open'].fillna(df['close']) 
    df['low'] = df['low'].fillna(df['close'])
    df['high'] = df['high'].fillna(df['close'])
    # print(df.head(10))
    return df

# 
def resample_df_ohlcv(df, timeframe='1Min'):
    # Created a dictionary to tell Pandas how to re-sample, if this isn't in place it will re-sample each column separately
    ohlcv_dict = {'open':'first', 'high':'max', 'low':'min', 'close': 'last', 'volume': 'sum'}

    # Resample to 15Min (this format is needed) as per ohlc_dict, then remove any line with a NaN
    df = df.resample(timeframe).agg(ohlcv_dict)
    df = nans_to_prev_close_method1(df)
    # Resample mixes the columns so lets re-arrange them
    cols=['open', 'high', 'low', 'close', 'volume']  
    df = df[cols]
    return df




In [31]:
data_path ='../data/crypto/minute/'

cryptos = {
            'btcusd':  {'symbol': 'btc', 'base': 'usd', },
            'btceur':  {'symbol': 'btc', 'base': 'eur', },
            'btcgbp':  {'symbol': 'btc', 'base': 'gbp', },
            'btcjpy':  {'symbol': 'btc', 'base': 'jpy', },
            'btcxch':  {'symbol': 'btc', 'base': 'xch', },
            'ethusd':  {'symbol': 'eth', 'base': 'usd', },
            'ethust':  {'symbol': 'eth', 'base': 'ust', },
            'etheur':  {'symbol': 'eth', 'base': 'eur', },
            'ethbtc':  {'symbol': 'eth', 'base': 'btc', },
            'ethjpy':  {'symbol': 'eth', 'base': 'jpy', },
            'ethgbp':  {'symbol': 'eth', 'base': 'gbp', },
            # Binance Coin
            'xrpusd':  {'symbol': 'xrp', 'base': 'usd', },
            'xrpust':  {'symbol': 'xrp', 'base': 'ust', },
            'xrpbtc':  {'symbol': 'xrp', 'base': 'btc', },
            'ustusd':  {'symbol': 'usdt', 'base': 'usd', },
            #'ustbtc':  {'symbol': 'usdt', 'base': 'btc', }, #notfound
            'adausd':  {'symbol': 'ada', 'base': 'usd', }, # Cardano
            'adaust':  {'symbol': 'ada', 'base': 'ust', }, # Cardano
            'adabtc':  {'symbol': 'ada', 'base': 'btc', }, # Cardano
            'dogusd':  {'symbol': 'dog', 'base': 'usd', 'name': 'megadoge'},
            'dogust':  {'symbol': 'dog', 'base': 'ust', 'name': 'megadoge'},
            'dogbtc':  {'symbol': 'dog', 'base': 'btc', 'name': 'megadoge'},
            'dogeusd':  {'symbol': 'doge', 'base': 'usd', 'name': 'doge'},
            'dogeust':  {'symbol': 'doge', 'base': 'ust', 'name': 'doge'},
            'dotusd':  {'symbol': 'dot', 'base': 'usd', 'name': 'polkadot'}, # Polkadot
            'dotust':  {'symbol': 'dot', 'base': 'ust', 'name': 'polkadot' }, # Polkadot
            'dotbtc':  {'symbol': 'dot', 'base': 'btc', 'name': 'polkadot' }, # Polkadot
            'uniust':  {'symbol': 'uni', 'base': 'ust', 'name': 'Uniswap'},
            'uniusd':  {'symbol': 'uni', 'base': 'usd', 'name': 'Uniswap'},
            # 'unibtc':  {'symbol': 'uni', 'base': 'btc', },#notfound
            'ltcusd':  {'symbol': 'ltc', 'base': 'usd', },
            'ltcust':  {'symbol': 'ltc', 'base': 'ust', },
            'ltcbtc':  {'symbol': 'ltc', 'base': 'btc', },
            'bchnusd':  {'symbol': 'bch', 'base': 'usd', }, # Bitcoin Cash ? bch = bchn?
            # 'bchnbtc':  {'symbol': 'bch', 'base': 'btc', }, # Bitcoin Cash ? bch = bchn? #notfound
            'linkusd':  {'symbol': 'link', 'base': 'usd', }, # Chainlink
            'linkust':  {'symbol': 'link', 'base': 'ust', }, # Chainlink
            # 'linkbtc':  {'symbol': 'link', 'base': 'btc', }, # Chainlink #notfound
            'udcusd':  {'symbol': 'usdc', 'base': 'usd', },   # USDC
            # 'udcbtc':  {'symbol': 'usdc', 'base': 'btc', },   # USDC
            'vetusd':  {'symbol': 'vet', 'base': 'usd', }, # VeChain
            'vetbtc':  {'symbol': 'vet', 'base': 'btc', }, # VeChain
            'solust':  {'symbol': 'sol', 'base': 'ust',}, # Solana
            'solusd':  {'symbol': 'sol', 'base': 'usd',}, # Solana
            # 'solbtc':  {'symbol': 'sol', 'base': 'btc',}, # Solana #notfound
            'xlmusd':  {'symbol': 'xlm', 'base': 'usd',}, # Stellar
            'xlmust':  {'symbol': 'xlm', 'base': 'ust',}, # Stellar
            'xlmeth':  {'symbol': 'xlm', 'base': 'eth',}, # Stellar
            'xlmbtc':  {'symbol': 'xlm', 'base': 'btc',}, # Stellar
            # 'theta',
            'filusd':  {'symbol': 'fil', 'base': 'usd',}, # filecoin
            # 'filbtc':  {'symbol': 'fil', 'base': 'btc',}, # filecoin
            'okbusd': {'symbol': 'okb', 'base': 'usd'},
            'okbust': {'symbol': 'okb', 'base': 'ust'},
            # 'okbbtc': {'symbol': 'okb', 'base': 'btc'}, #notfound
            'wbtusd': {'symbol': 'wbtc', 'base': 'usd'},
            'wbteth': {'symbol': 'wbtc', 'base': 'eth'}, #notfound

            'trxusd':  {'symbol': 'trx', 'base': 'usd',}, # Tron
            'trxeth':  {'symbol': 'trx', 'base': 'eth',}, # Tron
            'trxeur':  {'symbol': 'trx', 'base': 'eur',}, # Tron
            'trxbtc':  {'symbol': 'trx', 'base': 'btc',}, # Tron
            # Binance USD busd
            
            'bsvbtc': {'symbol': 'bsv', 'base': 'btc'},

            'xmrusd':  {'symbol': 'xmr', 'base': 'usd',}, # Monero
            'xmrust':  {'symbol': 'xmr', 'base': 'ust',}, # Monero
            'xmrbtc':  {'symbol': 'xmr', 'base': 'btc',}, # Monero
            'lunausd':  {'symbol': 'luna', 'base': 'usd',}, # Luna
            'lunaust':  {'symbol': 'luna', 'base': 'ust',}, # Luna
            # 'lunabtc':  {'symbol': 'luna', 'base': 'btc',}, # Luna #notfound
            'neousd':  {'symbol': 'neo', 'base': 'usd',}, #
            'neoeur':  {'symbol': 'neo', 'base': 'eur',}, #
            'neobtc':  {'symbol': 'neo', 'base': 'btc',}, #
            # Klay
            'iotusd':  {'symbol': 'miota', 'base': 'usd',}, # iota 
            'iotbtc':  {'symbol': 'miota', 'base': 'btc',}, # iota
            'iotgbp':  {'symbol': 'miota', 'base': 'gbp',}, # iota
            'atousd':  {'symbol': 'atom', 'base': 'usd',}, # cosmos
            'atobtc':  {'symbol': 'atom', 'base': 'btc',}, # cosmos
            'atoeth':  {'symbol': 'atom', 'base': 'eth',}, # cosmos
            # 'cakusd':  {'symbol': 'cake', 'base': 'usd',}, # pancake
            'aaveusd':  {'symbol': 'aave', 'base': 'usd',}, # aave
            'aaveust':  {'symbol': 'aave', 'base': 'ust',}, # aave
            #notfound 'aavebtc':  {'symbol': 'aave', 'base': 'btc',}, # aave
            'etcbtc': {'symbol': 'etc', 'base': 'btc'},
            'etcusd': {'symbol': 'etc', 'base': 'usd'},
            # 'maticusd': {'symbol': '...', 'base': 'usd'},
            # ht
            'fttusd': {'symbol': 'ftt', 'base': 'usd'},
            #notfound'fttbtc': {'symbol': 'ftt', 'base': 'btc'},
            # cro
            'bttusd': {'symbol': 'btt', 'base': 'usd'},
            #notfound 'bttbtc': {'symbol': 'btt', 'base': 'btc'},
            # cusd
            'mkrusd': {'symbol': 'mkr', 'base': 'usd'},
            'mkreth': {'symbol': 'mkr', 'base': 'eth'},
            'mkrbtc': {'symbol': 'mkr', 'base': 'btc'},
            # 'mkreth': {'symbol': 'mkr', 'base': 'eth'},
            # 'mkrbtc': {'symbol': 'mkr', 'base': 'btc'},
            'xtzusd': {'symbol': 'xtz', 'base': 'usd'},
            'xtzbtc': {'symbol': 'xtz', 'base': 'btc'},
            # 'xtzbtc': {'symbol': 'xtz', 'base': 'btc'},
            # ceth
            'algusd': {'symbol': 'algo', 'base': 'usd'},
            'algbtc': {'symbol': 'algo', 'base': 'btc'},
            'algust': {'symbol': 'algo', 'base': 'ust'},
            'avaxusd': {'symbol': 'avax', 'base': 'usd'},
            'avaxust': {'symbol': 'avax', 'base': 'ust'},
            # notfound 'avaxbtc': {'symbol': 'avax', 'base': 'btc'},
            'ksmusd': {'symbol': 'ksm', 'base': 'usd'},
            # notfound 'ksmbtc': {'symbol': 'ksm', 'base': 'btc'},
            'daiusd': {'symbol': 'dai', 'base': 'usd'},
            'daibtc': {'symbol': 'dai', 'base': 'btc'},
            'daieth': {'symbol': 'dai', 'base': 'eth',},

            # cdai
            # dash
            'dshusd': {'symbol': 'dash', 'base': 'usd'},
            'dshbtc': {'symbol': 'dash', 'base': 'btc'},

            # notfound 'xemusd': {'symbol': 'xem', 'base': 'usd'},
            # notfound 'xembtc': {'symbol': 'xem', 'base': 'btc'},
            'zecbtc': {'symbol': 'zec', 'base': 'btc'}, # Zcash
            'zecusd': {'symbol': 'zec', 'base': 'usd'}, # Zcash
            'paxusd': {'symbol': 'pax', 'base': 'usd'}, # https://www.paxos.com/pax/
            'b21xusd': {'symbol': 'b21x', 'base': 'usd'}, # https://www.tradingview.com/symbols/B21XUST/
            'b21xust': {'symbol': 'b21x', 'base': 'ust'}, # https://www.tradingview.com/symbols/B21XUST/
            'qtmusd': {'symbol': 'qtum', 'base': 'usd'},
            '1inchusd': {'symbol': '1inch', 'base': 'usd'},
            '1inchust': {'symbol': '1inch', 'base': 'ust'},
            'mtnusd': {'symbol': 'mtn', 'base': 'usd'},
            # notfound 'mtnbtc': {'symbol': 'mtn', 'base': 'btc'},
            'manusd': {'symbol': 'man', 'base': 'usd'},
            # notfound 'manbtc': {'symbol': 'man', 'base': 'btc'},
            'albtusd': {'symbol': 'albt', 'base': 'usd'}, # AllianceBlock
            'albtust': {'symbol': 'albt', 'base': 'ust'}, # AllianceBlock
            'ampbtc': {'symbol': 'amp', 'base': 'btc'},
            'ampusd': {'symbol': 'amp', 'base': 'usd'},
            'ampust': {'symbol': 'amp', 'base': 'ust'},
            'antusd': {'symbol': 'ant', 'base': 'usd'}, # aragon usd
            'antbtc': {'symbol': 'ant', 'base': 'btc'}, # aragon usd
            'anteth': {'symbol': 'ant', 'base': 'eth'}, # aragon usd
            'astusd': {'symbol': 'ast', 'base': 'usd'}, # airswap
            'avtusd': {'symbol': 'avt', 'base': 'usd'}, # Aventus
            'balusd': {'symbol': 'bal', 'base': 'usd'}, # Balancer
            'balust': {'symbol': 'bal', 'base': 'ust'}, # Balancer
            'bandusd': {'symbol': 'band', 'base': 'usd'}, # band protocol (BAND)
            'bandust': {'symbol': 'band', 'base': 'ust'}, # band protocol (BAND)
            'batbtc': {'symbol': 'bat', 'base': 'btc'}, # BasicAttentionToken
            'batusd': {'symbol': 'bat', 'base': 'usd'}, # BasicAttentionToken
            'bateth': {'symbol': 'bat', 'base': 'eth'}, # BasicAttentionToken
            'bchabcusd': {'symbol': 'bcha', 'base': 'usd'}, # bitcoin cash abc https://es.tradingview.com/chart/BCHABUSD/SCPaSBCD-BCHABUSD-Bitcoin-Cash-ABC/
            'bestusd': {'symbol': 'best', 'base': 'usd'}, # Bitpanda Ecosystem Token
            'bftusd': {'symbol': 'bft', 'base': 'usd'}, # BnkToTheFuture
            'bmiusd': {'symbol': 'bmi', 'base': 'usd'}, # Bridge Mutual
            'bmiust': {'symbol': 'bmi', 'base': 'ust'}, # Bridge Mutual
            'bntusd': {'symbol': 'bnt', 'base': 'usd'}, # bancor
            'bosonusd': {'symbol': 'boson', 'base': 'usd'}, # Boson Protocol
            'bosonust': {'symbol': 'boson', 'base': 'ust'}, # Boson Protocol
            'boxusd': {'symbol': 'box', 'base': 'usd'}, # Contentbox
            'bsvusd': {'symbol': 'bsv', 'base': 'usd'}, # bitcoin satoshi vision
            'btgbtc': {'symbol': 'btg', 'base': 'btc'}, #bitcoin gold
            'btgusd': {'symbol': 'btg', 'base': 'usd'}, #bitcoin gold
            'btseusd': {'symbol': 'btse', 'base': 'usd'}, # btse
            'celusd': {'symbol': 'cel', 'base': 'usd'}, # ?
            'celust': {'symbol': 'cel', 'base': 'ust'}, # ?
            'chzusd': {'symbol': 'chz', 'base': 'usd'}, # chiliz
            'chzust': {'symbol': 'chz', 'base': 'ust'}, # chiliz
            'clousd': {'symbol': 'clo', 'base': 'usd'}, # calisto network
            'cndusd': {'symbol': 'cnd', 'base': 'usd'}, # cindicator
            'compusd': {'symbol': 'comp', 'base': 'usd'}, # compound
            'compust': {'symbol': 'comp', 'base': 'ust'}, # compound
            'ctkusd': {'symbol': 'ctk', 'base': 'usd', 'name:': 'certik'},
            'ctkust': {'symbol': 'ctk', 'base': 'ust', 'name:': 'certik'},
            'ctxusd': {'symbol': 'ctx', 'base': 'usd', 'name': 'cortex'},
            'dappusd': {'symbol': 'dapp', 'base': 'usd', 'name': 'liquidapps'},
            'dappust': {'symbol': 'dapp', 'base': 'ust', 'name': 'liquidapps'},
            'datusd': {'symbol': 'dat', 'base': 'usd', 'name': 'streamr'},
            'datbtc': {'symbol': 'dat', 'base': 'btc', 'name': 'streamr'},
            'dgbusd': {'symbol': 'dgb', 'base': 'usd', 'name': 'digibyte'},
            'dgxusd': {'symbol': 'dgx', 'base': 'usd', 'name': ''},
            'drnusd': {'symbol': 'drn', 'base': 'usd', 'name': 'Dragonchain'},
            'dtausd': {'symbol': 'dta', 'base': 'usd', 'name': 'data'},
            'dtxusd': {'symbol': 'dtx', 'base': 'usd', 'name': 'dragon token'},
            'duskbtc': {'symbol': 'dusk', 'base': 'btc', 'name': 'Dusk Network'},
            'duskusd': {'symbol': 'dusk', 'base': 'usd', 'name': 'Dusk Network'},
            'edobtc': {'symbol': 'edo', 'base': 'btc', 'name': ''},
            'edoeth': {'symbol': 'edo', 'base': 'eth', 'name': ''},
            'edousd': {'symbol': 'edo', 'base': 'usd', 'name': ''},
            'egldusd': {'symbol': 'egld', 'base': 'usd', 'name': 'Elrond'},
            'egldust': {'symbol': 'egld', 'base': 'ust', 'name': 'Elrond'},
            'enjusd': {'symbol': 'enj', 'base': 'usd', 'name': 'enjin coin'},
            'eosdtusd': {'symbol': 'eosdt', 'base': 'usd', 'name': 'eosdt'},
            'eosdtust': {'symbol': 'eosdt', 'base': 'ust', 'name': 'eosdt'},
            'eosbtc': {'symbol': 'eos', 'base': 'btc', 'name': 'eos'},
            'eoseth': {'symbol': 'eos', 'base': 'eth', 'name': 'eos'},
            'eoseur': {'symbol': 'eos', 'base': 'eur', 'name': 'eos'},
            'eosgbp': {'symbol': 'eos', 'base': 'gbp', 'name': 'eos'},
            'eosjpy': {'symbol': 'eos', 'base': 'jpy', 'name': 'eos'},
            'eosusd': {'symbol': 'eos', 'base': 'usd', 'name': 'eos'},
            'eosust': {'symbol': 'eos', 'base': 'ust', 'name': 'eos'},
            'essusd': {'symbol': 'ess', 'base': 'usd', 'name': 'essentia'},
            'etpusd': {'symbol': 'etp', 'base': 'usd', 'name': 'Metaverse ETP'},
            'etpbtc': {'symbol': 'etp', 'base': 'btc', 'name': 'Metaverse ETP'},
            'etpeth': {'symbol': 'etp', 'base': 'eth', 'name': 'Metaverse ETP'},
            'eususd': {'symbol': 'eus', 'base': 'usd', 'name': 'Stasis euro'},
            'eutusd': {'symbol': 'eut', 'base': 'usd', 'name': ''},
            'eutust': {'symbol': 'eut', 'base': 'ust', 'name': ''},
            'euteur': {'symbol': 'eut', 'base': 'eur', 'name': ''},
            'exrdusd': {'symbol': 'exrd', 'base': 'usd', 'name': 'Radix'},
            'exrdbtc': {'symbol': 'exrd', 'base': 'btc', 'name': 'Radix'},
            'fetusd': {'symbol': 'fet', 'base': 'usd', 'name': 'fetch.ai'},
            'fetust': {'symbol': 'fet', 'base': 'ust', 'name': 'fetch.ai'},
            'filust': {'symbol': 'fil', 'base': 'ust', 'name': ''},
            'fttust': {'symbol': 'ftt', 'base': 'ust', 'name': 'FTX Token'},
            'funusd': {'symbol': 'fun', 'base': 'usd', 'name': 'FunToken'},
            'genusd': {'symbol': 'gen', 'base': 'usd', 'name': 'DAOstack'},
            'gnousd': {'symbol': 'gno', 'base': 'usd', 'name': 'Gnosis'},
            'gntbtc': {'symbol': 'gnt', 'base': 'btc', 'name': 'Golem'},
            'gnteth': {'symbol': 'gnt', 'base': 'eth', 'name': 'Golem'},
            'gotusd': {'symbol': 'got', 'base': 'usd', 'name': 'ParkinGo'},
            'goteur': {'symbol': 'got', 'base': 'eur', 'name': 'ParkinGo'},
            'gtxusd': {'symbol': 'gtx', 'base': 'usd', 'name': 'GateToken'},
            'gtxust': {'symbol': 'gtx', 'base': 'ust', 'name': 'GateToken'},
            'hezusd': {'symbol': 'hez', 'base': 'usd', 'name': 'Hermez Network'},
            'hezust': {'symbol': 'hez', 'base': 'ust', 'name': 'Hermez Network'},
            #'hotusd': {'symbol': 'hot', 'base': 'usd', 'name': 'Hydro Protocol'},
            # 'hotust': {'symbol': 'hot', 'base': 'ust', 'name': 'Hydro Protocol'},
            'iceusd': {'symbol': 'ice', 'base': 'usd', 'name': 'Popsicle Finance'},
            'idxusd': {'symbol': 'idx', 'base': 'usd', 'name': ''},
            'idxbtc': {'symbol': 'idx', 'base': 'btc', 'name': ''},
            'iosusd': {'symbol': 'ios', 'base': 'usd', 'name': 'iost'},
            'iqxusd': {'symbol': 'iqx', 'base': 'usd', 'name': 'Everipedia'},
            'kanusd': {'symbol': 'kan', 'base': 'usd', 'name': 'BitKan'},
            'kanust': {'symbol': 'kan', 'base': 'ust', 'name': 'BitKan'},
            'kncusd': {'symbol': 'knc', 'base': 'usd', 'name': 'Kyber network'},
            'kncbtc': {'symbol': 'knc', 'base': 'btc', 'name': 'Kyber network'},
            'ksmust': {'symbol': 'ksm', 'base': 'ust', 'name': 'Kusama'},
            'leobtc': {'symbol': 'leo', 'base': 'btc', 'name': 'unus sed leo'},
            'leoeos': {'symbol': 'leo', 'base': 'eos', 'name': 'unus sed leo'},
            'leoeth': {'symbol': 'leo', 'base': 'eth', 'name': 'unus sed leo'},
            'leousd': {'symbol': 'leo', 'base': 'usd', 'name': 'unus sed leo'},
            'leoust': {'symbol': 'leo', 'base': 'ust', 'name': 'unus sed leo'},
            # 'lrcusd': {'symbol': 'lrc', 'base': 'usd', 'name': 'Loopring'},
            #'lrcust': {'symbol': 'lrc', 'base': 'ust', 'name': 'Loopring'},
            'lymusd': {'symbol': 'lym', 'base': 'usd', 'name': 'Lympo'},
            'mgousd': {'symbol': 'mgo', 'base': 'usd', 'name': 'MobileGo'},
            'lrcbtc': {'symbol': 'lrc', 'base': 'btc', 'name': 'Loopring'},
            'lrcusd': {'symbol': 'lrc', 'base': 'usd', 'name': 'Loopring'},
            'jstusd': {'symbol': 'jst', 'base': 'usd', 'name': ''},
            'jstust': {'symbol': 'jst', 'base': 'ust', 'name': ''},
            'mlnusd': {'symbol': 'mln', 'base': 'usd', 'name': ''},
            'mnabtc': {'symbol': 'mna', 'base': 'btc', 'name': 'Decetraland'},
            'mnausd': {'symbol': 'mna', 'base': 'usd', 'name': 'Decetraland'},
            'mobust': {'symbol': 'mob', 'base': 'ust', 'name': ''},
            'mobusd': {'symbol': 'mob', 'base': 'usd', 'name': ''},
            'ncausd': {'symbol': 'nca', 'base': 'usd', 'name': 'Nucleus Vision'},
            'nearust': {'symbol': 'near', 'base': 'ust', 'name': ''},
            'nearusd': {'symbol': 'near', 'base': 'usd', 'name': ''},
            'neceth': {'symbol': 'nec', 'base': 'eth', 'name': 'Nectar'},
            'necusd': {'symbol': 'nec', 'base': 'usd', 'name': 'Nectar'},
            'neoeth': {'symbol': 'neo', 'base': 'eth', 'name': 'Neo'},
            'neogbp': {'symbol': 'neo', 'base': 'gbp', 'name': 'Neo'},
            'neojpy': {'symbol': 'neo', 'base': 'jpy', 'name': 'Neo'},
            'nutust': {'symbol': 'nut', 'base': 'ust', 'name': 'native utility token'},
            'nutusd': {'symbol': 'nut', 'base': 'usd', 'name': 'native utility token'},
            'odeusd': {'symbol': 'ode', 'base': 'usd', 'name': 'odem'},
            'omgbtc': {'symbol': 'omg', 'base': 'btc', 'name': 'omgnetwork'},
            #'omgust': {'symbol': 'omg', 'base': 'ust', 'name': 'omgnetwork'},
            'omgusd': {'symbol': 'omg', 'base': 'usd', 'name': 'omgnetwork'},
            'omnbtc': {'symbol': 'omn', 'base': 'btc', 'name': 'omni'},
            'omnusd': {'symbol': 'omn', 'base': 'usd', 'name': 'omni'},
            'onlusd': {'symbol': 'onl', 'base': 'usd', 'name': 'on.live'},
            'orsusd': {'symbol': 'ors', 'base': 'usd', 'name': 'ors group'},
            'oxyust': {'symbol': 'oxy', 'base': 'ust', 'name': 'oxygen'},
            'oxyusd': {'symbol': 'oxy', 'base': 'usd', 'name': 'oxygen'},
            'pasusd': {'symbol': 'pas', 'base': 'usd', 'name': 'Paxos Standard'},
            'paxust': {'symbol': 'pax', 'base': 'ust', 'name': 'Paxos Standard'},
            'pluusd': {'symbol': 'plu', 'base': 'usd', 'name': 'Pluton'},
            'pnketh': {'symbol': 'pnk', 'base': 'eth', 'name': 'pink'},
            'pnkusd': {'symbol': 'pnk', 'base': 'usd', 'name': 'pink'},
            'poausd': {'symbol': 'poa', 'base': 'usd', 'name': 'poa'},
            'qshusd': {'symbol': 'qsh', 'base': 'usd', 'name': 'QASH'},
            # notfound 'qtfbtc': {'symbol': 'qtf', 'base': 'btc', 'name': 'Quantfury Token'},
            # notfound 'qtfusd': {'symbol': 'qtf', 'base': 'usd', 'name': 'Quantfury Token'},
            'qtmbtc': {'symbol': 'qtm', 'base': 'btc', 'name': 'Qtum'},
            'rbtbtc': {'symbol': 'rbt', 'base': 'btc', 'name': 'RSK Smart Bitcoin'},
            'rbtusd': {'symbol': 'rbt', 'base': 'usd', 'name': 'RSK Smart Bitcoin'},
            'rcnusd': {'symbol': 'rcn', 'base': 'usd', 'name': 'Ripio Credit Network'},
            'repbtc': {'symbol': 'rep', 'base': 'btc', 'name': 'Augur'},
            'repusd': {'symbol': 'rep', 'base': 'usd', 'name': 'Augur'},
            'requsd': {'symbol': 'req', 'base': 'usd', 'name': 'request'},
            'rifusd': {'symbol': 'rif', 'base': 'usd', 'name': 'RSK Infrastructure'},
            'ringxusd': {'symbol': 'ringx', 'base': 'usd', 'name': 'ringx platform'},
            'rrbusd': {'symbol': 'rrb', 'base': 'usd', 'name': 'renrenbit'},
            'rrbust': {'symbol': 'rrb', 'base': 'ust', 'name': 'renrenbit'},
            'rrtusd': {'symbol': 'rrt', 'base': 'usd', 'name': ''},
            'sanbtc': {'symbol': 'san', 'base': 'btc', 'name': 'santiment'},
            'sanusd': {'symbol': 'san', 'base': 'usd', 'name': 'santiment'},
            'saneth': {'symbol': 'san', 'base': 'eth', 'name': 'santiment'},
            'sngusd': {'symbol': 'sng', 'base': 'usd', 'name': 'singular'},
            'sntusd': {'symbol': 'snt', 'base': 'usd', 'name': 'status'},
            'snxust': {'symbol': 'snx', 'base': 'ust', 'name': 'synthetix'},
            'snxusd': {'symbol': 'snx', 'base': 'usd', 'name': 'synthetix'},
            'stjusd': {'symbol': 'stj', 'base': 'usd', 'name': 'storj'},
            'sunusd': {'symbol': 'sun', 'base': 'usd', 'name': 'sun'},
            'sunust': {'symbol': 'sun', 'base': 'ust', 'name': 'sun'},
            'sushiust': {'symbol': 'sushi', 'base': 'ust', 'name': 'SushiSwap'},
            'sushiusd': {'symbol': 'sushi', 'base': 'usd', 'name': 'SushiSwap'},
            'swmusd': {'symbol': 'swm', 'base': 'usd', 'name': ''},
            'tknusd': {'symbol': 'tkn', 'base': 'usd', 'name': 'Monolith'},
            'triusd': {'symbol': 'tri', 'base': 'usd', 'name': 'tripio'},
            'tsdust': {'symbol': 'tsd', 'base': 'ust', 'name': 'TrueUSD'},
            'tsdusd': {'symbol': 'tsd', 'base': 'usd', 'name': 'TrueUSD'},
            'uopusd': {'symbol': 'uop', 'base': 'usd', 'name': 'Utopia ..'},
            'uosbtc': {'symbol': 'uos', 'base': 'btc', 'name': 'ultra'},
            'uosusd': {'symbol': 'uos', 'base': 'usd', 'name': 'ultra'},
            'uskusd': {'symbol': 'usk', 'base': 'usd', 'name': 'usdk'},
            'utkusd': {'symbol': 'utk', 'base': 'usd', 'name': 'utrust'},
            'veeusd': {'symbol': 'vee', 'base': 'usd', 'name': 'blockv'},
            'vsybtc': {'symbol': 'vsy', 'base': 'btc', 'name': 'v.systems'},
            'vsyusd': {'symbol': 'vsy', 'base': 'usd', 'name': 'v.systems'},
            'waxusd': {'symbol': 'wax', 'base': 'usd', 'name': 'wax'},
            'wprusd': {'symbol': 'wpr', 'base': 'usd', 'name': 'wepower'},
            'wtcusd': {'symbol': 'wtc', 'base': 'usd', 'name': 'waltonchain'},
            'xautbtc': {'symbol': 'xaut', 'base': 'btc', 'name': 'Tether Gold'},
            'xautusd': {'symbol': 'xaut', 'base': 'usd', 'name': 'Tether Gold'},
            'xautust': {'symbol': 'xaut', 'base': 'ust', 'name': 'Tether Gold'},
            'xcheth': {'symbol': 'xch', 'base': 'eth', 'name': 'CryptoFranc'},
            'xchusd': {'symbol': 'xch', 'base': 'usd', 'name': 'CryptoFranc'},
            'xrausd': {'symbol': 'xra', 'base': 'usd', 'name': 'xriba'},
            'xsnusd': {'symbol': 'xsn', 'base': 'usd', 'name': 'Stakenet'},
            'xvgusd': {'symbol': 'xvg', 'base': 'usd', 'name': 'Verge'},
            'yfiusd': {'symbol': 'yfi', 'base': 'usd', 'name': 'yearn finance'},
            'yfiust': {'symbol': 'yfi', 'base': 'ust', 'name': 'yearn finance'},
            'yywusd': {'symbol': 'yyw', 'base': 'usd', 'name': 'YOYOW'},
            }


for symbol_key, symbol_dict in cryptos.items():
    if symbol_dict['symbol'] not in symbol_key:
        print("symbol {} not in {}".format(symbol_dict['symbol'], symbol_key))
    if symbol_dict['base'] not in symbol_key:
        print("base {} not in {}".format(symbol_dict['base'], symbol_key))

symbol usdt not in ustusd
symbol usdc not in udcusd
symbol wbtc not in wbtusd
symbol wbtc not in wbteth
symbol miota not in iotusd
symbol miota not in iotbtc
symbol miota not in iotgbp
symbol atom not in atousd
symbol atom not in atobtc
symbol atom not in atoeth
symbol algo not in algusd
symbol algo not in algbtc
symbol algo not in algust
symbol dash not in dshusd
symbol dash not in dshbtc
symbol qtum not in qtmusd


Load minute data 

In [32]:
data_path

'../data/crypto/minute/'

In [33]:
# read folder
file_list = glob(data_path + "*.csv")
pairs = []
for file in file_list:
    f = file.replace(data_path, '')[:-4]
    pairs.append(f)#[:-4])

In [34]:
used_tickers = []
unused_tickers = []
for pair in pairs:
    if pair in cryptos:
        used_tickers.append(pair)
    else:
        unused_tickers.append(pair)

print("n used tickers: {} \n n unused tickers {}".format(len(used_tickers), len(unused_tickers)))
print("unused tickers: ")
unused_tickers.sort()
unused_tickers




n used tickers: 294 
 n unused tickers 70
unused tickers: 


['aaabbb',
 'adaf0ustf0',
 'ampf0ustf0',
 'btccnht',
 'btcdomf0ustf0',
 'btcf0ustf0',
 'btcust',
 'chexusd',
 'cnhcnht',
 'dogef0ustf0',
 'dotf0btcf0',
 'dotf0ustf0',
 'eosf0ustf0',
 'eth2xeth',
 'eth2xusd',
 'eth2xust',
 'ethf0btcf0',
 'ethf0ustf0',
 'eurf0ustf0',
 'europe50ixf0ustf0',
 'eusbtc',
 'forthusd',
 'forthust',
 'ftmusd',
 'ftmust',
 'gbpf0ustf0',
 'germany30ixf0ustf0',
 'gntusd',
 'hotusd',
 'idxust',
 'ioteth',
 'ioteur',
 'iotf0ustf0',
 'iotjpy',
 'iqxust',
 'jpyf0ustf0',
 'jstbtc',
 'linkf0ustf0',
 'ltcf0btcf0',
 'ltcf0ustf0',
 'oceanusd',
 'oceanust',
 'omgeth',
 'planetsusd',
 'planetsust',
 'qtfbtc',
 'qtfusd',
 'sukuusd',
 'sukuust',
 'testbtcf0testusdtf0',
 'testbtctestusd',
 'testbtctestusdt',
 'udcust',
 'unif0ustf0',
 'uopust',
 'ustcnht',
 'xagf0ustf0',
 'xautf0btcf0',
 'xautf0ustf0',
 'xdcusd',
 'xdcust',
 'xlmf0ustf0',
 'yggusd',
 'zbtusd',
 'zcnusd',
 'zilbtc',
 'zilusd',
 'zrxbtc',
 'zrxeth',
 'zrxusd']

In [35]:
freq = '15Min'

def data_from_folder(csv_path, cryptos, frequency):

    prices = []
    for symbol_key, symbol_dict in tqdm(cryptos.items()):
        if symbol_dict['base'] == 'usd':
            file_path = csv_path + symbol_key + ".csv"
            file_path = Path(file_path)
            df = pd.read_csv(file_path) # read the csv
            df['date'] = df['time'].values.astype(dtype='datetime64[ms]') # for msec format
            df.drop(df.filter(regex="time"),axis=1, inplace=True) # otherwise time row is left behind as "time"
            df.set_index(['date'], inplace=True)

            # resample to desired frequency
            df = resample_df_ohlcv(df, frequency)
            # df.index = df.index.to_period(frequency)
            df['ticker'] = symbol_dict['symbol'] + '-' + symbol_dict['base']
            df['symbol'] = symbol_dict['symbol'] # get a symbol column
            df['base'] = symbol_dict['base'] # get a base column
            df.set_index(['ticker', 'symbol', 'base'], inplace=True, append=True)
            df.sort_index(inplace=True)
            df.drop(df.filter(regex="Unname"),axis=1, inplace=True) # otherwise index row is left behind as "unnamed"
            prices.append(df)
    prices = pd.concat(prices)
    #%%
    cols = ['ticker', 'symbol', 'base']
    prices.reset_index(cols, inplace=True)
    prices[cols] = prices[cols].astype('category')
    prices.set_index(cols, inplace=True, append=True)

    prices.sort_index(inplace=True)
    prices = reduce_footprint(prices, float_type='float32', except_cols=['volume'])

    print("found {} symbol(s):\n {}".format(len(prices.index.get_level_values('symbol').unique()),
                                          prices.index.get_level_values('symbol').unique()))
    print("found {} base(s):\n {}".format(len(prices.index.get_level_values('base').unique()),
                                       prices.index.get_level_values('base').unique()))

    return prices


prices = data_from_folder(data_path, cryptos, freq)
print("\n\n\n")
print('RESULTING DF: ')
print(prices.info())
print(prices.index.get_level_values('symbol').unique())

100%|██████████| 294/294 [00:13<00:00, 22.08it/s]


found 155 symbol(s):
 CategoricalIndex(['btc', 'ltc', 'eth', 'etc', 'rrt', 'zec', 'xmr', 'dash',
                  'xrp', 'miota',
                  ...
                  'link', 'luna', 'near', 'sushi', 'xaut', 'b21x', 'best',
                  'ringx', 'albt', 'eosdt'],
                 categories=['btc', 'ltc', 'eth', 'etc', 'rrt', 'zec', 'xmr', 'dash', ...], ordered=False, name='symbol', dtype='category', length=155)
found 1 base(s):
 CategoricalIndex(['usd'], categories=['usd'], ordered=False, name='base', dtype='category')




RESULTING DF: 
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 11805163 entries, (Timestamp('2013-04-01 00:00:00'), 'btc-usd', 'btc', 'usd') to (Timestamp('2021-06-23 14:45:00'), 'oxy-usd', 'oxy', 'usd')
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   open    float32
 1   high    float32
 2   low     float32
 3   close   float32
 4   volume  float64
dtypes: float32(4), float64(1)
memory usage: 381.8 MB
None
CategoricalIndex(

In [36]:
print(prices.index.get_level_values('ticker'))

CategoricalIndex(['btc-usd', 'btc-usd', 'btc-usd', 'btc-usd', 'btc-usd',
                  'btc-usd', 'btc-usd', 'btc-usd', 'btc-usd', 'btc-usd',
                  ...
                  'sushi-usd', 'uni-usd', 'uop-usd', 'wbtc-usd', 'xaut-usd',
                  'xsn-usd', 'yfi-usd', 'doge-usd', 'luna-usd', 'oxy-usd'],
                 categories=['1inch-usd', 'aave-usd', 'ada-usd', 'albt-usd', 'algo-usd', 'amp-usd', 'ant-usd', 'ast-usd', ...], ordered=False, name='ticker', dtype='category', length=11805163)


In [37]:
multiindex_cols = ['date', 'ticker', 'symbol', 'base']
"""
DATA_STORE = '../data/crypto.h5'
with pd.HDFStore(DATA_STORE) as store:
    store.put('crypto/caggle/prices', prices.reset_index(), format='table')
    prices_from_store = store['crypto/caggle/prices'].set_index(multiindex_cols)

print(prices_from_store.info(memory_usage=True))
print(prices_from_store.memory_usage(deep=True))
"""

"\nDATA_STORE = '../data/crypto.h5'\nwith pd.HDFStore(DATA_STORE) as store:\n    store.put('crypto/caggle/prices', prices.reset_index(), format='table')\n    prices_from_store = store['crypto/caggle/prices'].set_index(multiindex_cols)\n\nprint(prices_from_store.info(memory_usage=True))\nprint(prices_from_store.memory_usage(deep=True))\n"

In [38]:
prices_from_store = None


In [39]:
feather_path = f'../data/crypto/prices_{freq}.feather'
prices.reset_index().to_feather(feather_path)
print("saved")
prices_from_feather = pd.read_feather(feather_path).set_index(multiindex_cols)
print(prices_from_feather.info(memory_usage=True))
print(prices_from_feather.memory_usage(deep=True))

saved
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 11805163 entries, (Timestamp('2013-04-01 00:00:00'), 'btc-usd', 'btc', 'usd') to (Timestamp('2021-06-23 14:45:00'), 'oxy-usd', 'oxy', 'usd')
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   open    float32
 1   high    float32
 2   low     float32
 3   close   float32
 4   volume  float64
dtypes: float32(4), float64(1)
memory usage: 381.8 MB
None
Index     117037602
open       47220652
high       47220652
low        47220652
close      47220652
volume     94441304
dtype: int64


In [43]:
date_index = prices_from_feather.index.get_level_values('date')
date_index.unique()

DatetimeIndex(['2013-04-01 00:00:00', '2013-04-01 00:15:00',
               '2013-04-01 00:30:00', '2013-04-01 00:45:00',
               '2013-04-01 01:00:00', '2013-04-01 01:15:00',
               '2013-04-01 01:30:00', '2013-04-01 01:45:00',
               '2013-04-01 02:00:00', '2013-04-01 02:15:00',
               ...
               '2021-06-23 12:30:00', '2021-06-23 12:45:00',
               '2021-06-23 13:00:00', '2021-06-23 13:15:00',
               '2021-06-23 13:30:00', '2021-06-23 13:45:00',
               '2021-06-23 14:00:00', '2021-06-23 14:15:00',
               '2021-06-23 14:30:00', '2021-06-23 14:45:00'],
              dtype='datetime64[ns]', name='date', length=288540, freq=None)

In [41]:
print(prices_from_feather)

                                                  open          high  \
date                ticker   symbol base                               
2013-04-01 00:00:00 btc-usd  btc    usd      93.250000    100.000000   
2013-04-01 00:15:00 btc-usd  btc    usd      93.043991     93.349998   
2013-04-01 00:30:00 btc-usd  btc    usd      93.199989     93.349998   
2013-04-01 00:45:00 btc-usd  btc    usd      93.250000     93.300003   
2013-04-01 01:00:00 btc-usd  btc    usd      93.379990     93.487968   
...                                                ...           ...   
2021-06-23 14:30:00 xsn-usd  xsn    usd       0.171240      0.171240   
                    yfi-usd  yfi    usd   31226.000000  31257.000000   
2021-06-23 14:45:00 doge-usd doge   usd       0.242620      0.243730   
                    luna-usd luna   usd       5.414300      5.414300   
                    oxy-usd  oxy    usd       1.449000      1.449000   

                                                   low         

In [42]:
prices.memory_usage(deep=True)

Index     117037602
open       47220652
high       47220652
low        47220652
close      47220652
volume     94441304
dtype: int64

https://zaxrosenberg.com/pandas-multiindex-tutorial/