# Digest minute crypto data (prices) and coinmarketcap (market)

- https://www.bitfinex.com/
- https://medium.com/coinmonks/how-to-get-historical-crypto-currency-data-954062d40d2d
- https://www.kaggle.com/tencars/392-crypto-currency-pairs-at-minute-resolution


In [1]:
%matplotlib inline

from pathlib import Path
import pandas as pd

idx = pd.IndexSlice

In [2]:

# https://stackoverflow.com/questions/16466670/fill-nan-in-candlestick-ohlcv-data
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
def nans_to_prev_close_method1(df):
    df['volume'] = df['volume'].fillna(0.0)  # volume should always be 0 (if there were no trades in this interval)
    df['close'] = df.fillna(method='pad')  # ie pull the last close into this close
    # now copy the close that was pulled down from the last timestep into this row, across into o/h/l
    df['open'] = df['open'].fillna(df['close']) 
    df['low'] = df['low'].fillna(df['close'])
    df['high'] = df['high'].fillna(df['close'])
    # print(df.head(10))
    return df

# 
def resample_df_ohlcv(df, timeframe='15Min'):
    # Created a dictionary to tell Pandas how to re-sample, if this isn't in place it will re-sample each column separately
    ohlcv_dict = {'open':'first', 'high':'max', 'low':'min', 'close': 'last', 'volume': 'sum'}

    # Resample to 15Min (this format is needed) as per ohlc_dict, then remove any line with a NaN
    df = df.resample(timeframe).agg(ohlcv_dict)
    df = nans_to_prev_close_method1(df)
    # Resample mixes the columns so lets re-arrange them
    cols=['open', 'high', 'low', 'close', 'volume']  
    df = df[cols]
    return df




In [3]:
data_path ='../data/crypto_1min/'

cryptos = {
            'btcusd':  {'symbol': 'btc', 'base': 'usd', },
            'ethusd':  {'symbol': 'eth', 'base': 'usd', },
            # Binance Coin
            'xrpusd':  {'symbol': 'xrp', 'base': 'usd', },
            'ustusd':  {'symbol': 'usdt', 'base': 'usd', },
            'adausd':  {'symbol': 'ada', 'base': 'usd', }, # Cardano
            'dogusd':  {'symbol': 'doge', 'base': 'usd',},
            'dotusd':  {'symbol': 'dot', 'base': 'usd', }, # Polkadot
            'uniusd':  {'symbol': 'uni', 'base': 'usd', },
            'ltcusd':  {'symbol': 'ltc', 'base': 'usd', },
            'bchnusd':  {'symbol': 'bch', 'base': 'usd', }, # Bitcoin Cash ? bch = bchn? 
            'linkusd':  {'symbol': 'link', 'base': 'usd', }, # Chainlink
            'udcusd':  {'symbol': 'usdc', 'base': 'usd', },   # USDC
            'vetusd':  {'symbol': 'vet', 'base': 'usd', }, # VeChain
            'solusd':  {'symbol': 'sol', 'base': 'usd',}, # Solana
            'xlmusd':  {'symbol': 'xlm', 'base': 'usd',}, # Stellar
            # 'theta', 
            'filusd':  {'symbol': 'fil', 'base': 'usd',}, # filecoin
            'okbusd': {'symbol': 'okb', 'base': 'usd'},
            'wbtusd': {'symbol': 'wbtc', 'base': 'usd'},

            'trxusd':  {'symbol': 'trx', 'base': 'usd',}, # Tron
            # Binance USD busd
            
            'bsvusd': {'symbol': 'bsv', 'base': 'usd'},

            'xmrusd':  {'symbol': 'xmr', 'base': 'usd',}, # Monero
            'lunausd':  {'symbol': 'luna', 'base': 'usd',}, # Luna
            'neousd':  {'symbol': 'neo', 'base': 'usd',}, # 
            # Klay
            'iotusd':  {'symbol': 'miota', 'base': 'usd',}, # iota 
            'eosusd':  {'symbol': 'eos', 'base': 'usd',}, # iota 
            'atousd':  {'symbol': 'atom', 'base': 'usd',}, # cosmos 
            # 'cakusd':  {'symbol': 'cake', 'base': 'usd',}, # pancake 
            'aaveusd':  {'symbol': 'aave', 'base': 'usd',}, # aave
            'eosusd': {'symbol': 'eos', 'base': 'usd'},
            'etcusd': {'symbol': 'etc', 'base': 'usd'},
            'atousd': {'symbol': 'atom', 'base': 'usd'},
            # 'maticusd': {'symbol': '...', 'base': 'usd'},
            # ht
            'fttusd': {'symbol': 'ftt', 'base': 'usd'},
            # cro
            'bttusd': {'symbol': 'btt', 'base': 'usd'},
            # cusd
            'mkrusd': {'symbol': 'mkr', 'base': 'usd'},
            # 'mkreth': {'symbol': 'mkr', 'base': 'eth'},
            # 'mkrbtc': {'symbol': 'mkr', 'base': 'btc'},
            'xtzusd': {'symbol': 'xtz', 'base': 'usd'},
            # 'xtzbtc': {'symbol': 'xtz', 'base': 'btc'},
            # ceth
            'algusd': {'symbol': 'algo', 'base': 'usd'},
            'avaxusd': {'symbol': 'avax', 'base': 'usd'},
            'ksmusd': {'symbol': 'ksm', 'base': 'usd'},
            'daiusd': {'symbol': 'dai', 'base': 'usd'},
            # cdai
            # dash
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            'mtnusd': {'symbol': 'mtn', 'base': 'usd'},
            'manusd': {'symbol': 'man', 'base': 'usd'},

            }


<bound method IndexOpsMixin.tolist of Index(['btc', 'eth', 'bnb', 'xrp', 'usdt', 'doge', 'ada', 'dot', 'uni', 'bch',
       'ltc', 'link', 'usdc', 'vet', 'sol', 'xlm', 'theta', 'fil', 'okb','wbtc', 'trx', 'busd',
       'xmr', 'neo', 'luna', 'cake', 'bsv', 'aave', 'eos', 'klay', 'miota', 'etc', 'atom', 
       
       'matic', 'ht', 'ftt', 'cro',
       'btt', 'cusdc', 'mkr', 'comp', 'xtz', 'ceth', 'algo', 'AVAX', 'ksm',
       'dai',
       'cdai', 'rune', 'dash', 'xem', 'egld', 'chz', 'hot', 'zec',
       'hbar', 'dcr', 'snx', 'enj', 'zil', 'waves', 'cel', 'leo', 'dgb',
       'sushi', 'stx', 'amp', 'nexo', 'sc', 'ftm', 'grt', 'ust', 'near',
       'mana', 'bat', 'yfi', 'rvn', 'btg', 'icx', 'qtum', 'hbtc', 'uma', 'hnt',
       'zrx', 'lusd', 'nano', 'ont', 'iost', 'one', 'zen', 'bnt', 'chsb',
       'arrr', 'ankr', 'ar', 'xvs', 'pax', 'flow', 'bake', 'kcs'],
      dtype='object', name='symbol')>



Load minute data 

In [4]:
def three_spaces():
    print("")
    print("")
    print("")

def data_from_folder(csv_path, cryptos):
    
    prices = []
    for symbol_key, symbol_dict in cryptos.items():
        file_path = data_path + symbol_key + ".csv"
        file_path = Path(file_path)
        df = pd.read_csv(file_path) # read the csv 
        df['date'] = df['time'].values.astype(dtype='datetime64[ms]') # for msec format   
        df.drop(df.filter(regex="time"),axis=1, inplace=True) # otherwise time row is left behind as "time"
        df.set_index(['date'], inplace=True)

        # resample to desired frequency
        df = resample_df_ohlcv(df, '15Min')
        df['ticker'] = symbol_dict['symbol'] + '-' + symbol_dict['base']
        df['symbol'] = symbol_dict['symbol'] # get a symbol column
        df['base'] = symbol_dict['base'] # get a base column
        df.set_index(['ticker', 'symbol', 'base'], inplace=True, append=True)

        df.sort_index(inplace=True)
        df.drop(df.filter(regex="Unname"),axis=1, inplace=True) # otherwise index row is left behind as "unnamed"
        prices.append(df)
    prices = pd.concat(prices)
    
    prices.sort_index(inplace=True)
    print("found {} symbol(s):\n {}".format(len(prices.index.get_level_values('symbol').unique()),
                                          prices.index.get_level_values('symbol').unique()))
    print("found {} base(s):\n {}".format(len(prices.index.get_level_values('base').unique()),
                                       prices.index.get_level_values('base').unique()))
    return prices


prices = data_from_folder(data_path, cryptos)
three_spaces()
print('RESULTING DF: ')
print(prices.info())

print(prices.index.get_level_values('symbol').unique())

found 38 symbol(s):
 Index(['btc', 'ltc', 'eth', 'etc', 'xmr', 'xrp', 'miota', 'eos', 'neo', 'trx',
       'dai', 'mtn', 'xlm', 'mkr', 'man', 'vet', 'xtz', 'bsv', 'usdt', 'usdc',
       'btt', 'atom', 'wbtc', 'okb', 'algo', 'ftt', 'doge', 'ada', 'dot',
       'ksm', 'uni', 'fil', 'sol', 'aave', 'avax', 'bch', 'link', 'luna'],
      dtype='object', name='symbol')
found 1 base(s):
 Index(['usd'], dtype='object', name='base')



RESULTING DF: 
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 3261244 entries, (Timestamp('2013-04-01 00:00:00'), 'btc-usd', 'btc', 'usd') to (Timestamp('2021-05-18 07:00:00'), 'sol-usd', 'sol', 'usd')
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   open    float64
 1   high    float64
 2   low     float64
 3   close   float64
 4   volume  float64
dtypes: float64(5)
memory usage: 156.4+ MB
None
Index(['btc', 'ltc', 'eth', 'etc', 'xmr', 'xrp', 'miota', 'eos', 'neo', 'trx',
       'dai', 'mtn', 'xlm', 'mkr', 'man', 'vet', 'xtz', 'bs

In [5]:
prices.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,open,high,low,close,volume
date,ticker,symbol,base,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-04-01 00:00:00,btc-usd,btc,usd,93.25,100.0,93.03,93.25,247.283889
2013-04-01 00:15:00,btc-usd,btc,usd,93.04399,93.35,93.04399,93.04399,17.689501
2013-04-01 00:30:00,btc-usd,btc,usd,93.19999,93.35,93.19999,93.19999,40.0
2013-04-01 00:45:00,btc-usd,btc,usd,93.25,93.3,93.1,93.25,40.610499
2013-04-01 01:00:00,btc-usd,btc,usd,93.37999,93.48797,93.2,93.37999,23.510608
2013-04-01 01:15:00,btc-usd,btc,usd,93.4847,93.4847,93.40001,93.4847,19.46
2013-04-01 01:30:00,btc-usd,btc,usd,93.10001,93.10001,93.1,93.10001,2.272727
2013-04-01 01:45:00,btc-usd,btc,usd,93.10001,93.10001,93.10001,93.10001,0.0
2013-04-01 02:00:00,btc-usd,btc,usd,93.17,93.29,93.16999,93.17,10.6276
2013-04-01 02:15:00,btc-usd,btc,usd,93.10999,93.10999,93.10999,93.10999,0.43382


In [6]:
prices.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,open,high,low,close,volume
date,ticker,symbol,base,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-05-18 06:45:00,dot-usd,dot,usd,40.913,40.957,40.913,40.913,169.37749
2021-05-18 06:45:00,fil-usd,fil,usd,104.68,104.68,103.57,104.68,18.480281
2021-05-18 06:45:00,ksm-usd,ksm,usd,606.21,606.21,606.21,606.21,0.04
2021-05-18 06:45:00,link-usd,link,usd,39.995,40.155,39.995,39.995,327.238809
2021-05-18 06:45:00,luna-usd,luna,usd,15.434,15.48,15.337,15.434,1607.956385
2021-05-18 06:45:00,sol-usd,sol,usd,50.506,50.661,50.163,50.506,1442.344018
2021-05-18 06:45:00,uni-usd,uni,usd,36.099,36.099,36.0,36.099,1.333727
2021-05-18 07:00:00,aave-usd,aave,usd,647.82,647.82,647.82,647.82,0.013275
2021-05-18 07:00:00,luna-usd,luna,usd,15.376,15.448,15.376,15.376,1829.802952
2021-05-18 07:00:00,sol-usd,sol,usd,50.304,50.304,50.301,50.304,36.807817


In [7]:
DATA_STORE = '../data/crypto.h5'
with pd.HDFStore(DATA_STORE) as store:
    store.put('crypto/caggle/prices', prices)
print("done")


done


https://zaxrosenberg.com/pandas-multiindex-tutorial/


In [8]:
prices.index.get_level_values('symbol').unique()

Index(['btc', 'ltc', 'eth', 'etc', 'xmr', 'xrp', 'miota', 'eos', 'neo', 'trx',
       'dai', 'mtn', 'xlm', 'mkr', 'man', 'vet', 'xtz', 'bsv', 'usdt', 'usdc',
       'btt', 'atom', 'wbtc', 'okb', 'algo', 'ftt', 'doge', 'ada', 'dot',
       'ksm', 'uni', 'fil', 'sol', 'aave', 'avax', 'bch', 'link', 'luna'],
      dtype='object', name='symbol')

In [9]:
# inplace
idx = pd.IndexSlice
prices.loc[idx['2021']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,open,high,low,close,volume
date,ticker,symbol,base,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-01 00:00:00,ada-usd,ada,usd,0.18110,0.18110,0.17850,0.18110,30101.807514
2021-01-01 00:00:00,algo-usd,algo,usd,0.33618,0.33803,0.33304,0.33618,1783.782902
2021-01-01 00:00:00,atom-usd,atom,usd,6.44000,6.44000,6.29790,6.44000,2028.048830
2021-01-01 00:00:00,bsv-usd,bsv,usd,163.28000,163.28000,162.21000,163.28000,1.099935
2021-01-01 00:00:00,btc-usd,btc,usd,28939.00000,29033.00000,28716.00000,28939.00000,150.194677
...,...,...,...,...,...,...,...,...
2021-05-18 06:45:00,sol-usd,sol,usd,50.50600,50.66100,50.16300,50.50600,1442.344018
2021-05-18 06:45:00,uni-usd,uni,usd,36.09900,36.09900,36.00000,36.09900,1.333727
2021-05-18 07:00:00,aave-usd,aave,usd,647.82000,647.82000,647.82000,647.82000,0.013275
2021-05-18 07:00:00,luna-usd,luna,usd,15.37600,15.44800,15.37600,15.37600,1829.802952


In [10]:
# copies.. 
prices.xs('btc', level='symbol').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,high,low,close,volume
date,ticker,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-04-01 00:00:00,btc-usd,usd,93.25,100.0,93.03,93.25,247.283889
2013-04-01 00:15:00,btc-usd,usd,93.04399,93.35,93.04399,93.04399,17.689501
2013-04-01 00:30:00,btc-usd,usd,93.19999,93.35,93.19999,93.19999,40.0
2013-04-01 00:45:00,btc-usd,usd,93.25,93.3,93.1,93.25,40.610499
2013-04-01 01:00:00,btc-usd,usd,93.37999,93.48797,93.2,93.37999,23.510608
2013-04-01 01:15:00,btc-usd,usd,93.4847,93.4847,93.40001,93.4847,19.46
2013-04-01 01:30:00,btc-usd,usd,93.10001,93.10001,93.1,93.10001,2.272727
2013-04-01 01:45:00,btc-usd,usd,93.10001,93.10001,93.10001,93.10001,0.0
2013-04-01 02:00:00,btc-usd,usd,93.17,93.29,93.16999,93.17,10.6276
2013-04-01 02:15:00,btc-usd,usd,93.10999,93.10999,93.10999,93.10999,0.43382


In [11]:
# buggy date
prices.xs(('2018-07-01', 'btc', 'usd'), level=['date', 'symbol', 'base'])


Unnamed: 0_level_0,open,high,low,close,volume
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
btc-usd,415.63,415.68,413.65,415.63,430.090239
btc-usd,414.11,414.56,414.11,414.11,53.036204
btc-usd,414.51,414.51,412.56,414.51,186.233079
btc-usd,413.0,414.0,412.92,413.0,57.181922
btc-usd,413.85,414.51,413.85,413.85,139.705708
btc-usd,414.51,414.51,413.68,414.51,75.307261
btc-usd,414.39,415.63,414.37,414.39,139.019006
btc-usd,414.52,416.79,414.51,414.52,475.344517
btc-usd,416.58,417.25,416.03,416.58,482.812425
btc-usd,417.17,417.79,417.0,417.17,174.598849


In [12]:
prices.xs(('btc', 'usd'), level=['symbol', 'base'])

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-04-01 00:00:00,btc-usd,93.250000,100.000000,93.030000,93.250000,247.283889
2013-04-01 00:15:00,btc-usd,93.043990,93.350000,93.043990,93.043990,17.689501
2013-04-01 00:30:00,btc-usd,93.199990,93.350000,93.199990,93.199990,40.000000
2013-04-01 00:45:00,btc-usd,93.250000,93.300000,93.100000,93.250000,40.610499
2013-04-01 01:00:00,btc-usd,93.379990,93.487970,93.200000,93.379990,23.510608
...,...,...,...,...,...,...
2021-05-18 05:00:00,btc-usd,45334.000000,45522.625989,45250.000000,45334.000000,74.981653
2021-05-18 05:15:00,btc-usd,45400.633418,45460.000000,45223.000000,45400.633418,45.373709
2021-05-18 05:30:00,btc-usd,45286.449570,45451.840095,45100.000000,45286.449570,70.526798
2021-05-18 05:45:00,btc-usd,45448.000000,45452.000000,45147.000000,45448.000000,64.627406


In [13]:

# seems only the first result
result = prices.query("'symbol =='btc'")
print(result)

SyntaxError: invalid syntax (<unknown>, line 1)

In [None]:
'btc' in prices.index.get_level_values('symbol') and 'usd' in prices.index.get_level_values('base')

In [None]:
'btc' in prices.index.get_level_values('symbol') and 'xxxn' in prices.index.get_level_values('base')