# Digest minute crypto data (prices) and coinmarketcap (market)

- https://www.bitfinex.com/
- https://medium.com/coinmonks/how-to-get-historical-crypto-currency-data-954062d40d2d
- https://www.kaggle.com/tencars/392-crypto-currency-pairs-at-minute-resolution


In [1]:
%matplotlib inline

from pathlib import Path
import pandas as pd

idx = pd.IndexSlice

In [2]:

# https://stackoverflow.com/questions/16466670/fill-nan-in-candlestick-ohlcv-data
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
def nans_to_prev_close_method1(df):
    df['volume'] = df['volume'].fillna(0.0)  # volume should always be 0 (if there were no trades in this interval)
    df['close'] = df.fillna(method='pad')  # ie pull the last close into this close
    # now copy the close that was pulled down from the last timestep into this row, across into o/h/l
    df['open'] = df['open'].fillna(df['close']) 
    df['low'] = df['low'].fillna(df['close'])
    df['high'] = df['high'].fillna(df['close'])
    # print(df.head(10))
    return df

# 
def resample_df_ohlcv(df, timeframe='15Min'):
    # Created a dictionary to tell Pandas how to re-sample, if this isn't in place it will re-sample each column separately
    ohlcv_dict = {'open':'first', 'high':'max', 'low':'min', 'close': 'last', 'volume': 'sum'}

    # Resample to 15Min (this format is needed) as per ohlc_dict, then remove any line with a NaN
    df = df.resample(timeframe).agg(ohlcv_dict)
    df = nans_to_prev_close_method1(df)
    # Resample mixes the columns so lets re-arrange them
    cols=['open', 'high', 'low', 'close', 'volume']  
    df = df[cols]
    return df




In [3]:
data_path ='../data/crypto_1min/'

cryptos = {
            'btcusd':  {'symbol': 'btc', 'base': 'usd', },
            'ethusd':  {'symbol': 'eth', 'base': 'usd', },
            # Binance Coin
            'xrpusd':  {'symbol': 'xrp', 'base': 'usd', },
            'ustusd':  {'symbol': 'usdt', 'base': 'usd', },
            'adausd':  {'symbol': 'ada', 'base': 'usd', }, # Cardano
            'dogusd':  {'symbol': 'doge', 'base': 'usd',},
            'dotusd':  {'symbol': 'dot', 'base': 'usd', }, # Polkadot
            'uniusd':  {'symbol': 'uni', 'base': 'usd', },
            'ltcusd':  {'symbol': 'ltc', 'base': 'usd', },
            'bchnusd':  {'symbol': 'bch', 'base': 'usd', }, # Bitcoin Cash ? bch = bchn? 
            'linkusd':  {'symbol': 'link', 'base': 'usd', }, # Chainlink
            'udcusd':  {'symbol': 'usdc', 'base': 'usd', },   # USDC
            'vetusd':  {'symbol': 'vet', 'base': 'usd', }, # VeChain
            'solusd':  {'symbol': 'sol', 'base': 'usd',}, # Solana
            'xlmusd':  {'symbol': 'xlm', 'base': 'usd',}, # Stellar
            # 'theta', 
            'filusd':  {'symbol': 'fil', 'base': 'usd',}, # filecoin
            'okbusd': {'symbol': 'okb', 'base': 'usd'},
            'wbtusd': {'symbol': 'wbtc', 'base': 'usd'},

            'trxusd':  {'symbol': 'trx', 'base': 'usd',}, # Tron
            # Binance USD busd
            
            'bsvusd': {'symbol': 'bsv', 'base': 'usd'},

            'xmrusd':  {'symbol': 'xmr', 'base': 'usd',}, # Monero
            'lunausd':  {'symbol': 'luna', 'base': 'usd',}, # Luna
            'neousd':  {'symbol': 'neo', 'base': 'usd',}, # 
            # Klay
            'iotusd':  {'symbol': 'miota', 'base': 'usd',}, # iota 
            'eosusd':  {'symbol': 'eos', 'base': 'usd',}, # iota 
            'atousd':  {'symbol': 'atom', 'base': 'usd',}, # cosmos 
            # 'cakusd':  {'symbol': 'cake', 'base': 'usd',}, # pancake 
            'aaveusd':  {'symbol': 'aave', 'base': 'usd',}, # aave
            'eosusd': {'symbol': 'eos', 'base': 'usd'},
            'etcusd': {'symbol': 'etc', 'base': 'usd'},
            'atousd': {'symbol': 'atom', 'base': 'usd'},
            # 'maticusd': {'symbol': '...', 'base': 'usd'},
            # ht
            'fttusd': {'symbol': 'ftt', 'base': 'usd'},
            # cro
            'bttusd': {'symbol': 'btt', 'base': 'usd'},
            # cusd
            'mkrusd': {'symbol': 'mkr', 'base': 'usd'},
            # 'mkreth': {'symbol': 'mkr', 'base': 'eth'},
            # 'mkrbtc': {'symbol': 'mkr', 'base': 'btc'},
            'xtzusd': {'symbol': 'xtz', 'base': 'usd'},
            # 'xtzbtc': {'symbol': 'xtz', 'base': 'btc'},
            # ceth
            'algusd': {'symbol': 'algo', 'base': 'usd'},
            'avaxusd': {'symbol': 'avax', 'base': 'usd'},
            'ksmusd': {'symbol': 'ksm', 'base': 'usd'},
            'daiusd': {'symbol': 'dai', 'base': 'usd'},
            # cdai
            # dash
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            # '...usd': {'symbol': '...', 'base': 'usd'},
            'mtnusd': {'symbol': 'mtn', 'base': 'usd'},
            'manusd': {'symbol': 'man', 'base': 'usd'},

            }


<bound method IndexOpsMixin.tolist of Index(['btc', 'eth', 'bnb', 'xrp', 'usdt', 'doge', 'ada', 'dot', 'uni', 'bch',
       'ltc', 'link', 'usdc', 'vet', 'sol', 'xlm', 'theta', 'fil', 'okb','wbtc', 'trx', 'busd',
       'xmr', 'neo', 'luna', 'cake', 'bsv', 'aave', 'eos', 'klay', 'miota', 'etc', 'atom', 
       
       'matic', 'ht', 'ftt', 'cro',
       'btt', 'cusdc', 'mkr', 'comp', 'xtz', 'ceth', 'algo', 'AVAX', 'ksm',
       'dai',
       'cdai', 'rune', 'dash', 'xem', 'egld', 'chz', 'hot', 'zec',
       'hbar', 'dcr', 'snx', 'enj', 'zil', 'waves', 'cel', 'leo', 'dgb',
       'sushi', 'stx', 'amp', 'nexo', 'sc', 'ftm', 'grt', 'ust', 'near',
       'mana', 'bat', 'yfi', 'rvn', 'btg', 'icx', 'qtum', 'hbtc', 'uma', 'hnt',
       'zrx', 'lusd', 'nano', 'ont', 'iost', 'one', 'zen', 'bnt', 'chsb',
       'arrr', 'ankr', 'ar', 'xvs', 'pax', 'flow', 'bake', 'kcs'],
      dtype='object', name='symbol')>



Load minute data 

In [4]:
def three_spaces():
    print("")
    print("")
    print("")

def data_from_folder(csv_path, cryptos):
    
    prices = []
    for symbol_key, symbol_dict in cryptos.items():
        file_path = data_path + symbol_key + ".csv"
        file_path = Path(file_path)
        df = pd.read_csv(file_path) # read the csv 
        df['date'] = df['time'].values.astype(dtype='datetime64[ms]') # for msec format   
        df.drop(df.filter(regex="time"),axis=1, inplace=True) # otherwise time row is left behind as "time"
        df.set_index(['date'], inplace=True)

        # resample to desired frequency
        df = resample_df_ohlcv(df, '1Min')
        df['symbol'] = symbol_dict['symbol'] # get a symbol column
        df['base'] = symbol_dict['base'] # get a base column
        df.set_index(['symbol', 'base'], inplace=True, append=True)

        df.sort_index(inplace=True)
        df.drop(df.filter(regex="Unname"),axis=1, inplace=True) # otherwise index row is left behind as "unnamed"
        prices.append(df)
    prices = pd.concat(prices)
    
    prices.sort_index(inplace=True)
    print("found {} symbol(s):\n {}".format(len(prices.index.get_level_values('symbol').unique()),
                                          prices.index.get_level_values('symbol').unique()))
    print("found {} base(s):\n {}".format(len(prices.index.get_level_values('base').unique()),
                                       prices.index.get_level_values('base').unique()))
    return prices


prices = data_from_folder(data_path, cryptos)
three_spaces()
print('RESULTING DF: ')
print(prices.info())

print(prices.index.get_level_values('symbol').unique())

found 38 symbol(s):
 Index(['btc', 'ltc', 'eth', 'etc', 'xmr', 'xrp', 'miota', 'eos', 'neo', 'trx',
       'dai', 'mtn', 'xlm', 'mkr', 'man', 'vet', 'xtz', 'bsv', 'usdt', 'usdc',
       'btt', 'atom', 'wbtc', 'okb', 'algo', 'ftt', 'doge', 'ada', 'dot',
       'ksm', 'uni', 'fil', 'sol', 'aave', 'avax', 'link', 'luna', 'bch'],
      dtype='object', name='symbol')
found 1 base(s):
 Index(['usd'], dtype='object', name='base')



RESULTING DF: 
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 48918134 entries, (Timestamp('2013-04-01 00:07:00'), 'btc', 'usd') to (Timestamp('2021-05-18 07:04:00'), 'luna', 'usd')
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   open    float64
 1   high    float64
 2   low     float64
 3   close   float64
 4   volume  float64
dtypes: float64(5)
memory usage: 2.3+ GB
None
Index(['btc', 'ltc', 'eth', 'etc', 'xmr', 'xrp', 'miota', 'eos', 'neo', 'trx',
       'dai', 'mtn', 'xlm', 'mkr', 'man', 'vet', 'xtz', 'bsv', 'usdt', 'usdc',
  

In [5]:
prices.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,high,low,close,volume
date,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-04-01 00:07:00,btc,usd,93.25,93.3,93.25,93.25,93.3
2013-04-01 00:08:00,btc,usd,100.0,100.0,100.0,100.0,93.3
2013-04-01 00:09:00,btc,usd,93.3,93.3,93.3,93.3,33.676862
2013-04-01 00:10:00,btc,usd,93.3,93.3,93.3,93.3,0.0
2013-04-01 00:11:00,btc,usd,93.35,93.47,93.35,93.35,20.0
2013-04-01 00:12:00,btc,usd,93.47,93.47,93.47,93.47,2.021627
2013-04-01 00:13:00,btc,usd,93.03001,93.03001,93.03,93.03001,4.9854
2013-04-01 00:14:00,btc,usd,93.03001,93.03001,93.03001,93.03001,0.0
2013-04-01 00:15:00,btc,usd,93.03001,93.03001,93.03001,93.03001,0.0
2013-04-01 00:16:00,btc,usd,93.03001,93.03001,93.03001,93.03001,0.0


In [6]:
prices.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,high,low,close,volume
date,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-05-18 06:59:00,luna,usd,15.388,15.388,15.388,15.388,0.0
2021-05-18 06:59:00,sol,usd,50.163,50.163,50.163,50.163,0.0
2021-05-18 07:00:00,aave,usd,647.82,647.82,647.82,647.82,0.013275
2021-05-18 07:00:00,luna,usd,15.388,15.388,15.388,15.388,0.0
2021-05-18 07:00:00,sol,usd,50.163,50.163,50.163,50.163,0.0
2021-05-18 07:01:00,luna,usd,15.376,15.406,15.376,15.376,579.2982
2021-05-18 07:01:00,sol,usd,50.304,50.304,50.301,50.304,36.807817
2021-05-18 07:02:00,luna,usd,15.42,15.425,15.42,15.42,1198.3831
2021-05-18 07:03:00,luna,usd,15.438,15.438,15.438,15.438,17.528177
2021-05-18 07:04:00,luna,usd,15.448,15.448,15.448,15.448,34.593475


In [7]:
DATA_STORE = '../data/crypto.h5'
with pd.HDFStore(DATA_STORE) as store:
    store.put('crypto/caggle/prices', prices)
print("done")


done


https://zaxrosenberg.com/pandas-multiindex-tutorial/


In [8]:
prices.index.get_level_values('symbol').unique()

Index(['btc', 'ltc', 'eth', 'etc', 'xmr', 'xrp', 'miota', 'eos', 'neo', 'trx',
       'dai', 'mtn', 'xlm', 'mkr', 'man', 'vet', 'xtz', 'bsv', 'usdt', 'usdc',
       'btt', 'atom', 'wbtc', 'okb', 'algo', 'ftt', 'doge', 'ada', 'dot',
       'ksm', 'uni', 'fil', 'sol', 'aave', 'avax', 'link', 'luna', 'bch'],
      dtype='object', name='symbol')

In [9]:
# inplace
idx = pd.IndexSlice
prices.loc[idx['2021']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,high,low,close,volume
date,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,ada,usd,0.18148,0.18148,0.18148,0.18148,0.000000
2021-01-01 00:00:00,algo,usd,0.33618,0.33631,0.33618,0.33618,498.968034
2021-01-01 00:00:00,atom,usd,6.46280,6.46280,6.46280,6.46280,0.000000
2021-01-01 00:00:00,bsv,usd,163.20000,163.20000,163.20000,163.20000,0.000000
2021-01-01 00:00:00,btc,usd,28939.00000,28968.00000,28927.00000,28939.00000,8.170374
...,...,...,...,...,...,...,...
2021-05-18 07:01:00,luna,usd,15.37600,15.40600,15.37600,15.37600,579.298200
2021-05-18 07:01:00,sol,usd,50.30400,50.30400,50.30100,50.30400,36.807817
2021-05-18 07:02:00,luna,usd,15.42000,15.42500,15.42000,15.42000,1198.383100
2021-05-18 07:03:00,luna,usd,15.43800,15.43800,15.43800,15.43800,17.528177


In [10]:
# copies.. 
prices.xs('btc', level='symbol').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
date,base,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-04-01 00:07:00,usd,93.25,93.3,93.25,93.25,93.3
2013-04-01 00:08:00,usd,100.0,100.0,100.0,100.0,93.3
2013-04-01 00:09:00,usd,93.3,93.3,93.3,93.3,33.676862
2013-04-01 00:10:00,usd,93.3,93.3,93.3,93.3,0.0
2013-04-01 00:11:00,usd,93.35,93.47,93.35,93.35,20.0
2013-04-01 00:12:00,usd,93.47,93.47,93.47,93.47,2.021627
2013-04-01 00:13:00,usd,93.03001,93.03001,93.03,93.03001,4.9854
2013-04-01 00:14:00,usd,93.03001,93.03001,93.03001,93.03001,0.0
2013-04-01 00:15:00,usd,93.03001,93.03001,93.03001,93.03001,0.0
2013-04-01 00:16:00,usd,93.03001,93.03001,93.03001,93.03001,0.0


In [11]:
# buggy date
prices.xs(('2018-07-01', 'btc', 'usd'), level=['date', 'symbol', 'base'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,high,low,close,volume
date,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-12-09 19:42:00,btc,usd,415.73,415.73,415.73,415.73,0.030000
2015-12-09 19:43:00,btc,usd,415.73,415.73,415.73,415.73,8.024132
2015-12-09 19:44:00,btc,usd,415.72,415.72,415.68,415.72,3.773917
2015-12-09 19:45:00,btc,usd,415.63,415.68,415.00,415.63,52.890743
2015-12-09 19:46:00,btc,usd,415.01,415.01,414.74,415.01,70.141385
...,...,...,...,...,...,...,...
2015-12-10 07:37:00,btc,usd,412.69,413.49,412.66,412.69,38.824753
2015-12-10 07:38:00,btc,usd,413.50,413.50,412.79,413.50,1.347561
2015-12-10 07:39:00,btc,usd,413.50,413.50,413.50,413.50,0.000000
2015-12-10 07:40:00,btc,usd,413.46,413.46,413.46,413.46,0.023000


In [12]:
prices.xs(('btc', 'usd'), level=['symbol', 'base'])

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-04-01 00:07:00,93.25,93.300000,93.250000,93.25,93.300000
2013-04-01 00:08:00,100.00,100.000000,100.000000,100.00,93.300000
2013-04-01 00:09:00,93.30,93.300000,93.300000,93.30,33.676862
2013-04-01 00:10:00,93.30,93.300000,93.300000,93.30,0.000000
2013-04-01 00:11:00,93.35,93.470000,93.350000,93.35,20.000000
...,...,...,...,...,...
2021-05-18 06:08:00,45541.00,45554.000000,45511.000000,45541.00,7.038603
2021-05-18 06:09:00,45518.00,45657.000000,45515.000000,45518.00,10.710958
2021-05-18 06:10:00,45657.00,45665.000000,45569.000000,45657.00,5.758582
2021-05-18 06:11:00,45605.00,45746.162878,45604.000000,45605.00,12.267985


In [13]:

# seems only the first result
result = prices.query("'symbol =='btc'")
print(result)

SyntaxError: invalid syntax (<unknown>, line 1)

In [None]:
'btc' in prices.index.get_level_values('symbol') and 'usd' in prices.index.get_level_values('base')

In [None]:
'btc' in prices.index.get_level_values('symbol') and 'xxxn' in prices.index.get_level_values('base')