# Market Cap

- load coinmarketcap data 
- create an crypto equivalent of "stocks" with marketcap, IPO, sector 



In [1]:
import time
import random
import pandas as pd
from tqdm import tqdm
from pycoingecko import CoinGeckoAPI

from utils import reduce_footprint

cg = CoinGeckoAPI()
import numpy as np

In [2]:
# get top 100 coins from coingecko 

top_100_coins = cg.get_coins_markets(vs_currency='usd')

top_100_coins_df = pd.DataFrame(top_100_coins)
top_100_coins_df.set_index('id',inplace=True)
col_list = ['market_cap', ]
top_100_coins_df = top_100_coins_df.loc[:, col_list]

top_100_coins_list = top_100_coins_df.index.tolist()


top_100_coins_df

Unnamed: 0_level_0,market_cap
id,Unnamed: 1_level_1
bitcoin,650002292205
ethereum,247594738948
tether,62391302640
binancecoin,45205438269
cardano,42653622607
...,...
bitcoin-cash-abc-2,630498881
compound-usdt,619453162
ontology,610967251
curve-dao-token,594484815


In [3]:
# for each coin id get detailed information and store it into a market_df

market = []
raw_data = []
for idx in tqdm(top_100_coins_list):
    coin = cg.get_coin_by_id(id=idx, localization=False, vs_currencies='usd',include_market_cap=False,
                             include_24hr_vol=False,include_24hr_change=False,include_last_updated_at=False)
    raw_data.append(coin)
    coin_info = [idx]
    coin_info.append(coin['symbol'])
    coin_info.append(str(coin['name']))
    symbol = coin['symbol']
    categories = coin['categories']
    #todo: take most popular sector to achieve biggest groups
    if type(categories) is list:
        if len(categories) > 0:
            sector = categories[0]
        else:
            sector = 'None'
    else:
        raise NotImplementedError
    coin_info.append(sector)

    coin_info.append(coin['genesis_date'])
    coin_info.append(coin['market_cap_rank'])
    coin_info.append(str(coin['hashing_algorithm']))
    coin_info.append(coin['coingecko_rank'])
    coin_info.append(coin['coingecko_score'])
    coin_info.append(coin['developer_score'])
    coin_info.append(coin['community_score'])
    coin_info.append(coin['liquidity_score'])
    coin_info.append(coin['public_interest_score'])
    market.append(coin_info)
    time.sleep(random.randint(0, 5))
    
market_df = pd.DataFrame(market, columns = ['id', 'symbol', 'name', 'sector', 'genesis_date',
                                            'market_cap_rank', 'hashing_algorithm', 
                                            'coingecko_rank', 'coingecko_score', 'developer_score', 'community_score', 
                                            'liquidity_score', 'public_interest_score'])
    
market_df['genesis_date'] = pd.to_datetime(market_df['genesis_date'])
market_df['base'] = 'usd'

100%|██████████| 100/100 [04:36<00:00,  2.76s/it]


In [4]:
market_df['ticker'] = market_df['symbol'] + '-' + market_df['base']

In [5]:
# combine the information into the market

market = top_100_coins_df.merge(market_df, on='id')

market.set_index(['ticker', 'symbol', 'base'], inplace=True)

In [6]:
market = reduce_footprint(market, float_type='float32')

market

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,market_cap,name,sector,genesis_date,market_cap_rank,hashing_algorithm,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score
ticker,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
btc-usd,btc,usd,bitcoin,-8723,Bitcoin,Cryptocurrency,2009-01-03,1,SHA-256,1,81.994003,104.080002,72.552002,99.973999,0.365
eth-usd,eth,usd,ethereum,-6908,Ethereum,Smart Contract Platform,2015-07-30,2,Ethash,2,77.875999,101.994003,61.507000,97.810997,0.507
usdt-usd,usdt,usd,tether,-17936,Tether,USD Stablecoin,NaT,3,,149,41.532001,0.000000,10.866000,104.026001,0.069
bnb-usd,bnb,usd,binancecoin,16189,Binance Coin,Centralized Exchange Token (CEX),2017-07-08,4,,5,67.561996,77.110001,63.431999,77.880997,14.394
ada-usd,ada,usd,cardano,-24241,Cardano,Smart Contract Platform,NaT,5,,6,65.686996,73.917000,58.050999,82.440002,0.290
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
bcha-usd,bcha,usd,bitcoin-cash-abc-2,-22975,Bitcoin Cash ABC,Cryptocurrency,NaT,96,,867,22.743000,0.000000,0.000000,41.719002,0.005
cusdt-usd,cusdt,usd,compound-usdt,6890,cUSDT,Finance / Banking,NaT,97,,380,30.802000,0.000000,38.096001,37.498001,0.086
ont-usd,ont,usd,ontology,-24877,Ontology,Near Protocol Ecosystem,2018-02-26,98,,25,57.389999,84.526001,41.320999,61.799000,0.014
crv-usd,crv,usd,curve-dao-token,7759,Curve DAO Token,Decentralized Exchange Token (DEX),NaT,99,,99,46.368999,68.565002,10.923000,60.393002,0.104


In [7]:
market.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,market_cap,name,sector,genesis_date,market_cap_rank,hashing_algorithm,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score
ticker,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
btc-usd,btc,usd,bitcoin,-8723,Bitcoin,Cryptocurrency,2009-01-03,1,SHA-256,1,81.994003,104.080002,72.552002,99.973999,0.365
eth-usd,eth,usd,ethereum,-6908,Ethereum,Smart Contract Platform,2015-07-30,2,Ethash,2,77.875999,101.994003,61.507,97.810997,0.507
usdt-usd,usdt,usd,tether,-17936,Tether,USD Stablecoin,NaT,3,,149,41.532001,0.0,10.866,104.026001,0.069
bnb-usd,bnb,usd,binancecoin,16189,Binance Coin,Centralized Exchange Token (CEX),2017-07-08,4,,5,67.561996,77.110001,63.431999,77.880997,14.394
ada-usd,ada,usd,cardano,-24241,Cardano,Smart Contract Platform,NaT,5,,6,65.686996,73.917,58.050999,82.440002,0.29
doge-usd,doge,usd,dogecoin,-26110,Dogecoin,Meme Tokens,2013-12-08,6,Scrypt,3,70.210999,78.168999,75.267998,82.249001,0.213
xrp-usd,xrp,usd,ripple,-20166,XRP,Cryptocurrency,NaT,7,,4,68.098999,89.317001,52.574001,82.374001,0.168
usdc-usd,usdc,usd,usd-coin,1162,USD Coin,USD Stablecoin,NaT,8,,45,53.308998,77.436996,0.0,81.487,0.052
dot-usd,dot,usd,polkadot,12227,Polkadot,Polkadot Ecosystem,NaT,9,,143,41.915001,0.0,44.015999,74.639999,0.101
busd-usd,busd,usd,binance-usd,26391,Binance USD,USD Stablecoin,NaT,10,,233,35.965,0.0,9.263,84.891998,0.047


In [8]:
market.index.tolist

<bound method IndexOpsMixin.tolist of MultiIndex([(     'btc-usd',      'btc', 'usd'),
            (     'eth-usd',      'eth', 'usd'),
            (    'usdt-usd',     'usdt', 'usd'),
            (     'bnb-usd',      'bnb', 'usd'),
            (     'ada-usd',      'ada', 'usd'),
            (    'doge-usd',     'doge', 'usd'),
            (     'xrp-usd',      'xrp', 'usd'),
            (    'usdc-usd',     'usdc', 'usd'),
            (     'dot-usd',      'dot', 'usd'),
            (    'busd-usd',     'busd', 'usd'),
            (     'bch-usd',      'bch', 'usd'),
            (     'ltc-usd',      'ltc', 'usd'),
            (     'uni-usd',      'uni', 'usd'),
            (     'sol-usd',      'sol', 'usd'),
            (    'link-usd',     'link', 'usd'),
            (     'etc-usd',      'etc', 'usd'),
            (   'matic-usd',    'matic', 'usd'),
            (    'wbtc-usd',     'wbtc', 'usd'),
            (   'theta-usd',    'theta', 'usd'),
            (     'icp-usd',   

## Create categorical data from raw data and store in seperate data frame

In [9]:
def categorical_dict_to_np(cat_dict, unique_cats):
    # create array of symbols and categoricals 
    cat_np = np.zeros((len(cat_dict), len(unique_cats)), dtype=int)
    for ii, [key, values] in enumerate(cat_dict.items()): 
        for value in values: 
            idx = np.where(unique_cats == value)
            # print("{} is on idx {}".format(value, idx))
            cat_np[ii][idx] = 1
        assert(np.sum(cat_np[ii]) == len(values))
    return cat_np


def create_cat_dict(raw_data): 
    cat_dict = {}
    cat_list = []
    symbols = []
    max_len_cat = 0
    for coin in raw_data:
        categories = coin['categories']
        symbol = coin['symbol']
        cat_dict[symbol] = categories
        cat_list.extend(categories)
        symbols.append(symbol)
        max_len_cat = max(max_len_cat, len(coin['categories']))
    
    unique_cats = np.unique(cat_list)

    return cat_dict, unique_cats, symbols,

def categoricals_from_raw_coingecko(raw_data): 
    cat_dict, unique_cats, symbols = create_cat_dict(raw_data)

    cat_np = categorical_dict_to_np(cat_dict, unique_cats)
    
    cat_df = pd.DataFrame(cat_np, columns=unique_cats)
    cat_df['symbol'] = symbols
    cat_df['base'] = 'usd'
    cat_df['ticker'] = cat_df['symbol'] + '-' + cat_df['base']

    cat_df.set_index(['ticker', 'symbol', 'base'], inplace=True)
    print(unique_cats)
    print(cat_dict)
    return cat_df


cat_df = categoricals_from_raw_coingecko(raw_data)

cat_df = reduce_footprint(cat_df, int_type='uint16')
cat_df

['Analytics' 'Artificial Intelligence' 'Asset-backed Tokens'
 'Automated Market Maker (AMM)' 'Avalanche Ecosystem'
 'Binance Smart Chain Ecosystem' 'Business Platform' 'Business Services'
 'Centralized Exchange Token (CEX)' 'Communication' 'Compound Tokens'
 'Cosmos Ecosystem' 'Cryptocurrency' 'Decentralized Exchange Token (DEX)'
 'Decentralized Finance (DeFi)' 'Derivatives' 'Entertainment'
 'Eth 2.0 Staking' 'Exchange-based Tokens' 'Finance / Banking' 'Gaming'
 'Governance' 'HECO Chain Ecosystem' 'Infrastructure'
 'Internet of Things (IOT)' 'Lending/Borrowing' 'Masternodes'
 'Meme Tokens' 'Metaverse' 'Near Protocol Ecosystem'
 'Non-Fungible Tokens (NFT)' 'Oracle' 'Polkadot Ecosystem'
 'Polygon Ecosystem' 'Privacy Coins' 'Protocol' 'Seigniorage'
 'Smart Contract Platform' 'Solana Ecosystem' 'Sports' 'Stablecoins'
 'Storage' 'Synthetic Issuer' 'Terra Ecosystem' 'Tokenized BTC'
 'USD Stablecoin' 'Wrapped-Tokens' 'Yearn Ecosystem' 'Yield Aggregator'
 'Yield Farming' 'xDAI Ecosystem']
{'bt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Analytics,Artificial Intelligence,Asset-backed Tokens,Automated Market Maker (AMM),Avalanche Ecosystem,Binance Smart Chain Ecosystem,Business Platform,Business Services,Centralized Exchange Token (CEX),Communication,...,Storage,Synthetic Issuer,Terra Ecosystem,Tokenized BTC,USD Stablecoin,Wrapped-Tokens,Yearn Ecosystem,Yield Aggregator,Yield Farming,xDAI Ecosystem
ticker,symbol,base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
btc-usd,btc,usd,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
eth-usd,eth,usd,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
usdt-usd,usdt,usd,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
bnb-usd,bnb,usd,0,0,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
ada-usd,ada,usd,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
bcha-usd,bcha,usd,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cusdt-usd,cusdt,usd,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ont-usd,ont,usd,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
crv-usd,crv,usd,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [10]:
"""
DATA_STORE = '../data/crypto.h5'
with pd.HDFStore(DATA_STORE) as store:
    store.put('coingecko/top100/market', market)
    store.put('coingecko/top100/cats', cat_df)
print("done")
"""

'\nDATA_STORE = \'../data/crypto.h5\'\nwith pd.HDFStore(DATA_STORE) as store:\n    store.put(\'coingecko/top100/market\', market)\n    store.put(\'coingecko/top100/cats\', cat_df)\nprint("done")\n'

In [14]:
market.reset_index().to_feather('../data/crypto/crypto_marketcap.ftr')
cat_df.reset_index().to_feather('../data/crypto/cat_df.ftr')
print("saved")




saved
