# Scrape cryptocurrencies from coinmarketcap
https://coinmarketcap.com/

## Import python standard modules

In [1]:
import sys       # sys.path
import os        # os stuff
import codecs    # codecs.open
import json      # json.dump/load
import itertools # itertools.chain
import datetime  # datetime.utcnow, ...
import csv       # csv.writer/reader

__Constants and helper functions__

In [2]:
def getWorkingDir(path='.'):
    """Return the root of the git working tree that contains *path*.

    Starting at *path* (made absolute first), walk upwards through the
    parent directories until one is found that contains a ``.git`` entry.

    :param path: directory to start the search from; defaults to the
        current directory.
    :returns: absolute path of the closest directory (``path`` itself or
        one of its ancestors) that contains ``.git``.
    :raises ValueError: if *path* is empty.
    :raises FileNotFoundError: if the filesystem root is reached without
        finding a ``.git`` entry.
    """
    if not path:
        raise ValueError("path must not be empty")
    current = os.path.abspath(path)
    while True:
        if os.path.exists(os.path.join(current, '.git')):
            return current
        parent = os.path.dirname(current)
        # dirname() of a filesystem root yields the root itself, both for
        # "/" on POSIX and for a drive root on Windows, so an unchanged
        # value means there is nothing left to climb to.
        if parent == current:
            raise FileNotFoundError(
                "no .git directory found in {!r} or any of its parents".format(
                    os.path.abspath(path)))
        current = parent

# BASEDIR is the path to the working directory of this git repository
BASEDIR = getWorkingDir()
# EXTMODULES is the directory that is searched for the third-party modules
EXTMODULES = os.path.join(BASEDIR, "ext")
# CACHEDIR is the directory in which all cache files are stored
CACHEDIR = os.path.join(BASEDIR, "cache")
COINS = "coins"
TOKENS = "tokens"

# Create the cache directory on first use. exist_ok=True replaces the
# former "check, then create" sequence, which could fail if the directory
# appeared between the two calls.
os.makedirs(CACHEDIR, exist_ok=True)
def getPath(filename):
    """Return the path of *filename* inside the cache directory."""
    cachedFile = os.path.join(CACHEDIR, filename)
    return cachedFile

## Import third-party modules
* https://github.com/dahuebi/coinmarketcap-history.git
* https://github.com/dahuebi/coinmarketcap.git

In [3]:
# Put the third-party module directories at the front of the import path.
# The history module directory ends up first, the scraper directory second.
sys.path[:0] = [
    os.path.join(EXTMODULES, "coinmarketcap-history"),
    os.path.join(EXTMODULES, "coinmarketcap-scraper"),
]

# import thirdparty modules
import coinmarketcap
import coinmarketcap_usd_history

### coinmarketcap-scraper wrapper
Scrape __coins__ and __tokens__ from __coinmarketcap.com__

In [4]:
# please see https://github.com/dahuebi/coinmarketcap.git
def scrapeCoinList():
    """Request the complete coin listing and return its parsed content.

    The page is fetched with ``coinmarketcap.requestList`` and handed to
    ``coinmarketcap.parseList``; the parser's result is returned as is.
    """
    # Clear the module's last-request timestamp before fetching
    # (presumably this disables its request throttling -- TODO confirm).
    coinmarketcap.lastReqTime = None
    page = coinmarketcap.requestList('coins', 'all')
    return coinmarketcap.parseList(page, 'currencies')

def scrapeTokenList():
    """Request the complete token listing and return its parsed content.

    The page is fetched with ``coinmarketcap.requestList`` and handed to
    ``coinmarketcap.parseList``; the parser's result is returned as is.
    """
    # Clear the module's last-request timestamp before fetching
    # (presumably this disables its request throttling -- TODO confirm).
    coinmarketcap.lastReqTime = None
    page = coinmarketcap.requestList('tokens', 'all')
    return coinmarketcap.parseList(page, 'assets')

### coinmarketcap_usd_history wrapper
Scrape __coin/token__ data like _open_, _high_, _low_, _close_, _volume_, _marketcap_.

In [5]:
# please see https://github.com/dahuebi/coinmarketcap-history.git
def downloadHistoricalData(currency, startDate, endDate):
    """Download the historical data of *currency* for a date range.

    *startDate* and *endDate* may be ``datetime.datetime`` objects, which
    are converted to ``YYYYMMDD`` strings; any other value is passed on
    unchanged. The request is printed before it is issued.

    Returns the *header* and *rows* extracted by
    ``coinmarketcap_usd_history.extract_data``.
    """
    # date format required by coinmarketcap_usd_history
    startDate, endDate = [
        day.strftime("%Y%m%d") if isinstance(day, datetime.datetime) else day
        for day in (startDate, endDate)
    ]
    print(currency, startDate, endDate)
    page = coinmarketcap_usd_history.download_data(currency, startDate, endDate)
    columns, records = coinmarketcap_usd_history.extract_data(page)
    return columns, records

## Scrape currencies

The currencies are __cached__ in __json__-encoded files.

In [6]:
def readCachedCurrenciesFile(filename):
    """Load the cached currencies stored under *filename*.

    Returns the decoded JSON content of the cache file. An empty list is
    returned when the file does not exist or does not contain valid JSON.
    """
    cachePath = getPath(filename)
    if os.path.exists(cachePath):
        with codecs.open(cachePath, "r", encoding="UTF-8") as fp:
            try:
                cached = json.load(fp)
            except json.JSONDecodeError:
                # an unreadable cache is treated like a missing one
                cached = []
        return cached
    return []

def writeCachedCurrenciesFile(filename, data):
    """Store *data* as indented JSON in the cache file *filename*.

    An existing file of that name is overwritten.
    """
    cachePath = getPath(filename)
    with codecs.open(cachePath, "w", encoding="UTF-8") as cacheFile:
        json.dump(data, cacheFile, indent=4)

The function that scrapes the currencies.

In [7]:
def scrapeCurrencies():
    """Return the coin and token listings, scraping only on a cache miss.

    Each listing is read from its cache file ("coins.txt" / "tokens.txt").
    When the cache yields nothing, the listing is scraped and written back
    to the cache. The number of entries of each listing is printed.

    Returns the tuple ``(coins, tokens)``.
    """
    def cachedOrScraped(cacheName, scrape):
        # prefer the cache; fall back to scraping and refresh the cache
        entries = readCachedCurrenciesFile(cacheName)
        if not entries:
            entries = scrape()
            writeCachedCurrenciesFile(cacheName, entries)
        print(len(entries))
        return entries

    coins = cachedOrScraped("coins.txt", scrapeCoinList)
    tokens = cachedOrScraped("tokens.txt", scrapeTokenList)
    return coins, tokens

## Scrape historical data

The historical data is __cached__ in __csv__ files.

In [8]:
def loadCurrencyFromCsv(currency):
    """Read the cached historical data of *currency*.

    Returns the tuple ``(header, rows)``: the first line of the csv file
    and a list of all remaining lines. Both are empty lists when the cache
    file does not exist; *header* is also empty for an empty file.
    """
    csvPath = getPath("{}.csv".format(currency))
    if not os.path.exists(csvPath):
        return [], []
    with codecs.open(csvPath, "r", encoding="UTF-8") as fp:
        lines = csv.reader(fp)
        # an empty file has no header line; fall back to an empty header
        header = next(lines, [])
        rows = list(lines)
    return header, rows

def saveCurrencyToCsv(currency, header, data):
    """Write the historical data of *currency* to its csv cache file.

    *header* becomes the first line, followed by the rows of *data* in
    descending order of their first column. Fields are written unquoted.
    An existing file is overwritten; *data* itself is left unmodified.
    """
    csvPath = getPath("{}.csv".format(currency))
    ordered = list(data)
    ordered.sort(key=lambda row: row[0], reverse=True)
    with codecs.open(csvPath, "w", encoding="UTF-8") as fp:
        csvOut = csv.writer(fp, quoting=csv.QUOTE_NONE)
        csvOut.writerows(itertools.chain([header], ordered))

The function to download the historical data for one currency.

In [9]:
def downloadCurrency(currency):
    """Bring the cached historical data of one *currency* up to date.

    *currency* is a mapping whose "slug" entry names the currency. Data is
    requested only for the days after the newest cached date (or after
    2001-01-01 when nothing is cached) up to the current UTC day. Nothing
    is downloaded when that range is empty, and nothing is written when the
    download yields no rows. Otherwise the new rows, with their last column
    dropped and their date rewritten as YYYY-MM-DD, are added to the cached
    rows and the csv cache is rewritten.
    """
    def toDate(text):
        return datetime.datetime.strptime(text, "%Y-%m-%d")

    slug = currency["slug"]
    # lower bound used when the cache holds no rows
    earliest = toDate("2001-01-01")
    # current UTC time, floored to the start of the day
    today = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0)

    # newest date already present in the cache
    header, data = loadCurrencyFromCsv(slug)
    newest = max([earliest] + [toDate(cached[0]) for cached in data])
    # continue with the day after the newest cached one
    startDate = newest + datetime.timedelta(days=1)
    if startDate >= today:
        return

    header, fetched = downloadHistoricalData(slug, startDate, today)
    if not fetched:
        return
    # drop the trailing "Average" column from the header and from each row
    header = header[:-1]
    for fetchedRow in fetched:
        trimmed = fetchedRow[:-1]
        # the module delivers dates like "Jan 21 2018"; store them as ISO dates
        stamp = datetime.datetime.strptime(trimmed[0], "%b %d %Y")
        trimmed[0] = stamp.strftime("%Y-%m-%d")
        data.append(trimmed)
    saveCurrencyToCsv(slug, header, data)

## Run the code
The result is a filled __CACHE__ directory.
* _coins.txt_/_tokens.txt_ contain the coin and token information
* _<coin/token name>.csv_ contain the historical data

In [10]:
# load the coin and token listings (scraped on a cache miss)
coins, tokens = scrapeCurrencies()

# update the historical data cache: first for every coin, then every token
for currency in itertools.chain.from_iterable((coins, tokens)):
    downloadCurrency(currency)

896
546
dash 20180121 20180123
monero 20180121 20180123
bitcoin-gold 20180121 20180123
qtum 20180121 20180123
ethereum-classic 20180121 20180123
lisk 20180121 20180123
raiblocks 20180121 20180123
vechain 20180121 20180123
verge 20180121 20180123
zcash 20180121 20180123
siacoin 20180121 20180123
stratis 20180121 20180123
bytecoin-bcn 20180121 20180123
steem 20180121 20180123
bitshares 20180121 20180123
kucoin-shares 20180121 20180123
waves 20180121 20180123
dogecoin 20180121 20180123
electroneum 20180121 20180123
komodo 20180121 20180123
decred 20180121 20180123
ark 20180121 20180123
digibyte 20180121 20180123
hshare 20180121 20180123
smartcash 20180121 20180123
pivx 20180121 20180123
byteball 20180121 20180123
zclassic 20180121 20180123
factom 20180121 20180123
monacoin 20180121 20180123
reddcoin 20180121 20180123
syscoin 20180121 20180123
nexus 20180121 20180123
neblio 20180121 20180123
experience-points 20180121 20180123
emercoin 20180121 20180123
zcoin 20180121 20180123
bitcore 2018

tittiecoin 20180121 20180123
copico 20180121 20180123
aurumcoin 20180121 20180123
cream 20180121 20180123
mazacoin 20180121 20180123
altcommunity-coin 20180121 20180123
unbreakablecoin 20180121 20180123
42-coin 20180121 20180123
fastcoin 20180121 20180123
op-coin 20180121 20180123
teslacoin 20180121 20180123
incakoin 20180121 20180123
abjcoin 20180121 20180123
piggycoin 20180121 20180123
halcyon 20180121 20180123
steneum-coin 20180121 20180123
desire 20180121 20180123
inflationcoin 20180121 20180123
kekcoin 20180121 20180123
unitus 20180121 20180123
bata 20180121 20180123
centurion 20180121 20180123
machinecoin 20180121 20180123
droxne 20180121 20180123
bunnycoin 20180121 20180123
cryptocarbon 20180121 20180123
i0coin 20180121 20180123
growers-international 20180121 20180123
elementrem 20180121 20180123
smileycoin 20180121 20180123
zetacoin 20180121 20180123
bitcurrency 20180121 20180123
zennies 20180121 20180123
orbitcoin 20180121 20180123
sagacoin 20180121 20180123
digitalcoin 201801

cryptcoin 20171229 20180123
fimkrypto 20180121 20180123
shadowcash 20180121 20180123
corgicoin 20180121 20180123
yashcoin 20180121 20180123
shorty 20180121 20180123
ultimate-secure-cash 20180121 20180123
virtacoin 20180121 20180123
truckcoin 20180117 20180123
pascal-lite 20180117 20180123
tattoocoin 20180121 20180123
metalcoin 20180121 20180123
universe 20180121 20180123
grimcoin 20180121 20180123
signatum 20180121 20180123
ambercoin 20180121 20180123
fincoin 20180121 20180123
stress 20180121 20180123
wayguide 20180121 20180123
valorbit 20180121 20180123
bitz 20180121 20180123
px 20180121 20180123
globalcoin 20180121 20180123
firecoin 20180121 20180123
virtacoinplus 20180121 20180123
joincoin 20180121 20180123
satoshimadness 20180121 20180123
bitcoin-planet 20180121 20180123
secretcoin 20180121 20180123
cashcoin 20180121 20180123
wmcoin 20180121 20180123
franko 20180121 20180123
yacoin 20180121 20180123
virta-unique-coin 20180121 20180123
sling 20180121 20180123
freicoin 20180121 20180

icon 20180121 20180123
omisego 20180121 20180123
tether 20180121 20180123
populous 20180121 20180123
binance-coin 20180121 20180123
ardor 20180121 20180123
status 20180121 20180123
maker 20180121 20180123
augur 20180121 20180123
0x 20180121 20180123
veritaseum 20180121 20180123
walton 20180121 20180123
dragonchain 20180121 20180123
loopring 20180121 20180123
dentacoin 20180121 20180123
salt 20180121 20180123
basic-attention-token 20180121 20180123
qash 20180121 20180123
golem-network-tokens 20180121 20180123
gas 20180121 20180123
kyber-network 20180121 20180123
wax 20180121 20180123
ethos 20180121 20180123
rchain 20180121 20180123
funfair 20180121 20180123
aion 20180121 20180123
dent 20180121 20180123
aelf 20180121 20180123
power-ledger 20180121 20180123
kin 20180121 20180123
digixdao 20180121 20180123
nebulas-token 20180121 20180123
bytom 20180121 20180123
aeternity 20180121 20180123
enigma-project 20180121 20180123
request-network 20180121 20180123
chainlink 20180121 20180123
cindica

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
