# Scrape cryptocurrencies from CoinMarketCap
https://coinmarketcap.com/

## Import python standard modules

In [3]:
import sys       # sys.path
import os        # os stuff
import codecs    # codecs.open
import json      # json.dump/load
import itertools # itertools.chain
import datetime  # datetime.utcnow, ...
import csv       # csv.writer/reader

__Constants and helper functions__

In [5]:
def getWorkingDir(path='.'):
    """Return the closest directory at or above *path* that contains ``.git``.

    The search starts at the absolute form of *path* and moves up one parent
    directory at a time until an entry named ``.git`` exists in it.

    Raises:
        ValueError: if *path* is empty.
        FileNotFoundError: if the filesystem root is reached without finding
            a ``.git`` entry.
    """
    if not path:
        raise ValueError("path must not be empty")
    start = os.path.abspath(path)
    current = start
    while not os.path.exists(os.path.join(current, '.git')):
        parent = os.path.dirname(current)
        # dirname() of a filesystem root returns the root itself (POSIX "/"
        # as well as a Windows drive root), so this is where the walk ends.
        if parent == current:
            raise FileNotFoundError(
                "no .git entry found at or above {!r}".format(start))
        current = parent
    return current

# BASEDIR is the path to the working directory of this git repository
BASEDIR = getWorkingDir()
# directory below BASEDIR that holds the third-party modules
EXTMODULES = os.path.join(BASEDIR, "ext")
# directory below BASEDIR that holds all cached files
CACHEDIR = os.path.join(BASEDIR, "cache")
COINS = "coins"
TOKENS = "tokens"

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test
os.makedirs(CACHEDIR, exist_ok=True)
def getPath(filename):
    """Return the location of *filename* inside the cache directory."""
    cachedFile = os.path.join(CACHEDIR, filename)
    return cachedFile

## Import third-party modules
* https://github.com/dahuebi/coinmarketcap-history.git
* https://github.com/dahuebi/coinmarketcap.git

In [6]:
# Put the third-party module directories (below EXTMODULES) at the front of
# the module search path, so the imports below look there first.
sys.path.insert(0, os.path.join(EXTMODULES, "coinmarketcap-scraper"))
sys.path.insert(0, os.path.join(EXTMODULES, "coinmarketcap-history"))

# Import the third-party modules.
# NOTE(review): presumably these resolve to the two directories added above;
# confirm against the contents of the "ext" directory.
import coinmarketcap
import coinmarketcap_usd_history

### coinmarketcap-scraper wrapper
Scrape __coins__ and __tokens__ from __coinmarketcap.com__

In [7]:
# please see https://github.com/dahuebi/coinmarketcap.git
def scrapeCoinList():
    """Request the full coin list and return it in parsed form.

    The page is requested with ``requestList('coins', 'all')`` and handed to
    ``parseList`` together with ``'currencies'``; the parse result is returned
    as is.
    """
    # NOTE(review): presumably clears the module's request throttling state
    # before the request -- confirm against the coinmarketcap module.
    coinmarketcap.lastReqTime = None
    return coinmarketcap.parseList(
        coinmarketcap.requestList('coins', 'all'), 'currencies')

def scrapeTokenList():
    """Request the full token list and return it in parsed form.

    The page is requested with ``requestList('tokens', 'all')`` and handed to
    ``parseList`` together with ``'assets'``; the parse result is returned
    as is.
    """
    # NOTE(review): presumably clears the module's request throttling state
    # before the request -- confirm against the coinmarketcap module.
    coinmarketcap.lastReqTime = None
    return coinmarketcap.parseList(
        coinmarketcap.requestList('tokens', 'all'), 'assets')

### coinmarketcap_usd_history wrapper
Scrape __coin/token__ data like _open_, _high_, _low_, _close_, _volume_, _marketcap_.

In [8]:
# please see https://github.com/dahuebi/coinmarketcap-history.git
def downloadHistoricalData(currency, startDate, endDate):
    """Download the historical data of *currency* between two dates.

    *startDate* and *endDate* may be ``datetime.date`` or
    ``datetime.datetime`` objects, which are converted to ``YYYYMMDD``
    strings; any other value (e.g. an already formatted string) is passed
    on unchanged.

    Prints the currency and both converted dates, then returns the
    ``(header, rows)`` pair extracted by ``coinmarketcap_usd_history``.
    """
    # date format required by coinmarketcap_usd_history
    def historicalDate(date):
        # datetime.datetime is a subclass of datetime.date, so this single
        # test covers plain dates and full datetimes alike
        if isinstance(date, datetime.date):
            return date.strftime("%Y%m%d")
        return date

    startDate, endDate = historicalDate(startDate), historicalDate(endDate)
    print(currency, startDate, endDate)
    html = coinmarketcap_usd_history.download_data(currency, startDate, endDate)
    header, rows = coinmarketcap_usd_history.extract_data(html)
    return header, rows

## Scrape currencies

The currency lists are __cached__ as __JSON__-encoded files.

In [9]:
def readCachedCurrenciesFile(filename):
    """Read cached currencies from *filename* in the cache directory.

    Returns the JSON-decoded content of the file, i.e. whatever was saved
    there before. Returns an empty list when the file does not exist or when
    its content is not valid UTF-8 encoded JSON, so that callers can treat
    both situations as a cache miss.
    """
    path = getPath(filename)
    try:
        with codecs.open(path, "r", encoding="UTF-8") as fp:
            return json.load(fp)
    except FileNotFoundError:
        # nothing cached yet
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        # corrupt cache file: behave as if nothing was cached
        return []

def writeCachedCurrenciesFile(filename, data):
    """Store *data* as UTF-8 encoded JSON (indent of 4) in cache file *filename*."""
    target = getPath(filename)
    with codecs.open(target, mode="w", encoding="UTF-8") as out:
        json.dump(data, out, indent=4)

The function that scrapes the currency lists, using the cache when it is available.

In [10]:
def scrapeCurrencies():
    """Return the ``(coins, tokens)`` lists, scraping only what is not cached.

    Each list is first read from its cache file (``coins.txt`` and
    ``tokens.txt``). If the cached list is empty, it is scraped and written
    back to the cache. The length of each list is printed.
    """
    def cachedOrScraped(cacheName, scrape):
        # use the cached entries if there are any, else scrape and cache them
        entries = readCachedCurrenciesFile(cacheName)
        if not entries:
            entries = scrape()
            writeCachedCurrenciesFile(cacheName, entries)
        print(len(entries))
        return entries

    coins = cachedOrScraped("coins.txt", scrapeCoinList)
    tokens = cachedOrScraped("tokens.txt", scrapeTokenList)
    return coins, tokens

## Scrape historical data

The historical data is __cached__ as __csv__ files, one file per currency.

In [11]:
def loadCurrencyFromCsv(currency):
    """Read the cached csv file of *currency*.

    Returns ``(header, rows)``: the first csv line and a list of all
    remaining lines. Both are empty lists when the cache file does not
    exist; the header is an empty list when the file has no lines at all.
    """
    cachePath = getPath("{}.csv".format(currency))
    if not os.path.exists(cachePath):
        return [], []
    with codecs.open(cachePath, "r", encoding="UTF-8") as handle:
        rowReader = csv.reader(handle)
        # an empty file has no header line: fall back to an empty header
        columns = next(rowReader, [])
        records = list(rowReader)
    return columns, records

def saveCurrencyToCsv(currency, header, data):
    """Save historical data for the *currency* to its csv cache file.

    *header* is written as the first line, followed by the rows of *data*
    sorted in descending order of their first column. Fields are written
    without quoting (``csv.QUOTE_NONE``).
    """
    path = getPath("{}.csv".format(currency))
    # sort before opening the file, so a failing sort does not truncate it
    newData = sorted(data, key=lambda row: row[0], reverse=True)
    with codecs.open(path, "w", encoding="UTF-8") as fp:
        writer = csv.writer(fp, quoting=csv.QUOTE_NONE)
        writer.writerow(header)
        writer.writerows(newData)

The function to download the historical data for one currency.

In [12]:
def downloadCurrency(currency):
    """Download historical data for a single *currency*.

    *currency* is a mapping with a ``"slug"`` entry; the slug is used both
    for the download request and as the name of the csv cache file.

    Only the days after the newest cached date are requested, up to the
    current UTC day. Nothing is requested when the cache already reaches the
    current UTC day. When new rows are returned, they are appended to the
    cached rows and the csv cache is rewritten. Returns ``None``.
    """
    def parseDate(text):
        return datetime.datetime.strptime(text, "%Y-%m-%d")

    slug = currency["slug"]
    # set default start date, used when nothing is cached yet
    startDate = parseDate("2001-01-01")
    # current UTC day at midnight; tzinfo is dropped again so the value stays
    # comparable with the naive dates parsed from the cache
    # (datetime.utcnow() is deprecated since Python 3.12)
    endDate = datetime.datetime.now(datetime.timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=None)

    # try to load data from cache and find the newest cached date
    header, data = loadCurrencyFromCsv(slug)
    for row in data:
        startDate = max(startDate, parseDate(row[0]))
    # continue with the day after the newest known date
    startDate += datetime.timedelta(days=1)
    if startDate >= endDate:
        return

    header, newData = downloadHistoricalData(slug, startDate, endDate)
    if not newData:
        return
    # drop the last column (the average) from the header and from every row
    header = header[:-1]
    for row in newData:
        row = row[:-1]
        # convert the date format of the module ("%b %d %Y") to "%Y-%m-%d"
        row[0] = datetime.datetime.strptime(row[0], "%b %d %Y").strftime("%Y-%m-%d")
        data.append(row)
    saveCurrencyToCsv(slug, header, data)

## Run the code

In [13]:
# load the coin and token lists (from the cache or freshly scraped)
coins, tokens = scrapeCurrencies()

# update the historical data: first for every coin, then for every token
for currencyList in (coins, tokens):
    for currency in currencyList:
        downloadCurrency(currency)

896
546
bitcoin 20180121 20180123
ethereum 20180121 20180123
ripple 20180121 20180123
bitcoin-cash 20180121 20180123
cardano 20180121 20180123
litecoin 20180121 20180123
nem 20180121 20180123
neo 20180121 20180123
stellar 20180121 20180123
iota 20180121 20180123
dash 20180121 20180123


KeyboardInterrupt: 