Import modules

In [22]:
import sys, os
import codecs
import json

# The base directory must be defined before anything is derived from it;
# on a fresh kernel, using it first raises NameError.
BASEDIR = ""
CACHEDIR = os.path.join(BASEDIR, "cache")
COINS = "coins"
TOKENS = "tokens"

# Put the two helper project directories at the front of the module search path.
sys.path.insert(0, os.path.join(BASEDIR, "coinmarketcap-scraper"))
sys.path.insert(0, os.path.join(BASEDIR, "coinmarketcap-history"))

# exist_ok=True replaces the separate exists() check, so re-running the cell
# (or a concurrent creation of the directory) cannot fail between check and create.
os.makedirs(CACHEDIR, exist_ok=True)
def getPath(filename):
    """Return the path of ``filename`` inside the cache directory (CACHEDIR)."""
    cachedPath = os.path.join(CACHEDIR, filename)
    return cachedPath

Wrapper for coinmarketcap-scraper

In [10]:
import coinmarketcap
def scrapeCoinList():
    """Request the 'all' view of the coin listing and return its parsed form.

    Returns whatever ``coinmarketcap.parseList`` produces for the
    'currencies' list type.
    """
    # Reset the scraper's lastReqTime before requesting (presumably so no
    # rate-limit delay carries over -- confirm against the coinmarketcap module).
    coinmarketcap.lastReqTime = None
    listingHtml = coinmarketcap.requestList('coins', 'all')
    return coinmarketcap.parseList(listingHtml, 'currencies')


def scrapeTokenList():
    """Request the 'all' view of the token listing and return its parsed form.

    Returns whatever ``coinmarketcap.parseList`` produces for the
    'assets' list type.
    """
    # Reset the scraper's lastReqTime before requesting (presumably so no
    # rate-limit delay carries over -- confirm against the coinmarketcap module).
    coinmarketcap.lastReqTime = None
    listingHtml = coinmarketcap.requestList('tokens', 'all')
    return coinmarketcap.parseList(listingHtml, 'assets')

Wrapper around coinmarketcap_usd_history

In [45]:
import coinmarketcap_usd_history
def downloadHistoricalData(currency, startDate, endDate):
    """Download and parse the historical data of one currency.

    Args:
        currency: identifier passed through to
            ``coinmarketcap_usd_history.download_data``.
        startDate: first day of the range; a ``datetime.date`` /
            ``datetime.datetime`` or an already formatted ``YYYYMMDD`` string.
        endDate: last day of the range, same accepted types as ``startDate``.

    Returns:
        The ``(header, rows)`` pair produced by
        ``coinmarketcap_usd_history.extract_data``.
    """
    # Imported here so the function does not rely on a later notebook cell
    # having imported datetime before it is called.
    import datetime

    def historicalDate(date):
        """Format date objects as YYYYMMDD; pass any other value through unchanged."""
        # datetime.datetime is a subclass of datetime.date, so this covers both.
        if isinstance(date, datetime.date):
            return date.strftime("%Y%m%d")
        return date

    startDate, endDate = historicalDate(startDate), historicalDate(endDate)
    # Progress line: one per requested currency.
    print(currency, startDate, endDate)
    html = coinmarketcap_usd_history.download_data(currency, startDate, endDate)
    header, rows = coinmarketcap_usd_history.extract_data(html)
    return header, rows

Functions for scraping currencies

In [31]:
def readCachedCurrenciesFile(filename):
    """Load a cached JSON document from the cache directory.

    Returns the decoded JSON content, or an empty list when the file is
    missing or does not contain valid JSON.
    """
    cachePath = getPath(filename)
    if os.path.exists(cachePath):
        with codecs.open(cachePath, "r", encoding="UTF-8") as stream:
            try:
                return json.load(stream)
            except json.JSONDecodeError:
                # Unreadable cache: fall through to the "no cache" result.
                pass
    return []

def writeCachedCurrenciesFile(filename, data):
    """Serialize ``data`` as 4-space-indented JSON into the cache directory.

    An existing file of the same name is overwritten.
    """
    target = getPath(filename)
    with codecs.open(target, mode="w", encoding="UTF-8") as stream:
        json.dump(data, stream, indent=4)
      
def _loadOrScrape(filename, scrape):
    """Return the list cached in ``filename``; scrape and cache it when the cache is empty."""
    currencies = readCachedCurrenciesFile(filename)
    if not currencies:
        # Nothing usable in the cache: fetch a fresh list and store it.
        currencies = scrape()
        writeCachedCurrenciesFile(filename, currencies)
    print(len(currencies))
    return currencies


def scrapeCurrencies():
    """Load the coin and token lists, scraping only what is not cached.

    Each list is read from its cache file ("coins.txt" / "tokens.txt"); an
    empty or missing cache triggers the matching scraper and the result is
    written back. The length of each list is printed.

    Returns:
        A ``(coins, tokens)`` tuple.
    """
    coins = _loadOrScrape("coins.txt", scrapeCoinList)
    tokens = _loadOrScrape("tokens.txt", scrapeTokenList)
    return coins, tokens
   
# Load both currency lists (from cache when available); the two printed
# numbers are the sizes of the coin and token lists.
coins, tokens = scrapeCurrencies()

896
546


Download currency

In [None]:
#import logging
#logging.basicConfig(
#    level=logging.ERROR,
#    format='%(asctime)s %(levelname)s: %(message)s',
#    datefmt='%m/%d/%Y %I:%M:%S %p')

import itertools
import datetime
import csv

def loadCurrencyFromCsv(currency):
    """Read the cached CSV of ``currency`` from the cache directory.

    Returns:
        A ``(header, data)`` pair: the first CSV row and the list of all
        remaining rows. Both are empty lists when the file does not exist;
        ``header`` is empty when the file has no rows at all.
    """
    csvPath = getPath("{}.csv".format(currency))
    if not os.path.exists(csvPath):
        return [], []
    with codecs.open(csvPath, "r", encoding="UTF-8") as stream:
        rows = csv.reader(stream)
        # An empty file yields no header row; fall back to an empty header.
        header = next(rows, [])
        data = list(rows)
    return header, data

def saveCurrencyToCsv(currency, header, data):
    """Write ``header`` and ``data`` to the cached CSV of ``currency``.

    Rows are written in descending order of their first column; the caller's
    ``data`` list itself is not reordered. An existing file is overwritten.

    Args:
        currency: name used to build the "<currency>.csv" cache file name.
        header: the header row.
        data: iterable of rows; each row must have at least one column.
    """
    path = getPath("{}.csv".format(currency))
    newData = sorted(data, key=lambda row: row[0], reverse=True)
    with codecs.open(path, "w", encoding="UTF-8") as fp:
        # QUOTE_MINIMAL output is identical to QUOTE_NONE for plain fields, but
        # a field containing a comma or quote is quoted instead of raising
        # csv.Error after the file has already been truncated.
        writer = csv.writer(fp, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)
        writer.writerows(newData)


def downloadCurrency(currency):
    """Bring the cached CSV history of one currency up to date.

    The cached rows are loaded, the day after the most recent cached date
    (or after 2001-01-01 when nothing is cached) becomes the start of the
    request, and the current UTC day at midnight is its end. Downloaded rows
    lose their last column, get their date rewritten as YYYY-MM-DD, and are
    saved together with the cached rows.

    Args:
        currency: mapping with a "slug" key identifying the currency.

    Returns:
        None. Nothing is downloaded when the start date is not before the
        end date, and nothing is saved when the download yields no rows.
    """
    def toDate(text):
        return datetime.datetime.strptime(text, "%Y-%m-%d")

    slug = currency["slug"]
    # Lower bound used when the cache holds no rows.
    earliest = toDate("2001-01-01")
    # Current UTC time floored to the start of the day.
    today = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0)

    # Cached rows decide where the next download has to start.
    header, data = loadCurrencyFromCsv(slug)
    lastCached = max([earliest] + [toDate(row[0]) for row in data])
    startDate = lastCached + datetime.timedelta(days=1)
    if startDate >= today:
        return

    header, freshRows = downloadHistoricalData(slug, startDate, today)
    if not freshRows:
        return

    # The last column (Average) is dropped from the header and from every row.
    header = header[:-1]
    for fresh in freshRows:
        trimmed = fresh[:-1]
        # Convert the module's "Mon DD YYYY" date into the cache's YYYY-MM-DD form.
        trimmed[0] = datetime.datetime.strptime(
            trimmed[0], "%b %d %Y").strftime("%Y-%m-%d")
        data.append(trimmed)
    saveCurrencyToCsv(slug, header, data)
    
# Update the cached history of every coin first, then of every token.
# Each call prints one progress line when it actually issues a download.
for currency in itertools.chain(coins, tokens):
    downloadCurrency(currency)

ethereum 20010102 20180121
ripple 20010102 20180121
bitcoin-cash 20010102 20180121
cardano 20010102 20180121
litecoin 20010102 20180121
nem 20010102 20180121
neo 20010102 20180121
stellar 20010102 20180121
iota 20010102 20180121
dash 20010102 20180121
monero 20010102 20180121
bitcoin-gold 20010102 20180121
qtum 20010102 20180121
ethereum-classic 20010102 20180121
lisk 20010102 20180121
raiblocks 20010102 20180121
vechain 20010102 20180121
verge 20010102 20180121
zcash 20010102 20180121
siacoin 20010102 20180121
stratis 20010102 20180121
bytecoin-bcn 20010102 20180121
steem 20010102 20180121
bitshares 20010102 20180121
kucoin-shares 20010102 20180121
waves 20010102 20180121
dogecoin 20010102 20180121
electroneum 20010102 20180121
komodo 20010102 20180121
decred 20010102 20180121
ark 20010102 20180121
digibyte 20010102 20180121
hshare 20010102 20180121
smartcash 20010102 20180121
pivx 20010102 20180121
byteball 20010102 20180121
zclassic 20010102 20180121
factom 20010102 20180121
monacoi