In [29]:
import time
import crycompare
import pandas as pd
import os
def download_hist_price_data(base_cur, dest_cur, granularity='hour', GET_DATA_UNTIL_DATE='today', verbose=False):
    """
    Load historical price data from cryptocompare for base_cur quoted in dest_cur.

    Data is requested in batches (limit=2000 per request), walking backwards in time from the
    most recent datapoint until a batch reaches GET_DATA_UNTIL_DATE or the API stops returning
    older data.

    :param base_cur: symbol of the currency to price, e.g. 'BTC'
    :param dest_cur: symbol of the quote currency, e.g. 'USD'
    :param granularity: either 'hour' or 'minute'
    :param GET_DATA_UNTIL_DATE: anything pd.to_datetime accepts; fetching stops once a batch
        contains data at or before this date. Rows older than the date that came in the same
        batch are kept.
    :param verbose: if True, print the date range of each batch before fetching the next one
    :return: DataFrame with the API columns plus a 'date' column, indexed and sorted by 'date',
        with one row per timestamp
    :raises ValueError: if granularity is invalid or the API returns no data at all
    """
    # validate before touching the API client so bad arguments fail fast
    if granularity not in ('hour', 'minute'):
        raise ValueError('Granularity should be either "hour" or "minute", not "{}"'.format(granularity))

    hist = crycompare.History()
    fetch = hist.histoHour if granularity == 'hour' else hist.histoMinute
    until = pd.to_datetime(GET_DATA_UNTIL_DATE)

    def fetch_batch(**kwargs):
        # one API request -> DataFrame with a parsed 'date' column
        batch = pd.DataFrame(fetch(base_cur, dest_cur, limit=2000, **kwargs)['Data'])
        if not batch.empty:
            batch['date'] = pd.to_datetime(batch['time'], unit='s')
        return batch

    df = fetch_batch()
    if df.empty:
        raise ValueError('No price data returned for {}-{}'.format(base_cur, dest_cur))
    dfs = [df]

    while df['date'].min() > until:
        if verbose:
            print(df['date'].min())
            print(df['date'].max())
            print()

        # use the real minimum instead of relying on the row order of the response
        oldest_ts = int(df['time'].min())
        df = fetch_batch(toTs=oldest_ts)

        # stop if the API has nothing older to offer, otherwise this loop would never end
        if df.empty or df['time'].min() >= oldest_ts:
            break
        dfs.append(df)

    # toTs is inclusive, so consecutive batches share their boundary row -> drop the duplicates
    out = pd.concat(dfs).drop_duplicates(subset='time').reset_index(drop=True)
    out.index = out.date
    return out.sort_index()

def load_and_save_coin_prices(coinsymbols, PRICE_DATA_PATH, GET_DATA_UNTIL_DATE):
    """
    Download hourly USD prices for every coin and save them as one TSV file per coin.

    A coin that fails (download or save) is reported on stdout and skipped, so one bad symbol
    does not abort the whole run.

    :param coinsymbols: iterable of coin symbols, e.g. ['BAT','BTC']
    :param PRICE_DATA_PATH: Save dir; created if it does not exist yet
    :param GET_DATA_UNTIL_DATE: Starting from now, back to when shall the data be retrieved?
    :return: Nothing. Saves data into PRICE_DATA_PATH as '<coin>-USD.tsv'
    """
    dest_cur = 'USD'

    for coin in coinsymbols:
        try:
            print(coin)
            base_cur = coin

            # download data and rename columns
            df = download_hist_price_data(base_cur, dest_cur, granularity='hour',
                                          GET_DATA_UNTIL_DATE=GET_DATA_UNTIL_DATE,
                                          verbose=True)
            df = df[['time', 'low', 'high', 'open', 'close', 'volumefrom']]
            df = df.rename(columns={'volumefrom': 'volume'})

            # save data (make sure the target directory exists, otherwise to_csv fails)
            save_dir = os.path.abspath(PRICE_DATA_PATH)
            os.makedirs(save_dir, exist_ok=True)
            save_path_coin = os.path.join(save_dir, f'{base_cur}-{dest_cur}')
            df.to_csv(f'{save_path_coin}.tsv', header=True, index=False, sep='\t')
        except Exception as e:
            # deliberate best-effort: report the failure and continue with the next coin
            print(coin, 'did not work bc', e)
        finally:
            # be gentle to the api, also after a failed request
            time.sleep(2)
            
            
# Notebook driver: configure the download and run it once.
# Coins to fetch; load_and_save_coin_prices prices each of them in USD.
coinsymbols = ["ETH", "BTC"]
# Output directory for the per-coin .tsv files (relative to the current working directory).
PRICE_DATA_PATH = '../data/'
# Fetch history backwards from now until this date is reached.
GET_DATA_UNTIL_DATE = "2017-08-01"
# `%time` is an IPython magic (not plain Python); it runs the call and reports how long it took.
%time load_and_save_coin_prices(coinsymbols, PRICE_DATA_PATH, GET_DATA_UNTIL_DATE)

ETH
2019-12-15 08:00:00
2020-03-07 16:00:00

2019-09-23 00:00:00
2019-12-15 08:00:00

2019-07-01 16:00:00
2019-09-23 00:00:00

2019-04-09 08:00:00
2019-07-01 16:00:00

2019-01-16 00:00:00
2019-04-09 08:00:00

2018-10-24 16:00:00
2019-01-16 00:00:00

2018-08-02 08:00:00
2018-10-24 16:00:00

2018-05-11 00:00:00
2018-08-02 08:00:00

2018-02-16 16:00:00
2018-05-11 00:00:00

2017-11-25 08:00:00
2018-02-16 16:00:00

2017-09-03 00:00:00
2017-11-25 08:00:00

BTC
2019-12-15 08:00:00
2020-03-07 16:00:00

2019-09-23 00:00:00
2019-12-15 08:00:00

2019-07-01 16:00:00
2019-09-23 00:00:00

2019-04-09 08:00:00
2019-07-01 16:00:00

2019-01-16 00:00:00
2019-04-09 08:00:00

2018-10-24 16:00:00
2019-01-16 00:00:00

2018-08-02 08:00:00
2018-10-24 16:00:00

2018-05-11 00:00:00
2018-08-02 08:00:00

2018-02-16 16:00:00
2018-05-11 00:00:00

2017-11-25 08:00:00
2018-02-16 16:00:00

2017-09-03 00:00:00
2017-11-25 08:00:00

Wall time: 21.2 s
