In [None]:
from audiolizer import get_history

In [None]:
# Show the documentation of the get_history imported from audiolizer
help(get_history)

In [None]:
from audiolizer import granularity, audiolizer_temp_dir, get_gaps, load_date
import pandas as pd
import os

def get_history(ticker, start_date, end_date = None, granularity=granularity, fetch=True):
    """Report which daily price files are cached, optionally fetching missing days

    Walks the one-day intervals between start_date and end_date, checks
    whether each day's file exists in audiolizer_temp_dir and, when fetch is
    True, loads every missing day with load_date and writes it to the cache.

    params:
        ticker: (str) identifier used in the cache file names, e.g. 'BTC-USD'
        start_date: (str) (see pandas.to_datetime for acceptable formats)
        end_date: (str) capped at the current time; when omitted, one day
            past the current time is used
        granularity: (int) seconds (defaults to the granularity imported
            from audiolizer)
        fetch: (bool) when True, missing days are loaded and saved

    returns:
        (dates, fnames, foundlings, batch): parallel lists, one entry per day
            dates: left edge of each daily interval
            fnames: cache file path for that day
            foundlings: 1 if the file existed when it was checked, else 0
                (a day fetched by this call is still reported as 0)
            batch: counter that increases whenever the found flag differs
                from the previous day's, so a run of consecutive days with
                the same flag shares one number

    price data is saved by ticker and date and stored in audiolizer_temp_dir
    """
    start_date = pd.to_datetime(start_date).tz_localize(None)

    today = pd.Timestamp.now().tz_localize(None)
    if end_date is None:
        end_date = today + pd.Timedelta('1D')
    else:
        end_date = min(today, pd.to_datetime(end_date).tz_localize(None))

    dates = []
    fnames = []
    foundlings = []
    batch = []
    batch_number = 0
    last_found = -1  # matches neither 0 nor 1, so the first day always opens batch 1
    for int_ in pd.interval_range(start_date, end_date):
        fname = _history_fname(ticker, int_.left)
        found = int(os.path.exists(fname))
        if found != last_found:
            batch_number += 1
        last_found = found
        if fetch and not found:
            print('missing {}'.format(int_))
            load_date(ticker, granularity, int_).to_csv(fname, compression='gzip')
        dates.append(int_.left)
        fnames.append(fname)
        foundlings.append(found)
        batch.append(batch_number)

    return dates, fnames, foundlings, batch


def _history_fname(ticker, day):
    """Build the cache file path for one ticker and one day (a Timestamp)"""
    return audiolizer_temp_dir + '/{}-{}.csv.gz'.format(
        ticker, day.strftime('%Y-%m-%d'))


def _load_history_frame(ticker, fnames, granularity=granularity):
    """Read cached daily files into one DataFrame, re-fetching days with gaps

    This is the load-and-repair step that used to sit, unreachable, after
    get_history's return statement. It lives in its own helper so that
    get_history keeps returning the cache report its callers unpack.

    params:
        ticker: (str) identifier used in the cache file names
        fnames: list of cache file paths; every file must already exist
        granularity: (int) seconds, passed through to get_gaps and load_date

    returns:
        DataFrame indexed by the 'time' column, duplicate rows dropped
    """
    def read_all():
        return pd.concat(
            pd.read_csv(fname, index_col='time', parse_dates=True, compression='gzip')
            for fname in fnames).drop_duplicates()

    df = read_all()
    # NOTE(review): get_gaps is assumed to return a time-indexed object
    # describing missing samples - confirm against audiolizer.
    gaps = get_gaps(df, granularity)
    if len(gaps) == 0:
        return df

    print('found data gaps')
    # fetch the data again for each calendar day that contains a gap
    for gap_date in gaps.groupby(pd.Grouper(freq='1d')).first().index:
        print('\tfetching {}'.format(gap_date))
        int_ = pd.interval_range(start=gap_date, periods=1, freq='1d')
        int_ = pd.Interval(int_.left[0], int_.right[0])
        int_df = load_date(ticker, granularity, int_)
        int_df.to_csv(_history_fname(ticker, int_.left), compression='gzip')

    # read again so the returned frame includes the refreshed days
    return read_all()

In [None]:
# Cache report only: with fetch=False missing days are not loaded or written
dates, files, found, batch = get_history('BTC-USD', '2020-04-28', '2020-05-10', fetch=False)

In [None]:
# Tabulate the per-day cache report, one row per date
df = pd.DataFrame(
    {'files': files, 'found': found, 'batch': batch},
    index=dates,
)
df

In [None]:
# Rows whose cache file was not found
df.loc[df['found'] == 0]

In [None]:
# Ticker used when building file names in the batch loop
ticker='BTC-USD'

In [None]:
# Iterate over batches of missing dates

# The group key is bound to `batch_id` rather than `batch` so the loop does
# not overwrite the `batch` list returned by get_history.
for batch_id, g in df[df.found==0].groupby('batch', sort=False):
    # first and last missing day of this run, formatted as %Y-%m-%d-%H-%M
    endpoints = [t.strftime('%Y-%m-%d-%H-%M') for t in g.iloc[[0,-1]].index]
    print(len(g), endpoints)
    for t in g.index:
        tstr = t.strftime('%Y-%m-%d-%H-%M')
        # NOTE(review): get_history names cache files with '%Y-%m-%d' only;
        # confirm which format the per-day files should use.
        fname = '{}-{}.csv.gz'.format(ticker, tstr)
    # TODO: fetch the whole run in one request, then split it into daily files:
    #     data = fetch_data(ticker, granularity, *endpoints)

In [None]:
# Inspect the last file name built by the batch loop
fname

In [None]:
# endpoints already holds '%Y-%m-%d-%H-%M' strings built in the batch loop,
# so calling .strftime on an element would raise AttributeError
endpoints[0]

In [None]:
from audiolizer import HistoricalData

def fetch_data(ticker, granularity, start_, end_):
    """Retrieve price data for a ticker between two timestamps

    Thin wrapper around HistoricalData(...).retrieve_data() that reports the
    requested range before propagating any failure.

    params:
        ticker: (str) identifier passed to HistoricalData, e.g. 'BTC-USD'
        granularity: (int) passed to HistoricalData, e.g. 300
        start_: (str) start of the range, formatted as %Y-%m-%d-%H-%M
        end_: (str) end of the range, formatted as %Y-%m-%d-%H-%M

    returns:
        whatever HistoricalData.retrieve_data() returns

    raises:
        any exception raised by HistoricalData is re-raised unchanged
    """
    try:
        return HistoricalData(ticker,
                              granularity,
                              start_,
                              end_,
                              ).retrieve_data()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit pass straight through without the misleading message
        print('could not load using {} {}'.format(start_, end_))
        raise


In [None]:
# Fetch the range between the two endpoints left by the batch loop, with granularity 300
data = fetch_data('BTC-USD', 300, *endpoints)

In [None]:
# Inspect the fetched data
data

In [None]:
# Format an interval's bounds the way fetch_data expects (%Y-%m-%d-%H-%M).
# NOTE(review): in the visible cells int_ is only assigned inside
# get_history, so it must be defined at notebook scope before this runs.
start_ = int_.left.strftime('%Y-%m-%d-%H-%M')
end_ = int_.right.strftime('%Y-%m-%d-%H-%M')

Objective: get_history should fetch all the data at once then save it to separate files.

In [None]:
# Inspect df again (presumably the cache report frame built earlier)
df