In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from common import *
import qgrid

In [None]:
# Client library: https://github.com/danpaquin/gdax-python
# API reference:  https://docs.gdax.com
#
# The endpoint is taken from cfg.GDAX_ENDPOINT. When it points at the sandbox
# API, a different set of API access credentials is required.
gdax_client = gdax.AuthenticatedClient(
    cfg.GDAX_API_KEY,
    cfg.GDAX_API_SECRET_KEY,
    cfg.GDAX_PASSPHRASE,
    api_url=cfg.GDAX_ENDPOINT,
)
# Cell output: the result of asking the API for its server time.
gdax_client.get_time()

In [None]:
# Request sizing at 1-minute granularity: the default request covers 400 minutes (~7 hours).
# ~4 requests are needed to cover 24 hours of data,
# i.e. ~1460 requests for 1 year and ~3000 requests for 2 years.
def write_to_df(data, fpath, columns=None):
    """Merge freshly fetched rows into the CSV at ``fpath`` and return the result.

    Args:
        data: 2-D row data (list of rows or array) with one value per column.
        fpath: Path of the CSV file. Created when missing, otherwise merged.
        columns: Column names for ``data``. Defaults to the notebook-level
            ``PRICE_COLUMNS``. Must contain a ``'time'`` column.

    Returns:
        The merged DataFrame, de-duplicated on ``'time'``, sorted by ``'time'``
        in ascending order, with a fresh 0..n-1 index. The same frame is
        written to ``fpath`` without the index.
    """
    if columns is None:
        columns = PRICE_COLUMNS
    new_rows = pd.DataFrame(data, columns=columns)
    if os.path.exists(fpath):
        merged = pd.concat([pd.read_csv(fpath), new_rows], ignore_index=True)
    else:
        merged = new_rows
    # De-duplicate on the candle timestamp rather than on the whole row, so a
    # re-fetched candle replaces the stored one (keep='last') instead of being
    # stored twice. Sort AFTER merging so the saved file is always ordered.
    df = (merged.drop_duplicates(subset='time', keep='last')
                .sort_values(by='time', ascending=True)
                .reset_index(drop=True))
    df.to_csv(fpath, index=False)
    return df

def get_data(currency_pair, start_time, end_time, timestep_sec):
    """Fetch one window of historic rates from the notebook-level ``gdax_client``.

    Args:
        currency_pair: Product identifier passed straight to the client.
        start_time: ``datetime`` marking the window start (sent as ISO 8601).
        end_time: ``datetime`` marking the window end (sent as ISO 8601).
        timestep_sec: Value passed as the request's ``granularity``.

    Returns:
        Whatever ``get_product_historic_rates`` returns for that window.
    """
    return gdax_client.get_product_historic_rates(
        currency_pair,
        start=start_time.isoformat(),
        end=end_time.isoformat(),
        granularity=timestep_sec)

def _epoch_to_utc(epoch_sec):
    """Convert epoch seconds to a naive datetime expressed in UTC."""
    return datetime.datetime.fromtimestamp(
        float(epoch_sec), datetime.timezone.utc).replace(tzinfo=None)


def get_all_data(pair, start_utc, end_utc, timesteps_per_request, timestep_sec, outfpath):
    """Download historic rates window by window and merge them into ``outfpath``.

    Args:
        pair: Product identifier forwarded to ``get_data``.
        start_utc: Start of the range, formatted ``'%Y-%m-%dT%H:%M:%SZ'``.
        end_utc: End of the range, same format as ``start_utc``.
        timesteps_per_request: Number of timesteps covered by one request.
        timestep_sec: Length of one timestep in seconds.
        outfpath: CSV path handed to ``write_to_df`` after every request.

    Returns:
        The DataFrame returned by the last successful ``write_to_df`` call, or
        ``None`` when no window could be stored.

    The loop stops when the range is covered, when the newest returned record
    is older than the requested window start, or after 10 consecutive
    failures. One second is slept after every attempt.
    """
    time_format = '%Y-%m-%dT%H:%M:%SZ'
    start_time = datetime.datetime.strptime(start_utc, time_format)
    end_time = datetime.datetime.strptime(end_utc, time_format)
    time_delta = datetime.timedelta(
        seconds=timesteps_per_request*timestep_sec)
    cur_time = start_time
    n_records = 0
    retry = 0
    df = None  # stays None when nothing was ever written
    while cur_time < end_time and retry < 10:
        try:
            data = np.array(
                get_data(pair, cur_time, cur_time+time_delta, timestep_sec))
            if data.size == 0:
                # Nothing recorded for this window: move on instead of
                # retrying the same empty window until the retry budget is gone.
                print("No data for window starting", cur_time)
                cur_time = cur_time + time_delta
                retry = 0
                continue
            if data.ndim != 2:
                # Anything that is not a table of rows (e.g. an error payload)
                # is treated as a failed attempt and retried.
                raise ValueError("Unexpected response: {}".format(data))
            newest_epoch = np.max(data[:,0])
            # Timestamps are epoch seconds; convert them to UTC so they are
            # comparable with the naive UTC datetimes parsed above.
            last_time = _epoch_to_utc(newest_epoch)
            if last_time < cur_time:
                break
            print("Records", n_records, "Start:", cur_time, "End:", last_time)
            # Persist before advancing so a failed write retries this window.
            df = write_to_df(data, outfpath)
            n_records += len(data)
            # Advance from the newest record, regardless of row order.
            cur_time = _epoch_to_utc(newest_epoch + timestep_sec)
            retry = 0
        except Exception as e:
            retry += 1
            print("Error! Retrying!", e)
            traceback.print_exc()
        finally:
            time.sleep(1)
    if retry >= 10:
        print("Giving up after", retry, "consecutive errors at", cur_time)
    return df

In [None]:
# Date range handed to get_all_data(); must match '%Y-%m-%dT%H:%M:%SZ'.
# NOTE: the "Test" cell further down rebinds both values.
START_UTC = '2015-02-01T00:00:00Z'
END_UTC = '2017-12-31T00:00:00Z'
# Column names applied to every fetched row by write_to_df().
PRICE_COLUMNS = ['time', 'low', 'high', 'open', 'close', 'volume']
# Length of one timestep in seconds; passed as the request granularity.
TIMESTEP_INTERVAL = 1800
# Timesteps per request, so one request spans
# TIMESTEPS_PER_REQUEST * TIMESTEP_INTERVAL seconds.
TIMESTEPS_PER_REQUEST = 100
PRODUCT = c.BTC_USD
EXCHANGE = c.GDAX
# Output file is named <exchange>_<product>_<timestep seconds>.csv inside cfg.DATA_DIR.
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
# Cell output: the resolved path.
PRICE_FPATH

In [None]:
# Test: issue one request for the first window of the range.
# Older time periods may not work (looks like they keep 2 years of data?)
# NOTE: this rebinds START_UTC / END_UTC, which the fetch cells below read.
START_UTC = '2017-01-01T00:00:00Z'
END_UTC = '2017-12-31T00:00:00Z'
start_time = datetime.datetime.strptime(START_UTC, '%Y-%m-%dT%H:%M:%SZ')
time_delta = datetime.timedelta(
    seconds=TIMESTEPS_PER_REQUEST*TIMESTEP_INTERVAL)
end_time = start_time + time_delta
# A bare expression in the middle of a cell is not displayed, so print it.
print(start_time, end_time)
# Use a cell-local name instead of rebinding PRODUCT: PRICE_FPATH was built
# from PRODUCT, so changing PRODUCT here would make the next fetch write this
# product's data into a file named after another product.
test_product = c.LTC_USD
gdax_client.get_product_historic_rates(
    test_product, start=start_time.isoformat(), end=end_time.isoformat(),
    granularity=TIMESTEP_INTERVAL)

In [None]:
# Fetch
# Rebuild the output path from the current PRODUCT (as the other fetch cells
# do) so the file name always matches the product actually downloaded, even if
# PRODUCT was changed after PRICE_FPATH was first computed.
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
# Fetch ETH/USD: switch the product, rebuild the matching output path, then
# download the START_UTC..END_UTC range into that file.
PRODUCT = c.ETH_USD
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
# Fetch LTC/USD: switch the product, rebuild the matching output path, then
# download the START_UTC..END_UTC range into that file.
PRODUCT = c.LTC_USD
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
# Fetch only the 2017-12-23 .. 2017-12-31 window for whatever PRODUCT /
# PRICE_FPATH currently are; the rows are merged into the existing file.
start_utc = '2017-12-23T00:00:00Z'
end_utc = '2017-12-31T00:00:00Z'
df = get_all_data(PRODUCT, start_utc, end_utc, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
# Scratch check of three epoch timestamps: `next_` is 60 seconds after `s`, and
# `e` is 24000 seconds (~6.67 hours) after `s`; the last value is that span in hours.
# NOTE: fromtimestamp() without a tz argument converts to the machine's local time.
# NOTE(review): `prices` is not defined anywhere in the visible notebook — confirm
# where it comes from, otherwise this cell raises NameError on a fresh kernel.
s = datetime.datetime.fromtimestamp(1514641260)
next_ = datetime.datetime.fromtimestamp(1514641320)
e = datetime.datetime.fromtimestamp(1514665260)
s,next_,e,len(prices),(e-s).total_seconds()/3600
#datetime.datetime.timestamp(s)

### Load Prices

In [None]:
# https://github.com/bfortuner/computer-vision/blob/master/applied/libraries/PandasQuickstart.ipynb
# Load the saved rows and derive a 'utc' datetime column from the epoch-second
# 'time' column, then order the frame chronologically.
df = pd.read_csv(PRICE_FPATH)
# pd.to_datetime(..., unit='s') interprets the epoch as UTC, whereas
# datetime.fromtimestamp() would convert to the machine's local timezone and
# make the 'utc' column wrong on any machine that is not running in UTC.
df = (df.assign(utc=pd.to_datetime(df['time'], unit='s'))
        .sort_values(by='utc', ascending=True))

In [None]:
# Check for NULL: count the missing values in each column of df.
df.isnull().sum()

In [None]:
# Show the timestamp of the last row, plus the tail of the frame.
last_time = df.iloc[-1]['time']
# Convert with an explicit UTC timezone; fromtimestamp() without tz returns
# local time, which would contradict the variable name. tzinfo is dropped so
# the value stays a naive datetime and prints without an offset.
last_record_utc = datetime.datetime.fromtimestamp(
    float(last_time), datetime.timezone.utc).replace(tzinfo=None)
print(last_record_utc.isoformat())
df.tail()

In [None]:
# Number of rows currently held in df.
len(df)

In [None]:
# Query Date Range: keep the rows whose 'utc' value falls in [start, end).
start = datetime.datetime(year=2017, month=4, day=15, hour=12, minute=0)
end = datetime.datetime(year=2017, month=4, day=15, hour=16, minute=10)
results = df.loc[(start <= df['utc']) & (end > df['utc'])]

In [None]:
# Check for missing timesteps: count consecutive rows whose 'utc' values are
# not exactly one timestep apart. Assumes df is already sorted by 'utc'.
# The expected spacing comes from TIMESTEP_INTERVAL (the same value used to
# fetch the data) rather than a hard-coded number of seconds, so the check
# stays correct when the interval is changed.
expected_step = datetime.timedelta(seconds=TIMESTEP_INTERVAL)
# diff() yields the gap to the previous row; the first row has no predecessor.
step_sizes = df['utc'].diff().iloc[1:]
n_missing = int((step_sizes != expected_step).sum())
n_missing

### Streaming

In [None]:
class myWebsocketClient(gdax.WebsocketClient):
    """Websocket client that counts incoming messages and prints priced ones."""

    def on_open(self):
        """Point the client at the configured feed and reset the counter."""
        self.message_count = 0
        self.products = [c.BTC_USD]
        self.url = cfg.GDAX_WEBSOCKET
        print("Lets count the messages!")

    def on_message(self, msg):
        """Count every message; print type and price when both are present."""
        self.message_count += 1
        if not ('price' in msg and 'type' in msg):
            return
        price_text = "\t@ {:.3f}".format(float(msg["price"]))
        print("Message type:", msg["type"], price_text)

    def on_close(self):
        """Announce that the connection was closed."""
        print("-- Goodbye! --")

        
# Stream until 500 messages have been received, reporting the count once per second.
wsClient = myWebsocketClient()
wsClient.start()
try:
    print(wsClient.url, wsClient.products)
    while wsClient.message_count < 500:
        print("\nmessage_count =", "{} \n".format(wsClient.message_count))
        time.sleep(1)
finally:
    # Always close the client, including when the loop is interrupted
    # (e.g. kernel interrupt) or raises, so the connection is not left open.
    wsClient.close()

In [None]:
# Manual cleanup: close the websocket client.
# NOTE(review): presumably kept for when the streaming cell above is
# interrupted before reaching its own close() — confirm.
wsClient.close()