## Config

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from common import *
import qgrid

## GDAX

In [None]:
# https://github.com/danpaquin/gdax-python
# https://docs.gdax.com

# Use the sandbox API (requires a different set of API access credentials)
gdax_client = gdax.AuthenticatedClient(cfg.GDAX_API_KEY, cfg.GDAX_API_SECRET_KEY, 
                                       cfg.GDAX_PASSPHRASE, api_url=cfg.GDAX_ENDPOINT)
gdax_client.get_time()

### Historical Prices

In [103]:
# Default request is 400 minutes (~7 hours), 1 minute gap
# ~4 requests to get 24 hours of data
# 1460 requests for 1 year, ~3000 requests for 2 years
def write_to_df(data, fpath):
    if os.path.exists(fpath):
        df = pd.read_csv(fpath)
        data = pd.DataFrame(data, columns=PRICE_COLUMNS)
        df.set_index('time')
        df.sort_values(by='time', ascending=True, inplace=True)
        df = pd.concat([df, data]).drop_duplicates().reset_index(drop=True)
    else:
        df = pd.DataFrame(data, columns=PRICE_COLUMNS)
        df.set_index('time')
        df.sort_values(by='time', ascending=True, inplace=True)
    df.to_csv(fpath, index=False)
    return df

def get_data(currency_pair, start_time, end_time, timestep_sec):
#     print("Start", start_utc.isoformat())
#     print("End", end_utc.isoformat())
    delta = end_time - start_time
#     print("Years", round(delta.days/365,2))
#     print("Days", delta.days)
#     print("Hours", delta.days*24)
#     print("Minutes", delta.days*24*60)
#     print("Seconds", round(delta.total_seconds()))
    data = gdax_client.get_product_historic_rates(
        currency_pair, start=start_time.isoformat(), 
        end=end_time.isoformat(), granularity=timestep_sec)
    return data

def get_all_data(pair, start_utc, end_utc, timesteps_per_request, timestep_sec, outfpath):
    start_time = datetime.datetime.strptime(
        start_utc, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
    end_time = datetime.datetime.strptime(
        end_utc, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
    cur_time = start_time
    time_delta = datetime.timedelta(
        seconds=timesteps_per_request*timestep_sec)
    n_records = 0
    retry = 0
    while cur_time < end_time and retry < 10:
        try:
            data = get_data(pair, cur_time, cur_time+time_delta, timestep_sec)
            data = np.array(data)
            last_time = datetime.datetime.fromtimestamp(np.max(data[:,0]))
            if last_time < cur_time:
                break
            print("Records", n_records, "Start:", cur_time, "End:", last_time)
            cur_time = datetime.datetime.fromtimestamp(
                data[0][0] + timestep_sec)
            df = write_to_df(data, outfpath)
            n_records += len(data)
            retry = 0
        except Exception as e:
            retry += 1
            print("Error! Retrying!", e)
            traceback.print_exc()
        finally:
            time.sleep(1)
    return df

In [105]:
START_UTC = '2015-02-01T00:00:00Z'
END_UTC = '2017-12-31T00:00:00Z'
PRICE_COLUMNS = ['time', 'low', 'high', 'open', 'close', 'volume']
TIMESTEP_INTERVAL = 1800
TIMESTEPS_PER_REQUEST = 100
PRODUCT = c.BTC_USD
EXCHANGE = c.GDAX
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
PRICE_FPATH

'/bigguy/data/punisher/gdax_BTC-USD_1800.csv'

In [None]:
# Test
# Older time periods may not work (looks like they keep 2 years of data?)
start_time = datetime.datetime.strptime(
    START_UTC, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
time_delta = datetime.timedelta(
    seconds=TIMESTEPS_PER_REQUEST*TIMESTEP_INTERVAL)
end_time = start_time + time_delta
start_time, end_time
gdax_client.get_product_historic_rates(PRODUCT, start=start_time.isoformat(), end=end_time.isoformat(), granularity=TIMESTEP_INTERVAL)

In [None]:
# Fetch
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

Records 0 Start: 2015-02-01 00:00:00 End: 2015-02-02 17:30:00
Records 100 Start: 2015-02-02 18:00:00 End: 2015-02-04 11:30:00
Records 200 Start: 2015-02-04 12:00:00 End: 2015-02-06 05:30:00
Records 300 Start: 2015-02-06 06:00:00 End: 2015-02-07 23:30:00
Records 400 Start: 2015-02-08 00:00:00 End: 2015-02-09 17:30:00
Records 500 Start: 2015-02-09 18:00:00 End: 2015-02-11 11:30:00
Records 600 Start: 2015-02-11 12:00:00 End: 2015-02-13 05:30:00
Records 700 Start: 2015-02-13 06:00:00 End: 2015-02-14 23:30:00
Records 800 Start: 2015-02-15 00:00:00 End: 2015-02-16 17:30:00
Records 900 Start: 2015-02-16 18:00:00 End: 2015-02-18 11:30:00
Records 1000 Start: 2015-02-18 12:00:00 End: 2015-02-20 05:30:00
Records 1100 Start: 2015-02-20 06:00:00 End: 2015-02-21 23:30:00
Records 1200 Start: 2015-02-22 00:00:00 End: 2015-02-23 17:30:00
Records 1300 Start: 2015-02-23 18:00:00 End: 2015-02-25 11:30:00
Records 1400 Start: 2015-02-25 12:00:00 End: 2015-02-27 05:30:00
Records 1500 Start: 2015-02-27 06:00:

In [90]:
start_utc = '2017-12-23T00:00:00Z'
end_utc = '2017-12-31T00:00:00Z'
df = get_all_data(PRODUCT, start_utc, end_utc, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

2017-12-23 00:00:00 2017-12-31 00:00:00
Start 2017-12-23 00:00:00
End 2017-12-31 00:00:00
2 days, 2:00:00
Records 0 Start: 2017-12-23 00:00:00 End: 2017-12-24 17:30:00
Records 100 Start: 2017-12-24 18:00:00 End: 2017-12-26 11:30:00
Records 200 Start: 2017-12-26 12:00:00 End: 2017-12-28 05:30:00
Records 300 Start: 2017-12-28 06:00:00 End: 2017-12-29 23:30:00
Records 400 Start: 2017-12-30 00:00:00 End: 2017-12-31 10:30:00


In [None]:
s = datetime.datetime.fromtimestamp(1514641260)
next_ = datetime.datetime.fromtimestamp(1514641320)
e = datetime.datetime.fromtimestamp(1514665260)
s,next_,e,len(prices),(e-s).total_seconds()/3600
#datetime.datetime.timestamp(s)

### Load Prices

In [95]:
# https://github.com/bfortuner/computer-vision/blob/master/applied/libraries/PandasQuickstart.ipynb
df = pd.read_csv(PRICE_FPATH)
epochsec = [datetime.datetime.fromtimestamp(t) for t in df['time'].values]
df = df.assign(time_utc = epochsec)
df.sort_values(by='time_utc', ascending=True, inplace=True)

In [100]:
# Check for NULL
df.isnull().sum()

time        0
low         0
high        0
open        0
close       0
volume      0
time_utc    0
dtype: int64

In [101]:
last_time = df.iloc[-1]['time']
last_record_utc = datetime.datetime.fromtimestamp(last_time)
print(last_record_utc.isoformat())
df.tail()

2017-12-31T10:30:00


Unnamed: 0,time,low,high,open,close,volume,time_utc
51555,1514740000.0,13741.41,14009.96,14009.96,13970.01,186.758709,2017-12-31 09:00:00
51556,1514741000.0,13875.96,14062.0,13970.02,13980.0,150.162813,2017-12-31 09:30:00
51559,1514743000.0,13930.0,14049.0,13979.99,13931.02,126.425282,2017-12-31 10:00:00
51557,1514743000.0,13933.49,14049.0,13979.99,13996.64,110.429898,2017-12-31 10:00:00
51558,1514745000.0,13882.0,13950.0,13931.02,13882.59,13.848905,2017-12-31 10:30:00


In [102]:
len(df)

51715

In [None]:
# Query Date Range
start = datetime.datetime(2017, 4, 15, 12, 0)
end = datetime.datetime(2017, 4, 15, 16, 10)
results = df[ (df['time_utc'] >= start) & (df['time_utc'] < end) ]

In [None]:
# Check for missing timesteps
last_time = df.iloc[0]['time_utc']
n_missing = 0
for idx,row in df[1:].iterrows():
    cur_time = row['time_utc']
#     print(cur_time, last_time + datetime.timedelta(seconds=60))
    if cur_time != last_time + datetime.timedelta(seconds=3600):
        n_missing += 1 #(cur_time - last_time).seconds//60
    last_time = cur_time
n_missing

### Plot

In [None]:
# https://github.com/quantopian/qgrid
# https://hub.mybinder.org/user/quantopian-qgrid-notebooks-bu5joi0d/notebooks/index.ipynb
# https://ipywidgets.readthedocs.io/en/stable/examples/Widget%20Basics.html

qgrid_widget = qgrid.QgridWidget(df=df, show_toolbar=True)
qgrid_widget
#qgrid_widget.get_changed_df()

In [None]:
def plot_prices(time, close):
    fig, ax = plt.subplots()
    ax.plot(time, close)

    years = mdates.YearLocator()   # every year
    months = mdates.MonthLocator()  # every month
    yearsFmt = mdates.DateFormatter('%Y')
    monthsFmt = mdates.DateFormatter('%m')
    ax.xaxis.set_major_locator(years)
    ax.xaxis.set_major_formatter(yearsFmt)
    ax.xaxis.set_minor_locator(months)
    ax.xaxis.set_minor_formatter(monthsFmt)

    # datemin = datetime.date(r.date.min().year, 1, 1)
    # datemax = datetime.date(r.date.max().year + 1, 1, 1)
    # ax.set_xlim(datemin, datemax)

    # # format the coords message box
    # def price(x):
    #     return '$%1.2f' % x
    # ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
    # ax.format_ydata = price
    ax.grid(True)

    # rotates and right aligns the x labels, and moves the bottom of the
    # axes up to make room for them
    fig.autofmt_xdate(rotation=30)
    fig.set_size_inches(12,6)
    plt.show()

def plot_range(df, start, end):
    df = df[ (df['time_utc'] >= start) & (df['time_utc'] < end) ]
    vals = df[['time_utc','close']].values
    plot_prices(vals[:,0], vals[:,1])

vals = df[['time_utc','close']].values
plot_prices(vals[:,0], vals[:,1])
# start = datetime.datetime(2017, 4, 15, 12, 0)
# end = datetime.datetime(2017, 4, 15, 16, 10)
# results = df[ (df['time_utc'] >= start) & (df['time_utc'] < end) ]
# results = results.drop(684239)
# plot_range(results, start, end)

### Exchange Metadata

In [None]:
gdax_client.get_products()
gdax_client.get_currencies()
gdax_client.get_time()

### Current Prices

In [None]:
# Get the order book at the default level.
gdax_client.get_product_order_book('BTC-USD')
# Get the order book at a specific level.
gdax_client.get_product_order_book('BTC-USD', level=1)

In [None]:
# Get the product ticker for a specific product.
gdax_client.get_product_ticker(product_id='ETH-USD')

In [None]:
# Get the product trades for a specific product.
gdax_client.get_product_trades(product_id='ETH-USD')

In [None]:
gdax_client.get_product_24hr_stats('ETH-USD')

### Streaming

In [None]:
class myWebsocketClient(gdax.WebsocketClient):
    def on_open(self):
        self.url = cfg.GDAX_WEBSOCKET
        self.products = [c.BTC_USD]
        self.message_count = 0
        print("Lets count the messages!")
    
    def on_message(self, msg):
        self.message_count += 1
        if 'price' in msg and 'type' in msg:
            print ("Message type:", msg["type"],
                   "\t@ {:.3f}".format(float(msg["price"])))
    
    def on_close(self):
        print("-- Goodbye! --")

        
wsClient = myWebsocketClient()
wsClient.start()
print(wsClient.url, wsClient.products)
while (wsClient.message_count < 500):
    print ("\nmessage_count =", "{} \n".format(wsClient.message_count))
    time.sleep(1)
wsClient.close()

In [None]:
wsClient.close()