In [261]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime,date

In [262]:
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
# Initialise Plotly's offline mode so the py.iplot calls below can render inside the notebook.
py.init_notebook_mode(connected=True)

In [263]:
def get_quandl_data(quandl_id):
    '''Return the Quandl dataseries `quandl_id`, using a local pickle cache.

    The cache file is looked up relative to the current working directory.
    Its name is the dataset id plus a '.pkl' suffix, with every '/' replaced
    by '-' (e.g. 'BCHARTS/KRAKENUSD' -> 'BCHARTS-KRAKENUSD.pkl').

    Parameters
    ----------
    quandl_id : str
        Quandl dataset code, e.g. 'BCHARTS/KRAKENUSD'.

    Returns
    -------
    The unpickled cache contents on a cache hit; otherwise the object
    returned by `quandl.get`, which is also written to the cache file.
    '''
    cache_path = '{}.pkl'.format(quandl_id).replace('/', '-')
    try:
        # NOTE: pickle.load can execute arbitrary code, so only cache files
        # written by this function should ever sit at cache_path.
        # The context manager closes the handle even if unpickling fails.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
        print('Loaded {} from cache'.format(quandl_id))
    except (OSError, IOError):
        # The cache file could not be opened/read: download and cache it.
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df
    

In [264]:
# Pull Kraken BTC price exchange data (get_quandl_data serves it from the
# local pickle cache when one exists, otherwise downloads it).
btc_usd_price_kraken = get_quandl_data('BCHARTS/KRAKENUSD')
# Preview the first rows of the Kraken data.
btc_usd_price_kraken.head()

Loaded BCHARTS/KRAKENUSD from cache


Unnamed: 0_level_0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-07,874.6704,892.06753,810.0,810.0,15.622378,13151.472844,841.835522
2014-01-08,810.0,899.84281,788.0,824.98287,19.182756,16097.329584,839.156269
2014-01-09,825.56345,870.0,807.42084,841.86934,8.158335,6784.249982,831.572913
2014-01-10,839.99,857.34056,817.0,857.33056,8.02451,6780.220188,844.938794
2014-01-11,858.2,918.05471,857.16554,899.84105,18.748285,16698.566929,890.671709


In [265]:
# Inspect the last rows of the Kraken data.
btc_usd_price_kraken.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-08-16,4170.8,4399.0,3950.0,4388.0,7600.447025,31826110.0,4187.399662
2017-08-17,4381.01,4479.0,4200.0,4291.0,7641.769694,33215010.0,4346.508031
2017-08-18,4291.0,4354.996,3980.0,4141.0,9739.865739,40865530.0,4195.697579
2017-08-19,4130.0,4211.683,4002.636,4179.733,5163.253241,21279690.0,4121.371679
2017-08-20,4184.937,4198.0,4066.0,4150.005,4147.063496,17062340.0,4114.31898


In [266]:
# Chart the Kraken 'Weighted Price' column against the frame's index.
btc_trace = go.Scatter(x=btc_usd_price_kraken.index, y=btc_usd_price_kraken['Weighted Price'])
py.iplot([btc_trace])

In [267]:
# Pull pricing data for five more BTC exchanges
exchanges = ['COINBASE','BITSTAMP','BTCC','MTGOX','BITFINEX']

# Start from the Kraken frame that is already loaded, then add one entry
# per exchange, keyed by the exchange name.
exchange_data = {'KRAKEN': btc_usd_price_kraken}

for exchange in exchanges:
    exchange_data[exchange] = get_quandl_data('BCHARTS/{}USD'.format(exchange))

Loaded BCHARTS/COINBASEUSD from cache
Loaded BCHARTS/BITSTAMPUSD from cache
Loaded BCHARTS/BTCCUSD from cache
Loaded BCHARTS/MTGOXUSD from cache
Loaded BCHARTS/BITFINEXUSD from cache


In [268]:
def merge_dfs_on_column(dataframes,labels,col):
    '''Build one dataframe out of the `col` column of every input dataframe.

    The i-th entry of `labels` names the column that holds `col` taken from
    the i-th entry of `dataframes`.
    '''
    picked = {labels[pos]: frame[col] for pos, frame in enumerate(dataframes)}
    return pd.DataFrame(picked)
    

In [269]:
# Merge the BTC price dataseries into a single dataframe: one column per
# entry of exchange_data, labelled with its key and holding that
# exchange's 'Weighted Price' series.
btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')

In [270]:
# Inspect the last rows of the merged per-exchange frame.
btc_usd_datasets.tail()

Unnamed: 0_level_0,BITFINEX,BITSTAMP,BTCC,COINBASE,KRAKEN,MTGOX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-08-16,,4193.426713,4179.243416,4193.469553,4187.399662,
2017-08-17,,4338.694675,4332.251716,4334.11521,4346.508031,
2017-08-18,,4180.171091,4239.286413,4167.053043,4195.697579,
2017-08-19,,4030.604133,4062.586365,4096.284462,4121.371679,
2017-08-20,,4053.512769,4100.537141,4107.63399,4114.31898,


In [271]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Plot every column of `df` as its own trace on a single Plotly figure.

    Parameters
    ----------
    df : DataFrame
        Its index supplies the x values (drawn on a date axis) and each
        column becomes one trace named after the column label.
    title : str
        Figure title.
    seperate_y_axis : bool
        When True, each trace is bound to its own y-axis ('y1', 'y2', ...)
        configured to overlay 'y', and tick labels on the primary y-axis
        are hidden.
    y_axis_label : str
        Title of the primary y-axis.
    scale : str
        Axis type given to the per-series axes created when
        `seperate_y_axis` is True (see the review note on the primary axis).
    initial_hide : bool
        When True, traces start in the 'legendonly' state.

    Returns
    -------
    None. The figure is rendered with `py.iplot`.
    '''
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels=not seperate_y_axis,
            # NOTE(review): the primary axis is always logarithmic, so `scale`
            # never reaches it. Left unchanged because the notebook's existing
            # calls are rendered with this setting -- confirm the intent
            # before switching this to `type=scale`.
            type='log'
        )
    )

    # Configuration shared by every per-series axis.
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale)

    # Plotly's `visible` takes True, False or 'legendonly'; the string
    # 'visible' used previously is not one of the accepted values.
    visibility = 'legendonly' if initial_hide else True

    # Form one trace per column.
    trace_arr = []
    for index, label in enumerate(df):
        series = df[label]
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=label,
            visible=visibility
        )

        # Give the series its own axis when requested.
        if seperate_y_axis:
            axis_number = index + 1
            trace['yaxis'] = 'y{}'.format(axis_number)
            layout['yaxis{}'.format(axis_number)] = y_axis_config
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [272]:
# Plot all of the BTC exchange prices on one chart, using the merged frame
# as it stands before any cleaning.
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')

In [273]:
# Remove "0" values: every exact 0 in the merged frame is replaced with NaN,
# modifying btc_usd_datasets in place.
btc_usd_datasets.replace(0,np.nan,inplace=True)

In [274]:
# Re-plot the merged frame now that zeros have been replaced with NaN.
df_scatter(btc_usd_datasets,'Bitcoin Price (USD) By Exchange')

In [275]:
# Blank out every MTGOX price from 2013-12-18 through the end of the frame.
# One .loc[rows, column] assignment on the frame itself is used because the
# chained form (df['MTGOX'].loc[...] = ...) may write to a temporary copy
# and leave btc_usd_datasets unchanged.
btc_usd_datasets.loc['2013-12-18':, 'MTGOX'] = np.nan

In [276]:
# Plot with MTGOX trimmed (its values from 2013-12-18 onward set to NaN).
df_scatter(btc_usd_datasets,'Bitcoin Price (USD) By Exchange')

In [277]:
# Store the row-wise mean across the frame's columns as a new column.
btc_usd_datasets['avg_btc_price_usd']=btc_usd_datasets.mean(axis=1)

In [278]:
# Trace of the cross-exchange average price against the frame's index.
btc_trace = go.Scatter(x=btc_usd_datasets.index,y=btc_usd_datasets['avg_btc_price_usd'])
data=[btc_trace]

layout = go.Layout(
    xaxis=dict(
        # 'scale' is not a Plotly axis type; 'date' matches the x-axis type
        # df_scatter uses for this same frame.
        type='date',
        autorange=True
    ),
    yaxis=dict(
        type='log',
        autorange=True
    )
)

In [279]:
# Combine the `data` trace list and `layout` defined above into a figure and render it.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [280]:
# Just trying to figure out how to use dataframes and add nans over a certain date range

#btc_usd_datasets['MTGOX'].iloc[range(0,3)]=np.nan
#btc_usd_datasets['MTGOX'].head()

#dec 18 2013

#btc_usd_datasets['MTGOX'].index[0]
#print(date(2013,12,18))

#btc_usd_datasets['MTGOX'].loc['2013-12-18':btc_usd_datasets['MTGOX'].index[-1]]=np.nan