In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime

In [2]:
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

In [3]:
def get_quandl_data(quandl_id):
    '''Download and cache a Quandl data series.

    The cache file name is built from ``quandl_id`` by appending ``.pkl`` and
    replacing every ``/`` with ``-``; it is resolved relative to the current
    working directory.

    Args:
        quandl_id: Quandl dataset code, e.g. ``'BCHARTS/BITSTAMPUSD'``.

    Returns:
        The unpickled cache contents when the cache file can be opened;
        otherwise the object returned by ``quandl.get``, which is also
        pickled to the cache path before being returned.
    '''
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    try:
        # The context manager closes the handle even when unpickling fails.
        # NOTE: pickle.load can execute arbitrary code; only keep cache files you trust.
        with open(cache_path, 'rb') as f:
            df = pickle.load(f)
        print('Loaded {} from cache'.format(quandl_id))
    except (OSError, IOError):
        # Cache missing or unreadable: fall back to a fresh download.
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df

In [4]:
# Pull Bitstamp BTC/USD exchange price data (served from the local pickle cache when one exists)
btc_usd_price_bitstamp = get_quandl_data('BCHARTS/BITSTAMPUSD')


Loaded BCHARTS/BITSTAMPUSD from cache


In [5]:
# Preview the first rows of the Bitstamp dataframe
btc_usd_price_bitstamp.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-09-13,5.8,6.0,5.65,5.97,58.371382,346.097389,5.929231
2011-09-14,5.58,5.72,5.52,5.53,61.145984,341.854813,5.590798
2011-09-15,5.12,5.24,5.0,5.13,80.140795,408.259002,5.094272
2011-09-16,4.82,4.87,4.8,4.85,39.914007,193.763147,4.854515
2011-09-17,4.87,4.87,4.87,4.87,0.3,1.461,4.87


In [6]:
# Chart the Bitstamp 'Weighted Price' column against the dataframe index
btc_trace = go.Scatter(x=btc_usd_price_bitstamp.index, y=btc_usd_price_bitstamp['Weighted Price'])
py.iplot([btc_trace])

In [7]:
# Pull pricing data for one more BTC exchange
exchanges = ['COINBASE']

# Seed the mapping with the Bitstamp data that is already loaded
exchange_data = {'BITSTAMP': btc_usd_price_bitstamp}

# Fetch every additional exchange and store it under its exchange name
for exchange in exchanges:
    exchange_code = 'BCHARTS/{}USD'.format(exchange)
    btc_exchange_df = get_quandl_data(exchange_code)
    exchange_data[exchange] = btc_exchange_df

Loaded BCHARTS/COINBASEUSD from cache


In [8]:
# Merge All Of The Pricing Data Into A Single Dataframe
def merge_dfs_on_column(dataframes, labels, col):
    '''Build one dataframe from column `col` of every input dataframe.

    The dataframe at position i contributes a column named labels[i].
    '''
    picked = {labels[pos]: frame[col] for pos, frame in enumerate(dataframes)}
    return pd.DataFrame(picked)

# Merge the 'Weighted Price' series of every exchange into a single dataframe,
# one column per exchange name
btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')

In [9]:
# Inspect the last rows of the merged exchange prices
btc_usd_datasets.tail()

Unnamed: 0_level_0,BITSTAMP,COINBASE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-06-15,6545.672008,6535.241459
2018-06-16,6460.211171,6455.774489
2018-06-17,6509.017847,6498.813185
2018-06-18,6551.333545,6586.614178
2018-06-19,6731.632282,6728.605931


In [10]:
# Visualize The Pricing Datasets
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Render an interactive scatter plot with one trace per dataframe column.

    Args:
        df: Dataframe to plot. Each column becomes a trace named after the
            column; the index supplies the x values (the x axis is typed 'date').
        title: Figure title.
        seperate_y_axis: When True, every trace is bound to its own y axis
            ('y1', 'y2', ...) configured to overlay 'y' with tick labels
            hidden, and tick labels on the primary y axis are hidden too.
        y_axis_label: Title of the primary y axis.
        scale: Axis type applied to every y axis, e.g. 'linear' or 'log'.
        initial_hide: When True, traces start as 'legendonly' instead of shown.

    Returns:
        None. The figure is displayed through ``py.iplot``.
    '''
    label_arr = list(df)

    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels=not seperate_y_axis,
            type=scale
        )
    )

    # Settings shared by the per-series axes created when seperate_y_axis is set
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale
    )

    # A trace's `visible` property is an enumeration of True, False or
    # 'legendonly'. The string 'visible' is not a member and is rejected by
    # plotly versions that validate property values, so use True for "shown".
    visibility = 'legendonly' if initial_hide else True

    # Form a trace for each series
    trace_arr = []
    for index, label in enumerate(label_arr):
        series = df[label]
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=label,
            visible=visibility
        )

        # Add a separate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [11]:
# Plot the weighted price of every exchange on a single chart
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')

In [12]:
# Replace "0" values with NaN so they are treated as missing data.
# Rebinding the result (instead of inplace=True) leaves the previously built frame untouched.
btc_usd_datasets = btc_usd_datasets.replace(0, np.nan)
# Plot the revised dataframe
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')

In [13]:
# Calculate the average BTC price as a new column: the row-wise mean (axis=1)
# across the columns currently in the dataframe
btc_usd_datasets['avg_btc_price_usd'] = btc_usd_datasets.mean(axis=1)
# Plot the average BTC price (rebinds btc_trace, which earlier held the Bitstamp trace)
btc_trace = go.Scatter(x=btc_usd_datasets.index, y=btc_usd_datasets['avg_btc_price_usd'])
py.iplot([btc_trace])

In [14]:
def get_json_data(json_url, cache_path):
    '''Download and cache JSON data, return as a dataframe.

    Args:
        json_url: URL handed to ``pd.read_json`` when no cache can be read.
        cache_path: Path of the pickle file used as the cache.

    Returns:
        The unpickled cache contents when ``cache_path`` can be opened;
        otherwise the dataframe parsed from ``json_url``, which is also
        pickled to ``cache_path`` before being returned.
    '''
    try:
        # The context manager closes the handle even when unpickling fails.
        # NOTE: pickle.load can execute arbitrary code; only keep cache files you trust.
        with open(cache_path, 'rb') as f:
            df = pickle.load(f)
        print('Loaded {} from cache'.format(json_url))
    except (OSError, IOError):
        # Cache missing or unreadable: fall back to a fresh download.
        print('Downloading {}'.format(json_url))
        df = pd.read_json(json_url)
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(json_url, cache_path))
    return df


In [15]:
# Poloniex chart-data URL template; the placeholders are filled, in order,
# with the currency pair, start timestamp, end timestamp and period
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'
start_date = datetime.strptime('2015-01-01', '%Y-%m-%d') # get data from the start of 2015
end_date = datetime.now() # up until the moment this cell runs
# NOTE: `pediod` is a misspelling of "period"; the name is kept because get_crypto_data reads it
pediod = 86400 # pull daily data (86,400 seconds per day)

def get_crypto_data(poloniex_pair):
    '''Fetch Poloniex chart data for a currency pair, indexed by its 'date' column'''
    window_start = start_date.timestamp()
    window_end = end_date.timestamp()
    json_url = base_polo_url.format(poloniex_pair, window_start, window_end, pediod)
    # The pair name doubles as the cache file path
    return get_json_data(json_url, poloniex_pair).set_index('date')

In [16]:
# Altcoin symbols to load; each one is requested as the Poloniex pair 'BTC_<symbol>'
altcoins = ['ETH', 'ETC', 'XMR', 'DCR', 'BURST', 'SC', 'BCN', 'ZEC']

# Map each altcoin symbol to its chart-data dataframe
altcoin_data = {}
for altcoin in altcoins:
    coinpair = 'BTC_{}'.format(altcoin)
    crypto_price_df = get_crypto_data(coinpair)
    altcoin_data[altcoin] = crypto_price_df

Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1420041600.0&end=1529580337.660345&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1420041600.0&end=1529580337.660345&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1420041600.0&end=1529580337.660345&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DCR&start=1420041600.0&end=1529580337.660345&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_BURST&start=1420041600.0&end=1529580337.660345&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_SC&start=1420041600.0&end=1529580337.660345&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_BCN&start=1420041600.0&end=1529580337.660345&period=8

In [17]:
# Inspect the last rows of the ETH dataframe
altcoin_data['ETH'].tail()

Unnamed: 0_level_0,close,high,low,open,quoteVolume,volume,weightedAverage
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-06-16,0.07655,0.077454,0.075934,0.076288,5206.688936,399.521964,0.076732
2018-06-17,0.077045,0.07756,0.07649,0.07655,6411.51423,493.196666,0.076924
2018-06-18,0.077141,0.077606,0.07627,0.07705,7174.540153,551.517968,0.076872
2018-06-19,0.079783,0.080102,0.076925,0.077194,9000.489109,710.32048,0.07892
2018-06-20,0.079235,0.079833,0.07842,0.079783,6037.890485,478.485335,0.079247


In [18]:
# Derive a USD price column for every altcoin: its BTC-denominated
# weighted average multiplied by the average BTC/USD price
for altcoin in altcoin_data:
    altcoin_data[altcoin]['price_usd'] = altcoin_data[altcoin]['weightedAverage'].mul(
        btc_usd_datasets['avg_btc_price_usd']
    )

In [19]:
# Merge the 'price_usd' column of each altcoin into a single dataframe, one column per altcoin symbol
combined_df = merge_dfs_on_column(list(altcoin_data.values()), list(altcoin_data.keys()), 'price_usd')

In [20]:
# Add the average BTC/USD price to the dataframe as the 'BTC' column
combined_df['BTC'] = btc_usd_datasets['avg_btc_price_usd']

In [21]:
# Chart all of the altcoin prices (plus BTC) on a shared logarithmic y axis
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')

In [22]:
# Calculate the Pearson correlation coefficients of row-to-row percentage changes for 2018
# NOTE: despite its name, combined_df_2016 holds the rows whose index year is 2018
combined_df_2016 = combined_df[combined_df.index.year == 2018]
combined_df_2016.pct_change().corr(method='pearson')

Unnamed: 0,ETH,ETC,XMR,DCR,BURST,SC,BCN,ZEC,BTC
ETH,1.0,0.786395,0.809302,0.669311,0.74926,0.663596,0.509829,0.758099,0.821023
ETC,0.786395,1.0,0.691035,0.530652,0.648089,0.577277,0.385968,0.679409,0.721851
XMR,0.809302,0.691035,1.0,0.66719,0.752363,0.662433,0.454892,0.775663,0.865273
DCR,0.669311,0.530652,0.66719,1.0,0.612424,0.548875,0.427635,0.554905,0.74062
BURST,0.74926,0.648089,0.752363,0.612424,1.0,0.793967,0.581313,0.68178,0.816616
SC,0.663596,0.577277,0.662433,0.548875,0.793967,1.0,0.61638,0.637236,0.738833
BCN,0.509829,0.385968,0.454892,0.427635,0.581313,0.61638,1.0,0.48932,0.578288
ZEC,0.758099,0.679409,0.775663,0.554905,0.68178,0.637236,0.48932,1.0,0.73907
BTC,0.821023,0.721851,0.865273,0.74062,0.816616,0.738833,0.578288,0.73907,1.0


In [23]:
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a Pearson correlation heatmap of the dataframe's columns.

    Args:
        df: Dataframe whose pairwise column correlations are plotted; the
            column labels are used for both heatmap axes.
        title: Figure title.
        absolute_bounds: When True, pin the colour scale to [-1.0, 1.0]
            instead of leaving the bounds unset.

    Returns:
        None. The figure is displayed through ``py.iplot``.
    '''
    heatmap = go.Heatmap(
        # .values replaces DataFrame.as_matrix(), which is deprecated and
        # removed in later pandas releases
        z=df.corr(method='pearson').values,
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )

    layout = go.Layout(title=title)

    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0

    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

In [25]:
# Heatmap of the correlations between percentage changes of the 2018 rows
correlation_heatmap(combined_df_2016.pct_change(), "Cryptocurrency Correlations in 2018")


Method .as_matrix will be removed in a future version. Use .values instead.

