In [1]:
import nbimporter
import crypto_package as CP
import os, pickle, quandl

import numpy as np
import pandas as pd
from datetime import datetime
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import time
import matplotlib.pyplot as plt
%matplotlib notebook

py.init_notebook_mode(connected=True)

#----------------------------------------------------------------------
#------------- MAIN ---------------------------------------------------
# Exchange codes requested below; each is inserted into 'BCHARTS/{}USD'.
# Not requested at the moment: COINSBANK, HITBTC, LYBIT, ANXHK, BITME,
# BITBOX, INTRSNG, BTCE, WEEX, JUST, CBX
exchanges = ['KRAKEN', 'COINBASE', 'BITSTAMP', 'ITBIT', 'OKCOIN', 'GETBTC']

# exchange code -> whatever CP.get_quandl_data returns for that code
exch_data = {}
for exchange in exchanges:
    quandl_code = 'BCHARTS/{}USD'.format(exchange)
    exch_data[exchange] = CP.get_quandl_data(quandl_code)

# Combine the 'Weighted Price' column of every exchange into one dataframe
btc_usd_datasets = CP.merge_dfs_on_column(
    list(exch_data.values()),
    list(exch_data.keys()),
    'Weighted Price',
)

# Zeros are swapped for NaN so they count as missing data, not as a price of 0
btc_usd_datasets = btc_usd_datasets.replace(0, np.nan)

# Chart every exchange's price series
CP.df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) by Exchange')

# Row-wise mean across the exchange columns, stored as a new column
btc_usd_datasets['avg_btc_price_usd'] = btc_usd_datasets.mean(axis=1)

# Chart the averaged price
btc_trace = go.Scatter(
    x=btc_usd_datasets.index,
    y=btc_usd_datasets['avg_btc_price_usd'],
)
py.iplot([btc_trace])
##########################################################################################
#                            ALT COINS
##########################################################################################

# retrieve alternate coin data
altcoins = ['ETH','LTC','XRP','ETC','STR','DASH','SC','XMR','XEM', 'GNT']
alt_data = {}

# CP.get_crypto_data is called with poloniex_cache as the working directory
# (presumably it reads/writes its cache files relative to cwd -- confirm in
# crypto_package). Create the directory if it doesn't exist yet.
start_dir = os.getcwd()
cache_dir = os.path.join(start_dir, 'poloniex_cache')
if not os.path.isdir(cache_dir):
    os.mkdir(cache_dir)

os.chdir(cache_dir)
try:
    for alt in altcoins:
        coinpair = 'BTC_{}'.format(alt)
        alt_data[alt] = CP.get_crypto_data(coinpair)
finally:
    # Always return to the starting directory. Without this, a failed download
    # leaves the kernel inside poloniex_cache and the next run of this cell
    # creates poloniex_cache/poloniex_cache.
    os.chdir(start_dir)

# calculate USD Price as new col in each dataframe:
# 'weightedAverage' times the averaged BTC/USD price (pandas aligns the two
# Series on their index, so dates missing from either side produce NaN)
for alt in alt_data.keys():
    alt_data[alt]['price_usd'] = alt_data[alt]['weightedAverage'] * btc_usd_datasets['avg_btc_price_usd']

# merge price into single dataframe
combined_df = CP.merge_dfs_on_column(list(alt_data.values()), list(alt_data.keys()), 'price_usd')
# add BTC to the dataframe
combined_df['BTC'] = btc_usd_datasets['avg_btc_price_usd']
# chart alt coins (linear y-axis scale)
CP.df_scatter(combined_df, 'Alternate Currency Prices (USD)', seperate_y_axis = False, y_axis_label = 'Coin Value (USD)', scale = 'linear')






Importing Jupyter notebook from crypto_package.ipynb


ValueError: unsupported pickle protocol: 4

In [2]:
########################
# Nested dictionary: coin symbol -> {daily return: number of times that return occurred}.
# Holds one entry per symbol in the altcoin list, including 'SC'; a symbol that is
# fetched but missing here makes price_count[alt] raise KeyError further down.
price_count = {
    'ETH':{},
    'LTC':{},
    'XRP':{},
    'ETC':{},
    'STR':{},
    'DASH':{},
    'SC':{},
    'XMR':{},
    'XEM':{},
    'GNT':{}
}

    
############################################################
#                       HISTOGRAMS
############################################################
def plot_histogram(valueList, plotName, bins):
    """Draw a plotly bar chart of binned values.

    valueList and bins are forwarded unchanged to ND.histogram_from_real_data;
    only the first two of its five return values (bar positions and bar
    heights) are plotted. plotName is stringified and used as the trace name.

    NOTE(review): `ND` is not imported in any visible cell -- confirm which
    module provides histogram_from_real_data and import it at the top.
    """
    # the helper returns five values; the last three are not needed for the chart
    bar_positions, bar_heights, _bin_width, _mean, _variance = ND.histogram_from_real_data(valueList, bins)

    bars = go.Bar(x=bar_positions, y=bar_heights, name=str(plotName))

    x_axis = dict(title='Value')
    y_axis = dict(title='Count')
    chart_layout = go.Layout(
        title='Sampled Results',
        xaxis=x_axis,
        yaxis=y_axis,
        bargap=0.025,
        bargroupgap=0.025,
    )

    figure = go.Figure(data=[bars], layout=chart_layout)
    py.iplot(figure, filename='hist-plot')
    
# organize into daily returns: for every coin, tally how often each
# day-over-day return (p[i+1] - p[i]) / p[i] occurs
for alt in alt_data.keys():
    # setdefault so a coin that has data but no pre-made entry in price_count
    # gets one instead of raising KeyError
    counts = price_count.setdefault(alt, {})

    # work on a plain float array: label-based series[i] on a date index relies
    # on a deprecated positional fallback
    prices = np.asarray(alt_data[alt]['price_usd'], dtype=float)

    # missing prices give NaN and zero prices give inf; silence the warnings
    # here and drop those entries just below
    with np.errstate(divide='ignore', invalid='ignore'):
        day_returns = np.diff(prices) / prices[:-1]

    # NaN never equals itself, so every NaN would become its own dict key;
    # keep finite returns only
    for dayReturn in day_returns[np.isfinite(day_returns)]:
        dayReturn = float(dayReturn)
        counts[dayReturn] = counts.get(dayReturn, 0) + 1
    
        
    
# one histogram of daily returns per coin
for alt in price_count.keys():
    print("*" * 20, alt, "*" * 20)

    # Rebuild the sample list from the tally: each return is repeated as many
    # times as it occurred, so repeated values keep their weight in the histogram
    samples = [ret for ret, occurrences in price_count[alt].items() for _ in range(occurrences)]

    # second argument is the plot name -> the coin symbol (not the count values)
    plot_histogram(samples, alt, 100)


SyntaxError: invalid syntax (<ipython-input-2-629ba60de96f>, line 100)

In [5]:
import math
from scipy import stats

def match_dates(left_df, symbol1, right_df, symbol2):
    """Return paired daily returns of two currencies for the dates they share.

    Helper for crypto_correlation.

    Parameters
    ----------
    left_df, right_df : pandas.DataFrame
        Each must carry a 'price_usd' column; the frames are inner-joined on
        their index, which must be unique in both (validate="one_to_one").
    symbol1, symbol2 : str
        Currency names. Accepted for interface compatibility; not used here.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Two equal-length float arrays. Element i of each is the return
        (p[i+1] - p[i]) / p[i] between consecutive rows of the joined frame.
        Positions where either currency's return is NaN or infinite (missing
        or zero price) are dropped from both arrays so they stay aligned.
    """
    # merge dataframes on the date (i.e. the index of the df); the shared
    # 'price_usd' column comes out as 'price_usd_x' (left) / 'price_usd_y' (right)
    merged_df = pd.merge(left_df, right_df, how = 'inner', left_index=True, right_index=True, validate = "one_to_one")

    # plain float arrays: avoids label-based series[i] lookups on a date index
    left_prices = np.asarray(merged_df['price_usd_x'], dtype=float)
    right_prices = np.asarray(merged_df['price_usd_y'], dtype=float)

    # calculate daily returns; NaN/inf results are filtered right after
    with np.errstate(divide='ignore', invalid='ignore'):
        left_returns = np.diff(left_prices) / left_prices[:-1]
        right_returns = np.diff(right_prices) / right_prices[:-1]

    # keep only the days where both returns are usable
    usable = np.isfinite(left_returns) & np.isfinite(right_returns)

    return left_returns[usable], right_returns[usable]

def crypto_correlation(df_left, name1, df_right, name2, plotBool=False):
    """Correlate the daily returns of two crypto coins, optionally with a plot.

    Parameters
    ----------
    df_left, df_right : pandas.DataFrame
        Price frames handed to match_dates.
    name1, name2 : str
        Coin names, used for the trace name and the plot title.
    plotBool : bool
        When True, show a scatter of the paired returns plus the regression line.

    Returns
    -------
    float
        The value of pearson_coefficient for the paired returns, or NaN when
        fewer than two paired returns are available (no line can be fitted).

    Side effects: prints the two array lengths and the fitted slope/intercept.
    """
    # make sure dates match && read df into arrays
    left_returns, right_returns = match_dates(df_left, name1, df_right, name2)
    print(str(len(left_returns)),str(len(right_returns)))

    # a regression needs at least two points
    if len(left_returns) < 2:
        return float('nan')

    # calculate values for linear regression
    slope, intercept, r_val, p_val, std_err = stats.linregress(left_returns, right_returns)
    print("slope:", slope, "\tintercept:", intercept)

    if plotBool:
        # a straight line is fully described by its two end points, so there is
        # no need to embed thousands of samples in every figure
        best_fit_x = np.array([np.min(left_returns), np.max(left_returns)])
        best_fit_y = slope * best_fit_x + intercept

        plot = [go.Scatter(
            x = left_returns,
            y = right_returns,
            # raw coin names, matching the layout title below
            name = "{} vs. {} Correlation".format(name1, name2),
            mode = 'markers',
            marker = dict(
                size = 10,
                color = 'rgba(255, 0, 0, .9)',
                line = dict(width = 2,)
            )
        ), go.Scatter(x = best_fit_x,
                      y = best_fit_y,
                      mode='lines',
                      # plain dict instead of the deprecated go.Marker class
                      marker=dict(color='rgb(31, 119, 180)'),
                      name='Fit'
                  )]
        layout = dict(
            title = str(name1)+ ' vs. '+str(name2)+' correlation',
        )

        fig = dict(data=plot,layout=layout)
        py.iplot(fig, filename='plot')

    return pearson_coefficient(left_returns, right_returns)
    
    
def pearson_coefficient(npX, npY):
    """Pearson correlation coefficient of two equally sized samples.

    @params npX, npY: two numpy arrays (any equal-length numeric sequences work)
    @returns a float corresponding to the pearson r value.
             -23 when the two inputs differ in length (kept as-is for existing
             callers; -23 can never be a genuine r value).
             NaN when r is undefined: fewer than two points, or either sample
             has zero variance.
    """
    if len(npX) != len(npY):
        return -23

    xs = np.asarray(npX, dtype=float)
    ys = np.asarray(npY, dtype=float)

    # r needs at least two observations
    if xs.size < 2:
        return float('nan')

    # Centre first, then sum: this avoids the cancellation of the one-pass
    # sum(x^2) - sum(x)^2 / n form, which can go slightly negative and make
    # sqrt() fail.
    dev_x = xs - xs.mean()
    dev_y = ys - ys.mean()

    denominator = math.sqrt(float(np.dot(dev_x, dev_x)) * float(np.dot(dev_y, dev_y)))

    # a constant sample has no variance -> correlation is undefined
    if denominator == 0.0:
        return float('nan')

    return float(np.dot(dev_x, dev_y) / denominator)
 
    
################################################
#           cryptocurrency correlations
################################################
#alt coin options['ETH','LTC','XRP','ETC','STR','DASH','XMR','XEM', 'GNT']

# "A,B" -> pearson value for every unordered pair of distinct coins
completed = {}

for alt1 in altcoins:
    for alt2 in altcoins:
        # a coin is never correlated with itself
        if alt1 == alt2:
            continue

        forward_key = alt1 + "," + alt2
        reverse_key = alt2 + "," + alt1

        # the pair was already handled in one order or the other
        if forward_key in completed or reverse_key in completed:
            continue

        # new pair: compute the pearson value and show the plot
        completed[forward_key] = crypto_correlation(alt_data[alt1], alt1, alt_data[alt2], alt2, True)

print("Pearson Coefficient Values:")
print("*" * 50)
for pair_key, pearson_value in completed.items():
    print(pair_key + ":\t" + str(pearson_value))
    
################################


NameError: name 'altcoins' is not defined

Each point in these plots pairs the daily returns of two cryptocurrencies on the same date. A linear regression line is fitted to each scatter plot, and the Pearson coefficient reported for each pair indicates how closely the points follow that line.