In [1]:
# N-CryptoAsset Portfolios: Identifying Highly Correlated 
# Cryptocurrencies using PCA
#
# (c) 2017 QuantAtRisk.com, by Pawel Lachowicz
 
 
import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
from datetime import datetime
import json
from bs4 import BeautifulSoup
import requests
 
# define some custom colours
grey = .6, .6, .6
 
 
def timestamp2date(timestamp):
    # function converts a Unix timestamp into Gregorian date
    return datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d')
 
def date2timestamp(date):
    # function coverts Gregorian date in a given format to timestamp
    return datetime.strptime(date_today, '%Y-%m-%d').timestamp()
 
def fetchCryptoClose(fsym, tsym):
    # function fetches the close-price time-series from cryptocompare.com
    # it may ignore USDT coin (due to near-zero pricing)
    # daily sampled
    cols = ['date', 'timestamp', fsym]
    lst = ['time', 'open', 'high', 'low', 'close']
    timestamp_today = datetime.today().timestamp()
    curr_timestamp = timestamp_today
 
    for j in range(2):
        df = pd.DataFrame(columns=cols)
        url = "https://min-api.cryptocompare.com/data/histoday?fsym=" + fsym + "&tsym=" + tsym + "&toTs=" + str(int(curr_timestamp)) + "&limit=2000"
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        dic = json.loads(soup.prettify())
        for i in range(1, 2001):
            tmp = []
            for e in enumerate(lst):
                x = e[0]
                y = dic['Data'][i][e[1]]
                if(x == 0):
                    tmp.append(str(timestamp2date(y)))
                tmp.append(y)
            if(np.sum(tmp[-4::]) > 0):  # remove for USDT
                tmp = np.array(tmp)
                tmp = tmp[[0,1,4]]  # filter solely for close prices
                df.loc[len(df)] = np.array(tmp)
        # ensure a correct date format
        df.index = pd.to_datetime(df.date, format="%Y-%m-%d")
        df.drop('date', axis=1, inplace=True)
        curr_timestamp = int(df.ix[0][0])
        if(j == 0):
            df0 = df.copy()
        else:
            data = pd.concat([df, df0], axis=0)
    data.drop("timestamp", axis=1, inplace=True)
 
    return data  # DataFrame

In [2]:
url = "https://api.coinmarketcap.com/v1/ticker/"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
dic = json.loads(soup.prettify())
 
# create an empty DataFrame
df = pd.DataFrame(columns=["Ticker", "MarketCap"])
 
for i in range(len(dic)):
    df.loc[len(df)] = [dic[i]['symbol'], dic[i]['market_cap_usd']]
 
df.sort_values(by=['MarketCap'])
# apply conversion to numeric as 'df' contains lots of 'None' string as values
df.MarketCap = pd.to_numeric(df.MarketCap)

In [4]:
P = df[(df.MarketCap > 200e6) & (df.Ticker !='BTC')]

In [8]:
print(P)

   Ticker     MarketCap
1     ETH  3.070304e+10
2     BCH  9.928900e+09
3     XRP  8.419809e+09
4     LTC  3.930879e+09
5     XEM  2.683485e+09
6    DASH  2.497948e+09
7   MIOTA  1.878762e+09
8     XMR  1.756990e+09
9     ETC  1.646807e+09
10    OMG  1.090988e+09
11    NEO  1.067830e+09
12    BCC  8.934812e+08
13    LSK  7.517003e+08
14   QTUM  6.258189e+08
15  STRAT  6.115757e+08
16  WAVES  5.113080e+08
17    ZEC  5.025770e+08
18   USDT  3.913846e+08
19    BCN  3.696499e+08
20  STEEM  3.529745e+08
21    BTS  3.528633e+08
22    KMD  3.385107e+08
23    XLM  3.274852e+08
24    GNT  3.053445e+08
25   MAID  2.974451e+08
26    PAY  2.968080e+08
27    EOS  2.875721e+08
28    HSR  2.862485e+08
29    ARK  2.621708e+08
30    REP  2.537909e+08
31   MCAP  2.372688e+08
32    FCT  2.291820e+08
33    MTL  2.232218e+08
34    ICN  2.215260e+08
35     SC  2.211678e+08
36    BAT  2.180770e+08
37   VERI  2.177536e+08
38   PIVX  2.165008e+08
39   DOGE  2.130067e+08


In [12]:
portfolio = list(P.Ticker)
print(portfolio)

['ETH', 'BCH', 'XRP', 'LTC', 'XEM', 'DASH', 'MIOTA', 'XMR', 'ETC', 'OMG', 'NEO', 'BCC', 'LSK', 'QTUM', 'STRAT', 'WAVES', 'ZEC', 'USDT', 'BCN', 'STEEM', 'BTS', 'KMD', 'XLM', 'GNT', 'MAID', 'PAY', 'EOS', 'HSR', 'ARK', 'REP', 'MCAP', 'FCT', 'MTL', 'ICN', 'SC', 'BAT', 'VERI', 'PIVX', 'DOGE']


In [13]:
data.tail

NameError: name 'data' is not defined

In [41]:
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)
import plotly.plotly as py
from plotly.graph_objs import *
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot of the entire dataframe'''
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))
    
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= not seperate_y_axis,
            type=scale
        )
    )
    
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale )
    
    visibility = 'visible'
    if initial_hide:
        visibility = 'legendonly'
        
    # Form Trace For Each Series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index, 
            y=series, 
            name=label_arr[index],
            visible=visibility
        )
        
        # Add seperate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config    
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    # Onlne Plot
    py.plot(fig, filename = 'Cryptocurrency PAC',  fileopt ='overwrite')
    # Offline Plot
    #py.iplot(fig)

In [31]:
data.replace(0, np.nan, inplace=True)

In [39]:
data.info

<bound method DataFrame.info of                  ETH    DASH       XMR       XRP    LTC    ETC     XEM    REP  \
date                                                                            
2015-08-07    0.6747    3.35    0.6400  0.008010   3.91    NaN     NaN    NaN   
2015-08-07    0.6747    3.35    0.6400  0.008010   3.91    NaN     NaN    NaN   
2015-08-08    0.1500    3.19    0.4727  0.008013   3.71    NaN     NaN    NaN   
2015-08-09    1.2000    3.13    0.6000  0.008600   3.74    NaN     NaN    NaN   
2015-08-10    1.2000    3.10    0.4969  0.008700   3.83    NaN     NaN    NaN   
2015-08-11    0.6504    3.12    0.4969  0.008500   3.87    NaN     NaN    NaN   
2015-08-12    0.9050    3.14    0.4969  0.008010   3.92    NaN     NaN    NaN   
2015-08-13    1.2600    3.15    0.4969  0.008500   3.79    NaN     NaN    NaN   
2015-08-14    1.7900    3.15    0.4969  0.008010   3.82    NaN     NaN    NaN   
2015-08-15    1.5000    3.15    0.4969  0.008010   3.87    NaN     NaN    NaN

In [11]:
dfP = df[(df.index >= "2017-01-01") ]
dfP = dfP.dropna(axis=1, how='any')

TypeError: '>=' not supported between instances of 'numpy.ndarray' and 'numpy.ndarray'

In [47]:
df_scatter(dfP, 'cryptocurrency prices')

In [55]:
# N-Cryptocurrency Portfolio (tickers)
fsym = portfolio

# vs. 
tsym = 'USD'

In [56]:
for e in enumerate(fsym):
    print(e[0], e[1])
    if(e[0] == 0):
        try:
            data = fetchCryptoClose(e[1], tsym)
        except:
            pass
    else:
        try:
            data = data.join(fetchCryptoClose(e[1], tsym))
        except:
            pass
 
data = data.astype(float)  # ensure values to be floats
 
# save portfolio to a file (HDF5 file format)
store = pd.HDFStore('portfolio.h5')
store['data'] = data
store.close()
 
# read in your portfolio from a file
df = pd.read_hdf('portfolio.h5', 'data')


0 ETH




.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated



1 BCH
2 XRP
3 LTC
4 XEM
5 DASH
6 MIOTA
7 XMR
8 ETC
9 NEO
10 OMG
11 HSR
12 QTUM
13 BCC
14 LSK
15 STRAT
16 WAVES
17 ZEC
18 PAY
19 BCN
20 EOS
21 BTS
22 STEEM
23 XLM
24 USDT
25 MAID
26 REP
27 BAT
28 GNT
29 FCT
30 ICN
31 ARK
32 MTL
33 SC
34 DOGE
35 BNB
36 GBYTE
37 PIVX
38 DCR
                 ETH     BCH       XRP    LTC     XEM    DASH       XMR  \
date                                                                      
2015-08-07    0.6747     NaN  0.008010   3.91     NaN    3.35    0.6400   
2015-08-07    0.6747     NaN  0.008010   3.91     NaN    3.35    0.6400   
2015-08-08    0.1500     NaN  0.008013   3.71     NaN    3.19    0.4727   
2015-08-09    1.2000     NaN  0.008600   3.74     NaN    3.13    0.6000   
2015-08-10    1.2000     NaN  0.008700   3.83     NaN    3.10    0.4969   
2015-08-11    0.6504     NaN  0.008500   3.87     NaN    3.12    0.4969   
2015-08-12    0.9050     NaN  0.008010   3.92     NaN    3.14    0.4969   
2015-08-13    1.2600     NaN  0.008500   3.79     NaN

In [64]:
print(df)

                 ETH     BCH       XRP    LTC     XEM    DASH       XMR  \
date                                                                      
2015-08-07    0.6747     NaN  0.008010   3.91     NaN    3.35    0.6400   
2015-08-07    0.6747     NaN  0.008010   3.91     NaN    3.35    0.6400   
2015-08-08    0.1500     NaN  0.008013   3.71     NaN    3.19    0.4727   
2015-08-09    1.2000     NaN  0.008600   3.74     NaN    3.13    0.6000   
2015-08-10    1.2000     NaN  0.008700   3.83     NaN    3.10    0.4969   
2015-08-11    0.6504     NaN  0.008500   3.87     NaN    3.12    0.4969   
2015-08-12    0.9050     NaN  0.008010   3.92     NaN    3.14    0.4969   
2015-08-13    1.2600     NaN  0.008500   3.79     NaN    3.15    0.4969   
2015-08-14    1.7900     NaN  0.008010   3.82     NaN    3.15    0.4969   
2015-08-15    1.5000     NaN  0.008010   3.87     NaN    3.15    0.4969   
2015-08-16    0.5000     NaN  0.008010   3.84     NaN    3.00    0.4969   
2015-08-17    1.2500     

In [57]:
print(df.columns)

Index(['ETH', 'BCH', 'XRP', 'LTC', 'XEM', 'DASH', 'XMR', 'ETC', 'NEO', 'OMG',
       'QTUM', 'BCC', 'LSK', 'STRAT', 'WAVES', 'ZEC', 'PAY', 'BCN', 'EOS',
       'BTS', 'STEEM', 'XLM', 'USDT', 'MAID', 'REP', 'BAT', 'GNT', 'FCT',
       'ICN', 'ARK', 'MTL', 'SC', 'DOGE', 'BNB', 'GBYTE', 'PIVX', 'DCR'],
      dtype='object')


In [58]:
df_scatter(df, 'Cryptocurrency prices')

In [5]:
df1 = df[['BTC', 'DASH', 'XMR']]
print(df1.head())

                BTC  DASH  XMR
date                          
2010-07-17  0.04951   NaN  NaN
2010-07-18  0.05941   NaN  NaN
2010-07-19  0.07723   NaN  NaN
2010-07-20  0.07426   NaN  NaN
2010-07-21  0.06634   NaN  NaN


In [6]:
df1 = df1.dropna().drop_duplicates()
print(df1.head())

               BTC  DASH     XMR
date                            
2015-01-29  220.36  1.90  0.3107
2015-01-30  227.19  1.33  0.3027
2015-01-31  216.46  1.90  0.2896
2015-02-01  209.71  1.90  0.2896
2015-02-02  220.53  1.90  0.2867


In [61]:
# portfolio pre-processing
dfP = df[(df.index >= "2017-01-01") & (df.index <= "2017-08-31")]
dfP = dfP.dropna(axis=1, how='any')

In [62]:
dfP.info

<bound method DataFrame.info of                ETH       XRP    LTC    DASH     XMR    ETC    STRAT   WAVES  \
date                                                                          
2017-01-01    8.02  0.006259   4.26   10.76   13.40   1.33  0.06860  0.2009   
2017-01-02    8.03  0.006207   4.37   10.99   13.57   1.35  0.06637  0.2060   
2017-01-03    8.18  0.006220   4.42   11.52   15.00   1.40  0.06865  0.2136   
2017-01-04    9.31  0.006374   4.36   12.06   15.20   1.43  0.07062  0.2147   
2017-01-05    9.07  0.005650   3.95   12.35   13.00   1.40  0.06044  0.1828   
2017-01-06    9.44  0.005995   3.61   10.50   13.50   1.41  0.05883  0.1765   
2017-01-07    9.47  0.006050   3.48   10.43   12.11   1.26  0.05487  0.1856   
2017-01-08    9.67  0.006240   3.80   11.79   12.93   1.42  0.05184  0.2015   
2017-01-09   10.03  0.006124   3.78   11.92   12.52   1.39  0.05150  0.2011   
2017-01-10   10.23  0.006176   4.15   11.93   13.35   1.39  0.05270  0.2102   
2017-01-11    9.49  

In [92]:
df_scatter(dfP, 'Cryptocurrency prices')

In [88]:
import plotly.plotly as py
from plotly.graph_objs import *
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a correlation heatmap for the entire dataframe'''
    heatmap = go.Heatmap(
        z=df.corr(method='pearson').as_matrix(),
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )
    
    layout = go.Layout(title=title)
    
    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0
        
    fig = go.Figure(data=[heatmap], layout=layout)
    py.plot(fig, filename = 'TOP Cryptocurrencies Correlations in Aug 2017', fileopt ='overwrite' )


In [81]:
df_2017 = df[(df.index.year == 2017)&(df.index.month == 8)]
df_2017.pct_change().corr(method='pearson')

Unnamed: 0,ETH,BCH,XRP,LTC,XEM,DASH,XMR,ETC,NEO,OMG,...,FCT,ICN,ARK,MTL,SC,DOGE,BNB,GBYTE,PIVX,DCR
ETH,1.0,0.076164,0.440305,0.405995,0.214769,0.335836,0.123872,0.589822,0.381122,0.416399,...,0.554259,0.413041,-0.013129,0.370882,0.426671,0.422447,-0.064278,0.363866,0.316123,0.35939
BCH,0.076164,1.0,0.063177,-0.10256,0.17437,0.038471,0.066528,0.005989,-0.100763,-0.26074,...,0.438916,0.234628,-0.079063,0.244555,0.095338,0.063301,0.557233,-0.049284,0.165667,0.151526
XRP,0.440305,0.063177,1.0,0.266621,0.100042,0.128855,0.491062,0.370851,-0.049687,0.107722,...,0.226856,0.131892,-0.133297,-0.062467,0.214081,0.355622,0.061242,0.167155,0.136406,-0.021661
LTC,0.405995,-0.10256,0.266621,1.0,-0.033688,0.121021,0.024058,0.550798,0.073982,0.114135,...,0.215795,-0.020336,-0.073485,-0.213033,0.313718,0.379435,0.165898,0.23674,0.231883,0.157532
XEM,0.214769,0.17437,0.100042,-0.033688,1.0,0.380011,0.443053,0.002386,0.022058,0.084998,...,0.160828,0.191655,0.10439,-0.038838,0.287386,0.348635,-0.327621,0.175099,0.446487,0.22631
DASH,0.335836,0.038471,0.128855,0.121021,0.380011,1.0,0.244854,0.412918,0.078119,0.363631,...,0.343318,0.285936,0.008591,0.25058,0.337168,0.243474,-0.316019,0.30783,0.477401,0.461505
XMR,0.123872,0.066528,0.491062,0.024058,0.443053,0.244854,1.0,0.145989,-0.112472,-0.09645,...,0.140112,0.074074,-0.087409,-0.134562,0.14444,0.205003,-0.348943,0.06307,0.306105,0.257813
ETC,0.589822,0.005989,0.370851,0.550798,0.002386,0.412918,0.145989,1.0,0.113775,0.372938,...,0.658829,0.216202,0.09227,0.136205,0.503509,0.330615,-0.325668,0.60488,0.446476,0.509202
NEO,0.381122,-0.100763,-0.049687,0.073982,0.022058,0.078119,-0.112472,0.113775,1.0,0.467197,...,-0.020283,0.207914,-0.097119,0.194704,-0.001533,0.195453,0.001823,0.004009,0.258038,0.0367
OMG,0.416399,-0.26074,0.107722,0.114135,0.084998,0.363631,-0.09645,0.372938,0.467197,1.0,...,0.061575,0.212006,-0.054184,0.360683,0.075848,0.258305,-0.438803,0.192325,0.023103,-0.067725


In [91]:
correlation_heatmap(df_2017.pct_change(), "TOP Cryptocurrencies Correlations in Aug 2017")