In [10]:
import numpy as np
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
# Put plotly into offline notebook mode before any figure is drawn with py.iplot().
# NOTE(review): connected=True is understood to load plotly.js from the CDN
# instead of embedding it in the notebook — confirm against the plotly docs.
py.init_notebook_mode(connected=True)

In [11]:
# Read each coin's '<SYMBOL>_USDT.csv' file and keep only the rows inside the study window
coins = ['REP', 'ICN', 'GNT', 'GNO', 'LUN', 'BAT', 'ADX', 'BNT', 'CFI', 'STORJ',
         'EDG', '1ST', 'HMQ', 'RLC', 'MLN', 'MYST', 'PTOY', 'SNGLS', 'TKN', 'ETH']
csv = '_USDT.csv'

# 'YYYY-MM-DD' labels for months 7 through 11 of 2017, days 1 through 31.
# Labels for days a month does not have (e.g. '2017-09-31') are generated too;
# they are only used as a membership filter below.
included_dates = [
    '2017-{:02d}-{:02d}'.format(month_number, day_number)
    for month_number in range(7, 12)
    for day_number in range(1, 32)
]

coin_data = {}
for coin in coins:
    price_history = pd.read_csv(coin + csv)
    inside_window = price_history['timeDate'].isin(included_dates)
    coin_data[coin] = price_history.loc[inside_window]
    # Renumber the surviving rows 0..n-1 so every coin shares a positional index
    coin_data[coin].index = range(len(coin_data[coin]))


In [45]:
# Merge price of each coin into single dataframe 
def merge_dfs_on_column(dataframes, labels, col):
    '''Collect column `col` from every dataframe into one combined dataframe.

    The i-th dataframe's `col` series becomes the column named `labels[i]`
    of the result.
    '''
    return pd.DataFrame({
        labels[position]: frame[col]
        for position, frame in enumerate(dataframes)
    })

# One column of closing prices per coin, rows aligned on the shared positional index
combined_coin_data = merge_dfs_on_column(list(coin_data.values()), list(coin_data.keys()), 'close')

# Build the date-indexed frame from a real copy. A plain assignment would only
# create a second name for the same object, so setting the index below would
# silently re-index combined_coin_data as well.
combined_coin_data_dated = combined_coin_data.copy()
combined_coin_data_dated.index = coin_data['ETH']['timeDate']

In [46]:
# Pearson correlation between the coins' row-over-row percentage changes
coin_correlation = combined_coin_data.pct_change().corr(method='pearson')

# Look up every coin's coefficient against ETH and reorder the price columns
# from the least to the most ETH-correlated coin
ETH_correlation = {coin: coin_correlation[coin]['ETH'] for coin in coins}
columns_by_eth_correlation = sorted(ETH_correlation, key=ETH_correlation.get)
combined_coin_data = combined_coin_data[columns_by_eth_correlation]

# Recompute on the reordered frame so the matrix follows the same ordering,
# then save it to disk
coin_correlation = combined_coin_data.pct_change().corr(method='pearson')
coin_correlation.to_csv('ETH_coin_correlations.csv')

In [47]:
# Display heat map of correlations
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a Pearson correlation heatmap of all columns of a dataframe.

    Args:
        df: dataframe whose columns are correlated against each other.
        title: chart title.
        absolute_bounds: when True, pin the colour scale to [-1.0, 1.0]
            instead of letting plotly fit it to the data.

    Returns:
        None. The figure is rendered with py.iplot.
    '''
    heatmap = go.Heatmap(
        # .values rather than DataFrame.as_matrix(): as_matrix was deprecated
        # and later removed from pandas, while .values yields the same array.
        z=df.corr(method='pearson').values,
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )

    layout = go.Layout(title=title)

    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0

    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

# The loaded price window is July-November 2017, so the title states that year
correlation_heatmap(combined_coin_data.pct_change(), "Correlations of Ethereum Based Coins (Jul-Nov 2017) ")


In [15]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Plot every column of a dataframe as its own trace on one plotly chart.

    Args:
        df: dataframe to plot; each column becomes one trace drawn against
            the dataframe's index on a date-typed x axis.
        title: chart title.
        seperate_y_axis: when True, attach each trace to its own overlaid
            y axis (tick labels hidden) instead of the shared one. The
            parameter keeps its original spelling so keyword callers work.
        y_axis_label: title of the shared y axis.
        scale: plotly axis type used for the y axes, e.g. 'linear' or 'log'.
        initial_hide: when True, traces start as legend-only entries.

    Returns:
        None. The figure is rendered with py.iplot.
    '''
    label_arr = list(df)

    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels=not seperate_y_axis,
            type=scale
        )
    )

    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale
    )

    # plotly's `visible` attribute accepts True, False or 'legendonly'.
    # The string 'visible' is not one of them and is rejected by plotly
    # versions that validate attribute values, so shown traces use True.
    visibility = 'legendonly' if initial_hide else True

    # Form one trace per column
    trace_arr = []
    for index, label in enumerate(label_arr):
        series = df[label]
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=label,
            visible=visibility
        )

        # Add a separate axis for the series
        if seperate_y_axis:
            # NOTE(review): for index 0 this targets 'y1'/'yaxis1', which then
            # receives overlaying='y' — confirm that is intended for the first axis.
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)
    

In [44]:
# Plot every coin's price series against the date index on a log-scaled y axis
df_scatter(
    combined_coin_data_dated,
    'ETH Derivative Coin Prices (USDT)',
    seperate_y_axis=False,
    y_axis_label='Coin Value (USD)',
    scale='log',
)

In [17]:
#combined_coin_data.loc[combined_coin_data_dated[''].isin(['NaN'])]


In [19]:
# Preview the first rows of the filtered ETH frame rather than dumping all of it
coin_data['ETH'].head(10)

Unnamed: 0,time,timeDate,close,high,low,open,volumefrom,volumeto
0,1498953600000,2017-07-01,277.09,283.28,244.47,252.51,876.83,224831.39
1,1499040000000,2017-07-02,274.56,282.54,263.04,277.09,524.22,142508.62
2,1499126400000,2017-07-03,263.81,283.00,261.08,274.56,557.59,152150.10
3,1499212800000,2017-07-04,262.19,267.00,249.00,263.81,891.15,229475.39
4,1499299200000,2017-07-05,267.59,269.49,249.58,262.19,607.87,158140.92
5,1499385600000,2017-07-06,237.07,267.59,234.88,267.59,607.36,147428.18
6,1499472000000,2017-07-07,240.00,244.01,229.41,237.07,649.02,154347.52
7,1499558400000,2017-07-08,235.21,246.70,233.44,240.00,1001.17,239361.05
8,1499644800000,2017-07-09,203.08,237.03,185.00,235.21,1273.38,270083.91
9,1499731200000,2017-07-10,189.89,210.00,166.44,203.08,1375.31,262845.97


In [18]:
# Disabled sanity check, kept as a bare string literal so none of it executes;
# the cell's only effect is to echo the string back as its output.
# The statements inside use Python 2 `print` syntax, and 'OMG' is not one of
# the symbols in the `coins` list used to fill coin_data, so they would need
# updating before being re-enabled.
'''print len(coin_data['REP'])
print len(coin_data['ICN'])
print len(coin_data['GNT'])
print len(coin_data['GNO'])
print len(coin_data['LUN'])
print len(coin_data['BAT'])
print len(coin_data['ADX'])
print len(coin_data['BNT'])
print len(coin_data['CFI'])
print len(coin_data['OMG'])
print len(coin_data['STORJ'])
print len(coin_data['ETH']) #--> all the same length''' 

"print len(coin_data['REP'])\nprint len(coin_data['ICN'])\nprint len(coin_data['GNT'])\nprint len(coin_data['GNO'])\nprint len(coin_data['LUN'])\nprint len(coin_data['BAT'])\nprint len(coin_data['ADX'])\nprint len(coin_data['BNT'])\nprint len(coin_data['CFI'])\nprint len(coin_data['OMG'])\nprint len(coin_data['STORJ'])\nprint len(coin_data['ETH']) #--> all the same length"