# Analyse historical data

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

## Import code from the __scrape__ notebook

In [2]:
import ipynb.fs  # Boilerplate required
from .defs.scrape import loadHistoricalData

## Load historical data into a dataframe

In [3]:
# Disabled: coins, tokens = scrapeCurrencies()

# Threshold handed to the loader as ``minMarketCap``: 10 billion.
# NOTE(review): presumably USD, used to skip smaller currencies - confirm in the scrape notebook.
minMarketCap = 10 * 10**9
df = loadHistoricalData(minMarketCap=minMarketCap)

# Show the last rows as the cell output.
df.tail()

Done    12                                        


Unnamed: 0_level_0,BTC Open,BTC High,BTC Low,BTC Close,BTC Volume,BTC Market Cap,ETH Open,ETH High,ETH Low,ETH Close,...,DASH Low,DASH Close,DASH Volume,DASH Market Cap,TRX Open,TRX High,TRX Low,TRX Close,TRX Volume,TRX Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-20,11656.2,13103.0,11656.2,12899.2,11801700000,195979000000,1044.95,1167.11,1044.95,1155.15,...,848.98,926.12,162376000.0,6677870000.0,0.081158,0.088724,0.080608,0.085201,626439000.0,5336000000.0
2018-01-21,12889.2,12895.9,11288.2,11600.1,9935180000,216740000000,1155.68,1155.68,1021.5,1049.58,...,802.41,818.27,109775000.0,7267440000.0,0.084868,0.085698,0.068421,0.071186,561259000.0,5579930000.0
2018-01-22,11633.1,11966.4,10240.2,10931.4,10537400000,195645000000,1055.35,1089.1,930.74,1003.26,...,710.57,763.2,100651000.0,6466930000.0,0.071947,0.082809,0.063831,0.071412,623767000.0,4730400000.0
2018-01-23,10944.5,11377.6,10129.7,10868.4,9660610000,184087000000,1004.17,1023.23,920.54,986.23,...,692.93,757.34,110265000.0,5978460000.0,0.07121,0.074035,0.062891,0.068572,409592000.0,4681940000.0
2018-01-24,10903.4,11501.4,10639.8,11359.4,9940990000,183419000000,987.48,1062.44,965.81,1058.78,...,734.41,775.11,116898000.0,5932760000.0,0.068596,0.07432,0.066939,0.070421,505469000.0,4510090000.0


In [4]:
# https://blog.patricktriest.com/analyzing-cryptocurrencies-python/
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Draw every column of ``df`` as its own trace in one plotly figure.

    Parameters
    ----------
    df : pandas.DataFrame
        One ``go.Scatter`` trace is created per column. The frame's index
        supplies the x values and the x axis is configured as a date axis.
    title : str
        Figure title.
    seperate_y_axis : bool, optional
        (Spelling kept so existing callers keep working.) When True, the
        n-th trace is bound to axis ``y<n>`` and a matching ``yaxis<n>``
        entry with hidden tick labels is added to the layout; tick labels
        of the base y axis are hidden as well.
    y_axis_label : str, optional
        Title of the base y axis.
    scale : str, optional
        Passed through as the ``type`` of every y axis (e.g. ``'linear'``
        or ``'log'``).
    initial_hide : bool, optional
        When True, traces start as ``'legendonly'``; otherwise they are
        visible.

    Returns
    -------
    None
        The figure is rendered with ``py.iplot``.
    '''
    # plotly's ``visible`` attribute accepts only True, False or 'legendonly';
    # the string 'visible' is not part of that enumeration.
    visibility = 'legendonly' if initial_hide else True

    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels=not seperate_y_axis,
            type=scale,
        ),
    )

    traces = []
    # Iterating a DataFrame yields its column labels; axes are numbered from 1.
    for axis_number, column in enumerate(df, start=1):
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            name=column,
            visible=visibility,
        )

        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(axis_number)
            # A fresh dict per axis so no configuration object is shared.
            # NOTE(review): for the first series this writes 'yaxis1'; if
            # plotly treats that as the base 'yaxis', the base axis title set
            # above is replaced - confirm that this is intended.
            layout['yaxis{}'.format(axis_number)] = dict(
                overlaying='y',
                showticklabels=False,
                type=scale,
            )
        traces.append(trace)

    py.iplot(go.Figure(data=traces, layout=layout))

In [5]:
# Column kinds in df: Open, High, Low, Close, Volume, Market Cap.
# Only the "... Close" columns are plotted here.
closeCols = list(filter(lambda name: name.endswith(" Close"), df.columns))

# Zeros are swapped for NaN so that those points are not plotted.
dfClose = df[closeCols].replace(0, np.nan)

df_scatter(
    dfClose,
    title="Cryptocurrency Prices (USD)",
    seperate_y_axis=False,
    y_axis_label='Coin Value (USD)',
    scale='log',
)

In [6]:
# Closing prices restricted to rows dated 2017 or later; the Pearson
# correlation heatmap is computed from this frame.
# Note: despite its name, ``corr`` holds prices, not correlation coefficients.
# For a single year use e.g. ``dfClose.index.year == 2016`` as the mask.
corr = dfClose.loc[dfClose.index.year >= 2017]

In [7]:
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot the pairwise Pearson correlations of ``df``'s columns as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated with ``df.corr(method='pearson')``.
    title : str
        Figure title.
    absolute_bounds : bool, optional
        When True, the colour scale is pinned to the range [-1.0, 1.0];
        otherwise plotly chooses the range.

    Returns
    -------
    None
        The figure is rendered with ``py.iplot``.
    '''
    coefficients = df.corr(method='pearson')
    # Take the labels from the correlation matrix itself so they always line
    # up with the rows/columns of ``z``.
    labels = list(coefficients.columns)

    heatmap = go.Heatmap(
        # ``.values`` replaces ``DataFrame.as_matrix()``, which was deprecated
        # and later removed from pandas.
        z=coefficients.values,
        x=labels,
        y=labels,
        colorbar=dict(title='Pearson Coefficient'),
    )

    if absolute_bounds:
        heatmap['zmin'] = -1.0
        heatmap['zmax'] = 1.0

    fig = go.Figure(data=[heatmap], layout=go.Layout(title=title))
    py.iplot(fig)

In [8]:
# Correlate row-over-row percentage changes rather than the raw price levels.
correlation_heatmap(df=corr.pct_change(), title="Cryptocurrency Correlations")


The installed version of numexpr 2.4.3 is not supported in pandas and will be not be used
The minimum supported version is 2.4.6




In [9]:
from tsfresh import extract_features

# Copy of the full frame with the index values duplicated into a "Date"
# column; the disabled extract_features call below uses column_id="Date".
dfFresh = df.assign(Date=df.index)

# Feature extraction is currently disabled:
# extracted_features = extract_features(dfFresh, column_id="Date")
# extracted_features.tail()
#
# from tsfresh import extract_relevant_features
# features_filtered_direct = extract_relevant_features(timeseries, y,
#                                                      column_id='id', column_sort='time')


The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.

