In [None]:
#Load dependencies
%matplotlib inline
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#Relative locations of the raw CSV exports, one per data source
btcrawdata = '../data/rawdata/btcraw.csv'
ltcrawdata = '../data/rawdata/ltcraw.csv'
ethrawdata = '../data/rawdata/ethraw.csv'
trendsrawdata = '../data/rawdata/trendsraw.csv'

#Load each raw file into a temporary Pandas data frame. The first CSV column
#becomes the index and pandas is asked to parse it as dates, so the frames
#can later be sliced by year.
tbtcdata, tltcdata, tethdata, ttrendsdata = (
    pd.read_csv(rawfile, index_col=0, parse_dates=True)
    for rawfile in (btcrawdata, ltcrawdata, ethrawdata, trendsrawdata)
)

In [None]:
#Add a daily change column to each cryptocurrency frame: the difference between
#a row's PriceUSD and the previous row's PriceUSD (the first row has no
#predecessor and therefore becomes NaN)
tbtcdata['Bitcoin Daily Change (US$)'] = tbtcdata['PriceUSD'].diff()
tltcdata['Litecoin Daily Change (US$)'] = tltcdata['PriceUSD'].diff()
tethdata['Ethereum Daily Change (US$)'] = tethdata['PriceUSD'].diff()

#Keep only the rows dated 2017 through 2019 (label-based slice on the date index);
#everything before 2017 and after 2019 is discarded
btcdata, ltcdata, ethdata, trendsdata = (
    rawframe.loc['2017':'2019']
    for rawframe in (tbtcdata, tltcdata, tethdata, ttrendsdata)
)

#Create dataset containing USD price of cryptocurrency
#Each coin keeps only its price and daily change columns; the generic 'PriceUSD'
#column is renamed per coin so the columns stay distinguishable after merging.
#The *missing counters are computed on the same 2017-2019 dataset whose record
#count is reported, not on the unfiltered temporary frame.
btc=btcdata[['PriceUSD','Bitcoin Daily Change (US$)']].rename(columns={'PriceUSD':'Bitcoin Price (US$)'})
btccount=len(btc)
btcmissing=btc.isnull().sum().sum()

ltc=ltcdata[['PriceUSD','Litecoin Daily Change (US$)']].rename(columns={'PriceUSD':'Litecoin Price (US$)'})
ltccount=len(ltc)
ltcmissing=ltc.isnull().sum().sum()

eth=ethdata[['PriceUSD','Ethereum Daily Change (US$)']].rename(columns={'PriceUSD':'Ethereum Price (US$)'})
ethcount=len(eth)
ethmissing=eth.isnull().sum().sum()

#Outer-join the three coins on their date index so a date present in any
#coin's data is kept (missing coins get NaN for that date)
tcrypto=pd.merge(btc,ltc,right_index=True,left_index=True,how='outer')
crypto=pd.merge(tcrypto,eth,right_index=True,left_index=True,how='outer')
cryptocount=len(crypto)

#Remove columns that carry no information for this analysis
#(btcdata['PriceBTC'] column is all 1 and trendsdata['isPartial'] is irrelevant)
btcdata=btcdata.drop(columns=['PriceBTC'])
trendsdata=trendsdata.drop(columns=['isPartial'])
trendscount=len(trendsdata)

#Print out lengths of the datasets for gut check
#The second number on the per-coin lines is a count of missing (null) values,
#so the message says so explicitly
print(f'Bitcoin dataset contain {btccount} records and has {btcmissing} missing values')
print(f'Litecoin dataset contain {ltccount} records and has {ltcmissing} missing values')
print(f'Ethereum dataset contain {ethcount} records and has {ethmissing} missing values')
print(f'Cryptocurrency dataset contain {cryptocount} records')
print(f'Google Trends dataset contain {trendscount} records')

In [None]:
#Determine correlation coefficients for Cryptocurrencies
cryptocorr = crypto.corr()
#Styler.set_precision was deprecated in pandas 1.3 and removed in pandas 2.0;
#an explicit two-decimal format string is accepted by old and new pandas alike.
#axis=None shades the whole table on one common colour scale.
cryptocorr.style.background_gradient(cmap='coolwarm',axis=None).format('{:.2f}')

In [None]:
#Determine correlation coefficients for Google Trends data
trendscorr = trendsdata.corr()
#Styler.set_precision was deprecated in pandas 1.3 and removed in pandas 2.0;
#an explicit two-decimal format string is accepted by old and new pandas alike.
#axis=None shades the whole table on one common colour scale.
trendscorr.style.background_gradient(cmap='coolwarm',axis=None).format('{:.2f}')

In [None]:
#Determine correlation coefficients for Bitcoin Price USD data
tbtccorr = btcdata.corr()
#Keep only the two columns of interest: how every Bitcoin metric correlates
#with the price and with the daily change
btccorr = tbtccorr[['PriceUSD','Bitcoin Daily Change (US$)']]
#Styler.set_precision was deprecated in pandas 1.3 and removed in pandas 2.0;
#an explicit two-decimal format string is accepted by old and new pandas alike
btccorr.style.background_gradient(cmap='coolwarm',axis=None).format('{:.2f}')

In [None]:
#Determine correlation coefficients for Litecoin Price USD data
tltccorr = ltcdata.corr()
#Keep only the two columns of interest: how every Litecoin metric correlates
#with the price and with the daily change
ltccorr = tltccorr[['PriceUSD','Litecoin Daily Change (US$)']]
#Styler.set_precision was deprecated in pandas 1.3 and removed in pandas 2.0;
#an explicit two-decimal format string is accepted by old and new pandas alike
ltccorr.style.background_gradient(cmap='coolwarm',axis=None).format('{:.2f}')

In [None]:
#Determine correlation coefficients for Ethereum data
tethcorr = ethdata.corr()
#Keep only the two columns of interest: how every Ethereum metric correlates
#with the price and with the daily change
ethcorr = tethcorr[['PriceUSD','Ethereum Daily Change (US$)']]
#Styler.set_precision was deprecated in pandas 1.3 and removed in pandas 2.0;
#an explicit two-decimal format string is accepted by old and new pandas alike
ethcorr.style.background_gradient(cmap='coolwarm',axis=None).format('{:.2f}')

In [None]:
#Plot correlation coefficients for Cryptocurrencies
corrmatrix = crypto.corr()
fig = plt.figure(figsize=(20,12))
#plt.matshow opens a brand-new figure unless fignum is given, which would leave
#the 20x12 figure above empty; passing its number draws the matrix on it instead
plt.matshow(corrmatrix, cmap='RdBu', fignum=fig.number)
#Label the ticks from the correlation matrix itself so the labels always match
#the rows/columns that were actually plotted
plt.xticks(range(len(corrmatrix.columns)), corrmatrix.columns, fontsize=10, rotation=90)
plt.yticks(range(len(corrmatrix.index)), corrmatrix.index, fontsize=10)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=10)
#Pin the colour scale to the full correlation range
plt.clim(-1,1)

In [None]:
#Plot correlation coefficients for Google Trends data
corrmatrix = trendsdata.corr()
fig = plt.figure(figsize=(20,12))
#plt.matshow opens a brand-new figure unless fignum is given, which would leave
#the 20x12 figure above empty; passing its number draws the matrix on it instead
plt.matshow(corrmatrix, cmap='RdBu', fignum=fig.number)
#Label the ticks from the correlation matrix itself so the labels always match
#the rows/columns that were actually plotted
plt.xticks(range(len(corrmatrix.columns)), corrmatrix.columns, fontsize=10, rotation=90)
plt.yticks(range(len(corrmatrix.index)), corrmatrix.index, fontsize=10)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=10)
#Pin the colour scale to the full correlation range
plt.clim(-1,1)

In [None]:
#Plot Bitcoin Price over time
#Price and daily change share one set of axes so both read against US dollars
fig, ax = plt.subplots(figsize=(20,10))
btcdata['PriceUSD'].plot(ax=ax, label='Price', linewidth=0.5)
crypto['Bitcoin Daily Change (US$)'].plot(ax=ax, label='Daily Change', linewidth=0.5)
ax.set_title('Bitcoin')
ax.legend(loc='best')
ax.set_xlabel('Date')
ax.set_ylabel('US Dollars')

In [None]:
#Plot Litecoin Price over time
#Price and daily change share one set of axes so both read against US dollars
fig, ax = plt.subplots(figsize=(20,10))
ltcdata['PriceUSD'].plot(ax=ax, label='Price', linewidth=0.5)
crypto['Litecoin Daily Change (US$)'].plot(ax=ax, label='Daily Change', linewidth=0.5)
ax.set_title('Litecoin')
ax.legend(loc='best')
ax.set_xlabel('Date')
ax.set_ylabel('US Dollars')

In [None]:
#Plot Ethereum Price over time
#Price and daily change share one set of axes so both read against US dollars
fig, ax = plt.subplots(figsize=(20,10))
ethdata['PriceUSD'].plot(ax=ax, label='Price', linewidth=0.5)
crypto['Ethereum Daily Change (US$)'].plot(ax=ax, label='Daily Change', linewidth=0.5)
ax.set_title('Ethereum')
ax.legend(loc='best')
ax.set_xlabel('Date')
ax.set_ylabel('US Dollars')

In [None]:
#Plot Google Trends over time
fig, ax = plt.subplots(figsize=(20,10))
#Draw one thin line per search term on the shared axes, in the listed order
for term in ['bitcoin','litecoin','ethereum','facebook libra','cryptocurrency']:
    trendsdata[term].plot(ax=ax, linewidth=0.5)
ax.set_title('Google Trends')
ax.legend(loc='best')
ax.set_xlabel('Date')
ax.set_ylabel('Trend Metric')

In [None]:
#Subplots of Bitcoin Data: one stacked panel per selected column
varplots = ['PriceUSD','ROI30d','VtyDayRet30d','Bitcoin Daily Change (US$)']
axes = btcdata.loc[:, varplots].plot(subplots=True, figsize=(20,30))

In [None]:
#Subplots of Litecoin Data: one stacked panel per selected column
varplots = ['PriceUSD','ROI30d','VtyDayRet30d','Litecoin Daily Change (US$)']
axes = ltcdata.loc[:, varplots].plot(subplots=True, figsize=(20,30))

In [None]:
#Subplots of Ethereum Data: one stacked panel per selected column
varplots = ['PriceUSD','ROI30d','VtyDayRet30d','Ethereum Daily Change (US$)']
axes = ethdata.loc[:, varplots].plot(subplots=True, figsize=(20,30))

In [None]:
#Create dataframe containing crypto and trends data
#Outer-join both sources on the date index, keep only the dates where every
#column has a value, then give the search-term columns descriptive names
trendlabels = {
    'bitcoin':'Bitcoin Trend Metric',
    'litecoin':'Litecoin Trend Metric',
    'ethereum':'Ethereum Trend Metric',
    'facebook libra':'Facebook Libra Trend Metric',
    'cryptocurrency':'Cryptocurrency Trend Metric',
}
cryptotrends = (
    pd.merge(crypto, trendsdata, left_index=True, right_index=True, how='outer')
    .dropna()
    .rename(columns=trendlabels)
)

In [None]:
#Create a final plot representing a start in modeling data:
#Litecoin price and daily change stacked above the Bitcoin search trend
finalplot = ['Litecoin Price (US$)','Litecoin Daily Change (US$)','Bitcoin Trend Metric']
axes = cryptotrends.loc[:, finalplot].plot(subplots=True, figsize=(20,30))

In [None]:
#Create column to groupby month
#Work on a copy so the text 'Month' column is not added to crypto itself
#(a plain assignment would only alias crypto and change it for every other cell)
datecrypto = crypto.copy()
#Two-digit month string taken from the frame's own date index
#(the previously referenced name 'monthcrypto' is not defined anywhere in this notebook)
datecrypto['Month'] = datecrypto.index.strftime('%m')

In [None]:
#Box plot of the 2017 Bitcoin price, one box per month
ax = datecrypto.loc['2017'].boxplot(
    column=['Bitcoin Price (US$)'],
    by='Month',
    figsize=(10,5),
    grid=False,
)
#Blank the figure-level suptitle so only the axes title below is shown
ax.get_figure().suptitle('')
ax.set_title('2017 Bitcoin Price')
#Fixed y-range, identical across the yearly price plots
ax.set_ylim(0,20000)
ax.set_ylabel('Bitcoin Price (US$)')

In [None]:
#Box plot of the 2017 Bitcoin daily change, one box per month
ax = datecrypto.loc['2017'].boxplot(by='Month',column=['Bitcoin Daily Change (US$)'],grid=False,figsize=(10,5))
#Blank the figure-level suptitle so only the axes title below is shown
ax.get_figure().suptitle('')
#Fixed y-range, identical across the yearly daily-change plots
ax.set_ylim(-2000,3500)
ax.set_title('2017 Bitcoin Daily Change')
#The plotted column is the daily change, so the axis is labelled accordingly
#(it previously carried the price label)
ax.set_ylabel('Bitcoin Daily Change (US$)')

In [None]:
#Box plot of the 2018 Bitcoin price, one box per month
ax = datecrypto.loc['2018'].boxplot(
    column=['Bitcoin Price (US$)'],
    by='Month',
    figsize=(10,5),
    grid=False,
)
#Blank the figure-level suptitle so only the axes title below is shown
ax.get_figure().suptitle('')
ax.set_title('2018 Bitcoin Price')
#Fixed y-range, identical across the yearly price plots
ax.set_ylim(0,20000)
ax.set_ylabel('Bitcoin Price (US$)')

In [None]:
#Box plot of the 2018 Bitcoin daily change, one box per month
ax = datecrypto.loc['2018'].boxplot(by='Month',column=['Bitcoin Daily Change (US$)'],grid=False,figsize=(10,5))
#Blank the figure-level suptitle so only the axes title below is shown
ax.get_figure().suptitle('')
#Fixed y-range, identical across the yearly daily-change plots
ax.set_ylim(-2000,3500)
ax.set_title('2018 Bitcoin Daily Change')
#The plotted column is the daily change, so the axis is labelled accordingly
#(it previously carried the price label)
ax.set_ylabel('Bitcoin Daily Change (US$)')

In [None]:
#Box plot of the 2019 Bitcoin price, one box per month
ax = datecrypto.loc['2019'].boxplot(
    column=['Bitcoin Price (US$)'],
    by='Month',
    figsize=(10,5),
    grid=False,
)
#Blank the figure-level suptitle so only the axes title below is shown
ax.get_figure().suptitle('')
ax.set_title('2019 Bitcoin Price')
#Fixed y-range, identical across the yearly price plots
ax.set_ylim(0,20000)
ax.set_ylabel('Bitcoin Price (US$)')

In [None]:
#Box plot of the 2019 Bitcoin daily change, one box per month
ax = datecrypto.loc['2019'].boxplot(by='Month',column=['Bitcoin Daily Change (US$)'],grid=False,figsize=(10,5))
#Blank the figure-level suptitle so only the axes title below is shown
ax.get_figure().suptitle('')
#Fixed y-range, identical across the yearly daily-change plots
ax.set_ylim(-2000,3500)
ax.set_title('2019 Bitcoin Daily Change')
#The plotted column is the daily change, so the axis is labelled accordingly
#(it previously carried the price label)
ax.set_ylabel('Bitcoin Daily Change (US$)')