In [1]:
#Load libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

In [5]:
#Read data to DataFrame
#Validate the file before parsing: pandas reports unparseable input with the opaque
#"ValueError: Expected object or value" (e.g. for an empty file, and on older pandas
#versions also for a path that does not exist), which hides the real cause.
data_path = Path('currencies.json')
if not data_path.is_file():
    raise FileNotFoundError('Data file not found: %s' % data_path.resolve())
if data_path.stat().st_size == 0:
    raise ValueError('Data file is empty: %s' % data_path.resolve())
df = pd.read_json(data_path)

ValueError: Expected object or value

In [None]:
#Preview the first five rows to get a feel for the data
df.head(n=5)

In [None]:
#DataFrame structure: print a summary of the index, the columns and their dtypes
df.info()

In [None]:
#Clean the text columns and index the frame by date.
#regex=False makes every replacement a literal substring match: '(' and ')' are regex
#metacharacters, and the default value of `regex` differs between pandas versions.
df = (
    df
    .assign(
        #Clean market rank variable: remove '#' and store as integer
        market_rank=lambda d: d['market_rank'].str.replace('#', '', regex=False).astype(int),
        #Clean short name variable: remove both parentheses
        short_name=lambda d: (
            d['short_name']
            .str.replace('(', '', regex=False)
            .str.replace(')', '', regex=False)
        ),
        #Clean day close variable: remove ',' (presumably thousands separators) and store as float
        day_close=lambda d: d['day_close'].str.replace(',', '', regex=False).astype(float),
    )
    #Set date column as index
    .set_index('date')
)

#Check results
df.head()

In [None]:
#Sort DataFrame by market rank, then by date (ascending is the default for every key).
#'date' is the index name set via set_index('date'); sort_values accepts index level names in `by`.
#Reassigning instead of inplace=True keeps the step chainable and avoids mutating the
#frame behind any output that was already displayed.
df = df.sort_values(by=['market_rank', 'date'])
df.head()

Autocorrelation investigation:

In [None]:
#Extract the day_close series for Bitcoin and for Ethereum
BTC = df.loc[df['short_name'] == 'BTC', 'day_close']
ETH = df.loc[df['short_name'] == 'ETH', 'day_close']

#Combine both series into one frame: rows follow the BTC index and ETH is aligned to it,
#then only the 2015-08-07 .. 2019-04-09 window is kept
df_example = (
    BTC.to_frame(name='BTC')
    .assign(ETH=ETH)
    .loc['2015-08-07':'2019-04-09']
)

In [None]:
#Three side-by-side autocorrelation diagnostics for the Bitcoin series
from pandas.plotting import autocorrelation_plot, lag_plot
from statsmodels.graphics.tsaplots import plot_acf

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(21, 6))

#Left panel: lag scatterplot
a1 = lag_plot(BTC, ax=ax[0])
ax[0].set_title('Lag 1 Autocorrelation Scatterplot')

#Middle panel: autocorrelation restricted to 30 lags (labels set after plotting so they win)
a2 = plot_acf(BTC, lags=30, ax=ax[1])
ax[1].set(
    title='Autocorrelation over 30 lags',
    ylabel='Autocorrelation',
    xlabel='Lag',
)

#Right panel: autocorrelation across all lags
a3 = autocorrelation_plot(BTC, ax=ax[2])
ax[2].set_title('Autocorrelation over all lags')

plt.show()

In [None]:
#Line chart of the BTC and ETH columns
ax = df_example.plot(kind='line')
ax.set_ylabel('Price (USD)')
ax.set_title('Bitcoin and Ethereum prices')

In [None]:
#Take first differences (change of each column relative to the previous row)
df_example_diff = df_example.diff()
#Drop the rows that contain NaN after differencing.
#The spelling `df_example_dff` is kept on purpose: the ACF cell that follows reads this name.
df_example_dff = df_example_diff.dropna()
ax = df_example_diff.plot()
ax.set_ylabel('Price Change From Previous Day (USD)')
#Title describes the differenced data rather than repeating the price-chart title
ax.set_title('Bitcoin and Ethereum daily price changes')

In [None]:
#Autocorrelation of the differenced Bitcoin series over 30 lags
#(the return value is assigned, presumably so the notebook does not render the figure twice)
ax = plot_acf(df_example_dff['BTC'], lags=30)

Model building

In [None]:
#Split dataset: the last 7 observations are held out for testing.
#NOTE(review): the training slice starts at position 1, so the first observation is
#never used - confirm this is intentional.
split_at = len(BTC) - 7
train, test = BTC.iloc[1:split_at], BTC.iloc[split_at:]

#Train autoregression model
#NOTE(review): `AR` is deprecated in newer statsmodels releases in favour of `AutoReg`
#(which needs an explicit `lags` argument) - plan a migration before upgrading.
from statsmodels.tsa.ar_model import AR
model = AR(train, freq = 'D')
model_fit = model.fit()

#Make predictions and score results
from sklearn.metrics import mean_squared_error
predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
#Pair the values by position with zip(): integer keys such as predictions[i] on a
#date-indexed Series depend on a positional fallback that pandas has deprecated.
for predicted, expected in zip(predictions, test):
    print('predicted=%f, expected=%f' % (predicted, expected))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
#Label both lines so the figure can be read on its own
plt.plot(test, label='expected')
plt.plot(predictions, color='red', label='predicted')
plt.legend()
plt.show()

In [None]:
#Split dataset: the last 7 rows are held out for testing
split_at = len(df_example.index) - 7
train = df_example.iloc[:split_at]
test = df_example.iloc[split_at:]

#Train vector autoregression model on both columns
from statsmodels.tsa.vector_ar.var_model import VAR
model = VAR(train, freq = 'D')
model_fit = model.fit()

#Make predictions and score results
from sklearn.metrics import mean_squared_error
#Seed the forecast with the last k_ar training rows (the lags the fitted model conditions on)
#instead of `model_fit.y`, which newer statsmodels releases flag as a deprecated alias.
predictions = model_fit.forecast(train.values[-model_fit.k_ar:], steps = len(test))