In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import statsmodels
# Select the Qt5Agg backend before pyplot is imported on the next line.
# NOTE(review): this presumably opens figures in external Qt windows rather
# than inline in the notebook and needs Qt bindings installed -- confirm this
# is intended.
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt

In [None]:
# Default (width, height) applied to every figure created from here on.
plt.rcParams.update({"figure.figsize": (8, 3.5)})

In [None]:
# Load the BTC closing prices. The first spreadsheet column becomes the
# timestamp index and the single remaining column is squeezed to a Series.
#
# `pd.datetime`, `read_excel(squeeze=...)` and `date_parser=` are deprecated or
# removed in recent pandas releases, so the timestamps are parsed explicitly
# and the frame is squeezed after loading; this form also runs on older pandas.
mydateparser = lambda x: pd.to_datetime(str(x), format='%Y-%m-%d %H:%M:%S')
btc_raw = pd.read_excel('Data/BTC_closing.xlsx', index_col=0)
# Parse element by element: str() of a single timestamp always includes the
# time part, which the strict format above requires.
btc_raw.index = pd.DatetimeIndex(
    [mydateparser(stamp) for stamp in btc_raw.index],
    name=btc_raw.index.name,
)
btc_series = btc_raw.squeeze('columns')
btc_df = pd.DataFrame(btc_series)

In [None]:
#username = 'jochenmadler'
#token = os.environ['GITHUB_TOKEN']  # never hard-code credentials; the token that was committed here must be revoked
#github_session = requests.Session()
#github_session.auth = (username, token)

In [None]:
from statsmodels.tsa.arima_model import ARIMA
import warnings

# Work on log returns (first difference of the log prices, leading NaN
# dropped) and keep the first 80 % of observations for training, the
# remainder for testing.
data = np.log(btc_series).diff().dropna()
train_size = int(0.8 * len(data))
train = data.iloc[:train_size]
test = data.iloc[train_size:]

#evaluate p,d,q combinations for ARIMA model
def evaluate_models(data, p_values, d_values, q_values):
    """Grid-search ARIMA orders on ``data`` and report the lowest-AIC model.

    Every ``(p, d, q)`` combination drawn from the three candidate iterables
    is fitted. Progress and the final winner are printed.

    Parameters
    ----------
    data : array-like
        Series the ARIMA models are fitted to.
    p_values, d_values, q_values : iterable of int
        Candidate AR orders, differencing orders and MA orders.

    Returns
    -------
    tuple
        ``(best_order, best_aic)``. ``best_order`` is ``None`` and
        ``best_aic`` is ``inf`` when no candidate could be fitted.
    """
    from itertools import product

    best_aic, best_order = float('inf'), None
    for order in product(p_values, d_values, q_values):
        try:
            # NOTE(review): maxiter/disp look like keywords of the legacy
            # statsmodels ARIMA class; confirm they are still accepted by the
            # installed statsmodels version.
            model_fit = ARIMA(data, order=order).fit(maxiter=1000, disp=False)
            aic = model_fit.aic
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still abort the search, and report why the candidate failed.
            print('ARIMA{} - not terminating, skip ({}: {})\n'.format(
                order, type(exc).__name__, exc))
            continue
        if aic < best_aic:
            best_aic, best_order = aic, order
            print('ARIMA{} - Current best'.format(order))
            print('AIC:   {}'.format(aic))
            print('BIC:   {}'.format(model_fit.bic))
            print('HQIC:  {}\n'.format(model_fit.hqic))
        else:
            print('ARIMA{} - worse, neglect\n'.format(order))
    print('ARIMA{} - **Global best**\nAIC:   {}'.format(best_order, best_aic))
    # Return the winner so callers can use it instead of parsing the printout.
    return best_order, best_aic

# Candidate orders for the ARIMA grid search: p and q run over 0..10,
# d over 0..1.
p_values = range(11)
d_values = range(2)
q_values = range(11)
# Silence every warning from this point on.
warnings.filterwarnings('ignore')
#evaluate_models(train, p_values, d_values, q_values)


In [None]:
# Summary statistics followed by a line plot of the closing prices.
# The seaborn style is set before plotting because it only affects axes
# created afterwards, and describe() is printed explicitly because a cell
# only echoes its last expression.
sns.set_style('ticks')
print(btc_series.describe())
btc_series.plot()
sns.despine()

In [None]:
# Histogram of the values in btc_series.
btc_series.hist()

In [None]:
# Kernel density estimate of the values in btc_series.
btc_series.plot.kde()

In [None]:
# Box plot of the column(s) currently held in btc_df.
btc_df.boxplot()

In [None]:
# Kernel density estimate restricted to the window from 2018-01 through 2020.
btc_series.loc['2018-01':'2020'].plot.kde()

In [None]:
from pandas.plotting import lag_plot
# Lag plot of btc_series using lag_plot's default lag.
lag_plot(btc_series)

In [None]:
from pandas.plotting import autocorrelation_plot
# Autocorrelation plot of the untransformed series.
autocorrelation_plot(btc_series)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
# statsmodels autocorrelation-function plot of the untransformed series.
plot_acf(btc_series)

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf
# statsmodels partial-autocorrelation-function plot of the untransformed series.
plot_pacf(btc_series)

In [None]:
# Rough stationarity check: cut the series in half and compare the mean and
# variance of the two halves.
X = btc_series.values
split = int(0.5 * len(X))
X1 = X[:split]
X2 = X[split:]
mean1, var1 = X1.mean(), X1.var()
mean2, var2 = X2.mean(), X2.var()
print('mean1', mean1, '\tvar1', var1)
print('mean2', mean2, '\tvar2', var2)

In [None]:
# Histogram of btc_series again (same call as the earlier histogram cell).
btc_series.hist()


In [None]:
# Same split-half check on the log-transformed values, with a line plot and
# a histogram of the log values side by side.
X = np.log(btc_series.values)
fig, (ax1, ax2) = plt.subplots(ncols=2)
ax1.plot(X)
ax2.hist(X)

split = int(0.5 * len(X))
X1 = X[:split]
X2 = X[split:]
mean1, var1 = X1.mean(), X1.var()
mean2, var2 = X2.mean(), X2.var()
print('mean1', mean1, '\tvar1', var1)
print('mean2', mean2, '\tvar2', var2)

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the untransformed values. The bare
# `result` on the last line makes the notebook display what adfuller returned.
X = btc_series.values
result = adfuller(X)
result

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the natural log of the values; the result
# is kept in result_log and displayed.
X = btc_series.values
X = np.log(X)
result_log = adfuller(X)
result_log

In [None]:
# First differences of the raw values (X[i] - X[i-1]), kept as a list and
# plotted.
X = btc_series.values
diff_1 = list(np.diff(X))
plt.plot(diff_1)

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on diff_1, which must already be defined by
# an earlier cell.
result_diff1 = adfuller(diff_1)
result_diff1


In [None]:
# First and second differences of the raw values, plotted side by side.
X = btc_series.values
diff_1 = list(np.diff(X, n=1))
diff_2 = list(np.diff(X, n=2))
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.plot(diff_1, label='diff_1')
ax2.plot(diff_2, label='diff_2')
# plt.legend() only annotates the current (last) axes, so each panel gets
# its own legend explicitly.
ax1.legend(loc='upper left')
ax2.legend(loc='upper left')

In [None]:
# First differences of the log values (i.e. log returns), kept as a list
# and plotted.
X = np.log(btc_series.values)
log_diff1 = list(np.diff(X))
plt.plot(log_diff1)

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on log_diff1, which must already be defined
# by an earlier cell.
result_log_diff1 = adfuller(log_diff1)
result_log_diff1

In [None]:
from sklearn.linear_model import LinearRegression
# Fit a straight line against the observation number (0, 1, 2, ...) and
# subtract it from the series to obtain a linearly detrended version.
X = np.arange(len(btc_series)).reshape(-1, 1)
y = btc_series.values
model = LinearRegression().fit(X, y)
trend = model.predict(X)
plt.plot(y, label='y')
plt.plot(trend, label='trend')

detrended = list(y - trend)
plt.plot(detrended, label='y detrended')
plt.legend(loc='upper left')

In [None]:
# First differences via pandas. The plot is drawn from the full result;
# afterwards the first element (NaN from diff()) is dropped.
pd_diff1 = btc_series.diff()
pd_diff1.plot()
pd_diff1 = pd_diff1.iloc[1:]

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on pd_diff1, which must already be defined
# by an earlier cell.
result_pd_diff1 = adfuller(pd_diff1)
result_pd_diff1

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the values to [0, 1] and plot them as column 'Close_norm'.
values = btc_series.values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1)).fit(values)
#print(scaler.data_min_, scaler.data_max_,scaler.data_range_)
normalized = scaler.transform(values)
normalized_df = pd.DataFrame({'Column1': normalized[:, 0]})

# Overwrite the 'Close' column with the scaled values, then rename it.
btc_df = pd.DataFrame(btc_series)
btc_df['Close'] = normalized_df['Column1'].values
btc_df = btc_df.rename(columns={'Close': 'Close_norm'})
btc_df.plot()


In [None]:
# Min-max scale the first differences to [0, 1] and plot them as column
# 'Close_diff1_norm'.
from sklearn.preprocessing import MinMaxScaler
pd_diff1 = btc_series.diff().iloc[1:]  # drop the first element (NaN from diff())
values_diff1 = pd_diff1.values.reshape(-1, 1)
# Fit the scaler created for this cell. Fitting the shared `scaler` object
# instead would overwrite the price scaler and make this cell depend on the
# earlier normalisation cell having been run.
scaler_diff1 = MinMaxScaler(feature_range=(0,1))
scaler_diff1 = scaler_diff1.fit(values_diff1)
normalized_diff1 = scaler_diff1.transform(values_diff1)
normalized_diff1_df = pd.DataFrame({'Clos_diff1_norm': normalized_diff1[:,0]})

# .copy() so the column assignment below writes to an independent frame
# rather than to a slice of another one.
btc_df = pd.DataFrame(btc_series).iloc[1:].copy()
btc_df['Close'] = normalized_diff1_df['Clos_diff1_norm'].values
btc_df = btc_df.rename({'Close': 'Close_diff1_norm'}, axis=1)
btc_df.plot()
#sns.distplot(btc_df)

In [None]:
#from scipy.stats import kurtosis
#kurtosis(btc_df, axis=0, fisher=True, bias=True)
# Squeeze btc_df to a Series and display its kurtosis.
# NOTE(review): btc_df is whatever the most recently executed cell left in
# it -- presumably the normalised first differences; confirm execution order.
btc_diff1_norm_series = btc_df.squeeze()
btc_diff1_norm_series.kurtosis() #Kurtosis = 31 (insane!) -> log transform?

In [None]:
# Log-transform the series, take first differences, min-max scale them to
# [0, 1], plot the result and display its kurtosis.
from sklearn.preprocessing import MinMaxScaler
# Log directly on the Series; no DataFrame round trip is needed.
btc_log_series = np.log(btc_series)
#diff1
btc_log_diff_series = btc_log_series.diff().iloc[1:]  # drop the first element (NaN from diff())
#norm
values_log_diff1 = btc_log_diff_series.values.reshape(-1, 1)
# Fit the scaler created for this cell. Fitting the shared `scaler` object
# instead would overwrite an earlier scaler and make this cell depend on a
# previous cell having been run.
scaler_log_diff1 = MinMaxScaler(feature_range=(0,1))
scaler_log_diff1 = scaler_log_diff1.fit(values_log_diff1)
normalized_log_diff1 = scaler_log_diff1.transform(values_log_diff1)
normalized_log_diff1_df = pd.DataFrame({'Close_log_diff1_norm': normalized_log_diff1[:,0]})
#back to series
btc_df = pd.DataFrame(btc_series)
# .copy() so the column assignment below writes to an independent frame
# rather than to a slice of btc_df.
btc_df_log_diff_norm = btc_df.iloc[1:].copy()
btc_df_log_diff_norm['Close'] = normalized_log_diff1_df['Close_log_diff1_norm'].values
btc_df_log_diff_norm = btc_df_log_diff_norm.rename({'Close': 'Close_log_diff1_norm'}, axis=1)

btc_df_log_diff_norm.plot()
#sns.distplot(btc_df) #seems odd

#check kurtosis
btc_log_diff1_norm_series = btc_df_log_diff_norm.squeeze()
btc_log_diff1_norm_series.kurtosis() #Kurtosis = 13 (still high...)