In [1]:
import pandas as pd
import numpy as np
import random
# Fix the seeds of Python's `random` module and of NumPy's global RNG.
# NOTE(review): libraries that keep their own generator state are not affected by
# these two calls — confirm whether any stochastic step below needs its own seed.
random.seed(42)
np.random.seed(42)
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from datetime import datetime, timedelta
from PyEMD import EEMD
from scipy import stats
from itertools import product
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
import plotly.offline as py
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import matplotlib.pyplot as plt

In [3]:
# Notebook-wide settings.
# FREQUENCY: passed as `freq` to pd.date_range when the shared calendar is built.
FREQUENCY = "D"
# PRICE_TYPE: name of the cleaned price column pulled into the combined frame.
PRICE_TYPE = "close"
# CURRENCY_LST: tickers that the analysis cells iterate over.
CURRENCY_LST = ['BTC', 'ETH', 'LTC']

In [4]:
# Load one raw price CSV per currency (paths are relative to the notebook's folder;
# the file names indicate CoinDesk USD exports ending on 2021-04-21).
btc = pd.read_csv("../data/0_raw/BTC_USD_2013-10-01_2021-04-21-CoinDesk.csv")
eth = pd.read_csv("../data/0_raw/ETH_USD_2015-08-09_2021-04-21-CoinDesk.csv")
ltc = pd.read_csv("../data/0_raw/LTC_USD_2018-06-01_2021-04-21-CoinDesk.csv")

# Ticker -> raw frame; the cleaning cell iterates over this mapping and replaces its values.
currency_dfs = {"BTC":btc, "ETH":eth, "LTC":ltc}

In [5]:
# Turn every raw frame in `currency_dfs` into a date-indexed table with the columns
# open / high / low / close / mid, sorted by date.
#
# Changes compared with the previous version of this cell:
#   * `DataFrame.append` (removed in pandas 2.0) is replaced by `pd.concat`.
#   * The raw frames are no longer mutated in place; a new frame is built instead.
#   * The synthetic row is only added when the date is really absent, so the
#     resulting index cannot end up with a duplicated day.
#   * "mid" is assigned on a frame we own, not on a column slice.

# Day that is patched into every series.
MISSING_DATE = pd.Timestamp("2018-10-29")

# Raw CSV header -> short column name used by the rest of the notebook.
RAW_TO_CLEAN = {
    'Date': 'date',
    '24h Open (USD)': 'open',
    '24h High (USD)': 'high',
    '24h Low (USD)': 'low',
    'Closing Price (USD)': 'close',
}

# list(...) snapshots the items so re-assigning the dict values below is safe.
for cur, raw in list(currency_dfs.items()):
    prices = (
        raw.assign(Date=pd.to_datetime(raw["Date"]))
           .loc[:, list(RAW_TO_CLEAN)]
           .rename(columns=RAW_TO_CLEAN)
    )

    if not prices["date"].eq(MISSING_DATE).any():
        # NOTE(review): as before, the gap is filled with the column means over the
        # frame's whole history, not with values interpolated from neighbouring
        # days — confirm this imputation is intended.
        filler = prices.mean(numeric_only=True).to_frame().T
        filler["date"] = MISSING_DATE
        prices = pd.concat([prices, filler], ignore_index=True)

    prices = prices.sort_values("date", ascending=True).set_index("date")
    prices["mid"] = (prices["high"] + prices["low"]) / 2
    currency_dfs[cur] = prices

In [6]:
# Shared observation window: it starts at the latest first date and ends at the
# earliest last date found across the cleaned currency frames.
min_dates = [frame.index.min() for frame in currency_dfs.values()]
max_dates = [frame.index.max() for frame in currency_dfs.values()]

beg_date, end_date = max(min_dates), min(max_dates)
date_range = pd.date_range(start=beg_date, end=end_date, freq=FREQUENCY)
n_steps = len(date_range)
print(n_steps, "steps from", beg_date, "to", end_date)

1056 steps from 2018-06-01 00:00:00 to 2021-04-21 00:00:00


In [7]:
# Combined frame: one column per currency holding its PRICE_TYPE series,
# restricted to (and indexed by) the shared calendar `date_range`.
price_columns = {}
for cur_name, cur_frame in currency_dfs.items():
    price_columns[cur_name] = cur_frame.loc[date_range, PRICE_TYPE].values

df = pd.DataFrame(price_columns, index=date_range)
df

Unnamed: 0,BTC,ETH,LTC
2018-06-01,7490.621250,570.865740,117.475053
2018-06-02,7518.333329,577.257364,119.934676
2018-06-03,7630.403388,591.381123,123.222694
2018-06-04,7716.145068,618.302767,125.365456
2018-06-05,7505.589277,591.549075,119.522003
...,...,...,...
2021-04-17,61965.782598,2438.429594,311.552530
2021-04-18,60574.444728,2374.755810,311.990124
2021-04-19,56850.830166,2249.008696,276.891528
2021-04-20,56224.101588,2201.648351,271.021457


In [8]:
# Derived views of the price panel used by the plots below:
#   scaled_df    - prices passed through a MinMaxScaler built with its default settings
#   log_df       - natural log of the prices
#   diff_df      - first differences of the prices
#   log_diff_df  - first differences of the log prices
scaler = MinMaxScaler()
scaled_df = pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)
log_df = np.log(df)
diff_df = df.diff().dropna()
log_diff_df = log_df.diff().dropna()

In [60]:
# Line chart of the min-max scaled prices held in scaled_df.
px.line(scaled_df, title="Scaled Prices")

In [10]:
# Line chart of the log prices held in log_df.
px.line(log_df, title="Log Prices")

In [61]:
# Line chart of the first differences of the log prices (log_diff_df).
px.line(log_diff_df, title="Difference of Log Prices")

In [12]:
# Pairwise correlation of the log-price columns: printed rounded to two decimals,
# then drawn as a heat map with the colour scale fixed to [0, 1].
log_corr = log_df.corr()
print(log_corr.round(2))
px.imshow(log_corr, range_color=[0,1])

      BTC   ETH   LTC
BTC  1.00  0.90  0.78
ETH  0.90  1.00  0.83
LTC  0.78  0.83  1.00


In [13]:
# Q-Q plots of the log-price differences, one panel per currency: the probplot
# points as markers plus the fitted line drawn between the two extreme quantiles.
qq_titles = ["Q-Q Plot of "+ cur for cur in CURRENCY_LST]
fig = make_subplots(rows=1, cols=len(CURRENCY_LST), subplot_titles=qq_titles)

for col_no, cur in enumerate(CURRENCY_LST, start=1):
    # probplot returns ((theoretical, ordered sample), (slope, intercept, r)).
    (theo_q, sample_q), (slope, intercept, _r) = stats.probplot(log_diff_df[cur])
    line_x = np.array([theo_q[0], theo_q[-1]])
    fig.add_trace(go.Scatter(x=theo_q, y=sample_q, mode='markers'), row=1, col=col_no)
    fig.add_trace(
        go.Scatter(x=line_x, y=intercept + slope*line_x, mode='lines', marker_color='red'),
        row=1, col=col_no,
    )

fig.update_layout(width=950, height=300, margin=dict(l=10, r=10, b=10, t=30), showlegend=False)
fig.show()

In [62]:
# Histogram of the first differences of the log prices (log_diff_df).
px.histogram(log_diff_df, title="Histograms of Difference of Log Prices")

In [65]:
# Decompose every currency's log-price series with statsmodels' seasonal_decompose.
# No `period` is passed, so it has to come from the series index — presumably
# weekly for this daily calendar; TODO confirm.
decomposition_model = 'additive'
decomposed_components = {
    cur: seasonal_decompose(log_df[cur], model=decomposition_model)
    for cur in CURRENCY_LST
}

In [77]:
# Four stacked panels (observed, trend, seasonal, residual) for one currency.
cur = 'BTC'
components = decomposed_components[cur]
panel_titles = ["Original", "Trend", "Seasonal", "Residuals"]
panel_series = [components.observed, components.trend, components.seasonal, components.resid]

fig = make_subplots(rows=4, cols=1, subplot_titles=panel_titles)
for panel_row, series in enumerate(panel_series, start=1):
    fig.add_trace(go.Scatter(x=date_range, y=series, mode='lines'), row=panel_row, col=1)
fig.update_layout(height=600, width=950, title_text=f"Decomposed Series of {cur}", title_x=0.5, showlegend=False)

In [67]:
# Four stacked panels (observed, trend, seasonal, residual) for one currency.
cur = 'ETH'
components = decomposed_components[cur]
panel_titles = ["Original", "Trend", "Seasonal", "Residuals"]
panel_series = [components.observed, components.trend, components.seasonal, components.resid]

fig = make_subplots(rows=4, cols=1, subplot_titles=panel_titles)
for panel_row, series in enumerate(panel_series, start=1):
    fig.add_trace(go.Scatter(x=date_range, y=series, mode='lines'), row=panel_row, col=1)
fig.update_layout(height=600, width=950, title_text=f"Decomposed Series of {cur}", title_x=0.5, showlegend=False)

In [68]:
# Four stacked panels (observed, trend, seasonal, residual) for one currency.
cur = 'LTC'
components = decomposed_components[cur]
panel_titles = ["Original", "Trend", "Seasonal", "Residuals"]
panel_series = [components.observed, components.trend, components.seasonal, components.resid]

fig = make_subplots(rows=4, cols=1, subplot_titles=panel_titles)
for panel_row, series in enumerate(panel_series, start=1):
    fig.add_trace(go.Scatter(x=date_range, y=series, mode='lines'), row=panel_row, col=1)
fig.update_layout(height=600, width=950, title_text=f"Decomposed Series of {cur}", title_x=0.5, showlegend=False)

In [69]:
# Overlay the decomposition of every currency: one panel per component, one
# named trace per (currency, component) pair so the legend can tell them apart.
panel_titles = ["Original", "Trend", "Seasonal", "Residuals"]
fig = make_subplots(rows=4, cols=1, subplot_titles=panel_titles)

for cur in CURRENCY_LST:
    components = decomposed_components[cur]
    labelled_panels = [
        (components.observed, f"{cur}-Orig."),
        (components.trend, f"{cur}-Trend"),
        (components.seasonal, f"{cur}-Season."),
        (components.resid, f"{cur}-Resid."),
    ]
    for panel_row, (series, label) in enumerate(labelled_panels, start=1):
        fig.add_trace(go.Scatter(x=date_range, y=series, mode='lines', name=label), row=panel_row, col=1)

fig.update_layout(height=900, width=950, title_text=f"Decomposed Series of All Currencies", title_x=0.5, showlegend=True)

In [87]:
# Moving average of the log prices over `look_back` rows, plotted next to the
# log prices themselves.
look_back = 7

# Suffix the frame's own column labels instead of rebuilding them from
# CURRENCY_LST, so a label can never end up attached to the wrong series.
ma_log_df = log_df.rolling(look_back).mean().dropna().add_suffix(f"-MA({look_back})")

# The first look_back-1 rows have no complete window; skip them positionally so
# both halves of the concatenation cover the same dates.
concat_df = pd.concat((log_df.iloc[look_back-1:], ma_log_df), axis=1)
px.line(concat_df, title=f"MA({look_back}) of Log Prices")

In [93]:
# Difference between the log prices and their moving average, per currency.
# `.values` drops the labels of the sliced log_df, so the subtraction is purely
# positional and the result carries ma_log_df's index and column names.
px.line(log_df[look_back-1:].values - ma_log_df, title=f"Error by MA({look_back}) of Log Prices")

In [94]:
# Expanding (cumulative) mean of the log prices: each point averages every
# observation up to that date. The title now names the statistic that is actually
# computed; the fixed-window rolling mean is plotted in a separate cell.
px.line(log_df.expanding().mean(), title="Expanding Mean of Log Prices")

# Ensemble Empirical Mode Decomposition (EEMD)

Ensemble empirical mode decomposition (EEMD) is a noise-assisted technique that is meant to be more robust than simple Empirical Mode Decomposition (EMD). The robustness is checked by performing many decompositions on copies of the signal that are slightly perturbed from their initial position. In the grand average over all IMF results, the added noise cancels out and the result is a pure decomposition.

In [75]:
# Run EEMD on every currency's log-price series and keep the resulting IMFs.
#
# EEMD perturbs the signal with random noise. PyEMD exposes `noise_seed` to seed
# the generator used for that noise, so each instance is seeded explicitly here;
# re-running the cell then gives the same IMFs, independent of the order in
# which the currencies are processed.
EEMD_NOISE_SEED = 42

eemd_dict = {}
for cur in CURRENCY_LST:
    eemd = EEMD()
    eemd.noise_seed(EEMD_NOISE_SEED)
    imfs = eemd(log_df[cur].values)
    eemd_dict[cur] = imfs

In [76]:
# One stacked panel per intrinsic mode function (row of the EEMD result).
cur = "BTC"
cur_imfs = eemd_dict[cur]
fig = make_subplots(rows=len(cur_imfs), cols=1)

for panel_row, imf in enumerate(cur_imfs, start=1):
    fig.add_trace(go.Scatter(x=date_range, y=imf, mode='lines'), row=panel_row, col=1)

fig.update_layout(height=1000, width=900, title_text=f"Intrinsic Mode Functions of {cur} by EEMD", title_x=0.5)
fig.show()

In [22]:
# One stacked panel per intrinsic mode function (row of the EEMD result).
cur = "ETH"
cur_imfs = eemd_dict[cur]
fig = make_subplots(rows=len(cur_imfs), cols=1)

for panel_row, imf in enumerate(cur_imfs, start=1):
    fig.add_trace(go.Scatter(x=date_range, y=imf, mode='lines'), row=panel_row, col=1)

fig.update_layout(height=1000, width=900, title_text=f"Intrinsic Mode Functions of {cur} by EEMD", title_x=0.5)
fig.show()

In [23]:
# One stacked panel per intrinsic mode function (row of the EEMD result).
cur = "LTC"
cur_imfs = eemd_dict[cur]
fig = make_subplots(rows=len(cur_imfs), cols=1)

for panel_row, imf in enumerate(cur_imfs, start=1):
    fig.add_trace(go.Scatter(x=date_range, y=imf, mode='lines'), row=panel_row, col=1)

fig.update_layout(height=1000, width=900, title_text=f"Intrinsic Mode Functions of {cur} by EEMD", title_x=0.5)
fig.show()