In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process.kernels import RBF, Matern
from sklearn.metrics import mean_absolute_percentage_error, r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet
from xgboost import XGBRegressor
from skopt import gp_minimize
from sklearn.model_selection import cross_val_score

In [2]:
data = pd.read_csv("Binance_ETHUSDT_1h.csv", index_col="date", parse_dates=True).drop(columns=["unix"])
data.head()

Unnamed: 0_level_0,symbol,open,high,low,close,Volume ETH,Volume USDT,tradecount
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
11/01/2021 00:00,ETH/USDT,4287.48,4317.88,4276.79,4316.13,6993.6869,30080479.7,15326.0
10/31/2021 23:00,ETH/USDT,4293.01,4302.19,4270.37,4287.21,8875.9914,38058457.82,26672.0
10/31/2021 22:00,ETH/USDT,4306.45,4317.92,4290.75,4293.0,10898.0574,46884132.12,28572.0
10/31/2021 21:00,ETH/USDT,4265.11,4329.46,4262.05,4306.45,15770.7324,67716875.0,55889.0
10/31/2021 20:00,ETH/USDT,4247.81,4271.17,4230.0,4265.11,9617.2065,40952475.61,27900.0


In [3]:
close_diff = data["close"].pct_change(1)
close_diff.dropna(inplace=True)

In [4]:
# Using the last values only
close_diff = close_diff.iloc[0:7500]
close_diff.tail(10)

date
12/23/2020 7:00     0.005108
12/23/2020 6:00    -0.001786
12/23/2020 5:00     0.000586
12/23/2020 4:00     0.003950
12/23/2020 3:00     0.009585
12/23/2020 2:00    -0.000273
12/23/2020 1:00     0.006272
12/23/2020 0:00     0.016644
12/22/2020 23:00   -0.001976
12/22/2020 22:00   -0.010449
Name: close, dtype: float64

In [5]:
# Create the training set
X = []
Y = []
length = 48
for i in range(0,len(close_diff)-length):
    X.append(close_diff.iloc[i:i+length])
    Y.append(close_diff.iloc[i+length])
X = np.array(X)
Y = np.array(Y)  

In [6]:
N_test = 48
X_train, Y_train = X[:-N_test], Y[:-N_test]
X_test, Y_test = X[-N_test:], Y[-N_test:]

## Naive Forecast

In [7]:
mean_squared_error(Y_train, np.zeros(len(Y_train)), squared=False)

0.012411239452986077

In [8]:
mean_squared_error(Y_test, np.zeros(len(Y_test)), squared=False)

0.011692437776164804

## Gaussian Regressor

In [53]:
def objective(params):
    gpr = GaussianProcessRegressor(kernel = Matern(length_scale= params[0], nu=params[1] ,length_scale_bounds="fixed"))
    return -np.mean(cross_val_score(gpr, X_train, Y_train, cv=5, n_jobs=-1, scoring='neg_root_mean_squared_error'))

In [54]:
space = [
    (1e-03, 1e-01),
    (0.1, 2)
]

r = gp_minimize(objective, space, n_calls=20, random_state=42, verbose=True)
print(r.x)
best_params = r.x
print(r.fun)
result = r.fun
print("List of X values tried : {}".format(r.x_iters))

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 46.2606
Function value obtained: 0.0123
Current minimum: 0.0123
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 70.1728
Function value obtained: 0.0126
Current minimum: 0.0123
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 77.3019
Function value obtained: 0.0121
Current minimum: 0.0121
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 74.9247
Function value obtained: 0.0122
Current minimum: 0.0121
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 56.9467
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 6 started. Evalu

In [89]:
gpr = GaussianProcessRegressor(kernel = Matern(length_scale= 0.013, nu=1.78 ,length_scale_bounds="fixed"))
gpr.fit(X_train, Y_train)

GaussianProcessRegressor(kernel=Matern(length_scale=0.013, nu=1.78))

In [92]:
mean_squared_error(Y_test, gpr.predict(X_test), squared=False)

0.011660136946581617

In [116]:
def objective(params):
    gpr = GaussianProcessRegressor(kernel = RBF(length_scale= params[0],length_scale_bounds="fixed"))
    return -np.mean(cross_val_score(gpr, X_train, Y_train, cv=5, n_jobs=-1, scoring='neg_root_mean_squared_error'))

In [117]:
space = [
    (1e-05, 1e-03)
]

r = gp_minimize(objective, space, n_calls=10, random_state=42, verbose=True)
print(r.x)
best_params = r.x
print(r.fun)
result = r.fun
print("List of X values tried : {}".format(r.x_iters))

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 17.5951
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 17.5094
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 23.2671
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 29.2250
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 35.0889
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 6 started. Evalu

In [120]:
gpr = GaussianProcessRegressor(kernel = RBF(length_scale= 0.001, length_scale_bounds="fixed"))
gpr.fit(X_train, Y_train)

GaussianProcessRegressor(kernel=RBF(length_scale=0.001))

In [121]:
mean_squared_error(Y_test, gpr.predict(X_test), squared=False)

0.011692437776164804

## MLP Regressor

In [93]:
mlp = MLPRegressor(random_state=42)
#-np.mean(cross_val_score(mlp, X_train, Y_train, cv=5, n_jobs=-1, scoring='neg_root_mean_squared_error'))
mlp.get_params().keys()

dict_keys(['activation', 'alpha', 'batch_size', 'beta_1', 'beta_2', 'early_stopping', 'epsilon', 'hidden_layer_sizes', 'learning_rate', 'learning_rate_init', 'max_fun', 'max_iter', 'momentum', 'n_iter_no_change', 'nesterovs_momentum', 'power_t', 'random_state', 'shuffle', 'solver', 'tol', 'validation_fraction', 'verbose', 'warm_start'])

In [103]:
parameters = {'hidden_layer_sizes':[(270,100), (275,), (250,50)]}
clf = GridSearchCV(mlp, parameters, scoring="neg_root_mean_squared_error")
clf.fit(X_train, Y_train)

GridSearchCV(estimator=MLPRegressor(random_state=42),
             param_grid={'hidden_layer_sizes': [(270, 100), (275,), (250, 50)]},
             scoring='neg_root_mean_squared_error')

In [104]:
clf.best_score_

-0.012052747720467718

In [105]:
clf.best_params_

{'hidden_layer_sizes': (275,)}

In [108]:
mlp = MLPRegressor(hidden_layer_sizes=(275,),random_state=42)
mlp.fit(X_train, Y_train)

MLPRegressor(hidden_layer_sizes=(275,), random_state=42)

In [109]:
mean_squared_error(Y_test, mlp.predict(X_test), squared=False)

0.011823479475239247

## Linear Regression

In [106]:
lr = LinearRegression()
lr.fit(X_train, Y_train)

LinearRegression()

In [107]:
mean_squared_error(Y_test, lr.predict(X_test), squared=False)

0.01175012033972643

## ElasticNet

In [75]:
def objective(params): 
    elas = ElasticNet(alpha = params[0], l1_ratio= params[1])
    return -np.mean(cross_val_score(elas, X_train, Y_train, cv=5, n_jobs=-1, scoring='neg_root_mean_squared_error'))

In [76]:
space = [
    (0.0, 1.0),
    (0.0, 1.0)
]

r = gp_minimize(objective, space, n_calls=10, random_state=42, verbose=True)
print(r.x)
best_params = r.x
print(r.fun)
result = r.fun
print("List of X values tried : {}".format(r.x_iters))

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 5.7262
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.0290
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.1421
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.1422
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.1395
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 6 started. Evaluating

In [110]:
elas = ElasticNet(alpha = 0.8, l1_ratio= 0.18)
elas.fit(X_train, Y_train)

ElasticNet(alpha=0.8, l1_ratio=0.18)

In [111]:
mean_squared_error(Y_test, elas.predict(X_test), squared=False)

0.011704454941261587

## XGB Regressor

In [81]:
def objective(params): 
    xgb = XGBRegressor(n_estimators=300, 
                       tree_method='exact', 
                       max_depth = params[0],
                       learning_rate = params[1],
                       n_jobs=-1,
                       random_state=42)
    return -np.mean(cross_val_score(xgb, X_train, Y_train, cv=5, n_jobs=-1, scoring='neg_root_mean_squared_error'))

In [82]:
space = [
    (10, 16),
    (1e-03, 1e-01)
]

r = gp_minimize(objective, space, n_calls=10, random_state=42, verbose=True)
print(r.x)
best_params = r.x
print(r.fun)
result = r.fun
print("List of X values tried : {}".format(r.x_iters))

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 106.9358
Function value obtained: 0.0126
Current minimum: 0.0126
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 71.3274
Function value obtained: 0.0125
Current minimum: 0.0125
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 49.5223
Function value obtained: 0.0229
Current minimum: 0.0125
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 94.1231
Function value obtained: 0.0125
Current minimum: 0.0125
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 49.3532
Function value obtained: 0.0124
Current minimum: 0.0124
Iteration No: 6 started. Eval

## Log-Difference

In [14]:
close_log = np.log(data["close"]).diff(1)
close_log.dropna(inplace=True)

In [15]:
close_log = close_log.iloc[0:7500]

In [16]:
# Create the training set
X = []
Y = []
length = 48
for i in range(0,len(close_log)-length):
    X.append(close_log.iloc[i:i+length])
    Y.append(close_log.iloc[i+length])
X_log = np.array(X)
Y_log = np.array(Y)  

In [17]:
N_test = 48
X_train_log, Y_train_log = X_log[:-N_test], Y_log[:-N_test]
X_test_log, Y_test_log = X_log[-N_test:], Y_log[-N_test:]

## Naive Forecast

In [18]:
mean_squared_error(Y_train_log, np.zeros(len(Y_train_log)), squared=False)

0.012357958586665899

In [19]:
mean_squared_error(Y_test_log, np.zeros(len(Y_test_log)), squared=False)

0.01170634633230302

## Gaussian Regressor

In [20]:
def objective(params):
    gpr = GaussianProcessRegressor(kernel = Matern(length_scale= params[0], nu=params[1] ,length_scale_bounds="fixed"))
    return -np.mean(cross_val_score(gpr, X_train_log, Y_train_log, cv=5, n_jobs=4, scoring='neg_root_mean_squared_error'))

In [22]:
space = [
    (1e-02, 1e-01),
    (0.0, 2.0)
]

r = gp_minimize(objective, space, n_calls=20, random_state=42, verbose=True)
print(r.x)
best_params = r.x
print(r.fun)
result = r.fun
print("List of X values tried : {}".format(r.x_iters))

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 65.4013
Function value obtained: 0.0122
Current minimum: 0.0122
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 114.5297
Function value obtained: 0.0126
Current minimum: 0.0122
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 82.1023
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 99.7190
Function value obtained: 0.0122
Current minimum: 0.0120
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 82.5702
Function value obtained: 0.0120
Current minimum: 0.0120
Iteration No: 6 started. Eval

In [23]:
gpr = GaussianProcessRegressor(kernel = Matern(length_scale= 0.094, nu=0.0016 ,length_scale_bounds="fixed"))
gpr.fit(X_train_log, Y_train_log)

GaussianProcessRegressor(kernel=Matern(length_scale=0.094, nu=0.0016))

In [25]:
mean_squared_error(Y_test_log, gpr.predict(X_test_log), squared=False)

0.011714716866778855

## Use Moving Averages