# Using Machine Learning for ETH price prediction

In [1]:
import json
from pprint import pprint

### Fetching the data
The training data was retrieved from the Poloniex API (see https://poloniex.com) and is loaded below from the local file `poloniex.json`.

In [2]:
# Load the stored records and keep only each record's "close" field.
with open("poloniex.json") as source_file:
    data = json.load(source_file)

closing = []
for record in data:
    closing.append(record["close"])

# Number of closing prices available.
print(len(closing))

38428


In [3]:
# Relative change between each pair of consecutive closing prices:
#   changes[i] = closing[i + 1] / closing[i] - 1
# so `changes` is one element shorter than `closing`.
zipped = zip(closing[:-1], closing[1:])
# A space is required between the literal `1` and the keyword `for`; the fused
# form `1for` is deprecated syntax in recent Python versions.
changes = [d1 / d0 - 1 for d0, d1 in zipped]

## Using an LSTM (Long Short-Term Memory)

In [4]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
import numpy as np
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [5]:
# Sliding-window settings.
length = 40  # number of consecutive relative changes per input sample
step = 3     # stride between the start indices of successive windows

# Build (input window, target) pairs.
#   input : changes[i : i + length]
#   target: relative change from closing[i] to closing[i + length + 2]
# NOTE(review): the target is measured from the window's FIRST price
# (closing[i]), so it includes the cumulative change already contained in the
# input window. If the goal is to predict only the future move, the reference
# price would be closing[i + length] -- confirm which one is intended.
sequences = []
results = []
for i in range(0, len(changes) - length - 2, step):
    sequences.append(changes[i: i + length])
    results.append(closing[i + length + 2] / closing[i] - 1)

print("max abs. change", np.max(np.abs(results)))

# Wrap every value in its own list so each sample has shape (length, 1),
# matching the LSTM's input_shape=(length, 1).
sequences = [[[v] for v in sequence] for sequence in sequences]

# Hold out the last 30% of the samples for validation.
# An explicit split index is used instead of negative slicing: with
# n_valid == 0, `[:-n_valid]` would yield an EMPTY training set and
# `[-n_valid:]` would put every sample into validation.
n_valid = int(0.3 * len(sequences))
n_train = len(sequences) - n_valid

x_train = np.array(sequences[:n_train])
y_train = np.array(results[:n_train])

x_valid = np.array(sequences[n_train:])
y_valid = np.array(results[n_train:])

max abs. change 0.41267285436974777


In [6]:
# Sanity check: show the first training window, its target, and the shape of
# the last training window.
for item in (x_train[0], y_train[0]):
    print(item)

print(x_train[-1].shape)

[[ 0.00000000e+00]
 [ 2.62001108e-02]
 [ 0.00000000e+00]
 [ 0.00000000e+00]
 [ 1.57175304e-02]
 [ 5.10336214e-03]
 [-6.02293157e-03]
 [-2.05559016e-02]
 [ 0.00000000e+00]
 [ 2.08189667e-02]
 [-2.57175024e-02]
 [ 2.63963506e-02]
 [ 0.00000000e+00]
 [-7.99796127e-03]
 [ 6.65583468e-03]
 [-2.34435510e-02]
 [ 2.54366773e-02]
 [ 5.11628169e-07]
 [ 0.00000000e+00]
 [ 4.65116279e-03]
 [ 0.00000000e+00]
 [-2.45358457e-02]
 [ 3.06704546e-02]
 [ 0.00000000e+00]
 [-5.00000008e-03]
 [ 5.02515656e-03]
 [ 3.68387665e-08]
 [ 0.00000000e+00]
 [-1.02364721e-02]
 [ 6.28746345e-03]
 [ 3.35415573e-03]
 [ 4.13956379e-02]
 [ 8.25396948e-02]
 [-2.03838929e-02]
 [ 6.58683171e-03]
 [ 1.59836051e-02]
 [ 1.99852313e-02]
 [-1.69033198e-03]
 [-1.29983193e-02]
 [ 2.65974953e-02]]
0.22423507446773328
(40, 1)


In [7]:
# Network: one LSTM layer (32 units) that returns only its final output,
# followed by a single dense unit passed through tanh.
# Previously tried and left disabled between the LSTM and the Dense layer:
#   Dropout(0.4)
#   LSTM(16)
model = Sequential([
    LSTM(32, input_shape=(length, 1), return_sequences=False),
    Dense(1),
    Activation('tanh'),
])

# RMSprop with gradient values clipped to 1, trained on mean absolute error.
optimizer = RMSprop(lr=0.005, clipvalue=1.)
model.compile(optimizer=optimizer, loss='mean_absolute_error')
print('Build model...')

Build model...


In [8]:
# Train the network. `validation_data` is passed as a tuple (x, y), which is
# the form documented for Keras `fit`; a list is not handled the same way by
# every Keras/TensorFlow version.
# The call is left as the cell's last expression so the returned History
# object is still shown as the cell output.
model.fit(x_train, y_train,
          batch_size=32,
          epochs=65,
          validation_data=(x_valid, y_valid)
          )

Train on 8957 samples, validate on 3838 samples
Epoch 1/65
Epoch 2/65
Epoch 3/65
Epoch 4/65
Epoch 5/65
Epoch 6/65
Epoch 7/65
Epoch 8/65
Epoch 9/65
Epoch 10/65
Epoch 11/65
Epoch 12/65
Epoch 13/65
Epoch 14/65
Epoch 15/65
Epoch 16/65
Epoch 17/65
Epoch 18/65
Epoch 19/65
Epoch 20/65
Epoch 21/65
Epoch 22/65
Epoch 23/65
Epoch 24/65
Epoch 25/65
Epoch 26/65
Epoch 27/65
Epoch 28/65
Epoch 29/65
Epoch 30/65
Epoch 31/65
Epoch 32/65
Epoch 33/65
Epoch 34/65
Epoch 35/65
Epoch 36/65
Epoch 37/65
Epoch 38/65
Epoch 39/65
Epoch 40/65
Epoch 41/65
Epoch 42/65
Epoch 43/65
Epoch 44/65
Epoch 45/65
Epoch 46/65
Epoch 47/65
Epoch 48/65
Epoch 49/65
Epoch 50/65
Epoch 51/65
Epoch 52/65
Epoch 53/65
Epoch 54/65
Epoch 55/65
Epoch 56/65
Epoch 57/65
Epoch 58/65
Epoch 59/65
Epoch 60/65
Epoch 61/65
Epoch 62/65
Epoch 63/65
Epoch 64/65
Epoch 65/65


<keras.callbacks.History at 0x7fbf1cc42a58>

In [9]:
# Naive baseline: always predict the mean target, then measure the mean
# absolute error of that constant prediction.
# NOTE(review): both figures are computed over ALL samples in `results`
# (training and validation together), not over the validation split only.
mean = np.mean(results)
print(mean)

absolute_deviations = np.abs(np.asarray(results) - mean)
mean_naive_error = np.mean(absolute_deviations)
print(mean_naive_error)

0.007177252219517306
0.04353519267071629


In [10]:
# Compare the model's output with the true targets on the first 30
# validation samples.
preview = slice(0, 30)
preview_predictions = model.predict(x_valid[preview])
print(preview_predictions)
print(y_valid[preview])

[[ 0.01299654]
 [ 0.01228989]
 [-0.04567677]
 [-0.03527839]
 [-0.02091657]
 [-0.04552354]
 [-0.05517202]
 [-0.09301236]
 [-0.1397008 ]
 [-0.09860522]
 [-0.10334683]
 [-0.12534826]
 [-0.10874962]
 [-0.15743524]
 [-0.14613724]
 [-0.10383034]
 [-0.10685607]
 [-0.08644586]
 [-0.10484207]
 [-0.08880618]
 [-0.08093276]
 [-0.00682572]
 [-0.02745242]
 [-0.03046972]
 [-0.05216527]
 [-0.04630762]
 [-0.00667612]
 [ 0.02205369]
 [-0.01635568]
 [-0.05357958]]
[ 0.03267698 -0.02564482 -0.00886626 -0.01340034 -0.01239095 -0.04476311
 -0.04918033 -0.11403215 -0.1005057  -0.09017296 -0.09854839 -0.09814838
 -0.12454729 -0.14569672 -0.1231185  -0.06989254 -0.06938775 -0.07414533
 -0.0941634  -0.05879929 -0.03586207  0.00794259 -0.00723929 -0.04200783
 -0.04097334 -0.04252955  0.01794387  0.03308309 -0.03163657 -0.06350529]


In [11]:
# Persist the trained network to disk.
model_path = "model-price-prediction.h5"
model.save(model_path)

In [12]:
import time
import urllib.request

# Request chart data starting 41 periods of 1800 seconds before now
# (period=1800 is also what the query string asks for).
current_time = int(time.time())
start = current_time - 1800 * 41
print(start)
api_call = "https://poloniex.com/public?command=returnChartData&currencyPair=USDT_ETH&start={}&end=9999999999&period=1800".format(start)
print(api_call)

# Use the response as a context manager so the connection is always closed,
# and set a timeout so an unresponsive server cannot hang the notebook.
with urllib.request.urlopen(api_call, timeout=30) as response:
    contents = response.read()

1525896588
https://poloniex.com/public?command=returnChartData&currencyPair=USDT_ETH&start=1525896588&end=9999999999&period=1800


In [13]:
# Convert the downloaded chart data into model input: one single-element list
# per relative change between consecutive closing prices.
# The live prices get their own names instead of reusing `closing`/`zipped`,
# so the training series defined earlier is not overwritten and earlier cells
# can be re-run safely after this one.
js = json.loads(contents)
live_closing = [d["close"] for d in js]
live_pairs = zip(live_closing[:-1], live_closing[1:])
values = [[d1 / d0 - 1] for d0, d1 in live_pairs]

In [14]:
# Wrap the live window in a batch of one and print the network's prediction.
live_batch = np.array([values])
print(model.predict(live_batch))

[[0.01117908]]


## Using an SVM (Support Vector Machine)

In [15]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

In [16]:
def _drop_feature_axis(windows):
    """Return each window as a flat list holding the first entry of every timestep."""
    return [[timestep[0] for timestep in window] for window in windows]


# SVR takes one flat feature vector per sample, so strip the trailing
# single-feature axis that the LSTM input carries.
x_train_svm = _drop_feature_axis(x_train)
x_valid_svm = _drop_feature_axis(x_valid)

# Search over gamma and C for an RBF-kernel SVR using 5-fold cross-validation.
parameters = [{
    'kernel': ['rbf'],
    'gamma': [1e-2, 1e-3, 1e-4, 1e-5],
    'C': [0.1, 1, 10, 100],
}]
gs = GridSearchCV(SVR(), parameters, cv=5, verbose=5)
gs.fit(x_train_svm, y_train)

# Keep the best estimator found by the search.
clf = gs.best_estimator_

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV]  C=0.1, gamma=0.01, kernel=rbf, score=-0.033444439938619785, total=   0.4s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV]  C=0.1, gamma=0.01, kernel=rbf, score=-0.17807416379103502, total=   0.5s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.5s remaining:    0.0s


[CV]  C=0.1, gamma=0.01, kernel=rbf, score=-0.23227590156423972, total=   0.5s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.3s remaining:    0.0s


[CV]  C=0.1, gamma=0.01, kernel=rbf, score=-0.00836785821041075, total=   0.4s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    3.0s remaining:    0.0s


[CV]  C=0.1, gamma=0.01, kernel=rbf, score=-0.03961634791501045, total=   0.4s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=-0.040327748058734514, total=   0.4s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=-0.18843738479124905, total=   0.5s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=-0.24680195458903986, total=   0.5s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=-0.018320441113637287, total=   0.4s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=-0.047964236693890605, total=   0.4s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV]  C=0.1, gamma=0.0001, kernel=rbf, score=-0.04125943942479071, total=   0.4s
[CV] C=0.1,

[CV]  C=10, gamma=1e-05, kernel=rbf, score=-0.04767705561574865, total=   0.4s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.7228101841635972, total=   0.1s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.7314399603638206, total=   0.1s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.7137746584712057, total=   0.1s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.7806442346680574, total=   0.1s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.7306902594139941, total=   0.1s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV]  C=100, gamma=0.001, kernel=rbf, score=0.38841353812236035, total=   0.2s
[CV] C=100, gamma=0.001, kernel

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:   52.2s finished


In [17]:
# Mean absolute error of the selected SVR on the validation split.
predictions = clf.predict(x_valid_svm)
svr_valid_mae = mean_absolute_error(y_valid, predictions)
print(svr_valid_mae)

0.021490322162718734


In [18]:
# Flatten the live window into one feature vector and print the SVR's prediction.
live_features = [change[0] for change in values]
print(clf.predict([live_features]))

[0.03531159]
