In [1]:
import csv
from collections import deque
import math
from sklearn import preprocessing
import numpy as np
from sklearn.cross_validation import train_test_split
from neupy import algorithms, layers
from neupy.functions.errors import rmsle


In [4]:
class ExpMovingAverage():

    def __init__(self, period):

        self.period = period
        self.stream = deque()
        self.multiplier = 2.0 / float((period + 1))
        self.prev_ema = None
        self.ema = None

    #end


    def __call__(self, value):

        self.stream.append(value)

        if len(self.stream) > self.period:
            self.stream.popleft()

            if not self.prev_ema:

                self.prev_ema = sum(self.stream) / len(self.stream)

            else:

                self.ema = (value - self.prev_ema) * self.multiplier + self.prev_ema
                self.prev_ema = self.ema

        return self.prev_ema

    #end

In [7]:
ema12 = ExpMovingAverage(period=12)
ema26 = ExpMovingAverage(period=26)
macd9 = ExpMovingAverage(period=9)
data = []

features = []
targets = []

In [10]:
with open("data/EURCHF_day.csv", "rb") as f:
    reader = csv.reader(f)
    for row in reader:
        if row[0] != "<TICKER>":
            data.append([row[1], float(row[3]), float(row[4]), float(row[5]), float(row[6])])

print "Loaded data: {0} rows".format(len(data))

Loaded data: 3648 rows


In [11]:
for index, item in enumerate(data):
    close = item[4]

    ema12_value = ema12(close) or close
    item.append(round(ema12_value, 4))

    ema26_value = ema26(close) or close
    item.append(round(ema26_value, 4))

    macd9_value = macd9(ema12_value - ema26_value) or close
    item.append(round(macd9_value, 4))

    features.append([close, ema12_value, ema26_value, macd9_value])

    try:
        targets.append([data[index + 1][4]])
    except:
        pass

In [13]:
data[:10]

[['20010103', 1.5201, 1.511, 1.5235, 1.5126, 1.5126, 1.5126, 1.5126],
 ['20010104', 1.5128, 1.5055, 1.5248, 1.5239, 1.5239, 1.5239, 1.5239],
 ['20010105', 1.5239, 1.5235, 1.5326, 1.5302, 1.5302, 1.5302, 1.5302],
 ['20010108', 1.5312, 1.5233, 1.5322, 1.5238, 1.5238, 1.5238, 1.5238],
 ['20010109', 1.5236, 1.5206, 1.5247, 1.523, 1.523, 1.523, 1.523],
 ['20010110', 1.5231, 1.5218, 1.5293, 1.526, 1.526, 1.526, 1.526],
 ['20010111', 1.5261, 1.5249, 1.5365, 1.5353, 1.5353, 1.5353, 1.5353],
 ['20010112', 1.5352, 1.5339, 1.543, 1.5393, 1.5393, 1.5393, 1.5393],
 ['20010115', 1.5398, 1.5377, 1.5463, 1.5416, 1.5416, 1.5416, 1.5416],
 ['20010116', 1.5415, 1.5357, 1.5426, 1.5384, 1.5384, 1.5384, 1.5384]]

In [14]:
features[:10]

[[1.5126, 1.5126, 1.5126, 1.5126],
 [1.5239, 1.5239, 1.5239, 1.5239],
 [1.5302, 1.5302, 1.5302, 1.5302],
 [1.5238, 1.5238, 1.5238, 1.5238],
 [1.523, 1.523, 1.523, 1.523],
 [1.526, 1.526, 1.526, 1.526],
 [1.5353, 1.5353, 1.5353, 1.5353],
 [1.5393, 1.5393, 1.5393, 1.5393],
 [1.5416, 1.5416, 1.5416, 1.5416],
 [1.5384, 1.5384, 1.5384, 1.5384]]

In [15]:
targets[:10]

[[1.5239],
 [1.5302],
 [1.5238],
 [1.523],
 [1.526],
 [1.5353],
 [1.5393],
 [1.5416],
 [1.5384],
 [1.5376]]

In [17]:
feature_data_scaler = preprocessing.MinMaxScaler()
target_data_scaler = preprocessing.MinMaxScaler()

features_stded = feature_data_scaler.fit_transform(features)
targets_stded = target_data_scaler.fit_transform(targets)

In [18]:
training_period = 500

In [20]:
x_train, x_test, y_train, y_test = train_test_split(features_stded[:training_period],
                                                    targets_stded[:training_period],
                                                    train_size=0.85)



cgnet = algorithms.ConjugateGradient(
    connection=[
        layers.SigmoidLayer(4),
        layers.SigmoidLayer(50),
        layers.OutputLayer(1),
    ],
    search_method='golden',
    show_epoch=100,
    verbose=True,
    optimizations=[algorithms.LinearSearch],
)

cgnet.train(x_train, y_train, x_test, y_test, epochs=300)

y_predict = cgnet.predict(x_test).round(5)

print len(y_test), len(y_predict)

test_normalized = feature_data_scaler.inverse_transform(y_test)
predicted_normalized = target_data_scaler.inverse_transform(y_predict)

error = rmsle(feature_data_scaler.inverse_transform(y_test),
              target_data_scaler.inverse_transform(y_predict))

print "Error on initial training: {0}".format(error)


[4m[1mNetwork structure[0;0m[0;0m

[[92mLAYERS[0m] SigmoidLayer(4) > SigmoidLayer(50) > OutputLayer(1)

[4m[1mNetwork options[0;0m[0;0m

Verbose:
[[92mOPTION[0m] verbose = True

BaseNetwork:
[[90mOPTION[0m] error = mse
[[92mOPTION[0m] show_epoch = 100
[[90mOPTION[0m] shuffle_data = False
[[90mOPTION[0m] step = 0.1
[[90mOPTION[0m] train_end_signal = None
[[90mOPTION[0m] train_epoch_end_signal = None
[[90mOPTION[0m] use_bias = True

Backpropagation:
[[92mOPTION[0m] optimizations = ['LinearSearch']

ConjugateGradient:
[[90mOPTION[0m] update_function = fletcher_reeves

LinearSearch:
[[92mOPTION[0m] search_method = golden
[[90mOPTION[0m] tol = 0.3


[4m[1mStart train[0;0m[0;0m

[[92mTRAIN[0m] Train data size: 425
[[92mTRAIN[0m] Validation data size: 75
[[92mTRAIN[0m] Total epochs: 300

[1mEpoch 1[0;0m
  [92m*[0m Train error:  0.279871564117
  [92m*[0m Validation error: 0.00289988406088
  [92m*[0m Epoch time: 0.01764 sec
[1mEpoch 100[0;0

ValueError: non-broadcastable output operand with shape (75,1) doesn't match the broadcast shape (75,4)