q_agent_test_ema520.py
# agent_test_v7: uses the 10-tick forward-shifted MACD return (signal 8 from q-datagen_c_v4) regression signal to decide the action.
# This version uses a dynamic TP and SL depending on the duration of the open order.
# v8 uses only the profit for buy and sell (signals 0 and 3) to determine the action.
import gym
import gym.wrappers
import gym_forex
from gym.envs.registration import register
import sys
import neat
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import operator
from numpy import genfromtxt
import csv
from sklearn import svm
from operator import add, sub
from joblib import dump, load
from sklearn import preprocessing
import random
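# Example invocation (the file names below are illustrative, not part of the repo):
#   python q_agent_test_ema520.py <validation_dataset.csv> <model_prefix> <env_dataset.csv>
# where argv[1] is the q-datagen validation dataset (with per-feature min/max headers),
# argv[2] is the path prefix of the pre-trained per-action models, and
# argv[3] is the headerless dataset used by the gym-forex environment.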
## \class QAgent
## \brief Q-learning agent that uses an OpenAI gym environment for FX trading,
##        estimating for each tick the optimal SL, TP, and volume.
class QAgent():
    ## init method
    ## Loads the validation dataset, loads the pre-trained models and
    ## initializes the forex environment.
    def __init__(self):
        # percentage of noise to add to an action
        # TODO: change actions so that orders are only closed by SL or TP (depending on volatility)
        self.noise = 0.0
        # TODO: try orders with a minimum duration in ticks (closable only by TP/SL, or by an action once min_duration has elapsed)
        # noise 0.00, min_duration = 0  -> bal = 241k
        # noise 0.00, min_duration = 20 -> bal = 43k
        # noise 0.25, min_duration = 20 -> bal = 1k
        self.duration = 5
        self.min_duration = 0
        self.th_open = 0.2
        self.th_close = 0.1
        # TODO: try orders that are only closed by SL/TP
        # TODO: grid-search SL/TP
        # TODO: in the ideal noiseless case, test whether profit increases when volume is controlled by volatility
        # TODO: test whether volatility-controlled SL/TP improve on the best fixed values found by grid search
        # First argument is the validation dataset, including headers indicating maximum and minimum per feature
        self.vs_f = sys.argv[1]
        # Second argument is the prefix (including path) of the pre-trained models
        # for the actions; all models are files with the .svm extension and the prefix is
        # concatenated with a number indicating the action:
        # 0 = Buy/CloseSell/nopCloseBuy
        # 1 = Sell/CloseBuy/nopCloseSell
        # 2 = No Open Buy
        # 3 = No Open Sell
        self.model_prefix = sys.argv[2]
        # Third argument is the path of the dataset to be used in the gym environment (not q-datagen generated, without headers)
        self.env_f = sys.argv[3]
        # initialize gym-forex env (version 4)
        self.test_episodes = []
        self.generation = 0
        self.min_reward = -15
        self.max_reward = 15
        self.episode_score = []
        self.episode_length = []
        self.num_s = 19
        self.model = []
        self.raw_action = 0
        self.max_index = 0
        self.vs_data = []
        self.vs_num_ticks = 0
        self.vs_num_columns = 0
        self.obsticks = 30
        # TODO: read the min and max of the actions from the q-datagen dataset headers
        self.min_TP = 60
        self.max_TP = 1000
        self.min_SL = 60
        self.max_SL = 1000
        self.min_volume = 0.0
        self.max_volume = 0.1
        self.security_margin = 0.1
        self.test_action = 0
        self.action_prev = [0]
        self.action = [0]
        self.raw_action = [0]
        self.svr_rbf = []
        # load pre-processing settings
        self.pt = preprocessing.PowerTransformer()
        print("loading pre-processing.PowerTransformer() settings for the generated dataset")
        self.pt = load(self.vs_f + '.powertransformer')
        # load feature-selection mask
        print("loading pre-processing feature selection mask")
        self.mask = load(self.vs_f + '.feature_selection_mask')
        # register the gym-forex OpenAI gym environment
        # TODO: read obsticks as the window_size from the q-datagen output headers
        register(
            id='ForexValidationSet-v1',
            entry_point='gym_forex.envs:ForexEnv6',
            kwargs={'dataset': self.env_f, 'max_volume': self.max_volume, 'max_sl': self.max_SL,
                    'max_tp': self.max_TP, 'min_sl': self.min_SL,
                    'min_tp': self.min_TP, 'obsticks': self.obsticks,
                    'capital': 800, 'leverage': 100, 'num_features': 13}
        )
        # make the OpenAI gym environment
        self.env_v = gym.make('ForexValidationSet-v1')
        # Show the action and observation spaces of the forex env; the observation space is
        # two-dimensional, so it has to be converted to an array with nn_format() for direct ANN feed
        # (not needed when evaluating with an external DQN).
        print("action space: {0!r}".format(self.env_v.action_space))
        print("observation space: {0!r}".format(self.env_v.observation_space))
        # read the normalization maximum and minimum per feature from the header
        with open(self.vs_f, newline='') as f:
            reader = csv.reader(f)
            n_data = next(reader)  # gets the header line
        self.num_columns = len(n_data)
        print("vs_f num_columns = ", self.num_columns)
        # minimum and maximum per feature, for normalization before evaluation in the pre-trained models
        self.max = [None] * self.num_columns
        self.min = [None] * self.num_columns
        for i in range(0, self.num_columns - self.num_s):
            header_cell = n_data[i]
            data = header_cell.split("_")
            num_parts = len(data)
            self.max[i] = float(data[num_parts - 1])
            self.min[i] = float(data[num_parts - 2])
        # data was normalized as: my_data_n[0, i] = (2.0 * (my_data[0, i] - min[i]) / (max[i] - min[i])) - 1
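    # A minimal sketch of the inverse of the header normalization documented above,
    # added only for illustration (this helper is hypothetical and not called by the
    # agent): it maps a normalized value in [-1, 1] back to its raw range using the
    # per-feature min/max parsed from the dataset headers.
    def _denormalize(self, value_n, i):
        # inverse of: v_n = (2.0 * (v - min[i]) / (max[i] - min[i])) - 1
        return ((value_n + 1.0) * (self.max[i] - self.min[i]) / 2.0) + self.min[i]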
    ## the action "model" is the q-datagen generated dataset itself, used as a lookup table
    def load_action_models(self):
        self.vs_data = genfromtxt(self.vs_f, delimiter=',')
        # get the number of observations and columns
        self.vs_num_ticks = len(self.vs_data)
        self.vs_num_columns = len(self.vs_data[0])
    ## For each tick's observation, looks up the matching row of the validation set and
    ## returns its action signals; e.g. output 16 (zero-based) encodes the slope of the
    ## future(10) MACD signal: 0 if negative, 1 if positive.
    def decide_next_action(self, normalized_observation):
        # evaluate the lookup with the observation data window
        self.action = []
        self.max_index = 0
        action_list = [0.0] * 19
        vs = np.array(normalized_observation)
        # Build a fingerprint of the observation: stepping obsticks positions per feature,
        # alternately add and multiply the first value of each feature window, to be
        # compared with the same fingerprint computed per row of the validation set.
        a_pattern = 0
        # TODO: ERROR: the training return does not match the one calculated in normalize_observation
        num_features = (self.vs_num_columns - self.num_s) // (3 * self.obsticks)
        n_p = -1
        for i in range(0, num_features):
            n_p = n_p * -1
            if n_p == 1:
                a_pattern = a_pattern + normalized_observation[self.obsticks * i]
            else:
                a_pattern = a_pattern * normalized_observation[self.obsticks * i]
        # for each row of the validation set (output of q-datagen), compute the same
        # fingerprint and compare it with the observation's
        index = 0
        for i in range(1, self.vs_num_ticks):
            a_search = 0
            n_p = -1
            # fingerprint of the values per feature, to compare with the q-datagen dataset output
            for j in range(0, num_features):
                n_p = n_p * -1
                if n_p == 1:
                    a_search = a_search + self.vs_data[i, self.obsticks * j]
                else:
                    a_search = a_search * self.vs_data[i, self.obsticks * j]
            # on a match, return all values of the action signals for that row
            if a_pattern == a_search:
                # there are 19 signals, stored in the last 19 columns of the row
                action_list_n = self.vs_data[i, self.vs_num_columns - 19: self.vs_num_columns].copy()
                action_list = action_list_n.tolist()
                break
            else:
                if i == self.vs_num_ticks - 1:
                    print("PATTERN NOT FOUND, i=", i)
        # adds noise to action 0 with probability self.noise
        if random.random() < self.noise:
            if action_list[0] > 0.5:
                action_list[0] = 0.0
            else:
                action_list[0] = 1.0
        # VOILA!
        self.action = action_list.copy()
        return self.action
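    # A minimal sketch of the fingerprint used by decide_next_action above, factored
    # out for illustration (this helper is hypothetical and not called by the agent).
    # It alternately adds and multiplies the first value of each feature's
    # obsticks-wide window: e.g. row = [a0, a1, a2, b0, b1, b2] with num_features=2
    # and obsticks=3 yields (0 + a0) * b0. Note that the lookup compares fingerprints
    # with exact float equality, so it only matches when the observation values are
    # byte-identical to the validation-set values.
    @staticmethod
    def _fingerprint(row, num_features, obsticks):
        acc = 0.0
        sign = -1
        for k in range(num_features):
            sign = -sign
            v = row[obsticks * k]
            acc = acc + v if sign == 1 else acc * v
        return acc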
    ## normalize the observation matrix and convert it to a list feedable to a pretrained DCN
    # oldest data is first in the dataset and also in the observation matrix
    # input: obs_matrix and previous obs_matrix; output: row
    def normalize_observation(self, observation, observation_prev):
        # observation is a list of size num_features of numpy deques of size 30 (time window)
        n_obs = []
        num_columns_o = len(observation)
        # compose a list from the observation matrix, similar to a row of the training
        # set output from q-datagen (tick-contiguous per feature)
        for i in range(0, num_columns_o):
            l_obs = list(observation[i])
            for j in l_obs:
                n_obs.append(j)
        # append the list of returned values
        # TODO: change to a pass over l_obs subtracting the previous element, using l_obs_prev only for the first element
        for i in range(0, num_columns_o):
            l_obs = list(observation[i])
            l_obs_prev = list(observation_prev[i])
            # element-wise difference with the previous observation; currently computed
            # but not used, since the raw values are appended below
            l_dif = list(map(sub, l_obs, l_obs_prev))
            for l in l_obs:
                n_obs.append(l)
        # apply pre-processing
        n_obs = self.pt.transform(np.array(n_obs).reshape(1, -1))
        n_o = n_obs[0].tolist()
        # apply feature selection
        n_obs = np.array(n_o)
        n_obs = n_obs[self.mask]
        return n_obs
    ## Function transform_action: convert the raw_action output into the
    ## denormalized values to be used in the simulation environment.
    ## Increases the SL by sec_margin%, decreases the TP by the same margin, and reduces the volume by the margin as well.
    def transform_action(self, order_status):
        # order_status: 0 = nop, -1 = sell, 1 = buy
        # self.raw_action contains the output of decide_next_action: an array holding the
        # MACD-signal return, the RSI return and (MACD main - signal > 0?)
        # the output actions are: 0 = TP, 1 = SL, 2 = volume (dInv)
        act = []
        # initialize values for the next order; dir: 1 = buy, -1 = sell, 0 = nop
        dire = 0.0
        tp = 1.0
        tp_a = tp
        sl = 1.0
        vol = 1.0
        action_diff = self.raw_action[self.test_action] - self.action_prev[self.test_action]
        # if there is an opened order, increase the duration counter, otherwise reset it to 0
        if order_status == 0:
            self.duration = 0
        else:
            self.duration = self.duration + 1
        # TODO: add a min_duration constraint when evaluating whether to close an open order with an action
        # if there is no opened order
        if order_status == 0:
            # if profit_buy > 0.2*1500 and DDbuy < 0.6*1500 and DDbuy < DDsell, buy; otherwise sell
            if self.raw_action[8] > 0:
                # opens a buy order
                dire = 1.0
                # tp_a = abs(self.raw_action[0] - self.raw_action[3])
                tp_a = 0.3
            # if profit_sell > 0.2*1500 and DDsell < 0.6*1500 and DDsell < DDbuy, sell; otherwise buy
            if self.raw_action[8] < 0:
                # opens a sell order
                dire = -1.0
                # tp_a = abs(self.raw_action[3] - self.raw_action[0])
                tp_a = 0.3
        # if there is an existing buy order
        if (order_status == 1) and (self.duration > self.min_duration):
            # if raw_action[8] < 0, closes the buy order
            if self.raw_action[8] < 0:
                dire = -1.0
        # if there is an existing sell order
        if (order_status == -1) and (self.duration > self.min_duration):
            # if raw_action[8] > 0, closes the sell order
            if self.raw_action[8] > 0:
                dire = 1.0
        # verify the SL and TP limits; TODO: remove once they come from the formula
        sl_a = 1.0
        # Create the action list output [tp, sl, vol, dir]
        act.append(tp_a)
        # TODO: implement min_tp and min_sl in the simulator
        act.append(sl_a)
        act.append(vol)
        act.append(dire)
        return act
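    # Worked example of the mapping above (values follow directly from the code):
    # with no open order (order_status == 0) and raw_action[8] > 0, transform_action
    # returns [0.3, 1.0, 1.0, 1.0], i.e. open a buy with a reduced TP; with an open
    # buy (order_status == 1), duration > min_duration and raw_action[8] < 0, it
    # returns [1.0, 1.0, 1.0, -1.0], i.e. close the buy.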
    ## Evaluate all the steps of the simulation, choosing at each step the best
    ## action given the observations per tick.
    ## \returns the final balance and the cumulative reward
    # Possible actions:
    # 0 = Buy/CloseSell/nopCloseBuy
    # 1 = Sell/CloseBuy/nopCloseSell
    # 2 = No Open Buy
    # 3 = No Open Sell
    def evaluate(self, max_ticks):
        # calculate the validation set score
        hist_scores = []
        observation = self.env_v.reset()
        normalized_observation = self.normalize_observation(observation, observation)
        score = 0.0
        step = 0
        order_status = 0
        equity = []
        balance = []
        while 1:
            step += 1
            # if step > 2, evaluate normally; otherwise use a no-op action
            self.raw_action = self.decide_next_action(normalized_observation)
            action = self.transform_action(order_status)
            observation_prev = observation.copy()
            if (step < 3) or (step > (self.vs_num_ticks - self.obsticks)):
                print("Skipping limits, step = ", step)
                # action = nop
                action = []
                # initialize values for the next order; dir: 1 = buy, -1 = sell, 0 = nop
                dire = 0.0
                tp = 1.0
                sl = 1.0
                vol = 1.0
                # Create the action list output [tp, sl, vol, dir]
                action.append(tp)
                action.append(sl)
                action.append(vol)
                action.append(dire)
            observation, reward, done, info = self.env_v.step(action)
            order_status = info['order_status']
            equity.append(info['equity'])
            balance.append(info['balance'])
            normalized_observation = self.normalize_observation(observation, observation_prev)
            score += reward
            if done or (step > max_ticks):
                break
        # plot balance and equity per tick
        lw = 2
        y_rbf = balance
        y_v = equity
        x_seq = list(range(0, len(balance)))
        fig = plt.figure()
        plt.plot(x_seq, y_v, color='darkorange', label='Equity')
        plt.plot(x_seq, y_rbf, color='navy', lw=lw, label='Balance')
        plt.xlabel('tick')
        plt.ylabel('value')
        plt.title('Performance')
        plt.legend()
        fig.savefig('agent_test_520.png')
        hist_scores.append(score)
        avg_score = sum(hist_scores) / len(hist_scores)
        print("Validation Set Score = ", avg_score)
        print("*********************************************************")
        return info['balance'], avg_score
    def show_results(self):
        # stub: not implemented
        test = 0

# main function
if __name__ == '__main__':
    agent = QAgent()
    agent.load_action_models()
    scores = []
    balances = []
    for i in range(0, 1):
        print("Testing signal ", 8 + i)
        agent.test_action = i
        agent.load_action_models()
        balance, score = agent.evaluate(6000)
        scores.append(score)
        balances.append(balance)
    print("Results:")
    for i in range(0, 1):
        print("Signal ", 8 + i, " balance=", balances[i], " score=", scores[i])