- ## Escolha do modelo
- ## Treinamento do modelo
- ## Teste do modelo
- ## Armazenamento do modelo

In [69]:
# maximiza nro de linhas e colunas para exibição
# inibe mensagens de warning
import pandas as pd
pd.set_option('display.max_rows', None) # permite a máxima visualização das linhas em um display
pd.set_option('display.max_columns', None) # permite a máxima visualização das colunas em um display
import warnings
warnings.simplefilter('ignore') # inibe a exibição de avisos de warning
warnings.filterwarnings('ignore') # inibe a exibição de avisos de warning

In [70]:
# modelos de aprendizado por reforço
import keras 
from keras import layers, models, optimizers 
from keras import backend as K 
from collections import namedtuple, deque 
from keras.models import Sequential 
from keras.models import load_model 
from keras.layers import Dense 
from keras.optimizers import Adam

In [71]:
# preparação e visualização de dados
import numpy as np # computação de matrizes
from matplotlib import pyplot as plt # plotagem gráfica
import seaborn as sns # plotagem gráfica
import matplotlib.ticker as ticker  # plotagem gráfica

import datetime 
import math
import random
from numpy.random import choice 
from collections import deque

In [72]:
# carregando arquivo tratado em dataframe
df_soy = pd.read_csv('df_treated/df_soy_treated.csv')

In [73]:
# conferindo a dimensão do dataframe, linhas e colunas
df_soy.shape

(9276, 8)

In [74]:
# ordenando pelo index o dataframe carregado
df_soy = df_soy.sort_index()

In [75]:
# reconhecendo as primeiras linhas do dataframe
df_soy.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Open_Interest,Year
0,1999-01-04,567.0,570.0,566.0,569.0,14.0,312.0,1999
1,1999-01-05,570.5,574.0,570.5,573.5,12.0,323.0,1999
2,1999-01-06,574.0,579.0,574.0,577.0,43.0,329.0,1999
3,1999-01-07,574.5,575.75,574.0,574.25,69.0,348.0,1999
4,1999-01-08,579.0,581.0,579.0,580.5,61.0,345.0,1999


In [76]:
# verificando a posição de cada variável
df_soy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9276 entries, 0 to 9275
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           9276 non-null   object 
 1   Open           9276 non-null   float64
 2   High           9276 non-null   float64
 3   Low            9276 non-null   float64
 4   Close          9276 non-null   float64
 5   Volume         9276 non-null   float64
 6   Open_Interest  9276 non-null   float64
 7   Year           9276 non-null   int64  
dtypes: float64(6), int64(1), object(1)
memory usage: 579.9+ KB


In [77]:
df_soy['Date'] = df_soy['Date'].astype('datetime64[ns]')

In [78]:
X=list(df_soy['Close'])

In [79]:
X=[float(x) for x in X] 
validation_size = 0.2 
train_size = int(len(X) * (1-validation_size)) 
X_train, X_test = X[0:train_size], X[train_size:len(X)]

In [80]:
print(len(X_train))
print(len(X_test))

7420
1856


In [81]:
class Agent: 
    def __init__(self, state_size, is_eval=False, model_name=''): 
        self.state_size = state_size # dias anteriores normalizados 
        self.action_size = 3 # hold, compra, venda 
        self.memory = deque(maxlen=1000) 
        self.inventory = [] 
        self.model_name = model_name 
        self.is_eval = is_eval 
        self.gamma = 0.95
        self.epsilon = 1.0 
        self.epsilon_min = 0.01 
        self.epsilon_decay = 0.995 
        self.model = load_model('models/' + model_name) if is_eval else self._model()

    def _model(self): 
        model = Sequential() 
        model.add(Dense(units=64, input_dim=self.state_size, activation='relu')) 
        model.add(Dense(units=32, activation='relu')) 
        model.add(Dense(units=8, activation='relu')) 
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001)) 
        return model     

    def act(self, state): 
        if not self.is_eval and random.random() <= self.epsilon: 
            return random.randrange(self.action_size)

        options = self.model.predict(state) 
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        mini_batch = [] 
        l = len(self.memory) 

        #1: preparar a replay memory 
        for i in range(l - batch_size + 1, l): 
            mini_batch.append(self.memory[i]) 

        #2: Fazer loop em todo o lote de replay. 
        for state, action, reward, next_state, done in mini_batch: 
            target = reward # recompensa ou Q no tempo t 
                
            #3: atualizar o alvo para tabela Q. equação de tabela
            if not done: 
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0]) #set_trace() 
            
            #4: Valor Q do estado atual a partir da tabela 
            target_f = self.model.predict(state) 
            
            #5: Atualizar a tabela Q de saída para a ação dada na tabela 
            target_f[0][action] = target 
            
            #6. Treinar e ajustar o modelo 
            self.model.fit(state, target_f, epochs=1, verbose=0)

            #7. Implementar algoritmo epsilon greedy 
            if self.epsilon > self.epsilon_min: 
                self.epsilon *= self.epsilon_decay

In [82]:
def getState(data, t, n): 
    d = t - n + 1 
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1]

    res = [] 
    for i in range(n - 1): 
        res.append(sigmoid(block[i + 1] - block[i])) 
        
    return np.array([res])

In [83]:
def plot_behavior(data_input, states_buy, states_sell, profit):
    fig = plt.figure(figsize = (15, 5)) 
    plt.plot(data_input, color='r', lw=2.) 
    plt.plot(data_input, '^', markersize=10, color='m', label='Buying signal', markevery=states_buy) 
    plt.plot(data_input, 'v', markersize=10, color='k', label='Selling signal', markevery = states_sell) 
    plt.title('Total gains: %f'%(profit)) 
    plt.legend() 
    plt.show()

In [84]:
window_size = 1 
agent = Agent(window_size)
l = len(data) - 1 
batch_size = 10 
states_sell = [] 
states_buy = [] 
episode_count = 3 

for e in range(episode_count + 1): 
    print('Episode ' + str(e) + '/' + str(episode_count)) 
    
    # 1-obter estado 
    state = getState(data, 0, window_size + 1) 
    total_profit = 0 
    agent.inventory = []

    for t in range(l): 
        # 2-aplicar a melhor ação 
        action = agent.act(state) 
        
        # sit 
        next_state = getState(data, t + 1, window_size + 1) 
        reward = 0 
        if action == 1: # compra 
            states_buy.append(t) 
            print('Buy: ' + formatPrice(data[t])) 

        elif action == 2 and len(agent.inventory) > 0: # venda
            bought_price = agent.inventory.pop(0) 
            
        #3: Obter Recompensa 
        reward = max(data[t] - bought_price, 0) 
        total_profit += data[t] - bought_price 
        states_sell.append(t) 
        print('Sell: ' + formatPrice(data[t]) + ' | Profit: ' + formatPrice(data[t] - bought_price))

        done = True if t == l - 1 else False 
        
        # 4: Obter próximo estado a ser usado na equação de Bellman 
        next_state = getState(data, t + 1, window_size + 1) 
        
        # 5: Acrescentar à memória 
        agent.memory.append((state, action, reward, next_state, done)) 
        state = next_state 
        if done: 
            print('--------------------------------')
            print('Total Profit: ' + formatPrice(total_profit)) 
            print('--------------------------------') 
    
        # 6: Executar função replay buffer 
        if len(agent.memory) > batch_size: 
            agent.expReplay(batch_size) 

        if e % 10 == 0: 
            agent.model.save('models/model_ep' + str(e))

ValueError: Argument(s) not recognized: {'lr': 0.001}

In [None]:
#O agente já está definido no conjunto de teste precedente. 
test_data = X_test 
l_test = len(test_data) - 1 
state = getState(test_data, 0, window_size + 1) 
total_profit = 0 
is_eval = True 
done = False 
states_sell_test = [] 
states_buy_test = [] 
model_name = 'model_ep10'
agent = Agent(window_size, is_eval, model_name) 
state = getState(data, 0, window_size + 1) 
total_profit = 0 
agent.inventory = [] 

for t in range(l_test): 
    action = agent.act(state) 
    next_state = getState(test_data, t + 1, window_size + 1) 
    reward = 0 

    if action == 1:
        agent.inventory.append(test_data[t]) 
        print('Buy: ' + formatPrice(test_data[t])) 

    elif action == 2 and len(agent.inventory) > 0: 
        bought_price = agent.inventory.pop(0) 
        reward = max(test_data[t] - bought_price, 0) 
        total_profit += test_data[t] - bought_price
        print('Sell: ' + formatPrice(test_data[t]) + ' | profit: ' + formatPrice(test_data[t] - bought_price)) 
    
    if t == l_test - 1: 
        done = True 
        agent.memory.append((state, action, reward, next_state, done)) 
        state = next_state 

if done: 
    print('------------------------------------------')
    print('Total Profit: ' + formatPrice(total_profit)) 
    print('------------------------------------------')

In [None]:
# salvando o modelo de geração de clusters em arquivo .pkl
#import joblib
#joblib.dump(k_means, "model/kmeans_card_holder.pkl")