In [1]:
"""
Robô de Treinamento
"""
import pandas as pd
import numpy as np
from env_rand_train import ArmEnv
from rl_robot_treino import DDPG

# --- Run configuration ------------------------------------------------------
MAX_EPISODES = 500   # number of episodes to run (original note: 2000 for 1 axis)
MAX_EP_STEPS = 50    # upper bound on the number of steps inside one episode
ON_TRAIN = True      # True: train and export the dataset; False: run eval(1)

# --- Environment ------------------------------------------------------------
env = ArmEnv()
s_dim, a_dim, a_bound = env.state_dim, env.action_dim, env.action_bound

# --- RL method (continuous actions) -----------------------------------------
# DDPG is the algorithm used for training.
rl = DDPG(a_dim, s_dim, a_bound)

# NOTE(review): `steps` is not referenced anywhere in the visible cells.
steps = []
def train(episode=None):
    """Run a single training episode, then save the model.

    The episode resets the environment and performs up to ``MAX_EP_STEPS``
    steps. At each step the agent picks an action, the environment is
    advanced, the transition is stored in the agent's memory and, once
    ``rl.memory_full`` is true, ``rl.learn()`` is called. The episode ends
    when the environment reports ``done`` or the step limit is reached.

    Args:
        episode: Episode number shown in the log line. When omitted, the
            module-level loop variable ``i`` is used if it exists (this is
            what the original implementation relied on), otherwise ``0``.
            Passing it explicitly makes the function callable outside a
            ``for i in ...`` loop without raising ``NameError``.

    Returns:
        A tuple ``(s_ant, a, s_)`` for the last step executed: the state
        before the step, the action taken, and the resulting state.

    Note:
        ``MAX_EP_STEPS`` must be at least 1; with zero steps there is no
        last transition to return and the ``return`` statement fails with
        ``UnboundLocalError``.
    """
    if episode is None:
        # Backward compatibility: existing callers invoke ``train()`` from a
        # module-level ``for i in ...`` loop and expect ``i`` in the log.
        episode = globals().get('i', 0)

    # Put the robot back in its initial situation at the start of the episode.
    s = env.reset()
    ep_r = 0.
    # Each step fills the agent's memory with one transition produced by the
    # action returned from ``rl.choose_action(s)``.
    for j in range(MAX_EP_STEPS):
        env.render()

        # The network chooses an action for the current state ``s``; early on
        # it is untrained, so the chosen actions are not very effective.
        # Original author's note: ``a`` holds the arm angles (see
        # ``env.step``), in [-1, 1], standing for -2*pi..+2*pi radians
        # -- TODO confirm against ``env.step``.
        a = rl.choose_action(s)

        # The action produces a new state ``s_`` and a reward ``r``.
        s_, r, done = env.step(a)

        rl.store_transition(s, a, r, s_)

        ep_r += r
        if rl.memory_full:
            # Start learning only once the memory has been filled.
            rl.learn()

        s_ant = s
        s = s_
        # Steps continue until the goal is reached or the per-episode step
        # limit is hit.
        if done or j == MAX_EP_STEPS-1:
            print('Ep: %i | %s | ep_r: %.1f | step: %i ' % (episode, '---' if not done else 'done', ep_r, j))
            print("angulos")
            print(a)
            break  # leaves the step loop, ending the episode
    rl.save()
    return s_ant, a, s_


def eval(quant):
    """Replay the restored policy for ``quant`` episodes with rendering.

    Restores the agent via ``rl.restore()``, renders once and enables vsync
    on ``env.viewer``, then runs episodes of at most 200 steps each. An
    episode stops early when the environment reports ``done``.

    Args:
        quant: Number of episodes to run.

    Note:
        This function shadows Python's builtin ``eval`` at module level.
    """
    rl.restore()
    # Render once before touching ``env.viewer`` -- presumably this creates
    # the viewer; verify against the environment implementation.
    env.render()
    env.viewer.set_vsync(True)

    completed = 0
    while completed < quant:
        completed += 1
        state = env.reset()
        for _step in range(200):
            env.render()
            state, _reward, finished = env.step(rl.choose_action(state))
            if finished:
                break


if ON_TRAIN:
    # Run MAX_EPISODES training episodes and record, for each one, the last
    # action together with the state it produced.
    dataset = []  # collected output rows, one per episode
    # `i` must remain a module-level name: train() reads it for its log line.
    for i in range(MAX_EPISODES):
        estado_ant, acao, estado = train()

        # Row layout: action values first, then the resulting state values.
        # The previous state (estado_ant) is intentionally not stored.
        linha = [*acao, *estado]
        dataset.append(linha)

    # Convert the collected rows to a DataFrame and export them as .csv.
    df = pd.DataFrame(dataset)
    df.to_csv('movimentos_robo.csv',  index=False)

else:
    eval(1)

#env.ajust_goal(80., 80., 10)
#eval(10)

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Ep: 0 | --- | ep_r: -20.2 | step: 49 
angulos
[-0.05776201 -0.05346316 -0.11585479]
Ep: 1 | --- | ep_r: -26.1 | step: 49 
angulos
[ 0.07742827 -0.12905774 -0.1794705 ]
Ep: 2 | --- | ep_r: -19.3 | step: 49 
angulos
[-0.109002   -0.19839424 -0.11876936]
Ep: 3 | --- | ep_r: -20.3 | step: 49 
angulos
[ 0.02124318 -0.16878375 -0.08447097]
Ep: 4 | --- | ep_r: -15.7 | step: 49 
angulos
[-0.07930022 -0.06261014 -0.12271255]
Ep: 5 | --- | ep_r: -15.7 | step: 49 
angulos
[-0.05876755 -0.04153832 -0.09353486]
Ep: 6 | --- | ep_r: -25.6 | step: 49 
angulos
[-0.04083178 -0.20828758 -0.1514398 ]
Ep: 7 | --- | ep_r: -19.8 | step: 49 
angulos
[-0.01798894 -0.00743047 -0.12955976]
Ep: 8 | --- | ep_r: -33.1 | step: 49 
angulos
[-0.11758671 -0.14651334 -0.11003387]
Ep: 9 | --- | ep_r: 

Ep: 87 | --- | ep_r: -14.7 | step: 49 
angulos
[ 0.09044699 -0.11918443 -0.15769023]
Ep: 88 | --- | ep_r: -30.5 | step: 49 
angulos
[-0.06416152 -0.04857209 -0.11635195]
Ep: 89 | --- | ep_r: -23.4 | step: 49 
angulos
[ 0.06907754 -0.14314497 -0.18598826]
Ep: 90 | --- | ep_r: -21.3 | step: 49 
angulos
[ 0.04581018 -0.08379665 -0.21718252]
Ep: 91 | --- | ep_r: -13.2 | step: 49 
angulos
[ 0.00094753 -0.07111011 -0.10984938]
Ep: 92 | --- | ep_r: -29.6 | step: 49 
angulos
[-0.02583892 -0.06980904 -0.10397501]
Ep: 93 | --- | ep_r: -33.1 | step: 49 
angulos
[ 0.07285726 -0.07601846 -0.22602716]
Ep: 94 | --- | ep_r: -26.8 | step: 49 
angulos
[ 0.04333501 -0.02478652 -0.1837213 ]
Ep: 95 | --- | ep_r: -6.6 | step: 49 
angulos
[-0.09625617 -0.04579727 -0.09250902]
Ep: 96 | --- | ep_r: -23.1 | step: 49 
angulos
[-0.03812621  0.02870855 -0.07655983]
Ep: 97 | --- | ep_r: -32.2 | step: 49 
angulos
[-0.05589511 -0.23231521 -0.23334545]
Ep: 98 | --- | ep_r: -22.3 | step: 49 
angulos
[-0.04751803 -0.035

Ep: 183 | --- | ep_r: -37.3 | step: 49 
angulos
[ 0.01039825 -0.09961263 -0.14982665]
Ep: 184 | --- | ep_r: -9.6 | step: 49 
angulos
[ 0.00395837 -0.02100655 -0.07651703]
Ep: 185 | --- | ep_r: -17.6 | step: 49 
angulos
[-0.01106554 -0.24462119 -0.1881686 ]
Ep: 186 | --- | ep_r: -35.6 | step: 49 
angulos
[ 0.02504719 -0.00913883 -0.1541361 ]
Ep: 187 | --- | ep_r: -22.8 | step: 49 
angulos
[ 0.05012689 -0.04423929 -0.17985493]
Ep: 188 | --- | ep_r: -36.5 | step: 49 
angulos
[ 0.06440519 -0.07266685 -0.17761084]
Ep: 189 | --- | ep_r: -14.3 | step: 49 
angulos
[-0.02323117 -0.19942158 -0.11623227]
Ep: 190 | --- | ep_r: -28.4 | step: 49 
angulos
[-0.10908882 -0.20186146 -0.0933746 ]
Ep: 191 | --- | ep_r: -28.5 | step: 49 
angulos
[-0.11985001 -0.11087796 -0.11785649]
Ep: 192 | --- | ep_r: -39.8 | step: 49 
angulos
[-0.0006027  -0.05432439 -0.14122273]
Ep: 193 | --- | ep_r: -35.5 | step: 49 
angulos
[ 0.10730282 -0.02055241 -0.20933571]
Ep: 194 | --- | ep_r: -5.5 | step: 49 
angulos
[ 0.0053

Ep: 279 | --- | ep_r: -27.2 | step: 49 
angulos
[-0.06557354 -0.14338583 -0.147551  ]
Ep: 280 | --- | ep_r: -34.4 | step: 49 
angulos
[ 0.08863387 -0.13474141 -0.28473356]
Ep: 281 | --- | ep_r: -29.3 | step: 49 
angulos
[ 0.03079792 -0.02552372 -0.11926807]
Ep: 282 | --- | ep_r: -13.0 | step: 49 
angulos
[-0.0262109   0.03492664 -0.12845218]
Ep: 283 | --- | ep_r: -29.7 | step: 49 
angulos
[-0.12882568 -0.19938974 -0.15005822]
Ep: 284 | --- | ep_r: -16.4 | step: 49 
angulos
[-0.07889185 -0.02702276 -0.09639189]
Ep: 285 | --- | ep_r: -23.1 | step: 49 
angulos
[ 0.0226669  -0.1720132  -0.27849877]
Ep: 286 | --- | ep_r: -12.2 | step: 49 
angulos
[-0.07042023 -0.00571459 -0.1442874 ]
Ep: 287 | --- | ep_r: -34.2 | step: 49 
angulos
[ 0.12371463 -0.060506   -0.21250895]
Ep: 288 | --- | ep_r: -10.1 | step: 49 
angulos
[ 0.01784354 -0.2099686  -0.09326012]
Ep: 289 | --- | ep_r: -35.5 | step: 49 
angulos
[ 0.0950004  -0.12390579 -0.28903133]
Ep: 290 | --- | ep_r: -13.0 | step: 49 
angulos
[ 0.03

Ep: 374 | --- | ep_r: -16.9 | step: 49 
angulos
[-0.13154991 -0.16183855 -0.10399814]
Ep: 375 | --- | ep_r: -31.9 | step: 49 
angulos
[-0.18435767 -0.2645908  -0.1672127 ]
Ep: 376 | --- | ep_r: -24.7 | step: 49 
angulos
[-0.01683936 -0.27923602 -0.18847562]
Ep: 377 | --- | ep_r: -8.6 | step: 49 
angulos
[-0.01045062 -0.1103958  -0.07876289]
Ep: 378 | --- | ep_r: -26.3 | step: 49 
angulos
[ 0.0649872  -0.08068229 -0.21708767]
Ep: 379 | --- | ep_r: -36.5 | step: 49 
angulos
[-0.14817135 -0.32145685 -0.2465305 ]
Ep: 380 | --- | ep_r: -29.1 | step: 49 
angulos
[-0.09681111 -0.27262843 -0.09925652]
Ep: 381 | --- | ep_r: -35.5 | step: 49 
angulos
[-0.11531704 -0.25243384 -0.1456829 ]
Ep: 382 | --- | ep_r: -38.7 | step: 49 
angulos
[-0.167409   -0.2579239  -0.08686324]
Ep: 383 | --- | ep_r: -18.0 | step: 49 
angulos
[-0.05715602 -0.12911965 -0.11742747]
Ep: 384 | --- | ep_r: -30.3 | step: 49 
angulos
[ 0.05921328 -0.1302194  -0.27839145]
Ep: 385 | --- | ep_r: -31.2 | step: 49 
angulos
[-0.155

Ep: 469 | --- | ep_r: -18.1 | step: 49 
angulos
[-0.03382892 -0.01766545 -0.06092639]
Ep: 470 | --- | ep_r: -15.5 | step: 49 
angulos
[ 0.0256     -0.03107806 -0.15193461]
Ep: 471 | --- | ep_r: -5.6 | step: 49 
angulos
[-0.0200002  -0.07632653 -0.06425156]
Ep: 472 | --- | ep_r: -48.3 | step: 49 
angulos
[ 0.03066181 -0.04960355 -0.12023094]
Ep: 473 | --- | ep_r: -8.9 | step: 49 
angulos
[ 0.00323647 -0.13807322 -0.11040638]
Ep: 474 | --- | ep_r: -14.7 | step: 49 
angulos
[ 0.00766475 -0.2064218  -0.1319489 ]
Ep: 475 | --- | ep_r: -18.5 | step: 49 
angulos
[-0.06331098 -0.25613612 -0.1551266 ]
Ep: 476 | --- | ep_r: -28.1 | step: 49 
angulos
[ 0.06332029 -0.08788317 -0.25461677]
Ep: 477 | --- | ep_r: -13.6 | step: 49 
angulos
[ 0.044965   -0.02326122 -0.06902198]
Ep: 478 | --- | ep_r: -21.6 | step: 49 
angulos
[-0.06650279 -0.26980132 -0.14362332]
Ep: 479 | --- | ep_r: -18.5 | step: 49 
angulos
[ 0.03404568 -0.09034586 -0.16462944]
Ep: 480 | --- | ep_r: -29.4 | step: 49 
angulos
[-0.0572

In [3]:
# Run this cell to see the size of each part of a dataset entry.
# The first printed length is the number of action values at the start of a
# row; the second is the number of state variables resulting from that action
# (3 and 13 in the recorded run).
print(len(acao), len(estado), sep="\n")

3
13


In [4]:
# Run this cell to view the generated dataset
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.005262,0.037308,-0.080550,0.571525,0.340284,0.454683,0.210004,0.529480,0.143630,-0.057977,-0.314755,0.058865,-0.184475,-0.015932,-0.118102,0.0
1,-0.008912,0.016800,-0.124238,0.586047,0.347616,0.454168,0.462651,0.542761,0.416269,-0.003054,-0.075130,0.128824,-0.190165,0.040231,-0.143784,0.0
2,-0.060602,-0.100864,-0.097909,0.347806,0.413618,0.186067,0.480442,0.195431,0.380882,-0.227643,0.511661,-0.065905,0.444836,-0.075269,0.544396,0.0
3,-0.116682,0.097324,-0.095517,0.331037,0.545568,0.276000,0.711688,0.182657,0.675811,0.543273,0.299038,0.598310,0.132918,0.691652,0.168795,0.0
4,-0.103070,-0.184455,-0.159284,0.389815,0.635957,0.525715,0.525702,0.565425,0.433925,-0.360806,0.284448,-0.496706,0.394703,-0.536416,0.486480,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,-0.667946,-0.859938,0.152722,0.507261,0.674849,0.681191,0.655524,0.781015,0.649609,0.209013,-0.006943,0.035084,0.012381,-0.064741,0.018297,0.0
4996,-0.323809,0.933045,-0.173489,0.541120,0.329900,0.599944,0.165082,0.568324,0.259952,-0.009842,-0.012532,-0.068666,0.152286,-0.037046,0.057416,0.0
4997,-0.125873,0.063980,0.515833,0.501171,0.674996,0.524453,0.501552,0.432483,0.462289,-0.053673,-0.186125,-0.076955,-0.012681,0.015015,0.026582,0.0
4998,-0.002695,-0.080467,-0.034134,0.545057,0.669100,0.718278,0.644212,0.658248,0.564234,0.103613,-0.121282,-0.069609,-0.096394,-0.009579,-0.016416,1.0
