In [1]:
import datetime
import json
import os
import sys

from powertac_wm import PowerTAC_WM
import gym
import numpy as np
import pandas as pd

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from ReplayBuffer import ReplayBuffer
from ActorNetwork import ActorNetwork
from CriticNetwork import CriticNetwork
from OU import OU

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Directory (relative to the working directory) under which save_models()
# writes the networks' weight (.h5) and architecture (.json) files.
model_storage_path = "ddpg_v1.0" 

In [3]:
# ---- Replay / optimisation hyperparameters ----
BUFFER_SIZE = 100000
BATCH_SIZE = 64     # rows sampled from the replay data per update
EPOCHS = 1000       # number of minibatch updates run by train_ddpg_network()
GAMMA = 0.99        # discount factor applied to the target critic's Q-value
TAU = 0.001         # Target network hyperparameter (passed to ActorNetwork / CriticNetwork)
LRA = 0.0001        # Learning rate for Actor
LRC = 0.001         # Learning rate for Critic

# Action: [limitprice1, limitprice2], both real-valued -> two output neurons.
action_dim = 2

# State: [Proximity (24), Required_Quantity (1)] -> 25 input neurons in total.
state_dim = 25

# Seed every RNG involved so a Restart & Run All is repeatable: numpy (which
# pandas' DataFrame.sample falls back to when no random_state is given) and the
# TensorFlow default graph, which must be seeded before the networks are built.
SEED = 1337
np.random.seed(SEED)
tf.set_random_seed(SEED)

# Exploration-schedule state. Not referenced by the cells visible in this
# notebook -- presumably kept for the noise logic that is commented out of
# choose_Action; confirm before removing.
EXPLORE = 100000.0
step = 0
epsilon = 1

ou = OU()       # Ornstein-Uhlenbeck Process

In [4]:
# Single-GPU TF1 session. GPU memory is allocated on demand (allow_growth) and
# capped at 60% of the device (per_process_gpu_memory_fraction); ops without a
# GPU kernel fall back to CPU through soft placement.
config = tf.ConfigProto(
    device_count={'GPU': 1},
    intra_op_parallelism_threads=1,
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(
        allow_growth=True,
        per_process_gpu_memory_fraction=0.6,
    ),
)

session = tf.Session(config=config)

In [5]:
# Build the actor and critic (each exposes .model and .target_model) on the
# shared TF session.
actor = ActorNetwork(session, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
critic = CriticNetwork(session, state_dim, action_dim, BATCH_SIZE, TAU, LRC)

# Directory holding the pre-collected transitions. The default is an absolute,
# machine-specific path, so it can be overridden with the DDPG_DATA_DIR
# environment variable without editing the notebook.
data_storage_path = os.environ.get(
    "DDPG_DATA_DIR",
    "/mnt/d/PowerTAC/PowerTAC2021/training_scripts/ddpg_data_scripts_and_models/data/new_with_two_limit_prices_2609/similar_to_ddpg_v1",
)
replay_buffer = pd.read_csv(data_storage_path + '/replay_buffer_2.csv', header=None)

# Each row must be [state | action | reward | next_state | terminal]. A narrower
# file would make the column slices in train_ddpg_network() silently empty, so
# fail loudly here instead.
assert replay_buffer.shape[1] == 2 * state_dim + action_dim + 2, (
    "replay buffer has %d columns, expected %d (state, action, reward, next state, terminal)"
    % (replay_buffer.shape[1], 2 * state_dim + action_dim + 2)
)


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 25)]              0         
_________________________________________________________________
dense (Dense)                (None, 400)               10400     
_________________________________________________________________
dense_1 (Dense)              (None, 300)               120300    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 602       
_________________________________________________________________
concatenate (Concatenate)    (None, 2)                 0         
Total params: 131,302
Trainable params: 131,302
Non-trainable params: 0
_________________________________________________________________
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output S

In [6]:
# Sanity check: (number of stored transitions, columns per transition).
replay_buffer.shape

(9705, 54)

In [7]:
def train_ddpg_network(epochs=None, verbose=True):
    """Run offline DDPG updates on minibatches sampled from ``replay_buffer``.

    Every iteration (called an "epoch" here) draws ``BATCH_SIZE`` random rows
    from the module-level ``replay_buffer`` DataFrame, fits the critic to the
    one-step Bellman target, trains the actor with the critic's gradients and
    then calls ``target_train()`` on both networks.

    Args:
        epochs: Number of minibatch updates to perform. ``None`` (default)
            uses the module-level ``EPOCHS``.
        verbose: When True (default) print the array shapes and the critic
            loss on every iteration, as the function has always done.
    """
    if epochs is None:
        epochs = EPOCHS

    # Row layout: [state | action | reward | next_state | terminal].
    action_start = state_dim
    reward_start = action_start + action_dim
    next_state_start = reward_start + 1
    terminal_start = next_state_start + state_dim

    for epoch in range(epochs):

        if verbose:
            print("Epoch ", (epoch+1))
            print("-"*12)

        # Draw the minibatch and split it into its components.
        batch = replay_buffer.sample(n=BATCH_SIZE)
        states = batch.iloc[:, 0:action_start].to_numpy()
        actions = batch.iloc[:, action_start:reward_start].to_numpy()
        rewards = batch.iloc[:, reward_start:next_state_start].to_numpy()
        new_states = batch.iloc[:, next_state_start:terminal_start].to_numpy()
        terminals = batch.iloc[:, terminal_start:terminal_start + 1].to_numpy()

        if verbose:
            print("States", states.shape)
            print("Actions", actions.shape)
            print("Rewards", rewards.shape)
            print("New_States", new_states.shape)

        with session.as_default():
            with session.graph.as_default():

                # Q'(s', mu'(s')) from the target networks, forced to a column
                # vector so it lines up element-wise with rewards / terminals.
                target_q_values = critic.target_model.predict([new_states, actor.target_model.predict(new_states)])
                target_q_values = np.reshape(target_q_values, (-1, 1))

                # Bellman target: r for terminal transitions, r + gamma * Q' otherwise.
                # Writing into a preallocated float array keeps the (BATCH_SIZE, 1)
                # shape and dtype regardless of how pandas typed the columns.
                y_t = np.zeros([BATCH_SIZE, 1])
                y_t[:] = np.where(terminals == 1, rewards, rewards + GAMMA * target_q_values)

                # float() keeps the printed loss independent of the scalar type
                # train_on_batch happens to return.
                loss = float(critic.model.train_on_batch([states, actions], y_t))
                if verbose:
                    print("Loss", loss)

                # The policy gradient must be evaluated at the action the *current*
                # actor would take in each sampled state, not at the stored replay
                # action, hence the fresh forward pass.
                a_for_grad = actor.model.predict(states)
                # Presumably dQ/da at (states, a_for_grad) -- defined in CriticNetwork.
                grads = critic.gradients(states, a_for_grad)
                actor.train(states, grads)

                # Update the target networks (presumably a TAU-weighted soft
                # update -- see ActorNetwork / CriticNetwork).
                actor.target_train()
                critic.target_train()

    print("Training Completed !!!")

In [8]:
# Run the offline training loop: EPOCHS minibatch updates, printing the batch
# shapes and the critic loss on every iteration.
train_ddpg_network()

Epoch  1
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)




Loss 1.8216791152954102
Epoch  2
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4444083571434021
Epoch  3
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 1.44011390209198
Epoch  4
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.641099214553833
Epoch  5
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.36705130338668823
Epoch  6
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.6175764203071594
Epoch  7
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5000836253166199
Epoch  8
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.9979690313339233
Epoch  9
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.26035207509994507
Epoch  10
------------
States (64, 25)
Actions (64, 2)
Rewards (6

Epoch  83
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.7081607580184937
Epoch  84
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.42507004737854004
Epoch  85
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 1.074671983718872
Epoch  86
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2269120216369629
Epoch  87
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.8272923231124878
Epoch  88
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2480745017528534
Epoch  89
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.1680234670639038
Epoch  90
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.1774970293045044
Epoch  91
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_State

Epoch  163
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.6264951825141907
Epoch  164
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.22143356502056122
Epoch  165
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.23120486736297607
Epoch  166
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.1638208031654358
Epoch  167
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.20328889787197113
Epoch  168
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.25008171796798706
Epoch  169
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.29731523990631104
Epoch  170
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5678235292434692
Epoch  171
------------
States (64, 25)
Actions (64, 2)
Rewards (64

Rewards (64, 1)
New_States (64, 25)
Loss 0.42813122272491455
Epoch  244
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.20885469019412994
Epoch  245
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.17837688326835632
Epoch  246
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4078257381916046
Epoch  247
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5857089161872864
Epoch  248
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4898951053619385
Epoch  249
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2579661011695862
Epoch  250
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.22681796550750732
Epoch  251
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.19305111467838287
Epoch  

Epoch  322
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.16425064206123352
Epoch  323
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.31643813848495483
Epoch  324
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.23799468576908112
Epoch  325
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.3505084812641144
Epoch  326
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.22382541000843048
Epoch  327
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.3670441806316376
Epoch  328
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 1.3675105571746826
Epoch  329
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.20430797338485718
Epoch  330
------------
States (64, 25)
Actions (64, 2)
Rewards (64

States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.6923229098320007
Epoch  402
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.40123075246810913
Epoch  403
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.10628609359264374
Epoch  404
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.18234111368656158
Epoch  405
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.16846802830696106
Epoch  406
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5292630195617676
Epoch  407
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.11003819108009338
Epoch  408
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.18880608677864075
Epoch  409
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25

Epoch  483
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2874706983566284
Epoch  484
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.35887256264686584
Epoch  485
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.517349123954773
Epoch  486
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.32926905155181885
Epoch  487
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.3199025094509125
Epoch  488
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.43118607997894287
Epoch  489
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.3884059190750122
Epoch  490
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.17797507345676422
Epoch  491
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 

Loss 0.370097815990448
Epoch  557
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.21330168843269348
Epoch  558
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.16214029490947723
Epoch  559
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.26798418164253235
Epoch  560
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.28836917877197266
Epoch  561
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5311099290847778
Epoch  562
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.6832165718078613
Epoch  563
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.48634257912635803
Epoch  564
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5512168407440186
Epoch  565
------------
States (64, 25)
Acti

States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2545276880264282
Epoch  629
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.3891979157924652
Epoch  630
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2271498143672943
Epoch  631
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.11920234560966492
Epoch  632
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.6419132947921753
Epoch  633
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.7046727538108826
Epoch  634
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.14754855632781982
Epoch  635
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.15626613795757294
Epoch  636
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
L

New_States (64, 25)
Loss 0.8612967729568481
Epoch  704
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.1487676352262497
Epoch  705
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.18063604831695557
Epoch  706
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.46532005071640015
Epoch  707
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.13098768889904022
Epoch  708
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.12987057864665985
Epoch  709
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.5584983825683594
Epoch  710
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.33529719710350037
Epoch  711
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.21424150466918945
Epoch  712
-----------

Epoch  776
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.521591067314148
Epoch  777
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.316156804561615
Epoch  778
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.36351925134658813
Epoch  779
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.41173988580703735
Epoch  780
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.24422402679920197
Epoch  781
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.16948874294757843
Epoch  782
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.1615832895040512
Epoch  783
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.12300468981266022
Epoch  784
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 

Epoch  849
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.06984139233827591
Epoch  850
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.6649881601333618
Epoch  851
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 1.0181169509887695
Epoch  852
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4400942325592041
Epoch  853
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4995405375957489
Epoch  854
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4039541482925415
Epoch  855
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.20821887254714966
Epoch  856
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.35998302698135376
Epoch  857
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 

Epoch  920
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.3737969398498535
Epoch  921
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.262737512588501
Epoch  922
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2438613772392273
Epoch  923
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4417712986469269
Epoch  924
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.4037206768989563
Epoch  925
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.15763995051383972
Epoch  926
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.16240108013153076
Epoch  927
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.37034744024276733
Epoch  928
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1

Loss 0.40052780508995056
Epoch  991
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.7892831563949585
Epoch  992
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.10810455679893494
Epoch  993
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.22031819820404053
Epoch  994
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.7905285358428955
Epoch  995
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.2366894781589508
Epoch  996
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.10051640123128891
Epoch  997
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.16030815243721008
Epoch  998
------------
States (64, 25)
Actions (64, 2)
Rewards (64, 1)
New_States (64, 25)
Loss 0.23728100955486298
Epoch  999
------------
States (64, 25)
Ac

In [9]:
def choose_Action(states):
    """Return the actor's actions for a batch of states.

    The online actor (``actor.model``) is evaluated once on the whole batch;
    no exploration noise is added.

    Args:
        states: 2-D array-like with one state per row.

    Returns:
        A list with one ``[limitprice1, limitprice2]`` list of floats per input
        row, in the same order as ``states``. An empty list is returned when
        ``states`` is empty or when prediction fails (the error is printed), so
        the result is never partially filled or misaligned with its input.
    """
    states = np.asarray(states)
    if len(states) == 0:
        return []

    try:
        with session.as_default():
            with session.graph.as_default():
                # One batched forward pass instead of one predict() call per
                # state; each row is flattened to the actor's input vector.
                state_batch = states.reshape(len(states), -1)
                return actor.model.predict(state_batch).tolist()
    except Exception as e:
        # Best-effort, as before: report the problem instead of raising.
        print(e)
        return []

In [10]:
# Draw a fresh random minibatch and keep only its state columns (the first 25).
batch = replay_buffer.sample(n=BATCH_SIZE)
states = batch.iloc[:, :25].to_numpy()

# Limit prices proposed by the trained actor for each sampled state.
lps = choose_Action(states)

In [11]:
# Show the actor's output for every sampled state, one action pair per line.
for lp in lps:
    print(lp)

[-1.9986138343811035, -1.998808741569519]
[-1.9995125532150269, -1.9998949766159058]
[-1.999644160270691, -1.9999266862869263]
[-1.999294400215149, -1.9998599290847778]
[-1.9997283220291138, -1.9999641180038452]
[-1.9985324144363403, -1.9987252950668335]
[-1.9987373352050781, -1.9989328384399414]
[-1.998441219329834, -1.9995189905166626]
[-1.997105360031128, -1.998976469039917]
[-1.9982918500900269, -1.998471975326538]
[-1.998637080192566, -1.9995906352996826]
[-1.9989433288574219, -1.9997400045394897]
[-1.997590184211731, -1.9991861581802368]
[-1.997422695159912, -1.9991159439086914]
[-1.9976328611373901, -1.9993222951889038]
[-1.9955360889434814, -1.9982162714004517]
[-1.9997235536575317, -1.9999449253082275]
[-1.9991308450698853, -1.9993141889572144]
[-1.993714451789856, -1.9971733093261719]
[-1.9988806247711182, -1.9996758699417114]
[-1.999822974205017, -1.9999780654907227]
[-1.999625325202942, -1.9999216794967651]
[-1.9996508359909058, -1.9999260902404785]
[-1.9990684986114502, -1

In [12]:
def save_models():
    """Write the weights and architecture of all four networks to disk.

    For the online and target models of both the actor and the critic, the
    weights are saved to an ``.h5`` file and the architecture to a ``.json``
    file under ``model_storage_path``. The directory is created if it does not
    exist yet; existing files are overwritten.
    """
    # (model, weights file, architecture file), in the order they are written.
    # File names are kept exactly as they always were -- including the
    # "...targetmodel.h5" vs "...modeltarget.json" difference -- because
    # loaders elsewhere may depend on them.
    artifacts = (
        (actor.model, "/actormodel.h5", "/actormodel.json"),
        (critic.model, "/criticmodel.h5", "/criticmodel.json"),
        (actor.target_model, "/actortargetmodel.h5", "/actormodeltarget.json"),
        (critic.target_model, "/critictargetmodel.h5", "/criticmodeltarget.json"),
    )

    # Neither save_weights() nor open() creates missing directories.
    os.makedirs(model_storage_path, exist_ok=True)

    with session.as_default():
        with session.graph.as_default():
            for model, weights_file, architecture_file in artifacts:
                model.save_weights(model_storage_path + weights_file, overwrite=True)
                with open(model_storage_path + architecture_file, "w") as outfile:
                    # to_json() already returns a JSON string, so json.dump stores
                    # it as a quoted string; readers must json.load() it first.
                    # The format is left unchanged for compatibility.
                    json.dump(model.to_json(), outfile)

    print("Models Saved Successfully !!!")


In [13]:
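# Uncomment to persist the trained networks under model_storage_path
# (existing files there are overwritten):
# save_models()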