In [1]:
import datetime
import json
import os
import sys

import numpy as np
import pandas as pd

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from ReplayBuffer import ReplayBuffer
from ActorNetwork import ActorNetwork
from CriticNetwork import CriticNetwork
from OU import OU

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Path prefix under which save_models() writes the weight (.h5) and
# architecture (.json) files for the actor/critic and their target models.
model_storage_path = "ddpg_v1.0" 

In [3]:
# Hyperparameters shared by the cells below.
BUFFER_SIZE = 100000
BATCH_SIZE = 64  # rows sampled per training step; also passed to both networks
EPOCHS = 5000    # number of training steps run by train_ddpg_network()
GAMMA = 0.99     # NOTE(review): not referenced by the training cell in this notebook
TAU = 0.001      # Target network hyperparameter (passed to both networks)
LRA = 0.0001     # Learning rate for the actor
LRC = 0.001      # Learning rate for the critic

# Action: [
#           limitprice1 scaler belongs to [0,1]
#           limitprice2 scaler belongs to [0,1]
#         ] (two output neurons)
action_dim = 2

# State: [
#          Required_Quantity (1)
#          True_Valuation (1)
#        ] (total two input neurons)
state_dim = 2

# Seeds NumPy's global generator only (this also drives DataFrame.sample and
# np.random.uniform used later); TensorFlow is not seeded here.
np.random.seed(1337)
EXPLORE = 100000.0

step = 0
epsilon = 1

ou = OU()       # Ornstein-Uhlenbeck process

In [4]:
# TF1-style session configuration, assembled in a single constructor call:
# one visible GPU, a single intra-op thread, soft device placement, and GPU
# memory that grows on demand while being capped at 60% of the device.
config = tf.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=1,
    device_count={'GPU': 1},
    gpu_options=tf.GPUOptions(
        allow_growth=True,
        per_process_gpu_memory_fraction=0.6,
    ),
)

# Every later cell runs its Keras calls under this session.
session = tf.Session(config=config)

In [5]:
# Actor and critic wrappers, both bound to the shared TF1 session. Later cells
# use their .model / .target_model attributes and train/target_train methods.
actor = ActorNetwork(session, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
critic = CriticNetwork(session, state_dim, action_dim, BATCH_SIZE, TAU, LRC)

# Directory holding the recorded transitions. The default is the original
# author's machine-specific location; set the DDPG_DATA_DIR environment
# variable to run the notebook elsewhere without editing this cell.
data_storage_path = os.environ.get(
    "DDPG_DATA_DIR",
    "/mnt/d/PowerTAC/PowerTAC2021/experiments_scripts/powertac_simulator_py/ddpg_based_wholesale_strategy/",
)

# NOTE: despite its name this is a pandas DataFrame (header-less CSV, integer
# column labels), not an instance of the imported ReplayBuffer class.
replay_buffer = pd.read_csv(os.path.join(data_storage_path, 'temp.csv'), header=None)


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dense (Dense)                (None, 40)                120       
_________________________________________________________________
dense_1 (Dense)              (None, 30)                1230      
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 62        
_________________________________________________________________
concatenate (Concatenate)    (None, 2)                 0         
Total params: 1,412
Trainable params: 1,412
Non-trainable params: 0
_________________________________________________________________
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape

In [6]:
# Sanity check of the loaded table as (rows, columns); the training cell
# slices columns 0-1, 2-3, 4 and 5-6 out of it.
replay_buffer.shape

(376223, 7)

In [7]:
def train_ddpg_network(epochs=None, log_every=1):
    """Train the critic and actor from randomly sampled rows of ``replay_buffer``.

    Each iteration samples ``BATCH_SIZE`` rows and splits them by column
    position into states (0-1), actions (2-3), reward (4) and next states
    (5-6). The critic is fitted to the target
    ``reward - next_state[0] * next_state[1]``, the actor is updated with the
    critic's gradients evaluated at the actor's own predictions, and both
    target networks are then updated via ``target_train()``.

    NOTE(review): the critic target does not bootstrap from the target
    networks and ``GAMMA`` is not used; confirm this is intended.

    Parameters
    ----------
    epochs : int, optional
        Number of training iterations. Defaults to the module-level ``EPOCHS``
        (looked up at call time).
    log_every : int, optional
        Print the epoch header and critic loss every ``log_every`` iterations.
        The default of 1 reproduces the original per-iteration output.

    Raises
    ------
    ValueError
        If ``log_every`` is smaller than 1.
    """
    if epochs is None:
        epochs = EPOCHS
    if log_every < 1:
        raise ValueError("log_every must be >= 1")

    # The session/graph contexts do not change between iterations, so they are
    # entered once instead of once per epoch.
    with session.as_default(), session.graph.as_default():

        for epoch in range(epochs):

            verbose = (epoch + 1) % log_every == 0
            if verbose:
                print("Epoch ", (epoch+1))
                print("-"*12)

            # Do the batch update
            batch = replay_buffer.sample(n=BATCH_SIZE)
            states = np.asarray(batch[batch.columns[0:2]])
            actions = np.asarray(batch[batch.columns[2:4]])
            rewards = np.asarray(batch[batch.columns[4:5]])
            new_states = np.asarray(batch[batch.columns[5:7]])

            # Vectorised form of the former per-sample loop; kept as a float64
            # column vector of shape (BATCH_SIZE, 1), like the np.zeros buffer
            # it replaces.
            y_t = (
                rewards[:, 0] - new_states[:, 0] * new_states[:, 1]
            ).astype(np.float64).reshape(-1, 1)

            loss = critic.model.train_on_batch([states, actions], y_t)
            if verbose:
                # float() keeps the full-precision formatting that the old
                # `0 + loss` accumulator produced.
                print("Loss", float(loss))

            # The gradient is taken at the actor's current prediction rather
            # than at the stored action.
            # TODO(review): the original author left this choice marked
            # "Check PENDING" and noted a possible shape error on the
            # gradients call; neither has been verified here.
            a_for_grad = actor.model.predict(states)
            grads = critic.gradients(states, a_for_grad)
            actor.train(states, grads)
            actor.target_train()
            critic.target_train()

    print("Training Completed !!!")

In [None]:
# Run the training loop defined above; progress is printed as it goes and
# "Training Completed !!!" marks the end of the run.
train_ddpg_network()

Epoch  1
------------
Loss 1.2975274324417114
Epoch  2
------------
Loss 0.93156898021698
Epoch  3
------------
Loss 0.7287549376487732
Epoch  4
------------
Loss 0.6948959827423096
Epoch  5
------------




Loss 0.5113528966903687
Epoch  6
------------
Loss 0.5364248752593994
Epoch  7
------------
Loss 0.2978594899177551
Epoch  8
------------
Loss 0.42240965366363525
Epoch  9
------------
Loss 0.21699194610118866
Epoch  10
------------
Loss 0.2990318536758423
Epoch  11
------------
Loss 0.211356520652771
Epoch  12
------------
Loss 0.2357388734817505
Epoch  13
------------
Loss 0.15973283350467682
Epoch  14
------------
Loss 0.26458826661109924
Epoch  15
------------
Loss 0.22837628424167633
Epoch  16
------------
Loss 0.23910930752754211
Epoch  17
------------
Loss 0.24493926763534546
Epoch  18
------------
Loss 0.25910601019859314
Epoch  19
------------
Loss 0.23579534888267517
Epoch  20
------------
Loss 0.2618321180343628
Epoch  21
------------
Loss 0.22112751007080078
Epoch  22
------------
Loss 0.16777044534683228
Epoch  23
------------
Loss 0.24979639053344727
Epoch  24
------------
Loss 0.17133036255836487
Epoch  25
------------
Loss 0.2834542989730835
Epoch  26
------------
Loss 

Epoch  185
------------
Loss 0.0953773558139801
Epoch  186
------------
Loss 0.10956934839487076
Epoch  187
------------
Loss 0.16105583310127258
Epoch  188
------------
Loss 0.09212549030780792
Epoch  189
------------
Loss 0.11496633291244507
Epoch  190
------------
Loss 0.18823206424713135
Epoch  191
------------
Loss 0.09722422063350677
Epoch  192
------------
Loss 0.14311133325099945
Epoch  193
------------
Loss 0.11016042530536652
Epoch  194
------------
Loss 0.08982285857200623
Epoch  195
------------
Loss 0.11795488744974136
Epoch  196
------------
Loss 0.14237922430038452
Epoch  197
------------
Loss 0.11430568993091583
Epoch  198
------------
Loss 0.13618728518486023
Epoch  199
------------
Loss 0.13813312351703644
Epoch  200
------------
Loss 0.10485120117664337
Epoch  201
------------
Loss 0.1365942656993866
Epoch  202
------------
Loss 0.1440185159444809
Epoch  203
------------
Loss 0.12558020651340485
Epoch  204
------------
Loss 0.16602087020874023
Epoch  205
------------

Epoch  356
------------
Loss 0.1327025294303894
Epoch  357
------------
Loss 0.1892164647579193
Epoch  358
------------
Loss 0.1570335030555725
Epoch  359
------------
Loss 0.12438531219959259
Epoch  360
------------
Loss 0.16736853122711182
Epoch  361
------------
Loss 0.12691190838813782
Epoch  362
------------
Loss 0.12262111902236938
Epoch  363
------------
Loss 0.13338682055473328
Epoch  364
------------
Loss 0.14713865518569946
Epoch  365
------------
Loss 0.10485772788524628
Epoch  366
------------
Loss 0.14126256108283997
Epoch  367
------------
Loss 0.11444076150655746
Epoch  368
------------
Loss 0.13420774042606354
Epoch  369
------------
Loss 0.195108100771904
Epoch  370
------------
Loss 0.13447439670562744
Epoch  371
------------
Loss 0.14081759750843048
Epoch  372
------------
Loss 0.12938083708286285
Epoch  373
------------
Loss 0.13910913467407227
Epoch  374
------------
Loss 0.10416163504123688
Epoch  375
------------
Loss 0.1293097883462906
Epoch  376
------------
Lo

Epoch  530
------------
Loss 0.1305086314678192
Epoch  531
------------
Loss 0.09310223162174225
Epoch  532
------------
Loss 0.1625414490699768
Epoch  533
------------
Loss 0.09490640461444855
Epoch  534
------------
Loss 0.12674114108085632
Epoch  535
------------
Loss 0.12459512799978256
Epoch  536
------------
Loss 0.1066802442073822
Epoch  537
------------
Loss 0.1372675895690918
Epoch  538
------------
Loss 0.09240980446338654
Epoch  539
------------
Loss 0.13097447156906128
Epoch  540
------------
Loss 0.17683209478855133
Epoch  541
------------
Loss 0.12202920019626617
Epoch  542
------------
Loss 0.11906698346138
Epoch  543
------------
Loss 0.175279438495636
Epoch  544
------------
Loss 0.12337153404951096
Epoch  545
------------
Loss 0.1719864308834076
Epoch  546
------------
Loss 0.12162965536117554
Epoch  547
------------
Loss 0.10456787049770355
Epoch  548
------------
Loss 0.11187837272882462
Epoch  549
------------
Loss 0.1444520801305771
Epoch  550
------------
Loss 0.

Epoch  706
------------
Loss 0.16869819164276123
Epoch  707
------------
Loss 0.1083785742521286
Epoch  708
------------
Loss 0.11468859016895294
Epoch  709
------------
Loss 0.1185990571975708
Epoch  710
------------
Loss 0.16359741985797882
Epoch  711
------------
Loss 0.15168258547782898
Epoch  712
------------
Loss 0.12550541758537292
Epoch  713
------------
Loss 0.16006040573120117
Epoch  714
------------
Loss 0.15264607965946198
Epoch  715
------------
Loss 0.12399935722351074
Epoch  716
------------
Loss 0.15972337126731873
Epoch  717
------------
Loss 0.13024577498435974
Epoch  718
------------
Loss 0.06959211826324463
Epoch  719
------------
Loss 0.11422889679670334
Epoch  720
------------
Loss 0.140618234872818
Epoch  721
------------
Loss 0.12399239838123322
Epoch  722
------------
Loss 0.1448279321193695
Epoch  723
------------
Loss 0.1331857144832611
Epoch  724
------------
Loss 0.15168100595474243
Epoch  725
------------
Loss 0.11325196921825409
Epoch  726
------------
Lo

Loss 0.1265365183353424
Epoch  879
------------
Loss 0.10743194073438644
Epoch  880
------------
Loss 0.1319146454334259
Epoch  881
------------
Loss 0.09844411909580231
Epoch  882
------------
Loss 0.10674731433391571
Epoch  883
------------
Loss 0.17038290202617645
Epoch  884
------------
Loss 0.12911707162857056
Epoch  885
------------
Loss 0.10258065164089203
Epoch  886
------------
Loss 0.1428186595439911
Epoch  887
------------
Loss 0.112001433968544
Epoch  888
------------
Loss 0.13540968298912048
Epoch  889
------------
Loss 0.1155916377902031
Epoch  890
------------
Loss 0.13871659338474274
Epoch  891
------------
Loss 0.10250203311443329
Epoch  892
------------
Loss 0.12263056635856628
Epoch  893
------------
Loss 0.11357488483190536
Epoch  894
------------
Loss 0.12029631435871124
Epoch  895
------------
Loss 0.12189316749572754
Epoch  896
------------
Loss 0.09891682863235474
Epoch  897
------------
Loss 0.11650802195072174
Epoch  898
------------
Loss 0.12319120019674301
E

Loss 0.09055417031049728
Epoch  1053
------------
Loss 0.11132854968309402
Epoch  1054
------------
Loss 0.14001163840293884
Epoch  1055
------------
Loss 0.1086275652050972
Epoch  1056
------------
Loss 0.10981646925210953
Epoch  1057
------------
Loss 0.11322177946567535
Epoch  1058
------------
Loss 0.18794454634189606
Epoch  1059
------------
Loss 0.1029570922255516
Epoch  1060
------------
Loss 0.14085620641708374
Epoch  1061
------------
Loss 0.1909271478652954
Epoch  1062
------------
Loss 0.14650924503803253
Epoch  1063
------------
Loss 0.11905704438686371
Epoch  1064
------------
Loss 0.16807475686073303
Epoch  1065
------------
Loss 0.1276586949825287
Epoch  1066
------------
Loss 0.13176903128623962
Epoch  1067
------------
Loss 0.10566772520542145
Epoch  1068
------------
Loss 0.12328151613473892
Epoch  1069
------------
Loss 0.13419364392757416
Epoch  1070
------------
Loss 0.09475163370370865
Epoch  1071
------------
Loss 0.20702917873859406
Epoch  1072
------------
Loss

Loss 0.18737438321113586
Epoch  1224
------------
Loss 0.11460712552070618
Epoch  1225
------------
Loss 0.11536660045385361
Epoch  1226
------------
Loss 0.09255626797676086
Epoch  1227
------------
Loss 0.13790825009346008
Epoch  1228
------------
Loss 0.18665236234664917
Epoch  1229
------------
Loss 0.11544235050678253
Epoch  1230
------------
Loss 0.11197316646575928
Epoch  1231
------------
Loss 0.140424907207489
Epoch  1232
------------
Loss 0.13113528490066528
Epoch  1233
------------
Loss 0.1148168072104454
Epoch  1234
------------
Loss 0.138348788022995
Epoch  1235
------------
Loss 0.16384577751159668
Epoch  1236
------------
Loss 0.16278332471847534
Epoch  1237
------------
Loss 0.13813194632530212
Epoch  1238
------------
Loss 0.11975616216659546
Epoch  1239
------------
Loss 0.16927441954612732
Epoch  1240
------------
Loss 0.10681168735027313
Epoch  1241
------------
Loss 0.15480002760887146
Epoch  1242
------------
Loss 0.14863726496696472
Epoch  1243
------------
Loss 

Loss 0.14431720972061157
Epoch  1398
------------
Loss 0.1893966794013977
Epoch  1399
------------
Loss 0.14200598001480103
Epoch  1400
------------
Loss 0.12658199667930603
Epoch  1401
------------
Loss 0.12386687099933624
Epoch  1402
------------
Loss 0.12105827033519745
Epoch  1403
------------
Loss 0.09917036443948746
Epoch  1404
------------
Loss 0.15085265040397644
Epoch  1405
------------
Loss 0.14055393636226654
Epoch  1406
------------
Loss 0.1163710281252861
Epoch  1407
------------
Loss 0.11408571153879166
Epoch  1408
------------
Loss 0.11747576296329498
Epoch  1409
------------
Loss 0.10301163792610168
Epoch  1410
------------
Loss 0.15873321890830994
Epoch  1411
------------
Loss 0.16104869544506073
Epoch  1412
------------
Loss 0.13710300624370575
Epoch  1413
------------
Loss 0.12872229516506195
Epoch  1414
------------
Loss 0.15974083542823792
Epoch  1415
------------
Loss 0.08981004357337952
Epoch  1416
------------
Loss 0.08439791202545166
Epoch  1417
------------
Lo

Epoch  1564
------------
Loss 0.14173544943332672
Epoch  1565
------------
Loss 0.09483286738395691
Epoch  1566
------------
Loss 0.11992188543081284
Epoch  1567
------------
Loss 0.09727531671524048
Epoch  1568
------------
Loss 0.17344337701797485
Epoch  1569
------------
Loss 0.13592766225337982
Epoch  1570
------------
Loss 0.12560278177261353
Epoch  1571
------------
Loss 0.11029661446809769
Epoch  1572
------------
Loss 0.09183330833911896
Epoch  1573
------------
Loss 0.1148747056722641
Epoch  1574
------------
Loss 0.11706681549549103
Epoch  1575
------------
Loss 0.11652641743421555
Epoch  1576
------------
Loss 0.1647372543811798
Epoch  1577
------------
Loss 0.15207964181900024
Epoch  1578
------------
Loss 0.13897919654846191
Epoch  1579
------------
Loss 0.06580358743667603
Epoch  1580
------------
Loss 0.12198828905820847
Epoch  1581
------------
Loss 0.10562364757061005
Epoch  1582
------------
Loss 0.15555936098098755
Epoch  1583
------------
Loss 0.08663610368967056
Ep

Loss 0.09539070725440979
Epoch  1735
------------
Loss 0.1237926036119461
Epoch  1736
------------
Loss 0.1037428006529808
Epoch  1737
------------
Loss 0.12248124927282333
Epoch  1738
------------
Loss 0.13764522969722748
Epoch  1739
------------
Loss 0.12527869641780853
Epoch  1740
------------
Loss 0.13841316103935242
Epoch  1741
------------
Loss 0.1728394329547882
Epoch  1742
------------
Loss 0.12996330857276917
Epoch  1743
------------
Loss 0.09649544954299927
Epoch  1744
------------
Loss 0.16052383184432983
Epoch  1745
------------
Loss 0.09467285126447678
Epoch  1746
------------
Loss 0.1526317447423935
Epoch  1747
------------
Loss 0.17956312000751495
Epoch  1748
------------
Loss 0.11514443159103394
Epoch  1749
------------
Loss 0.1698669046163559
Epoch  1750
------------
Loss 0.10840632021427155
Epoch  1751
------------
Loss 0.08898617327213287
Epoch  1752
------------
Loss 0.10896982997655869
Epoch  1753
------------
Loss 0.1335073709487915
Epoch  1754
------------
Loss 0

In [None]:
def choose_Action(states):
    """Return the actor network's action for every state in ``states``.

    ``states`` is iterated row by row; each row must be a 1-D NumPy array,
    which is reshaped into a single-row batch and passed to
    ``actor.model.predict`` under the shared session and graph. No exploration
    noise is added, so the raw network output is returned.

    The function is best-effort: when a row fails for any reason the exception
    is printed and that row is skipped, so the result can be shorter than
    ``states``.

    Returns
    -------
    list of list
        One list of action values per successfully evaluated state, in input
        order.
    """
    chosen = []

    for row in states:
        try:
            with session.as_default(), session.graph.as_default():
                single_batch = row.reshape(1, row.shape[0])
                prediction = actor.model.predict(single_batch)[0]
                chosen.append(list(prediction.tolist()))
        except Exception as err:
            print(err)

    return chosen

In [None]:
# Spot check: draw 10 random rows from the loaded data and print the action
# the actor produces for each of their states.
batch = replay_buffer.sample(n=10)
states = np.asarray(batch[batch.columns[0:2]])  # first two columns hold the state, as in training

lps = choose_Action(states)  # one list of action values per state that was evaluated successfully

for lp in lps:
    print(lp)

In [None]:
def save_models():
    """Write weights and architecture of all four networks under ``model_storage_path``.

    For the actor, the critic and both target models this saves the weights to
    an ``.h5`` file and the architecture to a ``.json`` file. The directory is
    created first if it does not exist, so a fresh checkout can save without a
    manual ``mkdir``. Existing files are overwritten.

    File names are kept exactly as before. The target-model names are
    inconsistent between weights (``actortargetmodel.h5``) and architecture
    (``actormodeltarget.json``), but external loaders may rely on them.

    NOTE: ``to_json()`` is passed through ``json.dump`` unchanged. If it
    returns a JSON string, as Keras models do, the file holds that string
    JSON-encoded a second time and readers must ``json.load`` it before
    parsing. Kept for compatibility with existing files.
    """
    # (model, weights file, architecture file) for everything that is persisted.
    artifacts = (
        (actor.model, "/actormodel.h5", "/actormodel.json"),
        (critic.model, "/criticmodel.h5", "/criticmodel.json"),
        (actor.target_model, "/actortargetmodel.h5", "/actormodeltarget.json"),
        (critic.target_model, "/critictargetmodel.h5", "/criticmodeltarget.json"),
    )

    # save_weights/open do not create missing directories themselves.
    if model_storage_path:
        os.makedirs(model_storage_path, exist_ok=True)

    with session.as_default(), session.graph.as_default():
        for model, weights_name, json_name in artifacts:
            model.save_weights(model_storage_path + weights_name, overwrite=True)
            with open(model_storage_path + json_name, "w") as outfile:
                json.dump(model.to_json(), outfile)

        print("Models Saved Successfully !!!")


In [None]:
# save_models()

In [None]:
def test(iterations=100000):
    """Sample the actor's actions for randomly drawn valuations.

    For each iteration a state ``[2, theta_b]`` is built, with ``theta_b``
    drawn uniformly from [0, 1) via NumPy's global generator, and passed to
    ``choose_Action``. The state layout is [Required_Quantity, True_Valuation],
    so the quantity is fixed at 2. Whatever weights the in-memory ``actor``
    currently holds are used; no weights are loaded from disk here.

    Parameters
    ----------
    iterations : int
        Number of random states to evaluate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1 + k, 2)`` where ``k`` is the number of states for
        which ``choose_Action`` returned an action. Row 0 is always the fixed
        seed row ``[0.882782, 0.588521]``.

    NOTE(review): the seed row is not a network output but is included in any
    statistics computed over the result. It is kept only so the returned shape
    matches earlier runs; consider dropping row 0 before aggregating.
    """
    required_quantity = 2

    # Collect rows in a plain list and stack once at the end; calling
    # np.vstack inside the loop re-copies the whole array every iteration.
    rows = [[0.882782, 0.588521]]

    for _ in range(iterations):
        theta_b = np.random.uniform(0, 1)
        current_state = np.array([required_quantity, theta_b]).reshape(1, 2)

        # choose_Action returns an empty list when prediction failed; extend()
        # then simply adds nothing instead of breaking the stacking.
        rows.extend(choose_Action(current_state))

    return np.vstack(rows)

In [None]:
# Evaluate the actor on random states (default: 100000 of them); the
# statistics cell below aggregates the two columns of the result.
list_of_actions = test()

In [None]:
# Mean and standard deviation of each action output over the sampled states.
# Column 0 is the first action component and column 1 the second; the second
# line used to be labelled "alpha_b1" as well, which made the two rows
# indistinguishable in the output.
print("alpha_b1 statistics:: mean: {}, std: {}".format(np.mean(list_of_actions[:,0]),np.std(list_of_actions[:,0])))
print("alpha_b2 statistics:: mean: {}, std: {}".format(np.mean(list_of_actions[:,1]),np.std(list_of_actions[:,1])))