In [1]:
"""
Deep Deterministic Policy Gradient (DDPG), Reinforcement Learning.
1-way relay, net bit rate, energy harvesting example for training.
Thanks to : https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/tree/master/contents/9_Deep_Deterministic_Policy_Gradient_DDPG
Using:
tensorflow 1.0
"""
import math
import os
import time

import gym
import numpy as np
import tensorflow as tf

import DDPG_CLASS
import EH_P2P

# One shared seed for both random number generators used by this script.
# NOTE: tf.set_random_seed only seeds the default graph that exists when it is
# called; a graph created after tf.reset_default_graph() starts unseeded.
RANDOM_SEED = 1
np.random.seed(RANDOM_SEED)
tf.set_random_seed(RANDOM_SEED)

#####################  hyper parameters  ####################

# --- training schedule ---
MAX_EPISODES = 1500   # episodes run for every SNR setting
MAX_EP_STEPS = 240    # environment steps per episode

# --- optimisation ---
LR_A = 4e-4   # learning rate for actor
LR_C = 4e-4   # learning rate for critic
GAMMA = 0.9   # reward discount

# Target-network replacement strategies, keyed by name; the selected entry is
# passed unchanged to both the Actor and the Critic constructors.
_REPLACEMENT_OPTIONS = {
    'soft': {'name': 'soft', 'tau': 0.01},
    'hard': {'name': 'hard', 'rep_iter_a': 600, 'rep_iter_c': 500},
}
REPLACEMENT = _REPLACEMENT_OPTIONS['soft']   # you can try different target replacement strategies

# --- replay memory ---
MEMORY_CAPACITY = 40000   # learning starts once more transitions than this are stored
BATCH_SIZE = 80           # transitions sampled per learning step

# When True, the TensorFlow graph is written to "logs/".
OUTPUT_GRAPH = False

    
# Problem dimensions handed to the DDPG actor, critic and replay memory.
state_dim = 4     # SD_channel, RD_channel, battery, solar
action_dim = 1    # Transmission power
action_bound = 1  # no more than battery energy

# Energy-harvesting environment shared by every training run below.
# The two preparation calls are kept in their original order; presumably they
# load the channel-power and solar traces -- TODO confirm against EH_P2P.
env = EH_P2P.EH_P2P()
env.Chanpower()
env.Solarread()


# Agent feature k is taken from raw environment state entry _STATE_FEATURE_IDX[k].
_STATE_FEATURE_IDX = (0, 3, 4, 1)

# Directory that receives one checkpoint per (snr, modulation) setting.
_CKPT_DIR = "folder_for_1way_net_bit_rate"


def _agent_state(raw_state):
    """Project a raw environment state onto the feature vector fed to the agent.

    Args:
        raw_state: indexable state returned by ``env.reset_1_way`` or
            ``env.step_1_way2``; entries 0, 1, 3 and 4 are read.

    Returns:
        A fresh 1-D float ``numpy`` array ordered as
        ``raw_state[0], raw_state[3], raw_state[4], raw_state[1]``.
    """
    features = np.zeros((len(_STATE_FEATURE_IDX),))
    # Element-wise assignment (rather than np.array([...])) because the raw
    # state can mix plain floats with one-element arrays.
    for k, src in enumerate(_STATE_FEATURE_IDX):
        features[k] = raw_state[src]
    return features


# Create the checkpoint directory up front so a missing folder cannot make
# saver.save() fail after a full training run.
os.makedirs(_CKPT_DIR, exist_ok=True)

sess = None
for modulation in range(1):
    for snr in range(0, 20, 2):
        var = 10   # std-dev of the Gaussian exploration noise added to the action
        tip = 1    # print "memory full" only once per setting
        tip2 = 1   # print "var zero" only once per setting

        # Release the previous setting's session before building a new graph.
        # The session of the final setting is left open after the loop.
        if sess is not None:
            sess.close()
        tf.reset_default_graph()
        # The graph-level seed belongs to the graph that was just discarded,
        # so re-apply the same seed used at the top of the file.
        tf.set_random_seed(1)
        sess = tf.Session()

        with tf.name_scope('S'):
            S = tf.placeholder(tf.float32, shape=[None, state_dim], name='s')
        with tf.name_scope('R'):
            R = tf.placeholder(tf.float32, [None, 1], name='r')
        with tf.name_scope('S_'):
            S_ = tf.placeholder(tf.float32, shape=[None, state_dim], name='s_')
        # DDPG_CLASS reads its placeholders from module-level attributes.
        DDPG_CLASS.S = S
        DDPG_CLASS.R = R
        DDPG_CLASS.S_ = S_

        actor = DDPG_CLASS.Actor(sess, action_dim, action_bound, LR_A, REPLACEMENT)
        critic = DDPG_CLASS.Critic(sess, state_dim, action_dim, LR_C, GAMMA,
                                   REPLACEMENT, actor.a, actor.a_)
        actor.add_grad_to_graph(critic.a_grads)
        M = DDPG_CLASS.Memory(MEMORY_CAPACITY, dims=2 * state_dim + action_dim + 1)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=100)

        # Export the graph only after it exists; the original did this before
        # any session had been created, which raised NameError when enabled.
        if OUTPUT_GRAPH:
            tf.summary.FileWriter("logs/", sess.graph).close()

        for i in range(MAX_EPISODES):
            s = env.reset_1_way(snr)
            ss = _agent_state(s)
            ep_reward = 0
            for j in range(MAX_EP_STEPS):
                judge = env.judge_1_way()
                if judge == 1:
                    # The agent picks the action: policy output plus
                    # exploration noise, clipped to the valid range.
                    a = actor.choose_action(ss)
                    a = np.random.normal(a, var)
                    a = np.clip(a, 0, action_bound)

                    s_, r, flag, info = env.step_1_way2([a, modulation])  # input modulation 0:qpsk,1:8psk,2:16qam
                    ss_ = _agent_state(s_)

                    M.store_transition(ss, a, (r), ss_)

                    # Learn only once the replay memory has been filled.
                    if M.pointer > MEMORY_CAPACITY:
                        if tip == 1:
                            print("memory full", j, i)
                            tip = 0
                        var *= 0.9995  # decay the action randomness
                        if tip2 == 1 and var < 1e-8:
                            print("var zero", j, i)
                            tip2 = 0
                        # Each stored row is laid out as [s | a | r | s_].
                        b_M = M.sample(BATCH_SIZE)
                        b_s = b_M[:, :state_dim]
                        b_a = b_M[:, state_dim: state_dim + action_dim]
                        b_r = b_M[:, -state_dim - 1: -state_dim]
                        b_s_ = b_M[:, -state_dim:]

                        critic.learn(b_s, b_a, b_r, b_s_)
                        actor.learn(b_s)
                else:
                    # No decision this step: advance the environment with action 0,
                    # store nothing, and report the action as -1.
                    a = -1
                    s_, r, flag, info = env.step_1_way2([0, modulation])  # input modulation 0:qpsk,1:8psk,2:16qam

                s = s_
                ss = _agent_state(s)
                ep_reward += r

            if i % 30 == 0:
                # Average over the number of steps taken, not the last loop index.
                print("Net bit rate=", r, "action", a, "solar,channel,battery", s,
                      "average_reward", ep_reward / MAX_EP_STEPS)

        save_path = saver.save(
            sess,
            _CKPT_DIR + "/EH_save_net_snr=" + str(snr) + str(modulation) + "_1way.ckpt")

        print("Save to path: ", save_path)
print("----------------------------END--------------------------------")


snr= 0
Net bit rate= 4.5629919994044993e-138 action [1.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([0.258576])] 4466.166346261902 238

Net bit rate= 7.545363117910235e-101 action [1.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([0.258576])] 4876.311539543688 7378

Net bit rate= 3.9332709609361103e-190 action [0.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([2.285992])] 7671.342966035912 14518

Net bit rate= 7.545363117910235e-101 action [1.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([0.258576])] 3255.9186239856485 21658

Net bit rate= 3.7902211569936866e-73 action [1.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([0.258576])] 4260.201366146861 28798

Net bit rate=

Net bit rate= 3.9332709609361103e-190 action [6.26644392e-55] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([1.268872])] 11295.439868683672 292978

Net bit rate= 3.9332709609361103e-190 action [0.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([1.268872])] 10437.482644736936 300118

Net bit rate= 3.9332709609361103e-190 action [1.15008501e-61] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([0.9034])] 9808.29112296121 307258

Net bit rate= 1.1487679936356158e-18 action [1.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([0.258576])] 8375.492196623172 314398

Net bit rate= 3.9332709609361103e-190 action [0.] solar,channel,battery [array([0.085368]) -0.342703134198299 -0.06392477442318345
 -0.30883032215986694 array([1.924528])] 9612.915420631

snr= 0 Net bit rate= 0.0 action [0.] solar,channel,battery [array([0.190424]) -0.9274196409837738 0.8648506198147066
 0.029218145083579117 array([1.58316])] 12660.016501746717
snr= 0 Net bit rate= 0.0 action [1.22166204e-68] solar,channel,battery [array([0.182896]) -0.7669746162331016 0.7477307652047118
 -0.607096668917434 array([1.873584])] 12452.47524761972
snr= 0 Net bit rate= 0.0 action [3.94046453e-69] solar,channel,battery [array([0.271024]) -0.5400768157560277 0.5420517476420132
 -0.9271327150226745 array([2.15648])] 12251.628872658113
snr= 0 Net bit rate= 5.992000695697451e-244 action [2.38853869e-70] solar,channel,battery [array([0.70424]) -0.28517230713102815 0.3140810692994167
 -0.8623626458679449 array([2.527504])] 12057.15857309211
snr= 0 Net bit rate= 4.4635000322395907e-188 action [4.25113267e-69] solar,channel,battery [array([0.88456]) -0.04146678481025279 0.11530614475383416
 -0.4762800401697068 array([3.331744])] 11868.765470387547
snr= 0 Net bit rate= 8.7869772774120

snr= 0 Net bit rate= 2.3976350956169008e-96 action [1.9443964e-68] solar,channel,battery [array([0.228056]) 0.7971041125095557 -0.46303650203862534
 -0.6342546840204297 array([0.808288])] 15442.629884429989
snr= 0 Net bit rate= 2.2581521962031262e-77 action [0.] solar,channel,battery [array([0.029672]) 1.0551779942496902 -0.256774030658791
 -0.881073466069932 array([1.136344])] 15309.503764736626
snr= 0 Net bit rate= 6.178559142269819e-64 action [0.] solar,channel,battery [array([0.062168]) 1.2584961504362329 -0.1161216417034292
 -0.9775887433342921 array([1.266016])] 15178.653305208964
snr= 0 Net bit rate= 3.837806916121722e-55 action [9.50138149e-69] solar,channel,battery [array([0.194232]) 1.3808462532566423 -0.08809165162997568
 -0.9631106638184004 array([1.428184])] 15050.020650080074
snr= 0 Net bit rate= 1.769251402123958e-50 action [7.30979203e-69] solar,channel,battery [array([0.093472]) 1.3955664039811082 -0.18061788586169802
 -0.8664218202476514 array([1.722416])] 14923.54988

snr= 0 Net bit rate= 101365.958047992 action [0.9999994] solar,channel,battery [array([0.49992]) 1.3106663926825401 -0.5985645706160403
 -0.09586440033084365 array([0.35823375])] 12025.92470098812
snr= 0 Net bit rate= 4.22501418613219e-53 action [0.] solar,channel,battery [array([0.70848]) 2.328676915294625 -0.7180033383435557
 -0.4943796994260099 array([0.95815375])] 11956.810191212326
snr= 0 Net bit rate= 1.6541943572880415e-25 action [0.] solar,channel,battery [array([0.552984]) 2.9641886375389843 -0.8157570915953907
 -0.7633362010429958 array([1.76663375])] 11888.485561548256
snr= 0 Net bit rate= 1.919012817908846e-06 action [1.] solar,channel,battery [array([0.76992]) 3.0279301664181855 -0.8633155468585116
 -0.9037202954510503 array([0.652984])] 11820.937348141271
snr= 0 Net bit rate= 4.409375730577701e-15 action [0.] solar,channel,battery [array([0.77008]) 2.503877396856655 -0.8495710411867957 -0.929197949777762
 array([1.522904])] 11754.152391372112
snr= 0 Net bit rate= 1.975669

snr= 0 Net bit rate= 2.1868452120492484e-225 action [0.] solar,channel,battery [array([0.1894]) -0.5750942397084938 -0.04076928054685225
 -0.025432205366494265 array([2.294384])] 12034.278742195673
snr= 0 Net bit rate= 9.700262804059473e-254 action [4.14091478e-69] solar,channel,battery [array([0.261464]) -0.6533522753492755 0.01750905873339259
 -0.47622779343913557 array([2.583784])] 11982.629477207709
snr= 0 Net bit rate= 2.318671025087051e-278 action [1.49685349e-68] solar,channel,battery [array([0.24084]) -0.6705922907508272 0.029300477245925105
 -0.7506516581788835 array([2.945248])] 11931.421658929043
snr= 0 Net bit rate= 2.1600316972475324e-284 action [9.68698691e-69] solar,channel,battery [array([0.214816]) -0.6199721228046469 0.00668115043497397
 -0.8624553943794775 array([3.286088])] 11880.649651869771
snr= 0 Net bit rate= 2.4006802305479157e-267 action -1 solar,channel,battery [array([0.265472]) -0.5201076668732227 -0.03307426586276452
 -0.8586754470558534 array([3.600904])]