In [1]:
from keras import backend as K
from keras.layers import Dense, Activation, Input
from keras.models import Model, load_model, Sequential
from keras.optimizers import Adam

from keras.layers import BatchNormalization
import numpy as np

Using TensorFlow backend.


In [50]:
class Agent_AC(object):
    def __init__(self, alpha, beta, gamma=0.99, n_action=4, ip_dims=8):
        
        self.alpha = alpha
        self.gamma = gamma
        self.beta = beta
        self.ip_dims = ip_dims
        self.n_action = n_action
        
        self.actor, self.critic, self.policy = self.build_AC()
        self.action_space = [i for i in range(n_action)]
        
    def build_AC(self):
        ip = Input(shape=(self.ip_dims,))
        delta = Input(shape=[1])
        def A_C():
            m = Dense(1024, activation="relu")(ip)
            m = BatchNormalization()(m)
            m = Dense(512, activation='relu')(m)
            m = BatchNormalization()(m)
            m = Dense(256, activation='relu')(m)
            m = BatchNormalization()(m)
            m = Dense(128, activation='relu')(m)
            m = BatchNormalization()(m)
            m = Dense(56, activation='relu')(m)
            m = BatchNormalization()(m)
            m = Dense(28, activation='relu')(m)
            m = BatchNormalization()(m)
                
            prob = Dense(self.n_action, activation='softmax')(m)
            val = Dense(1, activation='linear')(m)
    
            actor = Model(inputs=[ip, delta], outputs=[prob])    
            critic = Model(inputs=[ip], outputs=[val])
            policy = Model(inputs=[ip], outputs=[prob])
            
            return actor, critic, policy
        
        def loss(y_true, y_pred):
            out = K.clip(y_pred, 1e-8, 1-1e-8)
            log_lik = y_true*K.log(out)
            
            return K.sum(-log_lik*delta)
        
        actor, critic, policy = A_C()
        
        actor.compile(optimizer=Adam(lr=self.alpha), loss=loss, metrics=['accuracy'])
        critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error', metrics=['accuracy'])
        
        #print(actor.summary())
        #print(critic.summary())
        #print(policy.summary())
        return actor, critic, policy
    def choose_action(self, observation):
        state = observation[np.newaxis, :]
        prob = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=prob)
        return action
    
    def learn(self, s, a, r, s_, done):
        s = s[np.newaxis, :]
        s_ = s_[np.newaxis, :]
        
        critic_val_ = self.critic.predict(s_)
        critic_val = self.critic.predict(s)
        
        target = r + self.gamma * critic_val_*(1-int(done))
        delta = target - critic_val
        
        actons = np.zeros([1, self.n_action])
        actons[np.arange(1), a] = 1
        
        self.actor.fit([s, delta], actons, verbose=0)
        self.critic.fit(s, target, verbose=0)
        

In [51]:
a = Agent_AC(alpha=0.00001, beta=0.00005)

In [52]:
a.build_AC()

(<keras.engine.training.Model at 0x1cc6b473448>,
 <keras.engine.training.Model at 0x1cc6b47c308>,
 <keras.engine.training.Model at 0x1cc6b47f3c8>)

In [None]:
import gym, os,numpy as np
if __name__ == '__main__':
    agent = Agent_AC(alpha=0.00001, beta=0.00005)
    env = gym.make('LunarLander-v2')
    score_his = []
    num_ep = 5000
    
    for i in range(num_ep):
        done = False
        score = 0
        obs = env.reset()
        
        while not done:
            a = agent.choose_action(obs)
            obs_, r, done, info = env.step(a)
            agent.learn(obs, a, r, obs_, done)
            obs = obs_
            score += r
        score_his.append(score)
        avg_score= np.mean(score_his[-100:])
        print('episode:', i, 'score: %.2f' %score, 'avg_score: %.2f' %avg_score)



episode: 0 score: -110.99 avg_score: -110.99
episode: 1 score: -632.60 avg_score: -371.80
episode: 2 score: -542.22 avg_score: -428.61
episode: 3 score: -722.59 avg_score: -502.10
episode: 4 score: -440.71 avg_score: -489.82
episode: 5 score: -563.45 avg_score: -502.09
episode: 6 score: -683.41 avg_score: -528.00
episode: 7 score: -720.03 avg_score: -552.00
episode: 8 score: -335.08 avg_score: -527.90
episode: 9 score: -469.85 avg_score: -522.09
episode: 10 score: -466.32 avg_score: -517.02
episode: 11 score: -778.12 avg_score: -538.78
episode: 12 score: -679.88 avg_score: -549.64
episode: 13 score: -426.72 avg_score: -540.86
episode: 14 score: -724.62 avg_score: -553.11
episode: 15 score: -434.97 avg_score: -545.72
episode: 16 score: -471.22 avg_score: -541.34
episode: 17 score: -818.75 avg_score: -556.75
episode: 18 score: -701.86 avg_score: -564.39
episode: 19 score: -400.01 avg_score: -556.17
episode: 20 score: -556.86 avg_score: -556.20
episode: 21 score: -455.18 avg_score: -551.6

episode: 177 score: -522.39 avg_score: -586.64
episode: 178 score: -504.40 avg_score: -588.09
episode: 179 score: -444.08 avg_score: -587.46
episode: 180 score: -424.50 avg_score: -586.68
episode: 181 score: -487.08 avg_score: -587.07
episode: 182 score: -487.18 avg_score: -583.66
episode: 183 score: -741.38 avg_score: -587.34
episode: 184 score: -751.40 avg_score: -590.61
episode: 185 score: -471.33 avg_score: -591.04
episode: 186 score: -476.16 avg_score: -591.49
episode: 187 score: -772.61 avg_score: -590.85
episode: 188 score: -624.55 avg_score: -590.60
episode: 189 score: -488.52 avg_score: -591.72
episode: 190 score: -546.48 avg_score: -589.15
episode: 191 score: -394.62 avg_score: -588.25
episode: 192 score: -402.92 avg_score: -587.78
episode: 193 score: -449.30 avg_score: -587.45
episode: 194 score: -458.01 avg_score: -586.49
episode: 195 score: -936.90 avg_score: -587.99
episode: 196 score: -757.88 avg_score: -588.21
episode: 197 score: -514.00 avg_score: -587.28
episode: 198 

episode: 352 score: -578.34 avg_score: -601.97
episode: 353 score: -719.28 avg_score: -600.97
episode: 354 score: -409.02 avg_score: -597.87
episode: 355 score: -403.42 avg_score: -595.05
episode: 356 score: -438.04 avg_score: -592.19
episode: 357 score: -591.95 avg_score: -591.08
episode: 358 score: -444.28 avg_score: -590.31
episode: 359 score: -453.85 avg_score: -585.27
episode: 360 score: -646.82 avg_score: -583.33
episode: 361 score: -426.19 avg_score: -576.97
episode: 362 score: -475.98 avg_score: -574.19
episode: 363 score: -776.57 avg_score: -576.61
episode: 364 score: -654.89 avg_score: -577.18
episode: 365 score: -710.65 avg_score: -578.81
episode: 366 score: -901.17 avg_score: -582.81
episode: 367 score: -525.41 avg_score: -583.46
episode: 368 score: -331.03 avg_score: -583.67
episode: 369 score: -731.74 avg_score: -583.29
episode: 370 score: -679.02 avg_score: -582.19
episode: 371 score: -360.14 avg_score: -580.75
episode: 372 score: -740.98 avg_score: -584.19
episode: 373 

episode: 527 score: -868.64 avg_score: -600.51
episode: 528 score: -836.94 avg_score: -601.77
episode: 529 score: -760.50 avg_score: -604.55
episode: 530 score: -326.57 avg_score: -602.18
episode: 531 score: -456.94 avg_score: -598.65
episode: 532 score: -692.73 avg_score: -601.02
episode: 533 score: -404.00 avg_score: -597.71
episode: 534 score: -450.58 avg_score: -597.74
episode: 535 score: -803.84 avg_score: -599.72
episode: 536 score: -763.38 avg_score: -601.98
episode: 537 score: -799.92 avg_score: -606.36
episode: 538 score: -440.13 avg_score: -605.49
episode: 539 score: -548.20 avg_score: -607.16
episode: 540 score: -330.08 avg_score: -605.91
episode: 541 score: -644.54 avg_score: -604.72
episode: 542 score: -591.57 avg_score: -605.28
episode: 543 score: -764.98 avg_score: -604.68
episode: 544 score: -647.47 avg_score: -606.01
episode: 545 score: -437.39 avg_score: -605.58
episode: 546 score: -360.73 avg_score: -605.42
episode: 547 score: -1058.22 avg_score: -610.88
episode: 548

episode: 702 score: -546.09 avg_score: -581.02
episode: 703 score: -492.28 avg_score: -578.43
episode: 704 score: -925.62 avg_score: -583.52
episode: 705 score: -504.61 avg_score: -582.16
episode: 706 score: -874.33 avg_score: -586.91
episode: 707 score: -419.84 avg_score: -586.23
episode: 708 score: -763.63 avg_score: -585.77
episode: 709 score: -793.33 avg_score: -587.85
episode: 710 score: -704.93 avg_score: -590.75
episode: 711 score: -452.85 avg_score: -586.29
episode: 712 score: -601.34 avg_score: -587.21
episode: 713 score: -600.42 avg_score: -588.09
episode: 714 score: -946.21 avg_score: -590.03
episode: 715 score: -547.25 avg_score: -591.23
episode: 716 score: -436.38 avg_score: -592.27
episode: 717 score: -556.64 avg_score: -592.94
episode: 718 score: -511.34 avg_score: -591.30
episode: 719 score: -482.12 avg_score: -589.49
episode: 720 score: -921.03 avg_score: -593.65
episode: 721 score: -837.51 avg_score: -597.30
episode: 722 score: -728.55 avg_score: -597.31
episode: 723 

episode: 877 score: -458.85 avg_score: -575.05
episode: 878 score: -1065.92 avg_score: -582.37
episode: 879 score: -486.86 avg_score: -582.16
episode: 880 score: -671.38 avg_score: -579.91
episode: 881 score: -470.25 avg_score: -580.35
episode: 882 score: -903.85 avg_score: -584.71
episode: 883 score: -367.15 avg_score: -582.71
episode: 884 score: -448.06 avg_score: -580.50
episode: 885 score: -424.29 avg_score: -578.49
episode: 886 score: -794.47 avg_score: -582.18
episode: 887 score: -657.57 avg_score: -585.18
episode: 888 score: -547.01 avg_score: -586.77
episode: 889 score: -610.81 avg_score: -589.34
episode: 890 score: -500.64 avg_score: -589.04
episode: 891 score: -513.18 avg_score: -587.16
episode: 892 score: -488.33 avg_score: -588.32
episode: 893 score: -397.94 avg_score: -586.67
episode: 894 score: -543.40 avg_score: -587.73
episode: 895 score: -730.39 avg_score: -588.92
episode: 896 score: -404.89 avg_score: -589.03
episode: 897 score: -467.00 avg_score: -587.06
episode: 898

episode: 1051 score: -566.33 avg_score: -603.53
episode: 1052 score: -744.33 avg_score: -605.44
episode: 1053 score: -516.32 avg_score: -605.78
episode: 1054 score: -529.83 avg_score: -608.15
episode: 1055 score: -512.50 avg_score: -604.75
episode: 1056 score: -777.44 avg_score: -605.13
episode: 1057 score: -743.65 avg_score: -606.00
episode: 1058 score: -400.33 avg_score: -604.47
episode: 1059 score: -502.08 avg_score: -604.30
episode: 1060 score: -528.81 avg_score: -604.80
episode: 1061 score: -855.01 avg_score: -607.53
episode: 1062 score: -879.32 avg_score: -608.27
episode: 1063 score: -572.32 avg_score: -609.75
episode: 1064 score: -450.43 avg_score: -605.19
episode: 1065 score: -426.88 avg_score: -601.76
episode: 1066 score: -802.24 avg_score: -602.08
episode: 1067 score: -777.06 avg_score: -602.23
episode: 1068 score: -680.27 avg_score: -602.25
episode: 1069 score: -798.28 avg_score: -600.16
episode: 1070 score: -492.13 avg_score: -598.53
episode: 1071 score: -489.87 avg_score: 

episode: 1222 score: -497.38 avg_score: -578.55
episode: 1223 score: -384.23 avg_score: -574.13
episode: 1224 score: -557.90 avg_score: -574.44
episode: 1225 score: -416.32 avg_score: -573.86
episode: 1226 score: -379.30 avg_score: -571.60
episode: 1227 score: -358.36 avg_score: -567.42
episode: 1228 score: -453.15 avg_score: -568.18
episode: 1229 score: -693.67 avg_score: -568.15
episode: 1230 score: -504.95 avg_score: -568.52
episode: 1231 score: -502.01 avg_score: -567.99
episode: 1232 score: -884.24 avg_score: -572.17
episode: 1233 score: -545.33 avg_score: -572.69
episode: 1234 score: -401.40 avg_score: -568.55
episode: 1235 score: -946.94 avg_score: -573.14
episode: 1236 score: -809.56 avg_score: -573.58
episode: 1237 score: -488.00 avg_score: -570.73
episode: 1238 score: -395.98 avg_score: -568.19
episode: 1239 score: -497.07 avg_score: -565.58
episode: 1240 score: -664.50 avg_score: -566.62
episode: 1241 score: -469.33 avg_score: -567.00
episode: 1242 score: -350.83 avg_score: 

episode: 1393 score: -709.32 avg_score: -587.58
episode: 1394 score: -317.01 avg_score: -586.67
episode: 1395 score: -768.54 avg_score: -588.44
episode: 1396 score: -547.68 avg_score: -589.55
episode: 1397 score: -809.34 avg_score: -592.30
episode: 1398 score: -442.72 avg_score: -589.64
episode: 1399 score: -375.88 avg_score: -589.63
episode: 1400 score: -733.62 avg_score: -593.06
episode: 1401 score: -510.77 avg_score: -591.65
episode: 1402 score: -731.76 avg_score: -592.34
episode: 1403 score: -367.19 avg_score: -590.89
episode: 1404 score: -469.93 avg_score: -588.05
episode: 1405 score: -351.67 avg_score: -585.14
episode: 1406 score: -518.77 avg_score: -582.87
episode: 1407 score: -606.94 avg_score: -583.76
episode: 1408 score: -654.55 avg_score: -580.04
episode: 1409 score: -527.19 avg_score: -577.98
episode: 1410 score: -435.53 avg_score: -577.62
episode: 1411 score: -434.03 avg_score: -574.03
episode: 1412 score: -667.45 avg_score: -577.53
episode: 1413 score: -693.24 avg_score: 

episode: 1564 score: -393.00 avg_score: -562.83
episode: 1565 score: -502.99 avg_score: -564.28
episode: 1566 score: -648.78 avg_score: -566.85
episode: 1567 score: -555.90 avg_score: -563.93
episode: 1568 score: -466.18 avg_score: -564.11
episode: 1569 score: -395.85 avg_score: -562.66
episode: 1570 score: -726.37 avg_score: -559.65
episode: 1571 score: -369.36 avg_score: -556.37
episode: 1572 score: -502.54 avg_score: -556.01
episode: 1573 score: -359.17 avg_score: -555.72
episode: 1574 score: -819.56 avg_score: -559.74
episode: 1575 score: -840.53 avg_score: -563.14
episode: 1576 score: -660.80 avg_score: -560.63
episode: 1577 score: -498.93 avg_score: -559.77
episode: 1578 score: -421.01 avg_score: -556.91
episode: 1579 score: -681.10 avg_score: -559.55
episode: 1580 score: -377.90 avg_score: -557.07
episode: 1581 score: -444.43 avg_score: -556.40
episode: 1582 score: -637.22 avg_score: -557.91
episode: 1583 score: -459.29 avg_score: -559.18
episode: 1584 score: -426.49 avg_score: 