# DQN CartPole

In [54]:
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import random

### Ortamı Tanımlama

In [55]:
env = gym.make('CartPole-v1') # creates the CartPole-v1 environment (no render_mode is requested)

In [56]:
# Show the observation vector length (4 values) and the number of
# available actions (2), one per line.
print(env.observation_space.shape[0], env.action_space.n, sep='\n')

4
2


In [58]:
# Take one random step to inspect what env.step returns. With this Gym version
# the result is a 5-element tuple: observation, reward, two boolean flags
# (terminated, truncated per Gym's step API) and an info dict.
env.reset()
action = env.action_space.sample() # draw a random action from the action space
result = env.step(action) # apply the action and collect the step result
print(result)

(array([-0.01239878,  0.14657064, -0.02694915, -0.32741123], dtype=float32), 1.0, False, False, {})


In [59]:
# Only the first three entries of the step result are used here: the
# observation, the reward and the first of the two boolean flags.
state, reward, done = result[0], result[1], result[2]
print(state, reward, done)

[-0.01239878  0.14657064 -0.02694915 -0.32741123] 1.0 False


In [60]:
# Baseline: this is how the agent behaves without any training
# (actions are drawn uniformly at random for one episode).
env.reset()
while True:
    # env.render() is left out: rendering did not work for this CartPole env.
    # NOTE(review): presumably because no render_mode was passed to gym.make -- confirm.
    action = env.action_space.sample()
    # Gym's step API returns (observation, reward, terminated, truncated, info).
    state, reward, terminated, truncated, info = env.step(action)
    # An episode ends either in a terminal state (terminated) or when it is cut
    # off by the time limit (truncated); checking both guarantees the loop ends.
    done = terminated or truncated
    print(state, reward, done)
    if done: # end of the episode
        # The shared `env` is intentionally not closed here: the training cell
        # further down keeps using the same environment object.
        break

[ 0.04499032 -0.24485508 -0.00315376  0.2768539 ] 1.0 False
[ 0.04009322 -0.04968827  0.00238332 -0.01682207] 1.0 False
[ 0.03909945  0.14539942  0.00204688 -0.3087521 ] 1.0 False
[ 0.04200744 -0.04975164 -0.00412817 -0.01542433] 1.0 False
[ 0.04101241  0.14542927 -0.00443665 -0.30940688] 1.0 False
[ 0.04392099 -0.04962919 -0.01062479 -0.01812644] 1.0 False
[ 0.04292841 -0.24459717 -0.01098732  0.2711854 ] 1.0 False
[ 0.03803647 -0.04932017 -0.00556361 -0.02494263] 1.0 False
[ 0.03705006  0.14588113 -0.00606246 -0.31937572] 1.0 False
[ 0.03996769  0.3410889  -0.01244998 -0.6139644 ] 1.0 False
[ 0.04678946  0.53638256 -0.02472926 -0.91054237] 1.0 False
[ 0.05751712  0.7318303  -0.04294011 -1.210894  ] 1.0 False
[ 0.07215372  0.53728825 -0.06715799 -0.9319704 ] 1.0 False
[ 0.08289949  0.733249   -0.0857974  -1.2449785 ] 1.0 False
[ 0.09756447  0.5393261  -0.11069697 -0.98035765] 1.0 False
[ 0.10835099  0.7357437  -0.13030413 -1.3056592 ] 1.0 False
[ 0.12306587  0.54249203 -0.15641731 -1.

### Ajanı Tanımlama 

In [61]:
# Hyperparameters
ACTION_SIZE = env.action_space.n             # number of discrete actions (size of the Q-network output)
STATE_SIZE = env.observation_space.shape[0]  # length of the observation vector (size of the Q-network input)
GAMMA = 0.95           # discount factor applied to the best next-state Q-value
BATCH_SIZE = 32        # number of transitions sampled from memory per replay call
LR = 0.0001            # learning rate passed to the Adam optimizer
EPSILON = 1            # initial exploration rate (probability of a random action)
EPS_MIN = 0.01         # epsilon floor: decay stops once epsilon is no longer above it
EPS_DECAY = 0.995      # multiplicative decay applied to epsilon at the end of each replay call
Episodes = 1000        # number of training episodes

In [62]:
class DQN_Agent:
    """Deep Q-Network agent with an epsilon-greedy policy and experience replay.

    The agent owns a bounded replay memory of transitions, a small fully
    connected Q-network configured from the module-level STATE_SIZE,
    ACTION_SIZE and LR settings, and an exploration rate that starts at
    EPSILON and is decayed by EPS_DECAY while it is above EPS_MIN.
    """

    def __init__(self):
        # Replay memory: once 2000 transitions are stored, the oldest is dropped.
        self.memory = deque(maxlen=2000)
        self.model = self._build_model()
        self.epsilon = EPSILON

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras Sequential model that maps a batch of state
            vectors (width STATE_SIZE) to one Q-value per action
            (width ACTION_SIZE), trained with MSE loss and Adam.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=STATE_SIZE, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Linear output layer: Q-values are unbounded regression targets.
        model.add(Dense(ACTION_SIZE, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=LR))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one (state, action, reward, next_state, done) transition."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action with an epsilon-greedy policy.

        Args:
            state: Batch containing a single state, i.e. an array the
                Q-network can predict on (the training loop passes shape
                (1, STATE_SIZE)).

        Returns:
            The chosen action index as a Python int.
        """
        # Exploration: with probability epsilon choose a uniformly random action.
        # Sampling from ACTION_SIZE keeps the agent independent of the global env.
        if random.uniform(0, 1) <= self.epsilon:
            return random.randrange(int(ACTION_SIZE))

        # Exploitation: greedy action under the current Q-network.
        # verbose=0 stops Keras from printing a progress bar on every step.
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self):
        """Train the Q-network on a random minibatch from the replay memory.

        Does nothing until at least BATCH_SIZE transitions are stored.
        Otherwise BATCH_SIZE transitions are sampled, the Q-learning target
        ``reward`` (for done transitions) or
        ``reward + GAMMA * max_a Q(next_state, a)`` is written into the
        predicted Q-values at the taken action, and the network is fitted on
        the result. Finally epsilon is decayed if it is still above EPS_MIN.
        """
        if len(self.memory) < BATCH_SIZE:
            return

        minibatch = random.sample(self.memory, BATCH_SIZE)

        # Stack the sampled transitions into arrays so the network is queried
        # once per minibatch instead of twice per transition.
        states = np.vstack([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float32)
        next_states = np.vstack([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        # Bootstrapped target; transitions flagged done keep the plain reward.
        next_q = self.model.predict(next_states, verbose=0)
        targets = np.where(dones, rewards, rewards + GAMMA * np.amax(next_q, axis=1))

        # Only the Q-value of the action actually taken is changed; the other
        # outputs keep their predicted values and therefore add no loss.
        train_targets = self.model.predict(states, verbose=0)
        train_targets[np.arange(len(minibatch)), actions] = targets

        # batch_size=1 performs one optimizer step per sampled transition
        # while still needing only a single fit call for the whole minibatch.
        self.model.fit(states, train_targets, batch_size=1, epochs=1, verbose=0)

        # Decay the exploration rate.
        if self.epsilon > EPS_MIN:
            self.epsilon *= EPS_DECAY
    
    
    
        
    

### Model Eğitimi

In [None]:
if __name__ == "__main__":

    agent = DQN_Agent()

    for e in range(Episodes):

        # reset() returns (observation, info); only the observation is needed.
        state, info = env.reset()
        # The Q-network expects a batch dimension: shape (1, STATE_SIZE).
        state = np.reshape(state, [1, STATE_SIZE])

        # Number of steps survived in the current episode.
        time = 0

        while True:

            # act: choose an action (epsilon-greedy)
            action = agent.act(state)

            # step: apply the action; Gym returns
            # (observation, reward, terminated, truncated, info).
            next_state, reward, terminated, truncated, info = env.step(action)
            next_state = np.reshape(next_state, [1, STATE_SIZE])

            # remember: store the transition. Only `terminated` is stored as the
            # done flag, because a time-limit truncation is not a terminal state
            # and the replay target should still bootstrap from next_state.
            agent.remember(state, action, reward, next_state, terminated)

            # advance to the next state
            state = next_state

            # replay: one training pass on a sampled minibatch
            agent.replay()

            time += 1

            # The episode is over on termination OR on time-limit truncation;
            # checking only the first flag would keep stepping a finished episode.
            done = terminated or truncated
            if done:
                print("Episode: {}, time: {}".format(e,time))
                break


Episode: 0, time: 13












Episode: 1, time: 34






