# DQN CartPole

In [8]:
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import random




### Ortamı Tanımlama

In [31]:
env = gym.make('CartPole-v1') # create the CartPole-v1 environment

In [32]:
# Show the sizes of the two spaces, one per line: the observation (state) is a
# vector of 4 values and there are 2 possible actions.
print(env.observation_space.shape[0], env.action_space.n, sep="\n")

4
2


In [33]:
# Take one random step to inspect what env.step() returns.
env.reset()
action = env.action_space.sample() # pick a random action from the action space
result = env.step(action) # apply the action and collect the step result
print(result)

(array([ 0.01532794, -0.2305943 ,  0.04136678,  0.32406178], dtype=float32), 1.0, False, False, {})


In [34]:
# env.step() returned a 5-tuple here:
# (observation, reward, terminated, truncated, info).
# The episode is over when either flag is set, so `done` must combine both;
# slicing the first three items would silently ignore `truncated`.
state, reward, terminated, truncated, info = result
done = terminated or truncated
print(state, reward, done)

[ 0.01532794 -0.2305943   0.04136678  0.32406178] 1.0 False


In [35]:
# Baseline: this is how the agent behaves before any training, i.e. when it
# only takes random actions.
env.reset()
done = False
while not done:
    # env.render() is intentionally skipped: rendering does not work for this
    # CartPole environment.
    action = env.action_space.sample()
    state, reward, terminated, truncated, info = env.step(action)
    # The episode ends on termination OR truncation; checking only the third
    # item of the step tuple would keep stepping an already-truncated episode.
    done = terminated or truncated
    print(state, reward, done)
# Release the environment's resources once the episode has finished.
env.close()

[-0.02508555 -0.20854247 -0.0148479   0.31890926] 1.0 False
[-0.0292564  -0.01321222 -0.00846972  0.02158106] 1.0 False
[-0.02952064 -0.20821169 -0.0080381   0.3115797 ] 1.0 False
[-0.03368488 -0.4032182  -0.0018065   0.6017169 ] 1.0 False
[-0.04174924 -0.20807104  0.01022783  0.30846545] 1.0 False
[-0.04591066 -0.0130963   0.01639714  0.01902558] 1.0 False
[-0.04617259  0.1817867   0.01677765 -0.26843908] 1.0 False
[-0.04253685 -0.01357061  0.01140887  0.02948805] 1.0 False
[-0.04280826  0.18138589  0.01199863 -0.25957352] 1.0 False
[-0.03918055  0.37633452  0.00680716 -0.5484479 ] 1.0 False
[-0.03165385  0.1811176  -0.0041618  -0.25362802] 1.0 False
[-0.0280315   0.37629873 -0.00923436 -0.5476208 ] 1.0 False
[-0.02050553  0.18130772 -0.02018677 -0.25786155] 1.0 False
[-0.01687938  0.37671196 -0.025344   -0.55684274] 1.0 False
[-0.00934514  0.18195482 -0.03648086 -0.27225116] 1.0 False
[-0.00570604 -0.01262811 -0.04192588  0.00870604] 1.0 False
[-0.0059586   0.18306926 -0.04175176 -0.

### Ajanı Tanımlama 

In [36]:
# Hyperparameters
ACTION_SIZE = env.action_space.n
STATE_SIZE = env.observation_space.shape[0]
GAMMA = 0.95           # discount factor
BATCH_SIZE = 32        # presumably the replay minibatch size -- TODO confirm against the training loop
LR = 0.0001            # learning rate
EPS_START = 1          # initial epsilon value
EPS_MIN = 0.01         # final (minimum) epsilon value
EPS_DECAY = 0.995      # presumably the multiplicative epsilon decay factor -- TODO confirm against the agent
Episodes = 1000  # presumably the number of training episodes -- TODO confirm against the training loop

In [37]:
class DQN_Agent:
    """DQN agent with an epsilon-greedy policy and an experience-replay buffer.

    Hyperparameters are read from the module-level constants GAMMA, LR,
    EPS_START, EPS_MIN, EPS_DECAY, STATE_SIZE and ACTION_SIZE when the agent is
    created. Exploration actions are sampled from the module-level `env`.

    States may be passed either flat, shape (STATE_SIZE,), or already batched,
    shape (1, STATE_SIZE); they are reshaped to (1, STATE_SIZE) before being
    fed to the network.
    """

    def __init__(self):
        """Create the replay buffer, the exploration schedule and the Q-network."""
        # Bounded buffer: once full, the oldest transitions are discarded.
        self.memory = deque(maxlen=2000)
        # act() and replay() read these attributes, so they must exist before
        # the first call (they were previously never assigned).
        self.gamma = GAMMA
        self.epsilon = EPS_START
        self.epsilon_min = EPS_MIN
        self.epsilon_decay = EPS_DECAY
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (STATE_SIZE -> 24 -> 24 -> ACTION_SIZE).

        Returns:
            The compiled Keras model, trained with MSE loss and Adam.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=STATE_SIZE, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Linear output layer: one Q-value estimate per action.
        model.add(Dense(ACTION_SIZE, activation='linear'))
        # `learning_rate` is the supported name; `lr` is a deprecated alias.
        model.compile(loss='mse', optimizer=Adam(learning_rate=LR))
        return model

    @staticmethod
    def _as_batch(state):
        """Return `state` reshaped to a single-row 2-D array, shape (1, n)."""
        return np.reshape(state, (1, -1))

    def remember(self, state, action, reward, next_state, done):
        """Store one transition (state, action, reward, next_state, done)."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for `state` with an epsilon-greedy policy.

        With probability `self.epsilon` a random action is sampled from the
        environment (exploration); otherwise the action with the highest
        predicted Q-value is returned (exploitation).
        """
        if random.uniform(0, 1) <= self.epsilon:
            return env.action_space.sample()
        # verbose=0 keeps Keras from printing a progress bar on every step.
        q_values = self.model.predict(self._as_batch(state), verbose=0)
        return np.argmax(q_values[0])

    def replay(self, batch_size):
        """Train the Q-network on a random minibatch of stored transitions.

        Does nothing while the buffer holds fewer than `batch_size`
        transitions. After a training pass, epsilon is decayed by
        `self.epsilon_decay` but never below `self.epsilon_min`.

        Args:
            batch_size: number of transitions to sample from the buffer.
        """
        # random.sample raises ValueError if asked for more items than exist,
        # which would happen during the first few steps of training.
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:
            state = self._as_batch(state)
            # Terminal transitions have no future return to bootstrap from.
            target = reward
            if not done:
                next_q = self.model.predict(self._as_batch(next_state), verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)

            # Only the taken action's Q-value is moved toward the target; the
            # other outputs keep their current predictions (zero error).
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decay exploration, clamped so it cannot undershoot the floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
    
    
    
    
        
    