In [None]:
!pip install gymnasium

In [5]:
import gymnasium as gym
from gymnasium import spaces
import pygame
import numpy as np
import random
import math
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from collections import deque

In [6]:
class CustomEnv(gym.Env):
    """Pygame-backed environment in which a red circle (the organism) chases a green goal.

    Observations are RGB frames of the pygame screen with shape
    ``(HEIGHT, WIDTH, 3)`` and dtype ``uint8``, matching ``observation_space``.

    Actions (``spaces.Discrete(4)``):
        0 -- move up    (y decreases; pygame's origin is the top-left corner)
        1 -- move down  (y increases)
        2 -- move left  (x decreases, heading turns by -pi/8)
        3 -- move right (x increases, heading turns by +pi/8)

    Rewards: -0.1 on every step, plus +10 on the step in which the organism
    centre comes closer than ``goal_radius`` to the goal centre. That step also
    ends the episode and moves the goal to a new random grid position.

    NOTE: ``step`` returns the legacy 4-tuple ``(obs, reward, done, info)`` and
    ``reset`` returns only the observation, because that is what the training
    loop in this notebook unpacks. Gymnasium wrappers expect the newer
    5-tuple / ``(obs, info)`` forms.
    """

    # Gymnasium reads 'render_modes'; the legacy 'render.modes' key is kept so
    # that any code still looking it up keeps working.
    metadata = {'render_modes': ['human'], 'render.modes': ['human']}

    def __init__(self):
        """Create the pygame window and initialise organism, goal and bookkeeping fields."""
        super(CustomEnv, self).__init__()

        pygame.init()
        self.WIDTH, self.HEIGHT = 320, 240
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT))

        self.action_space = spaces.Discrete(4)  # 0=up, 1=down, 2=left, 3=right
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.HEIGHT, self.WIDTH, 3), dtype=np.uint8)

        self.organism_size = 20
        self.organism_direction = 0  # initial heading, in radians
        self.organism_speed = 5      # pixels moved per step
        self.organism_radius = self.organism_size // 2
        self.organism_position = [self.WIDTH // 2, self.HEIGHT // 2]

        self.goal_size = 20
        self.goal_radius = self.organism_radius
        self.goal_position = [100, 100]

        # The fields below are initialised here but not read by any method of
        # this class; they are kept because external code may rely on them.
        self.score = 0
        self.learning_rate = 0.1
        self.discount_factor = 0.9
        self.epsilon = 0.3

        self.state_space_size = (self.WIDTH // self.organism_size, self.HEIGHT // self.organism_size)
        self.q_table = np.zeros((self.state_space_size[0], self.state_space_size[1], self.action_space.n))

    def _draw_scene(self):
        """Paint background, organism and goal onto the screen surface (no display flip)."""
        self.screen.fill((255, 255, 255))
        pygame.draw.circle(self.screen, (255, 0, 0), self.organism_position, self.organism_radius)
        pygame.draw.circle(self.screen, (0, 255, 0), self.goal_position, self.goal_radius)

    def _get_observation(self):
        """Draw the current state and return it as a ``(HEIGHT, WIDTH, 3)`` uint8 array."""
        # Redraw first: otherwise the captured frame shows whatever was last
        # rendered (or nothing at all if render() was never called).
        self._draw_scene()
        frame = pygame.surfarray.array3d(self.screen)
        # surfarray is indexed (x, y, channel), i.e. (WIDTH, HEIGHT, 3); swap the
        # first two axes so the array matches observation_space.
        return np.ascontiguousarray(np.transpose(frame, (1, 0, 2)))

    def _random_goal_position(self):
        """Pick a goal centre on the ``organism_size`` grid that the organism can reach."""
        cell = self.organism_size
        max_col = (self.WIDTH - self.goal_size) // cell
        max_row = (self.HEIGHT - self.goal_size) // cell
        # Start at grid index 1, not 0: the organism centre is clamped to at
        # least organism_radius from each edge, so a goal centred on x=0 or y=0
        # could never satisfy `distance < goal_radius`.
        return [random.randint(1, max_col) * cell,
                random.randint(1, max_row) * cell]

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        An action outside 0..3 leaves the organism where it is; the step
        penalty still applies.
        """
        reward = -0.1  # per-step penalty
        done = False
        info = {}

        angle_change = math.pi / 8
        x, y = self.organism_position

        if action == 0:
            y -= self.organism_speed
        elif action == 1:
            y += self.organism_speed
        elif action == 2:
            x -= self.organism_speed
            self.organism_direction -= angle_change
        elif action == 3:
            x += self.organism_speed
            self.organism_direction += angle_change

        # Keep the whole circle inside the window.
        x = max(self.organism_radius, min(x, self.WIDTH - self.organism_radius))
        y = max(self.organism_radius, min(y, self.HEIGHT - self.organism_radius))

        self.organism_position = [x, y]

        distance_to_goal = math.sqrt((x - self.goal_position[0])**2 + (y - self.goal_position[1])**2)
        if distance_to_goal < self.goal_radius:
            reward += 10
            done = True
            self.goal_position = self._random_goal_position()

        observation = self._get_observation()
        return observation, reward, done, info

    def reset(self):
        """Move the organism back to the window centre, zero the score and return the first frame.

        The goal position and the organism heading are not reset.
        """
        self.organism_position = [self.WIDTH // 2, self.HEIGHT // 2]
        self.score = 0
        return self._get_observation()

    def render(self, mode='human'):
        """Draw the current state and flip it to the pygame window."""
        self._draw_scene()
        pygame.display.update()

    def close(self):
        """Shut pygame down, closing the window."""
        pygame.quit()

In [7]:
class DQN:
    """Minimal Deep Q-Network agent with an epsilon-greedy policy and replay memory.

    Args:
        state_space: Shape of a single observation (without the batch axis).
        action_space: Number of discrete actions.

    States passed to ``remember``, ``act`` and ``replay`` are expected to carry
    a leading batch axis of size 1, which is how the training loop in this
    notebook reshapes them.
    """

    def __init__(self, state_space, action_space):
        self.state_space = state_space
        self.action_space = action_space
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first

        self.gamma = 0.95    # discount factor
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001

        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network: Flatten -> Dense(24) -> Dense(24) -> Dense(n_actions)."""
        model = Sequential()
        model.add(Flatten(input_shape=tuple(self.state_space)))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_space, activation='linear'))
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by current Keras releases.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index: random with probability ``epsilon``, otherwise greedy."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        # verbose=0 keeps Keras from printing a progress bar on every call.
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the network on one random minibatch and decay ``epsilon``.

        Does nothing when ``batch_size`` is not positive or the memory holds
        fewer than ``batch_size`` transitions.

        The whole minibatch is evaluated with two ``predict`` calls and trained
        with a single ``fit`` call instead of one predict/fit round trip per
        sample.
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)

        # Each stored state has shape (1, ...); stacking yields (batch, ...).
        states = np.vstack([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float64)
        next_states = np.vstack([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        next_q = np.asarray(self.model.predict(next_states, verbose=0))
        # Copy so the prediction buffer can be edited in place.
        targets = np.array(self.model.predict(states, verbose=0))

        # Bellman target: r for terminal transitions, r + gamma * max_a' Q(s', a') otherwise.
        td_targets = rewards + self.gamma * np.max(next_q, axis=1) * (~dones)
        # Only the Q-value of the action actually taken is moved towards the
        # target; the other outputs keep their predicted values (zero error).
        targets[np.arange(batch_size), actions] = td_targets

        self.model.fit(states, targets, epochs=1, batch_size=batch_size, verbose=0)

        if self.epsilon > self.epsilon_min:
            # Clamp so the exploration rate never drops below its floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [None]:
# numpy (np), CustomEnv and DQN are provided by the cells above. The legacy
# `gym` package is not imported here: only gymnasium is installed/used by this
# notebook, and importing `gym` would rebind the `gym` alias set in the import cell.

# Training parameters
episodes = 10
batch_size = 32
max_steps = 50  # upper bound on steps per episode

# Create the environment first
env = CustomEnv()
state_size = env.observation_space.shape
action_size = env.action_space.n

# Create the DQN agent
dqn_agent = DQN(state_size, action_size)

# The network expects a leading batch axis of size 1 on every state.
batched_shape = (1, *state_size)

try:
    for e in range(episodes):
        # Reset the environment and fetch the initial state
        state = np.reshape(env.reset(), batched_shape)

        for step_idx in range(max_steps):
            # Let the agent choose an action
            action = dqn_agent.act(state)

            # Apply the action and observe the outcome
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, batched_shape)

            # Store the transition in replay memory
            dqn_agent.remember(state, action, reward, next_state, done)

            state = next_state

            if done:
                print("Episode: {}/{}, Score: {}".format(e, episodes, step_idx))
                break

            # Start training once enough experience has been collected
            if len(dqn_agent.memory) > batch_size:
                dqn_agent.replay(batch_size)
        else:
            # The step cap was hit without reaching the goal.
            print("Episode: {}/{}, goal not reached in {} steps".format(e, episodes, max_steps))

    # Save the model once training has finished
    dqn_agent.model.save('dqn_model.h5')
finally:
    # Always release the pygame window, even if training raised.
    env.close()
