In [None]:
import gym
import numpy as np
from gym import spaces
from dd_class import Dragodinde
from dd_class import Elevage

In [None]:
class ElevageEnv(gym.Env):
    """Gym environment in which each action selects two dragodindes to breed.

    The environment wraps an ``Elevage`` (exposing a ``dragodindes`` list).
    Every step decodes the discrete action into two list indices, appends a
    child to the herd when the indices differ, and returns an observation
    with one row per dragodinde: ``[id, generation, gender, color]``.

    Attributes set in ``__init__``:
        elevage: the wrapped herd container.
        action_space: ``Discrete(n ** 2)`` where ``n`` is the initial herd size.
        observation_space: ``Box`` sized for the initial herd.
        state: last observation returned by ``reset``/``step``.
        current_step: number of ``step`` calls since the last reset.
        max_steps: step budget after which the episode is forced to end.
        max_generations: generation count that ends the episode.
        generation: highest generation bred so far in the current episode.
    """

    def __init__(self, elevage):
        super().__init__()
        self.elevage = elevage

        herd_size = len(elevage.dragodindes)

        # Define the action and observation space.
        # One action per ordered pair of indices into the initial herd.
        self.action_space = spaces.Discrete(herd_size ** 2)
        # NOTE(review): the herd grows by one row per successful breeding and
        # ids / the -1 "unknown color" code can leave [0, 10], so observations
        # will not stay inside this declared space - confirm whether a padded,
        # fixed-size observation is wanted.
        self.observation_space = spaces.Box(
            low=0, high=10, shape=(herd_size, 4), dtype=np.float32)

        self.state = self._get_observation()
        self.current_step = 0
        self.max_steps = 1000
        self.max_generations = 10
        self.generation = 1

    def _get_observation(self):
        """Return the herd as a float32 array with one row per dragodinde.

        Each row is ``[id, generation, encoded gender, encoded color]``.
        """
        rows = [
            [
                dragodinde.id,
                dragodinde.generation,
                self._encode_gender(dragodinde.gender),
                self._encode_color(dragodinde.color),
            ]
            for dragodinde in self.elevage.dragodindes
        ]
        return np.array(rows, dtype=np.float32)

    def _encode_gender(self, gender):
        """Encode gender numerically: 1 for ``"M"``, 0 for anything else."""
        return 1 if gender == "M" else 0

    def _encode_color(self, color):
        """Encode color numerically; extend the table when colors are added.

        Returns -1 for a color that is not in the table.
        """
        color_encoding = {"Rousse": 0, "Amande": 1, "Dorée": 2}
        return color_encoding.get(color, -1)  # Default to -1 if color not found

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        The action is split into two herd indices (quotient and remainder by
        the current herd size), the pair is bred, the reward is computed and
        the observation is refreshed. ``done`` is forced to ``True`` once
        ``max_steps`` steps have been taken.
        """
        assert self.action_space.contains(action), f"invalid action: {action!r}"
        self.current_step += 1

        # NOTE(review): the divisor is the *current* herd size, which grows
        # past the size used to build ``action_space``; the action -> pair
        # mapping therefore shifts during an episode - confirm this is intended.
        herd_size = len(self.elevage.dragodindes)
        dragodinde_1_idx, dragodinde_2_idx = divmod(action, herd_size)

        self._breed_dragodindes(dragodinde_1_idx, dragodinde_2_idx)

        # The state element returned by _calculate_reward predates this step,
        # so it is discarded and the observation is rebuilt below.
        _, reward, done, info = self._calculate_reward(action)

        if self.current_step >= self.max_steps:
            done = True

        self.state = self._get_observation()

        return self.state, reward, done, info

    def _breed_dragodindes(self, idx1, idx2):
        """Breed the dragodindes at ``idx1`` and ``idx2`` and grow the herd.

        Placeholder breeding rule: when the indices differ, a child is
        appended with a random gender, the color of the first parent and a
        generation one above the older parent. ``self.generation`` is raised
        to the child's generation when that is higher, so that the
        ``max_generations`` termination check and ``render`` reflect the herd.
        Nothing happens when both indices are equal.
        """
        if idx1 == idx2:  # A dragodinde cannot breed with itself
            return

        herd = self.elevage.dragodindes
        parent_1 = herd[idx1]
        parent_2 = herd[idx2]

        child_generation = max(parent_1.generation, parent_2.generation) + 1
        # Implement actual breeding logic here and update self.elevage.dragodindes
        new_dragodinde = Dragodinde(
            id=len(herd) + 1,
            gender="M" if np.random.rand() > 0.5 else "F",
            color=parent_1.color,  # Child inherits the color of parent_1
            generation=child_generation,
        )
        herd.append(new_dragodinde)

        # Without this update the counter stayed at 1 forever and the
        # max_generations branch in _calculate_reward could never trigger.
        self.generation = max(self.generation, child_generation)

    def _calculate_reward(self, action):
        """Return ``(state, reward, done, info)`` for ``action``.

        ``state`` is ``self.state`` as it was before the current step's
        observation refresh. Reaching ``max_generations`` ends the episode
        with reward 1000; otherwise the reward depends on how the action
        value compares to ``self.generation``.
        """
        reward = 0
        done = False
        info = {}

        # NOTE(review): the branches below compare the raw action index with
        # the generation counter; kept as-is - confirm the intended shaping.
        if self.generation >= self.max_generations:
            done = True
            reward = 1000  # High reward for completing the maximum generations

        elif action == self.generation:
            reward = 100  # Positive reward for advancing the generation

        elif action > self.generation:
            reward = 50  # Smaller reward for valid actions advancing the generation

        elif action < self.generation - 2:
            reward = -100  # Penalty for regressing too far back in generations

        return self.state, reward, done, info

    def reset(self):
        """Reset counters, rebuild the predefined herd and return its observation."""
        self.current_step = 0
        self.generation = 1
        # NOTE(review): this discards the elevage passed to __init__ in favour
        # of the predefined herd; the spaces are not rebuilt - confirm.
        self.elevage = self.create_elevage()
        self.state = self._get_observation()
        return self.state

    def render(self, mode='human'):
        """Print the current generation counter and the last observation."""
        print(f"Generation: {self.generation}, State: {self.state}")

    def create_elevage(self):
        """Build a new ``Elevage`` from a predefined set of six dragodindes."""
        # (id, gender, color, generation)
        dragodindes_data = [
            (1, "M", "Rousse", 1),
            (2, "F", "Rousse", 1),
            (3, "M", "Amande", 1),
            (4, "F", "Amande", 1),
            (5, "M", "Dorée", 1),
            (6, "F", "Dorée", 1),
        ]

        list_dd = [
            Dragodinde(dd_id, gender, color, generation)
            for dd_id, gender, color, generation in dragodindes_data
        ]

        return Elevage(list_dd)
    

In [None]:
import tensorflow as tf
import random
from tensorflow.keras import layers
from collections import deque

class DQNAgent:
    """Epsilon-greedy deep Q-learning agent backed by a small Keras MLP.

    Transitions are stored in a bounded replay memory and the network is
    trained on random minibatches drawn from it.

    Attributes set in ``__init__``:
        state_size: input dimension of the Q-network.
        action_size: number of discrete actions (network output dimension).
        memory: replay buffer holding at most 2000 transitions.
        gamma: discount rate applied to the bootstrapped next-state value.
        epsilon: current exploration rate.
        epsilon_min: floor below which ``epsilon`` is no longer decayed.
        epsilon_decay: multiplicative decay applied after each ``replay``.
        learning_rate: learning rate of the Adam optimizer.
        model: the compiled Keras Q-network.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (64-32-16 ReLU MLP, linear output)."""
        model = tf.keras.Sequential()
        # Only the first layer needs the input dimension; later layers infer it.
        model.add(layers.Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(layers.Dense(32, activation='relu'))
        model.add(layers.Dense(16, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        # A loss is required for ``fit``; MSE regresses predicted Q-values
        # onto the targets built in ``replay``.
        model.compile(
            loss='mse',
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value.

        ``state`` is passed straight to ``model.predict`` - presumably a
        batch of one, shaped ``(1, state_size)``; confirm against the caller.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        # Plain int so both branches return the same type.
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train on a random minibatch of stored transitions.

        For each sampled transition the target for the taken action is the
        reward, plus the discounted best predicted next-state value when the
        transition is not terminal. ``epsilon`` is decayed once afterwards.

        Does nothing when fewer than ``batch_size`` transitions are stored,
        instead of letting ``random.sample`` raise.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Only the taken action's Q-value is moved toward the target;
            # the other outputs keep their current predictions.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to ``name``."""
        self.model.save_weights(name)
