In [1]:
!pip install pandas numpy scikit-learn gym stable-baselines3 joblib

Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
     ---------------------------------------- 0.0/721.7 kB ? eta -:--:--
     ---------------------------------------- 0.0/721.7 kB ? eta -:--:--
     --------------------------- ---------- 524.3/721.7 kB 2.1 MB/s eta 0:00:01
     -------------------------------------- 721.7/721.7 kB 2.0 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting gym_notices>=0.0.4 (from gym)
  Downloading gym_notices-0.0.8-py3-none-any.whl.metadata (1.0 kB)
Downloading gym_notices-0.0.8-py3-none-any.whl (3.0 kB)
Building wheels for collected packages: gym
  Building wheel for gym (pyproject.toml): started
  Building wheel for gym (pyproject.toml): finish

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

# Charger les données
data = pd.read_csv("donnees.csv")

# Filtrer les outliers (Temp > 40°C est incohérent pour l’aquaculture)
data = data[data["Temp"] <= 40]

# Définir les features (10 variables) et la cible (Water Quality)
features = ["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", "Alkalinity (mg L-1 )", 
            "Ammonia (mg L-1 )", "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"]
X = data[features]
y = data["Water Quality"]

# Diviser les données en train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entraîner un modèle Random Forest
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Évaluer le modèle
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Précision du modèle de classification : {accuracy:.2f}")

# Sauvegarder le modèle
joblib.dump(classifier, "water_quality_classifier.pkl")
print("Modèle de classification sauvegardé sous 'water_quality_classifier.pkl'.")

Précision du modèle de classification : 0.97
Modèle de classification sauvegardé sous 'water_quality_classifier.pkl'.


In [7]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # 11 dimensions
            high=np.array([100, 40, 15, 500, 14, 5, 5, 1000, 5, 50, 2]),  # Quality max = 2 (Poor)
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
       full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense
        if (self.state[2] > 6 and 6.5 <= self.state[4] <= 8.5 and 
            self.state[5] < 0.5 and self.state[8] < 0.5 and self.state[7] < 500 and self.state[9] < 15):
            quality = 0
            reward = 15
        elif (self.state[2] > 4 and 6.0 <= self.state[4] <= 9.0 and 
              self.state[5] < 1.0 and self.state[8] < 1.0 and self.state[7] < 700 and self.state[9] < 20):
            quality = 1
            reward = 0
        else:
            quality = 2
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

IndentationError: unindent does not match any outer indentation level (<string>, line 46)

In [8]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # 11 dimensions
            high=np.array([100, 40, 15, 500, 14, 5, 5, 1000, 5, 50, 2]),  # Quality max = 2 (Poor)
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense
        if (self.state[2] > 6 and 6.5 <= self.state[4] <= 8.5 and 
            self.state[5] < 0.5 and self.state[8] < 0.5 and self.state[7] < 500 and self.state[9] < 15):
            quality = 0
            reward = 15
        elif (self.state[2] > 4 and 6.0 <= self.state[4] <= 9.0 and 
              self.state[5] < 1.0 and self.state[8] < 1.0 and self.state[7] < 700 and self.state[9] < 20):
            quality = 1
            reward = 0
        else:
            quality = 2
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [9]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),  # 11 dimensions
            high=np.array([100, 40, 15, 500, 14, 5, 5, 1000, 5, 50, 2], dtype=np.float32),  # Quality max = 2 (Poor)
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense
        if (self.state[2] > 6 and 6.5 <= self.state[4] <= 8.5 and 
            self.state[5] < 0.5 and self.state[8] < 0.5 and self.state[7] < 500 and self.state[9] < 15):
            quality = 0
            reward = 15
        elif (self.state[2] > 4 and 6.0 <= self.state[4] <= 9.0 and 
              self.state[5] < 1.0 and self.state[8] < 1.0 and self.state[7] < 700 and self.state[9] < 20):
            quality = 1
            reward = 0
        else:
            quality = 2
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        # Convertir quality_pred en float32
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [10]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

AssertionError: Your environment must inherit from the gymnasium.Env class cf. https://gymnasium.farama.org/api/env/

In [11]:
import gymnasium as gym  # Remplacer gym par gymnasium
import numpy as np
import pandas as pd
from gymnasium import spaces  # Importer spaces depuis gymnasium
import joblib

class AquaticEnv(gym.Env):  # gymnasium.Env est maintenant utilisé via l'alias gym
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 500, 14, 5, 5, 1000, 5, 50, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense
        if (self.state[2] > 6 and 6.5 <= self.state[4] <= 8.5 and 
            self.state[5] < 0.5 and self.state[8] < 0.5 and self.state[7] < 500 and self.state[9] < 15):
            quality = 0
            reward = 15
        elif (self.state[2] > 4 and 6.0 <= self.state[4] <= 9.0 and 
              self.state[5] < 1.0 and self.state[8] < 1.0 and self.state[7] < 700 and self.state[9] < 20):
            quality = 1
            reward = 0
        else:
            quality = 2
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [12]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

AssertionError: The observation returned by the `reset()` method does not match the bounds of the given observation space Box(0.0, [ 100.   40.   15.  500.   14.    5.    5. 1000.    5.   50.    2.], (11,), float32). 
1 invalid indices: 
Expected: 0.0 <= obs[7] <= 1000.0, actual value: 3804.268798828125 


In [14]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),  # 11 dimensions
            high=np.array([100, 40, 15, 500, 14, 5, 5, 5000, 5, 50, 2], dtype=np.float32),  # Ajuster Plankton à 5000
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense
        if (self.state[2] > 6 and 6.5 <= self.state[4] <= 8.5 and 
            self.state[5] < 0.5 and self.state[8] < 0.5 and self.state[7] < 500 and self.state[9] < 15):
            quality = 0
            reward = 15
        elif (self.state[2] > 4 and 6.0 <= self.state[4] <= 9.0 and 
              self.state[5] < 1.0 and self.state[8] < 1.0 and self.state[7] < 700 and self.state[9] < 20):
            quality = 1
            reward = 0
        else:
            quality = 2
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [15]:
import pandas as pd
data = pd.read_csv("donnees.csv")
print(data.describe())  # Statistiques générales
print("\nStatistiques par catégorie de Water Quality :")
for column in data.columns:
    if column != "Water Quality":
        print(f"\n{column} :")
        print(data.groupby("Water Quality")[column].describe())

              Temp  Turbidity (cm)     DO(mg/L)          CO2          pH`  \
count  4300.000000     4300.000000  4300.000000  4300.000000  4300.000000   
mean     25.695663       39.046681     5.300346     6.375929     7.713468   
std       9.670202       20.942726     1.832736     2.831104     1.580282   
min       0.194046        0.051424     0.133928     0.001155     0.003944   
25%      19.775926       22.224140     3.978366     5.048761     6.443019   
50%      25.041849       30.205696     5.000754     6.598076     7.743176   
75%      30.277718       55.945735     6.521225     8.241532     9.035251   
max      84.251522       99.797743    14.970122    14.984216    14.851195   

       Alkalinity (mg L-1 )  Ammonia (mg L-1 )  Nitrite (mg L-1 )  \
count           4300.000000        4300.000000        4300.000000   
mean              93.717183           0.048269           0.642948   
std               68.949048           0.122886           0.903592   
min               25.012328   

In [16]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),  # 11 dimensions
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),  # Ajuster les max
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec les nouveaux seuils
        if (self.state[2] <= 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.02 and  # Ammonia
            self.state[8] <= 0.02 and  # Nitrite
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (5.0 < self.state[2] <= 8.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.05 and  # Ammonia
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [17]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),  # 11 dimensions
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),  # Ajuster les max
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.02  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.01  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec les nouveaux seuils
        if (self.state[2] > 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.02 and  # Ammonia
            self.state[8] <= 0.02 and  # Nitrite
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (3.0 < self.state[2] <= 5.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.05 and  # Ammonia
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [18]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),  # 11 dimensions
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),  # Ajuster les max
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente (réduit de 0.02 à 0.01)
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente (réduit de 0.01 à 0.005)
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec les nouveaux seuils
        if (self.state[2] > 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.025 and  # Ammonia (assoupli à 0.025)
            self.state[8] <= 0.025 and  # Nitrite (assoupli à 0.025)
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (3.0 < self.state[2] <= 5.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.05 and  # Ammonia
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = self.state[2] <= 0
        truncated = self.current_step >= self.max_steps
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [19]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

AssertionError: The `terminated` signal must be a boolean

In [20]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),  # 11 dimensions
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),  # Ajuster les max
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.05)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.05)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.03)  # Nitrite
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente (réduit de 0.02 à 0.01)
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente (réduit de 0.01 à 0.005)
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.01 * self.state[0] - 0.1 * self.state[1] - 0.2 * self.state[5] - 0.1 * self.state[7] - 0.2 * self.state[8] - 0.05 * self.state[9])  # DO diminue
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec les nouveaux seuils
        if (self.state[2] > 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.025 and  # Ammonia (assoupli à 0.025)
            self.state[8] <= 0.025 and  # Nitrite (assoupli à 0.025)
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (3.0 < self.state[2] <= 5.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.05 and  # Ammonia
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        # S'assurer que terminated et truncated sont des booléens
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [21]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -15      |
| time/              |          |
|    fps             | 41       |
|    iterations      | 1        |
|    time_elapsed    | 48       |
|    total_timesteps | 2048     |
---------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -15      |
| time/                   |          |
|    fps                  | 37       |
|    iterations           | 2        |
|    time_elapsed         | 109      |
|    total_timesteps      | 4096     |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -1

KeyboardInterrupt: 

In [22]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia (augmenté de 0.1 à 0.2)
            self.state[8] = max(0, self.state[8] - 0.1)  # Nitrite (augmenté de 0.05 à 0.1)
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia (augmenté de 0.05 à 0.1)
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.06)  # Nitrite (augmenté de 0.03 à 0.06)
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        # Réduire l'impact sur le DO (divisé par 10)
        self.state[2] = max(0, self.state[2] - 0.001 * self.state[0] - 0.01 * self.state[1] - 0.02 * self.state[5] - 0.01 * self.state[7] - 0.02 * self.state[8] - 0.005 * self.state[9])
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec des seuils assouplis
        if (self.state[2] > 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.05 and  # Ammonia (assoupli à 0.05)
            self.state[8] <= 0.05 and  # Nitrite (assoupli à 0.05)
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (3.0 < self.state[2] <= 5.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.1 and  # Ammonia (assoupli à 0.1)
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [23]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -15      |
| time/              |          |
|    fps             | 39       |
|    iterations      | 1        |
|    time_elapsed    | 52       |
|    total_timesteps | 2048     |
---------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1        |
|    ep_rew_mean          | -15      |
| time/                   |          |
|    fps                  | 42       |
|    iterations           | 2        |
|    time_elapsed         | 96       |
|    total_timesteps      | 4096     |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -1

KeyboardInterrupt: 

In [24]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite (augmenté de 0.1 à 0.3)
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite (augmenté de 0.06 à 0.15)
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente

In [25]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite (augmenté de 0.1 à 0.3)
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite (augmenté de 0.06 à 0.15)
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        # Réduire encore l'impact du Plankton sur le DO
        self.state[2] = max(0, self.state[2] - 0.001 * self.state[0] - 0.01 * self.state[1] - 0.02 * self.state[5] - 0.0001 * self.state[7] - 0.02 * self.state[8] - 0.005 * self.state[9])
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec des seuils assouplis
        if (self.state[2] > 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.05 and  # Ammonia
            self.state[8] <= 0.1 and  # Nitrite (assoupli à 0.1)
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (3.0 < self.state[2] <= 5.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.1 and  # Ammonia
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [26]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8        |
|    ep_rew_mean     | -118     |
| time/              |          |
|    fps             | 80       |
|    iterations      | 1        |
|    time_elapsed    | 25       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8           |
|    ep_rew_mean          | -118        |
| time/                   |             |
|    fps                  | 77          |
|    iterations           | 2           |
|    time_elapsed         | 52          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010691894 |
|    clip_fraction        | 0.0592      |
|    clip_range           | 0.2  

KeyboardInterrupt: 

In [27]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8        |
|    ep_rew_mean     | -118     |
| time/              |          |
|    fps             | 82       |
|    iterations      | 1        |
|    time_elapsed    | 24       |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8            |
|    ep_rew_mean          | -117         |
| time/                   |              |
|    fps                  | 78           |
|    iterations           | 2            |
|    time_elapsed         | 52           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0105730295 |
|    clip_fraction        | 0.0856       |
|    clip_range      

KeyboardInterrupt: 

In [28]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite (augmenté de 0.1 à 0.3)
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = max(0, self.state[0] - 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite (augmenté de 0.06 à 0.15)
            self.state[9] = max(0, self.state[9] - 1.0)  # CO2

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        # Réduire encore l'impact du Plankton sur le DO
        self.state[2] = max(0, self.state[2] - 0.001 * self.state[0] - 0.01 * self.state[1] - 0.02 * self.state[5] - 0.0001 * self.state[7] - 0.02 * self.state[8] - 0.005 * self.state[9])
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense avec des seuils assouplis
        if (self.state[2] > 5.0 and  # DO
            30 <= self.state[0] <= 80 and  # Turbidity
            5 <= self.state[9] <= 8 and  # CO2
            6.5 <= self.state[4] <= 9.0 and  # pH
            self.state[3] <= 100 and  # Alkalinity
            self.state[5] <= 0.05 and  # Ammonia
            self.state[8] <= 0.1 and  # Nitrite (assoupli à 0.1)
            3000 <= self.state[7] <= 4500):  # Plankton
            quality = 0  # Excellent
            reward = 15
        elif (3.0 < self.state[2] <= 5.0 and  # DO
              self.state[0] < 30 and  # Turbidity
              self.state[9] <= 10 and  # CO2
              6.0 <= self.state[4] <= 9.5 and  # pH
              self.state[3] <= 200 and  # Alkalinity
              self.state[5] <= 0.1 and  # Ammonia
              self.state[8] <= 2.0 and  # Nitrite
              2000 <= self.state[7] <= 6000):  # Plankton
            quality = 1  # Good
            reward = 0
        else:
            quality = 2  # Poor
            reward = -15

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [29]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8        |
|    ep_rew_mean     | -118     |
| time/              |          |
|    fps             | 61       |
|    iterations      | 1        |
|    time_elapsed    | 33       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8           |
|    ep_rew_mean          | -117        |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 2           |
|    time_elapsed         | 70          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011194663 |
|    clip_fraction        | 0.0117      |
|    clip_range           | 0.2  

In [30]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity (augmenté légèrement)
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.5)  # CO2 (augmenté légèrement)

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        # Réduire encore l'impact sur le DO
        self.state[2] = max(0, self.state[2] - 0.0001 * self.state[0] - 0.001 * self.state[1] - 0.002 * self.state[5] - 0.00001 * self.state[7] - 0.002 * self.state[8] - 0.0005 * self.state[9])
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Prédire la qualité actuelle
        quality_pred = self._predict_quality(self.state)

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 5.0:  # DO
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity (inversé pour Excellent)
            criteria_met += 1
        if 4 <= self.state[9] <= 8:  # CO2 (assoupli)
            criteria_met += 1
        if 6.5 <= self.state[4] <= 9.0:  # pH
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if self.state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        # Récompense progressive : -15 si aucun critère, +15 si tous les critères
        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        # Prédire la qualité avec le modèle de classification
        state_df = pd.DataFrame([state], columns=["Turbidity (cm)", "Temp", "DO(mg/L)", "CO2", "pH`", 
                                                  "Alkalinity (mg L-1 )", "Ammonia (mg L-1 )", 
                                                  "Nitrite (mg L-1 )", "Phosphorus (mg L-1 )", "Plankton (No. L-1)"])
        quality_pred = self.classifier.predict(state_df)[0]
        quality_pred = np.float32(quality_pred)
        return quality_pred

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

# Test de l’environnement
env = AquaticEnv()
obs, info = env.reset()
print("État initial (avec prédiction de qualité) :", obs)

État initial (avec prédiction de qualité) : [3.8979897e+01 2.4581184e+01 5.2833147e+00 9.2016495e+01 7.7198887e+00
 4.2965785e-02 1.1633391e+00 3.8042688e+03 6.1079055e-01 6.3128324e+00
 2.0000000e+00]


In [31]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 69.7     |
|    ep_rew_mean     | -123     |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 70.6        |
|    ep_rew_mean          | -66.2       |
| time/                   |             |
|    fps                  | 71          |
|    iterations           | 2           |
|    time_elapsed         | 57          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011519559 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2  

KeyboardInterrupt: 

In [32]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.5)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.5)  # CO2
            self.state[2] = min(15, self.state[2] + 0.5)  # Augmenter DO

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.0001 * self.state[0] - 0.001 * self.state[1] - 0.002 * self.state[5] - 0.00001 * self.state[7] - 0.002 * self.state[8] - 0.0005 * self.state[9])
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 4.5:  # DO (assoupli)
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity
            criteria_met += 1
        if 3.5 <= self.state[9] <= 9:  # CO2 (assoupli)
            criteria_met += 1
        if 6.5 <= self.state[4] <= 9.0:  # pH
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if self.state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        quality_pred = np.float32(quality)

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        criteria_met = 0
        if state[2] > 4.5:  # DO
            criteria_met += 1
        if state[0] < 30:  # Turbidity
            criteria_met += 1
        if 3.5 <= state[9] <= 9:  # CO2
            criteria_met += 1
        if 6.5 <= state[4] <= 9.0:  # pH
            criteria_met += 1
        if state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= state[7] <= 4500:  # Plankton
            criteria_met += 1

        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        return np.float32(quality)

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [33]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO avec des hyperparamètres ajustés
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    ent_coef=0.01,  # Augmenter l’exploration
    clip_range=0.1,  # Réduire le clip range pour des mises à jour plus conservatrices
    learning_rate=0.0003
)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | 186      |
| time/              |          |
|    fps             | 853      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 100         |
|    ep_rew_mean          | 210         |
| time/                   |             |
|    fps                  | 646         |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.002835816 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.1  

In [34]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.2)  # CO2 (réduit)
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.8)  # CO2 (augmenté)
            self.state[2] = min(15, self.state[2] + 0.8)  # Augmenter DO (augmenté)

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.0001 * self.state[0] - 0.001 * self.state[1] - 0.002 * self.state[5] - 0.000005 * self.state[7] - 0.002 * self.state[8] - 0.0005 * self.state[9])  # Réduction de l’impact sur DO
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 4.5:  # DO
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= self.state[9] <= 9:  # CO2 (assoupli)
            criteria_met += 1
        if 6.5 <= self.state[4] <= 9.0:  # pH
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if self.state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        quality_pred = np.float32(quality)

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        criteria_met = 0
        if state[2] > 4.5:  # DO
            criteria_met += 1
        if state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= state[9] <= 9:  # CO2 (assoupli)
            criteria_met += 1
        if 6.5 <= state[4] <= 9.0:  # pH
            criteria_met += 1
        if state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= state[7] <= 4500:  # Plankton
            criteria_met += 1

        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        return np.float32(quality)

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [35]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO avec des hyperparamètres ajustés
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    ent_coef=0.02,  # Augmenter l’exploration
    clip_range=0.1,  # Réduire le clip range pour des mises à jour plus conservatrices
    learning_rate=0.0003
)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | 156      |
| time/              |          |
|    fps             | 873      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 100          |
|    ep_rew_mean          | 228          |
| time/                   |              |
|    fps                  | 430          |
|    iterations           | 2            |
|    time_elapsed         | 9            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0031055182 |
|    clip_fraction        | 0.219        |
|    clip_range      

In [36]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.2)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.8)  # CO2
            self.state[2] = min(15, self.state[2] + 0.8)  # DO
            self.state[4] = min(14, self.state[4] + 0.1)  # Augmenter pH (nouveau)

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.0001 * self.state[0] - 0.001 * self.state[1] - 0.002 * self.state[5] - 0.000005 * self.state[7] - 0.002 * self.state[8] - 0.0005 * self.state[9])  # DO
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.005 * self.state[9]))  # pH (impact CO2 réduit)

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 4.5:  # DO
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= self.state[9] <= 9:  # CO2
            criteria_met += 1
        if 5.5 <= self.state[4] <= 9.0:  # pH (assoupli)
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if self.state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        quality_pred = np.float32(quality)

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        criteria_met = 0
        if state[2] > 4.5:  # DO
            criteria_met += 1
        if state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= state[9] <= 9:  # CO2
            criteria_met += 1
        if 5.5 <= state[4] <= 9.0:  # pH (assoupli)
            criteria_met += 1
        if state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= state[7] <= 4500:  # Plankton
            criteria_met += 1

        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        return np.float32(quality)

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [37]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO avec des hyperparamètres ajustés
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    ent_coef=0.03,  # Augmenter l’exploration
    clip_range=0.1,  # Réduire le clip range pour des mises à jour plus conservatrices
    learning_rate=0.0003
)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | 417      |
| time/              |          |
|    fps             | 885      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 100          |
|    ep_rew_mean          | 429          |
| time/                   |              |
|    fps                  | 642          |
|    iterations           | 2            |
|    time_elapsed         | 6            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0046290793 |
|    clip_fraction        | 0.261        |
|    clip_range      

In [38]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.2)  # CO2
            self.state[7] = max(0, self.state[7] - 50.0)  # Réduire Plankton (nouveau)
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.8)  # CO2
            self.state[2] = min(15, self.state[2] + 0.8)  # DO
            self.state[4] = min(14, self.state[4] + 0.1)  # Augmenter pH

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.1 * self.state[6]  # Plankton augmente (impact réduit)
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.00005 * self.state[0] - 0.0005 * self.state[1] - 0.001 * self.state[5] - 0.000005 * self.state[7] - 0.001 * self.state[8] - 0.0003 * self.state[9])  # DO (impact réduit)
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.005 * self.state[9]))  # pH

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 4.5:  # DO
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= self.state[9] <= 9:  # CO2
            criteria_met += 1
        if 5.5 <= self.state[4] <= 9.0:  # pH
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.06:  # Ammonia (assoupli)
            criteria_met += 1
        if self.state[8] <= 0.15:  # Nitrite (assoupli)
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        quality_pred = np.float32(quality)

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        criteria_met = 0
        if state[2] > 4.5:  # DO
            criteria_met += 1
        if state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= state[9] <= 9:  # CO2
            criteria_met += 1
        if 5.5 <= state[4] <= 9.0:  # pH
            criteria_met += 1
        if state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if state[5] <= 0.06:  # Ammonia (assoupli)
            criteria_met += 1
        if state[8] <= 0.15:  # Nitrite (assoupli)
            criteria_met += 1
        if 3000 <= state[7] <= 4500:  # Plankton
            criteria_met += 1

        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        return np.float32(quality)

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [39]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO avec des hyperparamètres ajustés
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    ent_coef=0.04,  # Augmenter l’exploration
    clip_range=0.1,
    learning_rate=0.0003
)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | 184      |
| time/              |          |
|    fps             | 876      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 100          |
|    ep_rew_mean          | 194          |
| time/                   |              |
|    fps                  | 639          |
|    iterations           | 2            |
|    time_elapsed         | 6            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0025795917 |
|    clip_fraction        | 0.0342       |
|    clip_range      

In [40]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.2)  # CO2
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.8)  # CO2
            self.state[2] = min(15, self.state[2] + 0.8)  # DO
            self.state[4] = min(14, self.state[4] + 0.1)  # Augmenter pH (nouveau)

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.0001 * self.state[0] - 0.001 * self.state[1] - 0.002 * self.state[5] - 0.000005 * self.state[7] - 0.002 * self.state[8] - 0.0005 * self.state[9])  # DO
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.005 * self.state[9]))  # pH (impact CO2 réduit)

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 4.5:  # DO
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= self.state[9] <= 9:  # CO2
            criteria_met += 1
        if 5.5 <= self.state[4] <= 9.0:  # pH (assoupli)
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if self.state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        quality_pred = np.float32(quality)

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        criteria_met = 0
        if state[2] > 4.5:  # DO
            criteria_met += 1
        if state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= state[9] <= 9:  # CO2
            criteria_met += 1
        if 5.5 <= state[4] <= 9.0:  # pH (assoupli)
            criteria_met += 1
        if state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= state[7] <= 4500:  # Plankton
            criteria_met += 1

        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        return np.float32(quality)

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [41]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO avec des hyperparamètres ajustés
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    ent_coef=0.03,  # Augmenter l’exploration
    clip_range=0.1,  # Réduire le clip range pour des mises à jour plus conservatrices
    learning_rate=0.0003
)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | 371      |
| time/              |          |
|    fps             | 907      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 100          |
|    ep_rew_mean          | 412          |
| time/                   |              |
|    fps                  | 564          |
|    iterations           | 2            |
|    time_elapsed         | 7            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0027778745 |
|    clip_fraction        | 0.103        |
|    clip_range      

In [42]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
import joblib

class AquaticEnv(gym.Env):
    def __init__(self):
        # Charger les données
        data = pd.read_csv("donnees.csv")
        data = data[data["Temp"] <= 40]
        # Initialiser l'état avec les moyennes des colonnes
        self.initial_state = np.array([
            data["Turbidity (cm)"].mean(),
            data["Temp"].mean(),
            data["DO(mg/L)"].mean(),
            data["Alkalinity (mg L-1 )"].mean(),
            data["pH`"].mean(),
            data["Ammonia (mg L-1 )"].mean(),
            data["Phosphorus (mg L-1 )"].mean(),
            data["Plankton (No. L-1)"].mean(),
            data["Nitrite (mg L-1 )"].mean(),
            data["CO2"].mean()
        ], dtype=np.float32)

        # Charger le modèle de classification
        self.classifier = joblib.load("water_quality_classifier.pkl")

        # Définir l'espace d'état (10 variables + prédiction de qualité)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 40, 15, 300, 15, 1, 5, 7500, 5, 15, 2], dtype=np.float32),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0 = rien, 1 = réduire pollution, 2 = augmenter débit
        self.state = self.initial_state.copy()
        self.max_steps = 100
        self.current_step = 0

    def reset(self, seed=None):
        self.state = self.initial_state.copy()
        self.current_step = 0
        # Prédire la qualité initiale
        quality_pred = self._predict_quality(self.state)
        # Ajouter la prédiction à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        return full_state, {}

    def step(self, action):
        # Appliquer l’action
        if action == 1:  # Réduire pollution
            self.state[0] = max(0, self.state[0] - 5.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.2)  # Ammonia
            self.state[8] = max(0, self.state[8] - 0.3)  # Nitrite
            self.state[9] = max(0, self.state[9] - 0.2)  # CO2 (réduit)
        elif action == 2:  # Augmenter débit
            self.state[3] = min(500, self.state[3] + 10.0)  # Alkalinity
            self.state[0] = min(100, self.state[0] + 2.0)  # Turbidity
            self.state[5] = max(0, self.state[5] - 0.1)  # Ammonia
            self.state[6] = max(0, self.state[6] - 0.05)  # Phosphorus
            self.state[8] = max(0, self.state[8] - 0.15)  # Nitrite
            self.state[9] = min(15, self.state[9] + 0.8)  # CO2 (augmenté)
            self.state[2] = min(15, self.state[2] + 0.8)  # Augmenter DO (augmenté)

        # Simuler les effets environnementaux
        self.state[1] += 0.1  # Temp augmente
        self.state[0] += 1.0  # Turbidity augmente
        self.state[5] += 0.01  # Ammonia augmente
        self.state[6] += 0.01  # Phosphorus augmente
        self.state[7] += 0.5 * self.state[6]  # Plankton augmente
        self.state[8] += 0.005  # Nitrite augmente
        self.state[9] += 0.2  # CO2 augmente
        self.state[2] = max(0, self.state[2] - 0.0001 * self.state[0] - 0.001 * self.state[1] - 0.002 * self.state[5] - 0.000005 * self.state[7] - 0.002 * self.state[8] - 0.0005 * self.state[9])  # Réduction de l’impact sur DO
        self.state[4] = max(0, min(14, self.state[4] + 0.01 * (self.state[3] / 500) - 0.01 * (self.state[5] + self.state[8]) - 0.02 * self.state[9]))  # pH

        # Calculer la récompense de manière progressive
        criteria_met = 0
        if self.state[2] > 4.5:  # DO
            criteria_met += 1
        if self.state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= self.state[9] <= 9:  # CO2 (assoupli)
            criteria_met += 1
        if 6.5 <= self.state[4] <= 9.0:  # pH
            criteria_met += 1
        if self.state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if self.state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if self.state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= self.state[7] <= 4500:  # Plankton
            criteria_met += 1

        reward = -15 + (30 * criteria_met / 8)  # Échelle de -15 à +15
        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        quality_pred = np.float32(quality)

        # Ajouter la prédiction de qualité à l'état
        full_state = np.append(self.state, quality_pred).astype(np.float32)
        self.current_step += 1
        terminated = bool(self.state[2] <= 0)
        truncated = bool(self.current_step >= self.max_steps)
        return full_state, reward, terminated, truncated, {"predicted_quality": quality_pred}

    def _predict_quality(self, state):
        criteria_met = 0
        if state[2] > 4.5:  # DO
            criteria_met += 1
        if state[0] < 30:  # Turbidity
            criteria_met += 1
        if 2.5 <= state[9] <= 9:  # CO2 (assoupli)
            criteria_met += 1
        if 6.5 <= state[4] <= 9.0:  # pH
            criteria_met += 1
        if state[3] <= 100:  # Alkalinity
            criteria_met += 1
        if state[5] <= 0.05:  # Ammonia
            criteria_met += 1
        if state[8] <= 0.1:  # Nitrite
            criteria_met += 1
        if 3000 <= state[7] <= 4500:  # Plankton
            criteria_met += 1

        if criteria_met == 8:
            quality = 0  # Excellent
        elif criteria_met >= 5:
            quality = 1  # Good
        else:
            quality = 2  # Poor

        return np.float32(quality)

    def render(self):
        print(f"Turbidity: {self.state[0]} cm, Temp: {self.state[1]}°C, DO: {self.state[2]} mg/L, Alkalinity: {self.state[3]} mg/L, pH: {self.state[4]}, Ammonia: {self.state[5]} mg/L, Phosphorus: {self.state[6]} mg/L, Plankton: {self.state[7]} No./L, Nitrite: {self.state[8]} mg/L, CO2: {self.state[9]} mg/L")

In [43]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Vérifier l’environnement
env = AquaticEnv()
check_env(env)
print("L’environnement est valide !")

# Entraîner le modèle PPO avec des hyperparamètres ajustés
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    ent_coef=0.02,  # Augmenter l’exploration
    clip_range=0.1,  # Réduire le clip range pour des mises à jour plus conservatrices
    learning_rate=0.0003
)
model.learn(total_timesteps=500_000)
model.save("aqua_ppo_hybrid_model")
print("Entraînement terminé.")

# Tester le modèle
model = PPO.load("aqua_ppo_hybrid_model")
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    env.render()
    print(f"Qualité prédite : {info['predicted_quality']} (0=Excellent, 1=Good, 2=Poor)")
    done = terminated or truncated

print(f"Récompense totale de l’épisode : {total_reward}")

L’environnement est valide !
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | 150      |
| time/              |          |
|    fps             | 935      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 100         |
|    ep_rew_mean          | 195         |
| time/                   |             |
|    fps                  | 452         |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.003333657 |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.1  