# Allocation équitable des ressources en edge computing (RL)

Ce notebook présente un **benchmark minimal et reproductible** de plusieurs techniques d'allocation de ressources (aléatoire, heuristique, DRL) dans un environnement d'edge computing. L'objectif est d'optimiser la **latence moyenne** tout en améliorant l'**équité inter-utilisateurs**.

**Techniques évaluées :**
- Politique aléatoire (baseline)
- Heuristique `min-latence` (baseline)
- DQN (Stable-Baselines3)
- PPO (Stable-Baselines3)


## 1. Imports et configuration

In [None]:
import math
import random
from dataclasses import dataclass
from typing import Dict, List, Tuple

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
import torch

from stable_baselines3 import DQN, PPO

# One seed shared by every random number generator used in this notebook.
SEED = 42

# Seed the standard library, NumPy and PyTorch generators.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


## 2. Environnement Edge Computing (simplifié)
L'environnement simule un flux de tâches hétérogènes. À chaque pas, l'agent choisit d'exécuter la tâche sur un serveur **MEC** (proche, rapide mais limité) ou dans le **Cloud** (puissant mais plus distant).

**Objectifs :**
- Minimiser la latence
- Respecter les deadlines
- Maintenir l'équité inter-utilisateurs


In [None]:
@dataclass
class Task:
    """A single computation task submitted by one user."""

    user_type: str  # name of the user category that issued the task
    workload: float  # millions of instructions
    data_size: float  # MB
    deadline: float  # ms
    priority: int  # priority level of the task


class EdgeResourceEnv(gym.Env):
    """Toy edge-computing environment: offload each incoming task to the MEC or the Cloud.

    Action space: ``Discrete(2)`` -- 0 runs the task on the MEC server, 1 in the Cloud.

    Observation (``float32``): ``[mec_load, cloud_load, workload, data_size,
    deadline, priority, fairness]`` where ``fairness`` is Jain's index computed
    over the per-user-type success rates of the current episode.

    Reward: ``-latency / 100``, plus 1 when the deadline is met or minus 2 when
    it is missed, plus ``0.5 * fairness``.

    An episode ends (``terminated=True``) after ``episode_length`` steps;
    ``truncated`` is always ``False``.

    Task sampling draws from ``self.np_random`` so that ``reset(seed=...)``
    controls the task stream.
    """

    metadata = {"render_modes": ["ansi"]}

    # Nominal (workload, data_size, deadline, priority) per user type; the first
    # three are jittered by a random factor when a task is sampled.
    TASK_PROFILES = {
        "IoT": (200, 1.0, 150, 2),
        "Mobile": (800, 5.0, 120, 3),
        "Vehicle": (3000, 30.0, 30, 5),
        "AR_VR": (2000, 50.0, 40, 4),
        "Industrial": (4000, 80.0, 80, 5),
    }

    # (min, max) load of each server. The same bounds apply whether the load is
    # rising or falling, so a 0.01 relief step is never turned into a larger
    # drop by a tighter upper clip.
    MEC_LOAD_BOUNDS = (0.05, 0.95)
    CLOUD_LOAD_BOUNDS = (0.1, 0.95)

    def __init__(self, episode_length: int = 200):
        """Create the environment.

        Args:
            episode_length: number of tasks (steps) per episode.
        """
        super().__init__()
        self.episode_length = episode_length
        self.step_count = 0

        # Action: 0 = MEC, 1 = Cloud
        self.action_space = spaces.Discrete(2)

        # Observation: [mec_load, cloud_load, workload, data_size, deadline, priority, fairness]
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 1, 1, 0], dtype=np.float32),
            high=np.array([1, 1, 5000, 200, 500, 5, 1], dtype=np.float32),
            dtype=np.float32,
        )

        self.user_types = ["IoT", "Mobile", "Vehicle", "AR_VR", "Industrial"]
        # Sampling probability of each user type (same order as user_types).
        self.user_weights = [0.4, 0.3, 0.15, 0.1, 0.05]
        # Per user type: one 0/1 entry per task handled in the current episode.
        self.satisfaction: Dict[str, List[int]] = {u: [] for u in self.user_types}

        self.mec_capacity = 100  # arbitrary resource units
        self.cloud_capacity = 500
        self.mec_load = 0.2
        self.cloud_load = 0.4
        self.current_task: Task | None = None

        # Becomes True once reset() has initialised self.np_random.
        self._rng_seeded = False

    def _sample_task(self) -> Task:
        """Draw a user type and return its profile with randomly jittered values."""
        rng = self.np_random
        user_type = str(rng.choice(self.user_types, p=self.user_weights))
        workload, data_size, deadline, priority = self.TASK_PROFILES[user_type]
        return Task(
            user_type=user_type,
            workload=workload * rng.uniform(0.8, 1.2),
            data_size=data_size * rng.uniform(0.7, 1.3),
            deadline=deadline * rng.uniform(0.8, 1.2),
            priority=priority,
        )

    def _estimate_latency(self, action: int, task: Task) -> float:
        """Return network + processing + data-transfer latency of ``task`` on the chosen server."""
        if action == 0:  # MEC
            net = 5 + 30 * self.mec_load
            proc = task.workload / (self.mec_capacity * (1 - self.mec_load))
        else:  # Cloud
            net = 25 + 40 * self.cloud_load
            proc = task.workload / (self.cloud_capacity * (1 - self.cloud_load))
        data_delay = task.data_size / 5.0
        return float(net + proc + data_delay)

    def _jain_fairness(self) -> float:
        """Return Jain's index over the success rates of user types seen so far.

        User types with no recorded task are ignored; 0.0 is returned when
        nothing has been recorded yet.
        """
        rates = np.array(
            [np.mean(outcomes) for outcomes in self.satisfaction.values() if outcomes]
        )
        if rates.size == 0:
            return 0.0
        # The small constant avoids 0/0 when every success rate is zero.
        return float((rates.sum() ** 2) / (rates.size * (rates ** 2).sum() + 1e-6))

    def _get_obs(self) -> np.ndarray:
        """Build the observation vector for the current task and server loads."""
        task = self.current_task
        return np.array(
            [
                self.mec_load,
                self.cloud_load,
                task.workload,
                task.data_size,
                task.deadline,
                task.priority,
                self._jain_fairness(),
            ],
            dtype=np.float32,
        )

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: optional seed for the environment's random generator.
            options: unused.
        """
        if seed is None and not self._rng_seeded:
            # First reset without an explicit seed: derive one from NumPy's
            # global generator so that a program-level np.random.seed(...) still
            # makes the task stream reproducible.
            seed = int(np.random.randint(0, 2**31 - 1))
        super().reset(seed=seed)
        self._rng_seeded = True

        self.step_count = 0
        self.mec_load = 0.2
        self.cloud_load = 0.4
        self.satisfaction = {u: [] for u in self.user_types}
        self.current_task = self._sample_task()
        return self._get_obs(), {}

    def step(self, action):
        """Execute the current task on the chosen server and move to the next task.

        Args:
            action: 0 for MEC; any other value is treated as Cloud.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where ``info``
            holds the ``latency``, ``success`` and ``fairness`` of this step.
        """
        task = self.current_task
        use_mec = action == 0
        latency = self._estimate_latency(action, task)
        success = bool(latency <= task.deadline)

        # Reward: penalise latency and missed deadlines, encourage fairness.
        reward = -latency / 100.0
        reward += 1.0 if success else -2.0

        self.satisfaction[task.user_type].append(int(success))
        fairness = self._jain_fairness()
        reward += 0.5 * fairness

        # Load update: the chosen server gets busier, the other one recovers.
        mec_delta, cloud_delta = (0.03, -0.01) if use_mec else (-0.01, 0.02)
        self.mec_load = float(np.clip(self.mec_load + mec_delta, *self.MEC_LOAD_BOUNDS))
        self.cloud_load = float(np.clip(self.cloud_load + cloud_delta, *self.CLOUD_LOAD_BOUNDS))

        self.step_count += 1
        terminated = self.step_count >= self.episode_length
        self.current_task = self._sample_task()
        info = {"latency": latency, "success": success, "fairness": fairness}
        return self._get_obs(), float(reward), terminated, False, info


## 3. Baselines et évaluation

In [None]:
def random_policy(_obs):
    """Baseline policy: ignore the observation and pick action 0 or 1 at random.

    The draw comes from NumPy's global random generator.
    """
    return np.random.randint(low=0, high=2)


def heuristic_policy(obs):
    """Latency-driven baseline: return 0 (MEC) or 1 (Cloud) for an observation.

    ``obs`` is unpacked as ``[mec_load, cloud_load, workload, data_size,
    deadline, priority, fairness]``; priority and fairness are not used.
    """
    mec_load, cloud_load, workload, data_size, deadline, _priority, _fairness = obs

    # Estimated latency on each server: network + processing + data transfer.
    mec_latency = 5 + 30 * mec_load + workload / (100 * (1 - mec_load)) + data_size / 5.0
    cloud_latency = 25 + 40 * cloud_load + workload / (500 * (1 - cloud_load)) + data_size / 5.0

    # The MEC is preferred when it meets the deadline and is not heavily loaded.
    mec_is_comfortable = mec_latency <= deadline and mec_load < 0.8

    # Otherwise the Cloud is chosen only when it is strictly faster.
    if mec_is_comfortable or not cloud_latency < mec_latency:
        return 0
    return 1


def evaluate_policy(env, policy_fn, episodes=30):
    """Run ``policy_fn`` on ``env`` and average the per-step metrics.

    Args:
        env: environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns ``(obs, reward, terminated, truncated,
            info)`` with ``latency``, ``success`` and ``fairness`` keys in
            ``info``.
        policy_fn: callable mapping an observation to an action.
        episodes: number of episodes to run.

    Returns:
        A dict with ``latency_ms``, ``success_rate`` and ``fairness``, each the
        mean over every step of every episode (NaN when no step was run).
    """
    latencies, successes, fairness_values = [], [], []
    for _ in range(episodes):
        obs, _ = env.reset()
        done = False
        while not done:
            action = policy_fn(obs)
            obs, _reward, terminated, truncated, info = env.step(action)
            # An episode is over on either flag; reading only `terminated`
            # would loop forever on an environment that ends by truncation.
            done = bool(terminated) or bool(truncated)
            latencies.append(info["latency"])
            successes.append(info["success"])
            fairness_values.append(info["fairness"])

    if not latencies:
        # Nothing was collected (e.g. episodes=0): report NaN explicitly
        # instead of taking the mean of an empty list.
        nan = float("nan")
        return {"latency_ms": nan, "success_rate": nan, "fairness": nan}

    return {
        "latency_ms": float(np.mean(latencies)),
        "success_rate": float(np.mean(successes)),
        "fairness": float(np.mean(fairness_values)),
    }


## 4. Entraînement DRL (DQN / PPO)

In [None]:
# Training environment shared by both agents (DQN is trained first, then PPO).
env = EdgeResourceEnv()

dqn_model = DQN(
    policy="MlpPolicy",
    env=env,
    learning_rate=3e-4,
    buffer_size=5000,
    # Set explicitly so gradient updates start well inside the 5 000-step
    # budget regardless of the installed Stable-Baselines3 default
    # (NOTE(review): older releases are believed to default to 50 000 -- confirm).
    learning_starts=500,
    batch_size=64,
    gamma=0.98,
    # The library default (10 000 steps) is longer than the whole training run,
    # so the target network would never be synchronised with the online one.
    target_update_interval=500,
    verbose=0,
    seed=SEED,
)
dqn_model.learn(total_timesteps=5000)

ppo_model = PPO(
    policy="MlpPolicy",
    env=env,
    learning_rate=3e-4,
    n_steps=512,
    gamma=0.98,
    verbose=0,
    seed=SEED,
)
ppo_model.learn(total_timesteps=5000)


## 5. Benchmark comparatif

In [None]:
# Separate environment instance used only for evaluation.
eval_env = EdgeResourceEnv()

# Policies to compare, in the order they are evaluated.
policies = {
    "Random": random_policy,
    "Heuristic": heuristic_policy,
    "DQN": lambda obs: dqn_model.predict(obs, deterministic=True)[0],
    "PPO": lambda obs: ppo_model.predict(obs, deterministic=True)[0],
}

results = []
for method, policy_fn in policies.items():
    # Restart every random source from the same seed before each method so the
    # comparison does not depend on how much randomness earlier cells or earlier
    # methods consumed. Both NumPy's global generator and the environment's own
    # generator are reseeded.
    np.random.seed(SEED)
    eval_env.reset(seed=SEED)
    results.append({"method": method, **evaluate_policy(eval_env, policy_fn)})

# Lowest average latency first.
pd.DataFrame(results).sort_values(by="latency_ms")


## 6. Conclusion (à compléter)
- Analysez le compromis **latence / équité / robustesse**
- Discutez l'impact de la charge MEC vs Cloud
- Proposez des pistes d'amélioration (multi-agent, priorités, contraintes énergétiques)
