# 0. Install Dependencies

In [None]:
%pip install tensorflow
%pip install gymnasium
%pip install keras
%pip install keras-rl2
%pip install stable-baselines3
%pip install tensorboard

# 1. Define the network scheduling environment

In [1]:
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import numpy as np
import random
import os
from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import plot_results, load_results

%load_ext autoreload
%autoreload 2

In [2]:
from gymnasium import spaces
import numpy as np

class FlightSchedulingEnv(gym.Env):
    def __init__(self, departure_times, arrival_times, max_steps=100):
        super(FlightSchedulingEnv, self).__init__()
        self.number_of_flights = len(departure_times)
        # Définir l'espace d'action et d'observation
        self.action_space = spaces.Discrete(self.number_of_flights*2 + 1) 
        self.observation_space = spaces.Box(
            low=np.zeros(self.number_of_flights), 
            high=np.array([3000 for _ in range(self.number_of_flights)])
        )

        # Initialiser les détails des vols
        self.departure_times = departure_times  # Heures de départ initiales
        self.arrival_times = arrival_times  # Heures d'arrivée initiales

        # Initialiser les contraintes
        self.constraints = {i: [0, 2000] for i in range(self.number_of_flights)}
        
        # Initialiser l'état actuel
        self.current_state = np.array(self.departure_times)

        # Autres paramètres du problème
        self.time_step = 20  # Multiple de 20 minutes
        self.max_steps = max_steps  # Limite d'étapes
        self.current_step = 0

    def reset(self, seed = None, options = None):
        super().reset(seed=seed)
        self.current_state = np.array(self.departure_times)
        self.current_step = 0
        return self.current_state, {}

    def step(self, action):
        old_revenue = self.calculate_revenue()
        # Mettez à jour l'état en déplaçant le vol correspondant
        if action == self.number_of_flights * 2:
            pass
        else:
            if action % 2 == 0:
            # Si l'action est paire, ajoutez self.time_step
                new_departure_time = self.current_state[action // 2] + self.time_step
            else:
            # Si l'action est impaire, soustrayez self.time_step
                new_departure_time = self.current_state[action // 2] - self.time_step
            
            # Assurer que les heures de départ respectent les contraintes
            if self.respect_constraints(action // 2, new_departure_time):
                self.current_state[action // 2] = new_departure_time
          
        # Calculer le revenu du nouveau planning
        new_revenue = self.calculate_revenue()
        # Calculer la récompense comme la variation de revenu
        reward = new_revenue - old_revenue

        # Vérifier si la limite d'étapes est atteinte
        done = self.current_step == self.max_steps

        # Mettre à jour le nombre d'étapes
        self.current_step += 1

        # Retourner l'observation, la récompense, l'état terminal et des informations supplémentaires
        return self.current_state.copy(), reward, done, False, {}

    def calculate_revenue(self):
        # Appeler la fonction existante pour calculer le revenu du planning actuel
        return np.sum(self.current_state)
    
    def respect_constraints(self, flight_number, departure_time): 
        min_departure, max_departure = self.constraints[flight_number]
        return min_departure <= departure_time <= max_departure

# 2. Random policy

In [3]:
departure_times = [8 * 60, 9 * 60, 10 * 60, 12 * 60, 14 * 60]  # Heures de départ initiales
arrival_times = [10 * 60, 11 * 60, 12 * 60, 14 * 60, 16 * 60]  # Heures d'arrivée initiales
# Exemple d'utilisation de l'environnement

env = FlightSchedulingEnv(departure_times, arrival_times, max_steps=1000)

# Réinitialiser l'environnement
obs = env.reset()

# Exemple d'itération sur les étapes
for _ in range(10):
    action = env.action_space.sample()  # Action aléatoire
    obs, reward, done, truncated, _ = env.step(action)
    print("Observation:", obs, "Reward:", reward)

env.close()

Observation: [460 540 600 720 840] Reward: -20
Observation: [460 540 600 720 820] Reward: -20
Observation: [460 540 620 720 820] Reward: 20
Observation: [460 540 620 720 840] Reward: 20
Observation: [440 540 620 720 840] Reward: -20
Observation: [440 520 620 720 840] Reward: -20
Observation: [440 520 620 720 840] Reward: 0
Observation: [420 520 620 720 840] Reward: -20
Observation: [420 520 620 700 840] Reward: -20
Observation: [420 520 620 700 840] Reward: 0


# 3. Learn a policy using RL

In [4]:
# Créer une instance de l'environnement personnalisé
#departure_times = [8 * 60]  # Heures de départ initiales
#arrival_times = [10 * 60]  # Heures d'arrivée initiales
departure_times = [8 * 60, 9 * 60, 10 * 60, 12 * 60, 14 * 60]  # Heures de départ initiales
arrival_times = [10 * 60, 11 * 60, 12 * 60, 14 * 60, 16 * 60]  # Heures d'arrivée initiales
#departure_times = [8 * 60 for _ in range(20)]  # Heures de départ initiales
#arrival_times = [10 * 60 for _ in range(20)]  # Heures d'arrivée initiales


In [8]:
env = FlightSchedulingEnv(departure_times=departure_times, arrival_times=arrival_times, max_steps=1000)
env = DummyVecEnv([lambda: env])

#model = A2C("MlpPolicy", env, verbose=1)
model = PPO("MlpPolicy", env, verbose=1)
#model = DQN("MlpPolicy", env, verbose=1)
# Entraîner le modèle sur un certain nombre d'itérations
model.learn(total_timesteps=100000)

# Tester le modèle entraîné
obs = env.reset()
for _ in range(3):
    obs = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, _ = env.step(action)
        print(obs)
        print(reward)
env.close()

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1693 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1146        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.019606844 |
|    clip_fraction        | 0.464       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.38       |
|    explained_variance   | 0.000933    |
|    learning_rate        | 0.0003      |
|    loss                 | 874         |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0576     |
|    value_loss           | 2.7e+03     |
-----------------------------------------
-----------------

In [12]:
for _ in range(1):
    obs = env.reset()
    total_reward = 0
    done = False
    while not done:
        action, _ = model.predict(obs)
        #action = [0]
        obs, reward, done, _ = env.step(action)
        #print('action : ', action[0])
        print('obs : ', obs[0])
        #print('reward : ', reward[0])
        total_reward += reward
    print(total_reward)
env.close()

obs :  [480. 540. 600. 740. 840.]
obs :  [480. 540. 600. 740. 840.]
obs :  [480. 540. 600. 740. 860.]
obs :  [500. 540. 600. 740. 860.]
obs :  [500. 560. 600. 740. 860.]
obs :  [520. 560. 600. 740. 860.]
obs :  [520. 560. 600. 740. 880.]
obs :  [540. 560. 600. 740. 880.]
obs :  [540. 560. 600. 740. 880.]
obs :  [540. 580. 600. 740. 880.]
obs :  [540. 600. 600. 740. 880.]
obs :  [540. 620. 600. 740. 880.]
obs :  [540. 620. 620. 740. 880.]
obs :  [560. 620. 620. 740. 880.]
obs :  [580. 620. 620. 740. 880.]
obs :  [580. 620. 620. 740. 880.]
obs :  [580. 640. 620. 740. 880.]
obs :  [580. 640. 620. 740. 900.]
obs :  [580. 640. 620. 740. 900.]
obs :  [600. 640. 620. 740. 900.]
obs :  [600. 660. 620. 740. 900.]
obs :  [620. 660. 620. 740. 900.]
obs :  [620. 660. 620. 740. 900.]
obs :  [620. 660. 620. 740. 900.]
obs :  [640. 660. 620. 740. 900.]
obs :  [640. 660. 620. 760. 900.]
obs :  [640. 660. 620. 760. 920.]
obs :  [640. 660. 620. 760. 920.]
obs :  [640. 680. 620. 760. 920.]
obs :  [660. 6

# 3. Viz

In [43]:
from stable_baselines3.common.monitor import Monitor
env = Monitor(
    FlightSchedulingEnv(
        departure_times=departure_times, 
        arrival_times=arrival_times, 
        max_steps=1000
    ),
    'logs/'
)

In [44]:
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="logs/")
model.learn(total_timesteps=100000)

Using cpu device
Wrapping the env in a DummyVecEnv.
Logging to logs/PPO_6
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 200      |
| time/              |          |
|    fps             | 2165     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 200         |
| time/                   |             |
|    fps                  | 1430        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.018792044 |
|    clip_fraction        | 0.489       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.38       

<stable_baselines3.ppo.ppo.PPO at 0x1ffddb7b310>

In [45]:
for _ in range(10):
    obs = env.reset()[0]
    total_reward = 0
    done = False
    while not done:
        action, _ = model.predict(obs)
        #action = [0]
        obs, reward, done, _, _ = env.step(action)
        #print('action : ', action)
        #print('obs : ', obs)
        #print('reward : ', reward)
        total_reward += reward
    print(total_reward)
env.close()

6820
6820
6820
6820
6820
6820
6820
6820
6820
6820


In [10]:
%tensorboard --logdir=logs/

'tensorboard' is not recognized as an internal or external command,
operable program or batch file.


# 4. Revenue estimation

In [83]:
import random
import pandas as pd
from datetime import datetime, timedelta

def generate_random_flight_schedule(N):
    flight_schedule = []

    # Définir une plage horaire pour les vols (par exemple, une journée)
    start_time = datetime.strptime("08:00", "%H:%M")
    end_time = datetime.strptime("22:00", "%H:%M")

    for _ in range(N):
        # Générer des horaires de départ et d'arrivée aléatoires
        departure_time = start_time + timedelta(minutes=random.randint(0, (end_time - start_time).seconds // 60))
        arrival_time = departure_time + timedelta(minutes=random.randint(30, 40))  # Exemple : vol de 30 à 240 minutes

        # Ajouter le vol à la liste
        flight_schedule.append((departure_time, arrival_time, random.choice([-1, 1])))

    # Trier la liste par horaire de départ
    flight_schedule.sort(key=lambda x: x[0])

    return pd.DataFrame(flight_schedule, columns=['departure', 'arrival', 'way'])

# Exemple : Générer un planning de départ aléatoire avec 5 vols
random_schedule = generate_random_flight_schedule(2)
random_schedule

Unnamed: 0,departure,arrival,way
0,1900-01-01 13:43:00,1900-01-01 14:17:00,-1
1,1900-01-01 15:35:00,1900-01-01 16:12:00,1


In [105]:
# Créer un dictionnaire pour stocker les valeurs aléatoires pour chaque paire de vols de sens opposé
lambdas = {}

# Parcourir le DataFrame pour chaque vol de départ
for i, departure_flight in random_schedule[random_schedule['way'] == 1].iterrows():
    # Sélectionner tous les vols d'arrivée de sens opposé
    arrival_flights = random_schedule[(random_schedule['way'] == -1) & (random_schedule.index != i)]

    # Associer une valeur aléatoire pour chaque paire de vols
    for _, arrival_flight in arrival_flights.iterrows():
        lambdas[(departure_flight.name, arrival_flight.name)] = random.randint(0, 100)

In [92]:
import pandas as pd
from datetime import timedelta

def calculate_feasible_connections(df, min_connection_time, max_connection_time):
    # Créer une copie du DataFrame pour les départs du hub
    departures = df[df['way'] == 1].copy()

    # Créer une copie du DataFrame pour les arrivées au hub
    arrivals = df[df['way'] == -1].copy()

    # Initialiser un DataFrame pour les connexions faisables
    feasible_connections = pd.DataFrame(columns=['departure_flight', 'arrival_flight', 'cnx_time'])

    # Parcourir tous les vols de départ
    for i in range(len(arrivals)):
        arrival_flight = arrivals.iloc[i]
        
        # Filtrer les vols d'arrivée qui ont un sens opposé et un temps de connexion faisable
        possible_departures = departures[
            (departures['departure'] >= arrival_flight['arrival'] + timedelta(minutes=min_connection_time)) &
            (departures['departure'] <= arrival_flight['arrival'] + timedelta(minutes=max_connection_time))
        ]

        # Ajouter les connexions faisables au DataFrame
        for _, departure_flight in possible_departures.iterrows():
            connection_time = departure_flight['departure'] - arrival_flight['arrival']
            connection_info = {
                'departure_flight': departure_flight.name,
                'arrival_flight': arrival_flight.name,
                'cnx_time': connection_time,
                'cnx_time_min' : int(connection_time.total_seconds() / 60)
            }
            feasible_connections = pd.concat([feasible_connections, pd.DataFrame([connection_info])], ignore_index=True)

    return feasible_connections

min_connection_time = 60
max_connection_time = 480

connections = calculate_feasible_connections(random_schedule, min_connection_time, max_connection_time)
print(connections)

  departure_flight arrival_flight        cnx_time  cnx_time_min
0                1              0 0 days 01:18:00          78.0


In [94]:
def preference_curve(x):
    if 60 <= x <= 120:
        return 0.5 + 0.5 * (x - 60) / 60
    elif 120 < x < 480:
        return 1 - 0.5 * (x - 120) / 360
    else:
        return 0
    
connections['z'] = connections['cnx_time_min'].apply(lambda x : preference_curve(x))

In [95]:
connections['lambdas'] = connections.apply(lambda x : lambdas[(x.departure_flight, x.arrival_flight)], axis=1)
connections['revenue'] = connections['z'] * connections['lambdas']

Unnamed: 0,departure_flight,arrival_flight,cnx_time,cnx_time_min,z
0,1,0,0 days 01:18:00,78.0,0.65


In [None]:
def calculate_revenue(self):
    flights = self.current_state
    connections = calculate_feasible_connections()