<a href="https://colab.research.google.com/github/alirezakavianifar/gitTutorial/blob/developer/RLProject4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Pinned to the versions resolved in the recorded install log so a re-run gets the same packages;
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q gymnasium==0.29.1 stable-baselines3==2.3.2

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_cu

In [2]:
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.logger import configure
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt
import os

class HealthcareNetworkEnv(gym.Env):
    """Gymnasium environment for ordering and transshipment in a hospital network.

    Observation (``spaces.Dict``):
        * ``inventory``       -- (H, P) stock per hospital and product
        * ``demand``          -- (H, P) demand sampled uniformly from 0..9 each period
        * ``supply_capacity`` -- (H, P) cap applied to every single order
        * ``lead_time``       -- (P,) lead time per product (exposed in the
          observation only; in this version orders are received immediately)

    Action (``spaces.MultiDiscrete``): a flat vector whose first ``H*P*R`` entries
    are order quantities (reshaped to ``(H, P, R)``) and whose remaining
    ``H*H*P`` entries are transshipment quantities (reshaped to ``(H, H, P)``).

    Reward: the negative of the summed transport, transshipment, holding,
    ordering and shortage costs.

    An episode is truncated after ``T`` steps.
    """

    def __init__(self, H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances):
        super().__init__()

        # Define constants
        self.H = H  # Number of hospitals
        self.P = P  # Number of products
        self.R = R  # Number of suppliers
        self.T = T  # Number of periods (episode horizon)
        self.LeadTime = np.asarray(LeadTime)  # Lead time for orders, array of shape (P,)

        # Costs
        self.transport_costs = np.asarray(transport_costs)  # Indexed [r, h, p]
        self.transshipment_costs = np.asarray(transshipment_costs)  # Indexed [h1, h2, p]
        self.inventory_costs = np.asarray(inventory_costs)  # Indexed [h, p]
        self.ordering_costs = np.asarray(ordering_costs)  # Indexed [p, h]

        # Coverage and distances
        self.coverage_distance = coverage_distance  # Maximum allowed distance for transshipments
        self.hospital_distances = np.asarray(hospital_distances)  # Matrix of distances between hospitals, shape (H, H)

        # Define state space
        self.observation_space = spaces.Dict({
            'inventory': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'demand': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'supply_capacity': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'lead_time': spaces.Box(low=0, high=np.inf, shape=(P,), dtype=np.float32)
        })

        # Define action space as MultiDiscrete
        self.action_space = spaces.MultiDiscrete([10] * (H * P * R + H * H * P))

        # Initialize state. reset() assigns self.state itself; its (obs, info)
        # return value must not be stored in self.state.
        self.reset()

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        ``seed`` re-seeds the environment's own random generator so that demand
        sampling is reproducible; ``options`` is accepted for Gymnasium API
        compatibility and is not used.
        """
        super().reset(seed=seed)
        self.current_time = 0
        self.state = {
            'inventory': np.zeros((self.H, self.P), dtype=np.float32),
            'demand': self._sample_demand(),
            'supply_capacity': np.ones((self.H, self.P), dtype=np.float32),
            'lead_time': self.LeadTime.astype(np.float32)
        }
        return self._get_obs(), {}

    def step(self, action):
        """Apply one period's orders and transshipments.

        Returns ``(observation, reward, terminated, truncated, info)``.
        ``terminated`` is always False; ``truncated`` becomes True once ``T``
        steps have been taken. ``info`` carries ``demand_loss`` and
        ``total_costs`` for logging.
        """
        # Extract order and transship actions from the MultiDiscrete action
        flat_action = np.asarray(action)
        order_action_size = self.H * self.P * self.R
        order = flat_action[:order_action_size].reshape((self.H, self.P, self.R))
        transship = flat_action[order_action_size:].reshape((self.H, self.H, self.P))

        # Update inventory levels based on orders and transshipments
        self._update_inventory(order, transship)

        # Calculate rewards based on demand satisfaction and costs
        reward, demand_loss, costs = self._calculate_reward(order, transship)

        # Draw next period's demand
        self._update_demand()

        # Advance the clock, then check whether the horizon has been reached
        self.current_time += 1
        truncated = self._check_done()

        info = {'demand_loss': demand_loss, 'total_costs': costs}
        return self._get_obs(), reward, False, truncated, info

    def _get_obs(self):
        """Return a copy of the state so callers never alias arrays mutated in place."""
        return {key: value.copy() for key, value in self.state.items()}

    def _sample_demand(self):
        """Draw integer demand in 0..9 per (hospital, product) from the env's seeded RNG."""
        return self.np_random.integers(0, 10, size=(self.H, self.P)).astype(np.float32)

    def _reachable_mask(self):
        """Boolean (H, H) mask of ordered hospital pairs within the coverage distance."""
        mask = self.hospital_distances <= self.coverage_distance
        np.fill_diagonal(mask, False)  # a hospital never transships to itself
        return mask

    def _update_inventory(self, order, transship):
        """Add capacity-limited orders to stock, then move transshipments between hospitals."""
        inventory = self.state['inventory']

        # Constraint: each (h, p, r) order is capped by the hospital's capacity
        capacity = self.state['supply_capacity'][:, :, np.newaxis]
        inventory += np.minimum(order, capacity).sum(axis=2)

        # Process transshipments. Pairs are visited in row-major order because
        # each transfer is limited by the sender's stock at that moment.
        for h1, h2 in np.argwhere(self._reachable_mask()):
            for p in range(self.P):
                requested = transship[h1, h2, p]
                if requested > 0:
                    transfer_quantity = min(requested, inventory[h1, p])
                    inventory[h1, p] -= transfer_quantity
                    inventory[h2, p] += transfer_quantity

        # Ensure inventory levels are non-negative
        np.maximum(inventory, 0, out=inventory)

    def _calculate_reward(self, order, transship):
        """Return ``(reward, demand_loss, total_costs)`` for the current state and action.

        ``demand_loss`` is the summed absolute gap between on-hand inventory and
        each order scaled by ``1 + epsilon_p``. ``reward`` is ``-total_costs``.
        """
        epsilon_p = 0.01  # Small allowance

        inventory = self.state['inventory']
        demand = self.state['demand']

        # Gap between on-hand stock and every (tolerance-scaled) order
        order_target = order * (1 + epsilon_p)
        demand_loss = np.sum(np.abs(order_target - inventory[:, :, np.newaxis]))

        # NOTE(review): transport, ordering and transshipment costs are charged on
        # the requested quantities, not on what capacity/stock actually allowed.
        # Confirm this is the intended cost model.
        transport_cost = np.sum(order * np.transpose(self.transport_costs, (1, 2, 0)))
        ordering_cost = np.sum(order.sum(axis=2) * self.ordering_costs.T)
        inventory_cost = np.sum(inventory * self.inventory_costs)

        # Assuming shortage cost per unit is 1 for simplicity.
        # NOTE(review): demand is never subtracted from inventory, so stock only
        # accumulates across periods. Confirm whether demand should consume it.
        shortage_cost = np.sum(np.maximum(demand - inventory, 0))

        reachable = self._reachable_mask()[:, :, np.newaxis]
        transshipment_cost = np.sum(transship * self.transshipment_costs * reachable)

        # Calculate total costs
        total_costs = float(transport_cost + transshipment_cost + inventory_cost + ordering_cost + shortage_cost)

        # Define the reward function: reward is negative of total costs
        reward = -total_costs

        return reward, float(demand_loss), total_costs

    def _update_demand(self):
        """Replace the demand observation with a fresh random draw."""
        self.state['demand'] = self._sample_demand()

    def _check_done(self):
        """Return True once the episode has run for ``T`` periods."""
        return bool(self.current_time >= self.T)

# Define parameters
SEED = 42  # fixed seed so the randomly generated example data is reproducible
rng = np.random.default_rng(SEED)

H = 5  # Number of hospitals
P = 3  # Number of products
R = 1  # Number of suppliers
T = 10  # Number of periods
LeadTime = np.array([1, 2, 3])  # Different lead times for each product

# Define costs (for example purposes, using random values)
transport_costs = rng.random((R, H, P))
transshipment_costs = rng.random((H, H, P))
inventory_costs = rng.random((H, P))
ordering_costs = rng.random((P, H))

# Define coverage distance and hospital distances
coverage_distance = 5.0
# Random distances in [0, 10), made symmetric with a zero diagonal so that
# d(a, b) == d(b, a) and every hospital is at distance 0 from itself.
raw_distances = rng.random((H, H)) * 10
hospital_distances = (raw_distances + raw_distances.T) / 2
np.fill_diagonal(hospital_distances, 0.0)

# Create the environment
env = HealthcareNetworkEnv(H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances)

# Check the environment against the Stable-Baselines3 interface expectations
check_env(env)



<div dir="rtl"><h3>
 توضیحات تغییرات کد

در این نسخه دوم کد نسبت به نسخه اول تغییرات و بهبودهایی انجام شده است که به شرح زیر هستند:

#### ۱. افزودن پارامترهای جدید
- **فاصله پوشش (coverage_distance)**: این پارامتر حداکثر فاصله مجاز برای انتقال محصولات بین بیمارستان‌ها را مشخص می‌کند.
- **فاصله بیمارستان‌ها (hospital_distances)**: ماتریسی که فاصله بین بیمارستان‌ها را نشان می‌دهد و ابعاد آن \((H, H)\) است.

#### ۲. تغییرات در مقداردهی اولیه (init)
- **LeadTime**: این پارامتر به جای یک مقدار ثابت، اکنون یک آرایه با طول \(P\) است که نشان‌دهنده زمان تحویل برای هر محصول می‌باشد.
- **تعریف فضای حالت**: ساختار فضای حالت (state space) و فضای عمل (action space) تغییری نکرده است؛ تنها مؤلفه‌ی `lead_time` در فضای حالت اکنون برداری به طول \(P\) است (یک مقدار برای هر محصول).

#### ۳. تغییر در تابع `reset`
- در `reset`، مقداردهی اولیه lead time به آرایه‌ای از مقادیر تبدیل شده است.

#### ۴. تغییرات در تابع `_update_inventory`
- در تابع `_update_inventory`، انتقال‌ها بین بیمارستان‌ها فقط در صورتی انجام می‌شوند که فاصله بین دو بیمارستان کمتر یا مساوی مقدار تعیین شده در `coverage_distance` باشد.
- پس از اعمال سفارش‌ها و انتقال‌ها، با استفاده از `np.maximum` اطمینان حاصل می‌شود که سطوح موجودی منفی نشوند.

#### ۵. تغییرات در تابع `_calculate_reward`
- اضافه کردن متغیر `epsilon_p` برای ایجاد یک تلورانس کوچک در محاسبه دریافت محصولات.
- محاسبه دقیق‌تر هزینه‌ها:
  - **هزینه حمل و نقل (transport cost)**: با توجه به هزینه‌های حمل و نقل تعریف شده.
  - **هزینه نگهداری موجودی (inventory holding cost)**: با توجه به هزینه‌های نگهداری موجودی.
  - **هزینه کسری (shortage cost)**: فرض شده که هزینه کسری به ازای هر واحد برابر ۱ است.
  - **هزینه سفارش‌دهی (ordering cost)**: با توجه به هزینه‌های سفارش‌دهی.


</h3></div>


<div dir="rtl"><h3>
 قسمت های تغییر یافته

#### ۱. افزودن پارامترهای جدید به `__init__`:
</h3></div>



```python
class HealthcareNetworkEnv(gym.Env):
    """Excerpt: constructor of the hospital-network environment.

    Stores the problem dimensions, cost arrays and coverage data, then declares
    a Dict observation space and a MultiDiscrete action space with
    ``H*P*R + H*H*P`` components, each taking values 0..9.
    """

    def __init__(self, H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances):
        super(HealthcareNetworkEnv, self).__init__()

        # Define constants
        self.H = H  # Number of hospitals
        self.P = P  # Number of products
        self.R = R  # Number of suppliers
        self.T = T  # Number of periods
        self.LeadTime = LeadTime  # Lead time for orders, array of shape (P,)

        # Costs
        self.transport_costs = transport_costs  # Cost of transporting products from suppliers to hospitals
        self.transshipment_costs = transshipment_costs  # Cost of transshipment between hospitals
        self.inventory_costs = inventory_costs  # Holding costs for inventory
        self.ordering_costs = ordering_costs  # Ordering costs

        # Coverage and distances
        self.coverage_distance = coverage_distance  # Maximum allowed distance for transshipments
        self.hospital_distances = hospital_distances  # Matrix of distances between hospitals, shape (H, H)

        # Define state space: three (H, P) boxes plus a (P,) lead-time box, all float32
        self.observation_space = spaces.Dict({
            'inventory': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'demand': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'supply_capacity': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'lead_time': spaces.Box(low=0, high=np.inf, shape=(P,), dtype=np.float32)
        })

        # Define action space as MultiDiscrete
        self.action_space = spaces.MultiDiscrete([10] * (H * P * R + H * H * P))

        # Initialize state
        # NOTE(review): reset() returns an (observation, info) tuple, so this
        # assignment leaves self.state holding that tuple until reset() is called again.
        self.state = self.reset()
```
<div dir="rtl"><h3>
۲. تغییرات در تابع `reset`:
</h3></div>


```python
def reset(self, seed=None):
    """Rebuild the state dict and return it together with an empty info dict.

    Inventory starts at zero, demand is drawn as integers in 0..9, supply
    capacity is all ones, and lead time is ``self.LeadTime`` cast to float32.
    """
    # NOTE(review): `seed` is accepted but not used; demand is drawn from the
    # global np.random state.
    self.state = {
        'inventory': np.zeros((self.H, self.P), dtype=np.float32),
        'demand': np.random.randint(0, 10, size=(self.H, self.P)).astype(np.float32),
        'supply_capacity': np.ones((self.H, self.P), dtype=np.float32),
        'lead_time': self.LeadTime.astype(np.float32)  # Update lead time initialization
    }
    return self.state, {}
```
<div dir="rtl"><h3>
۳. تغییرات در تابع `_update_inventory`:
</h3></div>


```python
def _update_inventory(self, order, transship):
    """Apply orders and transshipments to ``self.state['inventory']`` in place.

    ``order`` is indexed ``[h, p, r]`` and ``transship`` is indexed ``[h1, h2, p]``.
    """
    for h in range(self.H):
        for p in range(self.P):
            for r in range(self.R):
                # Constraint: Capacity Constraint in Hospital h
                # Each individual order is capped at the hospital's supply capacity.
                supply_received = order[h, p, r]
                capacity_available = self.state['supply_capacity'][h, p]
                supply_received = min(supply_received, capacity_available)
                self.state['inventory'][h, p] += supply_received

    # Process transshipments
    # Only distinct hospital pairs within the coverage distance may exchange stock.
    for h1 in range(self.H):
        for h2 in range(self.H):
            if h1 != h2 and self.hospital_distances[h1, h2] <= self.coverage_distance:
                for p in range(self.P):
                    if transship[h1, h2, p] > 0:
                        # A sender cannot ship more than it currently holds.
                        transfer_quantity = min(transship[h1, h2, p], self.state['inventory'][h1, p])
                        self.state['inventory'][h1, p] -= transfer_quantity
                        self.state['inventory'][h2, p] += transfer_quantity

    # Ensure inventory levels are non-negative
    self.state['inventory'] = np.maximum(self.state['inventory'], 0)
```
<div dir="rtl"><h3>
۴. تغییرات در تابع `_calculate_reward`:
</h3></div>


```python
def _calculate_reward(self, order, transship):
    """Return ``(reward, demand_loss, total_costs)`` for the current state and action.

    ``reward`` is the negative of the summed transport, transshipment, holding,
    ordering and shortage costs. ``demand_loss`` accumulates the gap between
    on-hand inventory and each order scaled by ``1 + epsilon_p``.
    """
    reward = 0
    demand_loss = 0
    total_costs = 0

    epsilon_p = 0.01  # Small allowance

    # Calculate costs
    transport_cost = 0
    transshipment_cost = 0
    inventory_cost = 0
    ordering_cost = 0
    shortage_cost = 0

    for h in range(self.H):
        for p in range(self.P):
            for r in range(self.R):
                ordered_quantity = order[h, p, r]
                # "Received" here is the hospital's current inventory of product p.
                received_quantity = self.state['inventory'][h, p]

                if received_quantity < ordered_quantity * (1 + epsilon_p):
                    demand_loss += (ordered_quantity * (1 + epsilon_p) - received_quantity)
                elif received_quantity > ordered_quantity * (1 + epsilon_p):
                    demand_loss += (received_quantity - ordered_quantity * (1 + epsilon_p))
                else:
                    # NOTE(review): this accumulation has no effect, because
                    # reward is overwritten with -total_costs below.
                    reward += received_quantity

                # Add transport cost
                transport_cost += ordered_quantity * self.transport_costs[r, h, p]

            # Add inventory holding cost
            inventory_cost += self.state['inventory'][h, p] * self.inventory_costs[h, p]

            # Add shortage cost
            shortage = max(self.state['demand'][h, p] - self.state['inventory'][h, p], 0)
            shortage_cost += shortage  # Assuming shortage cost per unit is 1 for simplicity

    # Calculate transshipment costs
    # Charged on the requested quantity for every distinct pair within coverage distance.
    for h1 in range(self.H):
        for h2 in range(self.H):
            if h1 != h2 and self.hospital_distances[h1, h2] <= self.coverage_distance:
                for p in range(self.P):
                    transshipment_quantity = transship[h1, h2, p]
                    transshipment_cost += transshipment_quantity * self.transshipment_costs[h1, h2, p]

    # Calculate ordering costs
    # ordering_costs is indexed [p, h], unlike the other cost arrays.
    for h in range(self.H):
        for p in range(self.P):
            ordering_cost += np.sum(order[h, p, :]) * self.ordering_costs[p, h]

    # Calculate total costs
    total_costs = transport_cost + transshipment_cost + inventory_cost + ordering_cost + shortage_cost

    # Define the reward function: reward is negative of total costs
    reward = -total_costs

    return reward, demand_loss, total_costs
```


In [3]:

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.logger import configure
import numpy as np
import os

class HealthcareNetworkEnv(gym.Env):
    """Gymnasium environment for ordering and transshipment with order lead times.

    Observation (``spaces.Dict``):
        * ``inventory``       -- (H, P) stock per hospital and product
        * ``demand``          -- (H, P) demand sampled uniformly from 0..9 each period
        * ``supply_capacity`` -- (H, P) cap applied to every single order
        * ``lead_time``       -- (P,) lead time per product

    Action (``spaces.MultiDiscrete``): a flat vector whose first ``H*P*R`` entries
    are order quantities (reshaped to ``(H, P, R)``) and whose remaining
    ``H*H*P`` entries are transshipment quantities (reshaped to ``(H, H, P)``).

    Orders placed at time ``t`` for product ``p`` are added to inventory once
    ``current_time >= t + LeadTime[p]``. Reward is the negative of the summed
    transport, transshipment, holding, ordering and shortage costs.

    An episode is truncated after ``T`` steps.
    """

    def __init__(self, H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances):
        super().__init__()

        # Define constants
        self.H = H  # Number of hospitals
        self.P = P  # Number of products
        self.R = R  # Number of suppliers
        self.T = T  # Number of periods (episode horizon)
        self.LeadTime = np.asarray(LeadTime)  # Lead time for orders, array of shape (P,)

        # Costs
        self.transport_costs = np.asarray(transport_costs)  # Indexed [r, h, p]
        self.transshipment_costs = np.asarray(transshipment_costs)  # Indexed [h1, h2, p]
        self.inventory_costs = np.asarray(inventory_costs)  # Indexed [h, p]
        self.ordering_costs = np.asarray(ordering_costs)  # Indexed [p, h]

        # Coverage and distances
        self.coverage_distance = coverage_distance  # Maximum allowed distance for transshipments
        self.hospital_distances = np.asarray(hospital_distances)  # Matrix of distances between hospitals, shape (H, H)

        # Define state space
        self.observation_space = spaces.Dict({
            'inventory': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'demand': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'supply_capacity': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'lead_time': spaces.Box(low=0, high=np.inf, shape=(P,), dtype=np.float32)
        })

        # Define action space as MultiDiscrete
        self.action_space = spaces.MultiDiscrete([10] * (H * P * R + H * H * P))

        # Initialize state. reset() assigns self.state itself; its (obs, info)
        # return value must not be stored in self.state.
        self.reset()

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        ``seed`` re-seeds the environment's own random generator so that demand
        sampling is reproducible; ``options`` is accepted for Gymnasium API
        compatibility and is not used.
        """
        super().reset(seed=seed)
        self.state = {
            'inventory': np.zeros((self.H, self.P), dtype=np.float32),
            'demand': self._sample_demand(),
            'supply_capacity': np.ones((self.H, self.P), dtype=np.float32),
            'lead_time': self.LeadTime.astype(np.float32)
        }
        self.orders_in_transit = []  # Pending (arrival_time, h, p, quantity) tuples
        self.current_time = 0
        return self._get_obs(), {}

    def step(self, action):
        """Apply one period's deliveries, orders and transshipments.

        Returns ``(observation, reward, terminated, truncated, info)``.
        ``terminated`` is always False; ``truncated`` becomes True once ``T``
        steps have been taken. ``info`` carries ``demand_loss`` and
        ``total_costs`` for logging.
        """
        # Extract order and transship actions from the MultiDiscrete action
        flat_action = np.asarray(action)
        order_action_size = self.H * self.P * self.R
        order = flat_action[:order_action_size].reshape((self.H, self.P, self.R))
        transship = flat_action[order_action_size:].reshape((self.H, self.H, self.P))

        # Update inventory levels based on orders and transshipments
        self._update_inventory(order, transship)

        # Calculate rewards based on demand satisfaction and costs
        reward, demand_loss, costs = self._calculate_reward(order, transship)

        # Draw next period's demand
        self._update_demand()

        # Advance the clock, then check whether the horizon has been reached
        self.current_time += 1
        truncated = self._check_done()

        info = {'demand_loss': demand_loss, 'total_costs': costs}
        return self._get_obs(), reward, False, truncated, info

    def _get_obs(self):
        """Return a copy of the state so callers never alias arrays mutated in place."""
        return {key: value.copy() for key, value in self.state.items()}

    def _sample_demand(self):
        """Draw integer demand in 0..9 per (hospital, product) from the env's seeded RNG."""
        return self.np_random.integers(0, 10, size=(self.H, self.P)).astype(np.float32)

    def _reachable_mask(self):
        """Boolean (H, H) mask of ordered hospital pairs within the coverage distance."""
        mask = self.hospital_distances <= self.coverage_distance
        np.fill_diagonal(mask, False)  # a hospital never transships to itself
        return mask

    def _update_inventory(self, order, transship):
        """Receive due shipments, queue new orders, then move transshipments."""
        inventory = self.state['inventory']

        # Process orders received this period; keep the rest in transit
        still_in_transit = []
        for shipment in self.orders_in_transit:
            arrival_time, h, p, quantity = shipment
            if self.current_time >= arrival_time:
                inventory[h, p] += quantity
            else:
                still_in_transit.append(shipment)
        self.orders_in_transit = still_in_transit

        # Process new orders: cap each by capacity and schedule its arrival
        for h in range(self.H):
            for p in range(self.P):
                capacity_available = self.state['supply_capacity'][h, p]
                arrival_time = self.current_time + self.LeadTime[p]
                for r in range(self.R):
                    quantity = min(order[h, p, r], capacity_available)
                    # Empty orders would add nothing on arrival, so don't queue them
                    if quantity > 0:
                        self.orders_in_transit.append((arrival_time, h, p, quantity))

        # Process transshipments. Pairs are visited in row-major order because
        # each transfer is limited by the sender's stock at that moment.
        for h1, h2 in np.argwhere(self._reachable_mask()):
            for p in range(self.P):
                requested = transship[h1, h2, p]
                if requested > 0:
                    transfer_quantity = min(requested, inventory[h1, p])
                    inventory[h1, p] -= transfer_quantity
                    inventory[h2, p] += transfer_quantity

        # Ensure inventory levels are non-negative
        np.maximum(inventory, 0, out=inventory)

    def _calculate_reward(self, order, transship):
        """Return ``(reward, demand_loss, total_costs)`` for the current state and action.

        ``demand_loss`` is the summed absolute gap between on-hand inventory and
        each order scaled by ``1 + epsilon_p``. ``reward`` is ``-total_costs``.
        """
        epsilon_p = 0.01  # Small allowance

        inventory = self.state['inventory']
        demand = self.state['demand']

        # Gap between on-hand stock and every (tolerance-scaled) order
        order_target = order * (1 + epsilon_p)
        demand_loss = np.sum(np.abs(order_target - inventory[:, :, np.newaxis]))

        # NOTE(review): transport, ordering and transshipment costs are charged on
        # the requested quantities, not on what capacity/stock actually allowed.
        # Confirm this is the intended cost model.
        transport_cost = np.sum(order * np.transpose(self.transport_costs, (1, 2, 0)))
        ordering_cost = np.sum(order.sum(axis=2) * self.ordering_costs.T)
        inventory_cost = np.sum(inventory * self.inventory_costs)

        # Assuming shortage cost per unit is 1 for simplicity.
        # NOTE(review): demand is never subtracted from inventory, so stock only
        # accumulates across periods. Confirm whether demand should consume it.
        shortage_cost = np.sum(np.maximum(demand - inventory, 0))

        reachable = self._reachable_mask()[:, :, np.newaxis]
        transshipment_cost = np.sum(transship * self.transshipment_costs * reachable)

        # Calculate total costs
        total_costs = float(transport_cost + transshipment_cost + inventory_cost + ordering_cost + shortage_cost)

        # Define the reward function: reward is negative of total costs
        reward = -total_costs

        return reward, float(demand_loss), total_costs

    def _update_demand(self):
        """Replace the demand observation with a fresh random draw."""
        self.state['demand'] = self._sample_demand()

    def _check_done(self):
        """Return True once the episode has run for ``T`` periods."""
        return bool(self.current_time >= self.T)

# Define parameters
SEED = 42  # fixed seed so the randomly generated example data is reproducible
rng = np.random.default_rng(SEED)

H = 5  # Number of hospitals
P = 3  # Number of products
R = 1  # Number of suppliers
T = 10  # Number of periods
LeadTime = np.array([1, 2, 3])  # Different lead times for each product

# Define costs (for example purposes, using random values)
transport_costs = rng.random((R, H, P))
transshipment_costs = rng.random((H, H, P))
inventory_costs = rng.random((H, P))
ordering_costs = rng.random((P, H))

# Define coverage distance and hospital distances
coverage_distance = 5.0
# Random distances in [0, 10), made symmetric with a zero diagonal so that
# d(a, b) == d(b, a) and every hospital is at distance 0 from itself.
raw_distances = rng.random((H, H)) * 10
hospital_distances = (raw_distances + raw_distances.T) / 2
np.fill_diagonal(hospital_distances, 0.0)

# Create the environment
env = HealthcareNetworkEnv(H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances)

# Check the environment against the Stable-Baselines3 interface expectations
check_env(env)

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.logger import configure
import os

# Training environment: Monitor records per-episode reward/length for the logger
vec_env = DummyVecEnv([lambda: Monitor(env)])

# Set up the logging directory
log_dir = "./ppo_healthcare_logs/"
os.makedirs(log_dir, exist_ok=True)
new_logger = configure(log_dir, ["stdout", "csv", "tensorboard"])

# Define the evaluation environment and callback.
# The evaluation env is a separate instance and is wrapped in Monitor as well,
# so EvalCallback reports unmodified episode rewards and lengths.
# NOTE: EvalCallback runs whole episodes, so the environment must eventually
# signal `terminated` or `truncated`, otherwise evaluation never returns.
eval_env = Monitor(
    HealthcareNetworkEnv(H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances)
)
eval_callback = EvalCallback(eval_env, best_model_save_path=log_dir,
                             log_path=log_dir, eval_freq=500,
                             deterministic=True, render=False)

# Create the model and set the logger
model = PPO('MultiInputPolicy', vec_env, verbose=1)
model.set_logger(new_logger)

# Train the model
model.learn(total_timesteps=1000, callback=eval_callback)

# Save the model
model.save("ppo_healthcare_model")

# Load the model back, re-attaching the training environment
model = PPO.load("ppo_healthcare_model", env=vec_env)




Logging to ./ppo_healthcare_logs/
Using cuda device




In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the training log written by the CSV logger
progress_path = os.path.join(log_dir, "progress.csv")
results = pd.read_csv(progress_path)

timestep_column = "time/total_timesteps"
reward_column = "rollout/ep_rew_mean"
missing_columns = [name for name in (timestep_column, reward_column) if name not in results.columns]
if missing_columns:
    # The rollout reward is only logged after at least one episode has finished.
    raise KeyError(
        f"{progress_path} has no column(s) {missing_columns}; "
        "no completed training episode was logged."
    )

# Some logged rows may not carry a rollout reward (e.g. rows dumped by other
# writers); drop them so the curve is drawn without gaps.
reward_curve = results[[timestep_column, reward_column]].dropna()

# Plot the mean reward over time
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(reward_curve[timestep_column], reward_curve[reward_column], marker="o")
ax.set_xlabel("Timesteps")
ax.set_ylabel("Mean Reward")
ax.set_title("Reward Convergence")
plt.show()
