<a href="https://colab.research.google.com/github/alirezakavianifar/gitTutorial/blob/developer/RLProject3_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.logger import configure
import numpy as np
import os

class HealthcareNetworkEnv(gym.Env):
    """Multi-hospital, multi-product inventory environment with lead times.

    Each step the agent chooses, for every (hospital, product, supplier)
    triple, an order quantity, and for every (source hospital, destination
    hospital, product) triple, a transshipment quantity.  Orders arrive after
    the product's lead time; transshipments are applied immediately between
    hospitals that lie within ``coverage_distance`` of each other.

    The reward is the negative of the total cost incurred in the step
    (transport + transshipment + holding + ordering + shortage).

    Parameters
    ----------
    H, P, R : int
        Number of hospitals, products and suppliers.
    T : int
        Number of periods per episode; the episode is truncated after ``T``
        steps.
    LeadTime : array-like, indexed ``[p]``
        Lead time (in periods) of each product.
    transport_costs : array-like, indexed ``[r, h, p]``
    transshipment_costs : array-like, indexed ``[h1, h2, p]``
    inventory_costs : array-like, indexed ``[h, p]``
    ordering_costs : array-like, indexed ``[p, h]``
    coverage_distance : float
        Maximum distance over which a transshipment is allowed.
    hospital_distances : array-like, indexed ``[h1, h2]``
    """

    def __init__(self, H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances):
        super().__init__()

        # Problem dimensions
        self.H = H  # Number of hospitals
        self.P = P  # Number of products
        self.R = R  # Number of suppliers
        self.T = T  # Number of periods
        self.LeadTime = np.asarray(LeadTime)  # Lead time per product, indexed [p]

        # Costs (np.asarray so plain nested lists are accepted as well)
        self.transport_costs = np.asarray(transport_costs)  # indexed [r, h, p]
        self.transshipment_costs = np.asarray(transshipment_costs)  # indexed [h1, h2, p]
        self.inventory_costs = np.asarray(inventory_costs)  # indexed [h, p]
        self.ordering_costs = np.asarray(ordering_costs)  # indexed [p, h]

        # Coverage and distances
        self.coverage_distance = coverage_distance  # Maximum allowed distance for transshipments
        self.hospital_distances = np.asarray(hospital_distances)  # indexed [h1, h2]

        # Define state space
        self.observation_space = spaces.Dict({
            'inventory': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'demand': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'supply_capacity': spaces.Box(low=0, high=np.inf, shape=(H, P), dtype=np.float32),
            'lead_time': spaces.Box(low=0, high=np.inf, shape=(P,), dtype=np.float32)
        })

        # Define action space as MultiDiscrete: first H*P*R entries are order
        # quantities, the remaining H*H*P entries are transshipment quantities.
        self.action_space = spaces.MultiDiscrete([10] * (H * P * R + H * H * P))

        # reset() populates self.state, self.orders_in_transit and
        # self.current_time.  Its (obs, info) return value must NOT be stored
        # in self.state, otherwise the state would become a tuple.
        self.reset()

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        ``seed`` is forwarded to ``gym.Env.reset`` so that the environment's
        own ``self.np_random`` generator is (re)seeded and demand sampling is
        reproducible.  ``options`` is accepted for Gymnasium API compatibility
        and is currently unused.
        """
        super().reset(seed=seed)
        self.state = {
            'inventory': np.zeros((self.H, self.P), dtype=np.float32),
            'demand': self._sample_demand(),
            'supply_capacity': np.ones((self.H, self.P), dtype=np.float32),
            'lead_time': self.LeadTime.astype(np.float32)
        }
        self.orders_in_transit = []  # (arrival_time, hospital, product, quantity) tuples
        self.current_time = 0
        return self._get_obs(), {}

    def step(self, action):
        """Apply one period of ordering/transshipment decisions.

        Returns the Gymnasium 5-tuple
        ``(observation, reward, terminated, truncated, info)``.  The episode
        is reported as truncated once ``T`` periods have elapsed; it is never
        reported as terminated.
        """
        # Extract order and transship actions from the MultiDiscrete action
        action = np.asarray(action)
        order_action_size = self.H * self.P * self.R
        order = action[:order_action_size].reshape((self.H, self.P, self.R))
        transship = action[order_action_size:].reshape((self.H, self.H, self.P))

        # Update inventory levels based on orders and transshipments
        self._update_inventory(order, transship)

        # Costs are evaluated against the demand that was visible to the agent
        # when it chose the action, so this must happen before resampling.
        reward, demand_loss, costs = self._calculate_reward(order, transship)

        # Draw the demand for the next period
        self._update_demand()

        # Advance the clock first so that exactly T steps fit in one episode.
        self.current_time += 1
        truncated = self._check_done()

        info = {'demand_loss': float(demand_loss), 'total_costs': float(costs)}
        return self._get_obs(), float(reward), False, truncated, info

    def _get_obs(self):
        """Return a copy of the state so callers never alias internal arrays."""
        return {key: value.copy() for key, value in self.state.items()}

    def _sample_demand(self):
        """Draw integer demand in [0, 10) for every (hospital, product) pair."""
        return self.np_random.integers(0, 10, size=(self.H, self.P)).astype(np.float32)

    def _reachable_pairs(self):
        """Boolean (H, H) mask of distinct hospital pairs within coverage distance."""
        within_range = self.hospital_distances <= self.coverage_distance
        return within_range & ~np.eye(self.H, dtype=bool)

    def _update_inventory(self, order, transship):
        """Deliver due orders, queue new orders and apply transshipments."""
        inventory = self.state['inventory']

        # Process orders received this period
        still_in_transit = []
        for order_info in self.orders_in_transit:
            arrival_time, h, p, quantity = order_info
            if self.current_time >= arrival_time:
                inventory[h, p] += quantity
            else:
                still_in_transit.append(order_info)
        self.orders_in_transit = still_in_transit

        # Process new orders: each (h, p, r) order is capped at the supply
        # capacity of (h, p).  Zero-quantity orders are not queued, since they
        # would never change the inventory.
        capped = np.minimum(order, self.state['supply_capacity'][:, :, np.newaxis])
        for h, p, r in zip(*np.nonzero(capped)):
            arrival_time = self.current_time + self.LeadTime[p]
            self.orders_in_transit.append((arrival_time, int(h), int(p), float(capped[h, p, r])))

        # Process transshipments.  Pairs are visited in row-major (h1, h2)
        # order and applied sequentially, because stock moved out of a
        # hospital is no longer available to later transfers from it.
        for h1, h2 in zip(*np.nonzero(self._reachable_pairs())):
            transfer = np.minimum(transship[h1, h2], inventory[h1])
            inventory[h1] -= transfer
            inventory[h2] += transfer

        # Ensure inventory levels are non-negative
        np.maximum(inventory, 0, out=inventory)

    def _calculate_reward(self, order, transship):
        """Return ``(reward, demand_loss, total_costs)`` for the current step.

        ``reward`` is ``-total_costs``.  ``demand_loss`` is the summed absolute
        gap between on-hand inventory and ``(1 + epsilon_p)`` times each
        ordered quantity; it is reported but does not enter the reward.
        Transport, ordering and transshipment costs are charged on the
        requested quantities, not on the capped/feasible ones.
        """
        epsilon_p = 0.01  # Small allowance

        inventory = self.state['inventory']
        demand = self.state['demand']

        # |inventory - (1 + eps) * ordered| summed over hospitals, products, suppliers
        demand_loss = np.abs(order * (1 + epsilon_p) - inventory[:, :, np.newaxis]).sum()

        # transport_costs is indexed [r, h, p]; order is indexed [h, p, r]
        transport_cost = np.sum(order * np.transpose(self.transport_costs, (1, 2, 0)))

        # Holding cost on the post-update inventory
        inventory_cost = np.sum(inventory * self.inventory_costs)

        # Shortage cost, assuming a unit shortage cost of 1 for simplicity
        shortage_cost = np.sum(np.maximum(demand - inventory, 0))

        # Transshipment cost only for distinct hospitals within coverage distance
        reachable = self._reachable_pairs()[:, :, np.newaxis]
        transshipment_cost = np.sum(transship * self.transshipment_costs * reachable)

        # ordering_costs is indexed [p, h]; total ordered per (h, p) sums over suppliers
        ordering_cost = np.sum(order.sum(axis=2) * self.ordering_costs.T)

        total_costs = transport_cost + transshipment_cost + inventory_cost + ordering_cost + shortage_cost

        # Define the reward function: reward is negative of total costs
        reward = -total_costs

        return reward, demand_loss, total_costs

    def _update_demand(self):
        """Resample the demand for the next period."""
        self.state['demand'] = self._sample_demand()

    def _check_done(self):
        """Return True once the planning horizon of ``T`` periods is exhausted."""
        return bool(self.current_time >= self.T)

# Define parameters
H = 5  # Number of hospitals
P = 3  # Number of products
R = 1  # Number of suppliers
T = 10  # Number of periods
LeadTime = np.array([1, 2, 3])  # Different lead times for each product

# A seeded generator keeps the example instance identical between runs,
# so results obtained with it can be compared.
rng = np.random.default_rng(0)

# Define costs (for example purposes, using random values)
transport_costs = rng.random((R, H, P))  # indexed [r, h, p]
transshipment_costs = rng.random((H, H, P))  # indexed [h1, h2, p]
inventory_costs = rng.random((H, P))  # indexed [h, p]
ordering_costs = rng.random((P, H))  # indexed [p, h]

# Define coverage distance and hospital distances
coverage_distance = 5.0
# Random distances between hospitals, made symmetric with a zero diagonal so
# that d(a, b) == d(b, a) and transshipment feasibility does not depend on
# direction.
hospital_distances = rng.random((H, H)) * 10
hospital_distances = (hospital_distances + hospital_distances.T) / 2
np.fill_diagonal(hospital_distances, 0.0)

# Create the environment
env = HealthcareNetworkEnv(H, P, R, T, LeadTime, transport_costs, transshipment_costs, inventory_costs, ordering_costs, coverage_distance, hospital_distances)

# Check the environment against the Gymnasium / Stable-Baselines3 API
check_env(env)




### تغییرات در کلاس `HealthcareNetworkEnv`

#### 1. اضافه شدن ویژگی‌های `orders_in_transit` و `current_time` در متد سازنده (`__init__`)

```python
self.orders_in_transit = []  # Initialize orders in transit as an empty list
self.current_time = 0
```
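این دو ویژگی جدید اضافه شده‌اند تا سفارشات در حال انتقال و زمان فعلی را پیگیری کنند. توجه کنید که در کد بالا این مقداردهی مستقیماً در بدنهٔ `__init__` نوشته نشده است، بلکه از طریق فراخوانی `reset` در انتهای سازنده انجام می‌شود. این ویژگی‌ها به محیط کمک می‌کنند سفارشات در حال انتقال را مدیریت کند و زمان فعلی را نگه دارد.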

#### 2. تغییرات در متد `reset`

```python
self.orders_in_transit = []  # Initialize orders in transit as an empty list
self.current_time = 0
```
این دو خط به متد `reset` اضافه شده‌اند تا اطمینان حاصل شود که با هر بار بازنشانی محیط، سفارشات در حال انتقال پاک شده و زمان فعلی به صفر بازگردانده شود.

#### 3. تغییرات در متد `step`

```python
self.current_time += 1
```
این خط به متد `step` اضافه شده است تا زمان فعلی در هر مرحله افزایش یابد، که به مدل اجازه می‌دهد تا زمان را در طول اپیزود پیگیری کند.

#### 4. تغییرات در متد `_update_inventory`

**پردازش سفارشات دریافتی این دوره:**

```python
new_orders_in_transit = []
for order_info in self.orders_in_transit:
    arrival_time, h, p, quantity = order_info
    if self.current_time >= arrival_time:
        self.state['inventory'][h, p] += quantity
    else:
        new_orders_in_transit.append(order_info)
self.orders_in_transit = new_orders_in_transit
```
این بخش جدید اضافه شده است تا سفارشات دریافتی در این دوره پردازش شوند. اگر زمان فعلی برابر یا بیشتر از زمان رسیدن سفارش باشد، مقدار آن به موجودی افزوده می‌شود، و در غیر این صورت، سفارش همچنان در لیست سفارشات در حال انتقال باقی می‌ماند.

**پردازش سفارشات جدید:**

```python
for h in range(self.H):
    for p in range(self.P):
        for r in range(self.R):
            supply_received = order[h, p, r]
            capacity_available = self.state['supply_capacity'][h, p]
            supply_received = min(supply_received, capacity_available)
            arrival_time = self.current_time + self.LeadTime[p]
            self.orders_in_transit.append((arrival_time, h, p, supply_received))
```
این بخش جدید اضافه شده است تا سفارشات جدید پردازش شوند. برای هر بیمارستان، محصول و تأمین‌کننده، مقدار سفارش به حداکثرِ ظرفیت موجود (`supply_capacity`) محدود می‌شود. سپس زمان رسیدن سفارش (زمان فعلی به‌علاوهٔ `LeadTime` همان محصول) محاسبه شده و سفارش به لیست سفارشات در حال انتقال اضافه می‌شود.



