# Single period inventory environments

> Static inventory environment where a decision only affects the next period (Newsvendor problem)

In [None]:
#| default_exp envs.inventory

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABC, abstractmethod
from typing import Union, Tuple

from ddopnew.envs.base import BaseEnvironment
from ddopnew.utils import Parameter, MDPInfo
from ddopnew.dataloaders.base import BaseDataLoader
from ddopnew.loss_functions import pinball_loss

import gymnasium as gym

import numpy as np
import time

In [None]:
#| export
class BaseInventoryEnv(BaseEnvironment):
    """
    Common foundation for inventory management environments, derived from BaseEnvironment.

    Offers default helpers that build Box observation/action spaces from dataloader shapes
    and that read the sample at the current index from the dataloader.
    """

    def __init__(self,
        mdp_info: MDPInfo # MDP information, forwarded unchanged to BaseEnvironment
        ) -> None:

        super().__init__(mdp_info)

    def set_observation_space(self,
                            shape: tuple, # shape of the dataloader features
                            low: Union[np.ndarray, float] = -np.inf, # lower bound of the observation space
                            high: Union[np.ndarray, float] = np.inf, # upper bound of the observation space
                            samples_dim_included = True # whether the first dimension of the shape input is the number of samples
                            ) -> None:
        '''
        Create `self.observation_space` as a float32 Box derived from the feature shape.
        This default covers simple observation spaces; environments with more complex
        observations should override it. When `samples_dim_included` is True, the leading
        entry of `shape` is treated as n_samples and dropped. A `shape` of None sets the
        observation space to None. Raises a ValueError if `shape` is neither None nor a tuple.

        '''

        # No external features available (e.g., parametric NV): there is nothing to observe
        if shape is None:
            self.observation_space = None
            return

        if not isinstance(shape, tuple):
            raise ValueError("Shape must be a tuple.")

        # Strip the leading n_samples entry when the caller passed the full data shape
        feature_shape = shape[1:] if samples_dim_included else shape

        self.observation_space = gym.spaces.Box(low=low, high=high, shape=feature_shape, dtype=np.float32)

    def set_action_space(self,
                            shape: tuple, # shape of the dataloader target
                            low: Union[np.ndarray, float] = -np.inf, # lower bound of the action space
                            high: Union[np.ndarray, float] = np.inf, # upper bound of the action space
                            samples_dim_included = True # whether the first dimension of the shape input is the number of samples
                            ) -> None:
        '''
        Create `self.action_space` as a float32 Box derived from the target shape.
        This default covers simple action spaces; environments with more complex
        actions should override it. When `samples_dim_included` is True, the leading
        entry of `shape` is treated as n_samples and dropped.
        Raises a ValueError if `shape` is not a tuple.

        '''

        if not isinstance(shape, tuple):
            raise ValueError("Shape must be a tuple.")

        # Strip the leading n_samples entry when the caller passed the full data shape
        target_shape = shape[1:] if samples_dim_included else shape

        self.action_space = gym.spaces.Box(low=low, high=high, shape=target_shape, dtype=np.float32)

    def get_observation(self):

        """
        Read the sample stored at `self.index` from the dataloader and return it as a
        two-element (X, Y) tuple. This covers the simple case of a plain x,y pair;
        environments with more complex observations should override this method.

        """

        features, target = self.dataloader[self.index]
        return features, target

In [None]:
show_doc(BaseInventoryEnv, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventorya.py#L16){target="_blank" style="float:right; font-size:smaller"}

## BaseInventoryEnv

>      BaseInventoryEnv (mdp_info:ddopnew.utils.MDPInfo)

*Base class for inventory management environments. This class inherits from BaseEnvironment.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| mdp_info | MDPInfo |  |
| **Returns** | **None** |  |

In [None]:
show_doc(BaseInventoryEnv.set_observation_space)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventorya.py#L30){target="_blank" style="float:right; font-size:smaller"}

### BaseInventoryEnv.set_observation_space

>      BaseInventoryEnv.set_observation_space (shape:tuple,
>                                              low:Union[numpy.ndarray,float]=-
>                                              inf, high:Union[numpy.ndarray,flo
>                                              at]=inf,
>                                              samples_dim_included=True)

*Set the observation space of the environment.
This is a standard function for simple observation spaces. For more complex observation spaces,
this function should be overwritten. Note that it is assumed that the first dimension
is n_samples that is not relevant for the observation space.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| shape | tuple |  | shape of the dataloader features |
| low | Union | -inf | lower bound of the observation space |
| high | Union | inf | upper bound of the observation space |
| samples_dim_included | bool | True | whether the first dimension of the shape input is the number of samples |
| **Returns** | **None** |  |  |

In [None]:
show_doc(BaseInventoryEnv.set_action_space)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventorya.py#L48){target="_blank" style="float:right; font-size:smaller"}

### BaseInventoryEnv.set_action_space

>      BaseInventoryEnv.set_action_space (shape:tuple,
>                                         low:Union[numpy.ndarray,float]=-inf,
>                                         high:Union[numpy.ndarray,float]=inf,
>                                         samples_dim_included=True)

*Set the action space of the environment.
This is a standard function for simple action spaces. For more complex action spaces,
this function should be overwritten. Note that it is assumed that the first dimension
is n_samples that is not relevant for the action space.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| shape | tuple |  | shape of the dataloader target |
| low | Union | -inf | lower bound of the action space |
| high | Union | inf | upper bound of the action space |
| samples_dim_included | bool | True | whether the first dimension of the shape input is the number of samples |
| **Returns** | **None** |  |  |

In [None]:
show_doc(BaseInventoryEnv.get_observation)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventorya.py#L63){target="_blank" style="float:right; font-size:smaller"}

### BaseInventoryEnv.get_observation

>      BaseInventoryEnv.get_observation ()

*Return the current observation. This function is for the simple case where the observation
is only an x,y pair. For more complex observations, this function should be overwritten.*

In [None]:
#| export
class NewsvendorEnv(BaseInventoryEnv, ABC):
    
    """
    Class implementing the Newsvendor problem, working for the single- and multi-item case. If underage_cost and overage_cost
    are scalars and there are multiple SKUs, then the same cost is used for all SKUs. If underage_cost and overage_cost are arrays,
    then they must have the same length as the number of SKUs. Num_SKUs can be set as parameter or inferred from the DataLoader.

    The demand that belongs to the current period is kept in `self.demand`. It is first set by `reset`, so
    `reset` must be called before the first call to `step`.
    """

    def __init__(self,
        underage_cost: Union[np.ndarray, Parameter, int, float] = 1, # underage cost per unit
        overage_cost: Union[np.ndarray, Parameter, int, float] = 1, # overage cost per unit
        q_bound_low: Union[np.ndarray, Parameter, int, float] = 0, # lower bound of the order quantity
        q_bound_high: Union[np.ndarray, Parameter, int, float] = np.inf, # upper bound of the order quantity
        dataloader: BaseDataLoader = None, # dataloader (required, passing None raises a ValueError)
        num_SKUs: Union[int, None] = None, # if None it will be inferred from the DataLoader
        gamma: float = 1, # discount factor
        horizon_train: Union[str, int] = 100, # if "use_all_data" then horizon is inferred from the DataLoader
    ) -> None:

        # The dataloader supplies num_units and the X/Y shapes used below, so fail early
        # with a clear message instead of an AttributeError on None.
        if dataloader is None:
            raise ValueError("A dataloader must be provided.")

        self.horizon_train = horizon_train
        
        self.dataloader = dataloader

        num_SKUs = dataloader.num_units if num_SKUs is None else num_SKUs
        # Numpy integer scalars are not instances of int; convert them so they are accepted as well
        if isinstance(num_SKUs, np.integer):
            num_SKUs = int(num_SKUs)
        if not isinstance(num_SKUs, int):
            raise ValueError("num_SKUs must be an integer.")
        
        self.set_param("num_SKUs", num_SKUs, new=True)
        
        self.set_param("underage_cost", underage_cost, shape=(num_SKUs,), new=True)
        self.set_param("overage_cost", overage_cost, shape=(num_SKUs,), new=True)
        
        self.set_param("q_bound_low", q_bound_low, shape=(num_SKUs,), new=True)
        self.set_param("q_bound_high", q_bound_high, shape=(num_SKUs,), new=True)
        
        # NOTE(review): q_bound_low / q_bound_high are stored as parameters but are not passed to
        # set_action_space, so the action space uses the default bounds (-inf, inf) - confirm this is intended.
        self.set_observation_space(dataloader.X_shape)
        self.set_action_space(dataloader.Y_shape)

        # Debug switch: when set to True, step() prints actions/observations and pauses after each step
        self.print=False
        
        super().__init__(mdp_info=MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon_train))

    def step(self, 
            action: np.ndarray # order quantity
            ) -> Tuple[Union[np.ndarray, None], float, bool, bool, dict]:

        """
        Step function implementing the Newsvendor logic. Note that the dataloader will return an observation and a demand,
        which will be relevant in the next period. The observation will be returned directly, while the demand will be 
        temporarily stored under self.demand and used in the next step.

        Returns the tuple (observation, reward, terminated, truncated, info). The reward is the negative sum of the
        pinball loss over all SKUs, terminated is always False, and the observation is None when the step is truncated.
        The info dict contains copies of the demand, the action and the cost per SKU.

        """

        # Accept array-like actions (e.g., lists or scalars) as well; ndarrays are passed through unchanged
        action = np.asarray(action)

        # Most agents give by default a batch dimension which is not needed for a single period action.
        # If the action has 2 dimensions and the first dimension is 1, then remove it
        if action.ndim == 2 and action.shape[0] == 1:
            action = np.squeeze(action, axis=0)  # Remove the first dimension

        if self.print:
            print(action)
        
        # Calculate cost
        cost_per_SKU = pinball_loss(self.demand, action, self.underage_cost, self.overage_cost)
        reward = -np.sum(cost_per_SKU) # negative because we want to minimize the cost

        terminated = False # in this problem there is no termination condition
        
        # Copies are stored so that later changes to the arrays do not alter the logged info
        info = dict(
            demand=self.demand.copy(),
            action=action.copy(),
            cost_per_SKU=cost_per_SKU.copy()
        )

        # Set index will set the index and return True if the index is out of bounds
        truncated = self.set_index()

        if truncated:
            # No next observation when the episode is truncated.
            return None, reward, terminated, truncated, info

        observation, self.demand = self.get_observation()

        if self.print:
            print("##################")
            print("observation:", observation)
            print("next demand:", self.demand)
            time.sleep(3) # pause so that the debug output can be read

        return observation, reward, terminated, truncated, info

    def reset(self,
        start_index: int | str | None = None # index to start from
        ) -> Tuple[np.ndarray, bool]:

        """
        Reset function for the Newsvendor problem. It returns the first observation and the truncated flag
        given by reset_index, while the first demand is stored under self.demand.
        For val and test modes, it will by default reset to 0, while for the train mode it resets to 0 if
        the parameter "horizon_train" is "use_all_data" or the dataloader is a distribution, and to a
        random point in the training data otherwise.
        Raises a ValueError if no start_index is given and the mode is not one of "train", "val" or "test".
        """

        if start_index is None:
            if self._mode == "train":
                uses_all_data = self.horizon_train == "use_all_data"
                # Dataloaders without the attribute are treated as regular (non-distribution) datasets
                is_distribution = getattr(self.dataloader, "is_distribution", False)
                start_index = 0 if (uses_all_data or is_distribution) else "random"
            elif self._mode in ("val", "test"):
                start_index = 0
            else:
                raise ValueError("Mode not recognized.")

        truncated = self.reset_index(start_index)

        observation, self.demand = self.get_observation()
        
        return observation, truncated

In [None]:
show_doc(NewsvendorEnv, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventory.py#L93){target="_blank" style="float:right; font-size:smaller"}

## NewsvendorEnv

>      NewsvendorEnv
>                     (underage_cost:Union[numpy.ndarray,ddopnew.utils.Parameter
>                     ,int,float]=1, overage_cost:Union[numpy.ndarray,ddopnew.ut
>                     ils.Parameter,int,float]=1, q_bound_low:Union[numpy.ndarra
>                     y,ddopnew.utils.Parameter,int,float]=0, q_bound_high:Union
>                     [numpy.ndarray,ddopnew.utils.Parameter,int,float]=inf,
>                     dataloader:ddopnew.dataloaders.base.BaseDataLoader=None,
>                     num_SKUs:int=None, gamma:float=1,
>                     horizon_train:Union[str,int]=100)

*Class implementing the Newsvendor problem, working for the single- and multi-item case. If underage_cost and overage_cost
are scalars and there are multiple SKUs, then the same cost is used for all SKUs. If underage_cost and overage_cost are arrays,
then they must have the same length as the number of SKUs. Num_SKUs can be set as parameter or inferred from the DataLoader.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| underage_cost | Union | 1 | underage cost per unit |
| overage_cost | Union | 1 | overage cost per unit |
| q_bound_low | Union | 0 | lower bound of the order quantity |
| q_bound_high | Union | inf | upper bound of the order quantity |
| dataloader | BaseDataLoader | None | dataloader |
| num_SKUs | int | None | if None it will be inferred from the DataLoader |
| gamma | float | 1 | discount factor |
| horizon_train | Union | 100 | if "use_all_data" then horizon is inferred from the DataLoader |
| **Returns** | **None** |  |  |

In [None]:
show_doc(NewsvendorEnv.step)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventory.py#L133){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorEnv.step

>      NewsvendorEnv.step (action:numpy.ndarray)

*Step function implementing the Newsvendor logic. Note that the dataloader will return an observation and a demand,
which will be relevant in the next period. The observation will be returned directly, while the demand will be 
temporarily stored under self.demand and used in the next step.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| action | ndarray | order quantity |
| **Returns** | **Tuple** |  |

In [None]:
show_doc(NewsvendorEnv.reset)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventory.py#L169){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorEnv.reset

>      NewsvendorEnv.reset (start_index:int|str=None)

*Reset function for the Newsvendor problem. It will return the first observation and demand.
For val and test modes, it will by default reset to 0, while for the train mode it depends
on the parameter "horizon_train" whether a random point in the training data is selected or 0*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| start_index | int \| str | None | index to start from |
| **Returns** | **Tuple** |  |  |

Example usage of ```NewsvendorEnv``` with a distributional dataloader:

In [None]:
from ddopnew.dataloaders.distribution import NormalDistributionDataLoader

def run_test_loop(env):
    """
    Drive `env` with randomly sampled actions until a step reports truncation.

    After every step the current `env.index`, the reward, the info dict, the next
    observation and the truncated flag are printed. At least one step is always taken.
    """
    while True:
        sampled_action = env.action_space.sample()
        next_obs, step_reward, _terminated, is_truncated, step_info = env.step(sampled_action)

        print("##### STEP: ", env.index, "#####")
        report = (
            ("reward:", step_reward),
            ("info:", step_info),
            ("next observation:", next_obs),
            ("truncated:", is_truncated),
        )
        for label, value in report:
            print(label, value)

        if is_truncated:
            break

# Demand for two units, generated by the normal-distribution dataloader with the given means and stds
dataloader = NormalDistributionDataLoader(
    mean=[4, 3],
    std=[1, 2],
    num_units=2,
)

# Scalar costs are shared by all SKUs; the training horizon is set to 3
test_env = NewsvendorEnv(
    underage_cost=1,
    overage_cost=2,
    dataloader=dataloader,
    horizon_train=3,
)

# Start at the first index, then step with random actions until the environment reports truncation
obs, truncated = test_env.reset(start_index=0)
print("##### RESET #####")

run_test_loop(test_env)

##### RESET #####
##### STEP:  1 #####
reward: -9.683512342207433
info: {'demand': array([2.96523275, 5.50928777]), 'action': array([-0.565432 , -0.6435598], dtype=float32), 'cost_per_SKU': array([3.53066476, 6.15284758])}
next observation: None
truncated: False
##### STEP:  2 #####
reward: -6.27484201285771
info: {'demand': array([4.19514687, 5.27463118]), 'action': array([1.4952474, 1.6996887], dtype=float32), 'cost_per_SKU': array([2.6998995 , 3.57494251])}
next observation: None
truncated: False
##### STEP:  3 #####
reward: -5.131366097837095
info: {'demand': array([4.95714161, 0.94625252]), 'action': array([ 1.6736219 , -0.90159386], dtype=float32), 'cost_per_SKU': array([3.28351971, 1.84784639])}
next observation: None
truncated: True


Example usage of ```NewsvendorEnv``` using a fixed dataset:

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler

from ddopnew.dataloaders.tabular import XYDataLoader

# Build a small synthetic dataset and rescale it into the range [0, 1].
# For simplicity all of the data is scaled at once and the targets are treated as demand.
# With real data, the scaler should be fitted on the training data only.
X, Y = make_regression(
    n_samples=8,
    n_features=2,
    n_targets=2,
    noise=0.1,
    random_state=42,
)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
Y = scaler.fit_transform(Y)

# Validation data starts at index 4 and test data at index 6
dataloader = XYDataLoader(X, Y, val_index_start = 4, test_index_start = 6)

# One cost entry per SKU, wrapped as Parameter objects
underage = Parameter(np.array([1,1]), shape = (2,))
overage = Parameter(np.array([0.5,0.5]), shape = (2,))
test_env = NewsvendorEnv(
    underage_cost=underage,
    overage_cost=overage,
    dataloader=dataloader,
    horizon_train="use_all_data",
)

obs, truncated= test_env.reset(start_index=0)
print("#################### RESET ####################")

# Run the random-action loop once per mode, switching the mode before each run
print("#################### RUN IN TRAIN MODE ####################")
run_test_loop(test_env)

print("#################### RUN IN VAL MODE ####################")
test_env.val()
run_test_loop(test_env)

print("#################### RUN IN TEST MODE ####################")
test_env.test()
run_test_loop(test_env)

print("#################### RUN IN TRAIN MODE AGAIN ####################")
test_env.train()
run_test_loop(test_env)


#################### RESET ####################
#################### RUN IN TRAIN MODE ####################
##### STEP:  1 #####
reward: -0.3755964083686728
info: {'demand': array([0.41801109, 0.41814421]), 'action': array([1.0367227, 0.3519036], dtype=float32), 'cost_per_SKU': array([0.30935579, 0.06624062])}
next observation: [0.51654708 0.67238019]
truncated: False
##### STEP:  2 #####
reward: -0.9298401384556239
info: {'demand': array([0.61617324, 0.52211535]), 'action': array([-0.15317433,  0.36162278], dtype=float32), 'cost_per_SKU': array([0.76934757, 0.16049257])}
next observation: [0.71467365 0.37996181]
truncated: False
##### STEP:  3 #####
reward: -1.295123083725155
info: {'demand': array([0.45242345, 0.60924132]), 'action': array([ 1.6391478 , -0.09251961], dtype=float32), 'cost_per_SKU': array([0.59336215, 0.70176093])}
next observation: [0.78011439 1.        ]
truncated: False
##### STEP:  4 #####
reward: -3.6015985915564586
info: {'demand': array([1.        , 0.88968748]

In [None]:
# #| export
# class NewsvendorVariableSLEnv(NewsvendorEnv, ABC):
#     """

#     """
#     def __init__(self,
#         underage_cost: Union[np.ndarray, Parameter] = np.array([1]),
#         overage_cost: Union[np.ndarray, Parameter] = np.array([1]),
#         q_bound_low: Union[np.ndarray, Parameter] = np.array([0]),
#         q_bound_high: Union[np.ndarray, Parameter] = np.array([np.inf]),
#         dataloader: BaseDataLoader = None,
#         gamma: float = 1,
#         horizon: int = 100,

#         low_sl: np.ndarray = np.array([0.1]),
#         high_sl: np.ndarray = np.array([0.9]),

#     ) -> None:
    
#         super().__init__( 
#             underage_cost=underage_cost,
#             overage_cost=overage_cost,
#             q_bound_low=q_bound_low,
#             q_bound_high=q_bound_high,
#             dataloader=dataloader,
#             gamma=gamma,
#             horizon=horizon,
#         )

#         self.low_sl = set_env_parameter(low_sl, self.num_SKUs)
#         self.high_sl = set_env_parameter(high_sl, self.num_SKUs)
    
#     def set_observation_space(self,
#                             shape: tuple,
#                             low: Union[np.ndarray, float] = -np.inf,
#                             high: Union[np.ndarray, float] = np.inf) -> None:
        
#         '''
#         Set the observation space of the environment.
#         '''

#         ### THIS MAKES NO SENSE:

#         # if shape is not None:
#         #     if not isinstance(shape, tuple):
#         #         raise ValueError("Shape must be a tuple.")
            
#         #     shape = shape[1:]
        
#         #     self.observation_space = gym.spaces.Dict({
#         #         'X': gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32),
#         #         'sl': gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)
#         #     })
#         # else:
#         #     self.observation_space = gym.spaces.Dict({
#         #         'sl': gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)
#         #     })

#     def get_observation(self):
#         """
#         Return the current observation.
#         """
        
#         X_item, Y_item = self.dataloader[self.index]

#         underage_cost, overage_cost, sl = self.draw_service_level()

#         self.underage_cost.set_value(underage_cost, (self.num_SKUs,))
#         self.overage_cost.set_value(overage_cost, (self.num_SKUs,))

#         if X_item is not None:
#             obs = {'X': X_item, 'sl': sl}
#         else:
#             obs = {'sl': sl}

#         return obs, Y_item
    
#     def draw_service_level(self):
        
#         sl = np.random.uniform(self.low_sl, self.high_sl, self.num_SKUs)

#         overage_cost = np.ones_like(sl)
#         underage_cost = np.ones_like(sl)

#         # # Calculate underage_cost where sl >= 0.5
#         underage_cost = np.where(sl < 0.5, sl / (1 - sl), underage_cost)
        
#         # Calculate overage_cost where sl < 0.5
#         overage_cost = np.where(sl >= 0.5, 1 / sl -1, overage_cost)
        
#         return underage_cost, overage_cost, sl

In [None]:
# test_env = NewsvendorVariableSLEnv(underage_cost=Parameter(np.array([1,1]), shape = (2,)), overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)), dataloader=dataloader, horizon=3)

# print(test_env.observation_space)
# print(test_env.observation_space.sample())

# obs = test_env.reset(start_index=0)
# print("##### RESET #####")
# print("obs:", obs)

# truncated = False
# while not truncated:
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)
#     print("##### STEP: ", test_env.index, "#####")
#     print("reward:", reward)
#     print("info:", info)
#     print("obs:", obs)
#     print("truncated:", truncated)

#### Simple Example with synthetic data:

In [None]:
# from sklearn.datasets import make_regression
# from sklearn.preprocessing import MinMaxScaler

# from ddopnew.dataloaders.tabular import XYDataLoader

# # create a simple dataset bounded between 0 and 1
# X, Y = make_regression(n_samples=100, n_features=2, n_targets=2, noise=0.1)
# scaler = MinMaxScaler()
# X = scaler.fit_transform(X)
# Y = scaler.fit_transform(Y)

# dataloader = XYDataLoader(X, Y)
# test_env = NewsvendorVariableSLEnv(underage_cost=Parameter(np.array([1,1]), shape = (2,)), overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)), dataloader=dataloader, horizon=len(dataloader))

# print(test_env.observation_space)
# print(test_env.observation_space.sample())

# obs = test_env.reset(start_index=0)
# print("##### RESET #####")
# print("obs:", obs)

# truncated = False
# while not truncated:
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)
#     print("##### STEP: ", test_env.index, "#####")
#     print("reward:", reward)
#     print("info:", info)
#     print("obs:", obs)
#     print("truncated:", truncated)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()