# Single period inventory environments

> Static inventory environment where a decision only affects the next period (Newsvendor problem)

In [None]:
#| default_exp envs.inventory.single_period

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABC, abstractmethod
from typing import Union, Tuple

from ddopnew.envs.base import BaseEnvironment
from ddopnew.utils import Parameter, MDPInfo
from ddopnew.dataloaders.base import BaseDataLoader
from ddopnew.loss_functions import pinball_loss
from ddopnew.envs.inventory.base import BaseInventoryEnv

import gymnasium as gym

import numpy as np
import time

In [None]:
#| export
class NewsvendorEnv(BaseInventoryEnv, ABC):
    
    """
    Class implementing the Newsvendor problem, working for the single- and multi-item case. If underage_cost and overage_cost
    are scalars and there are multiple SKUs, then the same cost is used for all SKUs. If underage_cost and overage_cost are arrays,
    then they must have the same length as the number of SKUs. Num_SKUs can be set as parameter or inferred from the DataLoader.

    The reward of a step is the negative sum over all SKUs of the pinball loss between the demand
    of the current period and the order quantity (the action).
    """

    def __init__(self,
        underage_cost: Union[np.ndarray, Parameter, int, float] = 1, # underage cost per unit
        overage_cost: Union[np.ndarray, Parameter, int, float] = 1, # overage cost per unit
        q_bound_low: Union[np.ndarray, Parameter, int, float] = 0, # lower bound of the order quantity
        q_bound_high: Union[np.ndarray, Parameter, int, float] = np.inf, # upper bound of the order quantity
        dataloader: BaseDataLoader = None, # dataloader (required, a ValueError is raised if it is None)
        num_SKUs: int | None = None, # if None it will be inferred from the DataLoader
        gamma: float = 1, # discount factor
        horizon_train: Union[str, int] = 100, # if "use_all_data" then horizon is inferred from the DataLoader
        postprocessors: list[object] | None = None,  # default is empty list
        mode: str = "train", # Initial mode (train, val, test) of the environment
        return_truncation: bool = True # whether to return a truncated condition in step function
    ) -> None:

        # The dataloader supplies the observation/action shapes below, so the environment
        # cannot be built without one. Fail with a clear message instead of an AttributeError.
        if dataloader is None:
            raise ValueError("dataloader must be provided.")

        self.horizon_train = horizon_train
        
        self.dataloader = dataloader

        num_SKUs = dataloader.num_units if num_SKUs is None else num_SKUs
        if not isinstance(num_SKUs, int):
            raise ValueError("num_SKUs must be an integer.")
        
        # Parameters are registered before super().__init__ because the spaces and the
        # MDPInfo handed to the parent constructor are built from them.
        self.set_param("num_SKUs", num_SKUs, new=True)
        
        self.set_param("underage_cost", underage_cost, shape=(num_SKUs,), new=True)
        self.set_param("overage_cost", overage_cost, shape=(num_SKUs,), new=True)
        
        self.set_param("q_bound_low", q_bound_low, shape=(num_SKUs,), new=True)
        self.set_param("q_bound_high", q_bound_high, shape=(num_SKUs,), new=True)
        
        self.set_observation_space(dataloader.X_shape)

        self.set_action_space(dataloader.Y_shape, low = self.q_bound_low, high = self.q_bound_high)

        # Debug switch: when set to True, step_ prints the upcoming period and pauses.
        self.print=False

        mdp_info = MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon_train)
        
        super().__init__(mdp_info=mdp_info, postprocessors = postprocessors,  mode=mode, return_truncation=return_truncation)

    def step_(self, 
            action: np.ndarray # order quantity
            ) -> Tuple[np.ndarray, float, bool, bool, dict]:

        """
        Step function implementing the Newsvendor logic. Note that the dataloader will return an observation and a demand,
        which will be relevant in the next period. The observation will be returned directly, while the demand will be 
        temporarily stored under self.demand and used in the next step.

        Returns the tuple (observation, reward, terminated, truncated, info), where info holds copies of
        the demand, the action and the cost per SKU of the period that was just evaluated.
        """

        # Most agents give by default a batch dimension which is not needed for a single period action.
        # If action shape size is 2 and the first dimension is 1, then remove it
        if action.ndim == 2 and action.shape[0] == 1:
            action = np.squeeze(action, axis=0)  # Remove the first dimension

        cost_per_SKU = pinball_loss(self.demand, action, self.underage_cost, self.overage_cost)
        reward = -np.sum(cost_per_SKU) # negative because we want to minimize the cost

        terminated = False # in this problem there is no termination condition
        
        info = dict(
            demand=self.demand.copy(),
            action=action.copy(),
            cost_per_SKU=cost_per_SKU.copy()
        )

        # Set index will set the index and return True if the index is out of bounds
        truncated = self.set_index()

        if truncated:
            # No data is left for a next period: return an all-zero placeholder observation.
            # self.demand is left untouched; reset() loads a fresh demand for the next episode.
            observation = np.zeros_like(self.observation_space.sample()) if self.observation_space is not None else None
        else:
            observation, self.demand = self.get_observation()

            if self.print:
                print("next_period:", self.index+1)
                print("next observation:", observation)
                print("next demand:", self.demand)
                time.sleep(3)

        return observation, reward, terminated, truncated, info

    def reset(self,
        start_index: int | str | None = None, # index to start from
        state: np.ndarray = None # initial state (not used by this environment)
        ) -> np.ndarray:

        """
        Reset function for the Newsvendor problem. It will return the first observation; the matching
        demand is stored under self.demand and used by the next call to the step function.
        For val and test modes, it will by default reset to 0, while for the train mode it depends
        on the parameter "horizon_train" (and on whether the dataloader is a distribution)
        whether a random point in the training data is selected or 0.

        Raises a ValueError if start_index is None and the current mode is not train, val or test.
        """

        if start_index is None:
            if self._mode == "train":
                # Start at 0 when all data is used or when the dataloader flags itself as a
                # distribution; otherwise start at a random point.
                if self.horizon_train == "use_all_data" or getattr(self.dataloader, "is_distribution", False):
                    start_index = 0
                else:
                    start_index = "random"
            elif self._mode in ("val", "test"):
                start_index = 0
            else:
                raise ValueError("Mode not recognized.")

        # The value returned by reset_index is not needed here.
        self.reset_index(start_index)

        observation, self.demand = self.get_observation()
        
        return observation

In [None]:
# Render the API documentation of the NewsvendorEnv class.
show_doc(NewsvendorEnv, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventory.py#L101){target="_blank" style="float:right; font-size:smaller"}

## NewsvendorEnv

>      NewsvendorEnv
>                     (underage_cost:Union[numpy.ndarray,ddopnew.utils.Parameter
>                     ,int,float]=1, overage_cost:Union[numpy.ndarray,ddopnew.ut
>                     ils.Parameter,int,float]=1, q_bound_low:Union[numpy.ndarra
>                     y,ddopnew.utils.Parameter,int,float]=0, q_bound_high:Union
>                     [numpy.ndarray,ddopnew.utils.Parameter,int,float]=inf,
>                     dataloader:ddopnew.dataloaders.base.BaseDataLoader=None,
>                     num_SKUs:int=None, gamma:float=1,
>                     horizon_train:Union[str,int]=100,
>                     postprocessors:list[object]|None=None, mode:str='train',
>                     return_truncation:str=True)

*Class implementing the Newsvendor problem, working for the single- and multi-item case. If underage_cost and overage_cost
are scalars and there are multiple SKUs, then the same cost is used for all SKUs. If underage_cost and overage_cost are arrays,
then they must have the same length as the number of SKUs. Num_SKUs can be set as parameter or inferred from the DataLoader.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| underage_cost | Union | 1 | underage cost per unit |
| overage_cost | Union | 1 | overage cost per unit |
| q_bound_low | Union | 0 | lower bound of the order quantity |
| q_bound_high | Union | inf | upper bound of the order quantity |
| dataloader | BaseDataLoader | None | dataloader |
| num_SKUs | int | None | if None it will be inferred from the DataLoader |
| gamma | float | 1 | discount factor |
| horizon_train | Union | 100 | if "use_all_data" then horizon is inferred from the DataLoader |
| postprocessors | list[object] \| None | None | default is empty list |
| mode | str | train | Initial mode (train, val, test) of the environment |
| return_truncation | str | True | whether to return a truncated condition in step function |
| **Returns** | **None** |  |  |

In [None]:
# Render the documentation of the step method as accessed through NewsvendorEnv.
show_doc(NewsvendorEnv.step)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/base.py#LNone){target="_blank" style="float:right; font-size:smaller"}

### BaseEnvironment.step

>      BaseEnvironment.step (action)

*Step function of the environment. Do not overwrite this function. 
Instead, write the step_ function. Note that the postprocessor is applied here.*

In [None]:
# Render the documentation of the reset method of NewsvendorEnv.
show_doc(NewsvendorEnv.reset)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/envs/inventory.py#L218){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorEnv.reset

>      NewsvendorEnv.reset (start_index:int|str=None, state:numpy.ndarray=None)

*Reset function for the Newsvendor problem. It will return the first observation and demand.
For val and test modes, it will by default reset to 0, while for the train mode it depends
on the parameter "horizon_train" whether a random point in the training data is selected or 0*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| start_index | int \| str | None | index to start from |
| state | ndarray | None | initial state |
| **Returns** | **Tuple** |  |  |

Example usage of ```NewsvendorEnv``` with a distributional dataloader:

In [None]:
from ddopnew.dataloaders.distribution import NormalDistributionDataLoader

def run_test_loop(env):
    """Step `env` with actions sampled from its action space until a step
    reports truncation, printing the outcome of every step."""
    while True:
        sampled_action = env.action_space.sample()
        next_obs, reward, _terminated, truncated, info = env.step(sampled_action)

        print("##### STEP: ", env.index, "#####")
        for label, value in (
            ("reward:", reward),
            ("info:", info),
            ("next observation:", next_obs),
            ("truncated:", truncated),
        ):
            print(label, value)

        # At least one step is always taken; stop after the first truncated one.
        if truncated:
            break

# Dataloader for two units, configured with one mean and one std per unit.
dataloader = NormalDistributionDataLoader(
    mean=[4, 3],
    std=[1, 2],
    num_units=2,
)

# Environment in which overage costs twice as much as underage.
test_env = NewsvendorEnv(
    dataloader=dataloader,
    underage_cost=1,
    overage_cost=2,
    horizon_train=3,
)

# Start at index 0, then step with random actions until truncation.
obs = test_env.reset(start_index=0)
print("##### RESET #####")

run_test_loop(test_env)

AttributeError: 'NewsvendorEnv' object has no attribute 'max_index_episode'

Example usage of ```NewsvendorEnv``` using a fixed dataset:

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler

from ddopnew.dataloaders.tabular import XYDataLoader

# Build a small synthetic dataset and scale it into [0, 1].
# All data is scaled at once and treated as if it were demand.
# With real data, the scaler should be fitted on the training data only.
X, Y = make_regression(
    n_samples=8,
    n_features=2,
    n_targets=2,
    noise=0.1,
    random_state=42,
)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
Y = scaler.fit_transform(Y)

dataloader = XYDataLoader(X, Y, val_index_start=4, test_index_start=6)
test_env = NewsvendorEnv(
    dataloader=dataloader,
    underage_cost=Parameter(np.array([1, 1]), shape=(2,)),
    overage_cost=Parameter(np.array([0.5, 0.5]), shape=(2,)),
    horizon_train="use_all_data",
)

obs = test_env.reset(start_index=0)
print("#################### RESET ####################")

# The first pass runs in the initial (train) mode without an explicit mode switch.
print("#################### RUN IN TRAIN MODE ####################")
run_test_loop(test_env)

# Each remaining pass prints its banner, switches the mode and runs the loop again.
for banner, switch_mode in (
    ("#################### RUN IN VAL MODE ####################", test_env.val),
    ("#################### RUN IN TEST MODE ####################", test_env.test),
    ("#################### RUN IN TRAIN MODE AGAIN ####################", test_env.train),
):
    print(banner)
    switch_mode()
    run_test_loop(test_env)


#################### RESET ####################
#################### RUN IN TRAIN MODE ####################
##### STEP:  1 #####
reward: -0.6789389739421594
info: {'demand': array([0.41801109, 0.41814421]), 'action': array([0.04, 1.02], dtype=float32), 'cost_per_SKU': array([0.37801109, 0.30092789])}
next observation: [0.51654708 0.67238019]
truncated: False
##### STEP:  2 #####
reward: -0.31011557616595586
info: {'demand': array([0.61617324, 0.52211535]), 'action': array([0.35, 0.61], dtype=float32), 'cost_per_SKU': array([0.26617325, 0.04394233])}
next observation: [0.71467365 0.37996181]
truncated: False
##### STEP:  3 #####
reward: -0.8341675952170908
info: {'demand': array([0.45242345, 0.60924132]), 'action': array([0.67, 2.06], dtype=float32), 'cost_per_SKU': array([0.10878828, 0.72537931])}
next observation: [0.78011439 1.        ]
truncated: False
##### STEP:  4 #####
reward: -1.1401562849186946
info: {'demand': array([1.        , 0.88968748]), 'action': array([0.08, 1.33], dty

In [None]:
# #| export
# class NewsvendorVariableSLEnv(NewsvendorEnv, ABC):
#     """

#     """
#     def __init__(self,
#         underage_cost: Union[np.ndarray, Parameter] = np.array([1]),
#         overage_cost: Union[np.ndarray, Parameter] = np.array([1]),
#         q_bound_low: Union[np.ndarray, Parameter] = np.array([0]),
#         q_bound_high: Union[np.ndarray, Parameter] = np.array([np.inf]),
#         dataloader: BaseDataLoader = None,
#         gamma: float = 1,
#         horizon: int = 100,

#         low_sl: np.ndarray = np.array([0.1]),
#         high_sl: np.ndarray = np.array([0.9]),

#     ) -> None:
    
#         super().__init__( 
#             underage_cost=underage_cost,
#             overage_cost=overage_cost,
#             q_bound_low=q_bound_low,
#             q_bound_high=q_bound_high,
#             dataloader=dataloader,
#             gamma=gamma,
#             horizon=horizon,
#         )

#         self.low_sl = set_env_parameter(low_sl, self.num_SKUs)
#         self.high_sl = set_env_parameter(high_sl, self.num_SKUs)
    
#     def set_observation_space(self,
#                             shape: tuple,
#                             low: Union[np.ndarray, float] = -np.inf,
#                             high: Union[np.ndarray, float] = np.inf) -> None:
        
#         '''
#         Set the observation space of the environment.
#         '''

#         ### THIS MAKES NO SENSE:

#         # if shape is not None:
#         #     if not isinstance(shape, tuple):
#         #         raise ValueError("Shape must be a tuple.")
            
#         #     shape = shape[1:]
        
#         #     self.observation_space = gym.spaces.Dict({
#         #         'X': gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32),
#         #         'sl': gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)
#         #     })
#         # else:
#         #     self.observation_space = gym.spaces.Dict({
#         #         'sl': gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)
#         #     })

#     def get_observation(self):
#         """
#         Return the current observation.
#         """
        
#         X_item, Y_item = self.dataloader[self.index]

#         underage_cost, overage_cost, sl = self.draw_service_level()

#         self.underage_cost.set_value(underage_cost, (self.num_SKUs,))
#         self.overage_cost.set_value(overage_cost, (self.num_SKUs,))

#         if X_item is not None:
#             obs = {'X': X_item, 'sl': sl}
#         else:
#             obs = {'sl': sl}

#         return obs, Y_item
    
#     def draw_service_level(self):
        
#         sl = np.random.uniform(self.low_sl, self.high_sl, self.num_SKUs)

#         overage_cost = np.ones_like(sl)
#         underage_cost = np.ones_like(sl)

#         # Calculate underage_cost where sl < 0.5
#         underage_cost = np.where(sl < 0.5, sl / (1 - sl), underage_cost)
        
#         # Calculate overage_cost where sl >= 0.5
#         overage_cost = np.where(sl >= 0.5, 1 / sl -1, overage_cost)
        
#         return underage_cost, overage_cost, sl

In [None]:
# test_env = NewsvendorVariableSLEnv(underage_cost=Parameter(np.array([1,1]), shape = (2,)), overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)), dataloader=dataloader, horizon=3)

# print(test_env.observation_space)
# print(test_env.observation_space.sample())

# obs = test_env.reset(start_index=0)
# print("##### RESET #####")
# print("obs:", obs)

# truncated = False
# while not truncated:
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)
#     print("##### STEP: ", test_env.index, "#####")
#     print("reward:", reward)
#     print("info:", info)
#     print("obs:", obs)
#     print("truncated:", truncated)

#### Simple Example with synthetic data:

In [None]:
# from sklearn.datasets import make_regression
# from sklearn.preprocessing import MinMaxScaler

# from ddopnew.dataloaders.tabular import XYDataLoader

# # create a simple dataset bounded between 0 and 1
# X, Y = make_regression(n_samples=100, n_features=2, n_targets=2, noise=0.1)
# scaler = MinMaxScaler()
# X = scaler.fit_transform(X)
# Y = scaler.fit_transform(Y)

# dataloader = XYDataLoader(X, Y)
# test_env = NewsvendorVariableSLEnv(underage_cost=Parameter(np.array([1,1]), shape = (2,)), overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)), dataloader=dataloader, horizon=len(dataloader))

# print(test_env.observation_space)
# print(test_env.observation_space.sample())

# obs = test_env.reset(start_index=0)
# print("##### RESET #####")
# print("obs:", obs)

# truncated = False
# while not truncated:
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)
#     print("##### STEP: ", test_env.index, "#####")
#     print("reward:", reward)
#     print("info:", info)
#     print("obs:", obs)
#     print("truncated:", truncated)

In [None]:
#| hide
# Run nbdev's export step for this notebook.
import nbdev; nbdev.nbdev_export()