# Single period inventory environments

> Base class for inventory environments and the single-period newsvendor environment built on top of it.

In [None]:
#| default_exp envs.inventory

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABC, abstractmethod
from typing import Union

from ddopnew.envs.base import BaseEnvironment
from ddopnew.utils import Parameter, MDPInfo
from ddopnew.dataloaders.base import BaseDataLoader
from ddopnew.loss_functions import pinball_loss

import gymnasium as gym

import numpy as np
import time

In [None]:
#| export
class BaseInventoryEnv(BaseEnvironment):
    """
    Shared base class for inventory environments.

    Offers default builders for ``gym.spaces.Box`` observation and action
    spaces and a default observation getter that reads a single item from
    ``self.dataloader``. Subclasses with richer spaces or observations are
    expected to override the corresponding method.
    """

    def __init__(self, mdp_info: MDPInfo) -> None:
        """
        Constructor.

        Args:
            mdp_info (MDPInfo): the information of the environment; it is
                forwarded unchanged to the parent constructor.

        """
        super().__init__(mdp_info)

    @staticmethod
    def _shape_without_samples(shape: tuple, samples_dim_included) -> tuple:
        """
        Validate ``shape`` and return the shape to use for a single sample.

        Raises:
            ValueError: if ``shape`` is not a tuple.
        """
        if not isinstance(shape, tuple):
            raise ValueError("Shape must be a tuple.")

        # The leading dimension is assumed to count samples, so it is dropped.
        return shape[1:] if samples_dim_included else shape

    def set_observation_space(self,
                            shape: tuple,
                            low: Union[np.ndarray, float] = -np.inf,
                            high: Union[np.ndarray, float] = np.inf,
                            samples_dim_included = True) -> None:
        """
        Set the observation space of the environment.

        This is the standard implementation for simple observation spaces;
        override it for more complex ones.

        Args:
            shape: shape of the feature data, or None when there are no
                features (e.g., parametric newsvendor); in that case the
                observation space is set to None.
            low: lower bound of the Box space.
            high: upper bound of the Box space.
            samples_dim_included: if truthy, the first dimension of ``shape``
                is assumed to be the number of samples and is removed.

        Raises:
            ValueError: if ``shape`` is neither None nor a tuple.
        """
        if shape is None:
            # No external information available: there is nothing to observe.
            self.observation_space = None
            return

        box_shape = self._shape_without_samples(shape, samples_dim_included)
        self.observation_space = gym.spaces.Box(
            low=low, high=high, shape=box_shape, dtype=np.float32
        )

    def set_action_space(self,
                        shape: tuple,
                        low: Union[np.ndarray, float] = 0,
                        high: Union[np.ndarray, float] = np.inf,
                        samples_dim_included = True) -> None:
        """
        Set the action space of the environment.

        This is the standard implementation for simple action spaces;
        override it for more complex ones.

        Args:
            shape: shape of the target data.
            low: lower bound of the Box space.
            high: upper bound of the Box space.
            samples_dim_included: if truthy, the first dimension of ``shape``
                is assumed to be the number of samples and is removed.

        Raises:
            ValueError: if ``shape`` is not a tuple.
        """
        box_shape = self._shape_without_samples(shape, samples_dim_included)
        self.action_space = gym.spaces.Box(
            low=low, high=high, shape=box_shape, dtype=np.float32
        )

    def get_observation(self):
        """
        Return the current observation.

        Covers the simple case where an observation is just the (x, y) pair
        stored in ``self.dataloader`` at position ``self.index``. Override
        this method for more complex observations.
        """
        features, target = self.dataloader[self.index]
        return features, target

In [None]:
#| export
class NewsvendorEnv(BaseInventoryEnv, ABC):

    """
    Single-period (newsvendor) inventory environment.

    At every step the agent submits one order quantity per SKU (the action).
    The reward is the negative sum over SKUs of the cost returned by
    ``pinball_loss`` for the current demand, the action and the underage and
    overage costs. Features and demand are read from ``dataloader`` at the
    environment's current index.

    Args:
        underage_cost: underage cost, registered via ``set_param`` with shape
            ``(num_SKUs,)``.
        overage_cost: overage cost, registered via ``set_param`` with shape
            ``(num_SKUs,)``.
        q_bound_low: lower bound on the order quantity, registered via
            ``set_param`` with shape ``(num_SKUs,)``.
        q_bound_high: upper bound on the order quantity, registered via
            ``set_param`` with shape ``(num_SKUs,)``.
        dataloader: source of (features, demand) items. Required, despite the
            ``None`` default in the signature.
        num_SKUs: number of SKUs; if None it is taken from
            ``dataloader.num_units``.
        gamma: discount factor passed to ``MDPInfo``.
        horizon_train: training horizon passed to ``MDPInfo``; the string
            ``"use_all_data"`` makes ``reset`` start training episodes at
            index 0.

    Raises:
        ValueError: if ``dataloader`` is None or ``num_SKUs`` is not an int.
    """

    def __init__(self,
        underage_cost: Union[np.ndarray, Parameter, int, float] = 1,
        overage_cost: Union[np.ndarray, Parameter, int, float] = 1,
        q_bound_low: Union[np.ndarray, Parameter, int, float] = 0,
        q_bound_high: Union[np.ndarray, Parameter, int, float] = np.inf,
        dataloader: BaseDataLoader = None,
        num_SKUs: Union[int] = None, # if None it will be inferred from the DataLoader
        gamma: float = 1,
        horizon_train: Union[str, int] = 100, # if "use_all_data" then horizon is inferred from the DataLoader
    ) -> None:

        # The dataloader is dereferenced unconditionally below (shapes, number
        # of units), so fail early with a clear message instead of an opaque
        # AttributeError on None.
        if dataloader is None:
            raise ValueError("A dataloader must be provided to NewsvendorEnv.")

        self.horizon_train = horizon_train

        self.dataloader = dataloader

        num_SKUs = dataloader.num_units if num_SKUs is None else num_SKUs
        if not isinstance(num_SKUs, int):
            raise ValueError("num_SKUs must be an integer.")

        self.set_param("num_SKUs", num_SKUs, new=True)

        self.set_param("underage_cost", underage_cost, shape=(num_SKUs,), new=True)
        self.set_param("overage_cost", overage_cost, shape=(num_SKUs,), new=True)

        self.set_param("q_bound_low", q_bound_low, shape=(num_SKUs,), new=True)
        self.set_param("q_bound_high", q_bound_high, shape=(num_SKUs,), new=True)

        # Spaces are derived from the data shapes; the leading (samples)
        # dimension is dropped by the setters.
        self.set_observation_space(dataloader.X_shape)
        self.set_action_space(dataloader.Y_shape)

        # Debug switch: when set to True, step() prints the action, the next
        # observation and the next demand, and sleeps 3 seconds per step.
        self.print=False

        super().__init__(mdp_info=MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon_train))

    def step(self, action):
        """
        Evaluate ``action`` against the current demand and advance the index.

        Args:
            action: order quantity per SKU; must provide ``.copy()`` (e.g. a
                numpy array).

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``.
            ``observation`` is None when the episode is truncated;
            ``terminated`` is always False; ``info`` holds copies of the
            demand, the action and the cost per SKU of this step.
        """

        if self.print:
            print(action)

        cost_per_SKU = pinball_loss(self.demand, action, self.underage_cost, self.overage_cost)
        reward = -np.sum(cost_per_SKU) # negative because we want to minimize the cost

        terminated = False # in this problem there is no termination condition

        # Copies keep the reported values independent of later in-place changes.
        info = dict(
            demand=self.demand.copy(),
            action=action.copy(),
            cost_per_SKU=cost_per_SKU.copy()
        )

        truncated = self.set_index()

        if truncated:
            # No further data point to observe in this episode.
            return None, reward, terminated, truncated, info

        observation, self.demand = self.get_observation()

        if self.print:
            print("##################")
            print("observation:", observation)
            print("next demand:", self.demand)
            time.sleep(3)

        return observation, reward, terminated, truncated, info

    def reset(self,
        start_index: Union[int,str] = None):
        """
        Reset the environment and return the first observation.

        Args:
            start_index: index to start from, forwarded to ``reset_index``.
                If None it is chosen from the current mode: 0 in "val" and
                "test" mode; in "train" mode 0 when ``horizon_train`` is
                "use_all_data" or the dataloader has a truthy
                ``is_distribution`` attribute, otherwise "random".

        Returns:
            Tuple ``(observation, truncated)``.

        Raises:
            ValueError: if ``start_index`` is None and the mode is not one of
                "train", "val" or "test".
        """

        if start_index is None:
            if self._mode == "train":
                if (
                    self.horizon_train == "use_all_data"
                    or getattr(self.dataloader, "is_distribution", False)
                ):
                    start_index = 0
                else:
                    start_index = "random"
            elif self._mode in ("val", "test"):
                start_index = 0
            else:
                raise ValueError("Mode not recognized.")

        truncated = self.reset_index(start_index)

        observation, self.demand = self.get_observation()

        return observation, truncated

In [None]:
from ddopnew.dataloaders.distribution import NormalDistributionDataLoader

def run_test_loop(env):
    """
    Drive ``env`` with randomly sampled actions until a step is truncated.

    After every step the environment index, reward, info, next observation
    and truncation flag are printed. The environment is not reset here.
    """
    while True:
        sampled_action = env.action_space.sample()
        next_obs, reward, _terminated, truncated, info = env.step(sampled_action)

        print("##### STEP: ", env.index, "#####")
        print("reward:", reward)
        print("info:", info)
        print("next observation:", next_obs)
        print("truncated:", truncated)

        if truncated:
            break

# Demo: newsvendor environment fed by a distribution dataloader with two units.
dataloader = NormalDistributionDataLoader(
    mean=[4, 3],
    std=[1, 2],
    num_units=2,
)

test_env = NewsvendorEnv(
    underage_cost=1,
    overage_cost=2,
    dataloader=dataloader,
    horizon_train=3,
)

# Start from the first index, then step with random actions until truncation.
obs, truncated = test_env.reset(start_index=0)
print("##### RESET #####")

run_test_loop(test_env)

##### RESET #####
##### STEP:  1 #####
reward: -5.458583888728
info: {'demand': array([5.5036361 , 0.30576809]), 'action': array([1.9467258, 1.2566049], dtype=float32), 'cost_per_SKU': array([3.55691026, 1.90167363])}
next observation: None
truncated: False
##### STEP:  2 #####
reward: -5.244157587239242
info: {'demand': array([3.83382667, 1.80498302]), 'action': array([0.19830196, 0.19635014], dtype=float32), 'cost_per_SKU': array([3.63552471, 1.60863287])}
next observation: None
truncated: False
##### STEP:  3 #####
reward: -2.5394090540910588
info: {'demand': array([3.72794562, 1.77236117]), 'action': array([2.9494576 , 0.01144009], dtype=float32), 'cost_per_SKU': array([0.77848798, 1.76092108])}
next observation: None
truncated: True


In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler

from ddopnew.dataloaders.tabular import XYDataLoader

# Build a small synthetic dataset scaled to the range [0, 1].
# All data is scaled at once and the targets are simply treated as demand.
# With real data the scaler should be fitted on the training data only.
X, Y = make_regression(
    n_samples=8,
    n_features=2,
    n_targets=2,
    noise=0.1,
    random_state=42,
)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
Y = scaler.fit_transform(Y)

dataloader = XYDataLoader(X, Y, val_index_start=4, test_index_start=6)
test_env = NewsvendorEnv(
    underage_cost=Parameter(np.array([1, 1]), shape=(2,)),
    overage_cost=Parameter(np.array([0.5, 0.5]), shape=(2,)),
    dataloader=dataloader,
    horizon_train="use_all_data",
)

obs, truncated = test_env.reset(start_index=0)
print("#################### RESET ####################")

# Run one loop per mode; the mode is switched on the environment before each
# subsequent loop.
print("#################### RUN IN TRAIN MODE ####################")
run_test_loop(test_env)

print("#################### RUN IN VAL MODE ####################")
test_env.val()
run_test_loop(test_env)

print("#################### RUN IN TEST MODE ####################")
test_env.test()
run_test_loop(test_env)

print("#################### RUN IN TRAIN MODE AGAIN ####################")
test_env.train()
run_test_loop(test_env)


#################### RESET ####################
#################### RUN IN TRAIN MODE ####################
##### STEP:  1 #####
reward: -0.3027596736935365
info: {'demand': array([0.41801109, 0.41814421]), 'action': array([0.37043503, 0.92851144], dtype=float32), 'cost_per_SKU': array([0.04757606, 0.25518362])}
next observation: [0.51654708 0.67238019]
truncated: False
##### STEP:  2 #####
reward: -0.29506728090648016
info: {'demand': array([0.61617324, 0.52211535]), 'action': array([0.5369136, 0.9537306], dtype=float32), 'cost_per_SKU': array([0.07925967, 0.21580761])}
next observation: [0.71467365 0.37996181]
truncated: False
##### STEP:  3 #####
reward: -0.8944882258866846
info: {'demand': array([0.45242345, 0.60924132]), 'action': array([0.4851286, 2.3655126], dtype=float32), 'cost_per_SKU': array([0.01635258, 0.87813565])}
next observation: [0.78011439 1.        ]
truncated: False
##### STEP:  4 #####
reward: -1.6336274053646946
info: {'demand': array([1.        , 0.88968748]), '

In [None]:
# #| export
# class NewsvendorVariableSLEnv(NewsvendorEnv, ABC):
#     """

#     """
#     def __init__(self,
#         underage_cost: Union[np.ndarray, Parameter] = np.array([1]),
#         overage_cost: Union[np.ndarray, Parameter] = np.array([1]),
#         q_bound_low: Union[np.ndarray, Parameter] = np.array([0]),
#         q_bound_high: Union[np.ndarray, Parameter] = np.array([np.inf]),
#         dataloader: BaseDataLoader = None,
#         gamma: float = 1,
#         horizon: int = 100,

#         low_sl: np.ndarray = np.array([0.1]),
#         high_sl: np.ndarray = np.array([0.9]),

#     ) -> None:
    
#         super().__init__( 
#             underage_cost=underage_cost,
#             overage_cost=overage_cost,
#             q_bound_low=q_bound_low,
#             q_bound_high=q_bound_high,
#             dataloader=dataloader,
#             gamma=gamma,
#             horizon=horizon,
#         )

#         self.low_sl = set_env_parameter(low_sl, self.num_SKUs)
#         self.high_sl = set_env_parameter(high_sl, self.num_SKUs)
    
#     def set_observation_space(self,
#                             shape: tuple,
#                             low: Union[np.ndarray, float] = -np.inf,
#                             high: Union[np.ndarray, float] = np.inf) -> None:
        
#         '''
#         Set the observation space of the environment.
#         '''

#         ### THIS MAKES NO SENSE:

#         # if shape is not None:
#         #     if not isinstance(shape, tuple):
#         #         raise ValueError("Shape must be a tuple.")
            
#         #     shape = shape[1:]
        
#         #     self.observation_space = gym.spaces.Dict({
#         #         'X': gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32),
#         #         'sl': gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)
#         #     })
#         # else:
#         #     self.observation_space = gym.spaces.Dict({
#         #         'sl': gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)
#         #     })

#     def get_observation(self):
#         """
#         Return the current observation.
#         """
        
#         X_item, Y_item = self.dataloader[self.index]

#         underage_cost, overage_cost, sl = self.draw_service_level()

#         self.underage_cost.set_value(underage_cost, (self.num_SKUs,))
#         self.overage_cost.set_value(overage_cost, (self.num_SKUs,))

#         if X_item is not None:
#             obs = {'X': X_item, 'sl': sl}
#         else:
#             obs = {'sl': sl}

#         return obs, Y_item
    
#     def draw_service_level(self):
        
#         sl = np.random.uniform(self.low_sl, self.high_sl, self.num_SKUs)

#         overage_cost = np.ones_like(sl)
#         underage_cost = np.ones_like(sl)

#         # # Calculate underage_cost where sl >= 0.5
#         underage_cost = np.where(sl < 0.5, sl / (1 - sl), underage_cost)
        
#         # Calculate overage_cost where sl < 0.5
#         overage_cost = np.where(sl >= 0.5, 1 / sl -1, overage_cost)
        
#         return underage_cost, overage_cost, sl

In [None]:
# test_env = NewsvendorVariableSLEnv(underage_cost=Parameter(np.array([1,1]), shape = (2,)), overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)), dataloader=dataloader, horizon=3)

# print(test_env.observation_space)
# print(test_env.observation_space.sample())

# obs = test_env.reset(start_index=0)
# print("##### RESET #####")
# print("obs:", obs)

# truncated = False
# while not truncated:
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)
#     print("##### STEP: ", test_env.index, "#####")
#     print("reward:", reward)
#     print("info:", info)
#     print("obs:", obs)
#     print("truncated:", truncated)

#### Simple Example with synthetic data:

In [None]:
# from sklearn.datasets import make_regression
# from sklearn.preprocessing import MinMaxScaler

# from ddopnew.dataloaders.tabular import XYDataLoader

# # create a simple dataset bounded between 0 and 1
# X, Y = make_regression(n_samples=100, n_features=2, n_targets=2, noise=0.1)
# scaler = MinMaxScaler()
# X = scaler.fit_transform(X)
# Y = scaler.fit_transform(Y)

# dataloader = XYDataLoader(X, Y)
# test_env = NewsvendorVariableSLEnv(underage_cost=Parameter(np.array([1,1]), shape = (2,)), overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)), dataloader=dataloader, horizon=len(dataloader))

# print(test_env.observation_space)
# print(test_env.observation_space.sample())

# obs = test_env.reset(start_index=0)
# print("##### RESET #####")
# print("obs:", obs)

# truncated = False
# while not truncated:
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)
#     print("##### STEP: ", test_env.index, "#####")
#     print("reward:", reward)
#     print("info:", info)
#     print("obs:", obs)
#     print("truncated:", truncated)

In [None]:
#| hide
import nbdev

# Export the cells marked for export in this notebook to the package modules.
nbdev.nbdev_export()