# Inventory Management Environments

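> Gymnasium-based newsvendor inventory environments: a shared base class, a fixed-cost newsvendor, and a variant whose service level is redrawn for every observation.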

In [None]:
#| default_exp envs.inventory

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABC, abstractmethod
from typing import Union


from ddopnew.envs.base import BaseEnvironment
from ddopnew.utils import Parameter, set_env_parameter, MDPInfo
from ddopnew.dataloaders.base import BaseDataLoader
from ddopnew.loss_functions import pinball_loss

import gymnasium as gym

import numpy as np

In [None]:
#| export
class BaseInventoryEnv(BaseEnvironment):
    def __init__(self, mdp_info: MDPInfo) -> None:
        """
        Forward the MDP description to the parent environment.

        Args:
            mdp_info (MDPInfo): the information of the environment.

        # TODO check if this class is necessary

        """
        super().__init__(mdp_info)

    def set_observation_space(self, shape: tuple, low: Union[np.ndarray, float] = -np.inf, high: Union[np.ndarray, float] = np.inf) -> None:
        '''
        Build a ``Box`` observation space and store it on ``self.observation_space``.

        Default implementation for simple observation spaces; override it for
        anything more complex. The first entry of ``shape`` is assumed to be
        the number of samples and is dropped.

        Args:
            shape (tuple): shape of the full data, samples first. ``None`` means
                there are no features; ``self.observation_space`` is then set to ``None``.
            low: lower bound handed to the ``Box``.
            high: upper bound handed to the ``Box``.

        Raises:
            ValueError: if ``shape`` is neither ``None`` nor a tuple.

        '''
        if shape is None:
            self.observation_space = None
            return

        if not isinstance(shape, tuple):
            raise ValueError("Shape must be a tuple.")

        # Leading dimension is the sample count, which is not part of one observation.
        per_sample_shape = shape[1:]
        self.observation_space = gym.spaces.Box(
            low=low,
            high=high,
            shape=per_sample_shape,
            dtype=np.float32,
        )

    def set_action_space(self, shape: tuple, low: Union[np.ndarray, float] = 0, high: Union[np.ndarray, float] = np.inf) -> None:
        '''
        Build a ``Box`` action space and store it on ``self.action_space``.

        Default implementation for simple action spaces; override it for
        anything more complex. The first entry of ``shape`` is assumed to be
        the number of samples and is dropped.

        Args:
            shape (tuple): shape of the full target data, samples first.
            low: lower bound handed to the ``Box`` (defaults to 0).
            high: upper bound handed to the ``Box``.

        Raises:
            ValueError: if ``shape`` is not a tuple.

        '''
        if isinstance(shape, tuple):
            # Leading dimension is the sample count, which is not part of one action.
            per_sample_shape = shape[1:]
            self.action_space = gym.spaces.Box(
                low=low,
                high=high,
                shape=per_sample_shape,
                dtype=np.float32,
            )
        else:
            raise ValueError("Shape must be a tuple.")

    def get_observation(self):
        """
        Read the (x, y) pair at ``self.index`` from ``self.dataloader`` and return it.

        Covers the simple case where an observation is just that pair; override
        for richer observations. ``self.dataloader`` and ``self.index`` are not
        set in this class and must be provided elsewhere (e.g. by a subclass).

        """
        features, target = self.dataloader[self.index]
        return features, target

In [None]:
#| export
class NewsvendorEnv(BaseInventoryEnv, ABC):
    """
    Multi-SKU newsvendor environment fed by a dataloader.

    On every step the agent submits one order quantity per SKU. The per-SKU
    cost is the pinball loss between the current demand and the order, weighted
    by the underage and overage costs; the reward is the negative total cost.

    Args:
        underage_cost: cost per unit of unmet demand (passed through ``set_env_parameter``).
        overage_cost: cost per unit of excess order (passed through ``set_env_parameter``).
        q_bound_low: lower order bound. Stored on the instance as ``q_bound_low``;
            it is not applied to the action space in this class.
        q_bound_high: upper order bound. Stored on the instance as ``q_bound_high``;
            it is not applied to the action space in this class.
        dataloader: source of (features, demand) pairs; must expose
            ``num_SKUs``, ``X_shape`` and ``Y_shape``.
        gamma: discount factor forwarded to ``MDPInfo``.
        horizon: horizon forwarded to ``MDPInfo``.

    Raises:
        ValueError: if no dataloader is given.
    """
    def __init__(self,
        underage_cost: Union[np.ndarray, Parameter] = np.array([1]),
        overage_cost: Union[np.ndarray, Parameter] = np.array([1]),
        q_bound_low: Union[np.ndarray, Parameter] = np.array([0]),
        q_bound_high: Union[np.ndarray, Parameter] = np.array([np.inf]),
        dataloader: BaseDataLoader = None,
        gamma: float = 1,
        horizon: int = 100,
    ) -> None:

        # Everything below is derived from the dataloader, so fail early and clearly.
        if dataloader is None:
            raise ValueError("A dataloader must be provided.")

        self.dataloader = dataloader
        self.num_SKUs = dataloader.num_SKUs

        self.underage_cost = set_env_parameter(underage_cost, self.num_SKUs)
        self.overage_cost = set_env_parameter(overage_cost, self.num_SKUs)

        # Keep the normalised order bounds instead of discarding them.
        # NOTE(review): they are still not enforced by the action space below — confirm
        # whether set_action_space should receive them.
        self.q_bound_low = set_env_parameter(q_bound_low, self.num_SKUs)
        self.q_bound_high = set_env_parameter(q_bound_high, self.num_SKUs)

        # Spaces must exist before the parent constructor, which receives them via MDPInfo.
        self.set_observation_space(dataloader.X_shape)
        self.set_action_space(dataloader.Y_shape)

        super().__init__(mdp_info=MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon))

    def step(self, action):
        """
        Score ``action`` against the current demand and move to the next observation.

        Args:
            action: order quantity per SKU.

        Returns:
            tuple: ``(observation, reward, terminated, truncated, info)`` where
            ``reward`` is the negative sum of ``cost_per_SKU``, ``terminated`` is
            always ``False``, ``truncated`` is whatever ``self.handle_index()``
            returns, and ``info`` holds copies of the demand, the action and the
            per-SKU cost used for this step.
        """
        # The loss is the cost itself; negate exactly once so that a higher cost
        # gives a lower reward (negating twice would reward the agent for its cost).
        cost_per_SKU = pinball_loss(self.demand, action, self.underage_cost, self.overage_cost)
        reward = -np.sum(cost_per_SKU)

        terminated = False # in this problem there is no termination condition

        # Copies, so later mutation of the env state does not alter the reported step.
        info = dict(
            demand=self.demand.copy(),
            action=action.copy(),
            cost_per_SKU=cost_per_SKU.copy()
        )

        # NOTE(review): the next observation is read before handle_index() runs. If
        # handle_index() is what advances self.index, the first step after reset()
        # re-reads the sample reset() already returned — confirm the intended order.
        observation, self.demand = self.get_observation()

        truncated = self.handle_index()

        return observation, reward, terminated, truncated, info

    def reset(self,
        start_index: Union[int,str] = 0):
        """
        Position the environment and return the first observation.

        Args:
            start_index: ``"random"`` draws an index uniformly from
                ``[0, len(self.dataloader))``; an integer (Python or NumPy) is
                used as given. Any other value leaves ``self.index`` unchanged.

        Returns:
            The observation at the selected index; the matching demand is
            stored on ``self.demand``.
        """
        if start_index=="random":
            self.index = np.random.randint(0, len(self.dataloader))
        elif isinstance(start_index, (int, np.integer)):
            # NumPy integers (e.g. taken from an index array) are accepted as well.
            self.index = int(start_index)

        observation, self.demand = self.get_observation()

        return observation

In [None]:
from ddopnew.dataloaders.distribution import NormalDistributionDataloader

# Two SKUs with normally distributed demand.
dataloader = NormalDistributionDataloader(
    mean=[4, 3],
    std=[1, 2],
    num_SKUs=2,
)

# Short horizon so the rollout below stops after a few steps.
test_env = NewsvendorEnv(
    underage_cost=1,
    overage_cost=2,
    dataloader=dataloader,
    horizon=3,
)

obs = test_env.reset(start_index=0)
print("##### RESET #####")

# Roll out random actions until the environment reports truncation.
while True:
    action = test_env.action_space.sample()
    obs, reward, terminated, truncated, info = test_env.step(action)
    print("##### STEP: ", test_env.index, "#####")
    print("reward:", reward)
    print("info:", info)
    print("truncated:", truncated)
    if truncated:
        break

##### RESET #####
##### STEP:  1 #####
reward: 5.542450213935796
info: {'demand': array([4.47795297, 2.15738063]), 'action': array([0.80758834, 0.28529504], dtype=float32), 'cost_per_SKU': array([-3.67036463, -1.87208559])}
truncated: False
##### STEP:  2 #####
reward: 4.054803721240395
info: {'demand': array([2.90442584, 4.69303633]), 'action': array([1.2076548, 2.3350036], dtype=float32), 'cost_per_SKU': array([-1.69677101, -2.35803271])}
truncated: False
##### STEP:  3 #####
reward: 9.203596623984481
info: {'demand': array([3.90127942, 1.44856021]), 'action': array([3.770002 , 5.9847198], dtype=float32), 'cost_per_SKU': array([-0.13127753, -9.07231909])}
truncated: True


In [None]:
#| export
class NewsvendorVariableSLEnv(NewsvendorEnv, ABC):
    """
    Newsvendor environment whose service level is redrawn for every observation.

    Each call to ``get_observation`` samples a service level per SKU between
    ``low_sl`` and ``high_sl``, overwrites the underage and overage costs with
    values matching that service level, and includes the service level in the
    observation under the key ``'sl'``.
    """
    def __init__(self,
        underage_cost: Union[np.ndarray, Parameter] = np.array([1]),
        overage_cost: Union[np.ndarray, Parameter] = np.array([1]),
        q_bound_low: Union[np.ndarray, Parameter] = np.array([0]),
        q_bound_high: Union[np.ndarray, Parameter] = np.array([np.inf]),
        dataloader: BaseDataLoader = None,
        gamma: float = 1,
        horizon: int = 100,

        low_sl: np.ndarray = np.array([0.1]),
        high_sl: np.ndarray = np.array([0.9]),

    ) -> None:

        # All newsvendor settings are handed to the parent class unchanged.
        parent_kwargs = dict(
            underage_cost=underage_cost,
            overage_cost=overage_cost,
            q_bound_low=q_bound_low,
            q_bound_high=q_bound_high,
            dataloader=dataloader,
            gamma=gamma,
            horizon=horizon,
        )
        super().__init__(**parent_kwargs)

        # Range from which the per-SKU service level is sampled.
        self.low_sl = set_env_parameter(low_sl, self.num_SKUs)
        self.high_sl = set_env_parameter(high_sl, self.num_SKUs)

    def set_observation_space(self, shape: tuple, low: Union[np.ndarray, float] = -np.inf, high: Union[np.ndarray, float] = np.inf) -> None:
        '''
        Build a ``Dict`` observation space and store it on ``self.observation_space``.

        The space always has an ``'sl'`` entry (one value in [0, 1] per SKU).
        An ``'X'`` entry is added when ``shape`` is given; its first dimension
        (assumed to be the number of samples) is dropped.

        Raises:
            ValueError: if ``shape`` is neither ``None`` nor a tuple.
        '''
        spaces = {}

        if shape is not None:
            if not isinstance(shape, tuple):
                raise ValueError("Shape must be a tuple.")

            spaces['X'] = gym.spaces.Box(low=low, high=high, shape=shape[1:], dtype=np.float32)

        spaces['sl'] = gym.spaces.Box(low=0, high=1, shape=(self.num_SKUs,), dtype=np.float32)

        self.observation_space = gym.spaces.Dict(spaces)

    def get_observation(self):
        """
        Draw a new service level, update the costs, and return ``(obs, demand)``.

        ``obs`` is a dict with key ``'sl'`` and, when the dataloader yields
        features, key ``'X'``. ``demand`` is the target read from the dataloader
        at ``self.index``.
        """
        features, demand = self.dataloader[self.index]

        new_underage, new_overage, sl = self.draw_service_level()

        # The costs in force are replaced by the ones implied by the drawn service level.
        cost_shape = (self.num_SKUs,)
        self.underage_cost.set_value(new_underage, cost_shape)
        self.overage_cost.set_value(new_overage, cost_shape)

        obs = {'sl': sl} if features is None else {'X': features, 'sl': sl}

        return obs, demand

    def draw_service_level(self):
        """
        Sample a service level per SKU and derive matching costs.

        The larger of the two costs is fixed at 1 and the other is chosen so that
        ``underage / (underage + overage)`` equals the service level:

        * ``sl < 0.5``:  overage = 1 and underage = sl / (1 - sl)
        * ``sl >= 0.5``: underage = 1 and overage = 1 / sl - 1

        Returns:
            tuple: ``(underage_cost, overage_cost, sl)``, one entry per SKU each.
        """
        sl = np.random.uniform(self.low_sl, self.high_sl, self.num_SKUs)

        unit_cost = np.ones_like(sl)

        # Underage cost drops below 1 only where sl < 0.5.
        underage_cost = np.where(sl < 0.5, sl / (1 - sl), unit_cost)

        # Overage cost drops below 1 only where sl >= 0.5.
        overage_cost = np.where(sl >= 0.5, 1 / sl -1, unit_cost)

        return underage_cost, overage_cost, sl

In [None]:
# The variable-service-level env calls set_value on its costs, so pass Parameter objects.
test_env = NewsvendorVariableSLEnv(
    underage_cost=Parameter(np.array([1,1]), shape = (2,)),
    overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)),
    dataloader=dataloader,
    horizon=3,
)

print(test_env.observation_space)
print(test_env.observation_space.sample())

obs = test_env.reset(start_index=0)
print("##### RESET #####")
print("obs:", obs)

# Roll out random actions until the environment reports truncation.
while True:
    action = test_env.action_space.sample()
    obs, reward, terminated, truncated, info = test_env.step(action)
    print("##### STEP: ", test_env.index, "#####")
    print("reward:", reward)
    print("info:", info)
    print("obs:", obs)
    print("truncated:", truncated)
    if truncated:
        break

Dict('sl': Box(0.0, 1.0, (2,), float32))
OrderedDict([('sl', array([0.05374725, 0.5697927 ], dtype=float32))])
##### RESET #####
obs: {'sl': array([0.73450979, 0.6189658 ])}
##### STEP:  1 #####
reward: 5.329709523595074
info: {'demand': array([4.12916234, 3.10559496]), 'action': array([1.4849977 , 0.42005002], dtype=float32), 'cost_per_SKU': array([-2.64416459, -2.68554494])}
obs: {'sl': array([0.14588848, 0.64665467])}
truncated: False
##### STEP:  2 #####
reward: 2.937032206548674
info: {'demand': array([5.10964613, 2.19902566]), 'action': array([0.43145132, 0.06106325], dtype=float32), 'cost_per_SKU': array([-0.79906979, -2.13796241])}
obs: {'sl': array([0.12577141, 0.69816345])}
truncated: False
##### STEP:  3 #####
reward: 1.6705307944992105
info: {'demand': array([2.29232759, 1.45771627]), 'action': array([0.0148089, 4.5638533], dtype=float32), 'cost_per_SKU': array([-0.32765658, -1.34287421])}
obs: {'sl': array([0.2791242 , 0.45465606])}
truncated: True


#### Simple Example with synthetic data:

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler

from ddopnew.dataloaders.tabular import XYDataloader

# Synthetic regression data with two features and two targets, each rescaled to [0, 1].
X, Y = make_regression(n_samples=100, n_features=2, n_targets=2, noise=0.1)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
Y = scaler.fit_transform(Y)  # the scaler is refit here, so X and Y are scaled independently

dataloader = XYDataloader(X, Y)

# Horizon equals the dataset length.
test_env = NewsvendorVariableSLEnv(
    underage_cost=Parameter(np.array([1,1]), shape = (2,)),
    overage_cost=Parameter(np.array([0.5,0.5]), shape = (2,)),
    dataloader=dataloader,
    horizon=len(dataloader),
)

print(test_env.observation_space)
print(test_env.observation_space.sample())

obs = test_env.reset(start_index=0)
print("##### RESET #####")
print("obs:", obs)

# Roll out random actions until the environment reports truncation.
while True:
    action = test_env.action_space.sample()
    obs, reward, terminated, truncated, info = test_env.step(action)
    print("##### STEP: ", test_env.index, "#####")
    print("reward:", reward)
    print("info:", info)
    print("obs:", obs)
    print("truncated:", truncated)
    if truncated:
        break

Dict('X': Box(-inf, inf, (2,), float32), 'sl': Box(0.0, 1.0, (2,), float32))
OrderedDict([('X', array([-1.2803899, -1.444424 ], dtype=float32)), ('sl', array([0.6157879 , 0.55508596], dtype=float32))])
##### RESET #####
obs: {'X': array([0.45345553, 0.0930219 ]), 'sl': array([0.26396155, 0.56539625])}
##### STEP:  1 #####
reward: 2.128186244197629
info: {'demand': array([0.08596278, 0.37222611]), 'action': array([2.1789317, 0.3370088], dtype=float32), 'cost_per_SKU': array([-2.09296893, -0.03521731])}
obs: {'X': array([0.45345553, 0.0930219 ]), 'sl': array([0.73776043, 0.57318857])}
truncated: False
##### STEP:  2 #####
reward: 0.1061390085086635
info: {'demand': array([0.08596278, 0.37222611]), 'action': array([0.29799017, 0.41355303], dtype=float32), 'cost_per_SKU': array([-0.07536589, -0.03077312])}
obs: {'X': array([0.70368513, 0.46897918]), 'sl': array([0.81293078, 0.19752028])}
truncated: False
##### STEP:  3 #####
reward: 0.5407211289533198
info: {'demand': array([0.50421863, 0.

In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the module
# named by `#| default_exp` — confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()