In [2]:
import gym
from gym.spaces import Box
import numpy as np


class InventoryEnvHard(gym.Env):
    """Skeleton of an inventory environment whose observation includes a goodwill penalty.

    Only the observation and action spaces (and seeding) are defined here;
    ``reset`` and ``step`` raise ``NotImplementedError`` and must be filled in.

    Action: a single value in ``[0, max_capacity]``.

    Observation: ``lead_time + 5`` entries, each lower-bounded by 0, with
    upper bounds in this order:
      * ``lead_time`` entries bounded by ``max_capacity``
        (presumably the inventory pipeline -- confirm against ``step``),
      * mean daily demand (``max_mean_daily_demand``),
      * unit selling price (``max_unit_selling_price``),
      * a second entry bounded by ``max_unit_selling_price``
        (presumably the unit cost, capped by the selling price -- confirm),
      * daily holding cost per unit (``max_daily_holding_cost_per_unit``),
      * goodwill penalty per unit (``max_goodwill_penalty_per_unit``).
    """

    def __init__(self):
        """Define ``self.observation_space`` and ``self.action_space``."""
        super().__init__()

        self.max_capacity = 4000

        # Bounds are built as float32 and the dtype is passed explicitly so
        # Box does not have to down-cast int64/float64 bounds.
        self.action_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([self.max_capacity], dtype=np.float32),
            dtype=np.float32,
        )

        self.lead_time = 5
        # ----- Solution ----- #
        # One extra observation dimension (5 instead of 4 trailing entries)
        # corresponds to the goodwill penalty.
        self.obs_dim = self.lead_time + 5

        self.max_mean_daily_demand = 200
        self.max_unit_selling_price = 100
        self.max_daily_holding_cost_per_unit = 5
        # ----- Solution ----- #
        # Max possible goodwill penalty; used as the upper bound of the last
        # observation entry.
        self.max_goodwill_penalty_per_unit = 10

        obs_low = np.zeros(self.obs_dim, dtype=np.float32)
        # ----- Solution ----- #
        # Upper bounds of the observation; the last entry corresponds to the
        # goodwill penalty.
        obs_high = np.array(
            [self.max_capacity] * self.lead_time
            + [
                self.max_mean_daily_demand,
                self.max_unit_selling_price,
                # NOTE(review): the selling-price bound is used twice;
                # presumably the second one bounds the unit cost -- confirm.
                self.max_unit_selling_price,
                self.max_daily_holding_cost_per_unit,
                self.max_goodwill_penalty_per_unit,
            ],
            dtype=np.float32,
        )
        self.observation_space = Box(low=obs_low, high=obs_high, dtype=np.float32)

        # Unseeded generator by default; call ``seed`` for reproducibility.
        self.rng = np.random.default_rng()

    def reset(self):
        """Reset the environment so a new, independent episode may start.

        Returns: the observation of the initial state.
        Raises: NotImplementedError, since this skeleton does not implement it.
        """
        raise NotImplementedError

    def step(self, action):
        """Advance the environment by one step using ``action``.

        Returns: the next observation, the reward, done and optionally
        additional info.
        Raises: NotImplementedError, since this skeleton does not implement it.
        """
        raise NotImplementedError

    def render(self, mode="human"):
        """Show the current environment state (intentionally a no-op here).

        Returns: None
        This method must be implemented, but an empty implementation is fine
        when rendering is not important.
        """
        pass

    def close(self):
        """Clean up resources such as threads or graphical windows (no-op here).

        Returns: None
        """
        pass

    def seed(self, seed=None):
        """Seed the environment's random number generator.

        Replaces ``self.rng`` with a NumPy generator built from ``seed``
        (``None`` yields a nondeterministically seeded generator).

        Returns: a one-element list containing the seed that was used.
        """
        self.rng = np.random.default_rng(seed)
        return [seed]