In [1]:
from dataclasses import dataclass
from scipy.stats import poisson
from stochastic_processes.distribution import Categorical, FiniteDistribution, SampledDistribution, Constant
from stochastic_processes.policy import DeterministicPolicy, Policy, FiniteDeterministicPolicy
from stochastic_processes.markov_process import FiniteMarkovProcess, MarkovRewardProcess, FiniteMarkovRewardProcess, \
    NonTerminal, State
from stochastic_processes.markov_decision_process import MarkovDecisionProcess, FiniteMarkovDecisionProcess
from typing import Mapping, Dict, Tuple, Iterator
import numpy as np
import itertools
import random

@dataclass(frozen=True)
class InventoryState:
    on_hand: int
    on_order: int

    def inventory_position(self) -> int:
        return self.on_hand + self.on_order

In [2]:
class SimpleInventoryMPFinite(FiniteMarkovProcess[InventoryState]):
    def __init__(self, capacity: int, poisson_lambda: float) -> None:
        self.capacity = capacity
        self.poission_lambda = poisson_lambda
        self.poission_distr = poisson(poisson_lambda)
        super().__init__(self.get_transition_map())
    
    def get_transition_map(self) -> Mapping[InventoryState, FiniteDistribution[InventoryState]]:
        d: Dict[InventoryState, Categorical[InventoryState]] = {}
        for alpha in range(self.capacity + 1):
            for beta in range(self.capacity + 1 - alpha):
                state = InventoryState(alpha, beta)
                ip = state.inventory_position()
                beta1 = self.capacity - ip
                state_probs_map: Mapping[InventoryState, float] = {
                    InventoryState(ip - i, beta1) : (self.poission_distr.pmf(i) if i < ip else 1 - self.poission_distr.cdf(ip - 1))
                    for i in range(ip + 1)
                }
                d[InventoryState(alpha, beta)] = Categorical(state_probs_map)
        return d

In [3]:
si_mp = SimpleInventoryMPFinite(capacity=2, poisson_lambda=1.0)
print(si_mp)

From State InventoryState(on_hand=0, on_order=0):
To State InventoryState(on_hand=0, on_order=2) with Probability 1.000
From State InventoryState(on_hand=0, on_order=1):
To State InventoryState(on_hand=1, on_order=1) with Probability 0.368
To State InventoryState(on_hand=0, on_order=1) with Probability 0.632
From State InventoryState(on_hand=0, on_order=2):
To State InventoryState(on_hand=2, on_order=0) with Probability 0.368
To State InventoryState(on_hand=1, on_order=0) with Probability 0.368
To State InventoryState(on_hand=0, on_order=0) with Probability 0.264
From State InventoryState(on_hand=1, on_order=0):
To State InventoryState(on_hand=1, on_order=1) with Probability 0.368
To State InventoryState(on_hand=0, on_order=1) with Probability 0.632
From State InventoryState(on_hand=1, on_order=1):
To State InventoryState(on_hand=2, on_order=0) with Probability 0.368
To State InventoryState(on_hand=1, on_order=0) with Probability 0.368
To State InventoryState(on_hand=0, on_order=0) wit

In [18]:
class SimpleInventoryMRP(MarkovRewardProcess[InventoryState]):
    def __init__(self, capacity: int, poisson_lambda: float, holding_cost: float, stockout_cost: float) -> None:
        self.capacity = capacity
        self.poisson_lambda = poisson_lambda
        self.holding_cost = holding_cost
        self.stockout_cost = stockout_cost
    
    def transition_reward(self, state: NonTerminal[InventoryState]) -> SampledDistribution[Tuple[State[InventoryState], float]]:
        def sample_next_state_reward(state=state) -> Tuple[State[InventoryState], float]:
            demand_sample = np.random.poisson(self.poisson_lambda)
            ip = state.state.inventory_position()
            next_state = InventoryState(max(ip - demand_sample, 0), max(self.capacity - ip, 0))
            reward = -self.holding_cost * state.state.on_hand - self.stockout_cost * max(demand_sample - ip , 0)
            return NonTerminal(next_state), reward
        return SampledDistribution(sample_next_state_reward)

In [19]:
si_mrp = SimpleInventoryMRP(capacity=2, poisson_lambda=1.0, holding_cost=1.0, stockout_cost=5.0)
t = si_mrp.transition_reward(Categorical({NonTerminal(InventoryState(0, 0)):1.0}))

In [2]:
class SimpleInventoryMRPFinite(FiniteMarkovRewardProcess[InventoryState]):
    def __init__(self, capacity: int, poisson_lambda: float, holding_cost: float, stockout_cost: float):
        self.capacity = capacity
        self.poisson_lambda = poisson_lambda
        self.holding_cost = holding_cost
        self.stockout_cost = stockout_cost
        self.poisson_distr = poisson(poisson_lambda)
        super().__init__(self.get_transition_reward_map())

    def get_transition_reward_map(self) -> Mapping[InventoryState, FiniteDistribution[Tuple[InventoryState, float]]]:
        d = {}
        for alpha in range(self.capacity + 1):
            for beta in range(self.capacity + 1 - alpha):
                state = InventoryState(alpha, beta)
                ip = state.inventory_position()
                beta1 = self.capacity - ip
                base_reward = - self.holding_cost * state.on_hand
                sr_probs_map = {(InventoryState(ip - i, beta1),  base_reward) : self.poisson_distr.pmf(i) for i in range(ip)}
                probability = 1 - self.poisson_distr.cdf(ip - 1)
                reward = base_reward - self.stockout_cost * (probability * (self.poisson_lambda - ip) + ip * self.poisson_distr.pmf(ip))
                sr_probs_map[(InventoryState(0, beta1), reward)] = probability
                d[state] = Categorical(sr_probs_map)
        return d

In [4]:
user_capacity = 2
user_poisson_lambda = 1.0
user_holding_cost = 1.0
user_stockout_cost = 10.0

user_gamma = 0.9

si_mrp = SimpleInventoryMRPFinite(user_capacity, user_poisson_lambda, user_holding_cost, user_stockout_cost)

print("Transition Reward Map")
print("---------------------")
print(si_mrp)

print("Stationary Distribution")
print("-----------------------")
si_mrp.display_stationary_distribution()
print()

print("Reward Function")
print("---------------")
si_mrp.display_reward_function()
print()


print("Value Function")
print("--------------")
si_mrp.display_value_function(gamma=user_gamma)
print()

Transition Map
--------------
From State InventoryState(on_hand=0, on_order=0):
To State InventoryState(on_hand=0, on_order=2) with Probability 1.000
From State InventoryState(on_hand=0, on_order=1):
To State InventoryState(on_hand=1, on_order=1) with Probability 0.368
To State InventoryState(on_hand=0, on_order=1) with Probability 0.632
From State InventoryState(on_hand=0, on_order=2):
To State InventoryState(on_hand=2, on_order=0) with Probability 0.368
To State InventoryState(on_hand=1, on_order=0) with Probability 0.368
To State InventoryState(on_hand=0, on_order=0) with Probability 0.264
From State InventoryState(on_hand=1, on_order=0):
To State InventoryState(on_hand=1, on_order=1) with Probability 0.368
To State InventoryState(on_hand=0, on_order=1) with Probability 0.632
From State InventoryState(on_hand=1, on_order=1):
To State InventoryState(on_hand=2, on_order=0) with Probability 0.368
To State InventoryState(on_hand=1, on_order=0) with Probability 0.368
To State InventorySt

In [2]:
class SimpleInventoryDeterministicPolicy(DeterministicPolicy[InventoryState, int]):
    def __init__(self, reorder_point: int) -> None:
        self.reorder_point = reorder_point

        def action_for(s: InventoryState) -> int:
            return max(self.reorder_point - s.inventory_position(), 0)
        
        super().__init__(action_for)

class SimpleInventoryStochasticPolicy(Policy[InventoryState, int]):
    def __init__(self, reorder_point_poisson_mean: float) -> None:
        self.reorder_point_poisson_mean = reorder_point_poisson_mean

    def act(self, state: NonTerminal[InventoryState]) -> SampledDistribution[int]:
        def action_func(state=state) -> int:
            reorder_point_sample: int = np.random.poisson(self.reorder_point_poisson_mean)
            return max(reorder_point_sample - state.state.inventory_position(), 0)
        return SampledDistribution(action_func)

In [3]:
@dataclass
class SimpleInventoryMDPNoCap(MarkovDecisionProcess[InventoryState, int]):
    poisson_lambda: float
    holding_cost: float
    stockout_cost: float

    def step(self, state: NonTerminal[InventoryState], order: int) -> SampledDistribution[Tuple[State[InventoryState], float]]:
        def sample_next_state_reward(state=state, order=order) -> Tuple[State[InventoryState], float]:
            demand_sample = np.random.poisson(self.poisson_lambda)
            ip = state.state.inventory_position()
            next_state = InventoryState(max(ip - demand_sample, 0), order)
            reward = - self.holding_cost * state.state.on_hand - self.stockout_cost * max(demand_sample - ip, 0)
            return NonTerminal(next_state), reward
        return SampledDistribution(sample_next_state_reward)

    def action(self, state: NonTerminal[InventoryState]) -> Iterator[int]:
        return itertools.count(start=0, step=1)

    def fraction_of_days_oos(self, policy: Policy[InventoryState, int], time_steps: int, num_traces: int) -> float:
        impl_mrp = self.apply_policy(policy)
        count = 0
        high_fractile = int(poisson(self.poisson_lambda).ppf(0.98))
        start = random.choice([InventoryState(i, 0) for i in range(high_fractile + 1)])

        for _ in range(num_traces):
            steps = itertools.islice(impl_mrp.simulate_reward(Constant(NonTerminal(start))), time_steps)
            for step in steps:
                if step.reward < -self.holding_cost * step.state.state.on_hand:
                    count += 1
        
        return float(count) / (time_steps * num_traces)


In [4]:
user_poisson_lambda = 2.0
user_holding_cost = 1.0
user_stockout_cost = 10.0

user_reorder_point = 8
user_reorder_point_poisson_mean = 8.0

user_time_steps = 1000
user_num_traces = 1000

si_mdp_nocap = SimpleInventoryMDPNoCap(poisson_lambda=user_poisson_lambda,
                                        holding_cost=user_holding_cost,
                                        stockout_cost=user_stockout_cost)

si_dp = SimpleInventoryDeterministicPolicy(reorder_point=user_reorder_point)

oos_frac_dp = si_mdp_nocap.fraction_of_days_oos(policy=si_dp,
                                                time_steps=user_time_steps,
                                                num_traces=user_num_traces)
print(
    f"Deterministic Policy yields {oos_frac_dp * 100:.2f}%"
    + " of Out-Of-Stock days"
)

Deterministic Policy yields 1.91% of Out-Of-Stock days


In [2]:
InvOrderMapping = Mapping[InventoryState, Mapping[int, Categorical[Tuple[InventoryState, float]]]]

class SimpleInventoryMDPCap(FiniteMarkovDecisionProcess[InventoryState, int]):
    def __init__(self, capacity: int, poisson_lambda: float, holding_cost: float, stockout_cost: float) -> None:
        self.capacity = capacity
        self.poisson_lambda = poisson_lambda
        self.holding_cost = holding_cost
        self.stockout_cost = stockout_cost
        self.poisson_distr = poisson(poisson_lambda)
        super().__init__(self.get_action_transition_reward_map())

    def get_action_transition_reward_map(self) -> InvOrderMapping:
        d = {}
        for alpha in range(self.capacity + 1):
            for beta in range(self.capacity + 1 - alpha):
                state = InventoryState(alpha, beta)
                ip = state.inventory_position()
                base_reward = -self.holding_cost * alpha
                d1 = {}
                for order in range(self.capacity - ip + 1):
                    sr_probs_dict = {(InventoryState(ip - i, order), base_reward): self.poisson_distr.pmf(i) for i in range(ip)}
                    probability = 1 - self.poisson_distr.cdf(ip - 1)
                    reward = base_reward - self.stockout_cost * (probability * (self.poisson_lambda - ip) + ip * self.poisson_distr.pmf(ip))
                    sr_probs_dict[(InventoryState(0, order), reward)] = probability
                    d1[order] = Categorical(sr_probs_dict)
                d[state] = d1
        return d


In [3]:
user_capacity = 2
user_poisson_lambda = 1.0
user_holding_cost = 1.0
user_stockout_cost = 10.0

si_mdp = SimpleInventoryMDPCap(user_capacity, user_poisson_lambda, user_holding_cost, user_stockout_cost)
print("MDP Transition Map")
print("------------------")
print(si_mdp)
fdp = FiniteDeterministicPolicy({
    InventoryState(alpha, beta): user_capacity - (alpha + beta)
    for alpha in range(user_capacity + 1) 
    for beta in range(user_capacity + 1 - alpha)
})
print("Deterministic Policy Map")
print("------------------------")
print(fdp)
implied_mrp = si_mdp.apply_finite_policy(fdp)
print("Implied MP Transition Map")
print("--------------")
print(FiniteMarkovProcess(
    {s.state: Categorical({s1.state: p for s1, p in v.table().items()})
        for s, v in implied_mrp.transition_map.items()}
))
print("Implied MRP Transition Reward Map")
print("---------------------")
print(implied_mrp)



TypeError: Can't instantiate abstract class SimpleInventoryMDPCap with abstract method action