In [38]:
using Distributions, Statistics

# 1 - Maintenance problem

- Constants : 

In [2]:
# all costs are written in lists as follows [New,good shape,old,broken]
maintenance_cost = 10
repair_costs = [0,15,30,50]
monthly_earnings = [30,20,10,0]
replacement_cost = 70
Transition_matrix_without_maintenance = [
    0 1 0 0;
    0 0.3 0.7 0;
    0 0 0.5 0.5;
    0 0 0 1
]
Transition_matrix_with_maintenance = [
    0 0 0 0; # a new machine cannot be maintained
    0 0.8 0.2 0;
    0 0 0.9 0.1;
    0 0 0 0 # a broken machine cannot be maintained
]
# the next two matrices are just to make the main code easier it essentially just means you cannot repair or replace a new machine 
Transition_matrix_of_repairs = [
    0 0 0 0;
    0 1 0 0;
    0 1 0 0;
    0 1 0 0;
]
Transition_matrix_of_replacement = [
    0 0 0 0;
    1 0 0 0;
    1 0 0 0;
    1 0 0 0;
]
number_of_months = 12 
vector_of_possibilities = [
    Transition_matrix_without_maintenance,
    Transition_matrix_with_maintenance,
    Transition_matrix_of_repairs,
    Transition_matrix_of_replacement
]

4-element Vector{Matrix{Float64}}:
 [0.0 1.0 0.0 0.0; 0.0 0.3 0.7 0.0; 0.0 0.0 0.5 0.5; 0.0 0.0 0.0 1.0]
 [0.0 0.0 0.0 0.0; 0.0 0.8 0.2 0.0; 0.0 0.0 0.9 0.1; 0.0 0.0 0.0 0.0]
 [0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 1.0 0.0 0.0]
 [0.0 0.0 0.0 0.0; 1.0 0.0 0.0 0.0; 1.0 0.0 0.0 0.0; 1.0 0.0 0.0 0.0]

- the main problem :

In [3]:
function cost_of_action(state,possibility)
    if possibility == 1 
        return 0
    elseif possibility == 2 
        return 10
    elseif possibility == 3 
        return repair_costs[state]
    else 
        return 70
    end
end

cost_of_action (generic function with 1 method)

In [4]:
decision_matrix = zeros(4,number_of_months)
cost_matrix = zeros(4,number_of_months)
cost_matrix[:,number_of_months] = monthly_earnings
for month ∈ number_of_months-1:-1:1
    for state ∈ 1:4
        cost_of_possibilities = [0.0 0.0 0.0 0.0]
        for possibility ∈ 1:4
           cost_of_possibilities[possibility] = monthly_earnings[state] + (vector_of_possibilities[possibility][state,:]')*cost_matrix[:,month+1] - cost_of_action(state,possibility)
        end
        val, idx = findmax(cost_of_possibilities)
        cost_matrix[state,month] = val
        decision_matrix[state,month] = idx[2]
    end
end
best_cost = cost_matrix[1,1]

110.82

# 2 - Stock management

- constants :

In [78]:
T = 14
price_up = 1 
price_down = 3
storage_cost = 0.1
initial_stock = 10
n = 10 
p = [0.2 0.2 0.4 0.4 0.7 0.7 0.2 0.2 0.8 0.8 0.5 0.5 0.2 0.2]
max_stock = 20 
min_stock = 1
max_order = 5

5

- policy estimation code

In [79]:
function get_cost_of_complete_policy(policy,n,N=10000)
    costs = zeros(N)
    for itr ∈ 1:N
        stock = initial_stock
        cost = 0
        for t ∈ 1:T
            up_stock = min(policy[t,stock+1],max_stock-stock)
            cost -= up_stock*price_up
            stock += up_stock
            d = rand(Binomial(n, p[t]))
            down_stock = min(d,stock-1)
            stock -= down_stock
            cost += down_stock*price_down
            cost -= storage_cost*up_stock
        end
        costs[itr] = cost
    end
    std_var = std(costs)
    final_cost = mean(costs)
    CI_half_lenght = std_var*1.96/sqrt(N)
    CI_upper_bound = final_cost + CI_half_lenght[1]
    CI_lower_bound = final_cost - CI_half_lenght[1]
    return final_cost,CI_upper_bound,CI_lower_bound
end

get_cost_of_complete_policy (generic function with 2 methods)

In [80]:
function get_cost_of_policy(policy,n,N=10000)
    costs = zeros(N)
    for itr ∈ 1:N
        stock = initial_stock
        cost = 0
        for t ∈ 1:T
            up_stock = min(policy[t],max_stock-stock)
            cost -= up_stock*price_up
            stock += up_stock
            d = rand(Binomial(n, p[t]))
            down_stock = min(d,stock-1)
            stock -= down_stock
            cost += down_stock*price_down
            cost -= storage_cost*up_stock
        end
        costs[itr] = cost
    end
    final_cost = mean(costs)
    CI_half_lenght = 1.96*std(costs)/sqrt(N)
    CI_upper_bound = final_cost + CI_half_lenght
    CI_lower_bound = final_cost - CI_half_lenght
    return final_cost,CI_upper_bound,CI_lower_bound
end

get_cost_of_policy (generic function with 2 methods)

In [84]:
test_policy = 5 * ones(T)
cost,CI_upper_bound,CI_lower_bound = get_cost_of_policy(test_policy,10)
print("the expected cost obtained is $cost and a 95% confidence interval is [$CI_lower_bound,$CI_upper_bound]")

the expected cost obtained is 109.307 and a 95% confidence interval is [109.06253991226441,109.55146008773559]

- dynamic programming

In [85]:
function inventory_dp()
    V = zeros(T+1, max_stock + 1)
    policy = zeros(Int, T, max_stock + 1) 

    binom_probs = Vector{Vector{Float64}}(undef, T)
    for t in 1:T
        dist = Binomial(10, p[t])
        binom_probs[t] = [pdf(dist, k) for k in 0:10]
    end

    for t in T:-1:1  
        probs = binom_probs[t]
        
        for x in 0:max_stock  
            max_expected_cost = -Inf
            best_u = 0
            for u in 0:min(max_order, max_stock - x)
                S = x + u  
                total_cost = 0.0
                for (k_idx, k) in enumerate(0:10)
                    prob = probs[k_idx]
                    sales = min(k, S-1)
                    next_stock = max(0, S - k)
                    order_cost = price_up*u
                    holding_cost = storage_cost * next_stock
                    revenue = price_down* sales
                    cost_component = revenue - order_cost - holding_cost
                    if t < T
                        cost_component += V[t+1, next_stock + 1] 
                    end
                    
                    total_cost += prob * cost_component
                end
                if total_cost > max_expected_cost
                    max_expected_cost = total_cost
                    best_u = u
                end
            end
            V[t, x+1] = max_expected_cost
            policy[t, x+1] = best_u
        end
    end

    return V, policy
end

V, policy = inventory_dp()

expected_cost = V[1, 10+1]

116.75121513292399

In [94]:
cost,LB,UB = get_cost_of_complete_policy(policy,10)
print("the expected cost obtained for the optimal policy is $cost and a 95% confidence interval is [$UB,$LB]")

the expected cost obtained for the optimal policy is 121.14041999999999 and a 95% confidence interval is [120.96738221461526,121.31345778538473]

In [29]:
function inventory_dp_mod(T)
    V = zeros(T+1, max_stock + 1)
    policy = zeros(Int, T, max_stock + 1) 
    len = length(p)
    print(len)
    binom_probs = Vector{Vector{Float64}}(undef, T)
    for t in 1:T
        dist = Binomial(10, p[((t-1)%len)+1])
        binom_probs[t] = [pdf(dist, k) for k in 0:10]
    end

    for t in T:-1:1  
        probs = binom_probs[t]
        
        for x in 0:max_stock  
            max_expected_cost = -Inf
            best_u = 0
            for u in 0:min(max_order, max_stock - x)
                S = x + u  
                total_cost = 0.0
                for (k_idx, k) in enumerate(0:10)
                    prob = probs[k_idx]
                    sales = min(k, S-1)
                    next_stock = max(0, S - k)
                    order_cost = price_up*u
                    holding_cost = storage_cost * next_stock
                    revenue = price_down* sales
                    cost_component = revenue - order_cost - holding_cost
                    if t < T
                        cost_component += V[t+1, next_stock + 1] 
                    end
                    
                    total_cost += prob * cost_component
                end
                if total_cost > max_expected_cost
                    max_expected_cost = total_cost
                    best_u = u
                end
            end
            V[t, x+1] = max_expected_cost
            policy[t, x+1] = best_u
        end
    end

    return V, policy
end

V, policy = inventory_dp_mod(96)

expected_cost = V[1, 10+1]

14

765.2803067250243

In [None]:
function solve_stock_dp()
    p = [0.2, 0.2, 0.4, 0.4, 0.7, 0.7, 0.2, 0.2, 0.8, 0.8, 0.5, 0.5, 0.2, 0.2]
    
    # Adjusted state dimensions with proper bounds
    stock_range = min_stock:max_stock
    order_range = 0:max_order
    
    # Value function dimensions: [time, stock, prev_order1, prev_order2]
    V = Array{Float64}(undef, T+1, length(stock_range), length(order_range), length(order_range))
    policy = Array{Int}(undef, T, length(stock_range), length(order_range), length(order_range))
    
    # Terminal condition: V[T+1, :, :, :] = 0
    fill!(view(V, T+1, :, :, :), 0.0)

    # Precompute binomial probabilities
    binom_probs = Vector{Vector{Float64}}(undef, T)
    for t in 1:T
        dist = Binomial(10, p[t])
        binom_probs[t] = [pdf(dist, d) for d in 0:10]
    end

    for t in T:-1:1
        for x in stock_range 
            for u1 in order_range
                for u2 in order_range
                    min_cost = Inf
                    best_action = 0

                    current_stock = x + u1
                    current_stock = min(current_stock, max_stock)

                    expected_cost = 0.0
                    for (d_idx, d) in enumerate(0:10)
                        prob = binom_probs[t][d_idx]
                        after_demand = max(min_stock, current_stock - d)
                        sales = min(d, current_stock)
                        holding = 0.1 * after_demand
                        revenue = 3 * sales
                        
                        best_u = 0
                        max_cost_d = -Inf
                        
                        
                        max_feasible = min(max_order, max_stock - after_demand)
                        
                        for u in 0:max_feasible
                            
                            next_stock = max(min_stock, after_demand + u2)
                            next_stock = min(next_stock, max_stock)
                            
                            x_idx = next_stock - min_stock + 1
                            u1_idx = u2 + 1  
                            u2_idx = u + 1
                        
                            order_cost = u
                            total_cost = revenue- order_cost - holding  
                            
                            if t < T
                                total_cost += V[t+1, x_idx, u1_idx, u2_idx]
                            end
                            
                            if total_cost > max_cost_d
                                max_cost_d = total_cost
                                best_u = u
                            end
                        end
                        
                        expected_cost += prob * max_cost_d
                    end
                    x_idx = x - min_stock + 1
                    u1_idx = u1 + 1
                    u2_idx = u2 + 1
                    
                    V[t, x_idx, u1_idx, u2_idx] = expected_cost
                    policy[t, x_idx, u1_idx, u2_idx] = best_action
                end
            end
        end
    end
    initial_x_idx = 10 - min_stock + 1
    return V[1, initial_x_idx, 1, 1]  
end

optimal_value = solve_stock_dp()
println("Optimal expected cost: ", round(optimal_value, digits=2))

Optimal expected cost: 148.0


# 3 - Dice trading

- constants :

In [97]:
T = 10
price_per_dice = 5
max_dice = 3

3

In [98]:
function simulate_strategy(strategy,N=10000)
    results = zeros(N)
    
    for i in 1:N
        x = 0  
        d = 1  
        for t in 1:T
            action = strategy[t, x+1, d]
            
            if action == 1 && x >= 6 && d < 3
                x -= price_per_dice
                d += 1
            end
            
            roll = rand(1:6, d)
            x += maximum(roll)
        end
        results[i] = x
    end
    
    μ = mean(results)
    ci = 1.96 * std(results)/sqrt(N)
    return μ, μ - ci, μ + ci
end

simulate_strategy (generic function with 2 methods)

In [99]:
function solve_dice_dp()
    T = 10
    max_dice = 3
    max_points = 61 # 6 * 10  and we add 1 for safety 

    V = zeros(T+1, max_points+1, max_dice+1)
    policy = zeros(Int, T, max_points+1, max_dice+1)

    for x in 0:max_points, d in 1:max_dice
        V[T+1, x+1, d] = x
    end
    

    dice_probs = [Dict{Int,Float64}() for _ in 1:3]
    for dice in 1:3
        for m in 1:6
            prob = (m/6)^dice - ((m-1)/6)^dice
            dice_probs[dice][m] = prob
        end
    end
    
    for t in T:-1:1
        for x in 0:max_points
            for d in 1:max_dice
                best_value = -Inf
                best_action = 0

                for action in 0:1
                    if action == 1 && (x < 6 || d >= 3)
                        continue
                    end
                    
                    current_value = 0.0
                    if action == 1
                        new_d = min(d + 1, 3)
                        new_x = x - price_per_dice
                    else
                        new_d = d
                        new_x = x
                    end
                    
                    new_x < 0 && continue
                    
                    for (m, prob) in dice_probs[new_d]
                        next_x = new_x + m
                        next_x = min(next_x, max_points)
                        
                        if t+1 <= T && next_x <= max_points
                            current_value += prob * V[t+1, next_x+1, new_d]
                        else
                            current_value += prob * next_x
                        end
                    end
                    
                    if current_value > best_value
                        best_value = current_value
                        best_action = action
                    end
                end
                
                V[t, x+1, d] = best_value
                policy[t, x+1, d] = best_action
            end
        end
    end
    
    V, policy
end

solve_dice_dp (generic function with 1 method)

In [100]:
V, policy = solve_dice_dp()
print("the gains from the optimal strategy is $(V[1,1,1])")
println()
μ,LB,UB =  simulate_strategy(policy)
print("through simulation we find the gain associated with the optimal strategy is $μ and the 95% confidence interval is [$LB,$UB]")

the gains from the optimal strategy is 37.62152777777777
through simulation we find the gain associated with the optimal strategy is 37.6336 and the 95% confidence interval is [37.53408230701705,37.73311769298295]