In [1]:
using Distributions

# 1 - Maintenance problem

- Constants : 

In [2]:
# all costs are written in lists as follows [New,good shape,old,broken]
maintenance_cost = 10
repair_costs = [0,15,30,50]
monthly_earnings = [30,20,10,0]
replacement_cost = 70
Transition_matrix_without_maintenance = [
    0 1 0 0;
    0 0.3 0.7 0;
    0 0 0.5 0.5;
    0 0 0 1
]
Transition_matrix_with_maintenance = [
    0 0 0 0; # a new machine cannot be maintained
    0 0.8 0.2 0;
    0 0 0.9 0.1;
    0 0 0 0 # a broken machine cannot be maintained
]
# the next two matrices are just to make the main code easier it essentially just means you cannot repair or replace a new machine 
Transition_matrix_of_repairs = [
    0 0 0 0;
    0 1 0 0;
    0 1 0 0;
    0 1 0 0;
]
Transition_matrix_of_replacement = [
    0 0 0 0;
    1 0 0 0;
    1 0 0 0;
    1 0 0 0;
]
number_of_months = 12 
vector_of_possibilities = [
    Transition_matrix_without_maintenance,
    Transition_matrix_with_maintenance,
    Transition_matrix_of_repairs,
    Transition_matrix_of_replacement
]

4-element Vector{Matrix{Float64}}:
 [0.0 1.0 0.0 0.0; 0.0 0.3 0.7 0.0; 0.0 0.0 0.5 0.5; 0.0 0.0 0.0 1.0]
 [0.0 0.0 0.0 0.0; 0.0 0.8 0.2 0.0; 0.0 0.0 0.9 0.1; 0.0 0.0 0.0 0.0]
 [0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 1.0 0.0 0.0]
 [0.0 0.0 0.0 0.0; 1.0 0.0 0.0 0.0; 1.0 0.0 0.0 0.0; 1.0 0.0 0.0 0.0]

- the main problem :

In [3]:
function cost_of_action(state,possibility)
    if possibility == 1 
        return 0
    elseif possibility == 2 
        return 10
    elseif possibility == 3 
        return repair_costs[state]
    else 
        return 70
    end
end

cost_of_action (generic function with 1 method)

In [4]:
I3 = [1 0 0;0 1 0;0 0 1]
I1 = [1,0,1]


3-element Vector{Int64}:
 1
 0
 1

In [5]:
decision_matrix = zeros(4,number_of_months)
cost_matrix = zeros(4,number_of_months)
cost_matrix[:,number_of_months] = monthly_earnings
for month ∈ number_of_months-1:-1:1
    for state ∈ 1:4
        cost_of_possibilities = [0.0 0.0 0.0 0.0]
        for possibility ∈ 1:4
           cost_of_possibilities[possibility] = monthly_earnings[state] + (vector_of_possibilities[possibility][state,:]')*cost_matrix[:,month+1] - cost_of_action(state,possibility)
        end
        val, idx = findmax(cost_of_possibilities)
        cost_matrix[state,month] = val
        decision_matrix[state,month] = idx[2]
    end
end
best_cost = cost_matrix[1,1]

110.82

# 2 - Stock management

- constants :

In [6]:
T = 14
price_up = 1 
price_down = 3
storage_cost = 0.1
initial_stock = 10
n = 10 
p = [0.2 0.2 0.4 0.4 0.7 0.7 0.2 0.2 0.8 0.8 0.5 0.5 0.2 0.2]
max_stock = 20 
max_order = 5

5

- policy estimation code

In [7]:
function get_cost_of_complete_policy(policy,n,N=100)
    costs = zeros(N)
    for itr ∈ 1:N
        stock = initial_stock
        cost = 0
        for t ∈ 1:T
            up_stock = min(policy[t,stock+1],max_stock-stock)
            cost -= up_stock*price_up
            stock += up_stock
            d = rand(Binomial(n, p[t]))
            down_stock = min(d,stock)
            stock -= down_stock
            cost += down_stock*price_down
            cost -= storage_cost*up_stock
        end
        costs[itr] = cost
    end
    std_var = std(costs)
    final_cost = mean(costs)
    CI_half_lenght = std_var*1.96/sqrt(N)
    CI_upper_bound = final_cost + CI_half_lenght[1]
    CI_lower_bound = final_cost - CI_half_lenght[1]
    return final_cost,CI_upper_bound,CI_lower_bound
end

get_cost_of_complete_policy (generic function with 2 methods)

In [8]:
function get_cost_of_policy(policy,n,N=100)
    costs = zeros(N)
    for itr ∈ 1:N
        stock = initial_stock
        cost = 0
        for t ∈ 1:T
            up_stock = min(policy[t],max_stock-stock)
            cost -= up_stock*price_up
            stock += up_stock
            d = rand(Binomial(n, p[t]))
            down_stock = min(d,stock)
            stock -= down_stock
            cost += down_stock*price_down
            cost -= storage_cost*up_stock
        end
        costs[itr] = cost
    end
    final_cost = mean(costs)
    CI_half_lenght = 1.96*std(costs)/sqrt(N)
    CI_upper_bound = final_cost + CI_half_lenght
    CI_lower_bound = final_cost - CI_half_lenght
    return final_cost,CI_upper_bound,CI_lower_bound
end

get_cost_of_policy (generic function with 2 methods)

In [9]:
test_policy = 5 * ones(T)
cost,CI_upper_bound,CI_lower_bound = get_cost_of_policy(test_policy,10)
print("the expected cost obtained is $cost and a 95% confidence interval is [$CI_lower_bound,$CI_upper_bound]")

the expected cost obtained is 110.40899999999999 and a 95% confidence interval is [107.7254427304615,113.09255726953849]

- dynamic programming

In [10]:
function inventory_dp()
    V = zeros(T+1, max_stock + 1)
    policy = zeros(Int, T, max_stock + 1) 

    binom_probs = Vector{Vector{Float64}}(undef, T)
    for t in 1:T
        dist = Binomial(10, p[t])
        binom_probs[t] = [pdf(dist, k) for k in 0:10]
    end

    for t in T:-1:1  
        probs = binom_probs[t]
        
        for x in 0:max_stock  
            max_expected_cost = -Inf
            best_u = 0
            for u in 0:min(max_order, max_stock - x)
                S = x + u  
                total_cost = 0.0
                for (k_idx, k) in enumerate(0:10)
                    prob = probs[k_idx]
                    sales = min(k, S)
                    next_stock = max(0, S - k)
                    order_cost = price_up*u
                    holding_cost = storage_cost * next_stock
                    revenue = price_down* sales
                    cost_component = revenue - order_cost - holding_cost
                    if t < T
                        cost_component += V[t+1, next_stock + 1] 
                    end
                    
                    total_cost += prob * cost_component
                end
                if total_cost > max_expected_cost
                    max_expected_cost = total_cost
                    best_u = u
                end
            end
            V[t, x+1] = max_expected_cost
            policy[t, x+1] = best_u
        end
    end

    return V, policy
end

V, policy = inventory_dp()

expected_cost = V[1, 10+1]

120.09210160261003

In [11]:
cost,UB,LB = get_cost_of_complete_policy(policy,10)
print("the expected cost obtained for the optimal policy is $cost and a 95% confidence interval is [$UB,$LB]")

the expected cost obtained for the optimal policy is 120.55999999999999 and a 95% confidence interval is [122.21702597896034,118.90297402103964]

In [31]:
function inventory_dp_mod(T)
    V = zeros(T+1, max_stock + 1)
    policy = zeros(Int, T, max_stock + 1) 
    len = length(p)
    print(len)
    binom_probs = Vector{Vector{Float64}}(undef, T)
    for t in 1:T
        dist = Binomial(10, p[((t-1)%len)+1])
        binom_probs[t] = [pdf(dist, k) for k in 0:10]
    end

    for t in T:-1:1  
        probs = binom_probs[t]
        
        for x in 0:max_stock  
            max_expected_cost = -Inf
            best_u = 0
            for u in 0:min(max_order, max_stock - x)
                S = x + u  
                total_cost = 0.0
                for (k_idx, k) in enumerate(0:10)
                    prob = probs[k_idx]
                    sales = min(k, S)
                    next_stock = max(0, S - k)
                    order_cost = price_up*u
                    holding_cost = storage_cost * next_stock
                    revenue = price_down* sales
                    cost_component = revenue - order_cost - holding_cost
                    if t < T
                        cost_component += V[t+1, next_stock + 1] 
                    end
                    
                    total_cost += prob * cost_component
                end
                if total_cost > max_expected_cost
                    max_expected_cost = total_cost
                    best_u = u
                end
            end
            V[t, x+1] = max_expected_cost
            policy[t, x+1] = best_u
        end
    end

    return V, policy
end

V, policy = inventory_dp_mod(96)

expected_cost = V[1, 10+1]

14

784.2349650620882

question 3 - b  for later

# 3 - Dice trading

- constants :

In [None]:
T = 10
price_per_dice = 5
price_required_to_buy = 6
maximum_amount_of_dice = 3

In [None]:
function simulate_strategy(T,strategy)
    points = 0
end