# Dynamic programming project

In [None]:
import numpy as np
import plotly.graph_objects as go

# 1. Approximation of the value function

$$
V(H_t) = \max_{L_t \in [0,1]} [ U(C_t) + \beta V(H_{t+1})] \\
$$
Subject to:
$$
(i)\quad C_t = H_t^\alpha L_t \\
$$
$$
(ii)\quad H_{t+1} = (1-\delta)H_t + (1 - L_t)
$$

In [49]:
def utility(c, sigma=0.9):
    if sigma!=1:
        return (c**(1 - sigma)) / (1 - sigma)
    else:
        return np.log(c)

def consumption(H,L,alpha=0.4):
    return (H**alpha) * L

def human_capital_accumulation(H,L,delta=0.05):
    return (1 - delta) * H + (1 - L)

def bellman_equation(C, H_next, H_values, V, beta=0.9):
    return utility(C) + beta * np.interp(H_next, H_values, V)

def value_function_update(new_value, old_value, L, old_L):
    if new_value > old_value:
        return new_value, L
    else:
        return old_value, old_L

def compute_value_function(V, H_values, H_min=0.1, H_max=5.0):
    new_V = np.zeros_like(V)
    policy = np.zeros_like(V)

    for i, H in enumerate(H_values):
        max_value = -np.inf
        optimal_L = 0
        L_candidates = np.linspace(0, 1, 100)

        for L in L_candidates:
            C = consumption(H,L)
            H_next = human_capital_accumulation(H,L)

            if H_next < H_min or H_next > H_max or C <= 0:
                continue

            value = bellman_equation(C, H_next, H_values, V)
            max_value, optimal_L = value_function_update(value, max_value, L, optimal_L)

        new_V[i] = max_value
        policy[i] = optimal_L

    return new_V, policy

def value_function_iteration(tolerance=1e-5, max_iterations=500, H_min=0.1, H_max=5, H_points=200):
    H_values = np.linspace(H_min, H_max, H_points)
    V = np.zeros(H_points)

    for iteration in range(max_iterations):
        new_V, policy = compute_value_function(V, H_values)

        if np.max(np.abs(new_V - V)) < tolerance:
            print(f"Converged in {iteration} iterations")
            break
        V = new_V
    return V, policy

In [58]:
V, optimal_policy = value_function_iteration()

Converged in 132 iterations


# 2. Graphics

## 2.1 Value function

In [59]:
H_min, H_max, H_points = 0.1, 5, 200
H_values = np.linspace(H_min, H_max, H_points)

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=V,
    mode = "lines",
    name = "approximated value function"
))
fig.update_layout(
    xaxis_title="Human Capital (H)",
    yaxis_title="Value function",
    title={
    "text": "Approximation of the Value Function<br><sup>Parameters: α = 0.4, β = 0.9, σ = 0.9, δ = 0.05</sup>",
    "x": 0.5,
    "xanchor": "center"
    },
    # showlegend=True
)
fig.show()

Each point of the value function represents the maximum lifetime utility the agent can achieve starting with a specific level of human capital. 

## 2.2 Policy function

The policy function specifies the agent's optimal choice of labor allocation (Lt) for a given level of human capital (Ht).
$$
L_t^*(H_t)
$$

In [63]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=optimal_policy,
    mode = "lines",
    name='Optimal Policy',
    line=dict(color='red')
))

fig.update_layout(
    title = {
        "text":"Optimal Policy",
        "x": 0.5,
        "xanchor": "center"
    },
    xaxis_title='Ht',
    yaxis_title='Lt',
    showlegend=False
)
fig.show()

## 2.3 Consumption C(H) and Labour Supply L(H)

In [64]:
policy_max = np.where(optimal_policy==1)[0][0]
no_more_learning_point = H_values[policy_max]

optimal_consumption = consumption(H_values,optimal_policy)

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=optimal_policy,
    mode = "lines",
    name='optimal L',
    line=dict(color='blue'),
    opacity=0.7
))
fig.add_trace(go.Scatter(
    x=H_values,
    y=optimal_consumption,
    name="optimal C",
    line=dict(color='green'),
    opacity=0.7
))
fig.update_layout(
    title = {
        "text":"Consumption C(H) and Labour Supply L(H)",
        "x": 0.5,
        "xanchor": "center"
    },
    xaxis_title='Human Capital (H)',
    showlegend=True
)

fig.add_vline(x=no_more_learning_point,
              line=dict(color='black', dash='dash'),
              opacity=0.7
)
fig.show()

# 3. Compute a Value function for any arbitrary function

## 3.1 Arbitrary policy functions

Lets study the following arbitrary policy function: 
1. <b>Constant policy</b>: the agent allocate a constant fraction of its time to consumption production
$$
L_t(H_t) = c , \quad c \in [0,1]
$$
2. <b>Random policy</b>: the agent allocates labor randomly following a uniform law 
$$
L_t(H_t)∼U(0,1)
$$
3. <b>Proportional allocation</b>: the agent allocates its time to consumption production proportionally to its human capital
$$
L_t(H_t) = \frac{H_t}{H_{max}}
$$

In [56]:
def constant_policy_function(H, c=0.5):
    if isinstance(H, np.ndarray):
        return np.full_like(H, c)
    else:
        return c

def random_policy_function(H):
    if isinstance(H, np.ndarray):
        return np.random.uniform(0, 1, size=H.shape)
    else:
        return np.random.uniform(0, 1)

def proportional_policy_function(H):
    return  H / H_max

In [None]:
porportional_policy = proportional_policy_function(H_values)
constant_policy = constant_policy_function(H_values)
random_policy = random_policy_function(H_values)

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=porportional_policy,
    mode = "lines",
    name='Proportional Policy'
))

fig.add_trace(go.Scatter(
    x=H_values,
    y=random_policy,
    mode = "lines",
    name='Random Policy'
))

fig.add_trace(go.Scatter(
    x=H_values,
    y=constant_policy,
    mode = "lines",
    name='Constant Policy'
))

fig.update_layout(
    title = {
        "text":"Arbitrary policy for labor allocation",
        "x": 0.5,
        "xanchor": "center"
    },
    xaxis_title='Ht',
    yaxis_title='Lt',
    showlegend=True
)
fig.show()

## 3.2 Approximate the value function for the arbitrary policy

In [30]:
V_proportional = np.zeros(H_points)
for iteration in range(max_iterations):
    new_V_proportional, proportional_policy = compute_value_function(V_proportional, H_values,arbitrary_policy=True)
    if np.max(np.abs(new_V_proportional - V_proportional)) < tolerance:
        print(f"Converged in {iteration} iterations")
        break
    V_proportional = new_V_proportional

V_constant = np.zeros(H_points)
for iteration in range(max_iterations):
    new_V_constant, constant_policy = compute_value_function(V_constant, H_values,arbitrary_policy=True, arbitrary_policy_type='constant')
    if np.max(np.abs(new_V_constant - V_constant)) < tolerance:
        print(f"Converged in {iteration} iterations")
        break
    V_constant = new_V_constant

V_random = np.zeros(H_points)
for iteration in range(max_iterations):
    new_V_random, random_policy = compute_value_function(V_random, H_values,arbitrary_policy=True, arbitrary_policy_type='random')
    if np.max(np.abs(new_V_random - V_random)) < tolerance:
        print(f"Converged in {iteration} iterations")
        break
    V_random = new_V_random

TypeError: 'NoneType' object is not subscriptable

In [29]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=V,
    mode = "lines",
    name = "optimal policy"
))

fig.add_trace(go.Scatter(
    x=H_values,
    y=V_proportional,
    mode = "lines",
    name = "arbitrary policy"
))

fig.add_trace(go.Scatter(
    x=H_values,
    y=V_constant,
    mode = "lines",
    name = "constant policy"
))

fig.update_layout(
    xaxis_title="Human Capital (H)",
    yaxis_title="Value function",
    title={
    "text": "Approximation of the Value Function for an arbitrary policy function<br><sup>Parameters: α = 0.4, β = 0.9, σ = 0.9, δ = 0.05</sup>",
    "x": 0.5,  # Center align title
    "xanchor": "center"
    },
    # showlegend=True
)
fig.show()

NameError: name 'V_proportional' is not defined

# 4. Implement a greedy procedure to the policy function

In [99]:
def compute_greedy_procedure_value_function(V, H_values, epsilon = 0.1, H_min=0.1, H_max=5.0):
    new_V = np.zeros_like(V)
    policy = np.zeros_like(V)
    L_candidates = np.linspace(0, 1, 100)

    # epsilon = max(0.01, epsilon - 0.005)

    for i, H in enumerate(H_values):
        max_value = -np.inf
        optimal_L = 0

        if np.random.rand() < epsilon:
            L = proportional_policy_function(H)
            C = consumption(H,L)
            H_next = human_capital_accumulation(H,L)

            if H_next < H_min or H_next > H_max or C <= 0:
                continue

            value = bellman_equation(C, H_next, H_values, V)
            max_value, optimal_L = value_function_update(value, max_value, L)

        else:
            for L in L_candidates:
                C = consumption(H,L)
                H_next = human_capital_accumulation(H,L)

                if H_next < H_min or H_next > H_max or C <= 0:
                    continue

                value = bellman_equation(C, H_next, H_values, V)
                max_value, optimal_L = value_function_update(value, max_value, L)

        new_V[i] = max_value
        policy[i] = optimal_L

    return new_V, policy

In [100]:
V_greedy = np.zeros(H_points)
epsilon = 0.1
for iteration in range(1000):
    new_V_greedy, greedy_policy = compute_greedy_procedure_value_function(V_greedy, H_values,epsilon = epsilon)
    epsilon = max(0.01, epsilon - 0.005)
    if np.max(np.abs(new_V_greedy - V_greedy)) < tolerance:
        print(f"Converged in {iteration} iterations")
        break
    V_greedy = new_V_greedy

In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=V,
    mode = "lines",
    name = "optimal policy"
))

fig.add_trace(go.Scatter(
    x=H_values,
    y=V_greedy,
    mode = "lines",
    name = "Greedy policy"
))
fig.update_layout(
    xaxis_title="Human Capital (H)",
    yaxis_title="Value function",
    title={
    "text": "Approximation of the Value Function for an arbitrary policy function<br><sup>Parameters: α = 0.4, β = 0.9, σ = 0.9, δ = 0.05</sup>",
    "x": 0.5,  # Center align title
    "xanchor": "center"
    },
    # showlegend=True
)
fig.show()

In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=H_values,
    y=policy,
    mode = "lines",
    name='Optimal Policy'
))

fig.add_trace(go.Scatter(
    x=H_values,
    y=greedy_policy,
    mode = "lines",
    name='Greedy Policy'
))

fig.update_layout(
    title = {
        "text":"Optimal Policy versus Optimal policy with a greedy procedure",
        "x": 0.5,
        "xanchor": "center"
    },
    xaxis_title='Ht',
    yaxis_title='Lt',
    showlegend=True
)
fig.show()