The agent:
 - begins in some input states $\vec{x} \in \vec{X}$
 - experiences some exogenous shocks $\vec{k} \in \vec{K}$
 - can choose some actions $\vec{a} \in \vec{A}$
 - subject to conditions $\Gamma: \vec{X} \times \vec{K} \times \vec{A} \rightarrow \mathbb{B}$
 - experiences a reward $F: \vec{X} \times \vec{K} \times \vec{A} \rightarrow \mathbb{R}$
 - together, these determine some output states $\vec{y} \in \vec{Y}$ via...
 - a **deterministic** transition function $T: \vec{X} \times \vec{K} \times \vec{A} \rightarrow \vec{Y}$
   - _This is deterministic because shocks have been isolated to the beginning of the stage._
   - CDC thinks there needs to be an additional between-stage transition function.
 - The agent has a discount factor $\beta$ for future utility.
   - CDC: These can be stochastic!

In [128]:
import itertools
from collections.abc import Callable, Mapping, Sequence
from dataclasses import dataclass, field
from typing import Any, Optional

import numpy as np
import xarray as xr
from scipy.optimize import minimize

In [336]:
@dataclass
class Stage:
    """A single Bellman problem stage.

    A stage takes input states ``x``, shocks ``k`` and actions ``a`` (each a
    mapping from variable name to value), pays ``reward(x, k, a)`` and moves
    to the output states ``transition(x, k, a)``.  The value of the output
    states is discounted by ``discount``.
    """

    # (x, k, a) -> output states.  Required.
    # TODO: type signature # TODO: Defaults to identity function
    transition: Callable[[Mapping, Mapping, Mapping], Mapping]

    # Names of the variables of each kind.
    inputs: Sequence[str] = field(default_factory=list)
    shocks: Sequence[str] = field(default_factory=list)  # maybe becomes a dictionary, with shocks from a distribution?
    actions: Sequence[str] = field(default_factory=list)
    outputs: Sequence[str] = field(default_factory=list)

    discount: float = 1.0  # might become more complicated, like a distribution

    # (x, k, a) -> reward.  Defaults to no reward.
    reward: Callable[[Mapping, Mapping, Mapping], float] = lambda x, k, a: 0

    # Condition must be continuously valued, with a negative value if it fails
    condition: Callable[[Mapping, Mapping, Mapping], float] = lambda x, k, a: 0

    def T(self, x: Mapping, k: Mapping, a: Mapping) -> Mapping:
        """Return the output states reached from ``(x, k, a)``.

        The stage ``condition`` is NOT checked here; infeasible actions are
        passed straight through to ``transition``.
        """
        return self.transition(x, k, a)

    def q(
        self,
        x: Mapping[str, Any],
        k: Mapping[str, Any],
        a: Mapping[str, Any],
        v_y: Callable[[Mapping], float],
    ) -> float:
        """Value of a state, shock, action combination.

        Computes ``reward(x, k, a) + discount * v_y(T(x, k, a))``, where
        ``v_y`` is the end-of-stage value function over the output states.
        """
        return self.reward(x, k, a) + self.discount * v_y(self.T(x, k, a))

    def optimal_policy(
        self,
        x_grid: Optional[Mapping[str, Sequence]] = None,
        k_grid: Optional[Mapping[str, Sequence]] = None,
        v_y: Callable[[Mapping], float] = lambda x: 0,
    ):
        """Compute the optimal action at every point of a state/shock grid.

        For each combination of grid values the action maximizing ``q`` is
        searched with ``scipy.optimize.minimize`` (COBYLA), subject to
        ``condition(x, k, a) >= 0``.

        Parameters
        ----------
        x_grid : mapping of input-state name to its grid values.  ``None``
            (the default) means no input states.
        k_grid : mapping of shock name to its grid values.  ``None`` (the
            default) means no shocks.
        v_y : end-of-stage value function over the output states.

        Returns
        -------
        An ``xarray.DataArray`` with one dimension per entry of ``x_grid``
        followed by one per entry of ``k_grid``.  Grid points where the
        optimizer reports failure hold ``nan`` and the optimizer result is
        printed.
        """
        # None replaces the former mutable ``{}`` defaults, which would be
        # shared between calls.
        x_grid = {} if x_grid is None else x_grid
        k_grid = {} if k_grid is None else k_grid

        pi_data = xr.DataArray(
            np.zeros(
                [len(v) for v in x_grid.values()]
                + [len(v) for v in k_grid.values()]
            ),
            dims=list({**x_grid, **k_grid}),
            coords={**x_grid, **k_grid},
        )

        def as_actions(action_values):
            """Pair the optimizer's flat vector with the stage's action names."""
            return dict(zip(self.actions, action_values))

        def q_for_minimizer(action_values, x, k, v_y):
            """Flips negative for the _minimizer_"""
            return -self.q(x=x, k=k, a=as_actions(action_values), v_y=v_y)

        for x_point in itertools.product(*x_grid.values()):
            x_vals = dict(zip(x_grid.keys(), x_point))

            for k_point in itertools.product(*k_grid.values()):
                k_vals = dict(zip(k_grid.keys(), k_point))

                def condition_for_optimizer(action_values):
                    """
                    Expects a non-negative number if passing.
                    """
                    return np.array(
                        self.condition(
                            x=x_vals,
                            k=k_vals,
                            a=as_actions(action_values),
                        )
                    )

                pi_star_res = minimize(
                    q_for_minimizer,
                    np.zeros(len(self.actions)),  # better default than 0?
                    args=(x_vals, k_vals, v_y),
                    constraints={
                        # 'ineq' means the constraint function must be
                        # non-negative at a feasible point.
                        'type': 'ineq',
                        'fun': condition_for_optimizer,
                        # Jacobian is an option for speed?
                    },
                    method="cobyla",
                    options={
                        # 'disp' : True, # for debugging
                        'maxiter': 200000,
                    },
                )

                # Write into the array through the scalar selection at this
                # grid point.
                # NOTE(review): put(0, ...) fills a single slot, so with more
                # than one action only the first component appears to be
                # stored -- confirm before using multi-action stages.
                cell = pi_data.sel(**x_vals, **k_vals).variable.data
                if pi_star_res.success:
                    cell.put(0, pi_star_res.x)
                else:
                    cell.put(0, np.nan)

                    print(pi_star_res)

        # TODO: Store these values on a grid, so it does not need to be recomputed
        #       when taking expectations

        return pi_data


In [337]:
int(not(False))

1

Example: The consumption stage:

* $c \in A_0 = \mathbb{R}$
* $m \in X_0 = \mathbb{R}$
* $a \in Y_0 = \mathbb{R}$
* $\Gamma_0$ ... restricts consumption $c \leq m$
* $F_0(m,c) = CRRA(c, \rho)$
* $T_0(m,c) = m - c$ 
* $\beta_0 = \beta $

Requires a parameter $\rho$

In [338]:
from HARK.utilities import CRRAutility

# Curvature parameter handed to CRRAutility (the rho the stage requires).
CRRA = 5

# Consumption stage: the action c is taken out of the input m, leaving the
# output a = m - c.
consumption_stage = Stage(
    inputs=['m'],
    actions=['c'],
    outputs=['a'],
    discount=0.96,
    transition=lambda x, k, a: {'a': x['m'] - a['c']},
    reward=lambda x, k, a: CRRAutility(a['c'], CRRA),
    # Non-negative exactly when c <= m, which is what the stage requires.
    condition=lambda x, k, a: x['m'] - a['c'],
)

In [339]:
def consumption_v_y(y: Mapping[str, ...]):
    """End-of-stage value: CRRAutility applied to the output state ``a``."""
    end_of_stage_a = y['a']
    return CRRAutility(end_of_stage_a, CRRA)


# Solve for the optimal action on a small grid over the input m.
consumption_stage.optimal_policy(
    {'m': [9, 11, 20, 300, 4000, 5500]},
    v_y=consumption_v_y,
)

In [326]:
consumption_stage.T({'m' : 100}, {}, {'c' : 50})

{'a': 50}

In [288]:
# c = 101 exceeds m = 100. Stage.T does not check the stage condition, so the
# transition still goes through and returns a negative a.
consumption_stage.T({'m' : 100}, {}, {'c' : 101})

{'a': -1}

In [289]:
consumption_stage.reward({'m' : 100}, {}, {'c' : 50})

-4e-08

In [290]:

consumption_stage.q({'m' : 100}, {}, {'c' : 50}, v_y = consumption_v_y)

-7.84e-08

The allocation stage. Note that this has a trivial transition function:

* $\alpha \in A_1 = \mathbb{R}$
* $a \in X_1 = \mathbb{R}$
* $(a, \alpha) \in Y_1 = \mathbb{R}^2$
* $\Gamma_1$ ... restricts allocation $0 \leq \alpha \leq 1$
* $F_1(a,\alpha) = 0$
* $T_1(a,\alpha) = (a, \alpha)$
* $\beta_1 = 1 $

In [146]:
# Allocation stage: choose the share alpha; the transition carries a and
# alpha through unchanged to the output states.
allocation_stage = Stage(
    transition=lambda x, k, a: {'a': x['a'], 'alpha': a['alpha']},
    inputs=['a'],
    actions=['alpha'],
    outputs=['a', 'alpha'],
    # Stage conditions must be continuously valued and negative on failure.
    # A boolean is read as 0/1, which is never negative, so it could not
    # signal a violation to the optimizer.  The smaller of the two slacks is
    # non-negative exactly when 0 <= alpha <= 1.
    condition=lambda x, k, a: min(a['alpha'], 1 - a['alpha']),
)

In [101]:
allocation_stage.T({'a': 100}, {}, {'alpha' : 0.5})

{'a': 100, 'alpha': 0.5}

In [102]:
# NOTE(review): alpha = -0.1 violates 0 <= alpha <= 1.  The AssertionError
# shown below appears to predate the current Stage.T, whose condition assert
# is commented out -- re-run this cell to confirm the present behavior.
allocation_stage.T({'a': 100}, {}, {'alpha' : -0.1})

AssertionError: 

In [103]:
allocation_stage.reward({'a': 100}, {}, {'alpha' : 0.5})

0

The growth stage:

* $A_2 = \emptyset$
* $(a, \alpha) \in X_2 = \mathbb{R}^2$
* $m \in Y_2 = \mathbb{R}$
* Shocks:
    * $\psi \sim \text{Lognormal}(0,\sigma_\psi)$
    * $\theta \sim \text{Lognormal}(0,\sigma_\theta)$
    * $\eta \sim \text{Lognormal}(0,\sigma_\eta)$
* $F_2(a,\alpha) = 0$
* $T_2(a,\alpha) =  \frac{(\alpha \eta + (1 - \alpha) R) a + \theta}{\psi G} $ 

Requires parameters $R$ and $G$

In [104]:
# The parameters R and G that the growth stage requires.
R = 1.01
G = 1.02


def growth_transition(x, k, a):
    """Growth-stage transition: map inputs (a, alpha) and shocks to ``m``.

    Computes ((alpha * eta + (1 - alpha) * R) * a + theta) / (psi * G).
    The action mapping ``a`` is accepted for signature compatibility and is
    not read.
    """
    share = x['alpha']
    blended_return = share * k['eta'] + (1 - share) * R
    numerator = blended_return * x['a'] + k['theta']
    denominator = k['psi'] * G
    return {'m': numerator / denominator}

# Growth stage: no actions are declared; the inputs and the three shocks
# determine m through growth_transition.
growth_stage = Stage(
    inputs=['a', 'alpha'],
    shocks=['psi', 'theta', 'eta'],  # need to map to distributions
    outputs=['m'],
    transition=growth_transition,
)

In [105]:
growth_stage.T(
    {'a': 100, 'alpha' : 0.5},
    {'psi' : 1.00, 'theta' : 1.10, 'eta' : 1.05},
    {}
)

{'m': 102.05882352941175}

## Solving one stage

For any stage, consider two value functions.
 - $v_x$ is the value of its input states
 - $v_y$ is the value of its output states. Others might write this $\mathfrak{v}$
 
The stage is solved with respect to a value function $v_y : \vec{Y} \rightarrow \mathbb{R}$ over the output states. The $q: \vec{X} \times \vec{K} \times \vec{A} \rightarrow \mathbb{R}$ is the value of a state, shock, action combination.

$$q(\vec{x}, \vec{k}, \vec{a}) = F(\vec{x}, \vec{k}, \vec{a}) + \beta v_y(T(\vec{x}, \vec{k}, \vec{a}))$$

where $\beta$ is the agent's discount factor for that stage. Note that there is no expectation-taking in this operation because $T$ is deterministic.

The optimal policy $\pi: \vec{X} \times \vec{K} \rightarrow \vec{A}$ is:

$$\pi^*(\vec{x}, \vec{k}) = \mathrm{argmax}_{\vec{a} \in \vec{A}} q(\vec{x}, \vec{k}, \vec{a})$$

(This is solved by gridding over $x$ and $k$ ...)

The optimal policy $\pi^*$ can then be used to derive the value function over the input states $v_x: \vec{X} \rightarrow \mathbb{R}$.

$$v_x(\vec{x}) = \mathbb{E}_{\vec{k} \in \vec{K}}[q(\vec{x}, \vec{k}, \pi^*(\vec{x}, \vec{k}))]$$

Note that this requires no optimization, but does require the taking of expectations over the probability distribution over the shocks.

## Playing with xarrays...

In [4]:
import numpy as np
import xarray as xr

In [5]:
data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]})

data

In [8]:
data.values



array([[-1.77282874, -1.18150493, -0.93007295],
       [ 1.73925697, -0.52557243, -0.59693632]])

In [9]:
data.dims


('x', 'y')

In [10]:

data.coords



Coordinates:
  * x        (x) int64 10 20

In [11]:
data.attrs

{}

In [17]:
data.sel(x =10)

In [60]:
x_grid = {'a' : [0, 1, 2, 3, 4], 'b' : [10, 20, 30, 40]}
k_grid = {'psi' : [0, 0.5, 1.0], 'theta' : [0.25, .75, 1.0, 1.25, 1.5]}


# Zero-filled array with one dimension per state and shock variable, in the
# order x_grid then k_grid.  The shape is taken from the grids defined above
# (the earlier references to bare `x` and `k` are not defined in this file).
q_data = xr.DataArray(
            np.zeros([len(v) for v in x_grid.values()] + [len(v) for v in k_grid.values()]),
            dims=list({**x_grid, **k_grid}),
            coords={**x_grid, **k_grid})

In [61]:
# Shape implied by the grids: one axis length per x_grid entry, then one per
# k_grid entry (uses x_grid/k_grid; bare `x` and `k` are not defined here).
np.zeros([len(v) for v in x_grid.values()] + [len(v) for v in k_grid.values()]).shape

(5, 4, 3, 5)

In [62]:
def print_and_add1(x):
    """Print ``x``, then return ``x + 1``."""
    print(x)
    incremented = x + 1
    return incremented

In [71]:
# Iterating a DataArray walks its first dimension; iterating each of those
# slices walks the next one.  With q_data's dimensions (a, b, psi, theta),
# every printed item is therefore a (psi, theta) array at fixed a and b.
for em in q_data:
    for ef in em:
        print(ef)

<xarray.DataArray (psi: 3, theta: 5)>
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
Coordinates:
    a        int64 0
    b        int64 10
  * psi      (psi) float64 0.0 0.5 1.0
  * theta    (theta) float64 0.25 0.75 1.0 1.25 1.5
<xarray.DataArray (psi: 3, theta: 5)>
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
Coordinates:
    a        int64 0
    b        int64 20
  * psi      (psi) float64 0.0 0.5 1.0
  * theta    (theta) float64 0.25 0.75 1.0 1.25 1.5
<xarray.DataArray (psi: 3, theta: 5)>
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
Coordinates:
    a        int64 0
    b        int64 30
  * psi      (psi) float64 0.0 0.5 1.0
  * theta    (theta) float64 0.25 0.75 1.0 1.25 1.5
<xarray.DataArray (psi: 3, theta: 5)>
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
Coordinates:
    a        int64 0
    b        int64 40

In [119]:



# Cartesian product over q_data's coordinates, each coordinate first turned
# into a list of plain Python values.
coord_product_iter = itertools.product(
    *(
        [entry.data.tolist() for entry in coord.variable]
        for coord in q_data.coords.values()
    )
)

# One {coordinate name: value} dict per grid point.
[dict(zip(q_data.coords.keys(), co_co)) for co_co in coord_product_iter]


[{'a': 0, 'b': 10, 'psi': 0.0, 'theta': 0.25},
 {'a': 0, 'b': 10, 'psi': 0.0, 'theta': 0.75},
 {'a': 0, 'b': 10, 'psi': 0.0, 'theta': 1.0},
 {'a': 0, 'b': 10, 'psi': 0.0, 'theta': 1.25},
 {'a': 0, 'b': 10, 'psi': 0.0, 'theta': 1.5},
 {'a': 0, 'b': 10, 'psi': 0.5, 'theta': 0.25},
 {'a': 0, 'b': 10, 'psi': 0.5, 'theta': 0.75},
 {'a': 0, 'b': 10, 'psi': 0.5, 'theta': 1.0},
 {'a': 0, 'b': 10, 'psi': 0.5, 'theta': 1.25},
 {'a': 0, 'b': 10, 'psi': 0.5, 'theta': 1.5},
 {'a': 0, 'b': 10, 'psi': 1.0, 'theta': 0.25},
 {'a': 0, 'b': 10, 'psi': 1.0, 'theta': 0.75},
 {'a': 0, 'b': 10, 'psi': 1.0, 'theta': 1.0},
 {'a': 0, 'b': 10, 'psi': 1.0, 'theta': 1.25},
 {'a': 0, 'b': 10, 'psi': 1.0, 'theta': 1.5},
 {'a': 0, 'b': 20, 'psi': 0.0, 'theta': 0.25},
 {'a': 0, 'b': 20, 'psi': 0.0, 'theta': 0.75},
 {'a': 0, 'b': 20, 'psi': 0.0, 'theta': 1.0},
 {'a': 0, 'b': 20, 'psi': 0.0, 'theta': 1.25},
 {'a': 0, 'b': 20, 'psi': 0.0, 'theta': 1.5},
 {'a': 0, 'b': 20, 'psi': 0.5, 'theta': 0.25},
 {'a': 0, 'b': 20, 'p

In [85]:
list(q_data.coords.values())

[<xarray.DataArray 'a' (a: 5)>
 array([0, 1, 2, 3, 4])
 Coordinates:
   * a        (a) int64 0 1 2 3 4,
 <xarray.DataArray 'b' (b: 4)>
 array([10, 20, 30, 40])
 Coordinates:
   * b        (b) int64 10 20 30 40,
 <xarray.DataArray 'psi' (psi: 3)>
 array([0. , 0.5, 1. ])
 Coordinates:
   * psi      (psi) float64 0.0 0.5 1.0,
 <xarray.DataArray 'theta' (theta: 5)>
 array([0.25, 0.75, 1.  , 1.25, 1.5 ])
 Coordinates:
   * theta    (theta) float64 0.25 0.75 1.0 1.25 1.5]

In [121]:
[[y.data.tolist() for y in cv.variable] for cv in q_data.coords.values()]

[[0, 1, 2, 3, 4],
 [10, 20, 30, 40],
 [0.0, 0.5, 1.0],
 [0.25, 0.75, 1.0, 1.25, 1.5]]