In [1]:
import collections

import numpy as np
import pandas as pd

### Preliminary

In [2]:
MISSING_INT = -99
INVALID_FLOAT = -99.00
HOURS = [8, 12, 16]
T = 24

In [3]:
DATA_LABLES_SIM = [
    "Period", 
    "Experience", 
    "Choice_Lagged",
    "Choice",
]

In [4]:
model_spec = dict()
model_spec["num_periods"] = 3
model_spec["num_choices"] = 3
model_spec["experience_factor_0"] = 0.25
model_spec["experience_factor_1"] = 0.15
model_spec["delta"] = 1

model_spec = collections.namedtuple("model_specification", model_spec.keys())(
        **model_spec
    )

In [5]:
model_params = dict()
model_params["alpha"] = 0.5
model_params["beta"] = 0.5
model_params = collections.namedtuple("model_specification", model_params.keys())(
        **model_params
    )

In [6]:
model_spec

model_specification(num_periods=3, num_choices=3, experience_factor_0=0.25, experience_factor_1=0.15, delta=1)

In [7]:
model_params

model_specification(alpha=0.5, beta=0.5)

### Create states and indexer

In [8]:
def create_state_space(model_spec):  
    """This function creates the state space matrix
    and assignes an index value to every state"""
    
    data = []
    
    shape = (
        model_spec.num_periods,
        model_spec.num_periods*2,
        model_spec.num_choices,
    )
    
    indexer = np.full(shape, MISSING_INT)
    
    # Initialize index counter
    i = 0
    
    # Loop over all periods
    for period in range(model_spec.num_periods):
        
        # Define entry state in period zero:
        if period == 0:

            indexer[period, 0, 0] = i
            row = [period, 0, 0]

            # Update counter
            i += 1

            # Add to state space matrix
            data.append(row)
        
        else:
        
            # Loop over all admissible values of experience
            for exp in range((period)*2 + 1):

                # Loop over all choices:
                for choice_lagged in range(model_spec.num_choices):
                    
                    # Exclude impossible states:
                    if (exp < choice_lagged):
                        continue
                        
                    if (choice_lagged == 0) and (exp > 2*(period - 1)):
                        continue
                    
                    if (choice_lagged == 1) and (exp > 2*(period - 1) + 1):
                        continue
                    
                    if (choice_lagged != 0) and (exp == 0):
                        continue

                    # Check for duplicate states:
                    if (
                        indexer[period, exp, choice_lagged] != MISSING_INT
                    ):
                        continue

                    # Record index of currently reached admissible state
                    # space point
                    indexer[
                        period, exp, choice_lagged
                    ] = i

                    # Update count
                    i += 1

                    # Add to state space matrix
                    row = [period, exp, choice_lagged]
                    data.append(row)
    
        states = np.array(data)
    
    return states, indexer
    
    

In [9]:
states, indexer = create_state_space(model_spec)

In [10]:
states

array([[0, 0, 0],
       [1, 0, 0],
       [1, 1, 1],
       [1, 2, 2],
       [2, 0, 0],
       [2, 1, 0],
       [2, 1, 1],
       [2, 2, 0],
       [2, 2, 1],
       [2, 2, 2],
       [2, 3, 1],
       [2, 3, 2],
       [2, 4, 2]])

In [11]:
indexer[2,0,0]

4

In [12]:
states[4]

array([2, 0, 0])

### State space derived covariates

In [13]:
produce = np.full(states.shape[0], MISSING_INT)
produce = np.where(states[:, 1] == 0, 1, produce)
produce = np.where((states[:, 1] != 0) & (states[:, 0] == 1), (1 + states[:, 1]*model_spec.experience_factor_0), produce)
produce = np.where((states[:, 1] != 0) & (states[:, 0] == 2), (1 + (states[:, 1] - states[:, 2])*model_spec.experience_factor_0 + states[:, 2]*model_spec.experience_factor_1), produce)

### Backward induction

In [14]:
def get_continuation_values(states_subset, indexer, emaxs):
    """Obtain continuation values for each of the choices at each state
    of the period currently reached by the parent loop."""
    
    for i in range(states_subset.shape[0]):
    
        # Unpack parent state and get index
        (
            period,
            exp,
            choice_lagged,
        ) = states_subset[i]

        k_parent = indexer[period, exp, choice_lagged]

        # Choice 8 hours
        k_0 = indexer[period + 1, exp, 0]

        # Choice 12 hours
        k_1 = indexer[period + 1, exp + 1, 1]

        # Choice 16 hours
        k_2 = indexer[period + 1, exp + 2, 2]

        # Get emax
        emaxs[k_parent, 0] = emaxs[k_0, 3]
        emaxs[k_parent, 1] = emaxs[k_1, 3]
        emaxs[k_parent, 2] = emaxs[k_2, 3]
    
    return emaxs
    

In [15]:
def construct_emax(model_spec, model_params, produce_period, flow_utilities_period, value_functions_period, emaxs):
    """Calculates the maximum value function as the flow ulity plus continuatin value
    of the most profitable choice"""
    emax_period = np.full(emaxs.shape[0], INVALID_FLOAT) 
    
    for i in range(emaxs.shape[0]):
        current_max_value_function = INVALID_FLOAT

        for j in range(model_spec.num_choices):

            #flow_utility = ((produce[i]*HOURS[j])**model_params.alpha)*((T - HOURS[j])**model_params.beta)
            flow_utility = ((produce[i])**model_params.alpha)*((T - HOURS[j])**model_params.beta)
            flow_utilities_period[i, j] = flow_utility

            value_function_choice = flow_utility + model_spec.delta * emaxs[i, j]
            value_functions_period[i,j] = value_function_choice

            if value_function_choice > current_max_value_function:
                current_max_value_function = value_function_choice
            
            emax_period[i] = current_max_value_function

    return flow_utilities_period, value_functions_period, emax_period

In [16]:
def backward_induction(model_spec, model_params, states, indexer, produce):
    """This function performs the backward induction and returns
    the model solution"""
    
    emaxs = np.zeros((states.shape[0], model_spec.num_choices + 1))
    flow_utilities = np.zeros((states.shape[0], model_spec.num_choices))
    value_functions = np.zeros((states.shape[0], model_spec.num_choices))
    
    
    # Loop backwards over all periods:
    for period in reversed(range(model_spec.num_periods)):
        
        # Extract period information
        # States
        states_period = states[np.where(states[:, 0] == period)]
        # Produce
        produce_period = produce[states[:, 0] == period]
        
        # Continuation value calculation not performed for last period
        # since continuation values are known to be zero
        if period == model_spec.num_periods - 1:
            pass
        
        else:
            
            # Fill first block of elements in emaxs for the current period
            # corresponding to the continuation values
            emaxs = get_continuation_values(
                states_period,
                indexer,
                emaxs,
            )
            
        # Extract current period information for current loop calculation
        emaxs_period = emaxs[np.where(states[:, 0] == period)]
        flow_utilities_period = flow_utilities[np.where(states[:, 0] == period)]
        value_functions_period = flow_utilities[np.where(states[:, 0] == period)]
            
        # Calculate emax for current period reached by the loop
        flow_utilities_period, value_functions_period, emax_period = construct_emax(
            model_spec,
            model_params,
            produce_period,
            flow_utilities_period,
            value_functions_period,
            emaxs_period[:, :3],
        )
        
        emaxs_period[:, 3] = emax_period
        emaxs[np.where(states[:, 0] == period)] = emaxs_period
        flow_utilities[np.where(states[:, 0] == period)] = flow_utilities_period
        value_functions[np.where(states[:, 0] == period)] = value_functions_period
        
    return flow_utilities, value_functions, emaxs

In [17]:
flow_utilities, value_functions, emaxs = backward_induction(model_spec, model_params, states, indexer, produce)

In [18]:
flow_utilities

array([[4.        , 3.46410162, 2.82842712],
       [4.        , 3.46410162, 2.82842712],
       [4.        , 3.46410162, 2.82842712],
       [4.47213595, 3.87298335, 3.16227766],
       [4.        , 3.46410162, 2.82842712],
       [4.        , 3.46410162, 2.82842712],
       [4.47213595, 3.87298335, 3.16227766],
       [4.89897949, 4.24264069, 3.46410162],
       [4.        , 3.46410162, 2.82842712],
       [4.47213595, 3.87298335, 3.16227766],
       [4.28952212, 3.71483512, 3.03315018],
       [4.89897949, 4.24264069, 3.46410162],
       [4.73286383, 4.09878031, 3.34664011]])

In [19]:
value_functions

array([[12.        , 11.46410162, 12.19954257],
       [ 8.        ,  7.93623757,  7.30056308],
       [ 8.        ,  7.46410162,  7.72740661],
       [ 9.37111544,  8.16250546,  7.89514149],
       [ 4.        ,  3.46410162,  2.82842712],
       [ 4.        ,  3.46410162,  2.82842712],
       [ 4.47213595,  3.87298335,  3.16227766],
       [ 4.89897949,  4.24264069,  3.46410162],
       [ 4.        ,  3.46410162,  2.82842712],
       [ 4.47213595,  3.87298335,  3.16227766],
       [ 4.28952212,  3.71483512,  3.03315018],
       [ 4.89897949,  4.24264069,  3.46410162],
       [ 4.73286383,  4.09878031,  3.34664011]])

In [20]:
emaxs

array([[ 8.        ,  8.        ,  9.37111544, 12.19954257],
       [ 4.        ,  4.47213595,  4.47213595,  8.        ],
       [ 4.        ,  4.        ,  4.89897949,  8.        ],
       [ 4.89897949,  4.28952212,  4.73286383,  9.37111544],
       [ 0.        ,  0.        ,  0.        ,  4.        ],
       [ 0.        ,  0.        ,  0.        ,  4.        ],
       [ 0.        ,  0.        ,  0.        ,  4.47213595],
       [ 0.        ,  0.        ,  0.        ,  4.89897949],
       [ 0.        ,  0.        ,  0.        ,  4.        ],
       [ 0.        ,  0.        ,  0.        ,  4.47213595],
       [ 0.        ,  0.        ,  0.        ,  4.28952212],
       [ 0.        ,  0.        ,  0.        ,  4.89897949],
       [ 0.        ,  0.        ,  0.        ,  4.73286383]])

In [21]:
states

array([[0, 0, 0],
       [1, 0, 0],
       [1, 1, 1],
       [1, 2, 2],
       [2, 0, 0],
       [2, 1, 0],
       [2, 1, 1],
       [2, 2, 0],
       [2, 2, 1],
       [2, 2, 2],
       [2, 3, 1],
       [2, 3, 2],
       [2, 4, 2]])

### Simulation

In [19]:
def simulate(model_spec, model_params, produce, emaxs):
    
    data = []
    hours = np.asarray(HOURS)
    
    for period in range(model_spec.num_periods):
        
        if period == 0:    
            current_states = states[states[:, 0] == period]
        
        else:
            current_states = current_states

        idx = indexer[
            current_states[:, 0],
            current_states[:, 1],
            current_states[:, 2]
        ]
        
        #flow_utilities = ((produce[idx]*hours)**model_params.alpha)*((T - hours)**model_params.beta)
        flow_utilities = ((produce[idx])**model_params.alpha)*((T - hours)**model_params.beta)

        value_functions = flow_utilities + model_spec.delta * emaxs[idx, :3]
        
        # Determine choice as option with highest choice specific value function
        choice = np.argmax(value_functions, axis=1)
        
        # Record period experiences
        rows = np.column_stack(
            (
                current_states.copy(),
                choice,
                #flow_utilities,
                #continuation_values,
                #value_functions,
            )
        )

        data.append(rows)
        
        # Update current states according to choice
        current_states[:, 0] += 1
        current_states[:, 2] = choice
        current_states[:, 1] = np.where(
            choice == 1, current_states[:, 1] + 1, current_states[:, 1]
        )
        current_states[:, 1] = np.where(
            choice == 2, current_states[:, 1] + 2, current_states[:, 1]
        )

        dataset = pd.DataFrame(np.vstack(data), columns=DATA_LABLES_SIM)
    
    return dataset

In [20]:
df = simulate(model_spec, model_params, produce, emaxs)

In [21]:
df

Unnamed: 0,Period,Experience,Choice_Lagged,Choice
0,0,0,0,2
1,1,2,2,0
2,2,2,0,0
