In [1]:
%load_ext Cython

# Model Solution - Cython
Cython code that optimizes the solution of the model.

In [2]:
%%cython

# Cython code to optimise in C the solution of the model portion of the code

##################### Import Modules and math functions ######################

#Global Cython Compiler Directives:

#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
#cython: cdivision=True

#Modules
from libc.stdlib cimport  rand, RAND_MAX, calloc, free
from libc.math cimport exp, log, HUGE_VAL

#Define euler's constant
cdef double eg = 0.5772156649015328606065

############################# Define Globals #################################

cdef size_t tot_states, tot_decisions, iteration

########################### Define functions to use ##########################

#Draw a pseudo-random double on the closed interval [0, 1] from the C library
#generator; the model solver uses it to build the initial value-function guess

cdef inline double rand_val() nogil:
    #scale the integer draw from rand() by the largest value it can return
    cdef double draw = rand()
    return draw/RAND_MAX

#The infinity (sup) norm of the change between two successive iterates, used as
#the test of convergence; runs without the gil
#
#arr is read as two consecutive vectors of length tot_states: arr[j] holds the
#previous iterate and arr[j + tot_states] the new one. The return value is the
#largest absolute difference between them (-HUGE_VAL when tot_states is 0).

cdef double infnorm( double *arr ) nogil:
    cdef:
        double gap, norm=(-1)*HUGE_VAL
        Py_ssize_t j

    #a single pass keeps the running maximum, so no scratch array has to be
    #allocated (and no allocation can fail) on every call
    for j in range(tot_states):
        gap = arr[j + tot_states] - arr[j]
        #manual absolute value to prevent using abs function (a cpython api call)
        if gap<0:
            gap=(-1)*gap
        if gap>norm: norm=gap
    return norm

#Inner product of a typed memoryview with a raw C array; runs without the gil.
#The length of the memoryview decides how many entries of b are read.

cdef double dot( double[:] a, double *b ) nogil:
    cdef:
        Py_ssize_t pos=0, length=a.shape[0]
        double acc=0
    while pos<length:
        acc += a[pos]*b[pos]
        pos += 1
    return acc

#The model: solve for the value function by fixed-point iteration.
#
#Inputs (as they are indexed below):
#  param    [alpha, tol, iteration]; the loop runs for j = 1 .. iteration-1
#  wages    [state, 0, decision]
#  c_wages  [state, 0, decision]
#  mover    [0, decision]
#  tranny   [state, decision, next state]
#Outputs (written in place, and ONLY when the tolerance is met):
#  CCP      [decision, state]
#  V        [state]
#If the loop ends without converging, CCP and V are left untouched.

cpdef void Modelo(double[:] param, double[:,:,:] wages, double[:,:,:] c_wages, 
                  double[:,:] mover, double[:,:,:] tranny, double[:,:] CCP, double[:] V) nogil: 
             
    #declare and assign globals
    global iteration, tot_states, tot_decisions

    iteration=<size_t>param[2]
    tot_states=tranny.shape[0]
    #decisions are the second axis of the transition array; V is indexed by
    #state, so its length must not be used for the number of decisions
    tot_decisions=tranny.shape[1]
    
    #declare variables and arrays
    cdef:
        #parameters
        double alpha=param[0]
        double tol=param[1]
        double v_temp=0
        double total=0
        double check=0
        
        #declare loop iterators:
        Py_ssize_t j, k, l
        
        #define array types
        double *V_model     #two stacked value vectors: [0,S) previous, [S,2S) new
        double *v           #choice specific values, laid out as l + k*tot_decisions
        double *sums        #sum over decisions of exp(v) for each state
        
    V_model=<double*> calloc(tot_states*2, sizeof(double))
    v=<double*> calloc(tot_states*tot_decisions, sizeof(double))
    sums=<double*> calloc(tot_states, sizeof(double)) 

    #all three buffers are required: stop if ANY allocation failed, releasing
    #whatever was obtained first (free(NULL) is a no-op)
    if V_model==NULL or v==NULL or sums==NULL:
        free(V_model)
        free(v)
        free(sums)
        with gil:
            raise MemoryError()

    #run the model solution
    try:
        #initial guess
        for j in range(tot_states):
            V_model[j]=rand_val()
        
        #run the model loop
        for j in range(1,iteration):
    
            for k in range(tot_states):            # iterate over the states

                total=0
            
                for l in range(tot_decisions):     # iterate over each decision to fill in the v matrix

                    v_temp=wages[k,0,l] + c_wages[k,0,l] + mover[0,l] + alpha*dot(tranny[k,l,:],V_model)
                   
                    v[l + k*tot_decisions]=v_temp

                    #sum the exponential of the choice specific value
                    total += exp(v_temp)    
                    
                #store the total in the sums array
                sums[k] = total
                
                #update the V_model array with the new values
                V_model[k + tot_states] = eg + log(total)
                
            #check for convergence
            check = infnorm(V_model)
            
            #convergence:
            if check < tol:
                #copy elements from iteration to the output arrays
                for k in range(tot_states):
                    V[k] = V_model[k + tot_states]
                    for l in range(tot_decisions):
                        CCP[l,k] = exp(v[l+k*tot_decisions])/sums[k]
                break
            #move the new values into the first half of V_model for next iteration
            else:
                for k in range(tot_states):
                    V_model[k]=V_model[k+tot_states]
                
    finally:
        free(V_model)
        free(v)
        free(sums)

# Simulation - Cython
This cell defines the function that simulates the model, using shocks drawn from the GEV-Type 3 distribution.

Output of the model is a CCP array

The code will be reworked so that the relative outcomes will be the outputs instead of the CCP.

In [9]:
%%cython -lgsl -lgslcblas

# Cython code to optimise in C the simulation of the model portion of the code

##################### Import Modules and math functions ######################

#Global Cython Compiler Directives:

#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
#cython: cdivision=True

#from cython.parallel import prange
from libc.stdlib cimport rand, RAND_MAX, malloc, calloc, free, abort
from libc.math cimport exp, log, HUGE_VAL

#Use the CythonGSL package to get the random number gen at low-level
from cython_gsl cimport *

####################### Assign the global variables ##########################

#These will be passed into functions automatically without 
#having to call them up explicitely

cdef Py_ssize_t HH, Gen, tot_states, tot_decisions

##############################################################################
####### Define the functions that will assist the simulation module ##########
##############################################################################

############ Random Numbers, Random States, and Random Shocks functions

#Pseudo-random double on the closed interval [0,1], taken from the C library rand()

cdef inline double rand_value() nogil:
    #divide the integer draw by the largest value rand() can return
    cdef double draw = rand()
    return draw/RAND_MAX

#Fill the first `iter` entries of an already allocated array with draws from
#gsl_ran_gumbel1 (both distribution parameters set to 1), using generator r

cdef void FILL_shocks(gsl_rng* r, double* arr, Py_ssize_t iter) nogil:
    cdef Py_ssize_t filled=0
    while filled<iter:
        arr[filled]=gsl_ran_gumbel1(r,1,1)
        filled += 1


############# Choice Specific Values assisting functions

#Inner product of two typed memoryviews; runs without the gil.
#The length of `a` decides how many entries of both arguments are read.

cdef double dot( double[:] a, double[:] b ) nogil:
    cdef:
        Py_ssize_t pos=0, length=a.shape[0]
        double acc=0

    while pos<length:
        acc += a[pos]*b[pos]
        pos += 1
    return acc

#Return the decision with the largest choice specific value for one household.
#The household's values are read from arr[curr_hh*tot_decisions + 0 .. tot_decisions-1]
#and the answer is 1-based (0 is returned only if no value exceeds -HUGE_VAL).

cdef Py_ssize_t Compare(double* arr, size_t curr_hh) nogil:
    
    cdef:
        Py_ssize_t best=0, pos
        double candidate, top=(-1)*HUGE_VAL

    for pos in range(tot_decisions):
        candidate = arr[pos + curr_hh*tot_decisions]
        #strict comparison: on ties the earliest decision is kept
        if candidate > top: 
            top = candidate
            best = pos+1

    return best


############### Function and auxiliaries determining the next state

#Rewrite arr[0..index] in place as its running (cumulative) sum.
#When index is omitted it defaults to 3, i.e. the first four entries.

cdef void cum_sum(double *arr, size_t index=4-1) nogil:
    cdef size_t pos=1
    #walk forward so each entry adds the already-accumulated entry before it
    while pos<=index:
        arr[pos] += arr[pos-1]
        pos += 1

#Locate x inside an array of cumulative probabilities: return the first
#(0-based) index i with x < arr[i], scanning the first tot_states entries.
#
#If x is not below any entry (rand_value() can return exactly 1, and a
#cumulative sum can round slightly below 1) the function used to fall off the
#end without a return value. It now falls back to the last index at which the
#cumulative value actually increased, i.e. the last state with positive mass.

cdef unsigned int find_interval(double x, double *arr) nogil:
    cdef Py_ssize_t i
    
    for i in range(tot_states):
        if x<arr[i]:
            return <unsigned int>i

    #fallback: step back over trailing entries that added no probability
    i = tot_states-1
    while i>0 and arr[i-1]>=arr[i]:
        i -= 1
    if i<0:
        i = 0
    return <unsigned int>i

#Draw the next state (1-based) from the discrete distribution in `tran`.
#The first tot_states entries of tran are copied, turned into cumulative
#probabilities, and a uniform draw is located among them.

cdef unsigned int Next_State(double[:] tran) nogil:
    cdef:
        double x
        double *array
        unsigned int index
        Py_ssize_t i
    
    array=<double*> calloc(tot_states, sizeof(double))
    
    if not array: abort()

    try:
        #generate a random number to help determine the next state
        x = rand_value()
        
        #copy the transition function values into the array to prevent rewrite
        for i in range(tot_states):
            array[i]=tran[i]
        
        #rewrite the array into the cumulative sum of the elements; the last
        #index is passed explicitly because cum_sum's default covers only
        #four entries
        if tot_states>1:
            cum_sum(array, <size_t>(tot_states-1))
        
        #the next state is the return value of the function
        #(the array index) + 1 to create the next state
        index = find_interval(x, array) + 1
    
        return index

    finally:
        free(array)  


################### Functions generating frequencies 

#Relative frequency of each decision or state within one generation.
#
#  arr       1-based codes, one per household, stored as arr[i + curr_gen*HH]
#  freq      output; freq[c-1, curr_gen] receives the share of households with code c
#  curr_gen  generation (column of freq) to fill
#  flag      1 -> codes are decisions (tot_decisions categories),
#            anything else -> codes are states (tot_states categories)
#Codes outside 1..number of categories are ignored.

cdef void Frequencies(unsigned int* arr, double[:,:] freq, Py_ssize_t curr_gen, unsigned int flag) nogil:
    cdef:
        unsigned int choice
        unsigned int* counter
        Py_ssize_t n_cat    #number of categories being counted
        Py_ssize_t i
    
    if flag==1:
        n_cat=tot_decisions
    else:
        n_cat=tot_states

    #allocate and fill the counter array with 0s
    counter=<unsigned int*> calloc(n_cat, sizeof(unsigned int))

    if not counter: abort()

    try:
        for i in range(HH):
            choice = arr[i + curr_gen*HH]
            #the code is its own (1-based) position in the counter array
            if choice>=1 and <Py_ssize_t>choice<=n_cat:
                counter[choice-1]+=1

        #write one row per counted category (not always tot_states rows)
        for i in range(n_cat):
            freq[i,curr_gen] = counter[i]/<double>HH
    finally:
        free(counter)

#Empirical conditional choice probabilities and the relative outcomes built
#from them, for generations 0 .. Gen-2.
#
#  dec_arr    1-based decision of household i in generation g at dec_arr[i + g*HH]
#  state_arr  1-based state of household i in generation g at state_arr[i + g*HH]
#  freq       output; for state i, freq[g, 2*i] and freq[g, 2*i+1] receive two
#             ratios of that state's choice probabilities
#The ratios use decision offsets 0..3, so at least four decisions are assumed.
#NOTE(review): a state nobody occupies in a generation gives a zero
#denominator; confirm the resulting nan/inf entries are acceptable downstream.

cdef void CCP(unsigned int* dec_arr, unsigned int* state_arr, 
              double[:,:] freq) nogil:
    
    #declare variable types
    cdef:
        unsigned int choice, state
        unsigned int* counter   #households per (generation, state, decision)
        double* sums            #households per (generation, state)
        double* ccp             #counter divided by sums
        Py_ssize_t g, i, j, row
    
    #allocate the counter arrays with 0s
    counter=<unsigned int*> calloc(tot_states*tot_decisions*(Gen-1), sizeof(unsigned int))
    sums=<double*> calloc(tot_states*(Gen-1), sizeof(double))
    ccp=<double*> calloc(tot_states*tot_decisions*(Gen-1), sizeof(double))
   
    #all three buffers are needed, so stop if ANY allocation failed
    if counter==NULL or sums==NULL or ccp==NULL: abort()

    try:
        for g in range(Gen-1):
            #tally households by state and by (state, decision); the 1-based
            #codes index the tallies directly
            for i in range(HH):
                state = state_arr[i + g*HH]
                choice = dec_arr[i + g*HH]
                if state>=1 and <Py_ssize_t>state<=tot_states:
                    row = (<Py_ssize_t>state - 1) + g*tot_states
                    sums[row] += 1
                    if choice>=1 and <Py_ssize_t>choice<=tot_decisions:
                        counter[(<Py_ssize_t>choice - 1) + row*tot_decisions] += 1
                                
            #fill in the CCP array with the frequency estimates
            for i in range(tot_states):
                row = i + g*tot_states
                for j in range(tot_decisions):
                    ccp[j + row*tot_decisions] = counter[j + row*tot_decisions]/sums[row]

            #final loop writes the relative outcomes: two columns per state
            for i in range(tot_states):
                row = (i + g*tot_states)*tot_decisions
                freq[g,(2*i)] = ccp[3 + row]/ccp[1 + row]
                freq[g,(2*i+1)] = ccp[2 + row]/ccp[row]
            
    finally:
        free(counter)
        free(sums)
        free(ccp)

        
############ Function defining the simulation of the model ################

cpdef void Sim_Model(double[::1] V, double alpha, double[:,:,:] wages, 
                     double[:,:,:] child, double[:,:] move, 
                     double[:,:,:] tranny, int[::1] demos, double[:,:] dec_freq, 
                     double[:,:] states_freq, double[:,:] CCPs, double[::1] init_states) nogil:
    #Simulate households over generations given the solved value function.
    #
    #Inputs (as they are indexed below):
    #  V            value of each state
    #  alpha        weight on the expected continuation value
    #  wages, child [state, 0, decision]
    #  move         [0, decision]
    #  tranny       [state, decision, next state]
    #  demos        [number of households, number of generations]
    #  init_states  distribution used to draw the states of generation 0
    #Outputs (written in place):
    #  dec_freq     [decision, generation], generations 0 .. Gen-2
    #  states_freq  [state, generation], generations 0 .. Gen-1
    #  CCPs         relative outcomes, filled by CCP()
    #The GSL generator is allocated here and no seed is set on it.
    
    #declare and assign the globals
    global HH, Gen, tot_states, tot_decisions
    
    HH=demos[0]
    tot_states=V.shape[0]            #one entry of V per state
    tot_decisions=tranny.shape[1]    #decisions are the second axis of the transition array
    Gen=demos[1]
    
    #declare the types for variables and arrays
    cdef:
        Py_ssize_t decision=0, state=0
        
        #define array types
        unsigned int* decisions     #1-based decisions, j + g*HH
        unsigned int* states        #1-based states, j + g*HH
        double* v_sim               #choice specific values, k + j*tot_decisions
        
        #define the shock array
        double* shocks              #k + (j + g*HH)*tot_decisions
        gsl_rng* r
        
        #define iterators
        Py_ssize_t i, j, k
    
    #allocate arrays; v_sim and shocks hold one value per DECISION for each
    #household, so they are sized with tot_decisions to match their indexing
    decisions = <unsigned int*> calloc(HH*(Gen-1), sizeof(unsigned int))
    states = <unsigned int*> calloc(HH*Gen, sizeof(unsigned int))
    v_sim = <double*> calloc(HH*tot_decisions, sizeof(double))
    shocks = <double*> malloc(HH*tot_decisions*(Gen-1) * sizeof(double))
    r = gsl_rng_alloc (gsl_rng_mt19937) #use the MT19937 algorithm for prng
        
    #every resource is required: stop if ANY of them could not be obtained
    if decisions==NULL or states==NULL or v_sim==NULL or shocks==NULL or r==NULL: abort()
    
    #Fill in the shocks with the shock function
    FILL_shocks(r,shocks,HH*tot_decisions*(Gen-1))
    
    #simulate the model
    try:
        
        #for initial generation, replace with random states generated from given distribution
        for j in range(HH):
            states[j]=Next_State(init_states)

        #fill in the frequency of states of the first generation (gen 0)
        Frequencies(states, states_freq, 0, 0)

        #outerloop are the generations (make sure that we skip the last generation - they
        #make no decisions - so start iterator at 1 and not 0)
        for i in range(1,Gen):

            #inner loop the households (should be parallelizable)
            for j in range(HH):

                #grab the household's state from the matrix
                state=states[j+(i-1)*HH]

                for k in range(tot_decisions):

                    #calculate choice specific value functions
                    v_sim[k+j*tot_decisions] = wages[state-1,0,k] + child[state-1,0,k] + move[0,k] + alpha*dot(tranny[state-1,k,:],V) + shocks[k+(j+(i-1)*HH)*tot_decisions]

                #compare values, return the decision (index+1)
                decision=Compare(v_sim,j)
                decisions[j+(i-1)*HH]=<unsigned int> decision

                #the next generation's state, drawn from the transition row of
                #the current state and the decision just taken
                states[j+i*HH]=Next_State(tranny[state-1,decision-1,:])

            #calculate the frequencies of decisions taken and of the next states
            Frequencies(decisions, dec_freq, (i-1), 1)
            Frequencies(states, states_freq, i, 0)

        #fill the array of relative outcomes
        CCP(decisions, states, CCPs)

    finally:
        free(decisions)
        free(v_sim)
        free(states)
        free(shocks)
        gsl_rng_free(r)

# The Python Code to Run the Model
This cell sets the parameters, the transition matrices, and the wage profiles, then solves and simulates the model.

In [12]:
# The current version of the move model: the namespace with the model as an imported
# Cython optimized function module.

##################### Import Modules and math functions ######################

import numpy as np
import time

#Import the Cython modules that run the actual model and simulation
# from Solution import Modelo
# from Simulation import Sim_Model

######################### Define Parameters ##################################

# Scalars that configure the solver and the size of the problem.
alpha = 0.95                    # altruism parameter
tol = 1e-8                      # tolerance of convergence
iteration = 1000                # upper bound of iterations
tot_states = tot_decisions = 4  # number of states / number of decisions

# Cost terms that enter the children's payoff and the mover vector.
MoveCost = 0         # normalized cost of moving to new region
EducCost_R1 = 0      # opportunity cost of education in region 1 (normalize to 0)
EducCost_R2 = -1.5   # opportunity cost of education in region 2

# The solver receives [alpha, tol, iteration] as one vector of doubles.
param_lst = [alpha, tol, iteration]
parameters = np.array(param_lst, dtype=np.float64)

####################### Wages of Children and Adults #########################

############## Adults
#Market Adult Wages (as an array, ndim=3)
#rows:      regions
#columns:   skill levels

wage_R1_ls=1
wage_R1_hs=2.091
wage_R2_ls=0.950
wage_R2_hs=1.956

#Structure a wage array for quick access: one row per state, one entry per decision
wage_lst=[[wage_R1_ls]*2+[wage_R2_ls]*2,
          [wage_R2_ls]*2+[wage_R1_ls]*2,
          [wage_R1_hs]*2+[wage_R2_hs]*2,
          [wage_R2_hs]*2+[wage_R1_hs]*2]

adult_wages=np.array(wage_lst, dtype='d').reshape((tot_states,1,tot_decisions))

############## Children
#Child activity (as arrays, ndim=3)
children=input("Do children have wages (y/n): ")

#Accept the answer regardless of case or surrounding blanks. Any other answer
#stops here with a clear message; previously it was silently accepted and the
#cell failed a few lines later with a NameError on the undefined wages.
_children_answer=children.strip().lower()
if _children_answer=='y':
    c_wage_R1=0.536; c_wage_R2=0.358
elif _children_answer=='n':
    c_wage_R1=0; c_wage_R2=0
else:
    raise ValueError("Expected 'y' or 'n' for the children's wages, got %r" % (children,))

#Permute the wages to get quick access: one row per state, one entry per decision
child_lst=[[c_wage_R1,EducCost_R1,c_wage_R2,EducCost_R2],
           [c_wage_R2,EducCost_R2,c_wage_R1,EducCost_R1]]*2

child_wages=np.array(child_lst, dtype='d').reshape((tot_states,1,tot_decisions))

############## Moving Cost
#Mover array: a single row with one entry per decision
move_lst=[[0]*2+[MoveCost]*2]

move=np.array(move_lst, dtype='d')

############### Transition Functions for Skill Formation ####################

#Transition function: Define the transition function 
#1) deterministic 
#2) End-of-6th-grade drop out
#3) End-of-9th-grade drop out

#Prompt for the model:
model=input('''
Model skill aquisition:
deterministic (1)
uncertain skill - 6th grade dropout (2)
uncertain skill - 9th grade dropout (3)
Please enter the corresponding model number (anything else to quit): ''')

#Stop the cell by raising SystemExit instead of calling quit(): the old bare
#`except` also caught the SystemExit raised inside the `try`, and any input
#that got past it fell through to the model 3 table below.
try:
    model=int(model)
except ValueError:
    #non-numeric input is the documented way to quit
    raise SystemExit("No model number entered. Exiting...") from None

if model<1 or model>3:
    print ("Error: input is out of indicated bounds. Exiting...\n")
    raise SystemExit(1)

#Each table lists 16 rows of next-state probabilities; after the reshape below
#they are read as [state, decision, next state].
if model==1:
    tran_st=[[1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1],
             [0,1,0,0],[0,0,0,1],[1,0,0,0],[0,0,1,0],
             [1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1],
             [0,1,0,0],[0,0,0,1],[1,0,0,0],[0,0,1,0]]

elif model==2:
    tran_st=[[1,0,0,0],[0.1416,0,0.8584,0],[0,1,0,0],[0,0.1545,0,0.8455],
             [0,1,0,0],[0,0.1518,0,0.8482],[1,0,0,0],[0.1171,0,0.8829,0],
             [1,0,0,0],[0.0871,0,0.9129,0],[0,1,0,0],[0,0.0205,0,0.9795],
             [0,1,0,0],[0,0.0396,0,0.9604],[1,0,0,0],[0.0262,0,0.9738,0]]

else:
    tran_st=[[1,0,0,0],[0.3799,0,0.6201,0],[0,1,0,0],[0,0.3496,0,0.6504],
             [0,1,0,0],[0,0.5125,0,0.4875],[1,0,0,0],[0.3964,0,0.6036,0],
             [1,0,0,0],[0.2635,0,0.7365,0],[0,1,0,0],[0,0.1365,0,0.8635],
             [0,1,0,0],[0,0.2078,0,0.7922],[1,0,0,0],[0.1429,0,0.8571,0]]

#place into an array and make into 3 dimensional array
tran_func=np.array(tran_st, dtype='d').reshape((tot_states,tot_decisions,tot_states))

#every (state, decision) row must be a probability distribution
assert np.allclose(tran_func.sum(axis=2), 1.0), "transition rows must sum to 1"

########################## Solve the Model ###################################

#NOTE(review): machine-specific absolute path. It is only used by the
#commented-out np.savetxt calls; point it at a configurable location before
#re-enabling them.
directory =("/Users/idiosyncrasy58/Dropbox/Documents/College/Universitat Autonoma de Barcelona/"+
            "IDEA - Economics/Doctoral Thesis Ideas/Migration/IFLS/Project Files/"+
            "Simulation based on Thesis chapter 2/Simulation Python code/Model/")

#define the output arrays to send into function and fill in later
CCP=np.zeros((tot_decisions,tot_states), dtype='d')
V=np.zeros((tot_states), dtype='d')

#Call the model and time it (perf_counter is the clock meant for measuring
#short elapsed intervals)
t1=time.perf_counter()
Modelo(parameters,adult_wages,child_wages,move,tran_func,CCP,V)
t2=time.perf_counter() - t1

#Modelo only writes its outputs once the tolerance is met, so arrays that are
#still all zero mean the iteration limit was reached without convergence
if not CCP.any():
    print ("Warning: the model did not converge within",iteration,"iterations; CCP and V were not filled.")

print ("\nSolving the model took",t2,"seconds to complete \n")
print ("The model yields the following CCPs:")
print (CCP)

#save the values from the model
# np.savetxt(directory+"Output/CCP.txt",CCP)
# np.savetxt(directory+"Output/Cont_Values.txt",V)

######################### Simulate the model ################################

sim=input("Simulate the model (y/n): ")

if sim=='y':
    #define the vector of integers that will enter into the model:
    #1) number of households
    num_HH=5000
    #2) number of generations
    gens=25
    #3) initial distribution
    init_states=np.array([0.25,0.25,0.25,0.25], dtype='d')

    #pack into an array to send into function
    people=np.array([num_HH,gens], dtype='i')

    #Declare and allocate the output matrices
    dec_freq_out=np.zeros((tot_decisions,(gens-1)), dtype='d')
    states_freq_out=np.zeros((tot_states,gens), dtype='d')
    #the simulation writes two relative outcomes per STATE for each generation,
    #so the column count follows tot_states
    Rel_Outcomes=np.zeros(((gens-1),tot_states*2), dtype='d')

    #call the model and time it
    t1=time.perf_counter()
    Sim_Model(V,alpha,adult_wages,child_wages,move,
              tran_func,people,dec_freq_out,states_freq_out,Rel_Outcomes,init_states)
    t2=time.perf_counter() - t1

    #save output matrices
#     np.savetxt(directory+"Output/Decision_Frequencies.txt",dec_freq_out)
#     np.savetxt(directory+"Output/States_Frequencies.txt", states_freq_out)
#     np.savetxt(directory+"Output/Relative_Outcomes.txt", Rel_Outcomes)
    
    #print the time it took to run the simulation and the relative outcomes
    print ("\nSimulating the model took",t2,"seconds to complete \n")
    print ("The simulation yields the following estimated Relative Outcomes:")
    print (Rel_Outcomes,"\n")

Do children have wages (y/n): y

Model skill aquisition:
deterministic (1)
uncertain skill - 6th grade dropout (2)
uncertain skill - 9th grade dropout (3)
Please enter the corresponding model number (anything else to quit): 3

Solving the model took 0.0006401538848876953 seconds to complete 

The model yields the following CCPs:
[[ 0.3148614   0.25092101  0.30231395  0.20714548]
 [ 0.3588095   0.06815815  0.39043628  0.07958384]
 [ 0.2459246   0.32125839  0.2168831   0.28874066]
 [ 0.08040451  0.35966245  0.09036667  0.42453001]]
Simulate the model (y/n): y

Simulating the model took 0.04466891288757324 seconds to complete 

The simulation yields the following estimated Relative Outcomes:
[[  0.21527778   0.78960396   6.           1.37883959   0.25799574
    0.64925373   5.32653061   1.36501901]
 [  0.18601748   0.80243161   5.80769231   1.36789298   0.25047438
    0.77225131   6.           1.44262295]
 [  0.22891566   0.77496484   5.23863636   1.33024691   0.21829522
    0.88764045   