In [1]:
import chaospy as cp
import numpy as np
import pandas as pd
import respy as rp
from os import getpid
import time
from joblib import Parallel, delayed

## Produce input parameters

Draw the sample of random input parameters.   

In [12]:
# load model specifications
_, base_options = rp.get_example_model("kw_94_one", with_data=False)
base_params = pd.read_pickle("./input/respy-se-collection/params_kw_94_one_se.pkl")
constraints = rp.get_parameter_constraints("kw_94_one")
# constraints.remove({"loc": "shocks_sdcorr", "type": "sdcorr"})
# mean and cov for sampling
mean = base_params["value"].to_numpy()[:27]
cov = pd.read_pickle("./input/respy-se-collection/covariance_kw_94_one.pkl").to_numpy()

# sample input parameters
np.random.seed(123)
distribution = cp.MvNormal(loc=mean, scale=cov)
input_params_20 = list(distribution.sample(20).T) # 20 draws for simplicity

In [36]:
def params_to_respy(input_params, *args):
    """transfer sampled paramters to respy format."""
    
    # baseline options and params for the indices.
    _, base_options = rp.get_example_model("kw_94_one", with_data=False)
    base_params = pd.read_pickle("./input/respy-se-collection/params_kw_94_one_se.pkl")

    params_idx = pd.Series(data=input_params, index=base_params.index[0:27])

    assert len(params_idx) == 27, "Length of KW94 vector must be 27."
    part_1 = params_idx

    rp_params, _ = rp.get_example_model("kw_94_one", with_data=False)
    part_2 = rp_params.iloc[27:31, 0]

    parts = [part_1, part_2]
    rp_params_series = pd.concat(parts)
    input_params_respy = pd.DataFrame(rp_params_series, columns=["value"])

    return input_params_respy

In [52]:
start = time.time()
input_params_respy = Parallel(n_jobs=-1)(delayed(params_to_respy)(i) for i in input_params_20)
end = time.time()
# print(params_idx_respy)
print(f'\nTime to complete: {end - start:.2f}s\n')


Time to complete: 4.36s



In [54]:
input_params_respy[19]

Unnamed: 0_level_0,Unnamed: 1_level_0,value
category,name,Unnamed: 2_level_1
delta,delta,0.950049
wage_a,constant,9.210243
wage_a,exp_edu,0.037991
wage_a,exp_a,0.033133
wage_a,exp_a_square,-0.000506
wage_a,exp_b,0.001611
wage_a,exp_b_square,-5e-05
wage_b,constant,8.47949
wage_b,exp_edu,0.069971
wage_b,exp_b,0.066694


## Simulation

In [55]:
# wrap simulation function
def simulation(input_params_respy):
    _, base_options = rp.get_example_model("kw_94_one", with_data=False)
    
    simulate = rp.get_simulate_func(input_params_respy, base_options)
    df = simulate(input_params_respy)
    
    print("I'm process", getpid())
    
    return df

### When I use the 1 CPU to compute, it works fine:

In [62]:
start = time.time()
df = Parallel(n_jobs=1)(delayed(simulation)(params) for params in input_params_respy)
end = time.time()
print(f'\nTime to complete: {end - start:.2f}s\n')

I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597
I'm process 69597

Time to complete: 118.82s



In [64]:
df[0].head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Experience_A,Experience_B,Experience_Edu,Lagged_Choice_1,Shock_Reward_A,Meas_Error_Wage_A,Shock_Reward_B,Meas_Error_Wage_B,Shock_Reward_Edu,Meas_Error_Wage_Edu,...,Nonpecuniary_Reward_Edu,Wage_Edu,Flow_Utility_Edu,Value_Function_Edu,Continuation_Value_Edu,Nonpecuniary_Reward_Home,Wage_Home,Flow_Utility_Home,Value_Function_Home,Continuation_Value_Home
Identifier,Period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,0,0,0,10,edu,-0.035035,1,0.040965,1,-0.713137,1,...,-155.639237,,-1349.399531,357791.571408,377917.179533,17046.475078,,19605.89061,357800.305865,355875.519334
0,1,1,0,10,a,-0.35456,1,1.185316,1,0.066755,1,...,-4568.75585,,-4502.698032,355839.85891,379181.585519,17046.475078,,15414.507232,355840.833721,358224.116868
0,2,1,1,10,b,-1.063705,1,1.245234,1,-0.130574,1,...,-4568.75585,,-4703.530244,361630.964538,385486.787185,17046.475078,,16213.699271,358351.572179,360025.144453
0,3,1,2,10,b,-0.692603,1,-1.238533,1,0.768294,1,...,-4568.75585,,-3027.752613,369030.619122,391509.913503,17046.475078,,19289.475898,366325.366448,365179.234802
0,4,1,2,11,edu,-0.943424,1,-0.581919,1,0.432645,1,...,-155.639237,,807.175186,391357.488019,410968.629688,17046.475078,,15969.849187,379949.207652,383008.522252
0,5,1,2,12,edu,0.768313,1,-1.989945,1,-0.002513,1,...,-106.958418,,-67.231687,409787.219879,431282.005921,17046.475078,,16452.396476,398384.841667,401900.212281
0,6,1,2,13,edu,0.087829,1,-0.598471,1,-0.140483,1,...,-106.958418,,-301.978637,429702.05974,452485.031007,17046.475078,,18941.836844,419575.05907,421578.68253
0,7,1,2,14,edu,0.301492,1,1.268182,1,0.982899,1,...,-106.958418,,1362.458191,452243.155086,474453.139755,17046.475078,,18016.922485,438075.356822,442019.461965
0,8,1,3,14,b,-0.948111,1,1.996244,1,-0.370696,1,...,-4520.075031,,-5152.111987,452016.600626,481069.898513,17046.475078,,18064.877793,444028.979831,448233.883212
0,9,1,4,14,b,0.796574,1,-0.375418,1,0.332443,1,...,-4520.075031,,-4078.977756,458648.853943,486919.653277,17046.475078,,17238.66955,448335.625408,453635.087184


### However, when I specify more than one CPU cores to perform parallel computing, it doesn't work:

In [69]:
# the number of CPUs on the local machine is 8
import joblib
num_cpu = joblib.cpu_count()

8

In [74]:
# with all processors
start = time.time()
simulate = Parallel(n_jobs=num_cpu)(delayed(get_simulate_func)(params) for params in input_params_respy)
end = time.time()
print(f'\nTime to complete: {end - start:.2f}s\n')

TypeError: cannot pickle '_nrt_python._MemInfo' object

In [73]:
# with 2 processors
start = time.time()
simulate = Parallel(n_jobs=2)(delayed(get_simulate_func)(params) for params in input_params_respy)
end = time.time()
print(f'\nTime to complete: {end - start:.2f}s\n')

FileNotFoundError: [Errno 2] No such file or directory: '8_1111.parquet'