In [15]:
import numpy as np
import pandas as pd

from ema_workbench import load_results, save_results

In [16]:
# Directory layout of the partial result archives, relative to this notebook.
path = "../results/"
subpath = "combined_results/"

# All archive names share a prefix, suffix and extension; only the number in
# the middle differs between files.
file_start = "2000_scen__5_reps__"
file_end   = "_startseed___0612"
file_ext = ".tar.gz"

# The varying part of the name: "0", "10", ..., "70".
name_diffs = [str(offset) for offset in range(0, 80, 10)]

# Full archive file names, in the order they will be loaded and combined.
filenames = [f"{file_start}{n}{file_end}{file_ext}" for n in name_diffs]


In [17]:
# Load every partial archive; each entry is an (experiments, outcomes) pair,
# kept in the same order as `filenames`.
all_results = []
for fn in filenames:
    archive_path = path + subpath + fn
    all_results.append(load_results(archive_path))

In [18]:
# Each all_results[x] is an (experiments, outcomes) pair, so all_results[x][0]
# is an experiments dataframe. Preview the first three rows of the first file.
all_results[0][0].head(3)

Unnamed: 0,debt_sales_ratio,wage_sensitivity_prod,init_markup,capital_firm_cap_out_ratio,min_unempl_emigration,migration_unempl_bounds_diff,deu_discount_factor,flood_timing,scenario,policy,model,seed
0,3.974297,0.712775,0.099379,0.297524,0.040977,0.136866,0.95654,33,0,,CRAB,0
1,4.388648,0.624767,0.069494,0.246466,0.031566,0.160825,0.857878,76,1,,CRAB,0
2,3.970944,0.929448,0.489306,0.339357,0.051214,0.16617,0.954534,34,2,,CRAB,0


In [19]:
# Collect the experiments dataframe of every loaded file and count the rows
# we expect to end up with after combining them.
experiments_list = [exp for exp, out in all_results]
lens = [len(exp) for exp in experiments_list]
total_len = sum(lens)

# ignore_index=True gives the combined frame a fresh 0..N-1 index. Without it
# every file contributes its own index labels again, so labels repeat and
# label-based lookups (e.g. experiments.loc[0]) return one row per file.
experiments = pd.concat(experiments_list, ignore_index=True)

print(f"Have experiments dataframe with {len(experiments)} rows")
print(f"Expected {total_len} rows")

# Fail loudly instead of relying on someone comparing the two printed numbers.
assert len(experiments) == total_len, "row count changed while combining experiments"
assert experiments.index.is_unique, "combined experiments index is not unique"


Have experiments dataframe with 80000 rows
Expected 80000 rows


In [20]:
experiments.value_counts(['seed']).sort_index()

seed
0       2000
1       2000
2       2000
3       2000
4       2000
10      2000
11      2000
12      2000
13      2000
14      2000
20      2000
21      2000
22      2000
23      2000
24      2000
30      2000
31      2000
32      2000
33      2000
34      2000
40      2000
41      2000
42      2000
43      2000
44      2000
50      2000
51      2000
52      2000
53      2000
54      2000
60      2000
61      2000
62      2000
63      2000
64      2000
70      2000
71      2000
72      2000
73      2000
74      2000
Name: count, dtype: int64

In [21]:
# all_results[x][1] is an outcomes dictionary (index 0 is the experiments
# dataframe). Show the 'Household Population' outcome of the first file.
all_results[0][1]['Household Population']

array([[10000, 10000, 10000, ...,  8488,  8488,  8488],
       [10000, 10000, 10000, ...,  7955,  7875,  7554],
       [10000, 10000, 10000, ...,  6265,  6265,  6265],
       ...,
       [10000, 10000, 10000, ..., 10079, 10079, 10079],
       [10000, 10000, 10000, ...,  9627,  9627,  9337],
       [10000, 10000, 10000, ...,  6363,  6363,  6363]], dtype=int64)

In [22]:
# Names of the outcomes of interest: the keys of the first file's outcomes dict.
list(all_results[0][1])

['Household Population',
 'Unemployment Rate',
 'Gini Coefficient',
 'Median Net Worth',
 'Median House Value',
 'Median Wage',
 'Minimum Wage',
 'Total Household Damages',
 'Average Income-Weighted Damages',
 'Firm Population',
 'GDP',
 'Total Firm Resources',
 'Total Firm Debt',
 'CapitalFirm Population',
 'CapitalFirm Production Made',
 'ConsumptionGoodFirm Population',
 'ConsumptionGoodFirm Production Made',
 'ServiceFirm Population',
 'ServiceFirm Production Made']

In [23]:
# Combine the outcomes of all files into one ndarray per outcome name.
#
# The files are stacked along the first axis in the same order in which the
# experiments dataframes are concatenated, so row i of every outcome still
# belongs to row i of `experiments`. Using np.concatenate keeps each outcome
# an ndarray (the type load_results returned) rather than a Python list of
# per-run arrays.
outcome_names = list(all_results[0][1])

# Every file must provide exactly the same outcomes; otherwise the combined
# arrays would no longer line up with the combined experiments.
for _exp, out in all_results:
    if set(out) != set(outcome_names):
        raise KeyError(
            "Outcome names differ between result files: "
            f"{sorted(set(out) ^ set(outcome_names))}"
        )

outcomes = {
    ooi: np.concatenate([out[ooi] for _exp, out in all_results], axis=0)
    for ooi in outcome_names
}

In [29]:
# Report which outcomes were combined and how many runs each one holds.
print(f"Have keys in outcome: {list(outcomes)}")
print(f"Have lengths in outcomes: {[len(vals) for vals in outcomes.values()]}")

# N_STEPS is the length of the first run of the first outcome; the first run
# of every other outcome must have that same length.
N_STEPS = None
for key in outcomes:
    first_run_len = len(outcomes[key][0])
    if N_STEPS is None:
        N_STEPS = first_run_len
        continue
    assert first_run_len == N_STEPS

Have keys in outcome: ['Household Population', 'Unemployment Rate', 'Gini Coefficient', 'Median Net Worth', 'Median House Value', 'Median Wage', 'Minimum Wage', 'Total Household Damages', 'Average Income-Weighted Damages', 'Firm Population', 'GDP', 'Total Firm Resources', 'Total Firm Debt', 'CapitalFirm Population', 'CapitalFirm Production Made', 'ConsumptionGoodFirm Population', 'ConsumptionGoodFirm Production Made', 'ServiceFirm Population', 'ServiceFirm Production Made']
Have lengths in outcomes: [80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000, 80000]


In [25]:
# Bundle the combined data as an (experiments, outcomes) tuple, the same
# layout each loaded entry of all_results has.
results = (experiments, outcomes)

In [26]:
# Rewrite the "<n>_reps" part of the filename prefix so that it states the
# number of replications in the combined file (reps per file * number of files).
file_start_parts = file_start.split('__')
reps_per_file = int(file_start_parts[1].partition('_')[0])
total_reps = len(name_diffs) * reps_per_file
file_start_parts[1] = str(total_reps) + '_reps'
new_file_start = '__'.join(file_start_parts)

In [27]:
# Drop the `startseed` marker from the suffix: keep only the text after the
# last underscore.
new_file_end = file_end.rsplit('_', 1)[-1]

In [28]:
# Save the combined results under the adjusted name. The prefix must be
# `new_file_start` (with the total number of replications), not the original
# `file_start`, which still carries the per-file replication count and would
# mislabel the combined archive.
save_results(results, path + new_file_start + new_file_end + file_ext)

