In [52]:
# Collect calibration results from multiple jobs
# Author: Hongli Liu

import os, glob
import numpy as np
from tqdm import tqdm

# inputs
root_path = '/home/h294liu/project/proj/5_summaCalib'  # root path where parameter estimation will be stored.
domain_name = 'BowAtBanff'
calib_basename = 'GLUE' #SCE #GA #DDS #GLUE  
nGRU = 51

# output paths
output_path = os.path.join(root_path, domain_name+'_'+calib_basename)
runs_path = os.path.join(output_path, 'runs') # folder to save detailed run results (use links inside)
if not os.path.exists(runs_path):
    os.makedirs(runs_path)
OstModel_txt = os.path.join(output_path, 'OstModel.txt')  # text file to save param and obj func  
fail_runs_txt = os.path.join(output_path, 'runs_fail.txt')    # text file to save fail run info  
success_runs_txt = os.path.join(output_path, 'runs_success.txt')  # text file to save success run info

# 1. find all archive paths
archive_basepath = os.path.join(root_path, domain_name, calib_basename+'*', 
                                'calib/output_archive/experiment*', 'run*')
iter_archive_paths = glob.glob(archive_basepath,recursive=True)
iter_archive_paths.sort()

# 2. open output files for writing
f_failure = open(fail_runs_txt, "w") 
f_success = open(success_runs_txt, "w") 
f_ost = open(OstModel_txt, "w") 

# write header for OstModel_txt
OstModel_tpl = os.path.join(iter_archive_paths[0], os.pardir, os.pardir, os.pardir, 'OstModel0.txt')
if os.path.exists(OstModel_tpl):
    infile = open(OstModel_tpl, 'r')
    firstLine = infile.readline()
else:
    print('OstModel0.txt does not exist in %s'%(OstModel_tpl))
f_ost.write(firstLine)

# 3. loop the archive paths
pbar = tqdm(total=len(iter_archive_paths))
success_count = 0
for iPath in range(len(iter_archive_paths)):
    
    archive_path = iter_archive_paths[iPath]
    
    # (1) check the model run succeeds with the param set
    ostExeFile = os.path.join(archive_path, 'OstExeOut.txt')
    
    with open(ostExeFile, "r") as f:
        # read content of file to string
        content = f.read()
        
        # get number of occurrences of the substring in the string
        # check summa run status for GRUs
        summa_occurs = content.count("finished simulation successfully") # Hard coded. Be careful.
        route_occurs = content.count("Finished simulation")
        if ((summa_occurs==nGRU) and (route_occurs==1)):            
            # write down the sucessful run information
            f_success.write(archive_path+'\n')
            success_count = success_count+1
        else:
            # write down the failure run information
            f_failure.write(archive_path+'\n')
            continue 
     
    # (2) add successful run to runs_path by link
    src = archive_path
    dst = os.path.join(runs_path, 'run'+str(success_count))
    if os.path.exists(dst) and os.path.islink(dst):
        os.unlink(dst)
    os.symlink(src, dst)

    # (3) save param set and obj of successful runs
    param_set = np.loadtxt(os.path.join(archive_path, 'multipliers.txt'))
    obj = np.loadtxt(os.path.join(archive_path, 'trial_stats.txt'), delimiter='#', usecols=[0])
    
    f_ost.write('%d\t'%(success_count))
    f_ost.write('%.6E\t'%(obj[0]*(-1))) # obj = -KGE
    for iParam in range(len(param_set)):
        f_ost.write('%.6E\t'%(param_set[iParam]))
    f_ost.write('\n')
    
    pbar.update(1)
pbar.close()   

f_failure.close()
f_success.close() 
f_ost.close()

# 4. remove run links that are not created by this round of calib result collection
success_count = sum(1 for line in open(success_runs_txt)) 
iter_run_paths = glob.glob(os.path.join(runs_path,'run*'),recursive=False)
iter_run_paths.sort()

pbar = tqdm(total=len(iter_run_paths))
for run_path in iter_run_paths:
    run_folder = os.path.basename(run_path)
    run_id = int(run_folder.split('run')[-1])
    if run_id > success_count:
        if os.path.exists(run_path) and os.path.islink(run_path):
            os.unlink(run_path)    
    pbar.update(1)
pbar.close()           

  0%|          | 0/9098 [12:07<?, ?it/s]
 96%|█████████▌| 9323/9724 [28:25<01:13,  5.47it/s]  
100%|██████████| 9479/9479 [00:00<00:00, 137888.99it/s]
