In [4]:
# Collect calibration results from multiple jobs
# Author: Hongli Liu

import os, glob, shutil
import numpy as np
from tqdm import tqdm

# ---- user inputs ----
root_path = '/home/h294liu/project/proj/5_summaCalib'  # root path where parameter estimation will be stored.
domain_name = 'BowAtBanff'  # other option used so far: 'BowAtBanff_LA_calib'

calib_basename = 'DDS'  # calibration folder prefix; one of SCE, GA, DDS, GLUE
outFilePrefix = 'run1'  # prefix of the model netcdf output file names
nGRU = 51               # expected count of per-GRU success messages in a run log

# ---- output locations ----
# Collected results go to <root_path>/<domain_name>_<calib_basename>.
output_path = os.path.join(root_path, '%s_%s' % (domain_name, calib_basename))

# 'runs' holds one link per collected run (links point back to the archive).
runs_path = os.path.join(output_path, 'runs')
if os.path.exists(runs_path) is False:
    os.makedirs(runs_path)

# Text files written by the collection step:
#   OstModel.txt     - parameter sets and objective function values
#   runs_fail.txt    - archive paths of failed runs
#   runs_success.txt - archive paths of successful runs
OstModel_txt, fail_runs_txt, success_runs_txt = (
    os.path.join(output_path, txt_name)
    for txt_name in ('OstModel.txt', 'runs_fail.txt', 'runs_success.txt')
)

# 1. find all archive paths
# Pattern: <root>/<domain>/<calib_basename>*/calib/output_archive/experiment*/run*
archive_basepath = os.path.join(
    root_path, domain_name, '%s*' % calib_basename,
    'calib/output_archive/experiment*', 'run*')

# File names of the netcdf outputs; any matched path containing one of them
# is dropped so that only the run entries themselves remain.
outFileSumma = '%s_day.nc' % outFilePrefix
outFileRoute = '%s.mizuRoute.nc' % outFilePrefix

# Glob, filter and sort in one pass.
iter_archive_paths = sorted(
    candidate
    for candidate in glob.glob(archive_basepath, recursive=True)
    if outFileSumma not in candidate and outFileRoute not in candidate
)

# 2. open output files for writing
# Every precondition is checked BEFORE the output files are opened in "w"
# mode, so a failed precondition no longer truncates results from an
# earlier collection round.
if not iter_archive_paths:
    raise RuntimeError('No archived runs found with pattern %s'%(archive_basepath))

# Archived runs match .../calib/output_archive/experiment*/run*, so three
# levels above the first run is the directory expected to hold
# OstModel0.txt and control_active.txt.
calib_dir = os.path.join(iter_archive_paths[0], os.pardir, os.pardir, os.pardir)

# Read the header line for OstModel_txt from the template. Previously a
# missing template only printed a message and then crashed with a NameError
# on `firstLine`; now it fails with an explicit error and the template file
# handle is always closed.
OstModel_tpl = os.path.join(calib_dir, 'OstModel0.txt')
if not os.path.exists(OstModel_tpl):
    raise FileNotFoundError('OstModel0.txt does not exist in %s'%(OstModel_tpl))
with open(OstModel_tpl, 'r') as infile:
    firstLine = infile.readline()

# 3. save control_active.txt for future use
src = os.path.join(calib_dir, 'control_active.txt')
dst = os.path.join(output_path)
shutil.copy2(src, dst)

# These handles stay open on purpose: the collection loop writes to them
# and closes them when it is done.
f_failure = open(fail_runs_txt, "w")
f_success = open(success_runs_txt, "w")
f_ost = open(OstModel_txt, "w")

# write header for OstModel_txt
f_ost.write(firstLine)

# 4. loop the archive paths
# For each archived run: (1) classify it as success/failure from its log,
# (2) skip parameter sets already recorded, (3) append the parameter set and
# objective value to OstModel_txt, (4) link the run folder into runs_path.
success_count = 0   # number of successful AND unique runs recorded so far
param_sets = None   # allocated lazily when the first parameter set is read,
                    # so a failed first run no longer leaves it undefined

# Wrapping the list lets tqdm advance on every iteration, including the
# ones that end early with `continue`.
pbar = tqdm(iter_archive_paths)
try:
    for archive_path in pbar:

        # (1) check model run success.
        ostExeFile = os.path.join(archive_path, 'OstExeOut.txt')
        if not os.path.exists(ostExeFile):
            # No run log: skipped without being listed as success or failure.
            continue

        with open(ostExeFile, "r") as f:
            content = f.read()

        # Success criteria are hard-coded log messages. Be careful: they must
        # match what the model executables print.
        summa_occurs = content.count("finished simulation successfully")
        route_occurs = content.count("Finished simulation")
        if not ((summa_occurs == nGRU) and (route_occurs == 1)):
            # write down the failure run information
            f_failure.write(archive_path+'\n')
            continue
        # write down the successful run information
        f_success.write(archive_path+'\n')

        # (2) check param set duplication.
        # np.loadtxt returns a 0-d array for a single value; atleast_1d keeps
        # len() and [0] indexing valid in that case.
        param_set = np.atleast_1d(np.loadtxt(os.path.join(archive_path, 'multipliers.txt')))
        obj = np.atleast_1d(np.loadtxt(os.path.join(archive_path, 'trial_stats.txt'),
                                       delimiter='#', usecols=[0]))

        if param_sets is None:
            # one row per archive path is an upper bound on unique runs
            param_sets = np.zeros((len(iter_archive_paths), len(param_set)))
        elif (param_sets[:success_count] == param_set).all(axis=1).any():
            # Compare only against rows filled so far; the unused zero rows
            # are not real parameter sets.
            continue

        # if no duplication, add this param_set to param_sets.
        param_sets[success_count, :] = param_set
        success_count = success_count+1

        # (3) save param set and obj of successful and unique runs.
        f_ost.write('%d\t'%(success_count))
        f_ost.write('%.6E\t'%(obj[0]*(-1))) # obj = -KGE
        for iParam in range(len(param_set)):
            f_ost.write('%.6E\t'%(param_set[iParam]))
        f_ost.write('\n')

        # (4) add run output to runs_path by link.
        src = archive_path
        dst = os.path.join(runs_path, 'run'+str(success_count))
        # islink() alone also detects a broken link; os.path.exists() follows
        # the link and reports False for it, which made os.symlink fail.
        if os.path.islink(dst):
            os.unlink(dst)
        os.symlink(src, dst)
finally:
    # Close the progress bar and the output files even if a run fails to parse.
    pbar.close()
    f_failure.close()
    f_success.close()
    f_ost.close()
del param_sets

# 4. remove run links that are not created by this round of calib result collection
# The collection loop names its links run1 .. run<success_count>, where
# success_count counts successful AND unique runs. The line count of
# runs_success.txt also includes duplicate-parameter runs that received no
# link, so using it as the threshold left stale links behind. The in-memory
# counter is used instead (this also avoids an unclosed file handle).
n_linked = success_count
iter_run_paths = glob.glob(os.path.join(runs_path,'run*'),recursive=False)
iter_run_paths.sort()

pbar = tqdm(iter_run_paths)  # advances on every iteration, skipped entries included
for run_path in pbar:
    run_folder = os.path.basename(run_path)
    try:
        run_id = int(run_folder.split('run')[-1])
    except ValueError:
        # entry matching 'run*' without a numeric suffix: not one of our links
        continue
    # islink() alone also covers broken links, which os.path.exists() hides.
    if run_id > n_linked and os.path.islink(run_path):
        os.unlink(run_path)
pbar.close()


  0%|          | 0/685 [00:00<?, ?it/s][A
  0%|          | 3/685 [00:00<00:31, 21.92it/s][A
  1%|          | 6/685 [00:00<00:29, 23.35it/s][A
  1%|▏         | 9/685 [00:00<00:28, 23.85it/s][A
  4%|▎         | 24/685 [00:00<00:09, 66.30it/s][A
  7%|▋         | 50/685 [00:00<00:04, 128.37it/s][A
 12%|█▏        | 81/685 [00:00<00:03, 184.36it/s][A
 16%|█▌        | 109/685 [00:00<00:02, 213.45it/s][A
 20%|██        | 140/685 [00:00<00:02, 242.41it/s][A
 25%|██▌       | 172/685 [00:00<00:01, 264.46it/s][A
 29%|██▉       | 199/685 [00:02<00:10, 47.11it/s] [A
 32%|███▏      | 219/685 [00:05<00:23, 19.82it/s][A
 34%|███▍      | 233/685 [00:07<00:29, 15.19it/s][A
 35%|███▌      | 243/685 [00:08<00:35, 12.31it/s][A
 36%|███▋      | 250/685 [00:09<00:38, 11.22it/s][A
 37%|███▋      | 256/685 [00:10<00:44,  9.73it/s][A
 38%|███▊      | 260/685 [00:11<00:44,  9.64it/s][A
 38%|███▊      | 263/685 [00:11<00:45,  9.24it/s][A
 39%|███▉      | 266/685 [00:12<00:47,  8.73it/s][A
 39%|

In [8]:
# Re-apply the netcdf filter to the archive paths already held in memory:
# drop every path containing one of the two netcdf output file names, then sort.
outFilePrefix = 'run1'
outFileSumma = '%s_day.nc' % outFilePrefix
outFileRoute = '%s.mizuRoute.nc' % outFilePrefix
iter_archive_paths = sorted(
    candidate
    for candidate in iter_archive_paths
    if outFileSumma not in candidate and outFileRoute not in candidate
)

In [3]:
# Debug aid: display the current value of archive_path.
archive_path

'/home/h294liu/project/proj/5_summaCalib/BowAtBanff/DDS1/calib/output_archive/experiment2_seg51/run1007'

In [2]:
# Debug aid: preview the collected archive paths. The original cell referenced
# the undefined name `iter_archive_path` and raised a NameError; only a short
# slice is shown so the full list is not dumped.
iter_archive_paths[:5]