In [1]:
import os
from constants import HEADER_NAME, ENCODINGDB, ENCODING_TO_MN_DICT
import pandas as pd
import numpy as np

In [2]:
def find_closest_run_to_median_triple(dflist, pattern, expected_shape=(100, 361)):
  """Score each run by its total absolute distance to the median run.

  For every run, the columns whose names match ``pattern`` are summed row-wise,
  giving one value per reporting point. The median across runs is then taken at
  each reporting point, and every run is scored by the sum over all reporting
  points of the absolute difference between its series and that median series.

  Args:
    dflist: sequence of DataFrames, one per run, all with the same row count.
    pattern: regular expression handed to ``DataFrame.filter(regex=...)`` to
      pick the columns that are summed.
    expected_shape: ``(n_runs, n_points)`` the stacked sums must have. The
      default ``(100, 361)`` is 100 runs with 361 reporting points/months
      (including burn-in). Pass ``None`` to skip this check.

  Returns:
    1-D numpy array holding one score per run, in ``dflist`` order
    (smaller means closer to the median run).

  Raises:
    AssertionError: if the stacked sums are not 2-D or do not match
      ``expected_shape``.
  """
  # one row per run, one column per reporting point
  per_run_sums = np.array([
    onerun.filter(regex=pattern, axis=1).sum(axis=1) for onerun in dflist
  ])
  assert per_run_sums.ndim == 2, (
    f'expected a (runs, reporting points) table, got shape {per_run_sums.shape}'
  )
  if expected_shape is not None:
    assert per_run_sums.shape == tuple(expected_shape), (
      f'expected shape {tuple(expected_shape)}, got {per_run_sums.shape}'
    )
  # median across runs at every reporting point -> the "median run"
  median_run = pd.DataFrame(per_run_sums).astype('float').quantile(0.5).values
  assert median_run.shape == (per_run_sums.shape[1],)
  # distance of every run to the median run at every reporting point
  abs_diff_each_run_each_time = np.absolute(np.subtract(per_run_sums, median_run))
  assert abs_diff_each_run_each_time.shape == per_run_sums.shape
  return np.sum(abs_diff_each_run_each_time, axis=1)

In [3]:
def find_closest_run_to_median_double(dflist, act_name, expected_shape=(100, 361)):
  """Score each run by its total absolute distance to the median run.

  Columns of every run are relabelled with ``ENCODING_TO_MN_DICT[act_name]``
  and all columns that end up with the target label (``'2-2'`` for
  ``'DHA-PPQ'``, ``'2-4'`` otherwise) are summed row-wise, giving one value per
  reporting point. The median across runs is then taken at each reporting
  point, and every run is scored by the sum over all reporting points of the
  absolute difference between its series and that median series.

  Args:
    dflist: sequence of DataFrames, one per run, all with the same row count.
    act_name: one of ``'DHA-PPQ'``, ``'ASAQ'`` or ``'AL'``.
    expected_shape: ``(n_runs, n_points)`` the stacked sums must have. The
      default ``(100, 361)`` is 100 runs with 361 reporting points/months
      (including burn-in). Pass ``None`` to skip this check.

  Returns:
    1-D numpy array holding one score per run, in ``dflist`` order
    (smaller means closer to the median run).

  Raises:
    AssertionError: if ``act_name`` is not supported, or the stacked sums are
      not 2-D or do not match ``expected_shape``.
    KeyError: if no column of a run maps to the target label.
  """
  assert act_name in ['DHA-PPQ', 'ASAQ', 'AL'], f'unsupported act_name: {act_name!r}'
  most_dang_double_type = '2-2' if act_name == 'DHA-PPQ' else '2-4'
  per_run_sums = []
  for onerun in dflist:
    renamed_df = onerun.rename(columns=ENCODING_TO_MN_DICT[act_name])
    # Sum the columns sharing the target label directly instead of
    # groupby(..., axis=1), which pandas deprecated in 2.1 and removed in 3.0.
    is_target = renamed_df.columns == most_dang_double_type
    if not is_target.any():
      raise KeyError(most_dang_double_type)
    per_run_sums.append(renamed_df.loc[:, is_target].sum(axis=1).values)
  # one row per run, one column per reporting point
  per_run_sums = np.array(per_run_sums)
  assert per_run_sums.ndim == 2, (
    f'expected a (runs, reporting points) table, got shape {per_run_sums.shape}'
  )
  if expected_shape is not None:
    assert per_run_sums.shape == tuple(expected_shape), (
      f'expected shape {tuple(expected_shape)}, got {per_run_sums.shape}'
    )
  # median across runs at every reporting point -> the "median run"
  median_run = pd.DataFrame(per_run_sums).astype('float').quantile(0.5).values
  assert median_run.shape == (per_run_sums.shape[1],)
  # distance of every run to the median run at every reporting point
  abs_diff_each_run_each_time = np.absolute(np.subtract(per_run_sums, median_run))
  assert abs_diff_each_run_each_time.shape == per_run_sums.shape
  return np.sum(abs_diff_each_run_each_time, axis=1)

In [4]:
def find_closest_run_to_median_five_dang_types(dflist):
  """Pick the run whose combined distance to the median run is smallest.

  Five per-run score vectors are added together: two from
  ``find_closest_run_to_median_triple`` (patterns ``'TYY..Y2.'`` and
  ``'KNF..Y2.'``) and three from ``find_closest_run_to_median_double``
  (``'DHA-PPQ'``, ``'ASAQ'`` and ``'AL'``).

  Args:
    dflist: list of per-run DataFrames, passed unchanged to both helpers.

  Returns:
    Tuple ``(run_number, score)`` where ``run_number`` is 1-based (runs are
    named 1-100) and ``score`` is the smallest combined value.
  """
  score_vectors = [
    find_closest_run_to_median_triple(dflist, 'TYY..Y2.'),
    find_closest_run_to_median_triple(dflist, 'KNF..Y2.'),
    find_closest_run_to_median_double(dflist, 'DHA-PPQ'),
    find_closest_run_to_median_double(dflist, 'ASAQ'),
    find_closest_run_to_median_double(dflist, 'AL'),
  ]
  # accumulate left to right so the floating-point result is unchanged
  combined = score_vectors[0]
  for extra in score_vectors[1:]:
    combined = combined + extra

  # argmin is 0-based while the runs are numbered from 1
  best_position = np.argmin(combined)
  return (best_position + 1, np.min(combined))

In [9]:
def onestep(filepath):
  """Load runs 1-100 from ``filepath`` and report the one closest to the median.

  Every ``<i>.txt`` (i = 1..100) in ``filepath`` is read as a tab-separated
  table with ``HEADER_NAME`` as column names; missing values become 0 and only
  the ``ENCODINGDB`` columns are kept. Each table must be 361 rows by 128
  columns. The tables are handed to
  ``find_closest_run_to_median_five_dang_types`` and the outcome is printed.

  Args:
    filepath: directory containing ``1.txt`` ... ``100.txt``.

  Returns:
    The 1-based number of the closest run.
  """
  run_files = [
    os.path.join(filepath, f'{run_number}.txt') for run_number in range(1, 101)
  ]
  dflist = []
  for run_file in run_files:
    raw_table = pd.read_csv(run_file, sep='\t', names=HEADER_NAME, index_col=False)
    selected = raw_table.fillna(0).filter(items=ENCODINGDB)
    assert selected.shape == (361, 128)
    dflist.append(selected)

  result = find_closest_run_to_median_five_dang_types(dflist)
  # NOTE(review): the 'filpath' label is kept as-is so the text matches the
  # recorded cell outputs.
  print('-----')
  print(f'filpath = {filepath}')
  print(f'the closest run is #{result[0]}')
  print(f'with val={result[1]}')
  return result[0]

Figure 6 - set 7, 5-yr-cycling

In [10]:
# Closest-to-median run among the set7_c monthly outputs; the returned run
# number is displayed as the cell result.
onestep('simulation_outputs/set7_c/monthly')

-----
filpath = simulation_outputs/set7_c/monthly
the closest run is #4
with val=1.3157963537500001


4

Supplemental Table 2 - set 7, MFT

In [11]:
# Closest-to-median run among the set7_m monthly outputs; the returned run
# number is displayed as the cell result.
onestep('simulation_outputs/set7_m/monthly')

-----
filpath = simulation_outputs/set7_m/monthly
the closest run is #13
with val=0.652836191525


13

Figure 7 - set 7, Adaptive Cycling

In [12]:
# Closest-to-median run among the set7_ac monthly outputs; the returned run
# number is displayed as the cell result.
onestep('simulation_outputs/set7_ac/monthly')

-----
filpath = simulation_outputs/set7_ac/monthly
the closest run is #39
with val=1.6822495638300001


39