### SimData to csv
Multiple time stamps, one time stamp per training sample

row 0: expected basecase values (no label)  
row 1 -> dset_size: residual values, one scenario per row

In [1]:
# NOTE: no authentication needed for mount if only one person edits the notebook.
# I can read files directly from the drive locations (i.e. no need to copy all files to local setting)

import pandas as pd
import torch
import pathlib
import numpy as np

import pickle

# Select the compute device: CUDA when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

Using cpu device


#### Basecase and Observed Data

In [2]:
# Note whether sim has been run in debug mode before running this script.
def features_ds(base_file, simdata_dir, dset_size=150, net_charac=0, time_stamp=80, debug=False):
    """
    Extract one time-stamp row of a network characteristic from the basecase
    file and from each leak-scenario HDF file.

    Args:
        base_file: path to the basecase HDF file. Its key list defines the
            mapping from ``net_charac`` to a table name.
        simdata_dir: directory whose sub-directories contain the scenario
            ``*.h5`` files.
        dset_size (int): maximum number of scenario files to read.
        net_charac (int): network characteristic of interest, given as an
            index into the basecase file's keys, e.g. 0 -> link_flowrate.
        time_stamp (int): row index taken from every table.
        debug (bool): not used by this function; kept so existing calls work.

    Returns:
        tuple ``(observs, columns, basecase)`` where
        ``observs`` is a float32 tensor with one row per scenario file read
        (``None`` when no scenario file was found), ``columns`` is the column
        index of the last table read, and ``basecase`` is the flattened
        basecase row as a float32 tensor.

    Raises:
        KeyError: ``net_charac`` is not an index of the basecase file's keys.
        IndexError: a table has no row at ``time_stamp``.
    """
    # Map int -> hdf key using the basecase file.
    # Keys seen with debug=True:
    #  ['/link_flowrate', '/link_headloss', '/link_velocity', '/node_demand', '/node_head', '/node_pressure']
    with pd.HDFStore(base_file) as hdf:
        charac_dict = dict(enumerate(hdf.keys()))
    charac_key = charac_dict[net_charac]

    # Basecase row.
    data = pd.read_hdf(base_file, charac_key)   # hdf uses less memory; set sim file_format='hdf'
    print(charac_key)
    columns = data.columns
    basecase = torch.tensor(data.values, dtype=torch.float32)
    print(f'features_ds(): basecase {basecase.size()}')
    if not 0 <= time_stamp < basecase.size(0):
        # Slicing past the end would silently produce an empty row.
        raise IndexError(f'time_stamp {time_stamp} is out of range for {base_file} '
                         f'({basecase.size(0)} rows)')
    # Default 80: Wed, 8-9am (80hrs into sim), assuming the sim starts Sun, 12a.
    basecase = basecase[time_stamp:time_stamp+1].reshape([-1]).to(device)
    print(f'features_ds(): basecase {basecase.size()}')

    # Leak scenarios: one sub-directory per scenario.
    sim_dirs = sorted(x for x in pathlib.Path(simdata_dir).iterdir() if x.is_dir())

    # NOTE(review): the last directory in sorted order is skipped, presumably
    # because it is the `_base_` folder -- confirm this holds for the folder names used.
    hdf_files = []
    for sim_dir in sim_dirs[:-1]:
        hdf_files += sorted(str(f) for f in sim_dir.glob("*.h5"))

    # Scenario files written with debug=True also hold '/leak_demand', '/leak_head'
    # and '/leak_pressure'; those are not reachable here because the key mapping
    # is built from the basecase file.
    rows = []
    for i, h_file in enumerate(hdf_files[:dset_size]):
        if i % 100 == 0:
            print(h_file)
        data = pd.read_hdf(h_file, charac_key)
        columns = data.columns
        meas_tn = torch.tensor(data.values, dtype=torch.float32)
        if not 0 <= time_stamp < meas_tn.size(0):
            raise IndexError(f'time_stamp {time_stamp} is out of range for {h_file} '
                             f'({meas_tn.size(0)} rows)')
        rows.append(meas_tn[time_stamp:time_stamp+1].reshape([1, -1]))

    # Concatenate once instead of re-copying the growing tensor on every iteration.
    observs = torch.cat(rows).to(device) if rows else None

    return observs, columns, basecase

In [3]:
# Debug block
def debug_features_ds():
  """Smoke-test features_ds() on the 10k test SimData and print the resulting tensor sizes."""
  # net_charac options:
  #  {0: '/link_flowrate', 1: '/link_headloss', 2: '/link_velocity',
  #   3: '/node_demand', 4: '/node_head', 5: '/node_pressure'}
  net_charac = 0
  base_file = 'data.h5'
  base_file = '/content/drive/MyDrive/Colab Notebooks/Water Distribution Network/SimData/temp_test_10k_hdf/_base_/'+base_file
  simdata_dir = '/content/drive/MyDrive/Colab Notebooks/Water Distribution Network/SimData/temp_test_10k_hdf'
  set_size = 500
  debug = False
  # Keyword arguments are required here: features_ds() takes time_stamp before debug,
  # so passing debug positionally would be interpreted as the time stamp.
  X, cols, basecase = features_ds(base_file, simdata_dir, dset_size=set_size,
                                  net_charac=net_charac, debug=debug)
  print('END')
  print(basecase.size())
  # print(X)
  print(X.size())
# debug_features_ds()

#### Labels

In [4]:
# See scratch section for original four-region version of region_ls()
# Note: reg_dict is made in graph_partition notebook.
# def region_dict(regdict_dir=None) :   # where dir_path=dest_dir from main cell. This may help enforce the dict location.
def region_dict(regdict_filepath=None) :
  """Load and return the pickled region dictionary stored at ``regdict_filepath``.

  The file is produced by the graph_partition notebook. Careful: the expected
  partition may have been overwritten by that notebook.
  """
  # pickle executes arbitrary code on load; only point this at files you created.
  with open(regdict_filepath, 'rb') as pickle_fh:
    return pickle.load(pickle_fh)

# dt = region_dict()
# print(dt)

In [5]:
# need full list of labels (strings)
# encode labels

def labels_ds(base_jfile, simdata_dir, regdict_dir, regdict_filepath, dset_size=150, debug=False):
    """
    Read the leak-pipe label of each scenario, encode it as (region index,
    pipe index) and save the encoder next to the region dictionary.

    Args:
        base_jfile: path to the basecase ``info.json``.
        simdata_dir: directory whose sub-directories contain the scenario
            ``*.json`` files.
        regdict_dir: directory in which ``dictPipeToPipeIdx.npy`` is written.
        regdict_filepath: path of the pickled region dictionary passed to
            region_dict(); its ``'reg_partits'`` entry maps regions to pipes.
        dset_size (int): maximum number of scenario label files to read.
        debug (bool): not used by this function; kept so existing calls work.

    Returns:
        tuple ``(encoded_labels, pipe_idxs, labels_set)``: the region index and
        the pipe index of every sample (in file order) and the sorted list of
        distinct label strings.

    Raises:
        KeyError: a label does not belong to any region of the region dict.

    Side effects:
        Overwrites ``dictPipeToPipeIdx.npy`` in ``regdict_dir``. The saved dict
        holds ``pipe_nm -> (region_index, pipe_index)``, plus
        ``'reg_lens'`` (number of observed labels per region) and
        ``'lengths'`` = (region count, pipe count).
    """
    # Base network: full list of pipes.
    # NOTE(review): pipe_set is not used below; the read is kept so a missing or
    # malformed base file still fails here as it did before.
    data = pd.read_json(base_jfile)
    pipe_set = data['links']['pipes']

    # Collect the scenario label files.
    # NOTE(review): the last directory in sorted order is skipped, presumably
    # because it is the `_base_` folder -- confirm.
    sim_dirs = sorted(x for x in pathlib.Path(simdata_dir).iterdir() if x.is_dir())
    info_jsons = []
    for sim_dir in sim_dirs[:-1]:
        info_jsons += sorted(str(f) for f in sim_dir.glob("*.json"))
    print(info_jsons)

    # Label strings in the order of the generated training data; not necessarily unique.
    labels = []
    for i, j_file in enumerate(info_jsons[:dset_size]):
        if i % 100 == 0:
            print(j_file)
        j_data = pd.read_json(j_file)
        labels.append(j_data['leak_pipes'][0])

    labels_set = sorted(set(labels))

    # For every region keep only the pipes that actually occur as labels.
    reg_dict = region_dict(regdict_filepath)
    regions = reg_dict['reg_partits']   # dict of lists containing all pipes in a given region.
    reg_lab_subsets = {}
    for reg_idx, reg in enumerate(regions.values()):
        reg_members = set(reg)   # set membership instead of a list scan per label
        reg_lab_subsets[reg_idx] = [label for label in labels_set if label in reg_members]

    # Encoder: key=pipe name, value=(region index, running pipe index).
    encoder = {'reg_lens': []}
    pip_idx = 0
    for reg_idx, reg_labels in reg_lab_subsets.items():
        for label in reg_labels:
            encoder[label] = reg_idx, pip_idx
            pip_idx += 1
        # Use the subset length directly: a leftover loop index would report the
        # previous region's length whenever a region has no observed labels.
        encoder['reg_lens'].append(len(reg_labels))
    encoder['lengths'] = (len(reg_lab_subsets), pip_idx)

    missing = [label for label in labels_set if label not in encoder]
    if missing:
        raise KeyError(f'leak pipes not found in any region of {regdict_filepath}: {missing}')

    # Encoded labels, in order of the generated training data.
    encoded_labels = [encoder[label][0] for label in labels]
    pipe_idxs = [encoder[label][1] for label in labels]
    print(f'lens should match: {len(encoded_labels)} =? {len(pipe_idxs)}')

    # Save encoder (using numpy) for use in training model.
    #  The encoder is general to a given edge partition, so it is stored at the
    #  partition level rather than in a time-stamp sub-folder.
    save_loc = str(pathlib.Path(regdict_dir) / 'dictPipeToPipeIdx.npy')
    np.save(save_loc, encoder)

    return encoded_labels, pipe_idxs, labels_set

In [6]:
# Debug block
def debug_labels_ds():
  """Smoke-test labels_ds() on the 5k leakpipesAll SimData and print its results.

  This test function will overwrite the existing dictPipeToPipeIdx.npy file.
  Otherwise, read only.
  """
  simdata_dir = '/content/drive/MyDrive/Colab Notebooks/Water Distribution Network/SimData/leakpipesAll/temp_test_5k_unif_area0.01_0.1_hdf/'
  base_jfile = '_base_/info.json'
  base_jfile = simdata_dir + base_jfile 
  # labels_ds() also needs the region-dict location; these values mirror the
  # ones configured in feat_lab_args() -- update both together.
  regdict_dir = '/content/drive/MyDrive/Colab Notebooks/Water Distribution Network/Input Pipeline/Datasets/leak_pipes_all/30regions/00/'
  regdict_filepath = regdict_dir + 'region_dict_30.pickle'
  set_size = 5000
  debug = True
  enc_labs, pipe_idxs, lab_set = labels_ds(base_jfile, simdata_dir, regdict_dir, regdict_filepath,
                                           dset_size=set_size, debug=debug)
  print(enc_labs)   # region index of every sample label
  print(lab_set)    # sorted list of the distinct label strings

  # classes = []
  # for i, lab in enumerate(labs):
  #   classes.append(i)
  # classes = torch.arange(len(labs)).to(device)
  # print(classes)
# debug_labels_ds()
# labels from leakpipesAll: ['P1', 'P10', 'P100', 'P1000', 'P101', 'P1016', 'P102', 'P1022', 'P1023', 'P1024', 'P1025', 'P1026', 'P1027', 'P1028', 'P1029', 'P103', 'P1030', 'P1031', 'P1032', 'P1033', 'P1034', 'P1035', 'P1036', 'P1039', 'P104', 'P1040', 'P1041', 'P1042', 'P1044', 'P1045', 'P106', 'P107', 'P108', 'P109', 'P11', 'P110', 'P111', 'P112', 'P113', 'P115', 'P116', 'P117', 'P118', 'P119', 'P12', 'P120', 'P121', 'P122', 'P123', 'P124', 'P125', 'P126', 'P127', 'P128', 'P129', 'P13', 'P130', 'P131', 'P132', 'P134', 'P136', 'P138', 'P139', 'P14', 'P140', 'P141', 'P142', 'P144', 'P147', 'P148', 'P15', 'P150', 'P154', 'P155', 'P156', 'P157', 'P158', 'P159', 'P16', 'P160', 'P161', 'P162', 'P163', 'P165', 'P166', 'P17', 'P174', 'P177', 'P18', 'P184', 'P19', 'P195', 'P2', 'P20', 'P201', 'P21', 'P211', 'P215', 'P218', 'P219', 'P22', 'P220', 'P223', 'P225', 'P228', 'P23', 'P230', 'P231', 'P233', 'P234', 'P235', 'P237', 'P238', 'P24', 'P241', 'P242', 'P243', 'P245', 'P246', 'P248', 'P249', 'P25', 'P251', 'P252', 'P255', 'P256', 'P258', 'P259', 'P26', 'P264', 'P266', 'P267', 'P268', 'P27', 'P270', 'P272', 'P275', 'P28', 'P280', 'P282', 'P284', 'P285', 'P286', 'P287', 'P288', 'P29', 'P290', 'P291', 'P292', 'P293', 'P294', 'P295', 'P296', 'P297', 'P298', 'P299', 'P3', 'P30', 'P301', 'P302', 'P303', 'P304', 'P305', 'P307', 'P308', 'P309', 'P31', 'P310', 'P316', 'P319', 'P32', 'P320', 'P322', 'P323', 'P329', 'P33', 'P330', 'P331', 'P336', 'P337', 'P338', 'P339', 'P34', 'P340', 'P341', 'P343', 'P344', 'P346', 'P347', 'P348', 'P349', 'P35', 'P350', 'P37', 'P372', 'P374', 'P375', 'P376', 'P378', 'P379', 'P38', 'P380', 'P381', 'P383', 'P384', 'P385', 'P386', 'P39', 'P397', 'P398', 'P399', 'P40', 'P402', 'P403', 'P409', 'P410', 'P42', 'P424', 'P43', 'P44', 'P443', 'P445', 'P446', 'P450', 'P46', 'P465', 'P467', 'P468', 'P48', 'P482', 'P484', 'P49', 'P492', 'P5', 'P500', 'P501', 'P502', 'P51', 'P510', 'P52', 'P524', 'P527', 'P529', 'P53', 'P54', 'P55', 'P57', 'P58', 'P596', 
'P597', 'P6', 'P609', 'P610', 'P63', 'P633', 'P64', 'P65', 'P67', 'P670', 'P671', 'P68', 'P69', 'P697', 'P7', 'P70', 'P71', 'P72', 'P724', 'P725', 'P752', 'P753', 'P754', 'P755', 'P756', 'P757', 'P758', 'P759', 'P760', 'P761', 'P763', 'P766', 'P767', 'P768', 'P769', 'P771', 'P772', 'P775', 'P776', 'P777', 'P779', 'P780', 'P781', 'P783', 'P784', 'P785', 'P786', 'P787', 'P788', 'P789', 'P791', 'P794', 'P795', 'P796', 'P797', 'P798', 'P8', 'P800', 'P801', 'P804', 'P805', 'P806', 'P807', 'P808', 'P809', 'P810', 'P811', 'P813', 'P815', 'P817', 'P819', 'P821', 'P822', 'P823', 'P826', 'P827', 'P83', 'P830', 'P831', 'P84', 'P840', 'P841', 'P842', 'P844', 'P846', 'P847', 'P85', 'P850', 'P851', 'P852', 'P853', 'P855', 'P858', 'P859', 'P86', 'P861', 'P866', 'P87', 'P871', 'P880', 'P889', 'P89', 'P892', 'P9', 'P90', 'P91', 'P914', 'P915', 'P92', 'P924', 'P927', 'P929', 'P930', 'P931', 'P932', 'P933', 'P934', 'P935', 'P937', 'P938', 'P939', 'P94', 'P940', 'P941', 'P942', 'P943', 'P944', 'P946', 'P947', 'P948', 'P949', 'P95', 'P951', 'P953', 'P954', 'P955', 'P956', 'P957', 'P958', 'P959', 'P96', 'P961', 'P962', 'P963', 'P964', 'P965', 'P966', 'P967', 'P968', 'P969', 'P97', 'P970', 'P971', 'P972', 'P973', 'P974', 'P975', 'P976', 'P977', 'P978', 'P98', 'P981', 'P982', 'P983', 'P984', 'P986', 'P987', 'P988', 'P989', 'P99', 'P990', 'P991', 'P992', 'P993', 'P994', 'P995', 'P996', 'P997', 'P998', 'P999']

#### Subset

In [7]:
def subset(ds_file, pipe_count):
  """Placeholder (not implemented): train on fewer than the total number of leak pipes."""
  return None
# subset()

#### SimData to csv

In [8]:
# File Paths -- Centralize all file paths (inputs and output) in this function.
def feat_lab_args(net_char, time_stamp) :
  """Return every input/output path and setting used to build one csv dataset.

  ### !!! UPDATE the values below when creating a new csv file from SimData !!! ###

  Args:
    net_char: network characteristic index, returned unchanged. Options:
      {0: '/link_flowrate', 1: '/link_headloss', 2: '/link_velocity',
       3: '/node_demand', 4: '/node_head', 5: '/node_pressure'}
    time_stamp: time stamp, returned unchanged and used to name the destination folder.

  Returns:
    tuple (net_charac, dir_path, simdata_dir, base_file, base_jfile, simdata_dir,
           regdict_dir, regdict_filepath, dest_dir, set_size, time_stamp, debug, fname_note).
    Note that simdata_dir appears twice.
  """
  drive_root = '/content/drive/MyDrive/Colab Notebooks/Water Distribution Network/'

  ## Inputs
  ### NOTE: Update file paths for different SimData
  scenario_root = drive_root + 'SimData/leakpipesAll/' + 'temp_test_5k_unif_area0.01_0.1_hdf/'
  basecase_h5 = scenario_root + '_base_/data.h5'
  basecase_json = scenario_root + '_base_/info.json'

  ## Outputs
  ### UPDATE ... partitions
  # Region dict path + file name (for use in labels_ds() function)
  partitions = 30
  version_dir = f'Input Pipeline/Datasets/leak_pipes_all/{partitions}regions/00/'
  partition_dir = drive_root + version_dir
  partition_pickle = partition_dir + f'region_dict_{partitions}.pickle'

  ### UPDATE ...
  csv_dir = drive_root + version_dir + f'tmstp{time_stamp}/'
  sample_count = 5000   # use e.g. 100 for testing
  debug_flag = True
  name_suffix = f'_area0.01_0.1'

  return (net_char, drive_root, scenario_root, basecase_h5, basecase_json, scenario_root,
          partition_dir, partition_pickle, csv_dir, sample_count, time_stamp, debug_flag, name_suffix,)
# feat_lab_args(0)

In [None]:
# def SimData_to_csv(net_char):
def SimData_to_csv(net_char, tmstp):
  """Build the training csv for one network characteristic at one time stamp.

  Row 0 of the csv holds the basecase values (PipeIdx and Label set to -1);
  each following row holds the values of one scenario together with its pipe
  index and region label.

  Args:
    net_char: network characteristic index. Options:
      {0: '/link_flowrate', 1: '/link_headloss', 2: '/link_velocity',
       3: '/node_demand', 4: '/node_head', 5: '/node_pressure'}
    tmstp: time stamp (row index) extracted from every simulation table.

  Raises:
    FileNotFoundError: features_ds() found no scenario data.

  Side effects:
    Creates the destination directory if needed, writes
    ``dataset<set_size><characteristic><fname_note>.csv`` into it and, through
    labels_ds(), rewrites the label encoder file.
  """
  (net_charac, dir_path, simdata_dir, base_file, base_jfile, simdata_dir,
   regdict_dir, regdict_filepath, dest_dir, set_size, time_stamp, debug, fname_note,) = feat_lab_args(net_char, tmstp)

  # Create the destination directory (and any missing parents) up front.
  # TODO: decide where the region dict should live. The same region dict can be
  #  shared by several csv training sets, so the regionxx dir is a parent of the
  #  tmstp dir. Unresolved: several arrangements of e.g. region11 may exist.
  pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)

  # TODO: extend to a list of time stamps by concatenating one frame per time
  #  stamp (pd.concat) and consider adding a column with the time stamp.

  # Features
  observs, data_cols, basecase = features_ds(base_file, simdata_dir, set_size, net_charac, time_stamp, debug)
  if observs is None:
    raise FileNotFoundError(f'no scenario .h5 files found under {simdata_dir}')
  print(f'SimData_to_csv(): observs {observs.size()}')

  # Labels: region index and pipe index of every scenario, in file order.
  encoded_targets, pipe_idxs, _lab_set_ = labels_ds(base_jfile, simdata_dir, regdict_dir, regdict_filepath, set_size, debug)

  # row 0: basecase; row 1->dset_size: scenario observed measurements
  rows = basecase.reshape([1,-1]).tolist()
  rows.extend(observs.tolist())
  file_ds = pd.DataFrame(rows, columns=data_cols)
  # The basecase row carries no label, so it gets -1 in both columns.
  file_ds['PipeIdx'] = (-1, *pipe_idxs)
  file_ds['Label'] = (-1, *encoded_targets)

  # File-name fragment for the selected characteristic.
  charac_suffixes = {
      0: '_link_flowrate',
      1: '_link_headloss',
      2: '_link_velocity',
      3: '_node_demand',
      4: '_node_head',
      5: '_node_pressure',
  }
  hdf_file = charac_suffixes.get(net_charac, '_error_no_such_net_charac')

  file_ds.to_csv(path_or_buf= dest_dir
                 + 'dataset'
                 + str(set_size)
                 + hdf_file
                 + fname_note
                 + '.csv',
                 index=False)
  
# Might be able to use an if __main__ type statement. Will need to determine
#  if %run command makes this a child process or runs as main.
# Driver: runs as soon as this cell executes and writes one csv per
# (time stamp, network characteristic) pair via SimData_to_csv().
# Time stamps are row indices into the simulation tables
# (presumably hours into the simulation -- confirm).
tmstps = [78, 80, 82, 84]
# Alternative time-stamp sets:
# tmstps = [79, 81, 83, 85]
# tmstps = [80]
for tmstp in tmstps :
  print(f'tmstp: {tmstp}')
  # i is the network characteristic index (0..5) passed as net_char.
  for i in range(0, 6) :
    SimData_to_csv(i, tmstp)
  print(f'end tmstp {tmstp}')
print('Done!')

#### Scratch

In [10]:
# Scratch: stack several frames into one.
# Collect the frames first and concatenate once; calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration.
frames = []
for i in range(3) :
  fl_ds = pd.DataFrame({"A": range(3), "B": range(3)})
  frames.append(fl_ds)
# ignore_index=True renumbers the rows 0..n-1 instead of repeating 0..2.
mult_tm_ds = pd.concat(frames, ignore_index=True)
print(mult_tm_ds)
rg = [1, 2, 3]
print(f'{rg}')

   A  B
0  0  0
1  1  1
2  2  2
3  0  0
4  1  1
5  2  2
6  0  0
7  1  1
8  2  2
[1, 2, 3]


In [11]:
# p = pathlib.Path('/content/drive/MyDrive/Colab Notebooks/Water Distribution Network/')
# print(p.is_dir())

In [12]:
# str1 = 'all'
# print(f'str {str1}')

In [13]:
# older version of region_ls() used w/ first four region partition by hand.
def region_ls() :
  """Return the hand-made four-region partition of the network.

  Older version, kept for reference: each region is a hard-coded list of
  element-name strings (pipes 'P...', valves 'V...'/'v1', and 'PU...' entries,
  presumably pumps -- confirm).

  Returns:
    tuple (region0, region1, region2, region3) of lists of strings.
  """
  # boundary notation: from-region_to-region (e.g. r1_r2 means from region1 to region2)
  region0 = [# 2 boundary edges
            'P524', 'P237', 'P292', 'P291', 'P308', 'P293', 'P148', 'P268', 'P1036', 'P1026',
            'V47', 'V45', # valves
            'P386', 'P86', 'P165', # adjacent
            'P1042', 'P502', 'P977', # adjacent
            'P85', 'P238', 'P163', 'P938', 'P1035', 'P1034', 'P951', 'P252', 'P383', 'P501',
            'P267', 'P1039', 'P284', 'P1025', 'P344', 'P256', 'P933', 'P1024', 'P527', 'P1027',
            'P331', 'P54', 'P1028', 'P1029', 'P935', 'P937', 'P1032', 'P39', 'P384', 'P1033',
            'P1030', 'P1031', 'P340', 'P349', 'P347', 'P350', 'P914', 'P930', 'P929', 'P810',
            'P336', 'P343', 'P341', 'P337', 'P385', 'P338', 'P346', 'P330', 'P339', 'P329', 'P348',
            'P280', 'P305', 'P610', 'P510', 'P697', 'P670', 'P932', 'P671', 'P780', 'P754', 'P931',
            'P1016', 'P934', 'P940', 'P939', 'P942', 'P944', 'P941', 'P947', 'P946', 'P949', 'P948',
            'P959',  'P1023', 'P529', 'P1022', 'P597', 'P1040', 'P1041', 'P290', 'P304',
            'P955', 'P956', 'v1', 'P242', 'P243', 'P954', 'P142', 'P953', 'P957', 'P958', # adjacent
            'P943', 'P500', 'P270', 'P275', 'P285', 'P286', 'P288', 'P380', 'PU6', 'PU7',
            'P381',
            ]
  region1 = ['P297', # Boundary edge (r1_r2) # 3 total
            'P18', # r1_r2
            'P379', # r1_r0
            'V2', # valves
            'P892', 'P96', 'P445', 'P446', 'P450', # adjacent
            'P215', 'P287', 'P99', 'P468', 'P467', 'P465', 'P294', 'P303', 'P123', 'P880',
            'P118', 'P120', 'P296', 'P307', 'P301', 'P117', 'P282', 'P299', 'P298', 'P484',
            'P804', 'P889', 'P801', 'P241', 'P800', 'P112', 'P91', 'P97', 'P83', 'P98',
            'P100', 'P22', 'P111', 'P115', 'P121', 'P23', 'P68', 'P924', 'P25', 'P17',
            'P30', 'P125', 'P26', 'P101', 'P104', 'P769', 'P72', 'P63', 'P64', 'P67',
            'P33', 'P107', 'P31', 'P122', 'P1', 'P109', 'P2', 'P24', 'P110', 'P34',
            'P6', 'P3', 'P757', 'P84', 'P758', 'P116', 'P32', 'P87', 'P119', 'P71',
            'P108', 'P106', 'P102', 'P27', 'P28', 'P29', 'P103', 'P134', 'P136', 'P174',
            'P962', 'P195', 'P201', 'P272', 'P482', 'P295', 'P302', 'P443', 'P756', 'P94', # adjacent (5 rows)
            'P755', 'P95', 'P763', 'P767', 'P768', 'P771', 'P772', 'P775', 'P777', 'P776',
            'P779', 'P783', 'P785', 'P786', 'P784', 'P787', 'P788', 'P791', 'P797', 'P794',
            'P795', 'P798', 'P796', 'P807', 'P805', 'P806', 'P113', 'P92', 'P374', 'P375',
            'P310', 'P320', 'P319', 'PU3', 'P322', 'P323', 'PU2', 'PU1',
            'P378', 'PU4', 'P376', 'PU5', 'P316',
            ]
  region2 = [# 3 boundary edges # needs to be broken up into two regions
            'P19', # r2_r1
            'P996', # r2_r3
            'P53', # r2_r3
            'P218', 'P840', 'P219', 'P761', 'P220', 'P993', 'P127', 'P11', 'P223', 'P766',
            'P927', 'P9', 'P21', 'P991', 'P7', 'P8', 'P819', 'P989', 'P970', 'P990',
            'P789', 'P228', 'P813', 'P251', 'P815', 'P141', 'P759', 'P126', 'P132', 'P35',
            'P995', 'P817', 'P973', 'P811', 'P225', 'P809', 'P20', 'P230', 'P231', 
            'P233', 'P10', 'P234', 'P235', 'P13', # adjacent
            'P150', 'P55',  'P781', 'P808', # adjacent
            'P258', 'P90', 'P129', 'P259', 'P89', 'P124', 'P130', # adjacent
            'P147', 'P988', 'P961', 'P1000', 'P37', 'P987', 'P983', 'P984', 'P52', 'P724',
            'P725', 'P982', 'P166', 'P65', 'P821', 'P978', 'P249', 'P976', 'P974', 'P963',
            'P16', 'P184', 'P915', 'P966', 'P971', 'P255', 'P975', 'P972', 'P986', 'P5',
            'P69', 'P144', 'P211', 'P752', 'P753', 'P841', 'P760', 'P177', 'P965', 'P139',
            'P998', 'P968', 'P969', 'P994', 'P997', 'P992', 'P138', 'P999', 'P159', 'P964',
            'P131', 'P967', 'P12', 'P14', 'P15', 'P128', 'P70',
            # region 5
            'P161', 'P981', 'P822', 'P309',
            ]
  region3 = [# 2 boundary edges
            'P397', # r3_r2
            'P424', # r3_r2
            'P140', 'P372', 'P42', 'P633', 'P38', 'P847', 'P596', 'P609', 'P846', 'P852',
            'P40', 'P844', 'P850', 'P158', 'P823', 'P826', 'P155', 'P851', 'P1045', 'P43',
            'P44', 'P853', 'P51', 'P866', 'P46', 'P157', 'P827', 'P154', 'P830', 'P160',
            'P831', 'P162', 'P246', 'P156', 'P248', 'P266', 'P57', 'P492', 'P58', 'P245',
            'P264', 'P859', 'P49', 'P48', 'P861', 'P398', 'PU8', 'P399', 'PU9', 'P403', 'P409', 'P402', # adjacent
            'P871', 'P855', 'P858', 'P842', 'PU10', 'PU11', 'P410',  'P1044'
            ]
  return region0, region1, region2, region3