## StateMod Direct Diversion Rights File (.ddr)

In [1]:
import os

import numpy as np
import pandas as pd
from SALib.sample import latin
from joblib import Parallel, delayed


In [2]:
def populate_dict(line, d, column_widths):
    """Slice one fixed-width record into its fields and append them to ``d``.

    Fields are read consecutively in the order id, name, struct, admin,
    decree, on_off.  Each field spans ``column_widths[field]`` characters and
    is stored as the raw slice, so padding inside a field is kept.

    :param line:            One line of the file, with or without its line ending.
    :param d:               Dictionary mapping each field name to a list.  It is
                            modified in place.
    :param column_widths:   Dictionary mapping each field name to its width in
                            characters.

    :return:                The same dictionary ``d`` with one value appended to
                            the list of every field.

    """

    # remove the line ending and trailing blanks only; leading whitespace is
    # part of the first fixed-width column, and removing it would shift every
    # following field to the left
    record = line.rstrip()

    start_index = 0
    for field in ("id", "name", "struct", "admin", "decree", "on_off"):

        end_index = start_index + column_widths[field]
        d[field].append(record[start_index:end_index])

        # the next field starts where this one ends
        start_index = end_index

    return d


def prep_data(template_file, column_list, value_columns, comment="#"):
    """Ingest a StateMod direct diversion rights (.ddr) template file into a data frame.

    Leading lines that start with ``comment`` are collected verbatim as the
    header.  The first line that does not start with ``comment``, and every
    line after it (including any later comment lines), is parsed as a
    fixed-width data record.

    NOTE: the field widths are read from the module-level ``column_widths``
    dictionary, which must be defined before this function is called.

    :param template_file:   Path to the template file to read.
    :param column_list:     Not used by this function; kept so existing calls
                            keep working.
    :param value_columns:   Not used by this function; kept so existing calls
                            keep working.
    :param comment:         Marker that identifies a header comment line.

    :return:                Tuple of (data frame with one row per data record,
                            header text).  The ``decree`` column is converted
                            to float and the ``on_off`` column to integer; the
                            other columns hold the raw field strings.

    """

    # dictionary to hold values for each field
    d = {"id": [],
         "name": [],
         "struct": [],
         "admin": [],
         "decree": [],
         "on_off": []}

    # header lines, joined once at the end
    header_lines = []

    # becomes True at the first non-comment line and stays True afterwards
    capture = False
    with open(template_file) as template:

        for line in template:

            if capture or not line.startswith(comment):

                # populate dictionary with data content
                d = populate_dict(line, d, column_widths)
                capture = True

            else:

                # store any header and preliminary lines to use in restoration
                header_lines.append(line)

    # convert dictionary to a pandas data frame
    df = pd.DataFrame(d)

    # adjust types for modification; the builtin int is used because the
    # np.int alias was removed from NumPy
    df["decree"] = df["decree"].astype(np.float64)
    df["on_off"] = df["on_off"].astype(int)

    return df, "".join(header_lines)


def set_alignment(value, n_spaces=0, align="left"):
    """Return ``value`` as text with ``n_spaces`` blanks added on one side.

    With ``align="left"`` the blanks follow the value; with ``align="right"``
    they precede it.  The alignment keyword is matched case-insensitively.
    Any other keyword raises a ValueError.
    """

    # compare the alignment keyword without regard to case
    mode = align.casefold()

    # reject anything other than the two supported choices
    if mode not in ("left", "right"):
        raise ValueError(f"Choice for alignment '{align}' not supported.  Must be 'left' or 'right'.")

    padding = " " * n_spaces

    if mode == "right":
        return f"{padding}{value}"

    return f"{value}{padding}"


def pad_with_spaces(value, expected_width, align="left"):
    """Strip ``value`` and pad it with blanks to exactly ``expected_width`` characters.

    The blanks are placed by ``set_alignment`` according to ``align``.  An
    AssertionError is raised when the stripped value is wider than
    ``expected_width``.
    """

    # discard any padding the value already carries
    trimmed = value.strip()
    content_width = len(trimmed)

    # a value wider than its column cannot be padded
    if content_width > expected_width:
        raise AssertionError(f"Column width '{content_width}' exceeds the expected width '{expected_width}'")

    # fill the remainder of the column with blanks
    return set_alignment(value=trimmed,
                         n_spaces=expected_width - content_width,
                         align=align)

        
def add_zero_padding(x, precision=2):
    """Some fields expect zero padding that gets rounded off by pandas.
    This method adds that back in.

    :param x:           Number formatted as a string, e.g. "12.5".
    :param precision:   Minimum number of characters required after the
                        decimal point.

    :return:            ``x`` with trailing zeros appended until the part after
                        the decimal point has at least ``precision`` characters.
                        A plain integer string such as "5" gains a decimal point
                        and ``precision`` zeros.  Any other string without a
                        decimal point (e.g. "nan") is returned unchanged.

    """

    if "." not in x:

        # without a decimal point there is no fractional part to extend;
        # appending zeros directly would change the value ("5" -> "50")
        if precision > 0 and x.lstrip("+-").isdigit():
            return f"{x}.{'0' * precision}"

        return x

    # determine the number of zeros needed after the last decimal point
    n_zeros = precision - len(x.rsplit(".", 1)[-1])

    if n_zeros > 0:
        return f"{x}{'0' * n_zeros}"

    return x


def workhorse(municipal_struct,
              standard_struct,
              sample,
              sample_id,
              output_dir,
              column_widths,
              data_df,
              header):
    """Write one modified copy of the template file for a single sample.

    The decree and on/off values of every row whose ``struct`` is listed in
    ``municipal_struct`` or ``standard_struct`` are replaced by the values of
    ``sample``.  The header and the re-formatted fixed-width records are then
    written to a new file in ``output_dir``.

    NOTE: this function also reads three module-level names: ``problem`` (to
    locate each variable inside ``sample``), ``template_file`` and
    ``scenario`` (to build the output file name).

    :param municipal_struct:    List of ``struct`` values that receive the
                                municipal decree and switch.
    :param standard_struct:     List of ``struct`` values that receive the
                                standard decree and switch.
    :param sample:              Sequence of sampled values ordered like
                                ``problem["names"]``.
    :param sample_id:           Identifier of the sample; used in the output
                                file name.
    :param output_dir:          Directory the output file is written to.
    :param column_widths:       Dictionary mapping each field name to its width
                                in characters.
    :param data_df:             Template data frame; it is copied, not modified.
    :param header:              Header text written before the data records.

    :return:                    None

    """

    # break out values from sample
    names = problem["names"]
    municipal_decree = np.around(sample[names.index("municipal_decree")], 2)
    standard_decree = np.around(sample[names.index("standard_decree")], 2)
    municipal_switch = sample[names.index("municipal_switch")]
    standard_switch = sample[names.index("standard_switch")]

    # copy template data frame for alteration
    df = data_df.copy()

    # remove field padding so the struct values can be matched exactly
    df["struct"] = df["struct"].str.strip()

    # modify value columns of the associated structures based on the lhs draw;
    # the standard values are applied last and therefore win if a struct
    # appears in both lists
    df["decree"] = np.where(df["struct"].isin(municipal_struct), municipal_decree, df["decree"])
    df["decree"] = np.where(df["struct"].isin(standard_struct), standard_decree, df["decree"])
    df["on_off"] = np.where(df["struct"].isin(municipal_struct), int(municipal_switch), df["on_off"])
    df["on_off"] = np.where(df["struct"].isin(standard_struct), int(standard_switch), df["on_off"])

    # convert all fields to str type
    df = df.astype(str)

    # construct output file name
    template_basename = os.path.basename(template_file)
    template_stem, template_extension = os.path.splitext(template_basename)
    output_file = os.path.join(output_dir, f"{template_stem}_scenario-{scenario}_sample-{sample_id}{template_extension}")

    # ensure decree field is padded with two zeros
    df["decree"] = df["decree"].apply(add_zero_padding)

    # fields in file order, each with the alignment used inside its column
    field_alignment = (("id", "left"),
                       ("name", "left"),
                       ("struct", "left"),
                       ("admin", "right"),
                       ("decree", "right"),
                       ("on_off", "right"))

    # format every row with the appropriate column widths and alignment
    lines = []
    for row in zip(*(df[field] for field, _ in field_alignment)):

        padded_fields = [pad_with_spaces(value, column_widths[field], align=align)
                         for value, (field, align) in zip(row, field_alignment)]

        lines.append("".join(padded_fields) + "\n")

    # join once rather than growing a string row by row
    data = "".join(lines)

    # write output file
    with open(output_file, "w") as out:

        # write header
        out.write(header)

        # write data
        out.write(data)


## Setup

In [3]:
# fixed seed so that the sampling is reproducible
seed_value = 123

# location of the input data
data_dir = "/Users/d3y010/projects/statemod/data/inputs/cm2015_StateMod/StateMod"

# template file that every modified file is derived from
template_file = os.path.join(data_dir, "cm2015.ddr")

# destination directory for the modified files
output_dir = "/Users/d3y010/Desktop/statemod"

# scenario name; it becomes part of each output file name
scenario = "test"

# character that marks a row as a comment
comment = "#"

# columns to process, in file order
column_list = ["id", "name", "struct", "admin", "decree", "on_off"]

# width of each column in the output file, keyed by column name
column_widths = dict(zip(column_list, (12, 24, 12, 16, 8, 8)))

# one empty list of values per field
d = {field: [] for field in column_list}

# value columns that may be modified
value_columns = ["decree", "on_off"]

# struct values associated with each category
municipal_struct = ["3600507", "3600642"]
standard_struct = ["3600649_D", "3600645"]



## Ingest and process template file

In [4]:
%%time

# parse the template into a data frame and keep its header text for the output files
df, header = prep_data(template_file=template_file,
                       column_list=column_list,
                       value_columns=value_columns,
                       comment="#")


CPU times: user 5.84 ms, sys: 1.28 ms, total: 7.12 ms
Wall time: 6.2 ms


## Build LHS

In [5]:
%%time

# define the sampling problem: one [lower, upper] bound per named variable
problem = dict(
    num_vars=4,
    names=["municipal_decree", "standard_decree", "municipal_switch", "standard_switch"],
    bounds=[[10.2, 100.9],
            [45.7, 1000.0],
            [0, 1],
            [0, 1]],
)

# number of samples to draw; one file is generated per sample
n_samples = 4

# draw the sample so we can test
lhs = latin.sample(problem, n_samples, seed_value)

# column positions of the two on/off switches
municipal_switch_idx = problem["names"].index("municipal_switch")
standard_switch_idx = problem["names"].index("standard_switch")

# round the switch columns so that each value is 0 or 1
for switch_idx in (municipal_switch_idx, standard_switch_idx):
    lhs[:, switch_idx] = np.around(lhs[:, switch_idx])

lhs


CPU times: user 192 µs, sys: 29 µs, total: 221 µs
Wall time: 214 µs


array([[ 49.18895889, 385.21762372,   1.        ,   1.        ],
       [ 25.99243878, 113.96569184,   0.        ,   1.        ],
       [ 66.45513087, 616.3994369 ,   0.        ,   0.        ],
       [ 88.16962565, 775.66265418,   1.        ,   0.        ]])

## Run all LHS in parallel

In [6]:
%%time

# wrap the worker once so that each sample becomes one deferred call
run_sample = delayed(workhorse)

# process every row of the LHS array; the row index is used as the sample id
results = Parallel(n_jobs=-1, backend="loky")(
    run_sample(municipal_struct,
               standard_struct,
               sample,
               sample_id,
               output_dir,
               column_widths,
               df,
               header)
    for sample_id, sample in enumerate(lhs)
)


CPU times: user 37.6 ms, sys: 46.2 ms, total: 83.8 ms
Wall time: 733 ms
