## StateMod Direct Diversion Rights File (.ddr)

In [1]:
import os

import numpy as np
import pandas as pd
from SALib.sample import latin
from joblib import Parallel, delayed


## Setup

In [2]:
# fixed seed so the Latin hypercube draw is repeatable
seed_value = 123

# location of the StateMod input data set (machine-specific absolute path)
data_dir = "/Users/d3y010/projects/statemod/data/inputs/cm2015_StateMod/StateMod"

# .ddr template that every generated file is derived from
template_file = os.path.join(data_dir, "cm2015.ddr")

# where the generated files are written (machine-specific absolute path)
output_dir = "/Users/d3y010/Desktop"

# label embedded in every output file name
scenario = "test"

# a line starting with this character is a comment
comment = "#"

# fields of one record, in the order they appear on a line
column_list = ["id", "name", "struct", "admin", "decree", "on_off"]

# width, in characters, of each field in the output file
column_widths = dict(zip(column_list, (12, 24, 12, 16, 8, 8)))

# one empty list per field, used to accumulate parsed values
d = {field: [] for field in column_list}

# value columns that may be modified
value_columns = ["decree", "on_off"]

# structure ids belonging to each category
municipal_struct = ["3600507", "3600642"]
standard_struct = ["3600649_D", "3600645"]



In [74]:
def populate_dict(line, d, column_widths):
    """Append the fixed-width fields of one record line to the lists in ``d``.

    The line is cut into consecutive slices in the order id, name, struct,
    admin, decree, on_off; each slice is as wide as the matching entry of
    ``column_widths``.  Slices are stored exactly as cut, padding included, so
    code that compares or converts a field has to deal with the whitespace.
    A line shorter than the summed widths yields short or empty strings for
    the trailing fields.

    Parameters
    ----------
    line : str
        One data line, with or without its line terminator.
    d : dict
        Maps every field name to a list; one value is appended to each list.
    column_widths : dict
        Maps every field name to its width in characters.

    Returns
    -------
    dict
        The same ``d`` object, after the values were appended in place.
    """

    # Remove only the line terminator.  Stripping leading blanks as well would
    # shift every later field to the left whenever the first field starts
    # with padding.
    record = line.rstrip("\r\n")

    start_index = 0
    for field in ("id", "name", "struct", "admin", "decree", "on_off"):
        end_index = start_index + column_widths[field]
        d[field].append(record[start_index:end_index])
        start_index = end_index

    return d


def prep_data(template_file, column_list, value_columns, comment="#", field_widths=None):
    """Read a StateMod .ddr template into a data frame plus its header text.

    Lines that start with ``comment`` and come before the first data record
    are collected verbatim as the header.  From the first non-comment line on,
    every non-blank line is parsed as a fixed-width record by
    ``populate_dict``.  Blank lines are kept in the header when they precede
    the data and are skipped afterwards.

    Parameters
    ----------
    template_file : str
        Path of the template file to read.
    column_list : list
        Accepted for interface compatibility; not used by this function.
    value_columns : list
        Accepted for interface compatibility; not used by this function.
    comment : str
        Prefix that marks a header comment line.
    field_widths : dict, optional
        Width of each field in characters.  When omitted, the notebook-level
        ``column_widths`` is used.

    Returns
    -------
    (pandas.DataFrame, str)
        The parsed records, with ``decree`` converted to float and ``on_off``
        to int while the other columns stay as the raw fixed-width strings,
        and the header text.
    """

    if field_widths is None:
        # fall back to the widths defined in the notebook's setup cell
        field_widths = column_widths

    # dictionary to hold values for each field
    d = {"id": [],
         "name": [],
         "struct": [],
         "admin": [],
         "decree": [],
         "on_off": []}

    # empty string to hold header data
    header = ""

    # becomes True at the first data record; everything after it is data
    capture = False
    with open(template_file) as template:

        for line in template:

            if not line.strip():
                # a blank line carries no record and would break the numeric
                # conversion below; keep it only as part of the header
                if not capture:
                    header += line
                continue

            if not capture and line.startswith(comment):
                # store header lines to use in restoration
                header += line
                continue

            capture = True
            d = populate_dict(line, d, field_widths)

    # convert dictionary to a pandas data frame
    df = pd.DataFrame(d)

    # adjust types for modification; the builtin int replaces the np.int
    # alias, which NumPy removed in version 1.24
    df["decree"] = df["decree"].astype(np.float64)
    df["on_off"] = df["on_off"].astype(int)

    return df, header


def workhorse(municipal_struct,
              standard_struct,
              sample,
              sample_id,
              output_dir,
              column_widths,
              data_df,
              header):
    """Write one modified copy of the template for a single sample.

    Rows whose structure id is listed in ``municipal_struct`` or
    ``standard_struct`` get the decree and on/off switch drawn for that
    category; all other rows keep their template values.  The header text
    followed by the altered records is written to
    ``<template name>_scenario-<scenario>_sample-<sample_id><extension>`` in
    ``output_dir``.

    Besides its arguments the function reads the notebook-level names
    ``problem`` (for the position of each parameter in ``sample``),
    ``template_file`` and ``scenario`` (for the output file name).

    Parameters
    ----------
    municipal_struct, standard_struct : list of str
        Structure ids of the two categories, without padding.
    sample : sequence of float
        One sample row, ordered like ``problem["names"]``.
    sample_id : int
        Identifier of the sample, used in the output file name.
    output_dir : str
        Directory the output file is written to.
    column_widths : dict
        Minimum width of each column, passed to ``DataFrame.to_string``.
    data_df : pandas.DataFrame
        Parsed template records; it is copied, never modified.
    header : str
        Text written ahead of the records.

    Returns
    -------
    None
    """

    # The template stores decrees with two decimals; an unrounded draw such as
    # 385.21762372 would be wider than the decree column.
    decree_decimals = 2

    # break out values from sample
    names = problem["names"]
    municipal_decree = round(float(sample[names.index("municipal_decree")]), decree_decimals)
    standard_decree = round(float(sample[names.index("standard_decree")]), decree_decimals)
    municipal_switch = int(sample[names.index("municipal_switch")])
    standard_switch = int(sample[names.index("standard_switch")])

    # copy template data frame for alteration
    df = data_df.copy()

    # The struct column holds fixed-width slices that still carry their
    # padding, so the ids must be stripped before they can equal the bare ids
    # in the category lists.
    struct_ids = df["struct"].astype(str).str.strip()
    is_municipal = struct_ids.isin(municipal_struct)
    is_standard = struct_ids.isin(standard_struct)

    # modify value columns of the associated structures based on the lhs draw
    df["decree"] = np.where(is_municipal, municipal_decree, df["decree"])
    df["decree"] = np.where(is_standard, standard_decree, df["decree"])
    df["on_off"] = np.where(is_municipal, municipal_switch, df["on_off"])
    df["on_off"] = np.where(is_standard, standard_switch, df["on_off"])

    # convert all fields to str type
    df = df.astype(str)

    # construct output file name
    template_basename = os.path.basename(template_file)
    template_name_parts = os.path.splitext(template_basename)
    output_file = os.path.join(output_dir, f"{template_name_parts[0]}_scenario-{scenario}_sample-{sample_id}{template_name_parts[-1]}")

    # left-justify names to one character less than the longest name
    name_formatter = '{{:<{}s}}'.format(df['name'].str.len().max()-1).format

    # write output file
    with open(output_file, "w") as out:

        # write header
        out.write(header)

        # write altered content
        df.to_string(buf=out,
                     col_space=column_widths,
                     header=False,
                     index=False,
                     formatters={'name': name_formatter})



## Ingest and process template file

In [75]:
%%time

# parse the template once: `df` receives the records and `header` the comment
# lines that precede them
df, header = prep_data(template_file, column_list, value_columns, comment="#")

# show the parsed records
df


CPU times: user 8.62 ms, sys: 1.74 ms, total: 10.4 ms
Wall time: 8.89 ms


Unnamed: 0,id,name,struct,admin,decree,on_off
0,3600507.01,ALBER DITCH,3600507,12950.00000,1.00,1
1,3600507.02,ALBER DITCH,3600507,13301.00000,1.37,1
2,3600507.03,ALBER DITCH,3600507,17806.00000,1.00,1
3,3600507.04,ALBER DITCH,3600507,22964.21102,2.13,1
4,3600507.05,ALBER DITCH,3600507,30184.23162,5.50,1
...,...,...,...,...,...,...
1714,72_AMC001.01,72_AMC001 Colorado River,72_AMC001,1.00000,0.00,1
1715,72_GJMun.01,City of Grand Jnct,72_GJMun,1.00000,999.00,1
1716,72_UWCD.01,Ute Water Treatment Plan,72_UWCD,99999.99999,0.00,0
1717,ChevDem.01,Chevron Demand Node,ChevDem,99999.99999,0.00,1


In [76]:
# summary statistics of the numeric columns (decree and on_off)
df.describe()

Unnamed: 0,decree,on_off
count,1719.0,1719.0
mean,92.870378,0.997091
std,272.033733,68.11972
min,0.0,-1996.0
25%,2.0,1.0
50%,4.68,1.0
75%,13.615,1.0
max,1726.0,1997.0


In [77]:
# distinct on_off values present in the template
df["on_off"].unique()

array([    1,     0,  1997, -1996])

In [78]:
# records whose on_off value is something other than 0 or 1
df.loc[~df["on_off"].isin([0, 1])]

Unnamed: 0,id,name,struct,admin,decree,on_off
1547,7200813BP.01,OMID Bypass,7200813BP,30895.23492,1100.0,1997
1548,7200813BP.02,OMID Bypass,7200813BP,999998.0,1100.0,-1996


In [79]:
# preview the first 25 records
df.head(25)

Unnamed: 0,id,name,struct,admin,decree,on_off
0,3600507.01,ALBER DITCH,3600507,12950.0,1.0,1
1,3600507.02,ALBER DITCH,3600507,13301.0,1.37,1
2,3600507.03,ALBER DITCH,3600507,17806.0,1.0,1
3,3600507.04,ALBER DITCH,3600507,22964.21102,2.13,1
4,3600507.05,ALBER DITCH,3600507,30184.23162,5.5,1
5,3600603.01,ED WARD DITCH,3600603,19239.0,3.5,1
6,3600603.02,ED WARD DITCH,3600603,20228.0,1.25,1
7,3600603.03,ED WARD DITCH,3600603,32075.20228,6.21,1
8,3600603.04,ED WARD DITCH,3600603,32075.25707,6.02,1
9,3600606.01,ELLIOTT CREEK FEEDER,3600606,31258.0,90.0,1


## Build LHS

In [80]:
%%time

# build our problem
problem = {
    'num_vars': 4,
    'names': ['municipal_decree', 'standard_decree', 'municipal_switch', 'standard_switch'],
    'bounds': [[10.2, 100.9],
               [45.7, 1000.0],
               [0, 1],
               [0, 1]]
}

# generate 4 files
n_samples = 4

# generate our sample so we can test
lhs = latin.sample(problem, n_samples, seed_value)

# make on_off bounds 0, 1
municipal_switch_idx = problem["names"].index("municipal_switch")
lhs[:, municipal_switch_idx] = np.around(lhs[:, municipal_switch_idx])

standard_switch_idx = problem["names"].index("standard_switch")
lhs[:, standard_switch_idx] = np.around(lhs[:, standard_switch_idx])

lhs


CPU times: user 230 µs, sys: 33 µs, total: 263 µs
Wall time: 238 µs


array([[ 49.18895889, 385.21762372,   1.        ,   1.        ],
       [ 25.99243878, 113.96569184,   0.        ,   1.        ],
       [ 66.45513087, 616.3994369 ,   0.        ,   0.        ],
       [ 88.16962565, 775.66265418,   1.        ,   0.        ]])

## Run all LHS in parallel

In [81]:
%%time

results = Parallel(n_jobs=-1, backend="loky")(delayed(workhorse)(municipal_struct, standard_struct, sample, 
                                                                 sample_id, output_dir, column_widths, df, header) 
                                                                 for sample_id, sample in enumerate(lhs))



CPU times: user 12.2 ms, sys: 2.17 ms, total: 14.3 ms
Wall time: 289 ms
