
Copyright (c) 2018 Leland Stanford Junior University
Copyright (c) 2018 The Regents of the University of California

This file is part of pelicun.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

You should have received a copy of the BSD 3-Clause License along with
pelicun. If not, see <http://www.opensource.org/licenses/>.

Contributors:
Adam Zsarnóczay

In [None]:
"""
This module has classes and methods that handle file input and output.

.. rubric:: Contents

.. autosummary::

    read_SimCenter_DL_input
    read_SimCenter_EDP_input
    read_population_distribution
    read_component_DL_data
    convert_P58_data_to_json
    create_HAZUS_EQ_json_files
    create_HAZUS_HU_json_files
    write_SimCenter_DL_output
    write_SimCenter_DM_output
    write_SimCenter_DV_output

"""

In [None]:
# from .base import *

In [None]:
# import json
import xml.etree.ElementTree as ET
from distutils.util import strtobool
from copy import deepcopy

In [None]:
import warnings

In [None]:
# Maps the internal decision variable identifiers to human-readable names.
# The injury entries are listed by their zero-based internal index, while the
# corresponding names use one-based severity levels.
convert_dv_name = dict(
    DV_rec_cost='Reconstruction Cost',
    DV_rec_time='Reconstruction Time',
    DV_injuries_0='Injuries lvl. 1',
    DV_injuries_1='Injuries lvl. 2',
    DV_injuries_2='Injuries lvl. 3',
    DV_injuries_3='Injuries lvl. 4',
    DV_red_tag='Red Tag ',
)

In [None]:
# this is a convenience function for converting strings to float or None
def float_or_None(string):
    """
    Convert the input to a float, or return None if that is not possible.

    Parameters
    ----------
    string: object
        The value to convert; typically a string read from an input file.

    Returns
    -------
    res: float or None
        The result of float(string), or None if the conversion fails.

    """
    try:
        return float(string)
    # Only conversion failures are mapped to None. A bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return None

In [None]:
def int_or_None(string):
    """
    Convert the input to an int, or return None if that is not possible.

    Parameters
    ----------
    string: object
        The value to convert; typically a string read from an input file.

    Returns
    -------
    res: int or None
        The result of int(string), or None if the conversion fails.

    """
    try:
        return int(string)
    # Only conversion failures are mapped to None. A bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return None

In [None]:
def process_loc(string, stories):
    """
    Convert a location description to a list of story numbers.

    The following descriptions are recognized:

    - an integer (e.g. "3") that identifies a single story;
    - a range in the form of "low-high" (e.g. "2-4", "1-roof"), where both
      bounds are descriptions recognized by this function; the range
      includes both bounds;
    - "all" that corresponds to every story from 1 to `stories`;
    - "top" or "roof" that corresponds to the story numbered `stories`.

    Whitespace around the description is ignored, so that the entries of
    comma-separated lists such as "1, roof" are interpreted properly.

    Parameters
    ----------
    string: string
        The location description.
    stories: int
        The number of stories in the building.

    Returns
    -------
    locations: list of int or None
        The story numbers that correspond to the description, or None if the
        description is not recognized.

    """
    # a plain integer identifies a single story
    try:
        return [int(string), ]
    except (TypeError, ValueError, OverflowError):
        pass

    # only strings can hold one of the keyword or range descriptions
    if not isinstance(string, str):
        return None

    loc = string.strip()

    if "-" in loc:
        bounds = loc.split('-')
        if len(bounds) != 2:
            return None

        s_low = process_loc(bounds[0], stories)
        s_high = process_loc(bounds[1], stories)

        # a range with an unrecognized bound is not recognized either
        if (s_low is None) or (s_high is None):
            return None

        return list(range(s_low[0], s_high[0]+1))

    elif loc == "all":
        return list(range(1, stories+1))

    elif loc in ("top", "roof"):
        return [stories, ]

    else:
        return None

In [None]:
def read_SimCenter_DL_input(input_path, assessment_type='P58', verbose=False):
    """
    Read the damage and loss input information from a json file.

    The SimCenter in the function name refers to having specific fields
    available in the file. Such a file is automatically prepared by the
    SimCenter PBE Application, but it can also be easily manipulated or created
    manually. The accepted input fields are explained in detail in the Input
    section of the documentation.

    Parameters
    ----------
    input_path: string
        Location of the DL input json file.
    assessment_type: {'P58', 'HAZUS_EQ', 'HAZUS_HU'}
        Tailors the warnings and verifications towards the type of assessment.
        default: 'P58'.
    verbose: boolean
        If True, the function echoes the information read from the file. This
        can be useful to ensure that the information in the file is properly
        read by the method.

    Returns
    -------
    data: dict
        A dictionary with all the damage and loss data. It has the following
        top-level keys: 'general', 'units', 'components', 'collapse_modes',
        'decision_variables', 'dependencies', and 'data_sources'.

    Raises
    ------
    ValueError
        If the assessment type is not one of the supported options; if a
        piece of information that is required for the requested decision
        variables is missing from the input file; or if a component is
        defined with an unknown location, an unknown unit, or inconsistent
        units.

    """

    AT = assessment_type

    # Every branch below assumes one of these assessment types. Any other
    # value would only fail later with an obscure unbound variable error.
    if AT not in ('P58', 'HAZUS_EQ', 'HAZUS_HU'):
        raise ValueError(
            "Unknown assessment type: {}. The supported types are 'P58', "
            "'HAZUS_EQ', and 'HAZUS_HU'.".format(AT))

    with open(input_path, 'r') as f:
        jd = json.load(f)

    # get the data required for DL
    data = {label: dict() for label in [
        'general', 'units', 'components', 'collapse_modes',
        'decision_variables', 'dependencies', 'data_sources',
    ]}

    # create a few internal variables for convenience
    DL_input = jd['DamageAndLoss']

    response = DL_input.get('ResponseModel', None)
    if response is not None:
        res_description = response.get('ResponseDescription', None)
        det_lims = response.get('DetectionLimits', None)
        uncertainty = response.get('AdditionalUncertainty', None)
    else:
        res_description = None
        det_lims = None
        uncertainty = None

    damage = DL_input.get('DamageModel', None)
    if damage is not None:
        irrep_res_drift = damage.get('IrrepairableResidualDrift', None)
        coll_prob = damage.get('CollapseProbability', None)
        coll_lims = damage.get('CollapseLimits', None)
    else:
        irrep_res_drift = None
        coll_prob = None
        coll_lims = None

    loss = DL_input.get('LossModel', None)
    if loss is not None:
        repl_cost = loss.get('ReplacementCost', None)
        repl_time = loss.get('ReplacementTime', None)
        dec_vars = loss.get('DecisionVariables', None)
        inhabitants = loss.get('Inhabitants', None)
    else:
        repl_cost = None
        repl_time = None
        dec_vars = None
        inhabitants = None

    depends = DL_input.get('Dependencies', None)
    components = DL_input.get('Components', None)
    coll_modes = DL_input.get('CollapseModes', None)

    # decision variables of interest
    if dec_vars is not None:
        for target_att, source_att in [['injuries', 'Injuries'],
                                       ['rec_cost', 'ReconstructionCost'],
                                       ['rec_time', 'ReconstructionTime'],
                                       ['red_tag', 'RedTag'], ]:
            val = bool(dec_vars.get(source_att, False))
            data['decision_variables'].update({target_att: val})
    else:
        warnings.warn(UserWarning(
            "No decision variables specified in the input file. Assuming that "
            "only reconstruction cost and time needs to be calculated."))
        data['decision_variables'].update({'injuries': False,
                                           'rec_cost': True,
                                           'rec_time': True, })
        # red tag is only used by P58 now
        if AT == 'P58':
            data['decision_variables'].update({'red_tag': False})

    # from here on, dec_vars refers to the processed decision variables
    dec_vars = data['decision_variables']

    # data sources
    # check if the user specified custom data sources
    path_CMP_data = DL_input.get("ComponentDataFolder", "")

    if inhabitants is not None:
        path_POP_data = inhabitants.get("PopulationDataFile", "")
    else:
        path_POP_data = ""

    # if not, use the default location
    # The names below are also the names of the default resource folders.
    default_data_name = {
        'P58'     : 'FEMA P58 first edition',
        'HAZUS_EQ': 'HAZUS MH 2.1 earthquake',
        'HAZUS_HU': 'HAZUS MH 2.1 hurricane'
    }

    if path_CMP_data == "":
        warnings.warn(UserWarning(
            "The component database is not specified; using the default "
            "{} data.".format(default_data_name[AT])
        ))
        path_CMP_data = pelicun_path + '/resources/{}/DL json/'.format(
            default_data_name[AT])
    data['data_sources'].update({'path_CMP_data': path_CMP_data})

    # The population data is only needed if we are interested in injuries
    if dec_vars['injuries']:
        if path_POP_data == "":
            warnings.warn(UserWarning(
                "The population distribution is not specified; using the default "
                "{} data.".format(default_data_name[AT])
            ))
            path_POP_data = pelicun_path
            # NOTE(review): no default population file is appended for
            # HAZUS_HU; the path then points to the package folder itself.
            if AT in ('P58', 'HAZUS_EQ'):
                path_POP_data += '/resources/{}/population.json'.format(
                    default_data_name[AT])
        data['data_sources'].update({'path_POP_data': path_POP_data})

    # general information
    GI = jd.get("GeneralInformation", None)

    # units
    if (GI is not None) and ('units' in GI):
        for key, value in GI['units'].items():
            if value == 'in':
                value = 'inch'
            # the unit names are resolved against module-level names
            if value in globals():
                data['units'].update({key: globals()[value]})
            else:
                warnings.warn(UserWarning(
                    "Unknown {} unit: {}".format(key, value)
                ))
        if 'length' in data['units']:
            data['units'].update({
                'area': data['units']['length']**2.,
                'volume': data['units']['length']**3.
            })
            # Unless specified explicitly, the speed and acceleration units
            # are set to the same scale factor as the length unit.
            if 'speed' not in data['units']:
                data['units'].update({
                    'speed': data['units']['length']})
            if 'acceleration' not in data['units']:
                data['units'].update({
                    'acceleration': data['units']['length']})
    else:
        warnings.warn(UserWarning(
            "No units were specified in the input file. Standard units are "
            "assumed."))
        data['units'].update({
            'force': globals()['N'],
            'length': globals()['m'],
            'area': globals()['m2'],
            'volume': globals()['m3'],
            'speed': globals()['mps'],
            'acceleration': globals()['mps2'],
        })

    # other attributes that can be used by a P58 assessment
    if AT == 'P58':
        # Each entry: target key, source key, conversion function, kind of
        # unit to scale with, and the decision variable that requires it
        # ('all' means that the attribute is always required).
        for target_att, source_att, f_conv, unit_kind, dv_req in [
            ['plan_area', 'planArea', float, 'area', 'injuries'],
            ['stories', 'stories', int, '', 'all'],
            # The following lines are commented out for now, because we do not
            # use these pieces of data anyway.
            #['building_type', 'type', str, ''],
            #['height', 'height', float, 'length'],
            #['year_built', 'year', int, ''],
        ]:
            if (GI is not None) and (source_att in GI):
                if unit_kind != '':
                    f_unit = data['units'][unit_kind]
                else:
                    f_unit = 1
                att_value = f_conv(GI[source_att]) * f_unit
                data['general'].update({target_att: att_value})
            else:
                if (dv_req != '') and ((dv_req == 'all') or dec_vars[dv_req]):
                    raise ValueError(
                        "{} has to be specified in the DL input file to "
                        "estimate {} decision variable(s).".format(source_att,
                                                                   dv_req))

    # is this a coupled assessment?
    if res_description is not None:
        data['general'].update({
            'coupled_assessment':
                res_description.get('CoupledAssessment', False)})
    else:
        data['general'].update({'coupled_assessment': False})

    # components
    # Having components defined is not necessary, but if a component is defined
    # then all of its attributes need to be specified. Note that the required
    # set of attributes depends on the type of assessment.
    if components is not None:
        for fg_id, frag_group in components.items():
            if AT == 'P58':
                # TODO: allow location and direction inputs with '-' in them
                comp_data = {
                    'locations'   : [],
                    'directions'  : [],
                    'quantities'  : [],
                    'unit'        : [],
                    'distribution': [],
                    'cov'         : [],
                    'csg_weights' : [],
                }

                for comp in frag_group:
                    locs = []
                    for loc_ in comp['location'].split(','):
                        loc_list = process_loc(loc_,
                                               data['general']['stories'])
                        # fail with a clear message rather than trying to
                        # iterate over None
                        if loc_list is None:
                            raise ValueError(
                                "Unknown location for component {}: "
                                "{}".format(fg_id, loc_))
                        locs.extend(loc_list)
                    locs.sort()

                    dirs = sorted([int_or_None(dir_)
                                   for dir_ in comp['direction'].split(',')])
                    qnts = [float(qnt)
                            for qnt in comp['median_quantity'].split(',')]
                    # the weights are the relative quantities; the total
                    # quantity is stored for the component
                    csg_weights = (np.asarray(qnts) / np.sum(qnts)).tolist()
                    qnts = np.sum(qnts)

                    # one performance group per location-direction pair
                    pg_count = len(locs) * len(dirs)

                    comp_data['locations'] = (comp_data['locations'] +
                                              [l for l in locs for d in dirs])
                    comp_data['directions'] = (comp_data['directions'] +
                                               dirs * len(locs))

                    unit = comp['unit']
                    if unit not in globals():
                        raise ValueError(
                            "Unknown unit for component {}: {}".format(fg_id,
                                                                       unit))
                    for _ in range(pg_count):
                        comp_data['quantities'].append(qnts)
                        comp_data['csg_weights'].append(csg_weights)
                        comp_data['unit'].append(unit)
                        comp_data['distribution'].append(comp['distribution'])
                        comp_data['cov'].append(comp.get('cov', None))

                # order the performance groups by location
                sorted_ids = np.argsort(comp_data['locations'])
                for key in ['locations', 'directions', 'quantities',
                            'csg_weights', 'distribution', 'cov']:
                    comp_data[key] = [comp_data[key][s_id]
                                      for s_id in sorted_ids]

                if len(set(comp_data['unit'])) != 1:
                    raise ValueError(
                        "Multiple types of units specified for fragility group "
                        "{}. Make sure that every component group in a "
                        "fragility group is defined using the same "
                        "unit.".format(fg_id))
                comp_data['unit'] = comp_data['unit'][0]

            elif AT.startswith('HAZUS'):
                # the HAZUS assessments use the same fixed settings for
                # every fragility group listed in the input file
                comp_data = {
                    'locations'   : [1, ],
                    'directions'  : [1, ],
                    'quantities'  : [1, ],
                    'unit'        : 'ea',
                    'distribution': ['N/A', ],
                    'cov'         : [None, ],
                    'csg_weights' : [[1.0, ], ]
                }

                # some basic pre-processing
                # sort the dirs and their weights to have better structured
                # matrices later
                #dir_order = np.argsort(comp_data['directions'])
                #comp_data['directions'] = [comp_data['directions'][d_i] for d_i
                #                     in dir_order]

                # get the location(s) of components based on non-zero quantities
                #comp_data.update({
                #    'locations': (np.where(comp_data['quantities'] > 0.)[
                #                      0] + 1).tolist()
                #})
                # remove the zeros from the quantities
                #nonzero = comp_data['quantities'] > 0.
                #comp_data['quantities'] = comp_data['quantities'][
                #    nonzero].tolist()

                # scale the quantities according to the specified unit

            # store the component data
            data['components'].update({fg_id: comp_data})
    else:
        warnings.warn(UserWarning(
            "No components were defined in the input file."))

    # collapse modes
    if AT == 'P58':
        # Having collapse modes defined is not necessary, but if a collapse mode
        # is defined, then all of its attributes need to be specified.
        if coll_modes is not None:
            for coll_mode in coll_modes:
                cm_data = {
                    'w'            : float(coll_mode['weight']),
                    'injuries'     : [float(inj) for inj in
                                      coll_mode['injuries'].split(',')],
                    'affected_area': [float(cfar) for cfar in
                                      coll_mode['affected_area'].split(',')],
                }
                # a single value is assumed to apply to every story
                if len(cm_data['affected_area']) == 1:
                    cm_data['affected_area'] = (
                        np.ones(data['general']['stories'])
                        * cm_data['affected_area']).tolist()
                if len(cm_data['injuries']) == 1:
                    cm_data['injuries'] = (
                        np.ones(data['general']['stories'])
                        * cm_data['injuries']).tolist()
                data['collapse_modes'].update({coll_mode['name']: cm_data})
        else:
            warnings.warn(UserWarning(
                "No collapse modes were defined in the input file."))

    # the number of realizations has to be specified in the file
    # NOTE(review): when the response description is available but it has no
    # Realizations field, no error is raised and the 'realizations' key is
    # not set - confirm that this is intended.
    if res_description is not None:
        realizations = res_description.get("Realizations", None)
        if realizations is not None:
            data['general'].update({'realizations': int(realizations)})
    else:
        raise ValueError(
            "Number of realizations is not specified in the input file.")

    # the kind of unit that is used to scale each kind of EDP
    EDP_units = dict(
        # PID is not here because it is unitless
        PFA = 'acceleration',
        PWS = 'speed'
    )
    if AT == 'HAZUS_HU':
        EDP_keys = ['PWS', ]
    else:
        EDP_keys = ['PID', 'PFA']

    # response model info ------------------------------------------------------
    if response is None:
        warnings.warn(UserWarning(
            "Response model characteristics were not defined in the input "
            "file"))

    # detection limits
    if (response is not None) and (det_lims is not None):
        data['general'].update({
            'detection_limits': {
                key: float_or_None(value)
                for key, value in det_lims.items()}})
        DGDL = data['general']['detection_limits']
        # scale the limits by the units
        for EDP_kind, value in DGDL.items():
            if (EDP_kind in EDP_units) and (value is not None):
                f_EDP = data['units'][EDP_units[EDP_kind]]
                DGDL[EDP_kind] = DGDL[EDP_kind] * f_EDP
    else:
        warnings.warn(UserWarning(
            "EDP detection limits were not defined in the input file. "
            "Assuming no detection limits."))

        data['general'].update({'detection_limits': {}})

    # make sure that the detection limits of the EDPs of interest are
    # initialized
    for key in EDP_keys:
        if key not in data['general']['detection_limits']:
            data['general']['detection_limits'].update({key: None})

    # response description
    if (response is not None) and (res_description is not None):
        #TODO: move the collapse-related data to another field
        data['general'].update({'response': {
            'EDP_distribution': res_description.get('EDP_Distribution',
                                                    'lognormal'),
            'EDP_dist_basis':   res_description.get('BasisOfEDP_Distribution',
                                                    'all results')}})
    else:
        warnings.warn(UserWarning(
            "EDP estimation method was not defined in the input file. All EDP "
            "samples are used to define a multivariate lognormal EDP "
            "distribution."))

        data['general'].update({'response': {
            'EDP_distribution': 'lognormal',
            'EDP_dist_basis'  : 'all results'}})

    # additional uncertainty
    if (response is not None) and (uncertainty is not None):
        data['general'].update({
            'added_uncertainty': {
                'beta_gm': float_or_None(uncertainty['GroundMotion']),
                'beta_m' : float_or_None(uncertainty['Modeling'])}})
    else:
        warnings.warn(UserWarning(
            "No additional uncertainties were defined in the input file. "
            "Assuming that EDPs already include all ground motion and model "
            "uncertainty."))
        # small, non-zero values are used as defaults
        data['general'].update({
            'added_uncertainty': {
                'beta_gm': 0.0001,
                'beta_m': 0.0001
            }
        })

    # damage model info --------------------------------------------------------
    # a missing damage model is only reported for P58 assessments
    if (damage is None) and (AT == 'P58'):
        warnings.warn(UserWarning(
            "Damage model characteristics were not defined in the "
            "input file"))

    # P58-specific things
    if AT == 'P58':
        # EDP limits for collapse
        if (damage is not None) and (coll_lims is not None):
            # load the limits
            data['general'].update({
                'collapse_limits': {
                    key: float_or_None(value)
                    for key, value in coll_lims.items()}})

            # scale the limits according to their units
            DGCL = data['general']['collapse_limits']
            for EDP_kind, value in DGCL.items():
                if (EDP_kind in EDP_units) and (value is not None):
                    f_EDP = data['units'][EDP_units[EDP_kind]]
                    DGCL[EDP_kind] = DGCL[EDP_kind] * f_EDP
        else:
            warnings.warn(UserWarning(
                "Collapse EDP limits were not defined in the input file. "
                "No EDP limits are assumed."))

            data['general'].update({'collapse_limits': {}})

        # make sure that PID and PFA collapse limits are initialized
        for key in EDP_keys:
            if key not in data['general']['collapse_limits']:
                data['general']['collapse_limits'].update({key: None})

        # irrepairable drift
        if (damage is not None) and (irrep_res_drift is not None):
            data['general'].update({
                'irrepairable_res_drift': {
                    key: float_or_None(value)
                    for key, value in irrep_res_drift.items()}})
            # TODO: move this in the irrepairable part of general
            yield_drift = irrep_res_drift.get("YieldDriftRatio", None)
            if yield_drift is not None:
                data['general'].update({
                    'yield_drift': float_or_None(yield_drift)})
            elif dec_vars['rec_cost'] or dec_vars['rec_time']:
                warnings.warn(UserWarning(
                    "Yield drift ratio was not defined in the input file. "
                    "Assuming a yield drift ratio of 0.01 radian."))
                data['general'].update({'yield_drift': 0.01})

        elif dec_vars['rec_cost'] or dec_vars['rec_time']:
            warnings.warn(UserWarning(
                "Residual drift limits corresponding to irrepairable "
                "damage were not defined in the input file. We assume that "
                "damage is repairable regardless of the residual drift."))
            # we might need to have a default yield drift here

        # collapse probability
        if 'response' not in data['general']:
            data['general'].update({'response': {}})
        if (damage is not None) and (coll_prob is not None):
            data['general']['response'].update({
                'coll_prob'   : coll_prob.get('Value', 'estimated'),
                'CP_est_basis': coll_prob.get('BasisOfEstimate', 'raw EDP')})
            # anything other than 'estimated' is expected to be a number
            if data['general']['response']['coll_prob'] != 'estimated':
                data['general']['response']['coll_prob'] = \
                    float_or_None(data['general']['response']['coll_prob'])
        else:
            warnings.warn(UserWarning(
                "Collapse probability estimation method was not defined in the "
                "input file. Collapse probability is estimated using raw EDP "
                "samples."))
            data['general']['response'].update({
                'coll_prob'       : 'estimated',
                'CP_est_basis'    : 'raw EDP'})

    # loss model info ----------------------------------------------------------
    if loss is None:
        warnings.warn(UserWarning(
            "Loss model characteristics were not defined in the input file"))

    # replacement cost
    if (loss is not None) and (repl_cost is not None):
        data['general'].update({
            'replacement_cost': float_or_None(repl_cost)})
    elif dec_vars['rec_cost']:
        if AT == 'P58':
            warnings.warn(UserWarning(
                "Building replacement cost was not defined in the "
                "input file."))
        elif AT.startswith('HAZUS'):
            raise ValueError(
                "Building replacement cost was not defined in the input "
                "file.")

    # replacement time
    if (loss is not None) and (repl_time is not None):
        data['general'].update({
            'replacement_time': float_or_None(repl_time)})
    elif dec_vars['rec_time']:
        if AT == 'P58':
            warnings.warn(UserWarning(
                "Building replacement time was not defined in the "
                "input file."))
        elif AT.startswith('HAZUS'):
            raise ValueError(
                "Building replacement time was not defined in the input "
                "file.")

    # inhabitants
    if dec_vars['injuries']:
        if (loss is not None) and (inhabitants is not None):

            # occupancy type
            occupancy = inhabitants.get("OccupancyType", None)
            if occupancy is not None:
                data['general'].update({'occupancy_type': occupancy})
            else:
                raise ValueError("Occupancy type was not defined in the input "
                                 "file.")

            # peak population
            peak_pop = inhabitants.get("PeakPopulation", None)
            if peak_pop is not None:
                peak_pop = [float_or_None(pop) for pop in peak_pop.split(',')]

                # If the number of stories is specified...
                if 'stories' in data['general']:
                    stories = data['general']['stories']
                    pop_in = len(peak_pop)

                    # and the population list does not provide values
                    # for every story:
                    for _ in range(pop_in, stories):
                        # If only one value is provided, then it is assumed to
                        # be the population on every story.
                        if pop_in == 1:
                            peak_pop.append(peak_pop[0])

                        # Otherwise, the values are assumed to correspond to
                        # the bottom stories and the upper ones are filled with
                        # zeros. A warning message is displayed in this case.
                        else:
                            peak_pop.append(0)

                    if pop_in > 1 and pop_in != stories:
                        warnings.warn(UserWarning(
                            "Peak population was specified to some, but not all "
                            "stories. The remaining stories are assumed to have "
                            "zero population."
                        ))

                data['general'].update({'population': peak_pop})
            else:
                raise ValueError(
                    "Peak population was not defined in the input file.")
        else:
            raise ValueError(
                "Information about inhabitants was not defined in the input "
                "file.")

    # dependencies -------------------------------------------------------------

    # set defaults
    # We assume 'Independent' for all unspecified fields except for the
    # fragilities where 'per ATC recommendation' is the default setting.
    dependency_to_acronym = {
        'btw. Fragility Groups'  : 'FG',
        'btw. Performance Groups': 'PG',
        'btw. Floors'            : 'LOC',
        'btw. Directions'        : 'DIR',
        'btw. Component Groups'  : 'CSG',
        'btw. Damage States'     : 'DS',
        'Independent'            : 'IND',
        'per ATC recommendation' : 'ATC',
    }

    if AT == 'P58':

        # Each entry: target key, source key, and the decision variable that
        # needs the setting ('' means that it is always needed).
        for target_att, source_att, dv_req in [
            ['quantities', 'Quantities', ''],
            ['fragilities', 'Fragilities', ''],
            ['injuries', 'Injuries', 'injuries'],
            ['rec_costs', 'ReconstructionCosts', 'rec_cost'],
            ['rec_times', 'ReconstructionTimes', 'rec_time'],
            ['red_tags', 'RedTagProbabilities', 'red_tag'], ]:

            if (depends is not None) and (source_att in depends):
                data['dependencies'].update({
                    target_att: dependency_to_acronym[depends[source_att]]})
            elif dv_req == '' or dec_vars[dv_req]:
                if target_att != 'fragilities':
                    data['dependencies'].update({target_att: 'IND'})
                else:
                    data['dependencies'].update({target_att: 'ATC'})

                warnings.warn(UserWarning(
                    "Correlation between {} was not ".format(source_att)+
                    "defined in the input file. Using default values."))

        if (depends is not None) and ('CostAndTime' in depends):
            data['dependencies'].update({
                'cost_and_time': bool(depends['CostAndTime'])})
        elif dec_vars['rec_cost'] or dec_vars['rec_time']:
            data['dependencies'].update({'cost_and_time': False})
            warnings.warn(UserWarning(
                "Correlation between reconstruction cost and time was not "
                "defined in the input file. Using default values."))

        if (depends is not None) and ('InjurySeverities' in depends):
            data['dependencies'].update({
                'injury_lvls': bool(depends['InjurySeverities'])})
        elif dec_vars['injuries']:
            data['dependencies'].update({'injury_lvls': False})
            warnings.warn(UserWarning(
                "Correlation between injury levels was not defined in the "
                "input file. Using default values."))

    if verbose:
        pp.pprint(data)

    return data

In [None]:
def read_SimCenter_EDP_input(input_path, EDP_kinds=('PID', 'PFA'),
                             units=None,
                             verbose=False):
    """
    Read the EDP input information from a text file with a tabular structure.

    The SimCenter in the function name refers to having specific columns
    available in the file. Currently, the expected formatting follows the
    output formatting of Dakota that is applied for the dakotaTab.out. When
    using pelicun with the PBE Application, such a dakotaTab.out is
    automatically generated. The Input section of the documentation provides
    more information about the expected formatting of the EDP input file.

    Every column whose header contains one of the `EDP_kinds` is expected to
    have a header made of at least four dash-separated fields; the first field
    is stored as the scenario id, the third as the location, and the fourth as
    the direction (e.g. '1-PID-2-1').

    Parameters
    ----------
    input_path: string or path-like
        Location of the EDP input file. If the name ends with 'csv'
        (case-insensitive), the file is parsed as a comma-separated file;
        otherwise it is parsed as a whitespace-separated (Dakota) table.
    EDP_kinds: tuple of strings, default: ('PID', 'PFA')
        Collection of the kinds of EDPs in the input file. The default pair of
        'PID' and 'PFA' can be replaced or extended by any other EDPs.
    units: dict, optional, default: None
        Defines the unit conversion that shall be applied to the EDP values.
        Keys are EDP kinds and values are the scale factors the raw values are
        multiplied with. None is equivalent to {'PID': 1., 'PFA': 1.}. EDP
        kinds that are found in the file but have no entry here are not scaled
        (factor 1.0) and a UserWarning is issued.
    verbose: boolean
        If True, the function echoes the information read from the file. This
        can be useful to ensure that the information in the file is properly
        read by the method.

    Returns
    -------
    data: dict
        A dictionary with all the EDP data. Each key is an EDP kind found in
        the file and each value is a list with one dict per matching column
        that holds `raw_data`, `location`, `direction`, and `scenario_id`.
    """

    # a None sentinel avoids sharing a mutable default between calls
    if units is None:
        units = dict(PID=1., PFA=1.)

    # initialize the data container
    data = {}

    # read the collection of EDP inputs...
    # If the file name ends with csv, we assume a standard csv file.
    # str() makes the check work for pathlib.Path inputs as well.
    if str(input_path).lower().endswith('csv'):
        EDP_raw = pd.read_csv(input_path, header=0, index_col=0)

    # otherwise, we assume that a dakota file is provided...
    else:
        # the read_csv method in pandas is sufficiently versatile to handle the
        # tabular format of dakota
        EDP_raw = pd.read_csv(input_path, sep=r'\s+', header=0, index_col=0)

    # set the index to be zero-based
    EDP_raw.index = EDP_raw.index - 1

    # EDP kinds for which the missing unit has already been reported
    kinds_without_unit = set()

    # search the header for EDP information
    for column in EDP_raw.columns:
        for kind in EDP_kinds:
            if kind not in column:
                continue

            # extract info about the location, direction, and scenario
            info = column.split('-')

            # get the scale factor to perform unit conversion; fall back to
            # no conversion when the caller did not provide a factor
            if kind in units:
                f_unit = units[kind]
            else:
                f_unit = 1.
                if kind not in kinds_without_unit:
                    kinds_without_unit.add(kind)
                    warnings.warn(UserWarning(
                        "Unit conversion for {} was not ".format(kind) +
                        "defined. Using default values."))

            # store the data
            data.setdefault(kind, []).append(dict(
                raw_data=(EDP_raw[column].values * f_unit).tolist(),
                location=info[2],
                direction=info[3],
                scenario_id=info[0]
            ))

    if verbose: pp.pprint(data)

    return data

In [None]:
def write_SimCenter_DL_output(output_path, output_df, index_name='#Num',
                              collapse_columns=True, stats_only=False):
    """
    Write a DataFrame with damage and loss results to a csv file.

    Parameters
    ----------
    output_path: string
        Location of the csv file to write.
    output_df: DataFrame
        The data to save. The object passed in is not modified.
    index_name: string, default: '#Num'
        Name assigned to the index column in the output file.
    collapse_columns: boolean, default: True
        If True and the columns have more than one level, the levels of each
        column label are joined with '/' and spaces are replaced with '_' so
        that the file has a single header row. Single-level columns are
        written unchanged.
    stats_only: boolean, default: False
        If True, the summary returned by the describe method of `output_df`
        (including the 10th, 50th, and 90th percentiles) is saved instead of
        the individual rows.
    """

    # work on a copy so that the caller's DataFrame is left untouched
    output_df = output_df.copy()

    # if the summary flag is set, then not all realizations are returned, but
    # only the descriptive statistics with the 10%, 50%, and 90% percentiles
    if stats_only:
        output_df = output_df.describe([0.1, 0.5, 0.9])

    # the name of the index column is replaced with the provided value
    output_df.index.name = index_name

    # multiple levels of indices are collapsed into a single level if needed;
    # every level is kept so that the collapsed labels remain distinguishable
    if collapse_columns and output_df.columns.nlevels > 1:
        output_df.columns = [
            '/'.join('{}'.format(level) for level in label).replace(' ', '_')
            for label in output_df.columns]

    # write the results in a csv file
    # TODO: provide other file formats
    output_df.to_csv(output_path)

In [None]:
def write_SimCenter_DM_output(DM_file_path, DMG_df):
    """
    Save the mean damage results to a json file.

    The output has an 'aggregate' entry with one value per damage state
    (plus '0' for no damage), and one nested FG -> PG -> DS entry with the
    mean of every column of `DMG_df`.

    Parameters
    ----------
    DM_file_path: string
        Location of the json file to write.
    DMG_df: DataFrame
        Damage data with columns that have levels named 'FG', 'PG', and 'DS'.
        The damage state labels have to include '1-1'. Rows are presumably
        individual realizations - verify against the caller.
    """

    # The damage state of the building (asset) is taken as the highest damage
    # state among its components / component groups. This suits a HAZUS
    # assessment; a FEMA P58 assessment needs something more sophisticated.

    # Collect the damage per DS over all components and turn every positive
    # entry into a 1.0 flag.
    DMG_agg = DMG_df.T.groupby('DS').sum().T
    positive = DMG_agg > 0.0
    DMG_agg[positive] = DMG_agg[positive] / DMG_agg[positive]

    # Wherever a damage state is flagged, flag all lower damage states too, so
    # that the columns describe DS exceedance.
    n_DS = len(DMG_agg.columns)
    for col_pos in range(n_DS):
        flagged_rows = np.where(DMG_agg.iloc[:, col_pos].values > 0.0)[0]
        DMG_agg.iloc[flagged_rows, idx[0:col_pos]] = 1.0

    # P(DS=ds) is the difference of consecutive DS exceedance probabilities.
    # This will not work well for a FEMA P58 assessment with Damage State
    # Groups that include multiple Damage States.
    DS_probs = DMG_agg.describe().loc['mean', :]
    P_no_damage = 1.0 - DS_probs['1-1']
    for pos in range(len(DS_probs.index) - 1):
        DS_probs.iloc[pos] -= DS_probs.iloc[pos + 1]

    # Add the probability of no damage for convenience.
    DS_probs['0'] = P_no_damage
    DS_probs = DS_probs.sort_index()

    # Collect the aggregate results for the output json file
    DM = {'aggregate': {str(DS): DS_probs[DS] for DS in DS_probs.index}}

    # Now take the mean of every individual component / component assembly
    # column...
    DMG_mean = DMG_df.describe().loc['mean', :]

    # and collect the results following the FG -> PG -> DS hierarchy.
    for FG in sorted(DMG_mean.index.get_level_values('FG').unique()):
        FG_out = {}
        DM[str(FG)] = FG_out
        FG_mean = DMG_mean.loc[idx[FG], :]

        for PG in sorted(FG_mean.index.get_level_values('PG').unique()):
            PG_out = {}
            FG_out[str(PG)] = PG_out
            PG_mean = FG_mean.loc[idx[:, PG], :]

            for DS in sorted(PG_mean.index.get_level_values('DS').unique()):
                PG_out[str(DS)] = DMG_mean.loc[(FG, PG, DS)]

    with open(DM_file_path, 'w') as f:
        json.dump(DM, f, indent=2)

In [None]:
def write_SimCenter_DV_output(DV_file_path, DV_df, DV_name):
    """
    Add the statistics of a decision variable to a json file.

    If the file already exists and holds a json object, its other entries are
    kept and only the entry of the given decision variable is replaced.
    Otherwise, a new file is created.

    The entry holds a 'total' item with the statistics of the row-wise sum of
    `DV_df`, and a nested FG -> PG -> DS hierarchy with the statistics of each
    column. The statistics are those returned by the describe method with the
    10th, 50th, and 90th percentiles, without the count. Collecting the
    statistics is best-effort: if it fails, the entry is saved with whatever
    has been collected until that point.

    Parameters
    ----------
    DV_file_path: string
        Location of the json file to update or create.
    DV_df: DataFrame
        Decision variable data with columns that have levels named 'FG',
        'PG', and 'DS'. Rows are presumably individual realizations - verify
        against the caller.
    DV_name: string
        Key in the module-level `convert_dv_name` mapping; the mapped value
        is used as the name of the entry in the output file.
    """

    DV_name = convert_dv_name[DV_name]

    # Load the results already in the file. A missing, unreadable, or
    # malformed file (json.JSONDecodeError is a ValueError) means that we
    # start from scratch.
    try:
        with open(DV_file_path, 'r') as f:
            DV = json.load(f)
    except (OSError, ValueError):
        DV = {}

    # only a json object can be extended with a new entry
    if not isinstance(DV, dict):
        DV = {}

    DV_i = {}
    DV[DV_name] = DV_i

    percentiles = [0.1, 0.5, 0.9]

    try:
        # statistics of the total over all columns
        DV_tot = DV_df.sum(axis=1).describe(percentiles).drop('count')
        DV_i['total'] = {}
        for stat in DV_tot.index:
            DV_i['total'][stat] = DV_tot.loc[stat]

        # statistics of each column, following the FG -> PG -> DS hierarchy
        DV_stats = DV_df.describe(percentiles).drop('count')
        for FG in sorted(DV_stats.columns.get_level_values('FG').unique()):
            DV_i[str(FG)] = {}

            for PG in sorted(
                DV_stats.loc[:,idx[FG]].columns.get_level_values('PG').unique()):
                DV_i[str(FG)][str(PG)] = {}

                for DS in sorted(
                    DV_stats.loc[:,idx[FG, PG]].columns.get_level_values('DS').unique()):
                    DV_stats_i = DV_stats.loc[:,(FG,PG,DS)]
                    DV_i[str(FG)][str(PG)][str(DS)] = {
                        stat: DV_stats_i.loc[stat]
                        for stat in DV_stats_i.index}
    except Exception:
        # Deliberately best-effort: data that cannot be summarized leaves a
        # partial entry rather than aborting the output. Only Exception is
        # caught so that KeyboardInterrupt and SystemExit still propagate.
        pass

    with open(DV_file_path, 'w') as f:
        json.dump(DV, f, indent=2)