# Data Modeling

Exploring the Drainit data models (see `models/__init__.py`) and validating a NAACC culvert CSV export against them.

In [1]:
import json
from pathlib import Path
from typing import List

# NOTE: dataclasses not available in Python 3.6; use backported package instead:
from dataclasses import field, asdict, replace

import petl as etl
import marshmallow_dataclass
import marshmallow

from marshmallow import Schema, fields, EXCLUDE
from marshmallow_dataclass import dataclass, class_schema

In [2]:
@dataclass
class Precips:
    """Precipitation values, one optional float per frequency.

    Fields run from ``f1`` through ``f1000`` and each one defaults to
    ``None`` until a value is supplied.

    NOTE(review): the numeric suffix presumably is the recurrence interval
    in years (1-year ... 1000-year event) -- confirm against the rainfall
    source before relying on it.
    """

    f1: float = None
    f2: float = None
    f5: float = None
    f10: float = None
    f25: float = None
    f50: float = None
    f100: float = None
    f200: float = None
    f500: float = None
    f1000: float = None

# marshmallow schema class generated from the Precips dataclass
PrecipsSchema = class_schema(Precips)


@dataclass
class NaaccPoint:
    """A single culvert record, using NAACC field names.

    Only a subset of the available NAACC fields is modeled here. The
    trailing comment on each field gives its short name (``field_short``).
    Input keys that are not declared on this class are excluded by the
    generated schema (see ``Meta.unknown``).
    """

    # --- identifiers
    Survey_Id: str = None  # short name: Survey_ID
    Naacc_Culvert_Id: str = None  # short name: NAACC_ID

    # --- culvert count (defaults to 1, unlike the other fields)
    Number_Of_Culverts: int = 1  # short name: Flags

    # --- descriptive attributes
    Road: str = None  # short name: Rd_Name
    Material: str = None  # short name: Culv_Mat
    Inlet_Type: str = None  # short name: In_Type
    Inlet_Structure_Type: str = None  # short name: In_Shape

    # --- structure measurements and crossing details
    Inlet_Width: float = None  # short name: In_A
    Inlet_Height: float = None  # short name: In_B
    Road_Fill_Height: float = None  # short name: HW
    Slope_Percent: float = None  # short name: Slope
    Crossing_Structure_Length: float = None  # short name: Length
    Outlet_Structure_Type: str = None  # short name: Out_Shape
    Outlet_Width: float = None  # short name: Out_A
    Outlet_Height: float = None  # short name: Out_B
    Crossing_Type: str = None  # short name: Crossing_Type
    Crossing_Comment: str = None  # short name: Comments

    # --- location
    GIS_Latitude: float = None  # short name: Lat
    GIS_Longitude: float = None  # short name: Long

    class Meta:
        # exclude, rather than reject, input keys not declared above
        unknown = EXCLUDE


# marshmallow schema class generated from the NaaccPoint dataclass
NaaccPointSchema = class_schema(NaaccPoint)


@dataclass
class Basin:
    """Base model for the characteristics of a single basin.

    Covers both the outlet (point) and the catchment (polygon) used for
    analysis. Every scalar field defaults to ``None``; ``precip_table``
    defaults to a new, empty ``Precips`` instance for each basin.
    """
    # unique id field, derived from the basin outlet point; AKA the
    # pour_point_field. For NAACC-based culvert modeling, this is the
    # NAACC Naacc_Culvert_Id field
    cid: str = None
    # group id field. non-unique ID field that indicates groups of related
    # outlets. Used primarily for NAACC-based culvert modeling, this is the
    # NAACC Survey_Id field
    gid: str = None

    # characteristics used for calculating peak flow
    area_sqkm: float = None  # <area of inlet's catchment in square km>
    avg_slope_pct: float = None  # <average slope of DEM in catchment>
    avg_cn: float = None  # <average curve number in the catchment>
    max_fl: float = None  # <maximum flow length in the catchment>
    # <basin-specific precipitation estimates>
    # default_factory builds a separate Precips instance per Basin. A bare
    # `= Precips` default would store the class object itself, so every
    # Basin would share (and could mutate) the same class-level attributes.
    precip_table: Precips = field(default_factory=Precips)

    # geometries
    inlet_geom: str = None
    basin_geom: str = None

    # for recording the location of intermediate outputs
    basin_polygon_filepath: str = None
    basin_raster_filepath: str = None


BasinSchema = marshmallow_dataclass.class_schema(Basin)


class RainfallRasterConfig:
    """Metadata about a rainfall download, with helpers to export it.

    Attributes:
        path: the location given to the constructor, held as a
            ``pathlib.Path``.
        lookup_table: a list that starts empty. ``as_csv`` passes it to
            ``petl.fromdicts``, so it is expected to hold dicts.
    """

    def __init__(self, path):
        self.lookup_table = []
        self.path = Path(path)

    def as_dict(self):
        """Return the config as a plain dict, with the path as a string."""
        return dict(path=str(self.path), lookup_table=self.lookup_table)

    def as_json(self, out_path):
        """Write the ``as_dict()`` representation to ``out_path`` as JSON."""
        with open(out_path, 'w') as out_file:
            payload = self.as_dict()
            json.dump(payload, out_file)

    def as_csv(self, out_path):
        """Write ``lookup_table`` to ``out_path`` as CSV, via petl."""
        rows = etl.fromdicts(self.lookup_table)
        etl.tocsv(rows, out_path)


@dataclass
class WorkflowConfig:
    """All parameters needed for a model run, gathered in one object.

    Path-like settings are plain strings defaulting to ``None``. The two
    list fields each start as a new empty list per instance.
    """

    # === directories ===
    work_dir: str = None

    # === input points (culverts or catch-basins) ===
    points_filepath: str = None
    points_id_fieldname: str = None
    is_naacc: bool = False

    # === input landscape rasters ===
    raster_dem_filepath: str = None
    raster_flowdir_filepath: str = None
    raster_slope_filepath: str = None
    raster_curvenumber_filepath: str = None
    raster_watershed_filepath: str = None

    # === input rainfall ===
    precip_src_config_filepath: str = None
    precip_noaa_csv_filepath: str = None

    # === outputs ===
    output_points_filepath: str = None
    output_basins_filepath: str = None

    # === models for intermediate data ===
    culverts: List[NaaccPoint] = field(default_factory=list)
    basins: List[Basin] = field(default_factory=list)

    # === analysis parameters ===
    # NOTE(review): 9.290304e-08 is the number of square km in one square
    # foot, so this presumably converts sq ft to sq km -- confirm.
    area_conv_factor: float = 9.290304e-08
    leng_conv_factor: float = 1
    basins_simplify: bool = False


# marshmallow schema class generated from the WorkflowConfig dataclass
WorkflowConfigSchema = class_schema(WorkflowConfig)

In [5]:
def _xwalk_entry(idx, name, short, cast=None):
    """Build one crosswalk record; 'field_type' is present only when a cast is given."""
    entry = {'field_idx': idx, 'field_name': name, 'field_short': short}
    if cast is not None:
        entry['field_type'] = cast
    return entry


# Crosswalk between full NAACC field names and their short names.
# Each row is (field_idx, field_name, field_short[, field_type]); the
# optional fourth item is the Python type the column should be cast to.
NAACC_HEADER_XWALK = [
    _xwalk_entry(*row)
    for row in (
        (0, 'Survey_Id', 'Survey_ID'),
        (35, 'Naacc_Culvert_Id', 'NAACC_ID'),
        (20, 'GIS_Latitude', 'Lat', float),
        (19, 'GIS_Longitude', 'Long', float),
        (26, 'Road', 'Rd_Name'),
        (49, 'Material', 'Culv_Mat'),
        (22, 'Inlet_Type', 'In_Type'),
        (44, 'Inlet_Structure_Type', 'In_Shape'),
        (47, 'Inlet_Width', 'In_A', float),
        (43, 'Inlet_Height', 'In_B', float),
        (27, 'Road_Fill_Height', 'HW', float),
        (61, 'Slope_Percent', 'Slope', float),
        (39, 'Crossing_Structure_Length', 'Length', float),
        (55, 'Outlet_Structure_Type', 'Out_Shape'),
        (58, 'Outlet_Width', 'Out_A', float),
        (54, 'Outlet_Height', 'Out_B', float),
        (11, 'Crossing_Type', 'Crossing_Type'),
        (8, 'Crossing_Comment', 'Comments'),
        (24, 'Number_Of_Culverts', 'Flags', int),
    )
]

In [7]:
# full NAACC field name -> short field name
NAACC_HEADER_LOOKUP = dict(
    (entry['field_name'], entry['field_short']) for entry in NAACC_HEADER_XWALK
)

# full NAACC field name -> type to cast to, only for entries that declare one
NAACC_TYPECASTS_FULLNAME = {
    entry['field_name']: entry['field_type']
    for entry in NAACC_HEADER_XWALK
    if 'field_type' in entry
}

In [8]:
def validator(row, schema):
    """Validate one table row against a schema.

    Args:
        row: iterable of values that also exposes ``flds``, the matching
            sequence of field names (as a petl record does).
        schema: object whose ``validate(dict)`` returns a dict of errors
            (as a marshmallow schema does).

    Returns:
        The error dict when it is non-empty, otherwise ``None``.
    """
    record = dict(zip(row.flds, row))
    problems = schema.validate(record)
    return problems if problems else None
        

In [9]:
# Read the NAACC CSV export as a petl table, then cast the columns listed
# in NAACC_TYPECASTS_FULLNAME to their declared Python types.
t = etl.convert(
    etl.fromcsv(r"D:\Dropbox (CivicMapper)\Projects\202004-02 Cornell Modeling\3 - Production\tool outputs\c19 baseline\C19\C19.csv"),
    NAACC_TYPECASTS_FULLNAME
)
t

Survey_Id,Crossing_Code,Alignment,Aqua_Pass_Score,AOP,Approved,Bankfull_Width,Bankfull_Width_Confidence,Coordinator,Crossing_Comment,Crossing_Condition,Crossing_Span,Crossing_Type,Data_Checked_Coordinator,Database_Entry_By,Date_Data_Checked,Date_First_Entered,Date_Last_Updated,Date_Observed,Evaluation,Flag_Name,Flow_Condition,GIS_Latitude,GIS_Longitude,GPS_X_Coordinate,GPS_Y_Coordinate,GPS_Distance,Inlet_Type,Lccx,Lccy,Lccx_Moved,Lccy_Moved,Local_Id,Location_Description,Maine_Private,No_Crossing,Number_Of_Culverts,Observer,Road,Road_Fill_Height,Road_Type,Scour_Pool,State,Stream_Name,Terrestrial_Passage_Score,Tidal_Site,Town,Naacc_Culvert_Id,Armoring,Barrier_Name,Barrier_Severity,Crossing_Structure_Length,Culvert_Condition_Assess_Id,Dry_Passage,Inlet_Abutment_Height,Inlet_Grade,Inlet_Height,Inlet_Openness,Inlet_Structure_Type,Inlet_Substrate_Water_Width,Inlet_Type1,Inlet_Water_Depth,Inlet_Width,Internal_Structure,Internal_Structure_Comment,Material,NHD_HUC8_Watershed,Outlet_Drop_To_Stream_Bottom,Outlet_Drop_To_Water_Surface,Outlet_Grade,Outlet_Height,Outlet_Openness,Outlet_Structure_Type,Outlet_Substrate_Water_Width,Outlet_Water_Depth,Outlet_Width,Passage_Height,Slope_Confidence,Slope_Percent,Structure_Comment,Structure_Substrate_Matches_Stream,Substrate_Continuous,Substrate_Type,Water_Depth_Matches_Stream,Water_Velocity,County
66697,xy4216342773600046,No data,-1.0,no score - missing data,True,-1,No data,"Jastremski, Michael","Long hike in, off gated driveway",No data,No data,Inaccessible,"Jastremski, Michael",1774,2019-01-10 10:06:43.467,2019-01-10 10:01:41.81,2019-01-10 10:01:41.933,2018-08-14,no score - missing data,No data,No data,42.163427,-73.600046,-73.600091,42.163404,4.5,,1821475.438,2344858.227,1821475.438,2344858.227,,"Hike in, possible dirt path off driveway 115 Copake Lake Road",False,False,-1,"Larson, Lindsay",,-1.0,Trail,No data,NY,Unnamed,,No data,Copake,,,No culvert,,,,,,,,,,,,,,,,,Middle Hudson,,,,,,,,,,,,,,,,,,,Columbia
66742,xy4210207573548535,Flow-Aligned,0.868200103,Reduced AOP,True,34,Low/Estimated,"Jastremski, Michael",,OK,Spans Full Channel & Banks,Bridge,"Jastremski, Michael",1774,2019-01-23 09:40:00.03,2019-01-17 15:46:16.373,2019-01-17 15:46:16.937,2019-01-17,Insignificant barrier,No data,Typical low-flow,42.102075,-73.548535,-73.548579,42.102078,3.6,Headwall and Wingwalls,1827192.044,2339175.387,1827192.044,2339175.387,,1st bridge on 7a after empire road intersection,False,False,1,"Larson, Lindsay",County Route 7a,0.0,Paved,,NY,Bish Bash Brook,,No,Copake,64662.0,,,,35.0,0.0,Yes,-1.0,At Stream Grade,6.8,10.891,Box/Bridge with Abutments,48.5,Headwall and Wingwalls,0.89,64.5,,,Combination,Middle Hudson,0.0,0.0,At Stream Grade,8.0,12.974,Box/Bridge with Abutments,32.2,0.96,64.5,5.0,,-1.0,No data,,,,Yes,Yes,Columbia
66743,xy4212007573518272,Skewed (>45°),0.72047168,Reduced AOP,True,5,Low/Estimated,"Jastremski, Michael",,OK,Moderate,Culvert,"Jastremski, Michael",1774,2019-01-23 09:39:25.263,2019-01-17 15:50:12.46,2019-01-17 15:50:13.1,2019-01-17,Minor barrier,No data,Typical low-flow,42.120075,-73.518272,-73.518272,42.120075,0.0,Headwall,-1.0,-1.0,-1.0,-1.0,,Copake iron works historic sign,False,False,1,"Larson, Lindsay",Route 344,3.5,Paved,Small,NY,Unnamed,,No,Copake,64663.0,,,,67.0,0.0,No,-1.0,At Stream Grade,3.0,0.101,Round Culvert,1.5,Headwall,0.36,3.0,,,Metal,Middle Hudson,0.0,0.0,At Stream Grade,2.1,0.068,Round Culvert,2.9,0.3,3.0,-1.0,Low,2.8,No data,Contrasting,25%,Gravel,No-Shallower,Yes,Columbia
66744,xy4211668073507688,Flow-Aligned,0.955,Full AOP,True,50,Low/Estimated,"Jastremski, Michael",,OK,Moderate,Bridge,"Jastremski, Michael",1774,2019-01-23 09:39:43.763,2019-01-17 16:07:20.287,2019-01-17 16:07:20.753,2019-01-17,Insignificant barrier,No data,Typical low-flow,42.11668,-73.507688,-73.507852,42.116664,13.7,Headwall and Wingwalls,1830071.077,2341549.477,1830071.077,2341549.477,,Bridge on gravel entry road to trail to bash bish falls,False,False,1,"Larson, Lindsay",Trail to Bash Bish Falls,0.0,Trail,,NY,Bash Bish Brook,,No,Copake,64664.0,,,,11.5,0.0,No,-1.0,At Stream Grade,10.0,23.033,Box/Bridge with Abutments,29.9,Headwall and Wingwalls,1.2,30.1,,,Combination,Middle Hudson,0.0,0.0,At Stream Grade,9.9,23.875,Box/Bridge with Abutments,30.0,1.1,31.2,-1.0,,-1.0,No data,Comparable,100%,Cobble,Yes,Yes,Columbia
66750,xy4208481473620643,Flow-Aligned,0.625165921,No AOP,True,-1,No data,"Jastremski, Michael",,OK,Severe,Culvert,"Jastremski, Michael",36,2019-01-23 09:43:16.897,2019-01-23 09:02:55.8,2019-01-23 09:43:12.91,2019-01-18,Minor barrier,No data,Typical low-flow,42.084814,-73.620643,-73.62056,42.084829,7.1,Projecting,1821883.12,2335913.824,1821883.12,2335913.824,,Just past first house on Banor off Tompkins,False,False,1,"Larson, Lindsay",Banor Road,1.2,Unpaved,,NY,Unnamed,,No,Copake,64672.0,,,,31.0,0.0,No,-1.0,At Stream Grade,3.0,0.219,Round Culvert,1.3,Projecting,0.36,3.0,,,Plastic,Middle Hudson,0.5,0.2,At Stream Grade,3.0,0.223,Round Culvert,1.5,0.12,3.0,-1.0,Low,3.1,No data,,,,No-Shallower,Yes,Columbia


In [10]:
# Add a 'validation_errors' column holding, for each row, the schema's
# error dict (or None when the row has no errors).
nc = NaaccPointSchema()
validated = etl.addfield(t, 'validation_errors', lambda record: validator(record, nc))

# row 1 is the first data row (row 0 of a petl table is the header)
validated[1]

('66697',
 'xy4216342773600046',
 'No data',
 '-1',
 'no score - missing data',
 'true',
 '-1',
 'No data',
 'Jastremski, Michael',
 'Long hike in, off gated driveway',
 'No data',
 'No data',
 'Inaccessible',
 'Jastremski, Michael',
 '1774',
 '2019-01-10 10:06:43.467',
 '2019-01-10 10:01:41.81',
 '2019-01-10 10:01:41.933',
 '2018-08-14',
 'no score - missing data',
 'No data',
 'No data',
 42.163427,
 -73.600046,
 '-73.600091',
 '42.163404',
 '4.5',
 '',
 '1821475.438',
 '2344858.227',
 '1821475.438',
 '2344858.227',
 '',
 'Hike in, possible dirt path off driveway 115 Copake Lake Road',
 'false',
 'false',
 -1,
 'Larson, Lindsay',
 '',
 -1.0,
 'Trail',
 'No data',
 'NY',
 'Unnamed',
 '',
 'No data',
 'Copake',
 '',
 '',
 'No culvert',
 '',
 None,
 '',
 '',
 '',
 '',
 None,
 '',
 '',
 '',
 '',
 '',
 None,
 '',
 '',
 '',
 'Middle Hudson',
 '',
 '',
 '',
 None,
 '',
 '',
 '',
 '',
 None,
 '',
 '',
 None,
 '',
 '',
 '',
 '',
 '',
 '',
 'Columbia',
 {'Outlet_Structure_Type': ['Not a valid 