# NM Regulatory Data

Preprocessing input data for a smoother upload of the state data to the WaDE 2.0 database.
Using geopandas to read in the shapefiles and converting the geometry to WKT for the ReportingUnit geometry.

Notes
- Will create separate dataframes for each regulatory input, combine into single long output dataframe.
- WKT will be in its own separate geometry dataframe.

In [None]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # Raise the column display limit so wide DataFrames are shown in full in Jupyter Notebook

# Set the working directory. Every input path used below is relative, so it is
# resolved against this folder after the chdir; the export calls at the end also
# use relative file names.
# NOTE: the path is hard-coded to one user's machine and must be edited to run elsewhere.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/NewMexico/Regulatory/RawInputData"
os.chdir(workingDir)

## Interstate Stream Compact Regions

In [None]:
# CSV input file for the Interstate Stream Compact Regions data.
# Relative path: resolved against the working directory set via os.chdir.
fileInput = "InterstateStreamCompactRegions_input.csv"

# Raw input rows; `df` is the source frame read by the next cell.
df = pd.read_csv(fileInput)
print(len(df))  # number of raw input rows
df.head(3)  # notebook preview of the first rows

In [None]:
# Interstate Stream Compact Regions Dataframe
# -------------------------------------------
# Maps the raw Interstate Stream Compact Regions input (df) onto the "in_*"
# input columns, one output row per input row, then removes exact duplicates.

# Create new dataframe (same row labels as the input so column assignments align)
columnList = ['in_ReportingUnitName',
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              "in_OversightAgency",
              "in_RegulatoryDescription",
              "in_RegulatoryName",
              "in_RegulatoryStatusCV",
              "in_RegulatoryStatute",
              "in_RegulatoryStatuteLink",
              "in_StatutoryEffectiveDate",
              "in_RegulatoryOverlayTypeCV",
              "in_WaterSourceTypeCV"]
df_ISCR = pd.DataFrame(columns=columnList, index=df.index)

# ReportingUnit Inputs
df_ISCR['in_ReportingUnitName'] = df['Full_Name']
df_ISCR['in_ReportingUnitNativeID'] = df['OID_']
df_ISCR['in_ReportingUnitTypeCV'] = "Interstate River Compact"

# RegulatoryOverlay
df_ISCR['in_OversightAgency'] = df['States']
df_ISCR['in_RegulatoryDescription'] = df['RegulatoryDescription']
df_ISCR['in_RegulatoryName'] = df['Full_Name']
df_ISCR['in_RegulatoryStatusCV'] = "Active"
df_ISCR['in_RegulatoryStatute'] = ""  #blank
df_ISCR['in_RegulatoryStatuteLink'] = df['URL']
# The EffectiveDate value is stringified and prefixed with "01/01/".
# NOTE(review): assumes EffectiveDate holds whole-number years with no blanks;
# a float or missing value would be stringified as-is - confirm against the input.
df_ISCR['in_StatutoryEffectiveDate'] = "01/01/" + df['EffectiveDate'].astype(str)
df_ISCR['in_RegulatoryOverlayTypeCV'] = "Interstate River Compact"
df_ISCR['in_WaterSourceTypeCV'] = "Surface Water"

# drop=True discards the old row labels instead of inserting them as a stray
# "index" column, which would otherwise be carried through the concatenation
# into the exported CSV.
df_ISCR = df_ISCR.drop_duplicates().reset_index(drop=True)

print(len(df_ISCR))
df_ISCR.head(3)

## OSE Water Right District Boundary

In [None]:
# CSV input file for the OSE Water Right District Boundary data.
# Relative path: resolved against the working directory set via os.chdir.
fileInput = "OSEWaterRightDistrictBoundary_input.csv"

# Rebinds `df` to this input; the next cell reads its columns.
df = pd.read_csv(fileInput)
print(len(df))  # number of raw input rows
df.head(3)  # notebook preview of the first rows

In [None]:
# OSE Water Right District Boundary Dataframe
# -------------------------------------------
# Maps the raw OSE Water Right District input (df) onto the "in_*" input
# columns, one output row per input row, then removes exact duplicates.

# Create new dataframe (same row labels as the input so column assignments align)
columnList = ['in_ReportingUnitName',
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              "in_OversightAgency",
              "in_RegulatoryDescription",
              "in_RegulatoryName",
              "in_RegulatoryStatusCV",
              "in_RegulatoryStatute",
              "in_RegulatoryStatuteLink",
              "in_StatutoryEffectiveDate",
              "in_RegulatoryOverlayTypeCV",
              "in_WaterSourceTypeCV"]
df_EWRD = pd.DataFrame(columns=columnList, index=df.index)

# ReportingUnit Inputs
df_EWRD['in_ReportingUnitName'] = df['name']
df_EWRD['in_ReportingUnitNativeID'] = df['ose_dist_i']
df_EWRD['in_ReportingUnitTypeCV'] = "Water Rights District"

# RegulatoryOverlay
# Agency and regulatory names are derived from the district name plus a fixed suffix.
df_EWRD['in_OversightAgency'] = df['name'] + " OSE"
df_EWRD['in_RegulatoryDescription'] = "District operated by a Water Master appointed by the Office of the State Engineer, who is charged with administering the state's water resources. The State Engineer has authority over the supervision, measurement, appropriation, and distribution of all surface and groundwater in New Mexico, including streams and rivers that cross state boundaries"
df_EWRD['in_RegulatoryName'] = df['name'] + " District"
df_EWRD['in_RegulatoryStatusCV'] = "Active"
# NOTE(review): a URL is stored in the statute field here, while the link field
# below is filled from the input's URL column - confirm this is intended.
df_EWRD['in_RegulatoryStatute'] = "https://nmonesource.com/nmos/nmsa/en/item/4402/index.do#!fragment/zoupio-_Toc74832537/BQCwhgziBcwMYgK4DsDWszIQewE4BUBTADwBdoAvbRABwEtsBaAfX2zgHYAWADgGYATAFY+HAJQAaZNlKEIARUSFcAT2gBydRIiEwuBIuVrN23fpABlPKQBCagEoBRADKOAagEEAcgGFHE0jAAI2hSdjExIA"
df_EWRD['in_RegulatoryStatuteLink'] = df['URL']
# The same fixed date is applied to every district.
df_EWRD['in_StatutoryEffectiveDate'] = "08/12/2021"
df_EWRD['in_RegulatoryOverlayTypeCV'] = "Water Rights District"
df_EWRD['in_WaterSourceTypeCV'] = "Surface and Groundwater"

# drop=True discards the old row labels instead of inserting them as a stray
# "index" column, which would otherwise be carried through the concatenation
# into the exported CSV.
df_EWRD = df_EWRD.drop_duplicates().reset_index(drop=True)

print(len(df_EWRD))
df_EWRD.head(3)

## Special Conditions Water Right

In [None]:
# CSV input file for the Special Conditions Water Right data.
# Relative path: resolved against the working directory set via os.chdir.
fileInput = "SpecialConditionsWaterRight_input.csv"

# Rebinds `df` to this input; the next cell reads its columns.
df = pd.read_csv(fileInput)
print(len(df))  # number of raw input rows
df.head(3)  # notebook preview of the first rows

In [None]:
# Special Conditions Water Right Dataframe
# -------------------------------------------
# Maps the raw Special Conditions Water Right input (df) onto the "in_*" input
# columns, one output row per input row, then removes exact duplicates.

# Create new dataframe (same row labels as the input so column assignments align)
columnList = ['in_ReportingUnitName',
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              "in_OversightAgency",
              "in_RegulatoryDescription",
              "in_RegulatoryName",
              "in_RegulatoryStatusCV",
              "in_RegulatoryStatute",
              "in_RegulatoryStatuteLink",
              "in_StatutoryEffectiveDate",
              "in_RegulatoryOverlayTypeCV",
              "in_WaterSourceTypeCV"]
df_SCWR = pd.DataFrame(columns=columnList, index=df.index)

# ReportingUnit Inputs
df_SCWR['in_ReportingUnitName'] = df['Name']
df_SCWR['in_ReportingUnitNativeID'] = df['OID_']
df_SCWR['in_ReportingUnitTypeCV'] = "Special Condition Water Right"

# RegulatoryOverlay
df_SCWR['in_OversightAgency'] = df['jurisdicti']
df_SCWR['in_RegulatoryDescription'] = df['requiremen']
df_SCWR['in_RegulatoryName'] = df['Name']
df_SCWR['in_RegulatoryStatusCV'] = "Active"
df_SCWR['in_RegulatoryStatute'] = ""  # blank
df_SCWR['in_RegulatoryStatuteLink'] = ""  # blank
# Copied through unchanged from the input.
# NOTE(review): the other two datasets write MM/DD/YYYY strings - confirm the
# format of effect_dat matches.
df_SCWR['in_StatutoryEffectiveDate'] = df['effect_dat']
df_SCWR['in_RegulatoryOverlayTypeCV'] = "Special Condition Water Right"
df_SCWR['in_WaterSourceTypeCV'] = "Surface and Groundwater"

# drop=True discards the old row labels instead of inserting them as a stray
# "index" column, which would otherwise be carried through the concatenation
# into the exported CSV.
df_SCWR = df_SCWR.drop_duplicates().reset_index(drop=True)

print(len(df_SCWR))
df_SCWR.head(3)

## Concatenate DataFrames together

In [None]:
# Concatenate the three regulatory DataFrames into a single long output DataFrame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each input
# frame's own 0-based labels are repeated, so the combined frame carries duplicate
# labels that make later label-based alignment and lookup ambiguous.
frames = [df_ISCR, df_EWRD, df_SCWR]
df_out = pd.concat(frames, ignore_index=True)

print(len(df_out))
df_out

## WaDE Custom Elements (due to missing state info)

In [None]:
# Creating WaDE Custom Regulatory Overlay Native ID
# ----------------------------------------------------------------------------------------------------
# Every distinct in_ReportingUnitName in df_out receives one ID of the form
# "WaDENM_RE<count>", with count starting at 1 in order of first appearance.

def assignRegulatoryOverlayNativeID(colrowValue):
    """Return the custom native ID: "WaDENM_RE" followed by str(colrowValue)."""
    return "WaDENM_RE" + str(colrowValue)

# Temp lookup table: one row per distinct reporting-unit name.
dfRegulatoryOverlayNativeID = pd.DataFrame()
dfRegulatoryOverlayNativeID['in_ReportingUnitName'] = df_out['in_ReportingUnitName']
dfRegulatoryOverlayNativeID = dfRegulatoryOverlayNativeID.drop_duplicates()

# Assigned as a plain list (positional), so the numbering does not depend on
# index alignment and also works when the table is empty.
dfRegulatoryOverlayNativeID['in_RegulatoryOverlayNativeID'] = [
    assignRegulatoryOverlayNativeID(count)
    for count in range(1, len(dfRegulatoryOverlayNativeID) + 1)
]

# ----------------------------------------------------------------------------------------------------

# Name -> native ID mapping, built once so each retrieval is a dictionary access
# instead of a boolean scan over the whole lookup table.
nativeIDByReportingUnitName = dict(zip(dfRegulatoryOverlayNativeID['in_ReportingUnitName'],
                                       dfRegulatoryOverlayNativeID['in_RegulatoryOverlayNativeID']))

# Retrieve WaDE Custom Regulatory Overlay Native ID
def retrieveRegulatoryOverlayNativeID(A):
    """Return the custom native ID assigned to reporting-unit name A.

    Returns '' when A is an empty string, is null, or has no entry in the
    lookup table.
    """
    if (A == '') or (pd.isnull(A)):
        return ''
    return nativeIDByReportingUnitName.get(A, '')

# Element-wise map over the name column; the result shares df_out's index.
df_out['in_RegulatoryOverlayNativeID'] = df_out['in_ReportingUnitName'].map(retrieveRegulatoryOverlayNativeID)
df_out

# Shapefile Data

In [None]:
# Interstate Stream Compact Regions
# Read the shapefile with geopandas; the relative path is resolved against the
# working directory set via os.chdir.

ISCRShapeFile = "ShapefileData/Interstate Stream Compact Regions/NMInterstateStreamCompactRegions.shp"
ISCRShape = gpd.read_file(ISCRShapeFile)

print(len(ISCRShape))  # number of features read
ISCRShape.head(1)  # notebook preview of the first feature

In [None]:
# Interstate Stream Compact Regions output shapefile dataframe
# One output row per ISCRShape feature: reporting-unit attributes plus geometry.
columnList = ['in_ReportingUnitName',
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              'in_Geomerty']  # spelling kept: this is the column name that gets exported
df_ISCR_shp = pd.DataFrame(index=ISCRShape.index, columns=columnList)

# Output column -> value, assigned in the same order as columnList.
iscrShpValues = {
    'in_ReportingUnitName': ISCRShape['Full_Name'],
    # NOTE(review): the tabular data fills this field from the CSV's OID_ column,
    # whereas the row label is used here - confirm the two agree.
    'in_ReportingUnitNativeID': ISCRShape.index,
    'in_ReportingUnitTypeCV': "Interstate River Compact",
    'in_Geomerty': ISCRShape['geometry'],
}
for outColumn, outValue in iscrShpValues.items():
    df_ISCR_shp[outColumn] = outValue

print(len(df_ISCR_shp))
df_ISCR_shp.head(3)

In [None]:
# OSE Water Right District Boundary
# Read the shapefile with geopandas; the relative path is resolved against the
# working directory set via os.chdir.

EWRDhapeFile = "ShapefileData/OSE District Boundary/OSEDistrictBoundary.shp"
EWRDShape = gpd.read_file(EWRDhapeFile)

print(len(EWRDShape))  # number of features read
EWRDShape.head(1)  # notebook preview of the first feature

In [None]:
# OSE Water Right District output shapefile dataframe
# One output row per EWRDShape feature: reporting-unit attributes plus geometry.
columnList = ['in_ReportingUnitName',
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              'in_Geomerty']  # spelling kept: this is the column name that gets exported
df_EWRD_shp = pd.DataFrame(index=EWRDShape.index, columns=columnList)

# Output column -> value, assigned in the same order as columnList.
ewrdShpValues = {
    'in_ReportingUnitName': EWRDShape['name'],
    'in_ReportingUnitNativeID': EWRDShape['ose_dist_i'],
    'in_ReportingUnitTypeCV': "Water Rights District",
    'in_Geomerty': EWRDShape['geometry'],
}
for outColumn, outValue in ewrdShpValues.items():
    df_EWRD_shp[outColumn] = outValue

print(len(df_EWRD_shp))
df_EWRD_shp.head(3)

In [None]:
# Special Conditions Water Right
# Read the shapefile with geopandas; the relative path is resolved against the
# working directory set via os.chdir.

SCWRShapeFile = "ShapefileData/Water Right Regulations/WaterRightRegulations.shp"
SCWRShape = gpd.read_file(SCWRShapeFile)

print(len(SCWRShape))  # number of features read
SCWRShape.head(1)  # notebook preview of the first feature

In [None]:
# Special Conditions Water Right output shapefile dataframe
# One output row per SCWRShape feature: reporting-unit attributes plus geometry.
columnList = ['in_ReportingUnitName',
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              'in_Geomerty']  # spelling kept: this is the column name that gets exported
df_SCWR_shp = pd.DataFrame(index=SCWRShape.index, columns=columnList)

# Output column -> value, assigned in the same order as columnList.
scwrShpValues = {
    'in_ReportingUnitName': SCWRShape['Name'],
    # NOTE(review): the tabular data fills this field from the CSV's OID_ column,
    # whereas the row label is used here - confirm the two agree.
    'in_ReportingUnitNativeID': SCWRShape.index,
    'in_ReportingUnitTypeCV': "Special Condition Water Right",
    'in_Geomerty': SCWRShape['geometry'],
}
for outColumn, outValue in scwrShpValues.items():
    df_SCWR_shp[outColumn] = outValue

print(len(df_SCWR_shp))
df_SCWR_shp.head(3)

### Concatenate Shapefile DataFrames

In [None]:
# Concatenate the three shapefile DataFrames into a single geometry DataFrame.
# ignore_index=True gives the result a fresh 0..n-1 index instead of repeating
# each input frame's own row labels, so no duplicate labels are left behind.
frames = [df_ISCR_shp, df_EWRD_shp, df_SCWR_shp]
df_shape_out = pd.concat(frames, ignore_index=True)

print(len(df_shape_out))
df_shape_out

### Inspect Output Data & Export

In [None]:
# Print the dtype of every df_out column. The option_context temporarily lifts
# pandas' row/column display limits so the listing is not truncated.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_out.dtypes)

In [None]:
# Print the dtype of every df_shape_out column. The option_context temporarily
# lifts pandas' row/column display limits so the listing is not truncated.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_shape_out.dtypes)

In [None]:
# Export out to CSV.
# Both files use relative names, so they are written to the current working
# directory; index=False leaves the DataFrame row labels out of the files.
# NOTE(review): the geometry column is presumably written as WKT text, per the
# notes at the top of this notebook - confirm in the output file.
df_out.to_csv('P_nmRegMaster.csv', index=False) # The output.
df_shape_out.to_csv('P_nmRegGeometry.csv', index=False) # The output geometry.