# Preprocessing New Mexico Regulatory data for WaDE upload.
- Purpose:  To preprocess the New Mexico data into one master file for simple DataFrame creation and extraction.

In [1]:
# Needed libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import date
# Show up to 999 columns when a DataFrame is displayed, so wide frames are not truncated.
pd.set_option('display.max_columns', 999)
# Render floats with five fixed decimals instead of scientific notation.
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [2]:
# Working Directory
# Every relative path used below is resolved against this folder.
# The location can be overridden with the WADE_NM_REG_WORKDIR environment variable so
# the notebook is not tied to a single machine; when the variable is unset or empty,
# the original hard-coded path is used.
workingDir = os.environ.get("WADE_NM_REG_WORKDIR") or "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/NewMexico/Regulatory/RawInputData"
os.chdir(workingDir)

## Interstate Stream Compact Regions

In [3]:
# Preview the attributes of the compact-regions shapefile (one row).
# This frame is only displayed here: df_iscr is reassigned from the CSV input in the next cell.
df_iscr = gpd.read_file('Interstate Stream Compact Regions/shapefile/InterstateStreamCompactRegions.zip', compression='zip')
df_iscr.head(1)

Unnamed: 0,States,Source,Compact_Na,URL,Shape__Are,Shape__Len,Full_Name,Shape_Leng,Shape_Le_1,Shape_Area,geometry
0,"CO,NM",NHD HUC10,CCC,http://www.ose.state.nm.us/Compacts/CostillaCr...,0.06555,1.77274,Costilla Creek Compact,1.77274,1.77274,0.06555,"POLYGON ((-105.50932 36.99594, -105.49790 36.9..."


In [4]:
# Input File - InterstateStreamCompactRegions_input.zip (zipped CSV)
iscr_input_path = 'Interstate Stream Compact Regions/InterstateStreamCompactRegions_input.zip'
df_iscr = pd.read_csv(iscr_input_path, compression='zip')

# WaDE UUID tracker for data assessment.
# The column is created (and written back to the input file) only when it is missing,
# so later runs reuse the UUIDs already stored in the file.
if 'WaDEUUID' not in df_iscr.columns:
    df_iscr['WaDEUUID'] = "nmiscr" + df_iscr.index.astype(str)
    zip_options = dict(method='zip', archive_name='InterstateStreamCompactRegions_input.csv')
    df_iscr.to_csv(iscr_input_path, compression=zip_options, index=False)

print(len(df_iscr))
df_iscr.head(1)

8


Unnamed: 0,OID_,EffectiveDate,RegulatoryDescription,Full_Name,States,Source,Compact_Na,URL,Shape__Are,Shape__Len,Shape_Length,Shape_Area,WaDEUUID
0,1,1946,The compact provides for the delivery of appor...,Costilla Creek Compact,"CO,NM",NHD HUC10,CCC,http://www.ose.state.nm.us/Compacts/CostillaCr...,0.06555,1.77274,1.77274,0.06555,nmiscr0


In [15]:
# Build the regulatory-overlay output table for the Interstate Stream Compact Regions.
# Columns are declared in output order; plain strings are broadcast to every row of df_iscr.
# NOTE(review): "WaDE Unspecfied" looks like a misspelling of "Unspecified"; the value is
# kept unchanged because it is written to the output - confirm before correcting.
run_date = date.today()

df = pd.DataFrame({
    # Data Assessment UUID
    'WaDEUUID': df_iscr['WaDEUUID'],

    # Date Info (month/day and year are stored in separate columns)
    'in_Date': run_date.strftime('%m/%d'),
    'in_Year': run_date.strftime('%Y'),

    # Organization
    'in_OrganizationUUID': "NMwr_O1",

    # ReportingUnit Info
    'in_EPSGCodeCV': "4326",
    'in_ReportingUnitName': df_iscr['Full_Name'],
    # blank / missing OID_ values become "0"; everything is stored as an integer string
    'in_ReportingUnitNativeID': df_iscr['OID_'].replace("", 0).fillna(0).astype(int).astype(str),
    'in_ReportingUnitProductVersion': "",
    'in_ReportingUnitTypeCV': "Interstate River Compact",
    'in_ReportingUnitUpdateDate': "",
    'in_StateCV': "NM",
    'in_Geometry': "",

    # RegulatoryOverlay Info
    'in_OversightAgency': "WaDE Unspecfied",
    'in_RegulatoryDescription': df_iscr['RegulatoryDescription'],
    'in_RegulatoryName': df_iscr['Full_Name'],
    'in_RegulatoryOverlayNativeID': "",  # make custom below
    'in_RegulatoryStatusCV': "Active",
    'in_RegulatoryStatute': "",
    'in_RegulatoryStatuteLink': df_iscr['URL'],
    'in_StatutoryEffectiveDate': df_iscr['EffectiveDate'],
    'in_StatutoryEndDate': "",
    'in_RegulatoryOverlayTypeCV': "Interstate River Compact",
    'in_WaterSourceTypeCV': "Surface Water",
})

# Remove exact duplicate rows, renumber, and keep a named copy of the result.
df = df.drop_duplicates().reset_index(drop=True)
outdf_iscr = df.copy()
print(len(outdf_iscr))
outdf_iscr.head()

8


Unnamed: 0,WaDEUUID,in_Date,in_Year,in_OrganizationUUID,in_EPSGCodeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitProductVersion,in_ReportingUnitTypeCV,in_ReportingUnitUpdateDate,in_StateCV,in_Geometry,in_OversightAgency,in_RegulatoryDescription,in_RegulatoryName,in_RegulatoryOverlayNativeID,in_RegulatoryStatusCV,in_RegulatoryStatute,in_RegulatoryStatuteLink,in_StatutoryEffectiveDate,in_StatutoryEndDate,in_RegulatoryOverlayTypeCV,in_WaterSourceTypeCV
0,nmiscr0,04/14,2023,NMwr_O1,4326,Costilla Creek Compact,1,,Interstate River Compact,,NM,,WaDE Unspecfied,The compact provides for the delivery of appor...,Costilla Creek Compact,,Active,,http://www.ose.state.nm.us/Compacts/CostillaCr...,1946,,Interstate River Compact,Surface Water
1,nmiscr1,04/14,2023,NMwr_O1,4326,Animas-La Plata Project Compact,2,,Interstate River Compact,,NM,,WaDE Unspecfied,It establishes equal priority for the water su...,Animas-La Plata Project Compact,,Active,,http://www.ose.state.nm.us/Compacts/AnimasLaPl...,1968,,Interstate River Compact,Surface Water
2,nmiscr2,04/14,2023,NMwr_O1,4326,La Plata River Compact,3,,Interstate River Compact,,NM,,WaDE Unspecfied,The compact also provides cooperative collecti...,La Plata River Compact,,Active,,http://www.ose.state.nm.us/Compacts/LaPlata/is...,1925,,Interstate River Compact,Surface Water
3,nmiscr3,04/14,2023,NMwr_O1,4326,Upper Colorado River Basin Compact,4,,Interstate River Compact,,NM,,WaDE Unspecfied,The compact creates the Upper Colorado River C...,Upper Colorado River Basin Compact,,Active,,http://www.ose.state.nm.us/Compacts/UpperColor...,1948,,Interstate River Compact,Surface Water
4,nmiscr4,04/14,2023,NMwr_O1,4326,Pecos River Compact,5,,Interstate River Compact,,NM,,WaDE Unspecfied,The compact creates the Pecos River Commission...,Pecos River Compact,,Active,,http://www.ose.state.nm.us/Compacts/Pecos/isc_...,1948,,Interstate River Compact,Surface Water


## OSE Water Right District Boundary

In [16]:
# Input File - OSEWaterRightDistrictBoundary_input.csv
ose_input_path = 'OSE Water Right District Boundary/OSEWaterRightDistrictBoundary_input.csv'
df_ose = pd.read_csv(ose_input_path)

# WaDE UUID tracker for data assessment.
# Use a dataset-specific prefix: reusing the compact-regions prefix "nmiscr" here gave the
# district rows the same UUIDs (nmiscr0, nmiscr1, ...) as the compact-region rows.
# NOTE: UUIDs are generated only when the column is missing, so an input file that already
# carries "nmiscr..." values keeps them until its WaDEUUID column is removed.
if 'WaDEUUID' not in df_ose.columns:
    df_ose['WaDEUUID'] = "nmose" + df_ose.index.astype(str)
    df_ose.to_csv(ose_input_path, index=False)

print(len(df_ose))
df_ose.head(1)

7


Unnamed: 0,OID_,EffectiveDate,ose_dist_i,name,dist_nbr,Shape__Are,Shape__Len,Designatio,Address,URL,MeterReadi,WaDEUUID
0,0,1946,7,Cimarron,7,43861758004,983128.2028,District 7,301 East 9th Street,https://www.ose.state.nm.us/DO/district_7.php,https://www.ose.state.nm.us/Meter/meterContact...,nmiscr0


In [18]:
# Build the regulatory-overlay output table for the OSE Water Right Districts.
# Columns are declared in output order; plain strings are broadcast to every row of df_ose.
# NOTE(review): "WaDE Unspecfied" looks like a misspelling of "Unspecified"; the value is
# kept unchanged because it is written to the output - confirm before correcting.
run_date = date.today()

df = pd.DataFrame({
    # Data Assessment UUID
    'WaDEUUID': df_ose['WaDEUUID'],

    # Date Info (month/day and year are stored in separate columns)
    'in_Date': run_date.strftime('%m/%d'),
    'in_Year': run_date.strftime('%Y'),

    # Organization
    'in_OrganizationUUID': "NMwr_O1",

    # ReportingUnit Info
    'in_EPSGCodeCV': "4326",
    'in_ReportingUnitName': df_ose['name'],
    # blank / missing district ids become "0"; everything is stored as an integer string
    'in_ReportingUnitNativeID': df_ose['ose_dist_i'].replace("", 0).fillna(0).astype(int).astype(str),
    'in_ReportingUnitProductVersion': "",
    'in_ReportingUnitTypeCV': "Water Rights District",
    'in_ReportingUnitUpdateDate': "",
    'in_StateCV': "NM",
    'in_Geometry': "",

    # RegulatoryOverlay Info (the description and effective date are the same for every district)
    'in_OversightAgency': "WaDE Unspecfied",
    'in_RegulatoryDescription': "District operated by a Water Master appointed by the Office of the State Engineer, who is charged with administering the state's water resources. The State Engineer has authority over the supervision, measurement, appropriation, and distribution of all surface and groundwater in New Mexico, including streams and rivers that cross state boundaries.",
    'in_RegulatoryName': df_ose['name'],
    'in_RegulatoryOverlayNativeID': "",  # make custom below
    'in_RegulatoryStatusCV': "Active",
    'in_RegulatoryStatute': "",
    'in_RegulatoryStatuteLink': df_ose['URL'],
    'in_StatutoryEffectiveDate': "08/12/2021",
    'in_StatutoryEndDate': "",
    'in_RegulatoryOverlayTypeCV': "Water Rights District",
    'in_WaterSourceTypeCV': "Surface Water and Groundwater",
})

# Remove exact duplicate rows, renumber, and keep a named copy of the result.
df = df.drop_duplicates().reset_index(drop=True)
outdf_ose = df.copy()
print(len(outdf_ose))
outdf_ose.head()

7


Unnamed: 0,WaDEUUID,in_Date,in_Year,in_OrganizationUUID,in_EPSGCodeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitProductVersion,in_ReportingUnitTypeCV,in_ReportingUnitUpdateDate,in_StateCV,in_Geometry,in_OversightAgency,in_RegulatoryDescription,in_RegulatoryName,in_RegulatoryOverlayNativeID,in_RegulatoryStatusCV,in_RegulatoryStatute,in_RegulatoryStatuteLink,in_StatutoryEffectiveDate,in_StatutoryEndDate,in_RegulatoryOverlayTypeCV,in_WaterSourceTypeCV
0,nmiscr0,04/14,2023,NMwr_O1,4326,Cimarron,7,,Water Rights District,,NM,,WaDE Unspecfied,District operated by a Water Master appointed ...,Cimarron,,Active,,https://www.ose.state.nm.us/DO/district_7.php,08/12/2021,,Water Rights District,Surface Water and Groundwater
1,nmiscr1,04/14,2023,NMwr_O1,4326,Santa Fe,6,,Water Rights District,,NM,,WaDE Unspecfied,District operated by a Water Master appointed ...,Santa Fe,,Active,,https://www.ose.state.nm.us/DO/district_6.php,08/12/2021,,Water Rights District,Surface Water and Groundwater
2,nmiscr2,04/14,2023,NMwr_O1,4326,Aztec,5,,Water Rights District,,NM,,WaDE Unspecfied,District operated by a Water Master appointed ...,Aztec,,Active,,https://www.ose.state.nm.us/DO/district_5.php,08/12/2021,,Water Rights District,Surface Water and Groundwater
3,nmiscr3,04/14,2023,NMwr_O1,4326,Las Cruces,4,,Water Rights District,,NM,,WaDE Unspecfied,District operated by a Water Master appointed ...,Las Cruces,,Active,,https://www.ose.state.nm.us/DO/district_4.php,08/12/2021,,Water Rights District,Surface Water and Groundwater
4,nmiscr4,04/14,2023,NMwr_O1,4326,Deming,3,,Water Rights District,,NM,,WaDE Unspecfied,District operated by a Water Master appointed ...,Deming,,Active,,https://www.ose.state.nm.us/DO/district_3.php,08/12/2021,,Water Rights District,Surface Water and Groundwater


## Special Conditions Water Right

In [None]:
# CSV input file for the Special Conditions Water Right dataset.
# The path is relative, so it is resolved against the current working directory.
fileInput = "SpecialConditionsWaterRight_input.csv"

# NOTE: this rebinds `df`, the scratch name also used by the earlier build cells.
df = pd.read_csv(fileInput)
# Row count, then a three-row preview.
print(len(df))
df.head(3)

In [None]:
# #  Special Conditions Water Right Dataframe
# # -------------------------------------------

# # Create new dataframe
# columnList = ['in_ReportingUnitName', 
#               'in_ReportingUnitNativeID', 
#               'in_ReportingUnitTypeCV', 
#               "in_OversightAgency",
#               "in_RegulatoryDescription",
#               "in_RegulatoryName",
#               "in_RegulatoryStatusCV",
#               "in_RegulatoryStatute",
#               "in_RegulatoryStatuteLink",
#               "in_StatutoryEffectiveDate",
#               "in_RegulatoryOverlayTypeCV",
#               "in_WaterSourceTypeCV"]
# df_SCWR = pd.DataFrame(columns=columnList, index=df.index)

# # ReportingUnit Inputs
# df_SCWR['in_ReportingUnitName'] = df['Name']
# df_SCWR['in_ReportingUnitNativeID'] = df['OID_']
# df_SCWR['in_ReportingUnitTypeCV'] = "Special Condition Water Right"

# # RegulatoryOverlay
# df_SCWR['in_OversightAgency'] = df['jurisdicti']
# df_SCWR['in_RegulatoryDescription'] = df['requiremen']
# df_SCWR['in_RegulatoryName'] = df['Name']
# df_SCWR['in_RegulatoryStatusCV'] = "Active"
# df_SCWR['in_RegulatoryStatute'] = "Unspecified"
# df_SCWR['in_RegulatoryStatuteLink'] = ""
# df_SCWR['in_StatutoryEffectiveDate'] = df['effect_dat']
# df_SCWR['in_RegulatoryOverlayTypeCV'] = "Special Condition Water Right"
# df_SCWR['in_WaterSourceTypeCV'] = "Surface and Groundwater"

# df_SCWR = df_SCWR.drop_duplicates().reset_index()

# print(len(df_SCWR))
# df_SCWR.head(3)

## Concatenate DataFrames together

In [None]:
# Concatenate the per-dataset regulatory DataFrames into one master frame.
# The frames built earlier in this notebook are named outdf_iscr and outdf_ose; the
# previously listed names (df_ISCR, df_EWRD, df_SCWR) are never assigned, so this cell
# raised NameError on a clean Restart & Run All. The Special Conditions frame is left
# out because the cell that would build it is entirely commented out.
frames = [outdf_iscr, outdf_ose]

# ignore_index gives df_out a unique 0..n-1 index instead of repeating each input
# frame's own 0-based labels.
df_out = pd.concat(frames, ignore_index=True)

print(len(df_out))
df_out

## WaDE Custom Elements (due to missing state info)

In [None]:
# Creating WaDE Custom Regulatory Overlay Native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp RegulatoryOverlayNativeID dataframe of unique water source.
def assignRegulatoryOverlayNativeID(colrowValue):
    """Return the custom native ID: the prefix "WaDENM_RE" followed by ``str(colrowValue)``."""
    return "WaDENM_RE" + str(colrowValue)

# One row per distinct reporting unit name found in df_out.
dfRegulatoryOverlayNativeID = pd.DataFrame(
    {'in_ReportingUnitName': df_out['in_ReportingUnitName']}
).drop_duplicates()

# Running counter 1..N on the same index; each count is passed through
# assignRegulatoryOverlayNativeID to produce that row's custom ID.
dftemp = pd.DataFrame(
    {"Count": range(1, len(dfRegulatoryOverlayNativeID.index) + 1)},
    index=dfRegulatoryOverlayNativeID.index,
)
dfRegulatoryOverlayNativeID['in_RegulatoryOverlayNativeID'] = dftemp.apply(
    lambda counterRow: assignRegulatoryOverlayNativeID(counterRow['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom Regulatory Overlay Native ID
def retrieveRegulatoryOverlayNativeID(A):
    """Look up the custom RegulatoryOverlayNativeID for reporting unit name ``A``.

    Reads the notebook-level ``dfRegulatoryOverlayNativeID`` table and returns the
    ID of the first row whose 'in_ReportingUnitName' equals ``A``. Returns '' when
    ``A`` is an empty string or null, or when no row matches.
    """
    # Blank or missing names have no ID.
    if (A == '') or (pd.isnull(A)):
        return ''

    nameMatches = dfRegulatoryOverlayNativeID['in_ReportingUnitName'] == A
    matchedIDs = dfRegulatoryOverlayNativeID.loc[nameMatches, 'in_RegulatoryOverlayNativeID']
    return '' if matchedIDs.empty else matchedIDs.iloc[0]

# Fill in the custom native ID for every row, looked up row by row from its reporting unit name.
df_out['in_RegulatoryOverlayNativeID'] = df_out.apply(lambda row: retrieveRegulatoryOverlayNativeID(row['in_ReportingUnitName']), axis=1)
df_out

# Shapefile Data

In [None]:
# Interstate Stream Compact Regions
# Load the compact-regions shapefile; the path is relative to the current working directory.

ISCRShapeFile = "ShapefileData/Interstate Stream Compact Regions/NMInterstateStreamCompactRegions.shp"
ISCRShape = gpd.read_file(ISCRShapeFile)

# Row count, then a one-row preview of the attributes.
print(len(ISCRShape))
ISCRShape.head(1)

In [None]:
# Interstate Stream Compact Regions output shapefile dataframe
# NOTE(review): 'in_Geomerty' is spelled differently from the 'in_Geometry' column of the
# master table. The same spelling is used by every shapefile cell, so if it is corrected
# it has to be renamed in all of them together.
columnList = ['in_ReportingUnitName', 
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              'in_Geomerty']
# Empty frame with the target columns, indexed like the shapefile rows.
df_ISCR_shp = pd.DataFrame(columns=columnList, index=ISCRShape.index)

df_ISCR_shp['in_ReportingUnitName'] = ISCRShape['Full_Name']
# The native ID here is the shapefile row index.
# NOTE(review): the master table takes this ID from the CSV 'OID_' column instead -
# confirm that the two sets of IDs actually line up.
df_ISCR_shp['in_ReportingUnitNativeID'] = ISCRShape.index
df_ISCR_shp['in_ReportingUnitTypeCV'] = "Interstate River Compact"
df_ISCR_shp['in_Geomerty'] = ISCRShape['geometry']

print(len(df_ISCR_shp))
df_ISCR_shp.head(3)

In [None]:
# OSE Water Right District Boundary
# Load the district-boundary shapefile; the path is relative to the current working directory.

EWRDhapeFile = "ShapefileData/OSE District Boundary/OSEDistrictBoundary.shp"
EWRDShape = gpd.read_file(EWRDhapeFile)

# Row count, then a one-row preview of the attributes.
print(len(EWRDShape))
EWRDShape.head(1)

In [None]:
# OSE Water Right District output shapefile dataframe
# NOTE(review): 'in_Geomerty' is spelled differently from the 'in_Geometry' column of the
# master table. The same spelling is used by every shapefile cell, so if it is corrected
# it has to be renamed in all of them together.
columnList = ['in_ReportingUnitName', 
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              'in_Geomerty']
# Empty frame with the target columns, indexed like the shapefile rows.
df_EWRD_shp = pd.DataFrame(columns=columnList, index=EWRDShape.index)

df_EWRD_shp['in_ReportingUnitName'] = EWRDShape['name']
# Native ID comes from the shapefile's 'ose_dist_i' attribute.
df_EWRD_shp['in_ReportingUnitNativeID'] = EWRDShape['ose_dist_i']
df_EWRD_shp['in_ReportingUnitTypeCV'] = "Water Rights District"
df_EWRD_shp['in_Geomerty'] = EWRDShape['geometry']

print(len(df_EWRD_shp))
df_EWRD_shp.head(3)

In [None]:
# Special Conditions Water Right
# Load the water-right-regulations shapefile; the path is relative to the current working directory.

SCWRShapeFile = "ShapefileData/Water Right Regulations/WaterRightRegulations.shp"
SCWRShape = gpd.read_file(SCWRShapeFile)

# Row count, then a one-row preview of the attributes.
print(len(SCWRShape))
SCWRShape.head(1)

In [None]:
# Special Conditions Water Right output shapefile dataframe
# NOTE(review): 'in_Geomerty' is spelled differently from the 'in_Geometry' column of the
# master table. The same spelling is used by every shapefile cell, so if it is corrected
# it has to be renamed in all of them together.
columnList = ['in_ReportingUnitName', 
              'in_ReportingUnitNativeID',
              'in_ReportingUnitTypeCV',
              'in_Geomerty']
# Empty frame with the target columns, indexed like the shapefile rows.
df_SCWR_shp = pd.DataFrame(columns=columnList, index=SCWRShape.index)

df_SCWR_shp['in_ReportingUnitName'] = SCWRShape['Name']
# The native ID here is the shapefile row index.
df_SCWR_shp['in_ReportingUnitNativeID'] = SCWRShape.index
df_SCWR_shp['in_ReportingUnitTypeCV'] = "Special Condition Water Right"
df_SCWR_shp['in_Geomerty'] = SCWRShape['geometry']

print(len(df_SCWR_shp))
df_SCWR_shp.head(3)

### Concatenate Shapefile DataFrames

In [None]:
# Concatenate the three shapefile output DataFrames into one geometry table.
# Each input frame keeps its own index labels (pd.concat is called without ignore_index).
frames = [df_ISCR_shp, df_EWRD_shp, df_SCWR_shp]
df_shape_out = pd.concat(frames)

print(len(df_shape_out))
df_shape_out

### Inspect Output Data & Export

In [None]:
# Print the dtype of every df_out column; the display limits are lifted only inside this block.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_out.dtypes)

In [None]:
# Print the dtype of every df_shape_out column; the display limits are lifted only inside this block.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_shape_out.dtypes)

In [None]:
# Export both tables to CSV in the current working directory, without the index column.
df_out.to_csv('P_nmRegMaster.csv', index=False) # The output.
df_shape_out.to_csv('P_nmRegGeometry.csv', index=False) # The output geometry.