# NE Regulatory Data

Preprocessing input data for a smoother upload experience of the state data to the WaDE 2.0 database.
Using geopandas to read in the shapefile, and converting it to WKT for the ReportingUnit geometry.

Notes
- Simple input, just checking datatypes / cleaning up data.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

#Setting work directory, reading inputs, creating dataframe
# The raw-input folder defaults to the original hard-coded local path. Set the
# WADE_NE_REG_INPUT_DIR environment variable to point at a different folder so the
# notebook can run on another machine without editing this cell.
workingDir = os.environ.get(
    "WADE_NE_REG_INPUT_DIR",
    "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Nebraska/Regulatory/RawInputData")
# All relative file paths used later in the notebook are resolved against this folder.
os.chdir(workingDir)

## Natural Resource Districts (NRD)

In [2]:
# Read the Natural Resource Districts CSV input file into a dataframe.
fileInput = "BND_NaturalResourceDistricts_DNR_input.csv"
df = pd.read_csv(fileInput)

# Report the number of rows read, then preview the first few records.
print(df.shape[0])
df.head()

23


Unnamed: 0,OID_,OBJECTID,AreaName,NRD_Name_A,NRD_Num,Shape_Leng,Shape_Length,Shape_Area
0,1,1,Lower Republican,LOWER REPUBLICAN,23,605099.768489,605099.768489,11018160000.0
1,2,2,Tri-Basin,TRI-BASIN,24,413768.865461,413768.865461,6815290000.0
2,3,3,Little Blue,LITTLE BLUE,5,606780.751822,606780.751822,10718680000.0
3,4,4,Lower Big Blue,LOWER BIG BLUE,2,438586.078598,438586.078598,7324130000.0
4,5,5,Nemaha,NEMAHA,11,606872.264508,606872.264508,10901490000.0


In [3]:
# Strip leading/trailing whitespace from the text identifier columns.
# NRD_Num is cast to string first so the .str accessor can be applied to it.
nrdNameClean = df['NRD_Name_A'].str.strip()
nrdNumClean = df['NRD_Num'].astype(str).str.strip()

df['NRD_Name_A'] = nrdNameClean
df['NRD_Num'] = nrdNumClean
df.head()

Unnamed: 0,OID_,OBJECTID,AreaName,NRD_Name_A,NRD_Num,Shape_Leng,Shape_Length,Shape_Area
0,1,1,Lower Republican,LOWER REPUBLICAN,23,605099.768489,605099.768489,11018160000.0
1,2,2,Tri-Basin,TRI-BASIN,24,413768.865461,413768.865461,6815290000.0
2,3,3,Little Blue,LITTLE BLUE,5,606780.751822,606780.751822,10718680000.0
3,4,4,Lower Big Blue,LOWER BIG BLUE,2,438586.078598,438586.078598,7324130000.0
4,5,5,Nemaha,NEMAHA,11,606872.264508,606872.264508,10901490000.0


In [4]:
# Create URL
# URL link info was found here: https://www.nrdnet.org/

# Lookup table: OBJECTID (as a string) -> website URL of that district.
# Every entry carries an explicit scheme so the stored links are directly usable;
# entry "10" previously lacked one.
URLdict = {
"1" : "https://www.lrnrd.org/",
"2" : "https://www.tribasinnrd.org/",
"3" : "http://www.littlebluenrd.org/",
"4" : "http://www.lbbnrd.net",
"5" : "https://www.nemahanrd.org/",
"6" : "http://www.mrnrd.org",
"7" : "http://www.urnrd.org",
"8" : "https://www.lpsnrd.org/",
"9" : "http://www.upperbigblue.org",
"10" : "http://www.cpnrd.org",
"11" : "http://www.spnrd.org",
"12" : "http://www.tpnrd.org",
"13" : "http://www.lpnnrd.org",
"14" : "http://www.llnrd.org",
"15" : "http://www.npnrd.org",
"16" : "http://www.upperloupnrd.org",
"17" : "http://www.papionrd.org",
"18" : "http://www.lenrd.org",
"19" : "http://www.uenrd.org",
"20" : "https://lcnrd.nebraska.gov/",
"21" : "http://www.lnnrd.org",
"22" : "http://www.mnnrd.org",
"23" : "http://www.unwnrd.org"}

def retrieveURL(valA):
    """Return the URL registered in URLdict for the given identifier.

    Parameters
    ----------
    valA : int, float or str
        Identifier to look up. Surrounding whitespace is ignored. A float with an
        integral value (e.g. 3.0) is treated as the integer 3, so the lookup still
        works if the source column was read as float.

    Returns
    -------
    str
        The matching URL, or '' when the identifier is empty, missing (None/NaN)
        or not present in URLdict.
    """
    # Integral floats would otherwise stringify as "3.0" and never match a key.
    # NaN is not integral, so it falls through and simply misses the lookup.
    if isinstance(valA, float) and valA.is_integer():
        valA = int(valA)
    lookupKey = str(valA).strip()
    # dict.get returns the '' default for unknown keys without a bare except.
    return URLdict.get(lookupKey, '')


# Look up the website link for each record from its OBJECTID.
# Mapping the single column avoids building a full row Series per record,
# which the row-wise df.apply(..., axis=1) form did.
df['in_RegulatoryStatuteLink'] = df['OBJECTID'].map(retrieveURL)
df.head()

Unnamed: 0,OID_,OBJECTID,AreaName,NRD_Name_A,NRD_Num,Shape_Leng,Shape_Length,Shape_Area,in_RegulatoryStatuteLink
0,1,1,Lower Republican,LOWER REPUBLICAN,23,605099.768489,605099.768489,11018160000.0,https://www.lrnrd.org/
1,2,2,Tri-Basin,TRI-BASIN,24,413768.865461,413768.865461,6815290000.0,https://www.tribasinnrd.org/
2,3,3,Little Blue,LITTLE BLUE,5,606780.751822,606780.751822,10718680000.0,http://www.littlebluenrd.org/
3,4,4,Lower Big Blue,LOWER BIG BLUE,2,438586.078598,438586.078598,7324130000.0,http://www.lbbnrd.net
4,5,5,Nemaha,NEMAHA,11,606872.264508,606872.264508,10901490000.0,https://www.nemahanrd.org/


In [5]:
# Take an explicit copy so the export frame is not just another name for df;
# later edits to df can then no longer change what gets written out.
df_out = df.copy()

## Shapefile Data

In [6]:
# Natural Resource District (NRD) boundaries shapefile

ShapeFile = "ShapefileData/Natural_Resource_District__NRD__Boundaries-shp/BND_NaturalResourceDistricts_DNR.shp"
dfshp = gpd.read_file(ShapeFile)

# Report the number of features read, then preview the first few records.
print(dfshp.shape[0])
dfshp.head()

23


Unnamed: 0,OBJECTID,NRD_Name,NRD_Name_A,NRD_Num,Shape_Leng,Shape_Area,geometry
0,1,Lower Republican,LOWER REPUBLICAN,23,605099.768489,11018160000.0,"POLYGON ((-10902029.406 4866248.102, -10904135..."
1,2,Tri-Basin,TRI-BASIN,24,413768.865461,6815290000.0,"POLYGON ((-10989946.964 4966690.380, -10989944..."
2,3,Little Blue,LITTLE BLUE,5,606780.751822,10718680000.0,"POLYGON ((-10839037.640 4929707.818, -10839039..."
3,4,Lower Big Blue,LOWER BIG BLUE,2,438586.078598,7324130000.0,"POLYGON ((-10769263.889 4942254.303, -10767151..."
4,5,Nemaha,NEMAHA,11,606872.264508,10901490000.0,"POLYGON ((-10674099.444 4972017.503, -10674045..."


In [7]:
# Natural Resource District output geometry dataframe: keep only the identifier,
# the title-cased district name, and the geometry.
# NOTE(review): the previewed coordinates look projected (metre-scale values) rather
# than lon/lat, and no reprojection happens here — confirm the CRS the WaDE upload expects.
columnList = ['OBJECTID',
              'NRD_Name',
              'geometry']

shpColumns = {
    'OBJECTID': dfshp['OBJECTID'],
    'NRD_Name': dfshp['NRD_Name'].str.title(),
    'geometry': dfshp['geometry'],
}
dfshp_out = pd.DataFrame(shpColumns, index=dfshp.index, columns=columnList)

print(dfshp_out.shape[0])
dfshp_out.head(3)

23


Unnamed: 0,OBJECTID,NRD_Name,geometry
0,1,Lower Republican,"POLYGON ((-10902029.406 4866248.102, -10904135..."
1,2,Tri-Basin,"POLYGON ((-10989946.964 4966690.380, -10989944..."
2,3,Little Blue,"POLYGON ((-10839037.640 4929707.818, -10839039..."


### Inspect Output Data & Export

In [8]:
# Print the dtype of every column of the master table, with display truncation
# switched off for the duration of this cell only.
showAllOptions = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*showAllOptions):
    print(df.dtypes)

OID_                          int64
OBJECTID                      int64
AreaName                     object
NRD_Name_A                   object
NRD_Num                      object
Shape_Leng                  float64
Shape_Length                float64
Shape_Area                  float64
in_RegulatoryStatuteLink     object
dtype: object


In [9]:
# Print the dtype of every column of the geometry table, with display truncation
# switched off for the duration of this cell only.
showAllOptions = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*showAllOptions):
    print(dfshp_out.dtypes)

OBJECTID       int64
NRD_Name      object
geometry    geometry
dtype: object


In [10]:
# Write both processed tables to CSV in the working directory, without the index column:
# the master attribute table first, then the geometry table.
exportTargets = (
    (df_out, 'P_neRegMaster.csv'),
    (dfshp_out, 'P_neRegGeometry.csv'),
)
for outFrame, outFileName in exportTargets:
    outFrame.to_csv(outFileName, index=False)