# KS Regulatory Data

Preprocessing input data for a smoother upload experience of the state data to the WaDE 2.0 database.
Using geopandas to read in the shapefile and converting to WKT for the ReportingUnit geometry.

Notes:
- WKT will be in its own separate geometry dataframe.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # show every column when a DataFrame is displayed

# Setting work directory.
# All later reads and writes in this notebook use relative paths, so they resolve
# against this directory. The default below is the original author's local path;
# set the KS_REG_RAW_INPUT_DIR environment variable to run the notebook elsewhere
# without editing the code. An unset or empty variable falls back to the default.
workingDir = os.environ.get("KS_REG_RAW_INPUT_DIR") or (
    "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Kansas/Regulatory/RawInputData"
)
os.chdir(workingDir)

## Groundwater Management Districts
- Filled in some of the data manually using information from https://www.kgs.ku.edu/Hydro/gmd.html.

In [3]:
# Read the groundwater district attribute table from the CSV input file
# (path is relative to the current working directory).
fileInput = "Groundwater_Districts_input.csv"
df = pd.read_csv(fileInput)

# Report the row count, then preview the first three records.
print(df.shape[0])
df.head(n=3)

5


Unnamed: 0,OID_,OBJECTID,AREA,PERIMETER,GMD_,GMD_ID,NAME,SYM,SQ_MILES,Shape_Leng,Shape_Area,in_OversightAgency,in_RegulatoryDescription,in_RegulatoryName,in_RegulatoryStatuteLink,in_StatutoryEffectiveDate,in_RegulatoryOverlayTypeCV,in_WaterSourceTypeCV
0,1,2,4734380032,472221,3,1,Western Kansas GMD #1,82,1848.314936,5.059135,0.494811,Western Kansas Groundwater Management,Groundwater Management Districts,Western Kansas GMD #1,http://www.gmd1.org/,1/1/1970,Groundwater Management Districts,Groundwater
1,0,1,3543389952,392530,2,2,Equus Beds GMD #2,57,1722.231121,3.6357,0.457862,Equus Beds Groundwater Management,Groundwater Management Districts,Equus Beds GMD #2,http://www.gmd2.org/,1/1/1970,Groundwater Management Districts,Groundwater
2,4,5,21604399104,742771,6,3,Southwest Kansas GMD #3,57,8427.346468,7.748317,2.226546,Southwest Kansas Groundwater Management,Groundwater Management Districts,Southwest Kansas GMD #3,http://www.gmd3.org/,1/1/1970,Groundwater Management Districts,Groundwater


In [5]:
# Work on an independent copy so that later edits to the output table
# cannot silently modify the raw input frame `df`.
df_out = df.copy()

# Shapefile Data

In [6]:
# Read the groundwater district polygons from the shapefile
# (path is relative to the current working directory).
ShapeFile = "ShapefileData/Groundwater_Districts.shp"
dfShape = gpd.read_file(ShapeFile)

# Report the row count, then preview the first five records.
print(dfShape.shape[0])
dfShape.head(5)

5


Unnamed: 0,OBJECTID,AREA,PERIMETER,GMD_,GMD_ID,NAME,SYM,SQ_MILES,Shape_Leng,Shape_Area,geometry
0,1,3543390000.0,392530.0,2,2,Equus Beds GMD #2,57,1722.231121,3.6357,0.457862,"POLYGON ((-98.14224 38.17362, -98.03243 38.173..."
1,2,4734380000.0,472221.0,3,1,Western Kansas GMD #1,82,1848.314936,5.059135,0.494811,"POLYGON ((-102.00183 38.91551, -101.98174 38.9..."
2,3,12622400000.0,626362.0,4,4,Northwest Kansas GMD #4,82,4927.36857,6.78553,1.334546,"POLYGON ((-102.00039 39.87234, -101.27959 39.8..."
3,4,10119400000.0,587835.0,5,5,Big Bend GMD #5,82,3945.658099,6.120936,1.048748,"POLYGON ((-98.47964 38.52198, -98.47953 38.304..."
4,5,21604400000.0,742771.0,6,3,Southwest Kansas GMD #3,57,8427.346468,7.748317,2.226546,"POLYGON ((-101.07029 38.26463, -101.00045 38.2..."


In [8]:
# Geometry output table: district ID, title-cased district name, and the polygon
# geometry, taken row-for-row from the shapefile dataframe.
columnList = ['GMD_ID', 'NAME', 'geometry']

# NOTE(review): str.title() also rewrites the acronym "GMD" as "Gmd", so NAME here
# no longer matches NAME in the CSV table exactly -- confirm this is intended.
dfShape_2 = pd.DataFrame(index=dfShape.index).assign(
    GMD_ID=dfShape['GMD_ID'],
    NAME=dfShape['NAME'].str.title(),
    geometry=dfShape['geometry'],
)[columnList]

# Report the row count, then preview the result.
print(dfShape_2.shape[0])
dfShape_2.head(5)

5


Unnamed: 0,GMD_ID,NAME,geometry
0,2,Equus Beds Gmd #2,"POLYGON ((-98.14224 38.17362, -98.03243 38.173..."
1,1,Western Kansas Gmd #1,"POLYGON ((-102.00183 38.91551, -101.98174 38.9..."
2,4,Northwest Kansas Gmd #4,"POLYGON ((-102.00039 39.87234, -101.27959 39.8..."
3,5,Big Bend Gmd #5,"POLYGON ((-98.47964 38.52198, -98.47953 38.304..."
4,3,Southwest Kansas Gmd #3,"POLYGON ((-101.07029 38.26463, -101.00045 38.2..."


### Inspect Output Data & Export

In [9]:
# Print the dtype of every column in the master output table, with the
# row/column display limits lifted only for the duration of this cell.
masterDisplayOpts = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*masterDisplayOpts):
    print(df_out.dtypes)

OID_                            int64
OBJECTID                        int64
AREA                            int64
PERIMETER                       int64
GMD_                            int64
GMD_ID                          int64
NAME                           object
SYM                             int64
SQ_MILES                      float64
Shape_Leng                    float64
Shape_Area                    float64
in_OversightAgency             object
in_RegulatoryDescription       object
in_RegulatoryName              object
in_RegulatoryStatuteLink       object
in_StatutoryEffectiveDate      object
in_RegulatoryOverlayTypeCV     object
in_WaterSourceTypeCV           object
dtype: object


In [10]:
# Print the dtype of every column in the geometry output table, with the
# row/column display limits lifted only for the duration of this cell.
geometryDisplayOpts = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*geometryDisplayOpts):
    print(dfShape_2.dtypes)

GMD_ID         int64
NAME          object
geometry    geometry
dtype: object


In [12]:
# Write both output tables to CSV without the index column. File names are
# relative, so the files are written to the current working directory.
exportTargets = (
    (df_out, 'P_ksRegMaster.csv'),        # master attribute table
    (dfShape_2, 'P_ksRegGeometry.csv'),   # geometry table
)
for outFrame, outName in exportTargets:
    outFrame.to_csv(outName, index=False)