# Creating Shapefiles for Web App - Custom_A

Inputs: 
1) Pagg_ReportingUnit.csv.  Contains reportinug unit info from WaDE database.

2) 

Paring HUC8 to ReportingUnitNativeID

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory
workingDir = "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles"
os.chdir(workingDir)

# Grab AggreagatedAmounts ReportingUnit.csv file.
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,EPSGCodeCV
0,20733,NM_C_1,35001,Bernalillo,County,NM,EPSG:4326
1,20734,NM_C_2,35003,Catron,County,NM,EPSG:4326
2,20735,NM_C_3,35005,Chaves,County,NM,EPSG:4326


### California - Planning Area

In [3]:
# Grab the CA Planning Area Shapefile.
# Paring PA_NO to ReportingUnitNativeID

CAPAshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/SourceFiles/Custom/CA/Water_Plan_Planning_Areas.shp')
dfs_CAPA = pd.DataFrame(CAPAshapefile_input)
dfs_CAPA.head(3)

Unnamed: 0,OBJECTID,PA_NO,geometry
0,12,1001,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,13,1002,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,14,1003,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


In [4]:
# Custom
# State: CA, Planning Area
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_CA = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Planning Area') & ((df_1RU.StateCV == 'CA'))]

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_CA.ReportingUnitUUID.values, index = df_1RU_Custom_CA.ReportingUnitNativeID).to_dict()
def retrieveCountyName(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_CAPA['ReportingUnitUUID'] = dfs_CAPA.apply(lambda row: retrieveCountyName(row['PA_NO']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_CAPA = pd.merge(dfs_CAPA, df_1RU_Custom_CA, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2CAPA = pd.DataFrame() #empty dataframe
dfs_2CAPA['OBJECTID'] = dfs_CAPA.index
dfs_2CAPA['Shape'] = 'Polygon'
dfs_2CAPA['UnitID'] = dfs_CAPA['ReportingUnitID']
dfs_2CAPA['UnitUUID'] = dfs_CAPA['ReportingUnitUUID']
dfs_2CAPA['NativeID'] = dfs_CAPA['ReportingUnitNativeID']
dfs_2CAPA['Name'] = dfs_CAPA['ReportingUnitName']
dfs_2CAPA['TypeCV'] = dfs_CAPA['ReportingUnitTypeCV']
dfs_2CAPA['StateCV'] = dfs_CAPA['StateCV']
dfs_2CAPA['geometry'] = dfs_CAPA['geometry']

# view output
dfs_2CAPA.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,25919,CAag_RU51,1001,1001,Planning Area,CA,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,Polygon,25930,CAag_RU52,1002,1002,Planning Area,CA,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,Polygon,25941,CAag_RU53,1003,1003,Planning Area,CA,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


### AZ - Active Management Area

In [5]:
# Grab the AZ Active Management Area Shapefile.
# Paring BASIN_NAME to ReportingUnitName
AZ_AMAshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/SourceFiles/Custom/AZ/AZ_AMA.shp')
dfs_AZAMA = pd.DataFrame(AZ_AMAshapefile_input)
dfs_AZAMA.head(3)

Unnamed: 0,OBJECTID,BASIN_NAME,NAME_ABBR,Shape_Leng,Shape_Le_1,Shape_Area,geometry
0,1,SANTA CRUZ AMA,SCA,2.245176,2.245176,0.176233,"POLYGON ((-111.19920 31.85820, -111.19888 31.8..."
1,2,PRESCOTT AMA,PRE,1.930985,1.930985,0.122395,"POLYGON ((-112.56132 34.72245, -112.56119 34.7..."
2,3,HARQUAHALA INA,HAR,2.365999,2.365999,0.192482,"POLYGON ((-113.30171 33.83493, -113.30157 33.8..."


In [6]:
# Custom
# State: AZ, Active Management Area
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_AZ = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Active Management Area') & ((df_1RU.StateCV == 'AZ'))]

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_AZ.ReportingUnitUUID.values, index = df_1RU_Custom_AZ.ReportingUnitName).to_dict()
def retrieveCountyName(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_AZAMA['ReportingUnitUUID'] = dfs_AZAMA.apply(lambda row: retrieveCountyName(row['BASIN_NAME']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_AZAMA = pd.merge(dfs_AZAMA, df_1RU_Custom_AZ, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2AZAMA = pd.DataFrame() #empty dataframe
dfs_2AZAMA['OBJECTID'] = dfs_AZAMA.index
dfs_2AZAMA['Shape'] = 'Polygon'
dfs_2AZAMA['UnitID'] = dfs_AZAMA['ReportingUnitID']
dfs_2AZAMA['UnitUUID'] = dfs_AZAMA['ReportingUnitUUID']
dfs_2AZAMA['NativeID'] = dfs_AZAMA['ReportingUnitNativeID']
dfs_2AZAMA['Name'] = dfs_AZAMA['ReportingUnitName']
dfs_2AZAMA['TypeCV'] = dfs_AZAMA['ReportingUnitTypeCV']
dfs_2AZAMA['StateCV'] = dfs_AZAMA['StateCV']
dfs_2AZAMA['geometry'] = dfs_AZAMA['geometry']

# view output
dfs_2AZAMA.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,21232.0,AZag_RU4,Unspecified,SANTA CRUZ AMA,Active Management Area,AZ,"POLYGON ((-111.19920 31.85820, -111.19888 31.8..."
1,1,Polygon,21231.0,AZag_RU3,Unspecified,PRESCOTT AMA,Active Management Area,AZ,"POLYGON ((-112.56132 34.72245, -112.56119 34.7..."
2,2,Polygon,,,,,,,"POLYGON ((-113.30171 33.83493, -113.30157 33.8..."


### Texas - Basin

In [7]:
# Grab the TX Basin Shapefile.
# Paring RU_Name to ReportingUnitName

TXBshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/SourceFiles/Basins/WaDEBasins.shp')
dfs_TXB = pd.DataFrame(TXBshapefile_input)
dfs_TXB.head(3)

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,geometry
0,12,Brazos,37,37-12,28.211813,10.653969,3_Brazos_37,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,13,Brazos-Colorado,37,37-13,7.2726,0.431697,3_Brazos-Colorado_37,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,1,Canadian,37,37-1,9.47117,3.312585,3_Canadian_37,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [8]:
# Custom
# State: TX, Basin
# Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_TX = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Basin') & ((df_1RU.StateCV == 'TX'))]
df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_TX.ReportingUnitUUID.values, index = df_1RU_Custom_TX.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_TXB['ReportingUnitUUID'] = dfs_TXB.apply(lambda row: retrieveUUID(row['RU_Name']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_TXB = pd.merge(dfs_TXB, df_1RU_Custom_TX, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2TXB = pd.DataFrame() #empty dataframe
dfs_2TXB['OBJECTID'] = dfs_TXB.index
dfs_2TXB['Shape'] = 'Polygon'
dfs_2TXB['UnitID'] = dfs_TXB['ReportingUnitID']
dfs_2TXB['UnitUUID'] = dfs_TXB['ReportingUnitUUID']
dfs_2TXB['NativeID'] = dfs_TXB['ReportingUnitNativeID']
dfs_2TXB['Name'] = dfs_TXB['ReportingUnitName']
dfs_2TXB['TypeCV'] = dfs_TXB['ReportingUnitTypeCV']
dfs_2TXB['StateCV'] = dfs_TXB['StateCV']
dfs_2TXB['geometry'] = dfs_TXB['geometry']

#view output
dfs_2TXB.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,22752.0,TX_12,12,Brazos,Basin,TX,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,22753.0,TX_13,13,Brazos-Colorado,Basin,TX,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,22749.0,TX_1,1,Canadian,Basin,TX,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


### Wyoming - Basin

In [9]:
# Grab the WY Basin Shapefile.
# Paring RU_Name to ReportingUnitName

WYBshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/SourceFiles/Basins/WaDEBasins.shp')
dfs_WYB = pd.DataFrame(WYBshapefile_input)
dfs_WYB.head(3)

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,geometry
0,12,Brazos,37,37-12,28.211813,10.653969,3_Brazos_37,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,13,Brazos-Colorado,37,37-13,7.2726,0.431697,3_Brazos-Colorado_37,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,1,Canadian,37,37-1,9.47117,3.312585,3_Canadian_37,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [10]:
# Custom
# State: WY, Basin
# # Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_WY = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Basin') & ((df_1RU.StateCV == 'WY'))]
# df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_WY.ReportingUnitUUID.values, index = df_1RU_Custom_WY.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_WYB['ReportingUnitUUID'] = dfs_WYB.apply(lambda row: retrieveUUID(row['RU_Name']), axis=1)
dfs_WYB
# Merging temporal dataframes into one, using left-join.
dfs_WYB = pd.merge(dfs_WYB, df_1RU_Custom_WY, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2WYB = pd.DataFrame() #empty dataframe
dfs_2WYB['OBJECTID'] = dfs_WYB.index
dfs_2WYB['Shape'] = 'Polygon'
dfs_2WYB['UnitID'] = dfs_WYB['ReportingUnitID']
dfs_2WYB['UnitUUID'] = dfs_WYB['ReportingUnitUUID']
dfs_2WYB['NativeID'] = dfs_WYB['ReportingUnitNativeID']
dfs_2WYB['Name'] = dfs_WYB['ReportingUnitName']
dfs_2WYB['TypeCV'] = dfs_WYB['ReportingUnitTypeCV']
dfs_2WYB['StateCV'] = dfs_WYB['StateCV']
dfs_2WYB['geometry'] = dfs_WYB['geometry']

#view output
dfs_2WYB.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,,,,,,,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,,,,,,,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,,,,,,,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


### Utah - Subarea

In [11]:
# Grab the UT custom Subarea Shapefile.
# Paring RU_Name to ReportingUnitName

UTSubshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/SourceFiles/Custom/UT/UT_Subarea.shp')
dfs_UTsub = pd.DataFrame(UTSubshapefile_input)
dfs_UTsub

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,State,geometry
0,00-01-03,Curlew Valley,46,46-00-01-03,2.058765,0.115650,UT,"POLYGON ((-112.58699 42.00092, -112.57918 41.9..."
1,000-01-03,Clear Creek,46,46-000-01-03,0.707791,0.021416,UT,"POLYGON ((-113.17778 42.00082, -113.20384 41.9..."
2,00-07-02,Promontory Point,46,46-00-07-02,2.543908,0.166048,UT,"POLYGON ((-112.36394 42.00018, -112.38128 41.9..."
3,000-01-02,Yost,46,46-000-01-02,0.869172,0.030750,UT,"POLYGON ((-113.45058 41.99980, -113.47376 41.9..."
4,000-02-00,Goose Creek,46,46-000-02-00,0.800608,0.031581,UT,"POLYGON ((-113.86094 41.99704, -113.81920 41.9..."
...,...,...,...,...,...,...,...,...
144,01-01-07,Brigham City,46,46-01-01-07,1.721887,0.081817,UT,"POLYGON ((-112.01361 41.64418, -112.00460 41.6..."
145,01-01-04,Cache Valley,46,46-01-01-04,2.738388,0.329565,UT,"POLYGON ((-111.50799 42.00025, -111.50842 41.9..."
146,01-03-02,Randolph,46,46-01-03-02,2.180911,0.183824,UT,"POLYGON ((-111.04977 41.80850, -111.05113 41.5..."
147,01-03-01,Evanston,46,46-01-03-01,2.697182,0.100037,UT,"MULTIPOLYGON (((-110.72426 40.99222, -110.7332..."


In [12]:
# Custom
# State: UT, Subarea
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_UT = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Subarea') & ((df_1RU.StateCV == 'UT'))]

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_UT.ReportingUnitUUID.values, index = df_1RU_Custom_UT.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_UTsub['ReportingUnitUUID'] = dfs_UTsub.apply(lambda row: retrieveUUID(row['RU_Name']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_UTsub = pd.merge(dfs_UTsub, df_1RU_Custom_UT, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2UTsub = pd.DataFrame() #empty dataframe
dfs_2UTsub['OBJECTID'] = dfs_UTsub.index
dfs_2UTsub['Shape'] = 'Polygon'
dfs_2UTsub['UnitID'] = dfs_UTsub['ReportingUnitID']
dfs_2UTsub['UnitUUID'] = dfs_UTsub['ReportingUnitUUID']
dfs_2UTsub['NativeID'] = dfs_UTsub['ReportingUnitNativeID']
dfs_2UTsub['Name'] = dfs_UTsub['ReportingUnitName']
dfs_2UTsub['TypeCV'] = dfs_UTsub['ReportingUnitTypeCV']
dfs_2UTsub['StateCV'] = dfs_UTsub['StateCV']
dfs_2UTsub['geometry'] = dfs_UTsub['geometry']

#view output
dfs_2UTsub.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,24220.0,UTag_RU93,00-01-03,Curlew Valley,Subarea,UT,"POLYGON ((-112.58699 42.00092, -112.57918 41.9..."
1,1,Polygon,24218.0,UTag_RU91,000-01-03,Clear Creek,Subarea,UT,"POLYGON ((-113.17778 42.00082, -113.20384 41.9..."
2,2,Polygon,24230.0,UTag_RU103,00-07-02,Promontory Point,Subarea,UT,"POLYGON ((-112.36394 42.00018, -112.38128 41.9..."


In [13]:
# Merge dataframes
frames = [dfs_2CAPA, dfs_2AZAMA, dfs_2TXB, dfs_2WYB, dfs_2UTsub]
outdf = pd.concat(frames)
outdf.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,25919.0,CAag_RU51,1001,1001,Planning Area,CA,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,Polygon,25930.0,CAag_RU52,1002,1002,Planning Area,CA,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,Polygon,25941.0,CAag_RU53,1003,1003,Planning Area,CA,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


In [14]:
# drop NA rows
outdf = outdf.dropna(subset=['UnitID'])
outdf

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,25919.0,CAag_RU51,1001,1001,Planning Area,CA,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,Polygon,25930.0,CAag_RU52,1002,1002,Planning Area,CA,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,Polygon,25941.0,CAag_RU53,1003,1003,Planning Area,CA,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."
3,3,Polygon,25952.0,CAag_RU54,1004,1004,Planning Area,CA,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
4,4,Polygon,25963.0,CAag_RU55,1005,1005,Planning Area,CA,"POLYGON ((-116.56630 33.54937, -116.56550 33.5..."
...,...,...,...,...,...,...,...,...,...
144,144,Polygon,24249.0,UTag_RU122,2007-01-01 00:00:00,Brigham City,Subarea,UT,"POLYGON ((-112.01361 41.64418, -112.00460 41.6..."
145,145,Polygon,24232.0,UTag_RU105,2004-01-01 00:00:00,Cache Valley,Subarea,UT,"POLYGON ((-111.50799 42.00025, -111.50842 41.9..."
146,146,Polygon,24222.0,UTag_RU95,2002-01-03 00:00:00,Randolph,Subarea,UT,"POLYGON ((-111.04977 41.80850, -111.05113 41.5..."
147,147,Polygon,24250.0,UTag_RU123,2001-01-03 00:00:00,Evanston,Subarea,UT,"MULTIPOLYGON (((-110.72426 40.99222, -110.7332..."


In [15]:
# Export the dataframe to a shapefile.
# Note: rememeber that shapefiles autofill in fields names, will need to fix field names in app upload.
dfsOut = gpd.GeoDataFrame(outdf,  crs="EPSG:4326", geometry='geometry') # covert to geodataframe
dfsOut.to_file("Processed_Shapefiles/P_Custom.shp") # export shape file