# Creating Shapefiles for Web App - Custom Basin

Inputs: 
- Pagg_ReportingUnit.csv. Contains reporting unit info from the WaDE database.
- California Planning Area source shapefile.
- Arizona Active Management Area source shapefile.
- Utah Subarea source shapefile.

In [1]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory
# NOTE(review): this is a machine-specific absolute path; it has to be edited before the notebook can run elsewhere.
workingDir = "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape"
os.chdir(workingDir)

# Grab the AggregatedAmounts ReportingUnit.csv file (relative path, resolved against workingDir after the chdir above).
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)  # df_1RU is the reporting-unit table every state section below filters
df_1RU.head(3)  # preview the first three rows

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,WaDEName
0,27796,AZag_RU1,WaDEAZ_RU1,PHOENIX AMA,Active Management Area,AZ,Custom Basin
1,27797,AZag_RU2,WaDEAZ_RU2,PINAL AMA,Active Management Area,AZ,Custom Basin
2,27798,AZag_RU3,WaDEAZ_RU3,PRESCOTT AMA,Active Management Area,AZ,Custom Basin


### California - Planning Area

In [3]:
# Grab the CA Planning Area Shapefile.
# Pairing PA_NO to ReportingUnitNativeID

# NOTE(review): absolute path, even though the working directory was already set to this project folder earlier.
CAPAshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape/SourceFiles/Custom/CA/Water_Plan_Planning_Areas.shp')
dfs_CAPA = pd.DataFrame(CAPAshapefile_input)  # work on a plain pandas DataFrame; it is turned back into a GeoDataFrame at export time
dfs_CAPA.head(3)  # preview the first three rows

Unnamed: 0,OBJECTID,PA_NO,geometry
0,12,1001,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,13,1002,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,14,1003,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


In [4]:
# Custom
# State: CA, Planning Area
###########################################################################

# Temporary frame holding only the California "Planning Area" reporting units.
_caTypeMask = df_1RU['ReportingUnitTypeCV'] == 'Planning Area'
_caStateMask = df_1RU['StateCV'] == 'CA'
df_1RU_Custom_CA = df_1RU[_caTypeMask & _caStateMask]

# Lookup table: ReportingUnitNativeID -> ReportingUnitUUID for those units.
_caUUIDSeries = pd.Series(
    df_1RU_Custom_CA['ReportingUnitUUID'].values,
    index=df_1RU_Custom_CA['ReportingUnitNativeID'],
)
ReportingUnitUUIDdict = _caUUIDSeries.to_dict()
def retrieveCountyName(colrowValue, lookup=None):
    """Return the ReportingUnitUUID that corresponds to a shapefile key value.

    Despite its name, this function returns a ReportingUnitUUID, not a
    county name.

    Args:
        colrowValue: Key taken from one shapefile row (the ``PA_NO`` value
            when used for the California planning areas).
        lookup: Optional mapping of key -> ReportingUnitUUID. When omitted,
            the module-level ``ReportingUnitUUIDdict`` that exists at call
            time is used.

    Returns:
        The matching ReportingUnitUUID, or ``''`` when the value is blank,
        null, or has no entry in the mapping.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    if lookup is None:
        lookup = ReportingUnitUUIDdict
    # Only a missing key means "no match"; any other error (for example an
    # unhashable key) is allowed to surface instead of being silently hidden.
    return lookup.get(colrowValue, '')
# Look up the ReportingUnitUUID of every planning area from its PA_NO.
# Applying over the single column avoids constructing a Series for each row;
# blank or unmatched values come back as ''.
dfs_CAPA['ReportingUnitUUID'] = dfs_CAPA['PA_NO'].apply(retrieveCountyName)

# Merge the reporting-unit attributes onto the shapes with a left join, so
# every shape is kept even when it has no matching reporting unit.
dfs_CAPA = pd.merge(dfs_CAPA, df_1RU_Custom_CA, on='ReportingUnitUUID', how='left')

# Create the state-specific output dataframe with the fields of interest.
dfs_2CAPA = pd.DataFrame() #empty dataframe
dfs_2CAPA['OBJECTID'] = dfs_CAPA.index  # row position within this state's frame
dfs_2CAPA['Shape'] = 'Polygon'
dfs_2CAPA['UnitID'] = dfs_CAPA['ReportingUnitID']
dfs_2CAPA['UnitUUID'] = dfs_CAPA['ReportingUnitUUID']
dfs_2CAPA['NativeID'] = dfs_CAPA['ReportingUnitNativeID']
dfs_2CAPA['Name'] = dfs_CAPA['ReportingUnitName']
dfs_2CAPA['TypeCV'] = dfs_CAPA['ReportingUnitTypeCV']
dfs_2CAPA['StateCV'] = dfs_CAPA['StateCV']
dfs_2CAPA['geometry'] = dfs_CAPA['geometry']

# view output
dfs_2CAPA.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26745,CAag_RU51,1001,1001,Planning Area,CA,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,Polygon,26746,CAag_RU52,1002,1002,Planning Area,CA,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,Polygon,26747,CAag_RU53,1003,1003,Planning Area,CA,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


### AZ - Active Management Area

In [5]:
# Grab the AZ Active Management Area Shapefile.
# Pairing BASIN_NAME to ReportingUnitName
# NOTE(review): absolute path, even though the working directory was already set to this project folder earlier.
AZ_AMAshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape/SourceFiles/Custom/AZ/AZ_AMA.shp')
dfs_AZAMA = pd.DataFrame(AZ_AMAshapefile_input)  # work on a plain pandas DataFrame; it is turned back into a GeoDataFrame at export time
dfs_AZAMA.head(3)  # preview the first three rows

Unnamed: 0,OBJECTID,BASIN_NAME,NAME_ABBR,Shape_Leng,Shape_Le_1,Shape_Area,geometry
0,1,SANTA CRUZ AMA,SCA,2.245176,2.245176,0.176233,"POLYGON ((-111.19920 31.85820, -111.19888 31.8..."
1,2,PRESCOTT AMA,PRE,1.930985,1.930985,0.122395,"POLYGON ((-112.56132 34.72245, -112.56119 34.7..."
2,3,HARQUAHALA INA,HAR,2.365999,2.365999,0.192482,"POLYGON ((-113.30171 33.83493, -113.30157 33.8..."


In [6]:
# Custom
# State: AZ, Active Management Area
###########################################################################

# Temporary frame holding only the Arizona "Active Management Area" reporting units.
_azTypeMask = df_1RU['ReportingUnitTypeCV'] == 'Active Management Area'
_azStateMask = df_1RU['StateCV'] == 'AZ'
df_1RU_Custom_AZ = df_1RU[_azTypeMask & _azStateMask]

# Lookup table: ReportingUnitName -> ReportingUnitUUID for those units.
_azUUIDSeries = pd.Series(
    df_1RU_Custom_AZ['ReportingUnitUUID'].values,
    index=df_1RU_Custom_AZ['ReportingUnitName'],
)
ReportingUnitUUIDdict = _azUUIDSeries.to_dict()
def retrieveCountyName(colrowValue, lookup=None):
    """Return the ReportingUnitUUID that corresponds to a shapefile key value.

    Despite its name, this function returns a ReportingUnitUUID, not a
    county name.

    Args:
        colrowValue: Key taken from one shapefile row (the ``BASIN_NAME``
            value when used for the Arizona Active Management Areas).
        lookup: Optional mapping of key -> ReportingUnitUUID. When omitted,
            the module-level ``ReportingUnitUUIDdict`` that exists at call
            time is used.

    Returns:
        The matching ReportingUnitUUID, or ``''`` when the value is blank,
        null, or has no entry in the mapping.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    if lookup is None:
        lookup = ReportingUnitUUIDdict
    # Only a missing key means "no match"; any other error (for example an
    # unhashable key) is allowed to surface instead of being silently hidden.
    return lookup.get(colrowValue, '')
# Look up the ReportingUnitUUID of every basin from its BASIN_NAME.
# Applying over the single column avoids constructing a Series for each row;
# blank or unmatched names come back as ''.
dfs_AZAMA['ReportingUnitUUID'] = dfs_AZAMA['BASIN_NAME'].apply(retrieveCountyName)

# Merge the reporting-unit attributes onto the shapes with a left join, so
# every shape is kept even when it has no matching reporting unit (those rows
# get null attribute values).
dfs_AZAMA = pd.merge(dfs_AZAMA, df_1RU_Custom_AZ, on='ReportingUnitUUID', how='left')

# Create the state-specific output dataframe with the fields of interest.
dfs_2AZAMA = pd.DataFrame() #empty dataframe
dfs_2AZAMA['OBJECTID'] = dfs_AZAMA.index  # row position within this state's frame
dfs_2AZAMA['Shape'] = 'Polygon'
dfs_2AZAMA['UnitID'] = dfs_AZAMA['ReportingUnitID']
dfs_2AZAMA['UnitUUID'] = dfs_AZAMA['ReportingUnitUUID']
dfs_2AZAMA['NativeID'] = dfs_AZAMA['ReportingUnitNativeID']
dfs_2AZAMA['Name'] = dfs_AZAMA['ReportingUnitName']
dfs_2AZAMA['TypeCV'] = dfs_AZAMA['ReportingUnitTypeCV']
dfs_2AZAMA['StateCV'] = dfs_AZAMA['StateCV']
dfs_2AZAMA['geometry'] = dfs_AZAMA['geometry']

# view output
dfs_2AZAMA.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,27799.0,AZag_RU4,WaDEAZ_RU4,SANTA CRUZ AMA,Active Management Area,AZ,"POLYGON ((-111.19920 31.85820, -111.19888 31.8..."
1,1,Polygon,27798.0,AZag_RU3,WaDEAZ_RU3,PRESCOTT AMA,Active Management Area,AZ,"POLYGON ((-112.56132 34.72245, -112.56119 34.7..."
2,2,Polygon,,,,,,,"POLYGON ((-113.30171 33.83493, -113.30157 33.8..."


### Utah - Subarea

In [7]:
# Grab the UT custom Subarea Shapefile.
# Pairing RU_Name to ReportingUnitName

# NOTE(review): absolute path, even though the working directory was already set to this project folder earlier.
UTSubshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape/SourceFiles/Custom/UT/UT_Subarea.shp')
dfs_UTsub = pd.DataFrame(UTSubshapefile_input)  # work on a plain pandas DataFrame; it is turned back into a GeoDataFrame at export time
dfs_UTsub  # display the whole frame

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,State,geometry
0,00-01-03,Curlew Valley,46,46-00-01-03,2.058765,0.115650,UT,"POLYGON ((-112.58699 42.00092, -112.57918 41.9..."
1,000-01-03,Clear Creek,46,46-000-01-03,0.707791,0.021416,UT,"POLYGON ((-113.17778 42.00082, -113.20384 41.9..."
2,00-07-02,Promontory Point,46,46-00-07-02,2.543908,0.166048,UT,"POLYGON ((-112.36394 42.00018, -112.38128 41.9..."
3,000-01-02,Yost,46,46-000-01-02,0.869172,0.030750,UT,"POLYGON ((-113.45058 41.99980, -113.47376 41.9..."
4,000-02-00,Goose Creek,46,46-000-02-00,0.800608,0.031581,UT,"POLYGON ((-113.86094 41.99704, -113.81920 41.9..."
...,...,...,...,...,...,...,...,...
144,01-01-07,Brigham City,46,46-01-01-07,1.721887,0.081817,UT,"POLYGON ((-112.01361 41.64418, -112.00460 41.6..."
145,01-01-04,Cache Valley,46,46-01-01-04,2.738388,0.329565,UT,"POLYGON ((-111.50799 42.00025, -111.50842 41.9..."
146,01-03-02,Randolph,46,46-01-03-02,2.180911,0.183824,UT,"POLYGON ((-111.04977 41.80850, -111.05113 41.5..."
147,01-03-01,Evanston,46,46-01-03-01,2.697182,0.100037,UT,"MULTIPOLYGON (((-110.72426 40.99222, -110.7332..."


In [8]:
# Custom
# State: UT, Subarea
###########################################################################

# Temporary frame holding only the Utah "Subarea" reporting units.
_utTypeMask = df_1RU['ReportingUnitTypeCV'] == 'Subarea'
_utStateMask = df_1RU['StateCV'] == 'UT'
df_1RU_Custom_UT = df_1RU[_utTypeMask & _utStateMask]

# Lookup table: ReportingUnitName -> ReportingUnitUUID for those units.
_utUUIDSeries = pd.Series(
    df_1RU_Custom_UT['ReportingUnitUUID'].values,
    index=df_1RU_Custom_UT['ReportingUnitName'],
)
ReportingUnitUUIDdict = _utUUIDSeries.to_dict()
def retrieveUUID(colrowValue, lookup=None):
    """Return the ReportingUnitUUID that corresponds to a shapefile name value.

    Leading and trailing whitespace is stripped from the value before the
    lookup.

    Args:
        colrowValue: Name taken from one shapefile row (the ``RU_Name`` value
            when used for the Utah subareas). Non-blank values must support
            ``.strip()``.
        lookup: Optional mapping of name -> ReportingUnitUUID. When omitted,
            the module-level ``ReportingUnitUUIDdict`` that exists at call
            time is used.

    Returns:
        The matching ReportingUnitUUID, or ``''`` when the value is blank,
        null, or has no entry in the mapping.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    if lookup is None:
        lookup = ReportingUnitUUIDdict
    # Only a missing key means "no match"; any other error is allowed to
    # surface instead of being silently hidden.
    return lookup.get(colrowValue.strip(), '')
# Look up the ReportingUnitUUID of every subarea from its RU_Name.
# Applying over the single column avoids constructing a Series for each row;
# blank or unmatched names come back as ''.
dfs_UTsub['ReportingUnitUUID'] = dfs_UTsub['RU_Name'].apply(retrieveUUID)

# Merge the reporting-unit attributes onto the shapes with a left join, so
# every shape is kept even when it has no matching reporting unit.
dfs_UTsub = pd.merge(dfs_UTsub, df_1RU_Custom_UT, on='ReportingUnitUUID', how='left')

# Create the state-specific output dataframe with the fields of interest.
dfs_2UTsub = pd.DataFrame() #empty dataframe
dfs_2UTsub['OBJECTID'] = dfs_UTsub.index  # row position within this state's frame
dfs_2UTsub['Shape'] = 'Polygon'
dfs_2UTsub['UnitID'] = dfs_UTsub['ReportingUnitID']
dfs_2UTsub['UnitUUID'] = dfs_UTsub['ReportingUnitUUID']
dfs_2UTsub['NativeID'] = dfs_UTsub['ReportingUnitNativeID']
dfs_2UTsub['Name'] = dfs_UTsub['ReportingUnitName']
dfs_2UTsub['TypeCV'] = dfs_UTsub['ReportingUnitTypeCV']
dfs_2UTsub['StateCV'] = dfs_UTsub['StateCV']
dfs_2UTsub['geometry'] = dfs_UTsub['geometry']

#view output
dfs_2UTsub.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,27472.0,UTag_RU93,00-01-03,Curlew Valley,Subarea,UT,"POLYGON ((-112.58699 42.00092, -112.57918 41.9..."
1,1,Polygon,27470.0,UTag_RU91,000-01-03,Clear Creek,Subarea,UT,"POLYGON ((-113.17778 42.00082, -113.20384 41.9..."
2,2,Polygon,27251.0,UTag_RU103,00-07-02,Promontory Point,Subarea,UT,"POLYGON ((-112.36394 42.00018, -112.38128 41.9..."


### Concatenate and Export

In [9]:
# Merge dataframes
# Stack the CA, AZ and UT output frames row-wise. Each frame keeps its own
# 0-based index and OBJECTID, so both values repeat across states in outdf.
frames = [dfs_2CAPA, dfs_2AZAMA, dfs_2UTsub]
outdf = pd.concat(frames)
outdf.head(3)  # preview the first three rows

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26745.0,CAag_RU51,1001,1001,Planning Area,CA,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,Polygon,26746.0,CAag_RU52,1002,1002,Planning Area,CA,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,Polygon,26747.0,CAag_RU53,1003,1003,Planning Area,CA,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


In [10]:
# drop NA rows
# Shapes that found no matching reporting unit have a null UnitID after the
# left joins in the state sections, so this removes them from the export.
# NOTE(review): the displayed result shows NativeID values such as
# '2007-01-01 00:00:00' for UT rows whose shapefile RU_ID is '01-01-07' —
# looks like those IDs were converted to dates somewhere upstream of
# Pagg_ReportingUnit.csv; confirm and fix at the source.
outdf = outdf.dropna(subset=['UnitID'])
outdf  # display the filtered frame

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26745.0,CAag_RU51,1001,1001,Planning Area,CA,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,Polygon,26746.0,CAag_RU52,1002,1002,Planning Area,CA,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,Polygon,26747.0,CAag_RU53,1003,1003,Planning Area,CA,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."
3,3,Polygon,26748.0,CAag_RU54,1004,1004,Planning Area,CA,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
4,4,Polygon,26749.0,CAag_RU55,1005,1005,Planning Area,CA,"POLYGON ((-116.56630 33.54937, -116.56550 33.5..."
...,...,...,...,...,...,...,...,...,...
144,144,Polygon,27272.0,UTag_RU122,2007-01-01 00:00:00,Brigham City,Subarea,UT,"POLYGON ((-112.01361 41.64418, -112.00460 41.6..."
145,145,Polygon,27253.0,UTag_RU105,2004-01-01 00:00:00,Cache Valley,Subarea,UT,"POLYGON ((-111.50799 42.00025, -111.50842 41.9..."
146,146,Polygon,27474.0,UTag_RU95,2002-01-03 00:00:00,Randolph,Subarea,UT,"POLYGON ((-111.04977 41.80850, -111.05113 41.5..."
147,147,Polygon,27273.0,UTag_RU123,2001-01-03 00:00:00,Evanston,Subarea,UT,"MULTIPOLYGON (((-110.72426 40.99222, -110.7332..."


In [11]:
# Export the dataframe to a shapefile.
# Convert to a GeoDataFrame. Passing crs= only labels the geometries as
# EPSG:4326; it does not reproject them.
# NOTE(review): assumes all three source shapefiles are already in EPSG:4326 — confirm.
dfsOut = gpd.GeoDataFrame(outdf,  crs="EPSG:4326", geometry='geometry')
# Make sure the output folder exists so the export does not fail on a fresh checkout.
os.makedirs("Processed_Shapefiles", exist_ok=True)
dfsOut.to_file("Processed_Shapefiles/WaDE_CustomBasin.shp") # export shape file