# Creating Shapefiles for Web App - HUC8

Inputs: 
1) Pagg_ReportingUnit.csv.  Contains reporting unit info from the WaDE database.

2) HUC8_US.shp (in SourceFiles/HUC8).  Shapefile of United States HUC8 boundaries.

Pairing HUC8 to ReportingUnitNativeID

In [1]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory.
# The project folder can be overridden with the WADE_WORKING_DIR environment
# variable so the notebook runs on other machines; when the variable is not set
# the original local path is used. All later relative paths depend on this chdir.
workingDir = os.environ.get(
    "WADE_WORKING_DIR",
    "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles",
)
os.chdir(workingDir)

# Grab AggregatedAmounts ReportingUnit.csv file.
# The identifier columns are read as text: ReportingUnitNativeID is later compared
# against the HUC8 codes of the shapefile, so it must never be parsed as a number
# (which would also drop leading zeros).
reportingunits_input = pd.read_csv(
    'SourceFiles/Pagg_ReportingUnit.csv',
    dtype={'ReportingUnitNativeID': str, 'ReportingUnitUUID': str},
)
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,EPSGCodeCV
0,254,00-01-03,00-01-03,Curlew Valley,Subarea,UT,EPSG:4326
1,255,000-01-03,000-01-03,Clear Creek,Subarea,UT,EPSG:4326
2,256,00-07-02,00-07-02,Promontory Point,Subarea,UT,EPSG:4326


In [3]:
# Load the HUC8 polygons from the source shapefile ...
shapefile_input = gpd.read_file('SourceFiles/HUC8/HUC8_US.shp')

# ... and expose them as a plain pandas DataFrame for the attribute joins below.
dfs_1HUC8 = pd.DataFrame(data=shapefile_input)

# Preview the first three polygons.
dfs_1HUC8.head(n=3)

Unnamed: 0,TNMID,METASOURCE,SOURCEDATA,SOURCEORIG,SOURCEFEAT,LOADDATE,GNIS_ID,AREAACRES,AREASQKM,STATES,HUC8,NAME,Shape_Leng,Shape_Area,geometry
0,{4CCAA733-584D-4347-A7F3-4E664ADA8B9B},,,,,2012-06-11,0,1104144.63,4468.32,"CO,NM",11080001,Canadian Headwaters,3.943636,0.450856,"POLYGON ((-104.18034 36.92065, -104.18082 36.9..."
1,{70274AE7-175E-410E-A1BD-C5B4A8E36460},,,,,2012-06-11,0,671679.8,2718.19,NM,11080002,Cimarron,3.00689,0.273469,"POLYGON ((-104.49909 36.32423, -104.49957 36.3..."
2,{AC101059-1D6E-465C-AF7F-605322DDFBF9},,,,,2012-06-11,0,1314676.86,5320.31,NM,11080003,Upper Canadian,6.201702,0.531733,"POLYGON ((-104.21278 35.94052, -104.21295 35.9..."


In [4]:
# HUC8
# State: Utah
###########################################################################

# Reporting units of type HUC8 that belong to Utah.
df_1RU_HUC8_UT = df_1RU[(df_1RU.ReportingUnitTypeCV == 'HUC8') & (df_1RU.StateCV == 'UT')]

# Lookup table: ReportingUnitNativeID -> ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_HUC8_UT.ReportingUnitUUID.values, index = df_1RU_HUC8_UT.ReportingUnitNativeID).to_dict()


def retrieveCountyName(colrowValue):
    """Return the ReportingUnitUUID whose ReportingUnitNativeID equals the given HUC8 code.

    The function name is kept unchanged from the original notebook; despite the
    name, it looks up a ReportingUnitUUID, not a county name.

    Parameters
    ----------
    colrowValue : value taken from the 'HUC8' column of the shapefile.

    Returns
    -------
    The matching ReportingUnitUUID, or '' when the value is empty, null, or has
    no entry in ReportingUnitUUIDdict.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    # dict.get replaces the former bare `except:` so that only a missing key
    # yields '' and unrelated errors are no longer silently swallowed.
    return ReportingUnitUUIDdict.get(colrowValue, '')


# Create the temporary state-specific frame.
# No STATES filter is applied: every HUC8 polygon is kept and matching is done on
# the HUC8 code. (STATES can list several states, e.g. 'CO,NM', so an equality test
# such as STATES == 'UT' would presumably miss multi-state HUC8s.)
# .assign() returns a new frame, so the shared dfs_1HUC8 frame is not mutated, and
# the lookup runs on the single HUC8 column instead of a row-wise apply.
dfs_1HUC8_UT = dfs_1HUC8.assign(ReportingUnitUUID=dfs_1HUC8['HUC8'].map(retrieveCountyName))

# Merging temporary dataframes into one, using left-join.
# Polygons without a Utah reporting unit carry ReportingUnitUUID == '' and therefore
# end up with NaN in all reporting-unit columns.
dfs_1HUC8_UT = pd.merge(dfs_1HUC8_UT, df_1RU_HUC8_UT, on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2HUC8_UT = pd.DataFrame() #empty dataframe
dfs_2HUC8_UT['OBJECTID'] = dfs_1HUC8_UT.index
dfs_2HUC8_UT['Shape'] = 'Polygon'
dfs_2HUC8_UT['ReportingUnitID'] = dfs_1HUC8_UT['ReportingUnitID']
dfs_2HUC8_UT['ReportingUnitUUID'] = dfs_1HUC8_UT['ReportingUnitUUID']
dfs_2HUC8_UT['ReportingUnitNativeID'] = dfs_1HUC8_UT['ReportingUnitNativeID']
dfs_2HUC8_UT['ReportingUnitName'] = dfs_1HUC8_UT['ReportingUnitName']
dfs_2HUC8_UT['ReportingUnitTypeCV'] = dfs_1HUC8_UT['ReportingUnitTypeCV']
dfs_2HUC8_UT['StateCV'] = dfs_1HUC8_UT['StateCV']
dfs_2HUC8_UT['Shape_Length'] = dfs_1HUC8_UT['Shape_Leng']  # shapefile field is named 'Shape_Leng'
dfs_2HUC8_UT['Shape_Area'] = dfs_1HUC8_UT['Shape_Area']
dfs_2HUC8_UT['geometry'] = dfs_1HUC8_UT['geometry']

# view output
dfs_2HUC8_UT.head(3)

Unnamed: 0,OBJECTID,Shape,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,,,,,,,3.943636,0.450856,"POLYGON ((-104.18034 36.92065, -104.18082 36.9..."
1,1,Polygon,,,,,,,3.00689,0.273469,"POLYGON ((-104.49909 36.32423, -104.49957 36.3..."
2,2,Polygon,,,,,,,6.201702,0.531733,"POLYGON ((-104.21278 35.94052, -104.21295 35.9..."


In [5]:
# HUC8
# State: CO
###########################################################################

# Reporting units of type HUC8 that belong to Colorado.
df_1RU_HUC8_CO = df_1RU[(df_1RU.ReportingUnitTypeCV == 'HUC8') & (df_1RU.StateCV == 'CO')]

# Lookup table: ReportingUnitNativeID -> ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_HUC8_CO.ReportingUnitUUID.values, index = df_1RU_HUC8_CO.ReportingUnitNativeID).to_dict()


def retrieveCountyName(colrowValue):
    """Return the ReportingUnitUUID whose ReportingUnitNativeID equals the given HUC8 code.

    The function name is kept unchanged from the original notebook; despite the
    name, it looks up a ReportingUnitUUID, not a county name.

    Parameters
    ----------
    colrowValue : value taken from the 'HUC8' column of the shapefile.

    Returns
    -------
    The matching ReportingUnitUUID, or '' when the value is empty, null, or has
    no entry in ReportingUnitUUIDdict.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    # dict.get replaces the former bare `except:` so that only a missing key
    # yields '' and unrelated errors are no longer silently swallowed.
    return ReportingUnitUUIDdict.get(colrowValue, '')


# Create the temporary state-specific frame.
# No STATES filter is applied: every HUC8 polygon is kept and matching is done on
# the HUC8 code. (STATES can list several states, e.g. 'CO,NM', so an equality test
# such as STATES == 'CO' would presumably miss multi-state HUC8s.)
# .assign() returns a new frame, so the shared dfs_1HUC8 frame is not mutated, and
# the lookup runs on the single HUC8 column instead of a row-wise apply.
dfs_1HUC8_CO = dfs_1HUC8.assign(ReportingUnitUUID=dfs_1HUC8['HUC8'].map(retrieveCountyName))

# Merging temporary dataframes into one, using left-join.
# Polygons without a Colorado reporting unit carry ReportingUnitUUID == '' and
# therefore end up with NaN in all reporting-unit columns.
dfs_1HUC8_CO = pd.merge(dfs_1HUC8_CO, df_1RU_HUC8_CO, on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2HUC8_CO = pd.DataFrame() #empty dataframe
dfs_2HUC8_CO['OBJECTID'] = dfs_1HUC8_CO.index
dfs_2HUC8_CO['Shape'] = 'Polygon'
dfs_2HUC8_CO['ReportingUnitID'] = dfs_1HUC8_CO['ReportingUnitID']
dfs_2HUC8_CO['ReportingUnitUUID'] = dfs_1HUC8_CO['ReportingUnitUUID']
dfs_2HUC8_CO['ReportingUnitNativeID'] = dfs_1HUC8_CO['ReportingUnitNativeID']
dfs_2HUC8_CO['ReportingUnitName'] = dfs_1HUC8_CO['ReportingUnitName']
dfs_2HUC8_CO['ReportingUnitTypeCV'] = dfs_1HUC8_CO['ReportingUnitTypeCV']
dfs_2HUC8_CO['StateCV'] = dfs_1HUC8_CO['StateCV']
dfs_2HUC8_CO['Shape_Length'] = dfs_1HUC8_CO['Shape_Leng']  # shapefile field is named 'Shape_Leng'
dfs_2HUC8_CO['Shape_Area'] = dfs_1HUC8_CO['Shape_Area']
dfs_2HUC8_CO['geometry'] = dfs_1HUC8_CO['geometry']

# view output
dfs_2HUC8_CO.head(3)

Unnamed: 0,OBJECTID,Shape,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,,,,,,,3.943636,0.450856,"POLYGON ((-104.18034 36.92065, -104.18082 36.9..."
1,1,Polygon,,,,,,,3.00689,0.273469,"POLYGON ((-104.49909 36.32423, -104.49957 36.3..."
2,2,Polygon,,,,,,,6.201702,0.531733,"POLYGON ((-104.21278 35.94052, -104.21295 35.9..."


In [6]:
# Stack the per-state outputs (Utah first, then Colorado) row-wise into one frame.
# The original row labels of each state frame are kept as-is.
frames = [dfs_2HUC8_UT, dfs_2HUC8_CO]
outdf = pd.concat(objs=frames, axis=0)

# Preview the first three rows.
outdf.head(n=3)

Unnamed: 0,OBJECTID,Shape,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,,,,,,,3.943636,0.450856,"POLYGON ((-104.18034 36.92065, -104.18082 36.9..."
1,1,Polygon,,,,,,,3.00689,0.273469,"POLYGON ((-104.49909 36.32423, -104.49957 36.3..."
2,2,Polygon,,,,,,,6.201702,0.531733,"POLYGON ((-104.21278 35.94052, -104.21295 35.9..."


In [7]:
# Keep only the polygons that were matched to a reporting unit,
# i.e. discard every row whose ReportingUnitID is NA.
outdf = outdf.loc[outdf['ReportingUnitID'].notna().to_numpy()]
outdf

Unnamed: 0,OBJECTID,Shape,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,Shape_Length,Shape_Area,geometry
55,55,Polygon,434.0,15010013,15010013,Meadow Valley Wash,HUC8,UT,6.318081,0.670572,"POLYGON ((-114.03649 37.95701, -114.03647 37.9..."
59,59,Polygon,433.0,15010010,15010010,Lower Virgin,HUC8,UT,4.699079,0.540441,"POLYGON ((-114.06750 37.60696, -114.06708 37.6..."
97,97,Polygon,432.0,15010009,15010009,Fort Pearce Wash,HUC8,UT,3.953473,0.436551,"POLYGON ((-112.75584 37.06340, -112.75670 37.0..."
224,224,Polygon,428.0,14080204,14080204,Chinle,HUC8,UT,7.769751,1.066831,"POLYGON ((-109.17363 36.79355, -109.17375 36.7..."
227,227,Polygon,423.0,14070006,14070006,Lower Lake Powell,HUC8,UT,5.126493,0.770043,"POLYGON ((-111.00805 37.22851, -111.00779 37.2..."
...,...,...,...,...,...,...,...,...,...,...,...
2034,2034,Polygon,20881.0,COag_RU34,11020009,Upper Arkansas-John Martin Reservoir,HUC8,CO,7.004299,1.026645,"POLYGON ((-103.63509 38.80092, -103.63364 38.8..."
2037,2037,Polygon,20884.0,COag_RU37,14020002,Upper Gunnison,HUC8,CO,5.238936,0.643883,"POLYGON ((-107.03062 38.83861, -107.03026 38.8..."
2038,2038,Polygon,20878.0,COag_RU31,14020003,Tomichi,HUC8,CO,3.160962,0.294426,"POLYGON ((-106.56505 38.69850, -106.56484 38.6..."
2039,2039,Polygon,20862.0,COag_RU15,14020005,Lower Gunnison,HUC8,CO,4.109297,0.446387,"POLYGON ((-107.85652 39.07754, -107.85617 39.0..."


In [8]:
# Export the dataframe to a shapefile.
# Note: remember that shapefiles auto-fill (alter) field names on export, so the
# field names will need to be fixed in the app upload.

# Make sure the output folder exists so the export does not fail on a fresh checkout.
os.makedirs("Processed_Shapefiles", exist_ok=True)

# Convert to a GeoDataFrame. The coordinate reference system is taken explicitly
# from the source shapefile: the intermediate frames were plain pandas DataFrames,
# so depending on the geopandas version the CRS may not have been carried along,
# and the exported shapefile would then lack projection information.
dfsOut = gpd.GeoDataFrame(outdf, geometry='geometry', crs=shapefile_input.crs)
dfsOut.to_file("Processed_Shapefiles/P_WaDEHUC8.shp") # export shape file