# Creating Shapefiles for Web App - Custom Basin 2_DAUCO

Inputs: 
- Pagg_ReportingUnit.csv: contains reporting unit info from the WaDE database.
- Detailed Analysis Units by County source shapefile.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.options.display.max_columns = 999  # let rendered DataFrames show up to 999 columns instead of truncating

In [2]:
# Set working directory.
# Defaults to the original author's project folder; set the WADE_APP2_WORKDIR
# environment variable to run the notebook elsewhere without editing this cell.
workingDir = os.environ.get(
    "WADE_APP2_WORKDIR",
    "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape",
)
os.chdir(workingDir)  # relative paths used below are resolved against this folder

# Grab the AggregatedAmounts ReportingUnit.csv file (reporting unit info from the WaDE database).
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,WaDEName
0,27796,AZag_RU1,WaDEAZ_RU1,PHOENIX AMA,Active Management Area,AZ,Custom Basin
1,27797,AZag_RU2,WaDEAZ_RU2,PINAL AMA,Active Management Area,AZ,Custom Basin
2,27798,AZag_RU3,WaDEAZ_RU3,PRESCOTT AMA,Active Management Area,AZ,Custom Basin


### California - Detailed Analysis Units by County

In [3]:
# Grab the CA Detailed Analysis Units by County (DAUCO) shapefile.
# Its RU_ID field is what gets paired with ReportingUnitNativeID further down.
# The path is relative to the working directory set by os.chdir() earlier in the
# notebook, the same way the reporting unit CSV is read.

CADAUCOshapefile_input = gpd.read_file('SourceFiles/Custom/CA/WaDECADAU.shp')
dfs_CADAUCO = pd.DataFrame(CADAUCOshapefile_input)  # plain pandas DataFrame; the geometry column is kept
dfs_CADAUCO.head(3)

Unnamed: 0,HR_Code,PA_Num,Shape_Leng,Shape_Le_1,Shape_Area,RU_ID,geometry
0,1,101,2.962973,2.962973,0.330267,DAU00125,"POLYGON ((-121.08710 41.99514, -120.70108 41.9..."
1,1,101,2.002975,2.002975,0.147869,DAU00147,"POLYGON ((-121.88226 42.00329, -121.44784 41.9..."
2,1,101,2.012166,2.012166,0.166514,DAU00247,"POLYGON ((-122.02221 42.00440, -121.94694 42.0..."


In [4]:
# Custom
# State: CA, Detailed Analysis Units by County
###########################################################################

# Subset of the reporting unit table: CA rows of type 'Detailed Analysis Units by County'.
df_1RU_Custom_CAdauco = df_1RU.loc[
    df_1RU['ReportingUnitTypeCV'].eq('Detailed Analysis Units by County')
    & df_1RU['StateCV'].eq('CA')
]

# Lookup table: ReportingUnitNativeID -> ReportingUnitUUID.
ReportingUnitUUIDdict = (
    df_1RU_Custom_CAdauco
    .set_index('ReportingUnitNativeID')['ReportingUnitUUID']
    .to_dict()
)
def retrieveCountyName(colrowValue, uuidLookup=None):
    """Return the ReportingUnitUUID paired with a native reporting-unit ID.

    Despite its name, this helper does not return a county name: it looks the
    given ID up in a NativeID -> ReportingUnitUUID mapping.

    Parameters
    ----------
    colrowValue : str or missing value
        Native ID to look up (the notebook passes the shapefile's RU_ID).
    uuidLookup : dict, optional
        Mapping to search. When omitted, the notebook-level
        ``ReportingUnitUUIDdict`` is used (resolved at call time).

    Returns
    -------
    The mapped value (a UUID string in this notebook), or '' when the input
    is missing, empty, or has no entry in the mapping.
    """
    if uuidLookup is None:
        uuidLookup = ReportingUnitUUIDdict
    # Check for missing values first: `pd.NA == ''` is not a plain bool and
    # cannot be used directly in an `if`.
    if pd.isnull(colrowValue) or colrowValue == '':
        return ''
    # dict.get replaces the former bare `except:`, which hid every error,
    # not just a missing key.
    return uuidLookup.get(colrowValue, '')
# Look up the ReportingUnitUUID for each polygon from its RU_ID.
# Series.map runs the lookup once per value; rows without a match get ''.
dfs_CADAUCO['ReportingUnitUUID'] = dfs_CADAUCO['RU_ID'].map(retrieveCountyName)

# Merging temporal dataframes into one, using left-join: every polygon is kept and
# unmatched ones get NaN in the reporting unit columns.
# NOTE: dfs_CADAUCO is reassigned here, so re-running this cell without re-loading
# the shapefile first merges a second time and yields suffixed duplicate columns.
dfs_CADAUCO = pd.merge(dfs_CADAUCO, df_1RU_Custom_CAdauco, on='ReportingUnitUUID', how='left')

# Assemble the state-specific output dataframe in one step, keeping only the
# fields of interest under their output names.
dfs_2CADAUCO = pd.DataFrame({
    'OBJECTID': dfs_CADAUCO.index,   # row position in the merged frame
    'Shape': 'Polygon',              # constant for every row
    'UnitID': dfs_CADAUCO['ReportingUnitID'],
    'UnitUUID': dfs_CADAUCO['ReportingUnitUUID'],
    'NativeID': dfs_CADAUCO['ReportingUnitNativeID'],
    'Name': dfs_CADAUCO['ReportingUnitName'],
    'TypeCV': dfs_CADAUCO['ReportingUnitTypeCV'],
    'StateCV': dfs_CADAUCO['StateCV'],
    'geometry': dfs_CADAUCO['geometry'],
})

# preview the result
dfs_2CADAUCO.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26761.0,CAag_RU67,DAU00125,Lost River,Detailed Analysis Units by County,CA,"POLYGON ((-121.08710 41.99514, -120.70108 41.9..."
1,1,Polygon,26762.0,CAag_RU68,DAU00147,Lost River,Detailed Analysis Units by County,CA,"POLYGON ((-121.88226 42.00329, -121.44784 41.9..."
2,2,Polygon,26763.0,CAag_RU69,DAU00247,Butte Valley,Detailed Analysis Units by County,CA,"POLYGON ((-122.02221 42.00440, -121.94694 42.0..."


### Concatenate and Export

In [5]:
# Stack the prepared frames into a single output frame
# (only the CA DAUCO frame is in the list here).
frames = [dfs_2CADAUCO]
outdf = pd.concat(objs=frames, axis=0)
outdf.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26761.0,CAag_RU67,DAU00125,Lost River,Detailed Analysis Units by County,CA,"POLYGON ((-121.08710 41.99514, -120.70108 41.9..."
1,1,Polygon,26762.0,CAag_RU68,DAU00147,Lost River,Detailed Analysis Units by County,CA,"POLYGON ((-121.88226 42.00329, -121.44784 41.9..."
2,2,Polygon,26763.0,CAag_RU69,DAU00247,Butte Valley,Detailed Analysis Units by County,CA,"POLYGON ((-122.02221 42.00440, -121.94694 42.0..."


In [6]:
# Keep only rows whose UnitID is not null, i.e. polygons that matched a reporting unit.
outdf = outdf[outdf['UnitID'].notna()]
outdf

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26761.0,CAag_RU67,DAU00125,Lost River,Detailed Analysis Units by County,CA,"POLYGON ((-121.08710 41.99514, -120.70108 41.9..."
1,1,Polygon,26762.0,CAag_RU68,DAU00147,Lost River,Detailed Analysis Units by County,CA,"POLYGON ((-121.88226 42.00329, -121.44784 41.9..."
2,2,Polygon,26763.0,CAag_RU69,DAU00247,Butte Valley,Detailed Analysis Units by County,CA,"POLYGON ((-122.02221 42.00440, -121.94694 42.0..."
3,3,Polygon,26764.0,CAag_RU70,DAU00347,Scott Valley,Detailed Analysis Units by County,CA,"POLYGON ((-122.81946 41.76864, -122.79461 41.7..."
4,4,Polygon,26765.0,CAag_RU71,DAU00447,Shasta Valley,Detailed Analysis Units by County,CA,"POLYGON ((-122.58104 41.82717, -122.56769 41.8..."
...,...,...,...,...,...,...,...,...,...
514,514,Polygon,27241.0,CAag_RU547,DAU40323,Upper Russian,Detailed Analysis Units by County,CA,"POLYGON ((-123.18216 39.24063, -123.18714 39.2..."
516,516,Polygon,27242.0,CAag_RU548,DAU40423,Middle Russian,Detailed Analysis Units by County,CA,"POLYGON ((-122.85634 38.86348, -122.84992 38.8..."
517,517,Polygon,27243.0,CAag_RU549,DAU40449,Middle Russian,Detailed Analysis Units by County,CA,"POLYGON ((-122.82249 38.85118, -122.81670 38.8..."
518,518,Polygon,27244.0,CAag_RU550,DAU40523,Dry Creek,Detailed Analysis Units by County,CA,"POLYGON ((-123.10947 38.87033, -123.11125 38.8..."


In [7]:
# Export the dataframe to a shapefile.
# NOTE(review): EPSG:4326 is declared here, not reprojected to — confirm the source
# shapefile's coordinates are in that CRS.
dfsOut = gpd.GeoDataFrame(outdf,  crs="EPSG:4326", geometry='geometry') # convert to geodataframe
os.makedirs("Processed_Shapefiles", exist_ok=True) # make sure the output folder exists before writing
dfsOut.to_file("Processed_Shapefiles/WaDE_CustomBasin2_DAUCO.shp") # export shape file