# Creating Shapefiles for Web App - Custom_C

Inputs: 
1) Pagg_ReportingUnit.csv.  Contains reporting unit info from the WaDE database.

In [1]:
# Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory.
# The original author's local folder stays the default so existing runs behave
# the same; set the WADE_SHAPEFILE_WORKDIR environment variable to run the
# notebook from a different location without editing this cell.
workingDir = os.environ.get(
    "WADE_SHAPEFILE_WORKDIR",
    "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles",
)
os.chdir(workingDir)

# Grab the AggregatedAmounts ReportingUnit.csv file (path is relative to workingDir).
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,EPSGCodeCV
0,20733,NM_C_1,35001,Bernalillo,County,NM,EPSG:4326
1,20734,NM_C_2,35003,Catron,County,NM,EPSG:4326
2,20735,NM_C_3,35005,Chaves,County,NM,EPSG:4326


### California - Hydrologic Region

In [3]:
# Grab the CA Hydrologic Regions shapefile.
# Pairing HR_NAME to ReportingUnitName.
# The path is relative to the working directory set with os.chdir() earlier in
# the notebook, the same way the reporting-unit CSV is located.
CAHRshapefile_input = gpd.read_file('SourceFiles/Custom/CA/CA_Hydrologic_Regions.shp')

# The export step labels the geometry as EPSG:4326. Reproject here so that label
# is true even when the source shapefile declares a different CRS. A file with
# no declared CRS is left untouched (there is nothing to transform from).
if CAHRshapefile_input.crs is not None:
    CAHRshapefile_input = CAHRshapefile_input.to_crs("EPSG:4326")

dfs_HR = pd.DataFrame(CAHRshapefile_input)
dfs_HR.head(3)

Unnamed: 0,OBJECTID,HR_NAME,geometry
0,13,Central Coast,"POLYGON ((-122.11808 37.25528, -122.11795 37.2..."
1,14,Colorado River,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
2,15,North Coast,"POLYGON ((-122.30410 42.00836, -122.28218 42.0..."


In [4]:
# Custom
# State: CA, Hydrologic Regions
###########################################################################

# Subset of the reporting units: California rows whose type is 'Hydrologic Region'.
df_1RU_Custom_CAHR = df_1RU[
    (df_1RU['StateCV'] == 'CA') & (df_1RU['ReportingUnitTypeCV'] == 'Hydrologic Region')
]

# Lookup table used to retrieve the ReportingUnitUUID: ReportingUnitName -> ReportingUnitUUID.
ReportingUnitUUIDdict = dict(
    zip(df_1RU_Custom_CAHR['ReportingUnitName'], df_1RU_Custom_CAHR['ReportingUnitUUID'])
)
def retrieveCountyName(colrowValue):
    """Return the ReportingUnitUUID mapped to ``colrowValue``.

    Despite its name, this function does not return a county name: it looks
    the given value up as a key in the module-level ``ReportingUnitUUIDdict``
    and returns the associated value.

    Parameters
    ----------
    colrowValue : object
        The key to look up (called with a shapefile's ``HR_NAME`` value).

    Returns
    -------
    The mapped value, or ``''`` when ``colrowValue`` is an empty string, is
    null according to ``pd.isnull``, or is not a key in the dictionary.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    # dict.get replaces the former bare `except:`, which silenced every error
    # (not just a missing key) and could hide real problems.
    return ReportingUnitUUIDdict.get(colrowValue, '')
# Columns that the merge below brings in from the reporting-unit table
# (everything except the join key). Dropping them first keeps this cell
# re-runnable: without it, a second run would merge onto the already merged
# frame, produce *_x / *_y suffixed columns, and fail on the lookups below.
dfs_HR = dfs_HR.drop(
    columns=df_1RU_Custom_CAHR.columns.difference(['ReportingUnitUUID']),
    errors='ignore',
)

# Look up the ReportingUnitUUID for each region name ('' when there is no match).
dfs_HR['ReportingUnitUUID'] = dfs_HR['HR_NAME'].apply(retrieveCountyName)

# Merging temporal dataframes into one, using left-join.
dfs_HR = pd.merge(dfs_HR, df_1RU_Custom_CAHR, on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2HR = pd.DataFrame({
    'OBJECTID': dfs_HR.index,
    'Shape': 'Polygon',
    'UnitID': dfs_HR['ReportingUnitID'],
    'UnitUUID': dfs_HR['ReportingUnitUUID'],
    'NativeID': dfs_HR['ReportingUnitNativeID'],
    'Name': dfs_HR['ReportingUnitName'],
    'TypeCV': dfs_HR['ReportingUnitTypeCV'],
    'StateCV': dfs_HR['StateCV'],
    'geometry': dfs_HR['geometry'],
})

# view output
dfs_2HR.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,25967,CAag_RU57,3,Central Coast,Hydrologic Region,CA,"POLYGON ((-122.11808 37.25528, -122.11795 37.2..."
1,1,Polygon,25968,CAag_RU58,10,Colorado River,Hydrologic Region,CA,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
2,2,Polygon,25969,CAag_RU59,1,North Coast,Hydrologic Region,CA,"POLYGON ((-122.30410 42.00836, -122.28218 42.0..."


In [5]:
# Stack every per-region-type frame into a single output frame.
# Only the CA Hydrologic Regions frame is in the list here.
frames = [dfs_2HR]
outdf = pd.concat(objs=frames)
outdf.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,25967,CAag_RU57,3,Central Coast,Hydrologic Region,CA,"POLYGON ((-122.11808 37.25528, -122.11795 37.2..."
1,1,Polygon,25968,CAag_RU58,10,Colorado River,Hydrologic Region,CA,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
2,2,Polygon,25969,CAag_RU59,1,North Coast,Hydrologic Region,CA,"POLYGON ((-122.30410 42.00836, -122.28218 42.0..."


In [6]:
# Remove rows that have no UnitID.
outdf = outdf.dropna(axis='index', how='any', subset=['UnitID'])
outdf

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,25967,CAag_RU57,3,Central Coast,Hydrologic Region,CA,"POLYGON ((-122.11808 37.25528, -122.11795 37.2..."
1,1,Polygon,25968,CAag_RU58,10,Colorado River,Hydrologic Region,CA,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
2,2,Polygon,25969,CAag_RU59,1,North Coast,Hydrologic Region,CA,"POLYGON ((-122.30410 42.00836, -122.28218 42.0..."
3,3,Polygon,25971,CAag_RU60,8,North Lahontan,Hydrologic Region,CA,"POLYGON ((-119.99946 41.99466, -119.99940 41.9..."
4,4,Polygon,25972,CAag_RU61,5,Sacramento River,Hydrologic Region,CA,"POLYGON ((-120.20882 41.99296, -120.20892 41.9..."
5,5,Polygon,25973,CAag_RU62,2,San Francisco Bay,Hydrologic Region,CA,"POLYGON ((-122.60736 38.65174, -122.60694 38.6..."
6,6,Polygon,25974,CAag_RU63,6,San Joaquin River,Hydrologic Region,CA,"POLYGON ((-120.53749 38.75047, -120.53636 38.7..."
7,7,Polygon,25975,CAag_RU64,4,South Coast,Hydrologic Region,CA,"POLYGON ((-119.10918 34.82375, -119.10905 34.8..."
8,8,Polygon,25976,CAag_RU65,9,South Lahontan,Hydrologic Region,CA,"POLYGON ((-118.88460 38.22193, -118.87599 38.2..."
9,9,Polygon,25977,CAag_RU66,7,Tulare Lake,Hydrologic Region,CA,"POLYGON ((-118.89596 37.20829, -118.89539 37.2..."


In [7]:
# Export the dataframe to a shapefile.
# Note: remember that shapefiles autofill field names; the field names will need to be fixed in the app upload.

# Make sure the output folder exists so the export does not fail when it is absent
# (e.g. on a fresh checkout).
os.makedirs("Processed_Shapefiles", exist_ok=True)

dfsOut = gpd.GeoDataFrame(outdf, crs="EPSG:4326", geometry='geometry')  # convert to geodataframe
dfsOut.to_file("Processed_Shapefiles/P_Custom_C.shp")  # export shape file