# Creating Shapefiles for Web App - Counties

Inputs: 
1) Pagg_ReportingUnit.csv.  Contains reportinug unit info from WaDE database.

2) WaDECounties.shp.  Shapefile of Unitied State counties.  

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory
workingDir = "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles"
os.chdir(workingDir)

# Grab AggreagatedAmounts ReportingUnit.csv file.
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,EPSGCodeCV
0,20733,NM_C_1,35001,Bernalillo,County,NM,EPSG:4326
1,20734,NM_C_2,35003,Catron,County,NM,EPSG:4326
2,20735,NM_C_3,35005,Chaves,County,NM,EPSG:4326


In [3]:
# Grab the WaDE County Shapefile.
shapefile_input = gpd.read_file('SourceFiles/Counties/WaDECounties.shp')
dfs_1County = pd.DataFrame(shapefile_input)
dfs_1County.head(3)

Unnamed: 0,GEOID,NAME,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,StateCV,geometry
0,40011,Blaine,36,36-40011,2.082196,0.242637,1_Blaine_36,OK,"POLYGON ((-98.63690 36.16489, -98.21054 36.164..."
1,40065,Jackson,36,36-40065,3.125862,0.20441,1_Jackson_36,OK,"POLYGON ((-99.84466 34.50679, -99.66651 34.507..."
2,40079,Le Flore,36,36-40079,3.07134,0.411717,1_Le Flore_36,OK,"POLYGON ((-95.06005 34.80396, -95.05951 34.855..."


In [4]:
# t1 = gpd.read_file('SourceFiles/Counties/WaDECounties.shp')
# t1.crs

In [5]:
# County
# State: Utah
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
dfs_1County_UT = dfs_1County[(dfs_1County.StateCV == 'UT')]
df_1RU_County_UT = df_1RU[(df_1RU.ReportingUnitTypeCV == 'County') & ((df_1RU.StateCV == 'UT'))]

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_County_UT.ReportingUnitUUID.values, index = df_1RU_County_UT.ReportingUnitName).to_dict()
def retrieveCountyName(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_1County_UT['ReportingUnitUUID'] = dfs_1County_UT.apply(lambda row: retrieveCountyName(row['NAME']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_1County_UT = pd.merge(dfs_1County_UT, df_1RU_County_UT, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2County_UT = pd.DataFrame() #empty dataframe
dfs_2County_UT['OBJECTID'] = dfs_1County_UT.index
dfs_2County_UT['Shape'] = 'Polygon'
dfs_2County_UT['UnitID'] = dfs_1County_UT['ReportingUnitID']
dfs_2County_UT['UnitUUID'] = dfs_1County_UT['ReportingUnitUUID']
dfs_2County_UT['NativeID'] = dfs_1County_UT['ReportingUnitNativeID']
dfs_2County_UT['Name'] = dfs_1County_UT['ReportingUnitName']
dfs_2County_UT['TypeCV'] = dfs_1County_UT['ReportingUnitTypeCV']
dfs_2County_UT['StateCV'] = dfs_1County_UT['StateCV_y']
dfs_2County_UT['Shape_Length'] = dfs_1County_UT['Shape_Leng']
dfs_2County_UT['Shape_Area'] = dfs_1County_UT['Shape_Area']
dfs_2County_UT['geometry'] = dfs_1County_UT['geometry']

# view output
dfs_2County_UT.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_1County_UT['ReportingUnitUUID'] = dfs_1County_UT.apply(lambda row: retrieveCountyName(row['NAME']), axis=1)


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,24128,UTag_RU1,49001,Beaver,County,UT,4.225326,0.690759,"POLYGON ((-114.05049 38.49996, -114.05015 38.5..."
1,1,Polygon,24129,UTag_RU2,49003,Box Elder,County,UT,6.110898,1.880748,"POLYGON ((-114.04255 41.21092, -114.04172 41.9..."
2,2,Polygon,24151,UTag_RU24,49013,Duchesne,County,UT,3.918885,0.891891,"POLYGON ((-110.90435 40.70150, -110.89777 40.7..."


In [6]:
# County
# State: New Mexico
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
dfs_1County_NM = dfs_1County[(dfs_1County.StateCV == 'NM')]
df_1RU_County_NM = df_1RU[(df_1RU.ReportingUnitTypeCV == 'County') & ((df_1RU.StateCV == 'NM'))]

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_County_NM.ReportingUnitUUID.values, index = df_1RU_County_NM.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_1County_NM['ReportingUnitUUID'] = dfs_1County_NM.apply(lambda row: retrieveUUID(row['NAME']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_1County_NM = pd.merge(dfs_1County_NM, df_1RU_County_NM, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2County_NM = pd.DataFrame() #empty dataframe
dfs_2County_NM['OBJECTID'] = dfs_1County_NM.index
dfs_2County_NM['Shape'] = 'Polygon'
dfs_2County_NM['UnitID'] = dfs_1County_NM['ReportingUnitID']
dfs_2County_NM['UnitUUID'] = dfs_1County_NM['ReportingUnitUUID']
dfs_2County_NM['NativeID'] = dfs_1County_NM['ReportingUnitNativeID']
dfs_2County_NM['Name'] = dfs_1County_NM['ReportingUnitName']
dfs_2County_NM['TypeCV'] = dfs_1County_NM['ReportingUnitTypeCV']
dfs_2County_NM['StateCV'] = dfs_1County_NM['StateCV_y']
dfs_2County_NM['Shape_Length'] = dfs_1County_NM['Shape_Leng']
dfs_2County_NM['Shape_Area'] = dfs_1County_NM['Shape_Area']
dfs_2County_NM['geometry'] = dfs_1County_NM['geometry']

# # view output
dfs_2County_NM.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_1County_NM['ReportingUnitUUID'] = dfs_1County_NM.apply(lambda row: retrieveUUID(row['NAME']), axis=1)


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,20739,NM_C_7,35011,De Baca,County,NM,3.45123,0.592267,"POLYGON ((-104.89241 34.25992, -104.89202 34.6..."
1,1,Polygon,20749,NM_C_17,35029,Luna,County,NM,3.504984,0.734031,"POLYGON ((-108.22981 32.20716, -108.22934 32.5..."
2,2,Polygon,20751,NM_C_19,35033,Mora,County,NM,3.907811,0.499959,"POLYGON ((-105.72471 35.90021, -105.71861 35.9..."


In [7]:
# County
# State: Texas
# Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
dfs_1County_TX = dfs_1County[(dfs_1County.StateCV == 'TX')]
df_1RU_County_TX = df_1RU[(df_1RU.ReportingUnitTypeCV == 'County') & ((df_1RU.StateCV == 'TX'))]
df_1RU_County_TX['ReportingUnitName'] = df_1RU_County_TX['ReportingUnitName'].str.title()

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_County_TX.ReportingUnitUUID.values, index = df_1RU_County_TX.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_1County_TX['ReportingUnitUUID'] = dfs_1County_TX.apply(lambda row: retrieveUUID(row['NAME']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_1County_TX = pd.merge(dfs_1County_TX, df_1RU_County_TX, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2County_TX = pd.DataFrame() #empty dataframe
dfs_2County_TX['OBJECTID'] = dfs_1County_TX.index
dfs_2County_TX['Shape'] = 'Polygon'
dfs_2County_TX['UnitID'] = dfs_1County_TX['ReportingUnitID']
dfs_2County_TX['UnitUUID'] = dfs_1County_TX['ReportingUnitUUID']
dfs_2County_TX['NativeID'] = dfs_1County_TX['ReportingUnitNativeID']
dfs_2County_TX['Name'] = dfs_1County_TX['ReportingUnitName']
dfs_2County_TX['TypeCV'] = dfs_1County_TX['ReportingUnitTypeCV']
dfs_2County_TX['StateCV'] = dfs_1County_TX['StateCV_y']
dfs_2County_TX['Shape_Length'] = dfs_1County_TX['Shape_Leng']
dfs_2County_TX['Shape_Area'] = dfs_1County_TX['Shape_Area']
dfs_2County_TX['geometry'] = dfs_1County_TX['geometry']

# # view output
dfs_2County_TX.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1RU_County_TX['ReportingUnitName'] = df_1RU_County_TX['ReportingUnitName'].str.title()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_1County_TX['ReportingUnitUUID'] = dfs_1County_TX.apply(lambda row: retrieveUUID(row['NAME']), axis=1)


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,22854,TX_48349,48349,Goliad,County,TX,2.040917,0.205341,"POLYGON ((-97.77853 28.66803, -97.41734 28.925..."
1,1,Polygon,22859,TX_48369,48369,Grimes,County,TX,2.192218,0.195546,"POLYGON ((-96.18831 30.59961, -96.18678 30.605..."
2,2,Polygon,22874,TX_48429,48429,Hidalgo,County,TX,3.16572,0.371341,"POLYGON ((-98.58529 26.26027, -98.32067 26.783..."


In [8]:
# Merge dataframes
frames = [dfs_2County_UT, dfs_2County_NM, dfs_2County_TX]
outdf = pd.concat(frames)
outdf.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,24128,UTag_RU1,49001,Beaver,County,UT,4.225326,0.690759,"POLYGON ((-114.05049 38.49996, -114.05015 38.5..."
1,1,Polygon,24129,UTag_RU2,49003,Box Elder,County,UT,6.110898,1.880748,"POLYGON ((-114.04255 41.21092, -114.04172 41.9..."
2,2,Polygon,24151,UTag_RU24,49013,Duchesne,County,UT,3.918885,0.891891,"POLYGON ((-110.90435 40.70150, -110.89777 40.7..."


In [9]:
# drop NA rows
outdf = outdf.dropna(subset=['UnitID'])
outdf

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,Shape_Length,Shape_Area,geometry
0,0,Polygon,24128,UTag_RU1,49001,Beaver,County,UT,4.225326,0.690759,"POLYGON ((-114.05049 38.49996, -114.05015 38.5..."
1,1,Polygon,24129,UTag_RU2,49003,Box Elder,County,UT,6.110898,1.880748,"POLYGON ((-114.04255 41.21092, -114.04172 41.9..."
2,2,Polygon,24151,UTag_RU24,49013,Duchesne,County,UT,3.918885,0.891891,"POLYGON ((-110.90435 40.70150, -110.89777 40.7..."
3,3,Polygon,24152,UTag_RU25,49015,Emery,County,UT,6.238504,1.203165,"POLYGON ((-111.30701 38.67233, -111.29914 38.6..."
4,4,Polygon,24154,UTag_RU27,49019,Grand,County,UT,5.230998,0.991719,"POLYGON ((-110.17897 38.90920, -110.17447 38.9..."
...,...,...,...,...,...,...,...,...,...,...,...
249,249,Polygon,22791,TX_48097,48097,Brown,County,TX,2.062885,0.235828,"POLYGON ((-99.20341 31.75822, -99.19587 32.079..."
250,250,Polygon,22805,TX_48153,48153,Clay,County,TX,2.426910,0.281454,"POLYGON ((-98.42358 33.83605, -98.42353 34.082..."
251,251,Polygon,22846,TX_48317,48317,Franklin,County,TX,1.267760,0.073747,"POLYGON ((-95.30872 32.99456, -95.30859 33.377..."
252,252,Polygon,22871,TX_48417,48417,Hays,County,TX,1.795378,0.164304,"POLYGON ((-98.29417 30.04680, -98.17298 30.356..."


In [10]:
# Export the dataframe to a shapefile.
# Note: rememeber that shapefiles autofil in fields names, will need to fix field names in app upload.
dfsOut = gpd.GeoDataFrame(outdf, crs="EPSG:4326", geometry='geometry') # covert to geodataframe
dfsOut.to_file("Processed_Shapefiles/P_WaDECounties.shp") # export shape file

### Bonus Code