# Creating Shapefiles for Web App - Basin

Inputs: 
- Pagg_ReportingUnit.csv.  Contains reportinug unit info from WaDE database.
- Texas basins source shapefile.
- Wyoming basins source shapefile.

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory
workingDir = "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape"
os.chdir(workingDir)

# Grab AggreagatedAmounts & ReportingUnit.csv file.
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,WaDEName
0,27796,AZag_RU1,WaDEAZ_RU1,PHOENIX AMA,Active Management Area,AZ,Custom Basin
1,27797,AZag_RU2,WaDEAZ_RU2,PINAL AMA,Active Management Area,AZ,Custom Basin
2,27798,AZag_RU3,WaDEAZ_RU3,PRESCOTT AMA,Active Management Area,AZ,Custom Basin


### Texas - Basin

In [3]:
# Grab the TX Basin Shapefile.
# Paring RU_Name to ReportingUnitName

TXBshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape/SourceFiles/Basins/WaDEBasins.shp')
dfs_TXB = pd.DataFrame(TXBshapefile_input)
dfs_TXB.head(3)

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,geometry
0,12,Brazos,37,37-12,28.211813,10.653969,3_Brazos_37,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,13,Brazos-Colorado,37,37-13,7.2726,0.431697,3_Brazos-Colorado_37,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,1,Canadian,37,37-1,9.47117,3.312585,3_Canadian_37,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [4]:
# Custom
# State: TX, Basin
# Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_TX = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Basin') & ((df_1RU.StateCV == 'TX'))]
df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_TX.ReportingUnitUUID.values, index = df_1RU_Custom_TX.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_TXB['ReportingUnitUUID'] = dfs_TXB.apply(lambda row: retrieveUUID(row['RU_Name']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_TXB = pd.merge(dfs_TXB, df_1RU_Custom_TX, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2TXB = pd.DataFrame() #empty dataframe
dfs_2TXB['OBJECTID'] = dfs_TXB.index
dfs_2TXB['Shape'] = 'Polygon'
dfs_2TXB['UnitID'] = dfs_TXB['ReportingUnitID']
dfs_2TXB['UnitUUID'] = dfs_TXB['ReportingUnitUUID']
dfs_2TXB['NativeID'] = dfs_TXB['ReportingUnitNativeID']
dfs_2TXB['Name'] = dfs_TXB['ReportingUnitName']
dfs_2TXB['TypeCV'] = dfs_TXB['ReportingUnitTypeCV']
dfs_2TXB['StateCV'] = dfs_TXB['StateCV']
dfs_2TXB['geometry'] = dfs_TXB['geometry']

#view output
dfs_2TXB.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,27801.0,TXag_RU1,WaDETX_RU1,Brazos,Basin,TX,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,27912.0,TXag_RU2,WaDETX_RU2,Brazos-Colorado,Basin,TX,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,28001.0,TXag_RU3,WaDETX_RU3,Canadian,Basin,TX,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


### Wyoming - Basin

In [5]:
# Grab the WY Basin Shapefile.
# Paring RU_Name to ReportingUnitName

WYBshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape/SourceFiles/Basins/WaDEBasins.shp')
dfs_WYB = pd.DataFrame(WYBshapefile_input)
dfs_WYB.head(3)

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,geometry
0,12,Brazos,37,37-12,28.211813,10.653969,3_Brazos_37,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,13,Brazos-Colorado,37,37-13,7.2726,0.431697,3_Brazos-Colorado_37,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,1,Canadian,37,37-1,9.47117,3.312585,3_Canadian_37,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [6]:
# Custom
# State: WY, Basin
# # Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_WY = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Basin') & ((df_1RU.StateCV == 'WY'))]
# df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()

# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_WY.ReportingUnitUUID.values, index = df_1RU_Custom_WY.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ''
    return outList
dfs_WYB['ReportingUnitUUID'] = dfs_WYB.apply(lambda row: retrieveUUID(row['RU_Name']), axis=1)
dfs_WYB
# Merging temporal dataframes into one, using left-join.
dfs_WYB = pd.merge(dfs_WYB, df_1RU_Custom_WY, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')

# Creating new output state specific dataframe with fields of interest.
dfs_2WYB = pd.DataFrame() #empty dataframe
dfs_2WYB['OBJECTID'] = dfs_WYB.index
dfs_2WYB['Shape'] = 'Polygon'
dfs_2WYB['UnitID'] = dfs_WYB['ReportingUnitID']
dfs_2WYB['UnitUUID'] = dfs_WYB['ReportingUnitUUID']
dfs_2WYB['NativeID'] = dfs_WYB['ReportingUnitNativeID']
dfs_2WYB['Name'] = dfs_WYB['ReportingUnitName']
dfs_2WYB['TypeCV'] = dfs_WYB['ReportingUnitTypeCV']
dfs_2WYB['StateCV'] = dfs_WYB['StateCV']
dfs_2WYB['geometry'] = dfs_WYB['geometry']

#view output
dfs_2WYB.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,,,,,,,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,,,,,,,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,,,,,,,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


### Concatenate and Export

In [7]:
# Merge dataframes
frames = [dfs_2TXB, dfs_2WYB]
outdf = pd.concat(frames)
outdf.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,27801.0,TXag_RU1,WaDETX_RU1,Brazos,Basin,TX,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,27912.0,TXag_RU2,WaDETX_RU2,Brazos-Colorado,Basin,TX,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,28001.0,TXag_RU3,WaDETX_RU3,Canadian,Basin,TX,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [8]:
# Export the dataframe to a shapefile.
dfsOut = gpd.GeoDataFrame(outdf,  crs="EPSG:4326", geometry='geometry') # covert to geodataframe
dfsOut.to_file("Processed_Shapefiles/WaDE_Basin.shp") # export shape file