# Creating Shapefiles for Web App - Basin

Inputs: 
- Pagg_ReportingUnit.csv.  Contains reporting unit info from the WaDE database.
- Texas basins source shapefile.
- Wyoming basins source shapefile.

In [1]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Project root: the relative paths used in this notebook are resolved against it.
workingDir = "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape"
os.chdir(workingDir)

# Read the reporting-unit export (Pagg_ReportingUnit.csv) and wrap the parsed
# table in the DataFrame that the state-specific cells filter on.
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(data=reportingunits_input)

# Show the first three rows as a quick sanity check.
df_1RU.head(n=3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,WaDEName
0,27796,AZag_RU1,WaDEAZ_RU1,PHOENIX AMA,Active Management Area,AZ,Custom Basin
1,27797,AZag_RU2,WaDEAZ_RU2,PINAL AMA,Active Management Area,AZ,Custom Basin
2,27798,AZag_RU3,WaDEAZ_RU3,PRESCOTT AMA,Active Management Area,AZ,Custom Basin


### Texas - Basin

In [3]:
# Grab the TX Basin Shapefile.
# Pairing RU_Name (shapefile) to ReportingUnitName (WaDE reporting-unit table).
# The path is relative to the working directory set with os.chdir(workingDir),
# the same way the reporting-unit CSV is read, so the project root is only
# spelled out in one place.

TXBshapefile_input = gpd.read_file('SourceFiles/Basins/WaDEBasins.shp')
# Plain pandas DataFrame view of the shapefile; the geometry column is kept and
# the combined result is turned back into a GeoDataFrame at export time.
dfs_TXB = pd.DataFrame(TXBshapefile_input)
dfs_TXB.head(3)

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,geometry
0,12,Brazos,37,37-12,28.211813,10.653969,3_Brazos_37,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,13,Brazos-Colorado,37,37-13,7.2726,0.431697,3_Brazos-Colorado_37,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,1,Canadian,37,37-1,9.47117,3.312585,3_Canadian_37,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [4]:
# Custom
# State: TX, Basin
# Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage.
# .copy() makes this an independent frame, so the title-casing assignment below
# no longer writes to a slice of df_1RU (which raised SettingWithCopyWarning).
df_1RU_Custom_TX = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Basin') & (df_1RU.StateCV == 'TX')].copy()
df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()

# Retrieve ReportingUnitUUID: lookup of ReportingUnitName -> ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_TX.ReportingUnitUUID.values, index = df_1RU_Custom_TX.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    """Return the ReportingUnitUUID for a shapefile basin name.

    The name is stripped of surrounding whitespace and looked up in
    ReportingUnitUUIDdict (read at call time). Blank, null, or unmatched
    names return '' so the row can be discarded after the merge.
    """
    if pd.isnull(colrowValue) or colrowValue == '':
        return ''
    # dict.get replaces the former bare `except:`, which hid every error type.
    return ReportingUnitUUIDdict.get(colrowValue.strip(), '')
dfs_TXB['ReportingUnitUUID'] = dfs_TXB['RU_Name'].map(retrieveUUID)

# Merging temporal dataframes into one, using left-join.
dfs_TXB = pd.merge(dfs_TXB, df_1RU_Custom_TX, on='ReportingUnitUUID', how='left')
# Shapefile rows without a TX match carry '' as UUID, so the left join fills
# their reporting-unit columns with NaN and dropna() removes them here.
# NOTE(review): dropna() checks every column, so a matched row with any other
# missing attribute is dropped as well - confirm that is intended.
dfs_TXB = dfs_TXB.dropna().reset_index()

# Creating new output state specific dataframe with fields of interest.
dfs_2TXB = pd.DataFrame() #empty dataframe
dfs_2TXB['OBJECTID'] = dfs_TXB.index  # 0-based position within this state's rows
dfs_2TXB['Shape'] = 'Polygon'
dfs_2TXB['UnitID'] = dfs_TXB['ReportingUnitID']
dfs_2TXB['UnitUUID'] = dfs_TXB['ReportingUnitUUID']
dfs_2TXB['NativeID'] = dfs_TXB['ReportingUnitNativeID']
dfs_2TXB['Name'] = dfs_TXB['ReportingUnitName']
dfs_2TXB['TypeCV'] = dfs_TXB['ReportingUnitTypeCV']
dfs_2TXB['StateCV'] = dfs_TXB['StateCV']
dfs_2TXB['geometry'] = dfs_TXB['geometry']

#view output
dfs_2TXB

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1RU_Custom_TX['ReportingUnitName'] = df_1RU_Custom_TX['ReportingUnitName'].str.title()


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,27801.0,TXag_RU1,WaDETX_RU1,Brazos,Basin,TX,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,27912.0,TXag_RU2,WaDETX_RU2,Brazos-Colorado,Basin,TX,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,28001.0,TXag_RU3,WaDETX_RU3,Canadian,Basin,TX,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."
3,3,Polygon,28012.0,TXag_RU4,WaDETX_RU4,Colorado,Basin,TX,"MULTIPOLYGON (((-95.98255 28.59958, -95.98390 ..."
4,4,Polygon,28023.0,TXag_RU5,WaDETX_RU5,Colorado-Lavaca,Basin,TX,"MULTIPOLYGON (((-96.37258 28.38773, -96.37073 ..."
5,5,Polygon,28034.0,TXag_RU6,WaDETX_RU6,Cypress,Basin,TX,"POLYGON ((-94.04301 33.14548, -94.04290 32.382..."
6,6,Polygon,28045.0,TXag_RU7,WaDETX_RU7,Guadalupe,Basin,TX,"MULTIPOLYGON (((-99.07658 29.85230, -99.07661 ..."
7,7,Polygon,28056.0,TXag_RU8,WaDETX_RU8,Lavaca,Basin,TX,"POLYGON ((-96.28305 29.25331, -96.30103 29.227..."
8,8,Polygon,28067.0,TXag_RU9,WaDETX_RU9,Lavaca-Guadalupe,Basin,TX,"MULTIPOLYGON (((-96.84942 28.06996, -96.84513 ..."
9,9,Polygon,27802.0,TXag_RU10,WaDETX_RU10,Neches,Basin,TX,"POLYGON ((-93.84042 30.03340, -93.86840 29.996..."


### Wyoming - Basin

In [5]:
# Grab the WY Basin Shapefile.
# Pairing RU_Name (shapefile) to ReportingUnitName (WaDE reporting-unit table).
# The path is relative to the working directory set with os.chdir(workingDir),
# the same way the reporting-unit CSV is read.
# NOTE(review): this is the same WaDEBasins.shp file that the Texas section
# reads - confirm that a single combined basin shapefile is intended.

WYBshapefile_input = gpd.read_file('SourceFiles/Basins/WaDEBasins.shp')
# Plain pandas DataFrame view of the shapefile; the geometry column is kept and
# the combined result is turned back into a GeoDataFrame at export time.
dfs_WYB = pd.DataFrame(WYBshapefile_input)
dfs_WYB.head(3)

Unnamed: 0,RU_ID,RU_Name,StateNum,State_RU,Shape_Leng,Shape_Area,TypeNameNu,geometry
0,12,Brazos,37,37-12,28.211813,10.653969,3_Brazos_37,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,13,Brazos-Colorado,37,37-13,7.2726,0.431697,3_Brazos-Colorado_37,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,1,Canadian,37,37-1,9.47117,3.312585,3_Canadian_37,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."


In [6]:
# Custom
# State: WY, Basin
# Notes: ReportingUnitName is matched against RU_Name as-is; no case
#        conversion is applied in this section.
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_Custom_WY = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Basin') & (df_1RU.StateCV == 'WY')]

# Retrieve ReportingUnitUUID: lookup of ReportingUnitName -> ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_Custom_WY.ReportingUnitUUID.values, index = df_1RU_Custom_WY.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    """Return the ReportingUnitUUID for a shapefile basin name.

    The name is stripped of surrounding whitespace and looked up in
    ReportingUnitUUIDdict (read at call time). Blank, null, or unmatched
    names return '' so the row can be discarded after the merge.
    """
    if pd.isnull(colrowValue) or colrowValue == '':
        return ''
    # dict.get replaces the former bare `except:`, which hid every error type.
    return ReportingUnitUUIDdict.get(colrowValue.strip(), '')
dfs_WYB['ReportingUnitUUID'] = dfs_WYB['RU_Name'].map(retrieveUUID)

# Merging temporal dataframes into one, using left-join.
dfs_WYB = pd.merge(dfs_WYB, df_1RU_Custom_WY, on='ReportingUnitUUID', how='left')
# Shapefile rows without a WY match carry '' as UUID, so the left join fills
# their reporting-unit columns with NaN and dropna() removes them here.
# NOTE(review): dropna() checks every column, so a matched row with any other
# missing attribute is dropped as well - confirm that is intended.
dfs_WYB = dfs_WYB.dropna().reset_index()

# Creating new output state specific dataframe with fields of interest.
dfs_2WYB = pd.DataFrame() #empty dataframe
dfs_2WYB['OBJECTID'] = dfs_WYB.index  # 0-based position within this state's rows
dfs_2WYB['Shape'] = 'Polygon'
dfs_2WYB['UnitID'] = dfs_WYB['ReportingUnitID']
dfs_2WYB['UnitUUID'] = dfs_WYB['ReportingUnitUUID']
dfs_2WYB['NativeID'] = dfs_WYB['ReportingUnitNativeID']
dfs_2WYB['Name'] = dfs_WYB['ReportingUnitName']
dfs_2WYB['TypeCV'] = dfs_WYB['ReportingUnitTypeCV']
dfs_2WYB['StateCV'] = dfs_WYB['StateCV']
dfs_2WYB['geometry'] = dfs_WYB['geometry']

#view output
dfs_2WYB

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,26014.0,WYag_RU1,WaDEWY_RU1,Bear River Planning Basin,Basin,WY,"POLYGON ((-110.74937 42.60565, -110.72887 42.5..."
1,1,Polygon,26015.0,WYag_RU2,WaDEWY_RU2,Green River Planning Basin,Basin,WY,"POLYGON ((-110.02174 43.43713, -109.99858 43.4..."
2,2,Polygon,26016.0,WYag_RU3,WaDEWY_RU3,Northeast River Planning Basin,Basin,WY,"POLYGON ((-105.00107 45.00031, -104.05819 44.9..."
3,3,Polygon,26017.0,WYag_RU4,WaDEWY_RU4,Platte River Planning Basin,Basin,WY,"POLYGON ((-106.52719 43.19357, -106.52964 43.1..."
4,4,Polygon,26018.0,WYag_RU5,WaDEWY_RU5,Powder-Tongue River Planning Basin,Basin,WY,"POLYGON ((-105.85433 45.00043, -105.04859 44.9..."
5,5,Polygon,26019.0,WYag_RU6,WaDEWY_RU6,Salt-Snake River Planning Basin,Basin,WY,"POLYGON ((-110.63437 44.48433, -110.61510 44.4..."
6,6,Polygon,26020.0,WYag_RU7,WaDEWY_RU7,Wind-Bighorn River Planning Basin,Basin,WY,"POLYGON ((-109.10415 45.00585, -109.06805 44.9..."


### Concatenate and Export

In [7]:
# Merge dataframes
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each state's rows keep their own 0-based labels and the index has duplicates.
# NOTE(review): OBJECTID is taken from each state's own index, so its values
# also repeat across states - confirm whether unique IDs are required.
frames = [dfs_2TXB, dfs_2WYB]
outdf = pd.concat(frames, ignore_index=True)
outdf

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,27801.0,TXag_RU1,WaDETX_RU1,Brazos,Basin,TX,"MULTIPOLYGON (((-95.38128 28.87588, -95.38851 ..."
1,1,Polygon,27912.0,TXag_RU2,WaDETX_RU2,Brazos-Colorado,Basin,TX,"MULTIPOLYGON (((-95.75817 28.73139, -95.75682 ..."
2,2,Polygon,28001.0,TXag_RU3,WaDETX_RU3,Canadian,Basin,TX,"POLYGON ((-100.00040 35.81694, -100.02271 35.8..."
3,3,Polygon,28012.0,TXag_RU4,WaDETX_RU4,Colorado,Basin,TX,"MULTIPOLYGON (((-95.98255 28.59958, -95.98390 ..."
4,4,Polygon,28023.0,TXag_RU5,WaDETX_RU5,Colorado-Lavaca,Basin,TX,"MULTIPOLYGON (((-96.37258 28.38773, -96.37073 ..."
5,5,Polygon,28034.0,TXag_RU6,WaDETX_RU6,Cypress,Basin,TX,"POLYGON ((-94.04301 33.14548, -94.04290 32.382..."
6,6,Polygon,28045.0,TXag_RU7,WaDETX_RU7,Guadalupe,Basin,TX,"MULTIPOLYGON (((-99.07658 29.85230, -99.07661 ..."
7,7,Polygon,28056.0,TXag_RU8,WaDETX_RU8,Lavaca,Basin,TX,"POLYGON ((-96.28305 29.25331, -96.30103 29.227..."
8,8,Polygon,28067.0,TXag_RU9,WaDETX_RU9,Lavaca-Guadalupe,Basin,TX,"MULTIPOLYGON (((-96.84942 28.06996, -96.84513 ..."
9,9,Polygon,27802.0,TXag_RU10,WaDETX_RU10,Neches,Basin,TX,"POLYGON ((-93.84042 30.03340, -93.86840 29.996..."


In [8]:
# Export the dataframe to a shapefile.
# Create the output folder first so the export does not fail on a fresh checkout
# where Processed_Shapefiles/ does not exist yet (no-op when it already exists).
os.makedirs("Processed_Shapefiles", exist_ok=True)
dfsOut = gpd.GeoDataFrame(outdf,  crs="EPSG:4326", geometry='geometry') # convert to geodataframe
dfsOut.to_file("Processed_Shapefiles/WaDE_Basin.shp") # export shape file