# Creating Shapefiles for Web App - Basin

Inputs: 
- Pagg_ReportingUnit.csv.  Contains reportinug unit info from WaDE database.
- USBR Upper Colorado River Basin by State source shapefile.

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set working directory
workingDir = "C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape"
os.chdir(workingDir)

# Grab AggreagatedAmounts ReportingUnit.csv file.
reportingunits_input = pd.read_csv('SourceFiles/Pagg_ReportingUnit.csv')
df_1RU = pd.DataFrame(reportingunits_input)
df_1RU.head(3)

Unnamed: 0,ReportingUnitID,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,StateCV,WaDEName
0,27796,AZag_RU1,WaDEAZ_RU1,PHOENIX AMA,Active Management Area,AZ,Custom Basin
1,27797,AZag_RU2,WaDEAZ_RU2,PINAL AMA,Active Management Area,AZ,Custom Basin
2,27798,AZag_RU3,WaDEAZ_RU3,PRESCOTT AMA,Active Management Area,AZ,Custom Basin


### USBR - Upper Colorado River Basin by State

In [3]:
# Grab the Upper Colorado River Basin by State shapefile.
# Paring RU_Name to ReportingUnitName

USBRshapefile_input = gpd.read_file('C:/Users/rjame/Documents/RShinyAppPractice/CreateAppShapefiles/App2_AggregatedShape/SourceFiles/USBR/USBR.shp')
dfs_USBR = pd.DataFrame(USBRshapefile_input)
dfs_USBR.head(3)

Unnamed: 0,ReportingU,Reportin_1,StateCV,EPSGCodeCV,TypeNameNu,TypeIDNum,Shape_Leng,Shape_Area,geometry
0,CO_green river,Tributary,US,EPSG:4326,6_Co_Green River_100,6_Unspecified_100,9.245061,2.884388,"POLYGON ((-109.04826 39.50320, -109.05007 40.9..."
1,AZ_colorado river,Tributary,US,EPSG:4326,6_Az_Colorado River_100,6_Unspecified_100,10.432424,1.78856,"POLYGON ((-109.04522 36.99735, -109.04604 36.0..."
2,CO_upper main stem,Tributary,US,EPSG:4326,6_Co_Upper Main Stem_100,6_Unspecified_100,16.910983,5.968905,"POLYGON ((-109.04634 37.92188, -109.04826 39.5..."


In [4]:
# Custom
# State: US (United Sates), USBR Upper Colorado River Basin by State.
# Notes: Need to change the string case of ReportingUnitName to title
###########################################################################

# Create temporal dataframes for state specific and reportingunit type storage
df_1RU_USBR = df_1RU[(df_1RU.ReportingUnitTypeCV == 'Tributary') & ((df_1RU.StateCV == 'US'))]

df_1RU_USBR['ReportingUnitName'] = df_1RU_USBR['ReportingUnitName']


# retreive ReportingUnitUUID.
ReportingUnitUUIDdict = pd.Series(df_1RU_USBR.ReportingUnitUUID.values, index = df_1RU_USBR.ReportingUnitName).to_dict()
def retrieveUUID(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()
        try:
            outList = ReportingUnitUUIDdict[String1]
        except:
            outList = ""
    return outList
dfs_USBR['ReportingUnitUUID'] = dfs_USBR.apply(lambda row: retrieveUUID(row['ReportingU']), axis=1)

# Merging temporal dataframes into one, using left-join.
dfs_USBR = pd.merge(dfs_USBR, df_1RU_USBR, left_on='ReportingUnitUUID', right_on='ReportingUnitUUID', how='left')
dfs_USBR = dfs_USBR.dropna().reset_index()

# Creating new output state specific dataframe with fields of interest.
dfs_2USBR = pd.DataFrame() #empty dataframe
dfs_2USBR['OBJECTID'] = dfs_USBR.index
dfs_2USBR['Shape'] = 'Polygon'
dfs_2USBR['UnitID'] = dfs_USBR['ReportingUnitID']
dfs_2USBR['UnitUUID'] = dfs_USBR['ReportingUnitUUID']
dfs_2USBR['NativeID'] = dfs_USBR['ReportingUnitNativeID']
dfs_2USBR['Name'] = dfs_USBR['ReportingUnitName']
dfs_2USBR['TypeCV'] = dfs_USBR['ReportingUnitTypeCV']
dfs_2USBR['StateCV'] = dfs_USBR['StateCV_y']
dfs_2USBR['geometry'] = dfs_USBR['geometry']

#view output
dfs_2USBR

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1RU_USBR['ReportingUnitName'] = df_1RU_USBR['ReportingUnitName']


Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,28161,USBRag_RU2,WaDEUSBR_RU2,CO_green river,Tributary,US,"POLYGON ((-109.04826 39.50320, -109.05007 40.9..."
1,1,Polygon,28160,USBRag_RU1,WaDEUSBR_RU1,AZ_colorado river,Tributary,US,"POLYGON ((-109.04522 36.99735, -109.04604 36.0..."
2,2,Polygon,28162,USBRag_RU3,WaDEUSBR_RU3,CO_upper main stem,Tributary,US,"POLYGON ((-109.04634 37.92188, -109.04826 39.5..."
3,3,Polygon,28168,USBRag_RU9,WaDEUSBR_RU9,WY_green river,Tributary,US,"POLYGON ((-106.86125 41.00002, -110.72029 40.9..."
4,4,Polygon,28165,USBRag_RU6,WaDEUSBR_RU6,UT_green river,Tributary,US,"POLYGON ((-109.05007 40.99894, -109.04826 39.5..."
5,5,Polygon,28166,USBRag_RU7,WaDEUSBR_RU7,UT_upper main stem,Tributary,US,"POLYGON ((-109.04826 39.50320, -109.04634 37.9..."
6,6,Polygon,28167,USBRag_RU8,WaDEUSBR_RU8,UT_colorado river,Tributary,US,"POLYGON ((-109.05007 37.92160, -109.05006 36.9..."
7,7,Polygon,28164,USBRag_RU5,WaDEUSBR_RU5,NM_colorado river,Tributary,US,"POLYGON ((-109.04604 36.03339, -109.04522 36.9..."
8,8,Polygon,28163,USBRag_RU4,WaDEUSBR_RU4,CO_colorado river,Tributary,US,"POLYGON ((-109.05006 36.99735, -109.05007 37.9..."


### Concatenate and Export

In [5]:
# Merge dataframes
frames = [dfs_2USBR]
outdf = pd.concat(frames)
outdf.head(3)

Unnamed: 0,OBJECTID,Shape,UnitID,UnitUUID,NativeID,Name,TypeCV,StateCV,geometry
0,0,Polygon,28161,USBRag_RU2,WaDEUSBR_RU2,CO_green river,Tributary,US,"POLYGON ((-109.04826 39.50320, -109.05007 40.9..."
1,1,Polygon,28160,USBRag_RU1,WaDEUSBR_RU1,AZ_colorado river,Tributary,US,"POLYGON ((-109.04522 36.99735, -109.04604 36.0..."
2,2,Polygon,28162,USBRag_RU3,WaDEUSBR_RU3,CO_upper main stem,Tributary,US,"POLYGON ((-109.04634 37.92188, -109.04826 39.5..."


In [6]:
# # drop NA rows
# outdf = outdf.dropna(subset=['UnitID'])
# outdf

In [7]:
# Export the dataframe to a shapefile.
dfsOut = gpd.GeoDataFrame(outdf,  crs="EPSG:4326", geometry='geometry') # covert to geodataframe
dfsOut.to_file("Processed_Shapefiles/USBR.shp") # export shape file