# Working with UT Aggregated Data

Pre-processing the input data for a smoother upload of the state data to the WaDE 2.0 database.
Uses geopandas to read in the shapefiles and converts their geometry to WKT for the ReportingUnit geometry.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

# Set the working directory to the raw-input folder.
# After os.chdir, relative filenames used by later cells (input workbook, output CSVs)
# resolve against this folder.
# NOTE(review): this is a machine-specific absolute path; adjust it before running elsewhere.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Utah/AggregatedAmounts/RawInputData"
os.chdir(workingDir)

# Water Budget Data

In [2]:
# Excel (.xlsx) input file, given relative to the working directory.
fileInput = "UT Water Budget Data 11102020.xlsx"

In [3]:
# Read the input workbook into df and preview the first three rows as a sanity check.
df = pd.read_excel(fileInput)
df.head(3)

Unnamed: 0,AllocationCropDutyAmount,Amount,BeneficialUseCategory,CommunityWaterSupplySystem,CropTypeCV,CustomerTypeCV,DataPublicationDate,Geometry,InterbasinTransferFromID,InterbasinTransferToID,IrrigatedAcreage,Unnamed: 11,MethodUUID,NAICSCodeCV,OrganizationUUID,PopulationServed,PowerGeneratedGWh,PowerType,Unnamed: 18,Unnamed: 19,ReportingUnitName,ReportingUnitNativeID,ReportingUnitTypeCV,ReportYearCV,Unnamed: 24,TimeframeEnd,TimeframeStart,USGSCategoryCV,UT_VariableCV,UT_VariableSpecificCV,WaterSourceID
0,,80061.753287,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Beaver,49001,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW
1,,238427.649653,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Box Elder,49003,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW
2,,112529.510081,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Cache,49005,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW


In [4]:
# Build WaterSourceTypeCV from the native WaterSourceID codes.
# Keys are the WaterSourceID values found in the input; values are the water source type labels.
WSDict = {
    "Fresh_SW_GW": "Groundwater, Surface Water",
    "Fresh_Groundwater": "Groundwater",
    "Fresh_Surface Water": "Surface Water",
}


def createWSTypeCV(colrowValue):
    """Translate one WaterSourceID value into its water source type label.

    Parameters
    ----------
    colrowValue : str or missing value
        A single cell value from the WaterSourceID column.

    Returns
    -------
    str
        The matching label from WSDict, or '' when the value is missing,
        blank, or not a key of WSDict.
    """
    if pd.isnull(colrowValue):
        return ''
    # Strip surrounding whitespace so padded codes still match a dictionary key.
    cleanedKey = str(colrowValue).strip()
    # dict.get replaces the former bare `except:`, which hid every kind of error.
    return WSDict.get(cleanedKey, '')

# Derive inputWaterSourceTypeCV by mapping the WaterSourceID column element-wise.
# Series.map avoids building a row object per record (as df.apply(axis=1) does)
# and still returns a Series when df has no rows.
df['inputWaterSourceTypeCV'] = df['WaterSourceID'].map(createWSTypeCV)
df

Unnamed: 0,AllocationCropDutyAmount,Amount,BeneficialUseCategory,CommunityWaterSupplySystem,CropTypeCV,CustomerTypeCV,DataPublicationDate,Geometry,InterbasinTransferFromID,InterbasinTransferToID,IrrigatedAcreage,Unnamed: 11,MethodUUID,NAICSCodeCV,OrganizationUUID,PopulationServed,PowerGeneratedGWh,PowerType,Unnamed: 18,Unnamed: 19,ReportingUnitName,ReportingUnitNativeID,ReportingUnitTypeCV,ReportYearCV,Unnamed: 24,TimeframeEnd,TimeframeStart,USGSCategoryCV,UT_VariableCV,UT_VariableSpecificCV,WaterSourceID,inputWaterSourceTypeCV
0,,80061.753287,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Beaver,49001,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW,"Groundwater, Surface Water"
1,,238427.649653,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Box Elder,49003,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW,"Groundwater, Surface Water"
2,,112529.510081,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Cache,49005,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW,"Groundwater, Surface Water"
3,,27021.922929,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Carbon,49007,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW,"Groundwater, Surface Water"
4,,20364.506196,Agriculture,,,,,,,,,,Consumptive Use Estimate,111000,UTDWRE,,,,,,Daggett,49009,County,2005,,2006-09-30,2005-10-01,Irrigation,Consumptive Use,"Consumptive Use, Irrigation",Fresh_SW_GW,"Groundwater, Surface Water"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13066,,23157.531500,Municipal/Industrial,,,,,,,,,,Withdrawal Volume Estimate,221310,UTDWRE,,,,,,St. George,2005-10-01 00:00:00,Subarea,2006,,2006-09-30,2006-10-01,Public Supply,Withdrawal,"Withdrawal, Public Supply",Fresh_Groundwater,Groundwater
13067,,23158.139300,Municipal/Industrial,,,,,,,,,,Withdrawal Volume Estimate,221310,UTDWRE,,,,,,St. George,2005-10-01 00:00:00,Subarea,2007,,2007-09-30,2007-10-01,Public Supply,Withdrawal,"Withdrawal, Public Supply",Fresh_Groundwater,Groundwater
13068,,23157.531500,Municipal/Industrial,,,,,,,,,,Withdrawal Volume Estimate,221310,UTDWRE,,,,,,St. George,2005-10-01 00:00:00,Subarea,2008,,2008-09-30,2008-10-01,Public Supply,Withdrawal,"Withdrawal, Public Supply",Fresh_Groundwater,Groundwater
13069,,23158.762600,Municipal/Industrial,,,,,,,,,,Withdrawal Volume Estimate,221310,UTDWRE,,,,,,St. George,2005-10-01 00:00:00,Subarea,2009,,2009-09-30,2009-10-01,Public Supply,Withdrawal,"Withdrawal, Public Supply",Fresh_Groundwater,Groundwater


# Shapefile Data

In [5]:
# Shapefile input
# Paths are derived from workingDir and use forward slashes only. The previous literals
# repeated the absolute path and contained "Utah\AggregatedAmounts", where "\A" is an
# invalid string escape sequence.
UT_County_SF = gpd.read_file(workingDir + '/UT_Counties_SF/UT_Counties.shp')
UT_HUC8_SF = gpd.read_file(workingDir + '/UT_HUC8_SF/UT_HUC8.shp')
UT_Subarea_SF = gpd.read_file(workingDir + '/UT_Subarea_SF/UT_Subarea.shp')

In [6]:
# UT, County
# Assemble the county reporting-unit table: name, native ID, type and geometry.
###########################################################################
columnsList = ['RU_Name', 'RU_ID', 'RU_Type','geometry']
dfCountyShape = pd.DataFrame(
    {
        'RU_Name': UT_County_SF['NAME'],
        # str.lstrip("46-") removes any leading run of the characters '4', '6' and '-'
        # (a character set, not the literal prefix); a "4" is then prepended.
        # NOTE(review): presumably the "4" restores a digit eaten by lstrip - confirm against State_RU.
        'RU_ID': "4" + UT_County_SF['State_RU'].map(lambda x: x.lstrip("46-")),
        'RU_Type': 'County',
        'geometry': UT_County_SF['geometry'],
    },
    columns=columnsList,
)
dfCountyShape.head(3)

Unnamed: 0,RU_Name,RU_ID,RU_Type,geometry
0,Beaver,49001,County,"POLYGON ((-114.05049 38.49996, -114.05015 38.5..."
1,Box Elder,49003,County,"POLYGON ((-114.04255 41.21092, -114.04172 41.9..."
2,Duchesne,49013,County,"POLYGON ((-110.90435 40.70150, -110.89777 40.7..."


In [7]:
# UT, HUC8
# Assemble the HUC8 reporting-unit table: name, native ID, type and geometry.
###########################################################################
columnsList = ['RU_Name', 'RU_ID', 'RU_Type','geometry']
dfHUC8Shape = pd.DataFrame(
    {
        'RU_Name': UT_HUC8_SF['NAME'],
        'RU_ID': UT_HUC8_SF['HUC8'],
        'RU_Type': 'HUC8',  # constant label, broadcast to every row
        'geometry': UT_HUC8_SF['geometry'],
    },
    columns=columnsList,
)
dfHUC8Shape.head(3)

Unnamed: 0,RU_Name,RU_ID,RU_Type,geometry
0,Meadow Valley Wash,15010013,HUC8,"POLYGON ((-114.03649 37.95701, -114.03647 37.9..."
1,Lower Virgin,15010010,HUC8,"POLYGON ((-114.06750 37.60696, -114.06708 37.6..."
2,Fort Pearce Wash,15010009,HUC8,"POLYGON ((-112.75584 37.06340, -112.75670 37.0..."


In [8]:
# UT, Subarea
# Assemble the subarea reporting-unit table: name, native ID, type and geometry.
###########################################################################
columnsList = ['RU_Name', 'RU_ID', 'RU_Type','geometry']
dfSubareaShape = pd.DataFrame(
    {
        'RU_Name': UT_Subarea_SF['RU_Name'],
        'RU_ID': UT_Subarea_SF['RU_ID'],
        'RU_Type': 'Subarea',  # constant label, broadcast to every row
        'geometry': UT_Subarea_SF['geometry'],
    },
    columns=columnsList,
)
dfSubareaShape.head(3)

Unnamed: 0,RU_Name,RU_ID,RU_Type,geometry
0,Curlew Valley,00-01-03,Subarea,"POLYGON ((-112.58699 42.00092, -112.57918 41.9..."
1,Clear Creek,000-01-03,Subarea,"POLYGON ((-113.17778 42.00082, -113.20384 41.9..."
2,Promontory Point,00-07-02,Subarea,"POLYGON ((-112.36394 42.00018, -112.38128 41.9..."


In [9]:
# Concatenate the shapefile dataframes (county, HUC8, subarea) into one table.
# reset_index() is called without drop=True, so each frame's original row index is kept
# as an 'index' column in dfAllShape.
frames = [dfCountyShape, dfHUC8Shape, dfSubareaShape]
dfAllShape = pd.concat(frames).reset_index()
dfAllShape

Unnamed: 0,index,RU_Name,RU_ID,RU_Type,geometry
0,0,Beaver,49001,County,"POLYGON ((-114.05049 38.49996, -114.05015 38.5..."
1,1,Box Elder,49003,County,"POLYGON ((-114.04255 41.21092, -114.04172 41.9..."
2,2,Duchesne,49013,County,"POLYGON ((-110.90435 40.70150, -110.89777 40.7..."
3,3,Emery,49015,County,"POLYGON ((-111.30701 38.67233, -111.29914 38.6..."
4,4,Grand,49019,County,"POLYGON ((-110.17897 38.90920, -110.17447 38.9..."
...,...,...,...,...,...
240,144,Brigham City,01-01-07,Subarea,"POLYGON ((-112.01361 41.64418, -112.00460 41.6..."
241,145,Cache Valley,01-01-04,Subarea,"POLYGON ((-111.50799 42.00025, -111.50842 41.9..."
242,146,Randolph,01-03-02,Subarea,"POLYGON ((-111.04977 41.80850, -111.05113 41.5..."
243,147,Evanston,01-03-01,Subarea,"MULTIPOLYGON (((-110.72426 40.99222, -110.7332..."


### Inspect Output Data & Export

In [10]:
# List the dtype of every column in df; the display limits are lifted only inside
# this block so the listing is not truncated.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(df.dtypes)

AllocationCropDutyAmount             float64
Amount                               float64
BeneficialUseCategory                 object
CommunityWaterSupplySystem           float64
CropTypeCV                           float64
CustomerTypeCV                       float64
DataPublicationDate                  float64
Geometry                             float64
InterbasinTransferFromID             float64
InterbasinTransferToID               float64
IrrigatedAcreage                     float64
Unnamed: 11                          float64
MethodUUID                            object
NAICSCodeCV                            int64
OrganizationUUID                      object
PopulationServed                     float64
PowerGeneratedGWh                    float64
PowerType                            float64
Unnamed: 18                          float64
Unnamed: 19                          float64
ReportingUnitName                     object
ReportingUnitNativeID                 object
ReportingU

In [11]:
# List the dtype of every column in dfAllShape; the display limits are lifted only
# inside this block so the listing is not truncated.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(dfAllShape.dtypes)

index          int64
RU_Name       object
RU_ID         object
RU_Type       object
geometry    geometry
dtype: object


In [12]:
# Export out to CSV.
# Both files are written with relative names, so they land in the current working directory.
# index=False omits the DataFrame row index from the files.
df.to_csv('P_utAggMaster.csv', index=False) # The output.
dfAllShape.to_csv('P_utGeometry.csv', index=False) # The output geometry.