# Pre-processing Utah Allocation data for WaDEQA upload.
- Purpose:  To pre-process the Utah data into one master file for simple DataFrame creation and extraction

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Utah/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Point of Diversion Data

In [3]:
# Input File
FI_PoD = "PointsOfDiversion_input.csv"
dfPODin = pd.read_csv(FI_PoD, encoding = "ISO-8859-1")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfPODin:
    dfPODin['WaDEUUID'] = "utD" + dfPODin.index.astype(str)
    dfPODin.to_csv('PointsOfDiversion_input.csv', index=False)

dfPODin.head()

Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,OBJECTID,WRNUM,CHEXNUM,TYPE,SUMMARY_ST,STATUS,PRIORITY,USES,CFS,ACFT,LOCATION,WIN,OWNER,SOURCE,WebLink,Latitude,Longitude
0,utD0,"Unused Site Record,Incomplete or bad entry for...",,0,795590894,0005007P00,,Underground,A,APPLAPP,,,0.0,0.0,N460 E1460 SW 17 25S 23E SL,22579,CASTLE VALLEY TOWN OF,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=000...,38.62434,-109.40242
1,utD1,"Unused Site Record,Incomplete or bad entry for...",,1,795590895,0005008P00,,Underground,A,APPLAPP,,,0.0,0.0,S100 E1650 NW 20 25S 23E SL,22558,CASTLE VALLEY TOWN OF,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=000...,38.62279,-109.40179
2,utD10,"Unused Site Record,Incomplete or bad entry for...",,10,795590904,0015001M00,,Underground,A,APPLAPP,,,0.0,0.0,N940 E560 SE 18 1S 19W SL,21431,TOM JONES,Non-Production Well: Unknown,https://www.waterrights.utah.gov/search/?q=001...,40.73587,-114.03529
3,utD100,,,100,795590994,81-1960,,Point to Point,P,DILWUC,1854.0,S,0.013,0.0,N660 E660 S4 08 39S 15W SL,0,USA FOREST SERVICE,Unnamed Wash,https://www.waterrights.utah.gov/search/?q=81-...,37.40388,-113.55916
4,utD1000,,,1000,795591894,19-83,,Point to Point,P,DIL,1856.0,S,0.011,0.0,S660 W1980 E4 15 32S 18W SL,0,USA BUREAU OF LAND MANAGEMENT,Hamlin Spring No. 1,https://www.waterrights.utah.gov/search/?q=19-83,38.01811,-113.84853


In [4]:
def fixNativeID(valA):
    outString = str(valA).strip()
    return outString

dfPODin['WRNUM'] = dfPODin.apply(lambda row: fixNativeID(row['WRNUM']), axis=1)

In [5]:
# For creating BeneficialUseCategory
benUseDict = {
    "I" : "Irrigation",
    "S" : "Stockwatering",
    "D" : "Domestic",
    "M" : "Municipal",
    "X" : "Mining",
    "P" : "Power",
    "O" : "Other"}
def assignBenUseCategory(colrowValue):
    colrowValue = str(colrowValue).strip()
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "WaDE_Unspecified"
    else:
        outList = ",".join(benUseDict[inx] for inx in list(str(colrowValue)))
    return outList


dfPODin['in_BeneficialUseCategory'] = dfPODin.apply(lambda row: assignBenUseCategory(row['USES']), axis=1)
dfPODin['in_BeneficialUseCategory'].unique()

array(['WaDE_Unspecified', 'Stockwatering',
       'Domestic,Irrigation,Stockwatering', 'Irrigation,Other',
       'Domestic,Irrigation', 'Irrigation', 'Other,Stockwatering',
       'Other', 'Domestic', 'Irrigation,Stockwatering',
       'Domestic,Stockwatering', 'Municipal', 'Stockwatering,Mining',
       'Irrigation,Municipal', 'Irrigation,Municipal,Stockwatering',
       'Municipal,Other', 'Domestic,Other',
       'Domestic,Irrigation,Other,Stockwatering',
       'Domestic,Irrigation,Other', 'Domestic,Other,Stockwatering',
       'Domestic,Other,Mining', 'Irrigation,Other,Stockwatering',
       'Other,Mining', 'Mining', 'Power',
       'Domestic,Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Municipal,Other', 'Domestic,Stockwatering,Mining',
       'Domestic,Municipal', 'Other,Stockwatering,Mining',
       'Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Irrigation,Municipal,Stockwatering', 'Irrigation,Mining',
       'Domestic,Mining', 'Stockwatering,Power'

In [6]:
# Creating the output Dataframe for PODs.

dfPOD = pd.DataFrame(index=dfPODin.index)

# Data Assessment UUID
dfPOD['WaDEUUID'] = dfPODin['WaDEUUID']

# Water Source
dfPOD["in_WaterSourceName"] = "WaDE_Unspecified"
dfPOD["in_WaterSourceTypeCV"] = dfPODin['TYPE']

# Site
dfPOD["in_CoordinateAccuracy"] = "WaDE_Unspecified"
dfPOD["in_CoordinateMethodCV"] = "WaDE_Unspecified"
dfPOD['in_HUC12'] = ""
dfPOD['in_HUC8'] = ""
dfPOD['in_County'] = ""
dfPOD["in_Latitude"] = dfPODin['Latitude']
dfPOD["in_Longitude"] = dfPODin['Longitude']
dfPOD["in_PODorPOUSite"] = "POD"
dfPOD["in_SiteName"] = dfPODin['SOURCE']
dfPOD["in_SiteNativeID"] = "POD" + dfPODin['OBJECTID'].astype(str)
dfPOD["in_SiteTypeCV"] = dfPODin['SOURCE']
dfPOD["in_StateCV"] = "UT"

# Allocation
dfPOD["in_AllocationFlow_CFS"] = dfPODin['CFS'].astype(float)
dfPOD["in_AllocationVolume_AF"] = dfPODin['ACFT'].astype(float)
dfPOD['in_AllocationLegalStatusCV'] = dfPODin['STATUS']
dfPOD["in_AllocationNativeID"] = dfPODin['WRNUM']
dfPOD['in_AllocationOwner'] = dfPODin['OWNER']
dfPOD['in_AllocationPriorityDate'] = dfPODin['PRIORITY']
dfPOD['in_AllocationTimeframeEnd'] = ""
dfPOD['in_AllocationTimeframeStart'] = ""
dfPOD['in_AllocationTypeCV'] = ""
dfPOD["in_BeneficialUseCategory"] = dfPODin['in_BeneficialUseCategory']  #see above
dfPOD['in_CommunityWaterSupplySystem'] = ""
dfPOD['in_ExemptOfVolumeFlowPriority'] = "0"
dfPOD["in_IrrigatedAcreage"] = ""
dfPOD["in_WaterAllocationNativeURL"] = ""

dfPOD = dfPOD.drop_duplicates().reset_index(drop=True)
print(len(dfPOD))
dfPOD.head(1)

350410


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_WaterAllocationNativeURL
0,utD0,WaDE_Unspecified,Underground,WaDE_Unspecified,WaDE_Unspecified,,,,38.62434,-109.40242,POD,Non-Production Well: Test,POD795590894,Non-Production Well: Test,UT,0.0,0.0,APPLAPP,0005007P00,CASTLE VALLEY TOWN OF,,,,,WaDE_Unspecified,,0,,


## Place of Use Data

In [7]:
# Input File
FI_POU = "Utah_Place_of_Use_Irrigation_input.csv"
dfPOUin = pd.read_csv(FI_POU, encoding = "ISO-8859-1") # Place of Use Input

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfPOUin:
    dfPOUin['WaDEUUID'] = "utU" + dfPOUin.index.astype(str)
    dfPOUin.to_csv('Utah_Place_of_Use_Irrigation_input.csv', index=False)

print(len(dfPOUin))
dfPOUin.head()

  dfPOUin = pd.read_csv(FI_POU, encoding = "ISO-8859-1") # Place of Use Input


214494


Unnamed: 0,level_0,index,ÃÂ¯ÃÂ»ÃÂ¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUM,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Length,Shape_Area,WaDEUUID
0,0,0,1,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0,utU0
1,1,1,2,21320842,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,01-1078,https://maps.waterrights.utah.gov/POUPolygons/...,3,76631.45801,2248.41702,38.65817,-109.68494,0.02335,1e-05,utU1
2,2,1,2,21320842,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,01-1124,https://maps.waterrights.utah.gov/POUPolygons/...,3,76631.45801,2248.41702,38.65817,-109.68494,0.02335,1e-05,utU2
3,3,2,11,21320851,120,1,,,Hydrgraphic Survey Map,https://waterrights.utah.gov/docSys/v925/R925/...,0.10614,Jan-50,https://maps.waterrights.utah.gov/POUPolygons/...,37,429.43018,89.41734,38.81333,-109.2994,0.00093,0.0,utU3
4,4,2,11,21320851,120,1,,,Hydrgraphic Survey Map,https://waterrights.utah.gov/docSys/v925/R925/...,0.10614,01-134,https://maps.waterrights.utah.gov/POUPolygons/...,37,429.43018,89.41734,38.81333,-109.2994,0.00093,0.0,utU4


In [8]:
dfPOUin = dfPOUin.drop_duplicates().reset_index(drop=True)
print(len(dfPOUin))
dfPOUin.head()

214494


Unnamed: 0,level_0,index,ÃÂ¯ÃÂ»ÃÂ¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUM,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Length,Shape_Area,WaDEUUID
0,0,0,1,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0,utU0
1,1,1,2,21320842,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,01-1078,https://maps.waterrights.utah.gov/POUPolygons/...,3,76631.45801,2248.41702,38.65817,-109.68494,0.02335,1e-05,utU1
2,2,1,2,21320842,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,01-1124,https://maps.waterrights.utah.gov/POUPolygons/...,3,76631.45801,2248.41702,38.65817,-109.68494,0.02335,1e-05,utU2
3,3,2,11,21320851,120,1,,,Hydrgraphic Survey Map,https://waterrights.utah.gov/docSys/v925/R925/...,0.10614,Jan-50,https://maps.waterrights.utah.gov/POUPolygons/...,37,429.43018,89.41734,38.81333,-109.2994,0.00093,0.0,utU3
4,4,2,11,21320851,120,1,,,Hydrgraphic Survey Map,https://waterrights.utah.gov/docSys/v925/R925/...,0.10614,01-134,https://maps.waterrights.utah.gov/POUPolygons/...,37,429.43018,89.41734,38.81333,-109.2994,0.00093,0.0,utU4


In [9]:
# I manually solved this to prevet future errors.

# # Need to split out WRNUMS into their own row
# # The explode() method explodes lists into separate rows.
# dfPOUin = dfPOUin.assign(WRNUMS=dfPOUin['WRNUMS'].str.split(',')).explode('WRNUMS').reset_index()
# dfPOUin = dfPOUin.rename({'WRNUMS': 'WRNUM'}, axis=1)
# dfPOUin = dfPOUin.replace(np.nan, "").reset_index()
# print(len(dfPOUin))
# dfPOUin.head(1)

dfPOUin = dfPOUin.drop_duplicates().reset_index(drop=True)
print(len(dfPOUin))

214494


In [10]:
def fixNativeID(valA):
    outString = str(valA).strip()
    return outString

dfPOUin['WRNUM'] = dfPOUin.apply(lambda row: fixNativeID(row['WRNUM']), axis=1)

In [11]:
# CFS not provided for POU data.  Will instead assume they share values.
# merging dfPOD data to ensure that the POUs are using the same CFS and AF as the PODS.
dfPOUin = pd.merge(dfPOUin, dfPOD, left_on='WRNUM', right_on='in_AllocationNativeID', how='left')
print(len(dfPOUin))
dfPOUin.head(1)

852522


Unnamed: 0,level_0,index,ÃÂ¯ÃÂ»ÃÂ¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUM,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Length,Shape_Area,WaDEUUID_x,WaDEUUID_y,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_WaterAllocationNativeURL
0,0,0,1,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0,utU0,utD186522,WaDE_Unspecified,Underground,WaDE_Unspecified,WaDE_Unspecified,,,,38.68213,-109.67368,POD,Underground Water Well (Existing),POD795777416,Underground Water Well (Existing),UT,0.0,1.375,APPLUNAP,01-1127,UTAH SCHOOL AND INSTITUTIONAL TRUST LANDS ADMIN.,20180710,,,,Other,,0,,


In [12]:
# Creating the output Dataframe for POUs.

dfPOU = pd.DataFrame(index=dfPOUin.index)

# Data Assessment UUID
dfPOU['WaDEUUID'] = dfPOUin['WaDEUUID_x']

# Water Source
dfPOU["in_WaterSourceName"] = "WaDE_Unspecified"
dfPOU["in_WaterSourceTypeCV"] = ""

# Site
dfPOU["in_CoordinateAccuracy"] = "WaDE_Unspecified"
dfPOU["in_CoordinateMethodCV"] = "WaDE_Unspecified"
dfPOU['in_County'] = ""
dfPOU['in_HUC12'] = ""
dfPOU['in_HUC8'] = ""
dfPOU["in_Latitude"] = dfPOUin['Latitude']
dfPOU["in_Longitude"] = dfPOUin['Longitude']
dfPOU["in_PODorPOUSite"] = "POU"
dfPOU["in_SiteName"] = "WaDE_Unspecified"
dfPOU["in_SiteNativeID"] = "POU" + dfPOUin['RECORD_ID'].astype(str)
dfPOU["in_SiteTypeCV"] = ""
dfPOU["in_StateCV"] = "UT"

# Allocation
dfPOU["in_AllocationFlow_CFS"] = dfPOUin['in_AllocationFlow_CFS'].astype(float)  # from POD data
dfPOU["in_AllocationVolume_AF"] = dfPOUin['in_AllocationVolume_AF'].astype(float)
dfPOU['in_AllocationLegalStatusCV'] = dfPOUin['in_AllocationLegalStatusCV'] # from POD data
dfPOU["in_AllocationNativeID"] = dfPOUin['WRNUM'].str.strip()
dfPOU['in_AllocationOwner'] = dfPOUin['in_AllocationOwner']  # from POD data
dfPOU['in_AllocationPriorityDate'] = dfPOUin['in_AllocationPriorityDate']  # from POD data
dfPOU['in_AllocationTimeframeEnd'] = ""
dfPOU['in_AllocationTimeframeStart'] = ""
dfPOU['in_AllocationTypeCV'] = ""
dfPOU["in_BeneficialUseCategory"] = dfPOUin['in_BeneficialUseCategory']  # from POD data
dfPOU['in_CommunityWaterSupplySystem'] = ""
dfPOU['in_ExemptOfVolumeFlowPriority'] = "0"
dfPOU["in_IrrigatedAcreage"] = dfPOUin['ACRES'].astype(float)
dfPOU["in_WaterAllocationNativeURL"] = ""

dfPOU = dfPOU.drop_duplicates().reset_index(drop=True)
print(len(dfPOUin))
dfPOU.head(1)

852522


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_WaterAllocationNativeURL
0,utU0,WaDE_Unspecified,,WaDE_Unspecified,WaDE_Unspecified,,,,38.65522,-109.67395,POU,WaDE_Unspecified,POU21320841,,UT,0.0,1.375,APPLUNAP,01-1127,UTAH SCHOOL AND INSTITUTIONAL TRUST LANDS ADMIN.,20180710,,,,Other,,0,0.00431,


## Concatenate POD and POU Data.  Clean Data.

In [13]:
# Concatenate
frames = [dfPOD, dfPOU]
dfout = pd.concat(frames)

#Removing all NaN Values and replacing with blank
dfout = dfout.replace(np.nan, "", regex=True).reset_index()

print(len(dfout))
dfout.head(1)

712809


Unnamed: 0,index,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_WaterAllocationNativeURL
0,0,utD0,WaDE_Unspecified,Underground,WaDE_Unspecified,WaDE_Unspecified,,,,38.62434,-109.40242,POD,Non-Production Well: Test,POD795590894,Non-Production Well: Test,UT,0.0,0.0,APPLAPP,0005007P00,CASTLE VALLEY TOWN OF,,,,,WaDE_Unspecified,,0,,


In [14]:
# Clean Owner info. Remove special characters
import re

def cleanOwnerDataFunc(Val):
    Val = re.sub("[$@&.;,/\)(-]", "", Val).strip()
    return Val

dfout['in_AllocationOwner'] = dfout.apply(lambda row: cleanOwnerDataFunc(row['in_AllocationOwner']), axis=1)
dfout['in_AllocationOwner'].unique()

array(['CASTLE VALLEY TOWN OF', 'TOM JONES', 'USA FOREST SERVICE', ...,
       'BARBARA NIELSEN', 'GAYLEN D KINDER', 'STOCKTON CITY CORPORATION'],
      dtype=object)

In [15]:
# Changing datatype of used date fields. 
dfout['in_AllocationPriorityDate'] = pd.to_datetime(dfout['in_AllocationPriorityDate'], errors = 'coerce')
dfout['in_AllocationPriorityDate'] = pd.to_datetime(dfout["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))

In [16]:
# Making Sure datatype of Long, Lat, Wrex, Irrigation are Float
dfout['in_AllocationFlow_CFS'] = pd.to_numeric(dfout['in_AllocationFlow_CFS'], errors='coerce')
dfout['in_AllocationVolume_AF'] = pd.to_numeric(dfout['in_AllocationVolume_AF'], errors='coerce')

In [17]:
# For filling in WaDE_Unspecified when null

def assignBeneficialUseCategory(val):
    val = str(val).strip().rstrip(',')
    if val == "" or pd.isnull(val):
        outString = "WaDE_Unspecified"
    else:
        outString = val
    return outString

dfout['in_BeneficialUseCategory'] =dfout.apply(lambda row: assignBeneficialUseCategory(row['in_BeneficialUseCategory']), axis=1)
dfout['in_BeneficialUseCategory'].unique()

array(['WaDE_Unspecified', 'Stockwatering',
       'Domestic,Irrigation,Stockwatering', 'Irrigation,Other',
       'Domestic,Irrigation', 'Irrigation', 'Other,Stockwatering',
       'Other', 'Domestic', 'Irrigation,Stockwatering',
       'Domestic,Stockwatering', 'Municipal', 'Stockwatering,Mining',
       'Irrigation,Municipal', 'Irrigation,Municipal,Stockwatering',
       'Municipal,Other', 'Domestic,Other',
       'Domestic,Irrigation,Other,Stockwatering',
       'Domestic,Irrigation,Other', 'Domestic,Other,Stockwatering',
       'Domestic,Other,Mining', 'Irrigation,Other,Stockwatering',
       'Other,Mining', 'Mining', 'Power',
       'Domestic,Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Municipal,Other', 'Domestic,Stockwatering,Mining',
       'Domestic,Municipal', 'Other,Stockwatering,Mining',
       'Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Irrigation,Municipal,Stockwatering', 'Irrigation,Mining',
       'Domestic,Mining', 'Stockwatering,Power'

In [18]:
# Creating WaterSourceTypeCV

WaterSourceTypeCVDictionary={
"Underground" : "Groundwater",
"Abandonded Well" : "Groundwater",
"Point to Point" : "Surface Water",
"Surface" : "Surface Water",
"Return" : "Surface Water",
"Drain" : "Surface Water",
"Spring" : "Surface Water",
"Rediversion" : "Surface Water"}
def CreateWaterSourceTypeCV(val):
    if val == "" or pd.isnull(val):
        outString = "WaDE_Unspecified"
    else:
        val = val.strip()
        try:
            outString = WaterSourceTypeCVDictionary[val]
        except:
            outString = "WaDE_Unspecified"
    return outString

dfout['in_WaterSourceTypeCV'] = dfout.apply(lambda row: CreateWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
dfout['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'WaDE_Unspecified'], dtype=object)

In [19]:
# SiteName, For filling in WaDE_Unspecified when null

def assignSiteName(val):
    val = str(val).strip()
    if val == "" or pd.isnull(val):
        outString = "WaDE_Unspecified"
    else:
        outString = val
    return outString

dfout['in_SiteName'] = dfout.apply(lambda row: assignSiteName(row['in_SiteName']), axis=1)
dfout['in_SiteName'].unique()

array(['Non-Production Well: Test', 'Non-Production Well: Unknown',
       'Unnamed Wash', ..., 'Drains and Ditches', 'Ridge Spring #5',
       'Louis Spring #2'], dtype=object)

In [20]:
# SiteNativeID, For filling in WaDE_Unspecified when null

def assignSiteNativeID(val):
    val = str(val).strip()
    return val

dfout['in_SiteNativeID'] = dfout.apply(lambda row: assignSiteNativeID(row['in_SiteNativeID']), axis=1)
dfout['in_SiteNativeID'].unique()

array(['POD795590894', 'POD795590895', 'POD795590904', ..., 'POU21330838',
       'POU21330839', 'POU21330840'], dtype=object)

In [21]:
# Assign SiteTypeCV value.
# Uses the re library, but requires for loop.
# Order that the lists are inputed into dictoinary is important, want to overide generic search with a more specific search.

# Create the Lists
canalList = ["canal", "canals"]
creekList = ["creek"]
ditchList = ["ditch"]
drainList = ["drain", "drains"]
lakeList = ["lake"]
pondList = ["pond"]
reservoirList = ["reservoir"]
riverList = ["river", "fork", "surface"]
sloughList = ["slough"]
springList = ["spring", "springs", "gulch", "seep"]
tunnelList = ["tunnel", "tunnels"]
washList = ["wash"]
wellList = ["well", "wells", "well:", "draw", "hollow"]

# Making the dictionary
listDictionary = {}
listDictionary["Canal"] = canalList
listDictionary["Creek"] = creekList
listDictionary["Ditch"] = ditchList
listDictionary["Drain"] = drainList
listDictionary["Lake"] = lakeList
listDictionary["Pond"] = pondList
listDictionary["Reservoir"] = reservoirList
listDictionary["River"] = riverList
listDictionary["Slough"] = sloughList
listDictionary["Spring"] = springList
listDictionary["Tunnel"] = tunnelList
listDictionary["Wash"] = washList
listDictionary["Well"] = wellList

def CreateSiteTypeCV(val):
    if val == "" or pd.isnull(val):
        outString = "WaDE_Unspecified"
    else:
        outString = "WaDE_Unspecified" # Default
        
        # Cleaning text / simple search format
        val = val.replace(",", " ")
        val = val.replace(".", " ")
        val = val.replace(";", " ")
        val = val.replace("-", " ")
        val = val.replace("/", " ")
        val = val.replace("(", " ")
        val = val.replace(")", " ")
        val = val.lower().strip()
        val = " "+val+" "
        
        for x in listDictionary:
            labelString = x
            valueList = listDictionary[x]
            for words in valueList:
                if re.search(" "+words+ " ", val): outString = x
            
    return outString

dfout['in_SiteTypeCV'] = dfout.apply(lambda row: CreateSiteTypeCV( row['in_SiteTypeCV']), axis=1)
dfout['in_SiteTypeCV'].unique()

array(['Well', 'Wash', 'Spring', 'Creek', 'River', 'Lake', 'Drain',
       'Slough', 'Ditch', 'WaDE_Unspecified', 'Tunnel', 'Reservoir',
       'Canal', 'Pond'], dtype=object)

In [22]:
# Assign LegalStatusCV value.
# Uses the re library, but requires for loop.
# Order that the lists are inputed into dictoinary is important, want to overide generic search with a more specific search.

# Create the Lists
ADECList = ["ADEC"]
ADVList = ["ADV"]
APPList = ["APP"]
CERTList = ["CERT"]
DECList = ["DEC"]
DILList = ["DIL"]
DISList = ["DIS"]
EXPList = ["EXP"]
FORFList = ["FORF"]
LAPList = ["LAP"]
NPRList = ["NPR"]
NUSEList = ["NUSE"]
PERFList = ["PERF"]
REJList = ["REJ"]
RNUMList = ["RNUM"]
STATUSList = ["STATUS"]
TEMPList = ["TEMP"]
TERMList = ["TERM"]
UGWCList = ["UGWC"]
UNAPList = ["UNAP"]
WDList = ["WD"]
WUCList = ["WUC"]


# Making the dictionary
listDictionary = {}

listDictionary["Lapsed"] = LAPList

listDictionary["Adjudication Decree"] = ADECList
listDictionary["Adverse Use Claim"] = ADVList
listDictionary["Approved"] = APPList
listDictionary["Certificated"] = CERTList
listDictionary["Decree"] = DECList
listDictionary["Diligence Claim"] = DILList
listDictionary["Disallowed"] = DISList
listDictionary["Expired"] = EXPList
listDictionary["Forfeited"] = FORFList
listDictionary["No Proof Required"] = NPRList
listDictionary["Nonuse"] = NUSEList
listDictionary["Perfected"] = PERFList
listDictionary["Rejected"] = REJList
listDictionary["Renumbered"] = RNUMList
listDictionary["Deff"] = STATUSList
listDictionary["Temp Applications"] = TEMPList
listDictionary["Terminated"] = TERMList
listDictionary["Underground Water Claim"] = UGWCList
listDictionary["Unapproved"] = UNAPList
listDictionary["Withdrawn"] = WDList
listDictionary["Water User`s Claim"] = WUCList


def CreateLegalStatus(val):
    val = str(val).strip()
    if val == "" or pd.isnull(val):
        outString = "WaDE_Unspecified"
    else:
        outString = ""
        for x in listDictionary:
            valueList = listDictionary[x]
            for words in valueList:
                if words in val: outString = x
    
    if outString == "" or pd.isnull(val):
        outString = "WaDE_Unspecified"

    return outString

dfout['in_AllocationLegalStatusCV'] = dfout.apply(lambda row: CreateLegalStatus( row['in_AllocationLegalStatusCV']), axis=1)
dfout['in_AllocationLegalStatusCV'].unique()

array(['Approved', 'Water User`s Claim', 'Diligence Claim',
       'Certificated', 'Decree', 'Lapsed', 'Forfeited',
       'WaDE_Unspecified', 'Rejected', 'Renumbered',
       'Underground Water Claim', 'No Proof Required', 'Disallowed',
       'Withdrawn', 'Unapproved', 'Temp Applications', 'Expired',
       'Adverse Use Claim', 'Terminated'], dtype=object)

## WaDE Custom Elements (due to missing state site info)

In [23]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEUT_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    if (A == '') or (pd.isnull(A)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceTypeCV']), axis=1)
dfout['in_WaterSourceNativeID'].unique()

array(['WaDEUT_WS1', 'WaDEUT_WS2', 'WaDEUT_WS3'], dtype=object)

## Shapefile Data
- For attaching geometry to POU csv inputs.

In [29]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('Utah_Place_of_Use_Irrigation/Utah_Place_of_Use_Irrigation.shp')
print(len(dfPoUshapetemp))
dfPoUshapetemp.head(1)

75224


Unnamed: 0,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUMS,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Leng,Shape_Area,geometry
0,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0,"POLYGON ((-109.67401 38.65519, -109.67395 38.6..."


In [25]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['RECORD_ID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,POU21320841,"POLYGON ((-109.67401 38.65519, -109.67395 38.6..."
1,POU21320842,"MULTIPOLYGON (((-109.68702 38.65724, -109.6870..."
2,POU21320843,"POLYGON ((-109.28259 38.84929, -109.28329 38.8..."


## Export Data

In [26]:
#Removing all NaN Values and replacing with blank
dfout = dfout.replace(np.nan, "", regex=True)
print(len(dfout))
dfout.head(3)

712809


Unnamed: 0,index,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_WaterAllocationNativeURL,in_WaterSourceNativeID
0,0,utD0,WaDE_Unspecified,Groundwater,WaDE_Unspecified,WaDE_Unspecified,,,,38.62434,-109.40242,POD,Non-Production Well: Test,POD795590894,Well,UT,0.0,0.0,Approved,0005007P00,CASTLE VALLEY TOWN OF,NaT,,,,WaDE_Unspecified,,0,,,WaDEUT_WS1
1,1,utD1,WaDE_Unspecified,Groundwater,WaDE_Unspecified,WaDE_Unspecified,,,,38.62279,-109.40179,POD,Non-Production Well: Test,POD795590895,Well,UT,0.0,0.0,Approved,0005008P00,CASTLE VALLEY TOWN OF,NaT,,,,WaDE_Unspecified,,0,,,WaDEUT_WS1
2,2,utD10,WaDE_Unspecified,Groundwater,WaDE_Unspecified,WaDE_Unspecified,,,,40.73587,-114.03529,POD,Non-Production Well: Unknown,POD795590904,Well,UT,0.0,0.0,Approved,0015001M00,TOM JONES,NaT,,,,WaDE_Unspecified,,0,,,WaDEUT_WS1


In [27]:
#Exporting to Finished File
dfout.to_csv('Pwr_UtahMain.csv', index=False)  # The output
dfPoUshape.to_csv('P_utGeometry.csv', index=False) # The output geometry.