# Pre-processing Utah Allocation data for WaDE upload.
- Purpose:  To pre-process the Utah data into one master file for simple DataFrame creation and extraction

In [1]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [2]:
# ---- working directory ----
workingDirString = "G:/Shared drives/WaDE Data/Utah/WaterAllocation" # set working directory folder string here
os.chdir(workingDirString)
print(f'The working Directory is:', workingDirString)

The working Directory is: G:/Shared drives/WaDE Data/Utah/WaterAllocation


## Point of Diversion Data

In [3]:
# Input File
FI_PoD = "RawInputData/Utah_Points_of_Diversion.zip"
dfinPOD = pd.read_csv(FI_PoD, encoding = "ISO-8859-1").replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "utD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('RawInputData/Utah_Points_of_Diversion.zip', compression=dict(method='zip', archive_name='Utah_Points_of_Diversion.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head()

  dfinPOD = pd.read_csv(FI_PoD, encoding = "ISO-8859-1").replace(np.nan, "")


382049


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,OBJECTID,WRNUM,CHEXNUM,TYPE,SUMMARY_ST,STATUS,PRIORITY,USES,CFS,ACFT,LOCATION,Latitude,Longitude,WIN,OWNER,SOURCE,WebLink
0,utD0,,,1,864475622,66-1001,t39016,Point to Point,T,APPLLAP,20130516.0,O,0.061,6.38,N660 E660 SW 28 14S 1E SL,39.56057,-111.83268,0,LEVAN IRRIGATION COMPANY,P,https://www.waterrights.utah.gov/search/?q=t39016
1,utD1,Incomplete or bad entry for AllocationPriority...,,2,864475623,0259005M00,,Underground,A,APPLAPP,,,0.0,0.0,N616 E379 S4 09 2S 1W SL,40.6548,-111.95654,25726,TAYLORSVILLE CITY C/O GEO ENGERY SYSTEMS,Non-Production Well: Closed Loop Heat Exchange,https://www.waterrights.utah.gov/search/?q=025...
2,utD10,Incomplete or bad entry for AllocationPriority...,,11,864475632,0259008M00,,Underground,A,APPLAPP,,,0.0,0.0,S2750 E120 NW 10 2S 1W SL,40.66009,-111.94791,25924,FEC DRILLING,Non-Production Well: Unknown,https://www.waterrights.utah.gov/search/?q=025...
3,utD100,,,101,864475722,18-427,,Underground,P,APPLWUC,19800721.0,DIS,0.015,0.0,N980 W1550 SE 04 14S 18W SL,39.61925,-113.87756,21198,STEPHEN P MUMM,Underground Water Well,https://www.waterrights.utah.gov/search/?q=18-427
4,utD1000,Incomplete or bad entry for AllocationPriority...,,1001,864476622,0265001P00,,Abandonded Well,A,APPLAPP,,,0.0,0.0,N1465 W2085 SE 04 17S 3E SL,39.3627,-111.58694,24997,JEFFERIES VINCENT,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=026...


In [4]:
# We don't want to use any CHEXNUM data
dfinPOD['CHEXNUM'] = dfinPOD['CHEXNUM'].str.strip()
dfinPOD = dfinPOD[dfinPOD['CHEXNUM'] == ""].reset_index(drop=True)
print(len(dfinPOD))
dfinPOD.head()

282359


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,OBJECTID,WRNUM,CHEXNUM,TYPE,SUMMARY_ST,STATUS,PRIORITY,USES,CFS,ACFT,LOCATION,Latitude,Longitude,WIN,OWNER,SOURCE,WebLink
0,utD1,Incomplete or bad entry for AllocationPriority...,,2,864475623,0259005M00,,Underground,A,APPLAPP,,,0.0,0.0,N616 E379 S4 09 2S 1W SL,40.6548,-111.95654,25726,TAYLORSVILLE CITY C/O GEO ENGERY SYSTEMS,Non-Production Well: Closed Loop Heat Exchange,https://www.waterrights.utah.gov/search/?q=025...
1,utD10,Incomplete or bad entry for AllocationPriority...,,11,864475632,0259008M00,,Underground,A,APPLAPP,,,0.0,0.0,S2750 E120 NW 10 2S 1W SL,40.66009,-111.94791,25924,FEC DRILLING,Non-Production Well: Unknown,https://www.waterrights.utah.gov/search/?q=025...
2,utD100,,,101,864475722,18-427,,Underground,P,APPLWUC,19800721.0,DIS,0.015,0.0,N980 W1550 SE 04 14S 18W SL,39.61925,-113.87756,21198,STEPHEN P MUMM,Underground Water Well,https://www.waterrights.utah.gov/search/?q=18-427
3,utD1000,Incomplete or bad entry for AllocationPriority...,,1001,864476622,0265001P00,,Abandonded Well,A,APPLAPP,,,0.0,0.0,N1465 W2085 SE 04 17S 3E SL,39.3627,-111.58694,24997,JEFFERIES VINCENT,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=026...
4,utD10000,,,10044,864485665,73-1713,,Point to Point,P,DIL,1856.0,S,0.0,0.448,S660 W1980 E4 29 36S 10W SL,37.63907,-113.00251,0,SIDEHILL LLC,White Rock Spring Stream,https://www.waterrights.utah.gov/search/?q=73-...


In [5]:
# For creating BeneficialUseCategory
benUseDict = {
    "I" : "Irrigation",
    "S" : "Stockwatering",
    "D" : "Domestic",
    "M" : "Municipal",
    "X" : "Mining",
    "P" : "Power",
    "O" : "Other"}
def assignBenUseCategory(colrowValue):
    colrowValue = str(colrowValue).strip()
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = ""
    else:
        outList = ",".join(benUseDict[inx] for inx in list(str(colrowValue)))
    return outList


dfinPOD['in_BeneficialUseCategory'] = dfinPOD.apply(lambda row: assignBenUseCategory(row['USES']), axis=1)
dfinPOD['in_BeneficialUseCategory'].unique()

array(['', 'Domestic,Irrigation,Stockwatering', 'Stockwatering',
       'Domestic,Other', 'Other', 'Irrigation', 'Domestic',
       'Domestic,Irrigation', 'Irrigation,Stockwatering',
       'Irrigation,Municipal,Stockwatering', 'Municipal',
       'Domestic,Stockwatering',
       'Domestic,Irrigation,Other,Stockwatering', 'Irrigation,Other',
       'Other,Stockwatering', 'Domestic,Irrigation,Municipal', 'Power',
       'Irrigation,Other,Stockwatering', 'Municipal,Other',
       'Domestic,Other,Stockwatering',
       'Domestic,Irrigation,Municipal,Stockwatering',
       'Domestic,Irrigation,Other', 'Mining',
       'Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Stockwatering,Mining', 'Irrigation,Mining',
       'Domestic,Mining', 'Stockwatering,Mining',
       'Domestic,Irrigation,Mining', 'Domestic,Municipal',
       'Domestic,Irrigation,Other,Power',
       'Domestic,Irrigation,Municipal,Other,Stockwatering',
       'Irrigation,Municipal,Other', 'Other,Mining',
       'M

In [6]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "UTwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "UTwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "UTwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOD['SOURCE']
df['in_WaterSourceNativeID'] = "" #auto fill in below
df['in_WaterSourceTypeCV'] = dfinPOD['TYPE']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = dfinPOD['SOURCE']
df['in_SiteNativeID'] = "POD" + dfinPOD['OBJECTID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "UT"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['CFS'].astype(float)
df['in_AllocationLegalStatusCV'] = dfinPOD['STATUS']
df['in_AllocationNativeID'] =  dfinPOD['WRNUM'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfinPOD['OWNER']
df['in_AllocationPriorityDate'] = dfinPOD['PRIORITY']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['ACFT']
df['in_BeneficialUseCategory'] = dfinPOD['in_BeneficialUseCategory']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['WebLink']

outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True)
print(len(outPOD))
outPOD.head()

282359


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utD1,UTwr_M1,UTwr_V1,UTwr_O1,,,,Non-Production Well: Closed Loop Heat Exchange,,Underground,,,,4326,,,,40.6548,-111.95654,,,POD,Non-Production Well: Closed Loop Heat Exchange,POD864475623,,,UT,,,,,,,,,,0.0,APPLAPP,0259005M00,TAYLORSVILLE CITY C/O GEO ENGERY SYSTEMS,,,,,,0.0,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=025...
1,utD10,UTwr_M1,UTwr_V1,UTwr_O1,,,,Non-Production Well: Unknown,,Underground,,,,4326,,,,40.66009,-111.94791,,,POD,Non-Production Well: Unknown,POD864475632,,,UT,,,,,,,,,,0.0,APPLAPP,0259008M00,FEC DRILLING,,,,,,0.0,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=025...
2,utD100,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,,Underground,,,,4326,,,,39.61925,-113.87756,,,POD,Underground Water Well,POD864475722,,,UT,,,,,,,,,,0.015,APPLWUC,18-427,STEPHEN P MUMM,19800721.0,,,,,0.0,"Domestic,Irrigation,Stockwatering",,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=18-427
3,utD1000,UTwr_M1,UTwr_V1,UTwr_O1,,,,Non-Production Well: Test,,Abandonded Well,,,,4326,,,,39.3627,-111.58694,,,POD,Non-Production Well: Test,POD864476622,,,UT,,,,,,,,,,0.0,APPLAPP,0265001P00,JEFFERIES VINCENT,,,,,,0.0,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=026...
4,utD10000,UTwr_M1,UTwr_V1,UTwr_O1,,,,White Rock Spring Stream,,Point to Point,,,,4326,,,,37.63907,-113.00251,,,POD,White Rock Spring Stream,POD864485665,,,UT,,,,,,,,,,0.0,DIL,73-1713,SIDEHILL LLC,1856.0,,,,,0.448,Stockwatering,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=73-...


## Place of Use Data

In [7]:
# Input File - place of use data
FI_POU = "RawInputData/Utah_Place_of_Use_Irrigation.zip"
dfinPOU = pd.read_csv(FI_POU, encoding = "ISO-8859-1").replace(np.nan, "") 

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "utU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('RawInputData/Utah_Place_of_Use_Irrigation.zip', compression=dict(method='zip', archive_name='Utah_Place_of_Use_Irrigation.csv'), index=False)

dfinPOU['WRNUMS'] = dfinPOU['WRNUMS'].replace(" ", "").str.strip().astype(str)
dfinPOU = dfinPOU.drop_duplicates().reset_index(drop=True)
print(len(dfinPOU))
dfinPOU.head()

  dfinPOU = pd.read_csv(FI_POU, encoding = "ISO-8859-1").replace(np.nan, "")


76449


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUMS,dbURL,recordID,cent_Latitude,cent_Longitude,Shape__Are,Shape__Len,Shape_Length,Shape_Area
0,utU0,,,1,34305720,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,38.65522,-109.67395,17.43164,31.16859,0.00033,0.0
1,utU1,,,2,34305721,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,"01-1078, 01-1124",https://maps.waterrights.utah.gov/POUPolygons/...,3,38.65817,-109.68494,76631.45801,2248.41702,0.02335,1e-05
2,utU10,,,11,34305730,120,1,,,Hydrgraphic Survey Map,https://waterrights.utah.gov/docSys/v925/R925/...,0.10614,"01-50, 01-134",https://maps.waterrights.utah.gov/POUPolygons/...,37,38.81333,-109.2994,429.43018,89.41734,0.00093,0.0
3,utU100,,,101,34305820,622976,5,,,ProofMap,https://waterrights.utah.gov/docImport/0513/05...,0.0073,05-3185,https://maps.waterrights.utah.gov/POUPolygons/...,190,38.31771,-109.45342,29.5166,24.34504,0.00026,0.0
4,utU1000,,,1001,34306720,4166,9,,,ProofMap,https://waterrights.utah.gov/docImport/0513/05...,0.12817,09-2167,https://maps.waterrights.utah.gov/POUPolygons/...,1997,37.28068,-109.56935,518.45996,109.49079,0.00113,0.0


In [8]:
# Need to split out WRNUMS into their own row
# The explode() method explodes lists into separate rows.
dfinPOU = dfinPOU.assign(WRNUMS=dfinPOU['WRNUMS'].str.split(',')).explode('WRNUMS').reset_index()
dfinPOU = dfinPOU.rename({'WRNUMS': 'WRNUM'}, axis=1)
dfinPOU['WRNUM'] = dfinPOU['WRNUM'].str.strip().replace("", 0).fillna(0).astype(str)
dfinPOU = dfinPOU.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
dfinPOU['WRNUM'].unique()

array(['01-1127', '01-1078', '01-1124', ..., '25-1623', '25-1624',
       '25-1626'], dtype=object)

In [9]:
# tie POD data to POU data for correct watersource info
dfinPOU = pd.merge(dfinPOU, outPOD[['in_AllocationNativeID', 'in_WaterSourceTypeCV', 'in_WaterSourceName']], left_on='WRNUM', right_on='in_AllocationNativeID', how='left')
print(len(dfinPOU))
dfinPOU.head(1)

562185


Unnamed: 0,index,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUM,dbURL,recordID,cent_Latitude,cent_Longitude,Shape__Are,Shape__Len,Shape_Length,Shape_Area,in_AllocationNativeID,in_WaterSourceTypeCV,in_WaterSourceName
0,0,utU0,,,1,34305720,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,38.65522,-109.67395,17.43164,31.16859,0.00033,0.0,01-1127,Underground,Underground Water Well


In [10]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOU['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "UTwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "UTwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "UTwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOU['in_WaterSourceName'] # from POD
df['in_WaterSourceNativeID'] = "" #auto fill in below
df['in_WaterSourceTypeCV'] = dfinPOU['in_WaterSourceTypeCV'] # from POD

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOU['cent_Latitude']
df['in_Longitude'] = dfinPOU['cent_Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
df['in_SiteNativeID'] = "POU" + dfinPOU['RECORD_ID'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "UT"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] =  dfinPOU['WRNUM'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = ""
df['in_AllocationPriorityDate'] = ""
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = ""
df['in_BeneficialUseCategory'] = ""
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = ""

outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True)
print(len(outPOU))
outPOU.head()

255805


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utU0,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,,Underground,,,,4326,,,,38.65522,-109.67395,,,POU,,POU34305720,,,UT,,,,,,,,,,,,01-1127,,,,,,,,,,,,,,0,,,,,,,,,,
1,utU1,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,,Underground,,,,4326,,,,38.65817,-109.68494,,,POU,,POU34305721,,,UT,,,,,,,,,,,,01-1078,,,,,,,,,,,,,,0,,,,,,,,,,
2,utU1,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,,Underground,,,,4326,,,,38.65817,-109.68494,,,POU,,POU34305721,,,UT,,,,,,,,,,,,01-1124,,,,,,,,,,,,,,0,,,,,,,,,,
3,utU10,UTwr_M1,UTwr_V1,UTwr_O1,,,,Colorado River and Well,,Surface,,,,4326,,,,38.81333,-109.2994,,,POU,,POU34305730,,,UT,,,,,,,,,,,,01-50,,,,,,,,,,,,,,0,,,,,,,,,,
4,utU10,UTwr_M1,UTwr_V1,UTwr_O1,,,,Colorado River and Well,,Underground,,,,4326,,,,38.81333,-109.2994,,,POU,,POU34305730,,,UT,,,,,,,,,,,,01-50,,,,,,,,,,,,,,0,,,,,,,,,,


## Concatenate POD and POU Data.  Clean Data.

In [11]:
# Concatenate dataframes
frames = [outPOD, outPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

538164


In [12]:
# Assign LegalStatusCV value.
# Uses the re library, but requires for loop.
# Order that the lists are inputed into dictoinary is important, want to overide generic search with a more specific search.

# Create the Lists
ADECList = ["ADEC"]
ADVList = ["ADV"]
APPList = ["APP"]
CERTList = ["CERT"]
DECList = ["DEC"]
DILList = ["DIL"]
DISList = ["DIS"]
EXPList = ["EXP"]
FORFList = ["FORF"]
LAPList = ["LAP"]
NPRList = ["NPR"]
NUSEList = ["NUSE"]
PERFList = ["PERF"]
REJList = ["REJ"]
RNUMList = ["RNUM"]
STATUSList = ["STATUS"]
TEMPList = ["TEMP"]
TERMList = ["TERM"]
UGWCList = ["UGWC"]
UNAPList = ["UNAP"]
WDList = ["WD"]
WUCList = ["WUC"]


# Making the dictionary
listDictionary = {}

listDictionary["Lapsed"] = LAPList

listDictionary["Adjudication Decree"] = ADECList
listDictionary["Adverse Use Claim"] = ADVList
listDictionary["Approved"] = APPList
listDictionary["Certificated"] = CERTList
listDictionary["Decree"] = DECList
listDictionary["Diligence Claim"] = DILList
listDictionary["Disallowed"] = DISList
listDictionary["Expired"] = EXPList
listDictionary["Forfeited"] = FORFList
listDictionary["No Proof Required"] = NPRList
listDictionary["Nonuse"] = NUSEList
listDictionary["Perfected"] = PERFList
listDictionary["Rejected"] = REJList
listDictionary["Renumbered"] = RNUMList
listDictionary["Deff"] = STATUSList
listDictionary["Temp Applications"] = TEMPList
listDictionary["Terminated"] = TERMList
listDictionary["Underground Water Claim"] = UGWCList
listDictionary["Unapproved"] = UNAPList
listDictionary["Withdrawn"] = WDList
listDictionary["Water User`s Claim"] = WUCList


def CreateLegalStatus(val):
    val = str(val).strip()
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        outString = ""
        for x in listDictionary:
            valueList = listDictionary[x]
            for words in valueList:
                if words in val: outString = x
    
    if outString == "" or pd.isnull(val):
        outString = ""

    return outString

outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: CreateLegalStatus( row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Approved', 'Water User`s Claim', 'Diligence Claim', '',
       'Renumbered', 'No Proof Required', 'Disallowed', 'Certificated',
       'Underground Water Claim', 'Rejected', 'Decree',
       'Temp Applications', 'Withdrawn', 'Adverse Use Claim',
       'Unapproved', 'Lapsed', 'Expired', 'Terminated', 'Forfeited'],
      dtype=object)

In [13]:
# Clean owner name up
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;,/\)(-]", "", Val).replace("  ", " ").strip()
    return Val

In [14]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['TAYLORSVILLE CITY CO GEO ENGERY SYSTEMS', 'FEC DRILLING',
       'STEPHEN P MUMM', ..., 'RAWLINGS', 'IRA WAYMAN',
       'E D AND ELDA HAWS'], dtype=object)

In [15]:
# ID POD source data has a few names that contain a ',' in them, but should still be okay
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['NonProduction Well: Closed Loop Heat Exchange',
       'NonProduction Well: Unknown', 'Underground Water Well', ...,
       'Sethys Canyon Spring #2', 'Sethys Canyon Spring #3',
       'Unnamed Tributary to Soilder Creek'], dtype=object)

In [16]:
# Ensure Empty String

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [17]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Non-Production Well: Closed Loop Heat Exchange',
       'Non-Production Well: Unknown', 'Underground Water Well', ...,
       'Sethys Canyon Spring #2', 'Sethys Canyon Spring #3',
       'Unnamed Tributary to Soilder Creek'], dtype=object)

In [18]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Underground', 'Abandonded Well', 'Point to Point', 'Surface',
       'Rediversion', 'Point of Return', 'Spring', 'Drain', 'Sewage',
       'Sewage Reuse', ''], dtype=object)

In [19]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array([''], dtype=object)

In [20]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['NonProduction Well: Closed Loop Heat Exchange',
       'NonProduction Well: Unknown', 'Underground Water Well', ...,
       'Sethys Canyon Spring #2', 'Sethys Canyon Spring #3',
       'Unnamed Tributary to Soilder Creek'], dtype=object)

In [21]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['TAYLORSVILLE CITY CO GEO ENGERY SYSTEMS', 'FEC DRILLING',
       'STEPHEN P MUMM', ..., 'RAWLINGS', 'IRA WAYMAN',
       'E D AND ELDA HAWS'], dtype=object)

In [22]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['', 'Domestic,Irrigation,Stockwatering', 'Stockwatering',
       'Domestic,Other', 'Other', 'Irrigation', 'Domestic',
       'Domestic,Irrigation', 'Irrigation,Stockwatering',
       'Irrigation,Municipal,Stockwatering', 'Municipal',
       'Domestic,Stockwatering',
       'Domestic,Irrigation,Other,Stockwatering', 'Irrigation,Other',
       'Other,Stockwatering', 'Domestic,Irrigation,Municipal', 'Power',
       'Irrigation,Other,Stockwatering', 'Municipal,Other',
       'Domestic,Other,Stockwatering',
       'Domestic,Irrigation,Municipal,Stockwatering',
       'Domestic,Irrigation,Other', 'Mining',
       'Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Stockwatering,Mining', 'Irrigation,Mining',
       'Domestic,Mining', 'Stockwatering,Mining',
       'Domestic,Irrigation,Mining', 'Domestic,Municipal',
       'Domestic,Irrigation,Other,Power',
       'Domestic,Irrigation,Municipal,Other,Stockwatering',
       'Irrigation,Municipal,Other', 'Other,Mining',
       'M

In [23]:
# in_Latitude
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([40.65479994, 40.66009038, 39.61925335, ..., 41.6812731 ,
       41.64289046, 41.67786685])

In [24]:
# in_Longitude
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-111.95654327, -111.94791433, -113.87756202, ..., -111.95649944,
       -111.9248735 , -111.93489009])

In [25]:
# Changing datatype of used date fields. 
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array([                          'NaT', '1980-07-21T00:00:00.000000000',
       '1856-01-01T00:00:00.000000000', ...,
       '1935-10-30T00:00:00.000000000', '1958-07-15T00:00:00.000000000',
       '1942-09-05T00:00:00.000000000'], dtype='datetime64[ns]')

In [26]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array(['', 0.015, 0.338, ..., 2.565, 4.748, 6.94], dtype=object)

In [27]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array(['', 0.448, 0.476, ..., 30.75, 54.48, 411.04], dtype=object)

In [36]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "wadeID" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName'].str.strip()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].str.strip()
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
WaterSourceNativeIDdict = pd.Series(dfWaterSourceNativeID.in_WaterSourceNativeID.values, index=dfWaterSourceNativeID.linkKey.astype(str)).to_dict()
def retrieveWaterSourceNativeID(A, B):
    colrowValue = str(A).strip() + str(B).strip()
    try:
        outList = WaterSourceNativeIDdict[colrowValue]
    except:
        outList = ""
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1', 'wadeID2', 'wadeID3', ..., 'wadeID26038', 'wadeID26039',
       'wadeID26040'], dtype=object)

## Drop non-Active AllocationLegalStatusCV Water Rights
- For UT, we don't want water rights that are considered: Rejected, Disallowed, Withdrawn, Temp Applications, Unapproved, Forfeited, Lapsed, Expired, Terminated

In [29]:
# drop non-active AllocationLegalStatusCV values specific to that state.

# drop the list
dropLegalStatusList = ["Rejected", "Disallowed", "Withdrawn", "Temp Applications", "Unapproved", "Forfeited", "Lapsed", "Expired", "Terminated"]

# drop rows from above list
outdf = outdf[outdf.in_AllocationLegalStatusCV.isin(dropLegalStatusList) == False].reset_index(drop=True)

print(len(outdf))
outdf['in_AllocationLegalStatusCV'].unique()

514224


array(['Approved', 'Water User`s Claim', 'Diligence Claim', '',
       'Renumbered', 'No Proof Required', 'Certificated',
       'Underground Water Claim', 'Decree', 'Adverse Use Claim'],
      dtype=object)

## Shapefile Data
- For attaching geometry to POU csv inputs.

In [30]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('RawInputData/shapefiles/Utah_Place_of_Use_Irrigation.zip')
print(len(dfPoUshapetemp))
dfPoUshapetemp.head()

76449


Unnamed: 0,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUMS,dbURL,recordID,cent_Latit,cent_Longi,Shape__Are,Shape__Len,Shape_Leng,Shape_Area,geometry
0,34305720,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,38.65522,-109.67395,17.43164,31.16859,0.00033,0.0,"POLYGON ((-109.67401 38.65519, -109.67402 38.6..."
1,34305721,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,"01-1078, 01-1124",https://maps.waterrights.utah.gov/POUPolygons/...,3,38.65817,-109.68494,76631.45801,2248.41702,0.02335,1e-05,"MULTIPOLYGON (((-109.68702 38.65724, -109.6870..."
2,34305722,80,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,0.49387,01-1122,https://maps.waterrights.utah.gov/POUPolygons/...,4,38.84946,-109.28307,1998.11963,212.27895,0.00227,0.0,"POLYGON ((-109.28259 38.84929, -109.28302 38.8..."
3,34305723,627071,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,0.1673,01-1169,https://maps.waterrights.utah.gov/POUPolygons/...,5,39.07298,-109.13151,676.93848,433.03334,0.00448,0.0,"MULTIPOLYGON (((-109.13127 39.07287, -109.1312..."
4,34305724,63,1,,,ProofMap,https://waterrights.utah.gov/docSys/v903/K903/...,0.42,01-1106,https://maps.waterrights.utah.gov/POUPolygons/...,28,38.67979,-109.68728,1838.14722,411.68113,0.00469,0.0,"POLYGON ((-109.68840 38.67975, -109.68840 38.6..."


In [31]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['RECORD_ID'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
print(len(dfPoUshape))
dfPoUshape.head()

76449


Unnamed: 0,in_SiteNativeID,geometry
0,POU34305720,"POLYGON ((-109.67401 38.65519, -109.67402 38.6..."
1,POU34305721,"MULTIPOLYGON (((-109.68702 38.65724, -109.6870..."
2,POU34305722,"POLYGON ((-109.28259 38.84929, -109.28302 38.8..."
3,POU34305723,"MULTIPOLYGON (((-109.13127 39.07287, -109.1312..."
4,POU34305724,"POLYGON ((-109.68840 38.67975, -109.68840 38.6..."


## Export Data

In [32]:
outdf.info

<bound method DataFrame.info of         WaDEUUID in_MethodUUID in_VariableSpecificUUID in_OrganizationUUID  \
0           utD1       UTwr_M1                 UTwr_V1             UTwr_O1   
1          utD10       UTwr_M1                 UTwr_V1             UTwr_O1   
2         utD100       UTwr_M1                 UTwr_V1             UTwr_O1   
3        utD1000       UTwr_M1                 UTwr_V1             UTwr_O1   
4       utD10000       UTwr_M1                 UTwr_V1             UTwr_O1   
...          ...           ...                     ...                 ...   
514219   utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   
514220   utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   
514221   utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   
514222   utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   
514223   utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   

       in_Geometry in_GNISFeatu

In [33]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utD1,UTwr_M1,UTwr_V1,UTwr_O1,,,,Non-Production Well: Closed Loop Heat Exchange,wadeID1,Underground,,,,4326,,,,40.65480,-111.95654,,,POD,NonProduction Well: Closed Loop Heat Exchange,POD864475623,,,UT,,,,,,,,,,,Approved,0259005M00,TAYLORSVILLE CITY CO GEO ENGERY SYSTEMS,NaT,,,,,,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=025...
1,utD10,UTwr_M1,UTwr_V1,UTwr_O1,,,,Non-Production Well: Unknown,wadeID2,Underground,,,,4326,,,,40.66009,-111.94791,,,POD,NonProduction Well: Unknown,POD864475632,,,UT,,,,,,,,,,,Approved,0259008M00,FEC DRILLING,NaT,,,,,,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=025...
2,utD100,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,wadeID3,Underground,,,,4326,,,,39.61925,-113.87756,,,POD,Underground Water Well,POD864475722,,,UT,,,,,,,,,,0.01500,Water User`s Claim,18-427,STEPHEN P MUMM,1980-07-21,,,,,,"Domestic,Irrigation,Stockwatering",,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=18-427
3,utD1000,UTwr_M1,UTwr_V1,UTwr_O1,,,,Non-Production Well: Test,wadeID4,Abandonded Well,,,,4326,,,,39.36270,-111.58694,,,POD,NonProduction Well: Test,POD864476622,,,UT,,,,,,,,,,,Approved,0265001P00,JEFFERIES VINCENT,NaT,,,,,,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=026...
4,utD10000,UTwr_M1,UTwr_V1,UTwr_O1,,,,White Rock Spring Stream,wadeID5,Point to Point,,,,4326,,,,37.63907,-113.00251,,,POD,White Rock Spring Stream,POD864485665,,,UT,,,,,,,,,,,Diligence Claim,73-1713,SIDEHILL LLC,1856-01-01,,,,,0.44800,Stockwatering,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=73-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514219,utU9999,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,wadeID3,Underground,,,,4326,,,,41.67787,-111.93489,,,POU,,POU34315719,,,UT,,,,,,,,,,,,25-1622,,NaT,,,,,,,,,,,,0,,,,,,,,,,
514220,utU9999,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,wadeID3,Underground,,,,4326,,,,41.67787,-111.93489,,,POU,,POU34315719,,,UT,,,,,,,,,,,,25-1623,,NaT,,,,,,,,,,,,0,,,,,,,,,,
514221,utU9999,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,wadeID3,Underground,,,,4326,,,,41.67787,-111.93489,,,POU,,POU34315719,,,UT,,,,,,,,,,,,25-1624,,NaT,,,,,,,,,,,,0,,,,,,,,,,
514222,utU9999,UTwr_M1,UTwr_V1,UTwr_O1,,,,Underground Water Well,wadeID3,Underground,,,,4326,,,,41.67787,-111.93489,,,POU,,POU34315719,,,UT,,,,,,,,,,,,25-1626,,NaT,,,,,,,,,,,,0,,,,,,,,,,


In [37]:
# Export the output dataframe
outdf.to_csv('RawInputData/Pwr_utMain.zip', compression=dict(method='zip', archive_name='Pwr_utMain.csv'), index=False)  # The output, save as a zip
dfPoUshape.to_csv('RawInputData/P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.

In [35]:
# # Not sure how this was made, it does not agree with any WaDE Logic / existing UT notes

# ####################################################

# # Assign SiteTypeCV value.
# # Uses the re library, but requires for loop.
# # Order that the lists are inputed into dictoinary is important, want to overide generic search with a more specific search.

# # Create the Lists
# canalList = ["canal", "canals"]
# creekList = ["creek"]
# ditchList = ["ditch"]
# drainList = ["drain", "drains"]
# lakeList = ["lake"]
# pondList = ["pond"]
# reservoirList = ["reservoir"]
# riverList = ["river", "fork", "surface"]
# sloughList = ["slough"]
# springList = ["spring", "springs", "gulch", "seep"]
# tunnelList = ["tunnel", "tunnels"]
# washList = ["wash"]
# wellList = ["well", "wells", "well:", "draw", "hollow"]

# # Making the dictionary
# listDictionary = {}
# listDictionary["Canal"] = canalList
# listDictionary["Creek"] = creekList
# listDictionary["Ditch"] = ditchList
# listDictionary["Drain"] = drainList
# listDictionary["Lake"] = lakeList
# listDictionary["Pond"] = pondList
# listDictionary["Reservoir"] = reservoirList
# listDictionary["River"] = riverList
# listDictionary["Slough"] = sloughList
# listDictionary["Spring"] = springList
# listDictionary["Tunnel"] = tunnelList
# listDictionary["Wash"] = washList
# listDictionary["Well"] = wellList

# def CreateSiteTypeCV(val):
#     if val == "" or pd.isnull(val):
#         outString = ""
#     else:
#         outString = "" # Default
        
#         # Cleaning text / simple search format
#         val = val.replace(",", " ")
#         val = val.replace(".", " ")
#         val = val.replace(";", " ")
#         val = val.replace("-", " ")
#         val = val.replace("/", " ")
#         val = val.replace("(", " ")
#         val = val.replace(")", " ")
#         val = val.lower().strip()
#         val = " "+val+" "
        
#         for x in listDictionary:
#             labelString = x
#             valueList = listDictionary[x]
#             for words in valueList:
#                 if re.search(" "+words+ " ", val): outString = x
            
#     return outString

# outdf['in_SiteTypeCV'] = outdf.apply(lambda row: CreateSiteTypeCV( row['in_SiteTypeCV']), axis=1)
# outdf['in_SiteTypeCV'].unique()