# Pre-processing Utah Allocation data for WaDE upload.
- Purpose:  To pre-process the Utah data into one master file for simple DataFrame creation and extraction

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Utah/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Point of Diversion Data

In [3]:
# Input File
FI_PoD = "PointsOfDiversion_input.zip"
dfinPOD = pd.read_csv(FI_PoD, encoding = "ISO-8859-1").replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "utD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('PointsOfDiversion_input.zip', compression=dict(method='zip', archive_name='PointsOfDiversion_input.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head()

350356


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,OID_,OBJECTID,WRNUM,CHEXNUM,TYPE,SUMMARY_ST,STATUS,PRIORITY,USES,CFS,ACFT,LOCATION,WIN,OWNER,SOURCE,WebLink,Latitude,Longitude
0,utD0,"Unused Site Record,Incomplete or bad entry for...",,0,795590894,0005007P00,,Underground,A,APPLAPP,,,0.0,0.0,N460 E1460 SW 17 25S 23E SL,22579,CASTLE VALLEY TOWN OF,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=000...,38.62434,-109.40242
1,utD1,"Unused Site Record,Incomplete or bad entry for...",,1,795590895,0005008P00,,Underground,A,APPLAPP,,,0.0,0.0,S100 E1650 NW 20 25S 23E SL,22558,CASTLE VALLEY TOWN OF,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=000...,38.62279,-109.40179
2,utD10,"Unused Site Record,Incomplete or bad entry for...",,10,795590904,0015001M00,,Underground,A,APPLAPP,,,0.0,0.0,N940 E560 SE 18 1S 19W SL,21431,TOM JONES,Non-Production Well: Unknown,https://www.waterrights.utah.gov/search/?q=001...,40.73587,-114.03529
3,utD100,,,100,795590994,81-1960,,Point to Point,P,DILWUC,1854.0,S,0.013,0.0,N660 E660 S4 08 39S 15W SL,0,USA FOREST SERVICE,Unnamed Wash,https://www.waterrights.utah.gov/search/?q=81-...,37.40388,-113.55916
4,utD1000,,,1000,795591894,19-83,,Point to Point,P,DIL,1856.0,S,0.011,0.0,S660 W1980 E4 15 32S 18W SL,0,USA BUREAU OF LAND MANAGEMENT,Hamlin Spring No. 1,https://www.waterrights.utah.gov/search/?q=19-83,38.01811,-113.84853


In [4]:
# We don't want to use any CHEXNUM data
dfinPOD['CHEXNUM'] = dfinPOD['CHEXNUM'].str.strip()
dfinPOD = dfinPOD[dfinPOD['CHEXNUM'] == ""].reset_index(drop=True)
print(len(dfinPOD))
dfinPOD.head()

280375


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,OID_,OBJECTID,WRNUM,CHEXNUM,TYPE,SUMMARY_ST,STATUS,PRIORITY,USES,CFS,ACFT,LOCATION,WIN,OWNER,SOURCE,WebLink,Latitude,Longitude
0,utD0,"Unused Site Record,Incomplete or bad entry for...",,0,795590894,0005007P00,,Underground,A,APPLAPP,,,0.0,0.0,N460 E1460 SW 17 25S 23E SL,22579,CASTLE VALLEY TOWN OF,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=000...,38.62434,-109.40242
1,utD1,"Unused Site Record,Incomplete or bad entry for...",,1,795590895,0005008P00,,Underground,A,APPLAPP,,,0.0,0.0,S100 E1650 NW 20 25S 23E SL,22558,CASTLE VALLEY TOWN OF,Non-Production Well: Test,https://www.waterrights.utah.gov/search/?q=000...,38.62279,-109.40179
2,utD10,"Unused Site Record,Incomplete or bad entry for...",,10,795590904,0015001M00,,Underground,A,APPLAPP,,,0.0,0.0,N940 E560 SE 18 1S 19W SL,21431,TOM JONES,Non-Production Well: Unknown,https://www.waterrights.utah.gov/search/?q=001...,40.73587,-114.03529
3,utD100,,,100,795590994,81-1960,,Point to Point,P,DILWUC,1854.0,S,0.013,0.0,N660 E660 S4 08 39S 15W SL,0,USA FOREST SERVICE,Unnamed Wash,https://www.waterrights.utah.gov/search/?q=81-...,37.40388,-113.55916
4,utD1000,,,1000,795591894,19-83,,Point to Point,P,DIL,1856.0,S,0.011,0.0,S660 W1980 E4 15 32S 18W SL,0,USA BUREAU OF LAND MANAGEMENT,Hamlin Spring No. 1,https://www.waterrights.utah.gov/search/?q=19-83,38.01811,-113.84853


In [5]:
# For creating BeneficialUseCategory
benUseDict = {
    "I" : "Irrigation",
    "S" : "Stockwatering",
    "D" : "Domestic",
    "M" : "Municipal",
    "X" : "Mining",
    "P" : "Power",
    "O" : "Other"}
def assignBenUseCategory(colrowValue):
    colrowValue = str(colrowValue).strip()
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = ""
    else:
        outList = ",".join(benUseDict[inx] for inx in list(str(colrowValue)))
    return outList


dfinPOD['in_BeneficialUseCategory'] = dfinPOD.apply(lambda row: assignBenUseCategory(row['USES']), axis=1)
dfinPOD['in_BeneficialUseCategory'].unique()

array(['', 'Stockwatering', 'Domestic,Irrigation,Stockwatering',
       'Irrigation,Other', 'Domestic,Irrigation', 'Irrigation',
       'Other,Stockwatering', 'Other', 'Domestic',
       'Irrigation,Stockwatering', 'Domestic,Stockwatering', 'Municipal',
       'Stockwatering,Mining', 'Irrigation,Municipal',
       'Irrigation,Municipal,Stockwatering', 'Municipal,Other',
       'Domestic,Other', 'Domestic,Irrigation,Other,Stockwatering',
       'Domestic,Irrigation,Other', 'Domestic,Other,Stockwatering',
       'Domestic,Other,Mining', 'Irrigation,Other,Stockwatering',
       'Other,Mining', 'Mining', 'Power',
       'Domestic,Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Municipal,Other', 'Domestic,Stockwatering,Mining',
       'Domestic,Municipal', 'Other,Stockwatering,Mining',
       'Domestic,Irrigation,Municipal,Stockwatering', 'Irrigation,Mining',
       'Domestic,Mining', 'Stockwatering,Power', 'Other,Power',
       'Domestic,Municipal,Stockwatering', 'Domestic,Othe

In [6]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "UTwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "UTwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "UTwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = "" #auto fill in below
df['in_WaterSourceTypeCV'] = dfinPOD['TYPE']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = dfinPOD['SOURCE']
df['in_SiteNativeID'] = "POD" + dfinPOD['OBJECTID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfinPOD['SOURCE']
df['in_StateCV'] = "UT"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['CFS'].astype(float)
df['in_AllocationLegalStatusCV'] = dfinPOD['STATUS']
df['in_AllocationNativeID'] =  dfinPOD['WRNUM'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfinPOD['OWNER']
df['in_AllocationPriorityDate'] = dfinPOD['PRIORITY']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['ACFT']
df['in_BeneficialUseCategory'] = dfinPOD['in_BeneficialUseCategory']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['WebLink']

outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True)
print(len(outPOD))
outPOD.head()

280375


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utD0,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Underground,,,,4326,,,,38.62434,-109.40242,,,POD,Non-Production Well: Test,POD795590894,,Non-Production Well: Test,UT,,,,,,,,,,0.0,APPLAPP,0005007P00,CASTLE VALLEY TOWN OF,,,,,,0.0,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=000...
1,utD1,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Underground,,,,4326,,,,38.62279,-109.40179,,,POD,Non-Production Well: Test,POD795590895,,Non-Production Well: Test,UT,,,,,,,,,,0.0,APPLAPP,0005008P00,CASTLE VALLEY TOWN OF,,,,,,0.0,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=000...
2,utD10,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Underground,,,,4326,,,,40.73587,-114.03529,,,POD,Non-Production Well: Unknown,POD795590904,,Non-Production Well: Unknown,UT,,,,,,,,,,0.0,APPLAPP,0015001M00,TOM JONES,,,,,,0.0,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=001...
3,utD100,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Point to Point,,,,4326,,,,37.40388,-113.55916,,,POD,Unnamed Wash,POD795590994,,Unnamed Wash,UT,,,,,,,,,,0.013,DILWUC,81-1960,USA FOREST SERVICE,1854.0,,,,,0.0,Stockwatering,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=81-...
4,utD1000,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Point to Point,,,,4326,,,,38.01811,-113.84853,,,POD,Hamlin Spring No. 1,POD795591894,,Hamlin Spring No. 1,UT,,,,,,,,,,0.011,DIL,19-83,USA BUREAU OF LAND MANAGEMENT,1856.0,,,,,0.0,Stockwatering,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=19-83


## Place of Use Data

In [7]:
# Input File - place of use data
FI_POU = "Utah_Place_of_Use_Irrigation.zip"
dfinPOU = pd.read_csv(FI_POU, encoding = "ISO-8859-1").replace(np.nan, "") 

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "utU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('Utah_Place_of_Use_Irrigation.zip', compression=dict(method='zip', archive_name='Utah_Place_of_Use_Irrigation.csv'), index=False)

dfinPOU = dfinPOU.drop_duplicates().reset_index(drop=True)
print(len(dfinPOU))
dfinPOU.head()

  dfinPOU = pd.read_csv(FI_POU, encoding = "ISO-8859-1").replace(np.nan, "")


75224


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUMS,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Leng,Shape_Area
0,utU0,"Unused Site Record,Incomplete or bad entry for...",,0,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0
1,utU1,"Unused Site Record,Incomplete or bad entry for...",,1,21320842,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,"01-1078, 01-1124",https://maps.waterrights.utah.gov/POUPolygons/...,3,76631.45801,2248.41702,38.65817,-109.68494,0.02335,1e-05
2,utU10,,,10,21320851,120,1,,,Hydrgraphic Survey Map,https://waterrights.utah.gov/docSys/v925/R925/...,0.10614,"01-50, 01-134",https://maps.waterrights.utah.gov/POUPolygons/...,37,429.43018,89.41734,38.81333,-109.2994,0.00093,0.0
3,utU100,,,100,21320941,622976,5,,,ProofMap,https://waterrights.utah.gov/docImport/0513/05...,0.0073,05-3185,https://maps.waterrights.utah.gov/POUPolygons/...,190,29.5166,24.34504,38.31771,-109.45342,0.00021,0.0
4,utU1000,,,1000,21321841,4130,9,,,ProofMap,https://waterrights.utah.gov/docImport/0513/05...,0.51337,09-2132,https://maps.waterrights.utah.gov/POUPolygons/...,1996,2076.69238,182.5279,37.27859,-109.56886,0.00186,0.0


In [8]:
# Need to split out WRNUMS into their own row
# The explode() method explodes lists into separate rows.
dfinPOU = dfinPOU.assign(WRNUMS=dfinPOU['WRNUMS'].str.split(',')).explode('WRNUMS').reset_index()
dfinPOU = dfinPOU.rename({'WRNUMS': 'WRNUM'}, axis=1)
dfinPOU = dfinPOU.replace(np.nan, "").reset_index()
dfinPOU = dfinPOU.drop_duplicates().reset_index(drop=True)

dfinPOU['WRNUM'] =  dfinPOD['WRNUM'].replace("", 0).fillna(0).astype(str)
print(len(dfinPOU))

214494


In [9]:
# tie POD data to POU data for correct watersource info
dfinPOU = pd.merge(dfinPOU, outPOD[['in_AllocationNativeID', 'in_WaterSourceTypeCV']], left_on='WRNUM', right_on='in_AllocationNativeID', how='left')
print(len(dfinPOU))
dfinPOU.head(1)

2138897


Unnamed: 0,level_0,index,WaDEUUID,ReasonRemoved,IncompleteField,Ã¯Â»Â¿OID_,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUM,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Leng,Shape_Area,in_AllocationNativeID,in_WaterSourceTypeCV
0,0,0,utU0,"Unused Site Record,Incomplete or bad entry for...",,0,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,0005007P00,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0,0005007P00,Underground


In [10]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOU['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "UTwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "UTwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "UTwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = "" #auto fill in below
df['in_WaterSourceTypeCV'] = dfinPOU['in_WaterSourceTypeCV'] # from POD

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOU['Latitude']
df['in_Longitude'] = dfinPOU['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
df['in_SiteNativeID'] = "POU" + dfinPOU['RECORD_ID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "UT"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] =  dfinPOU['WRNUM'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = ""
df['in_AllocationPriorityDate'] = ""
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = ""
df['in_BeneficialUseCategory'] = ""
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = ""

outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True)
print(len(outPOU))
outPOU.head()

206555


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utU0,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Underground,,,,4326,,,,38.65522,-109.67395,,,POU,,POU21320841,,,UT,,,,,,,,,,,,0005007P00,,,,,,,,,,,,,,0,,,,,,,,,,
1,utU1,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Underground,,,,4326,,,,38.65817,-109.68494,,,POU,,POU21320842,,,UT,,,,,,,,,,,,0005008P00,,,,,,,,,,,,,,0,,,,,,,,,,
2,utU1,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Underground,,,,4326,,,,38.65817,-109.68494,,,POU,,POU21320842,,,UT,,,,,,,,,,,,0015001M00,,,,,,,,,,,,,,0,,,,,,,,,,
3,utU10,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Point to Point,,,,4326,,,,38.81333,-109.2994,,,POU,,POU21320851,,,UT,,,,,,,,,,,,81-1960,,,,,,,,,,,,,,0,,,,,,,,,,
4,utU10,UTwr_M1,UTwr_V1,UTwr_O1,,,,,,Point to Point,,,,4326,,,,38.81333,-109.2994,,,POU,,POU21320851,,,UT,,,,,,,,,,,,19-83,,,,,,,,,,,,,,0,,,,,,,,,,


## Concatenate POD and POU Data.  Clean Data.

In [11]:
# Concatenate dataframes
frames = [outPOD, outPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

486930


In [12]:
# Creating WaterSourceTypeCV

WaterSourceTypeCVDictionary={
"Underground" : "Groundwater",
"Abandonded Well" : "Groundwater",
"Point to Point" : "Surface Water",
"Surface" : "Surface Water",
"Return" : "Surface Water",
"Drain" : "Surface Water",
"Spring" : "Surface Water",
"Rediversion" : "Surface Water"}
def CreateWaterSourceTypeCV(val):
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        val = val.strip()
        try:
            outString = WaterSourceTypeCVDictionary[val]
        except:
            outString = ""
    return outString

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: CreateWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', ''], dtype=object)

In [13]:
# Assign SiteTypeCV value.
# Uses the re library, but requires for loop.
# Order that the lists are inputed into dictoinary is important, want to overide generic search with a more specific search.

# Create the Lists
canalList = ["canal", "canals"]
creekList = ["creek"]
ditchList = ["ditch"]
drainList = ["drain", "drains"]
lakeList = ["lake"]
pondList = ["pond"]
reservoirList = ["reservoir"]
riverList = ["river", "fork", "surface"]
sloughList = ["slough"]
springList = ["spring", "springs", "gulch", "seep"]
tunnelList = ["tunnel", "tunnels"]
washList = ["wash"]
wellList = ["well", "wells", "well:", "draw", "hollow"]

# Making the dictionary
listDictionary = {}
listDictionary["Canal"] = canalList
listDictionary["Creek"] = creekList
listDictionary["Ditch"] = ditchList
listDictionary["Drain"] = drainList
listDictionary["Lake"] = lakeList
listDictionary["Pond"] = pondList
listDictionary["Reservoir"] = reservoirList
listDictionary["River"] = riverList
listDictionary["Slough"] = sloughList
listDictionary["Spring"] = springList
listDictionary["Tunnel"] = tunnelList
listDictionary["Wash"] = washList
listDictionary["Well"] = wellList

def CreateSiteTypeCV(val):
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        outString = "" # Default
        
        # Cleaning text / simple search format
        val = val.replace(",", " ")
        val = val.replace(".", " ")
        val = val.replace(";", " ")
        val = val.replace("-", " ")
        val = val.replace("/", " ")
        val = val.replace("(", " ")
        val = val.replace(")", " ")
        val = val.lower().strip()
        val = " "+val+" "
        
        for x in listDictionary:
            labelString = x
            valueList = listDictionary[x]
            for words in valueList:
                if re.search(" "+words+ " ", val): outString = x
            
    return outString

outdf['in_SiteTypeCV'] = outdf.apply(lambda row: CreateSiteTypeCV( row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Well', 'Wash', 'Spring', 'Creek', 'Lake', 'River', 'Drain',
       'Slough', 'Ditch', '', 'Tunnel', 'Reservoir', 'Pond', 'Canal'],
      dtype=object)

In [14]:
# Assign LegalStatusCV value.
# Uses the re library, but requires for loop.
# Order that the lists are inputed into dictoinary is important, want to overide generic search with a more specific search.

# Create the Lists
ADECList = ["ADEC"]
ADVList = ["ADV"]
APPList = ["APP"]
CERTList = ["CERT"]
DECList = ["DEC"]
DILList = ["DIL"]
DISList = ["DIS"]
EXPList = ["EXP"]
FORFList = ["FORF"]
LAPList = ["LAP"]
NPRList = ["NPR"]
NUSEList = ["NUSE"]
PERFList = ["PERF"]
REJList = ["REJ"]
RNUMList = ["RNUM"]
STATUSList = ["STATUS"]
TEMPList = ["TEMP"]
TERMList = ["TERM"]
UGWCList = ["UGWC"]
UNAPList = ["UNAP"]
WDList = ["WD"]
WUCList = ["WUC"]


# Making the dictionary
listDictionary = {}

listDictionary["Lapsed"] = LAPList

listDictionary["Adjudication Decree"] = ADECList
listDictionary["Adverse Use Claim"] = ADVList
listDictionary["Approved"] = APPList
listDictionary["Certificated"] = CERTList
listDictionary["Decree"] = DECList
listDictionary["Diligence Claim"] = DILList
listDictionary["Disallowed"] = DISList
listDictionary["Expired"] = EXPList
listDictionary["Forfeited"] = FORFList
listDictionary["No Proof Required"] = NPRList
listDictionary["Nonuse"] = NUSEList
listDictionary["Perfected"] = PERFList
listDictionary["Rejected"] = REJList
listDictionary["Renumbered"] = RNUMList
listDictionary["Deff"] = STATUSList
listDictionary["Temp Applications"] = TEMPList
listDictionary["Terminated"] = TERMList
listDictionary["Underground Water Claim"] = UGWCList
listDictionary["Unapproved"] = UNAPList
listDictionary["Withdrawn"] = WDList
listDictionary["Water User`s Claim"] = WUCList


def CreateLegalStatus(val):
    val = str(val).strip()
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        outString = ""
        for x in listDictionary:
            valueList = listDictionary[x]
            for words in valueList:
                if words in val: outString = x
    
    if outString == "" or pd.isnull(val):
        outString = ""

    return outString

outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: CreateLegalStatus( row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Approved', 'Water User`s Claim', 'Diligence Claim',
       'Certificated', 'Decree', 'Lapsed', 'Forfeited', '', 'Rejected',
       'Renumbered', 'Underground Water Claim', 'No Proof Required',
       'Disallowed', 'Withdrawn', 'Unapproved', 'Temp Applications',
       'Expired', 'Adverse Use Claim', 'Terminated'], dtype=object)

In [15]:
# Clean owner name up
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;,/\)(-]", "", Val).title().strip()
    return Val

In [16]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Castle Valley Town Of', 'Tom Jones', 'Usa Forest Service', ...,
       'Barbara Nielsen', 'Gaylen D Kinder', 'Stockton City Corporation'],
      dtype=object)

In [17]:
# ID POD source data has a few names that contain a ',' in them, but should still be okay
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['Nonproduction Well: Test', 'Nonproduction Well: Unknown',
       'Unnamed Wash', ..., 'Ridge Spring #5', 'Louis Spring #2',
       'Section 3 Spring'], dtype=object)

In [18]:
# Ensure Empty String

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [19]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array([''], dtype=object)

In [20]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', ''], dtype=object)

In [21]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Well', 'Wash', 'Spring', 'Creek', 'Lake', 'River', 'Drain',
       'Slough', 'Ditch', '', 'Tunnel', 'Reservoir', 'Pond', 'Canal'],
      dtype=object)

In [22]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['Nonproduction Well: Test', 'Nonproduction Well: Unknown',
       'Unnamed Wash', ..., 'Ridge Spring #5', 'Louis Spring #2',
       'Section 3 Spring'], dtype=object)

In [23]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Castle Valley Town Of', 'Tom Jones', 'Usa Forest Service', ...,
       'Barbara Nielsen', 'Gaylen D Kinder', 'Stockton City Corporation'],
      dtype=object)

In [24]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['', 'Stockwatering', 'Domestic,Irrigation,Stockwatering',
       'Irrigation,Other', 'Domestic,Irrigation', 'Irrigation',
       'Other,Stockwatering', 'Other', 'Domestic',
       'Irrigation,Stockwatering', 'Domestic,Stockwatering', 'Municipal',
       'Stockwatering,Mining', 'Irrigation,Municipal',
       'Irrigation,Municipal,Stockwatering', 'Municipal,Other',
       'Domestic,Other', 'Domestic,Irrigation,Other,Stockwatering',
       'Domestic,Irrigation,Other', 'Domestic,Other,Stockwatering',
       'Domestic,Other,Mining', 'Irrigation,Other,Stockwatering',
       'Other,Mining', 'Mining', 'Power',
       'Domestic,Irrigation,Municipal,Other,Stockwatering',
       'Domestic,Municipal,Other', 'Domestic,Stockwatering,Mining',
       'Domestic,Municipal', 'Other,Stockwatering,Mining',
       'Domestic,Irrigation,Municipal,Stockwatering', 'Irrigation,Mining',
       'Domestic,Mining', 'Stockwatering,Power', 'Other,Power',
       'Domestic,Municipal,Stockwatering', 'Domestic,Othe

In [25]:
# in_Latitude
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([38.62434132, 38.6227946 , 40.73586587, ..., 41.68992803,
       41.69158829, 41.69421694])

In [26]:
# in_Longitude
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-109.4024171 , -109.4017862 , -114.0352916 , ..., -111.90823137,
       -111.90686903, -111.91184622])

In [27]:
# Changing datatype of used date fields. 
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array([                          'NaT', '1854-01-01T00:00:00.000000000',
       '1856-01-01T00:00:00.000000000', ...,
       '1935-01-26T00:00:00.000000000', '2009-04-16T00:00:00.000000000',
       '1978-07-19T00:00:00.000000000'], dtype='datetime64[ns]')

In [28]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array(['', 0.013, 0.011, ..., 5.525, 12.03, 28.5], dtype=object)

In [29]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array(['', 6.73, 1.455, ..., 13.761, 9.148, 3016.8], dtype=object)

In [30]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "wadeID" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
WaterSourceNativeIDdict = pd.Series(dfWaterSourceNativeID.in_WaterSourceNativeID.values, index=dfWaterSourceNativeID.linkKey.astype(str)).to_dict()
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        colrowValue = str(A).strip() + str(B).strip()
        try:
            outList = WaterSourceNativeIDdict[colrowValue]
        except:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1', 'wadeID2', ''], dtype=object)

## Shapefile Data
- For attaching geometry to POU csv inputs.

In [31]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('Utah_Place_of_Use_Irrigation/Utah_Place_of_Use_Irrigation.shp')
print(len(dfPoUshapetemp))
dfPoUshapetemp.head()

75224


Unnamed: 0,RECORD_ID,GROUP_NUMB,AREA_CODE,CHNUM,POU_TYPE,SOURCE,URL,ACRES,WRNUMS,dbURL,recordID,Shape__Are,Shape__Len,Latitude,Longitude,Shape_Leng,Shape_Area,geometry
0,21320841,85,1,,,ProofMap,https://waterrights.utah.gov/adjdinfo/hydromap...,0.00431,01-1127,https://maps.waterrights.utah.gov/POUPolygons/...,1,17.43164,31.16859,38.65522,-109.67395,0.00028,0.0,"POLYGON ((-109.67401 38.65519, -109.67395 38.6..."
1,21320842,82,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,18.94507,"01-1078, 01-1124",https://maps.waterrights.utah.gov/POUPolygons/...,3,76631.45801,2248.41702,38.65817,-109.68494,0.02335,1e-05,"MULTIPOLYGON (((-109.68702 38.65724, -109.6870..."
2,21320843,80,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,0.49387,01-1122,https://maps.waterrights.utah.gov/POUPolygons/...,4,1998.11963,212.27895,38.84946,-109.28307,0.00224,0.0,"POLYGON ((-109.28259 38.84929, -109.28329 38.8..."
3,21320844,627071,1,,,ProofMap,https://waterrights.utah.gov/docImport/0547/05...,0.1673,01-1169,https://maps.waterrights.utah.gov/POUPolygons/...,5,676.93848,433.03334,39.07298,-109.13151,0.00021,0.0,"POLYGON ((-109.13127 39.07287, -109.13124 39.0..."
4,21320845,63,1,,,ProofMap,https://waterrights.utah.gov/docSys/v903/K903/...,0.42,01-1106,https://maps.waterrights.utah.gov/POUPolygons/...,28,1838.14722,411.68113,38.67979,-109.68728,0.00469,0.0,"POLYGON ((-109.68840 38.67975, -109.68840 38.6..."


In [32]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['RECORD_ID'].replace("", 0).fillna(0).astype(int).astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head()

Unnamed: 0,in_SiteNativeID,geometry
0,POU21320841,"POLYGON ((-109.67401 38.65519, -109.67395 38.6..."
1,POU21320842,"MULTIPOLYGON (((-109.68702 38.65724, -109.6870..."
2,POU21320843,"POLYGON ((-109.28259 38.84929, -109.28329 38.8..."
3,POU21320844,"POLYGON ((-109.13127 39.07287, -109.13124 39.0..."
4,POU21320845,"POLYGON ((-109.68840 38.67975, -109.68840 38.6..."


## Export Data

In [33]:
outdf.info

<bound method DataFrame.info of        WaDEUUID in_MethodUUID in_VariableSpecificUUID in_OrganizationUUID  \
0          utD0       UTwr_M1                 UTwr_V1             UTwr_O1   
1          utD1       UTwr_M1                 UTwr_V1             UTwr_O1   
2         utD10       UTwr_M1                 UTwr_V1             UTwr_O1   
3        utD100       UTwr_M1                 UTwr_V1             UTwr_O1   
4       utD1000       UTwr_M1                 UTwr_V1             UTwr_O1   
...         ...           ...                     ...                 ...   
486925  utU9998       UTwr_M1                 UTwr_V1             UTwr_O1   
486926  utU9998       UTwr_M1                 UTwr_V1             UTwr_O1   
486927  utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   
486928  utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   
486929  utU9999       UTwr_M1                 UTwr_V1             UTwr_O1   

       in_Geometry in_GNISFeatureNameCV in_

In [34]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utD0,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID1,Groundwater,,,,4326,,,,38.62434,-109.40242,,,POD,Nonproduction Well: Test,POD795590894,,Well,UT,,,,,,,,,,,Approved,0005007P00,Castle Valley Town Of,NaT,,,,,,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=000...
1,utD1,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID1,Groundwater,,,,4326,,,,38.62279,-109.40179,,,POD,Nonproduction Well: Test,POD795590895,,Well,UT,,,,,,,,,,,Approved,0005008P00,Castle Valley Town Of,NaT,,,,,,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=000...
2,utD10,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID1,Groundwater,,,,4326,,,,40.73587,-114.03529,,,POD,Nonproduction Well: Unknown,POD795590904,,Well,UT,,,,,,,,,,,Approved,0015001M00,Tom Jones,NaT,,,,,,,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=001...
3,utD100,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID2,Surface Water,,,,4326,,,,37.40388,-113.55916,,,POD,Unnamed Wash,POD795590994,,Wash,UT,,,,,,,,,,0.01300,Water User`s Claim,81-1960,Usa Forest Service,1854-01-01,,,,,,Stockwatering,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=81-...
4,utD1000,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID2,Surface Water,,,,4326,,,,38.01811,-113.84853,,,POD,Hamlin Spring No 1,POD795591894,,Spring,UT,,,,,,,,,,0.01100,Diligence Claim,19-83,Usa Bureau Of Land Management,1856-01-01,,,,,,Stockwatering,,,,,,0,,,,,,,,,,https://www.waterrights.utah.gov/search/?q=19-83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486925,utU9998,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID2,Surface Water,,,,4326,,,,41.69159,-111.90687,,,POU,,POU21330839,,,UT,,,,,,,,,,,,91-5277,,NaT,,,,,,,,,,,,0,,,,,,,,,,
486926,utU9998,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID2,Surface Water,,,,4326,,,,41.69159,-111.90687,,,POU,,POU21330839,,,UT,,,,,,,,,,,,25-1218,,NaT,,,,,,,,,,,,0,,,,,,,,,,
486927,utU9999,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID2,Surface Water,,,,4326,,,,41.69422,-111.91185,,,POU,,POU21330840,,,UT,,,,,,,,,,,,91-5277,,NaT,,,,,,,,,,,,0,,,,,,,,,,
486928,utU9999,UTwr_M1,UTwr_V1,UTwr_O1,,,,,wadeID2,Surface Water,,,,4326,,,,41.69422,-111.91185,,,POU,,POU21330840,,,UT,,,,,,,,,,,,91-5277,,NaT,,,,,,,,,,,,0,,,,,,,,,,


In [35]:
# Export the output dataframe
outdf.to_csv('Pwr_utMain.zip', index=False, compression="zip")  # The output, save as a zip
dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.

In [36]:
# # example dataframe
# tempdf = pd.DataFrame({'AllocationNativeID' : ['A1', 'A1', 'C1', "D1"],
#                        'AllocationFlow_CFS' : ["1.1", "", "3.3", "4.4"],
#                        'AllocationVolume_AF' :  ["5.5", "6.6", "7.7", "8.8"],
#                        'BeneficialUseCategory' :["Irrigation", "LiveStock", "Other", "Domestic"]})
# tempdf

In [37]:
# # NativeID only (current approach)
# groupbyList = ['AllocationNativeID']
# tempdfA = tempdf.groupby(groupbyList).agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()
# tempdfA

In [38]:
# # NativeID and Flow
# this fails
# groupbyList = ['AllocationNativeID', 'AllocationFlow_CFS']
# tempdfB = tempdf.groupby(groupbyList).agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()
# tempdfB

In [39]:
# # NativeID and Flow, convert "" to np.nan
# this fails
# tempdfC = tempdf.replace("", np.nan)
# groupbyList = ['AllocationNativeID', 'AllocationFlow_CFS']
# tempdfC = tempdfC.groupby(groupbyList).agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()
# tempdfC