# Pre-processing Nebraska Allocation data for WaDEQA upload.
Date Updated: 10/04/2023
Purpose:  To pre-process the Nebraska data into one master file for simple DataFrame creation and extraction

### Notes:
- asfd

In [1]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [2]:
# ---- working directory ----
workingDirString = "G:/Shared drives/WaDE Data/Nebraska/WaterAllocation" # set working directory folder string here
os.chdir(workingDirString)
print(f'The working Directory is:', workingDirString)

The working Directory is: G:/Shared drives/WaDE Data/Nebraska/WaterAllocation


# POD Surface Water Data
- data already pulled from api, used saved csv instead

In [3]:
# # already done, skip ahead

# %%time
# # API retrieval
# # shoot for 30 pages, that seemed to be all that AllSurfaceWaterPoints offered.
# df = pd.DataFrame()
# countPage = 1
# while countPage < 30:
#     url = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=" + str(countPage)
#     print(url)

#     # store in dataframe
#     try:
#         responseD = json.loads(requests.get(url).text)
#         DtL = responseD['Results']
#         length = len(DtL)
#         for i in range(length):
#             row = pd.DataFrame([DtL[i]])
#             df = pd.concat([df, row])
#     except:
#         print("Error, issue with API return.")
    
#     countPage = countPage + 1

# # # Use only NeNDR Active provided sites
# # df = df[df['SourceName'] == 'NeDNR']

# print(len(df))
# df.head()

In [4]:
# # already done, skip ahead

# # explode these list....
# dftemp = df.copy()
# dftemp = dftemp.explode('NoticeExemptions')
# dftemp = dftemp.explode('Notices')
# dftemp = dftemp.explode('PointOfDiversions')
# dftemp = dftemp.explode('Contacts')

# print(len(dftemp))
# dftemp.head(1)

In [5]:
# # already done, skip ahead

# # To unpack column's dictionary value new into separate columns -> contact to existing dataframe -> drop unpacked column
# dftemp = pd.concat([dftemp, dftemp["RightUse"].apply(pd.Series)], axis=1).drop(columns="RightUse")
# dftemp = pd.concat([dftemp, dftemp["NoticeExemptions"].apply(pd.Series)], axis=1).drop(columns="NoticeExemptions")
# dftemp = pd.concat([dftemp, dftemp["Notices"].apply(pd.Series)], axis=1).drop(columns="Notices")
# dftemp = pd.concat([dftemp, dftemp["PointOfDiversions"].apply(pd.Series)], axis=1).drop(columns="PointOfDiversions")
# dftemp = pd.concat([dftemp, dftemp["Contacts"].apply(pd.Series)], axis=1).drop(columns="Contacts")

# print(len(dftemp))
# dftemp.head(1)

In [6]:
# # already done, skip ahead

# # Clean Data
# # we don't really need the 'NoticeExemptions' at this time
# dropList = ['PumpSheets', 'SpecialConditions', 'NoticeID', 'NoticeType', 'NoticeDate', 'EffectiveDate', 'ReasonForAdminAction', 'Notes', 'DeleteNotice']
# dftemp = dftemp.drop(dropList, axis=1).drop_duplicates().reset_index(drop=True)
# #dftemp = dftemp.drop(dropList, axis=1)
# print(len(dftemp))
# dftemp.head()

In [7]:
# # already done, skip ahead

# # export api data
# dftemp.to_csv('RawInputData/AllSurfaceWaterPoints.zip', compression=dict(method='zip', archive_name='AllSurfaceWaterPoints.csv'), index=False)  # The output, save as a zip

In [8]:
# POD Data
# read in the above API data that was saved to hard file
PoDAAInput = "RawInputData/AllSurfaceWaterPoints.zip"
dfPoD = pd.read_csv(PoDAAInput).replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfPoD:
    dfPoD['WaDEUUID'] = "neD" + dfPoD.index.astype(str)
    dfPoD.to_csv('RawInputData/AllSurfaceWaterPoints.zip', compression=dict(method='zip', archive_name='AllSurfaceWaterPoints.csv'), index=False)
    
print(len(dfPoD))
dfPoD.head(1)

28711


  dfPoD = pd.read_csv(PoDAAInput).replace(np.nan, "")


Unnamed: 0,RightID,RightStatus,ApplicationName,StartDownStream,WaterDivision,FieldOffice,CarrierA,SourceName,ProGrant,Units,InstantaneousGrant,VolumetricGrant,ReservoirCapacityGrant,GPM,GrantCounted,AcresCounted,Rate,CurrentTotalAcres,AnnualReportRequired,Annotation,PriorityDate,BeneficialDate,ApprovalDate,LastOrderDate,IrrigationDistrict,IrrigationProject,PermitGroup,IsDeleted,LatitudeDecimalDegrees,LongitudeDecimalDegrees,UseCode,UseDescription,SendNotices,UseTypeDescription,0,ExemptionDescription,0.1,Section,SubSection,Township,Range,RangeDirection,CountyName,NrdName,HUC12,Upstream,PrimaryIndicator,PODStatus,LegalDescription,LatitudeDecimalDegrees.1,LongitudeDecimalDegrees.1,0.2,ContactId,FirstName,LastName,ContactType,SeqNum,BeginDate,EndDate,Address1,Address2,City,State,Zip,Phone1,Phone2,Phone3,LicenseNumber,IsDeleted.1,0.3,WaDEUUID
0,1,Active,A-365R,8955.0,1A,Bridgeport,Central Canal,North Platte River,0.04,CFS,,,,17.0,Yes,No,,0.0,No,U-29,1897-03-15T00:00:00,,1996-03-05T00:00:00,2011-04-26T00:00:00,Central Irrigation District,Central Canal,Public,False,41.85584,-103.70574,US,Incidental Underground Storage,Yes,Natural Flow,,,,28.0,NE NW,22.0,55.0,West,Scotts Bluff,North Platte,101800090810.0,Yes,Primary,Active,NE NW S28 T22N-R55W,41.85584,-103.70574,,754.0,,Central Irrigation District,Owner,1.0,,,230450 Crow Road,,Gering,Nebraska,69341,3086413425.0,,,,No,,neD0


In [9]:
# Drop non-Active AllocationLegalStatusCV Water Rights
# We only want Active water rights
# We only want active POD sites
dfPoD = dfPoD[dfPoD['RightStatus'] == 'Active'].reset_index(drop=True)
dfPoD = dfPoD[dfPoD['PODStatus'] == 'Active'].reset_index(drop=True)
print(len(dfPoD))
dfPoD.head(1)

21948


Unnamed: 0,RightID,RightStatus,ApplicationName,StartDownStream,WaterDivision,FieldOffice,CarrierA,SourceName,ProGrant,Units,InstantaneousGrant,VolumetricGrant,ReservoirCapacityGrant,GPM,GrantCounted,AcresCounted,Rate,CurrentTotalAcres,AnnualReportRequired,Annotation,PriorityDate,BeneficialDate,ApprovalDate,LastOrderDate,IrrigationDistrict,IrrigationProject,PermitGroup,IsDeleted,LatitudeDecimalDegrees,LongitudeDecimalDegrees,UseCode,UseDescription,SendNotices,UseTypeDescription,0,ExemptionDescription,0.1,Section,SubSection,Township,Range,RangeDirection,CountyName,NrdName,HUC12,Upstream,PrimaryIndicator,PODStatus,LegalDescription,LatitudeDecimalDegrees.1,LongitudeDecimalDegrees.1,0.2,ContactId,FirstName,LastName,ContactType,SeqNum,BeginDate,EndDate,Address1,Address2,City,State,Zip,Phone1,Phone2,Phone3,LicenseNumber,IsDeleted.1,0.3,WaDEUUID
0,1,Active,A-365R,8955.0,1A,Bridgeport,Central Canal,North Platte River,0.04,CFS,,,,17.0,Yes,No,,0.0,No,U-29,1897-03-15T00:00:00,,1996-03-05T00:00:00,2011-04-26T00:00:00,Central Irrigation District,Central Canal,Public,False,41.85584,-103.70574,US,Incidental Underground Storage,Yes,Natural Flow,,,,28.0,NE NW,22.0,55.0,West,Scotts Bluff,North Platte,101800090810.0,Yes,Primary,Active,NE NW S28 T22N-R55W,41.85584,-103.70574,,754.0,,Central Irrigation District,Owner,1.0,,,230450 Crow Road,,Gering,Nebraska,69341,3086413425.0,,,,No,,neD0


In [10]:
# assign VariableSpecificUUID

def assignVariableSpecificUUID(colvA):
    outList = ''
    colvA = str(colvA).strip()
    if colvA == "CFS":
        outList = "NEwr_V1"
    if colvA == "AF":
        outList = "NEwr_V2"

    return outList

dfPoD['in_VariableSpecificUUID'] = dfPoD.apply(lambda row: assignVariableSpecificUUID(row['Units']), axis=1)
dfPoD['in_VariableSpecificUUID'].unique()

array(['NEwr_V1', 'NEwr_V2', ''], dtype=object)

In [11]:
# AllocationFlow_CFS - based on reporeted Unit

def assignAllocationFlow_CFS(colvA, colvB):
    if colvA == '' or pd.isnull(colvA):
        outList = ''
    else:
        colvB = colvB.strip()
        if colvB == "CFS":
            outList = colvA
        else:
            outList = ''
    return outList

dfPoD['AllocationFlow_CFS'] = dfPoD.apply(lambda row: assignAllocationFlow_CFS(row['ProGrant'], row['Units']), axis=1)
dfPoD['AllocationFlow_CFS'].unique()

array([0.04, 0.16, 0.01, 0.02, 0.38, 0.06, 1.01, 0.33, 0.23, 1.43, 0.5,
       1.63, 0.12, 0.61, 0.86, 0.11, 2.22, 13.0, 0.21, 0.15, 1.28, '',
       0.48, 0.19, 1572.0, 780.59, 191.86, 15.63, 192.56, 0.14, 861.0,
       45.0, 230.07, 0.0, 0.27, 0.74, 3.07, 2.0, 0.3, 0.79, 1.14, 0.57,
       0.43, 5.14, 0.46, 111.51, 10.0, 3.0, 1.0, 15.0, 1.59, 2.28, 18.19,
       4.0, 14.38, 24.38, 0.17, 2.24, 5.68, 95.94, 82.48, 3.05, 0.4, 2.26,
       66.18, 1.02, 37.76, 42.5, 60.0, 0.09, 20.06, 2.92, 0.67, 30.99,
       1.07, 2.71, 4.91, 0.22, 90.4, 28.57, 0.31, 42.71, 1.11, 59.88, 0.8,
       9.43, 8.93, 5.11, 3.22, 1.48, 0.51, 0.89, 0.88, 1.41, 9.76, 1.8,
       0.26, 0.82, 1.03, 0.29, 1.77, 1.68, 2.57, 1.6, 2.4, 0.64, 1.57,
       0.71, 19.12, 0.08, 0.2, 1.19, 3.99, 1.5, 4.8, 14.19, 2.39, 0.6,
       0.76, 2.3, 0.07, 2.97, 18.57, 12.44, 1.3, 8.69, 2.74, 0.63, 4.29,
       8.77, 14.43, 1.25, 0.85, 11.8, 1.37, 2.19, 31.88, 5.0, 0.1, 11.64,
       15.24, 27.62, 0.96, 11.17, 0.28, 0.69, 9.59, 4.98, 

In [12]:
# AllocationVolume_AF - based on reporeted Unit

def assignAllocationVolume_AF(colvA, colvB):
    if colvA == '' or pd.isnull(colvA):
        outList = ''
    else:
        colvB = colvB.strip()
        if colvB == "AF":
            outList = colvA
        else:
            outList = ''
    return outList

dfPoD['AllocationVolume_AF'] = dfPoD.apply(lambda row: assignAllocationVolume_AF(row['ProGrant'], row['Units']), axis=1)
dfPoD['AllocationVolume_AF'].unique()

array(['', 10.0, 1.7, ..., 17.99, 34.09, 63.69], dtype=object)

In [13]:
dfPoD['WaterAllocationNativeURL'] = 'https://nednr.nebraska.gov/dynamic/WaterRights/WaterRights/SWRDetailPage?RightId=' + dfPoD['RightID'].astype(str)
dfPoD.head(1)

Unnamed: 0,RightID,RightStatus,ApplicationName,StartDownStream,WaterDivision,FieldOffice,CarrierA,SourceName,ProGrant,Units,InstantaneousGrant,VolumetricGrant,ReservoirCapacityGrant,GPM,GrantCounted,AcresCounted,Rate,CurrentTotalAcres,AnnualReportRequired,Annotation,PriorityDate,BeneficialDate,ApprovalDate,LastOrderDate,IrrigationDistrict,IrrigationProject,PermitGroup,IsDeleted,LatitudeDecimalDegrees,LongitudeDecimalDegrees,UseCode,UseDescription,SendNotices,UseTypeDescription,0,ExemptionDescription,0.1,Section,SubSection,Township,Range,RangeDirection,CountyName,NrdName,HUC12,Upstream,PrimaryIndicator,PODStatus,LegalDescription,LatitudeDecimalDegrees.1,LongitudeDecimalDegrees.1,0.2,ContactId,FirstName,LastName,ContactType,SeqNum,BeginDate,EndDate,Address1,Address2,City,State,Zip,Phone1,Phone2,Phone3,LicenseNumber,IsDeleted.1,0.3,WaDEUUID,in_VariableSpecificUUID,AllocationFlow_CFS,AllocationVolume_AF,WaterAllocationNativeURL
0,1,Active,A-365R,8955.0,1A,Bridgeport,Central Canal,North Platte River,0.04,CFS,,,,17.0,Yes,No,,0.0,No,U-29,1897-03-15T00:00:00,,1996-03-05T00:00:00,2011-04-26T00:00:00,Central Irrigation District,Central Canal,Public,False,41.85584,-103.70574,US,Incidental Underground Storage,Yes,Natural Flow,,,,28.0,NE NW,22.0,55.0,West,Scotts Bluff,North Platte,101800090810.0,Yes,Primary,Active,NE NW S28 T22N-R55W,41.85584,-103.70574,,754.0,,Central Irrigation District,Owner,1.0,,,230450 Crow Road,,Gering,Nebraska,69341,3086413425.0,,,,No,,neD0,NEwr_V1,0.04,,https://nednr.nebraska.gov/dynamic/WaterRights...


In [14]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfPoD['WaDEUUID']

# # Method Info
df['in_MethodUUID'] = "NEwr_M1" # for surface water

# Variable Info
df['in_VariableSpecificUUID'] = dfPoD['in_VariableSpecificUUID']

# Organization Info
df['in_OrganizationUUID'] = "NEwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfPoD['SourceName']
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = "Surface Water"

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = "4326"
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = dfPoD['HUC12']
df['in_HUC8'] = ""
df['in_Latitude'] = dfPoD['LatitudeDecimalDegrees']
df['in_Longitude'] = dfPoD['LongitudeDecimalDegrees']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
#df['in_SiteNativeID'] = "POD" + dfPoD['PointOfDiversionID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SiteNativeID'] = ""
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "NE"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfPoD['AllocationFlow_CFS']
df['in_AllocationLegalStatusCV'] = dfPoD['RightStatus']
df['in_AllocationNativeID'] =  dfPoD['RightID'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = dfPoD['FirstName'] + " " + dfPoD['LastName']
df['in_AllocationPriorityDate'] = dfPoD['PriorityDate']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfPoD['AllocationVolume_AF']
df['in_BeneficialUseCategory'] = dfPoD['UseDescription']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfPoD['WaterAllocationNativeURL']

outdfPoD = df.copy()
outdfPoD = outdfPoD.drop_duplicates().reset_index(drop=True)
print(len(outdfPoD))
outdfPoD.head()

21948


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,neD0,NEwr_M1,NEwr_V1,NEwr_O1,,,,North Platte River,,Surface Water,,,,4326,,101800090810.0,,41.85584,-103.70574,,,POD,,,,,NE,,,,,,,,,,0.04,Active,1,Central Irrigation District,1897-03-15T00:00:00,,,,,,Incidental Underground Storage,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
1,neD1,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,,Surface Water,,,,4326,,101800090402.0,,42.00158,-104.04618,,,POD,,,,,NE,,,,,,,,,,0.16,Active,2,Steve & Pam Dickey,1933-05-29T00:00:00,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
2,neD2,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,,Surface Water,,,,4326,,101800090402.0,,42.00158,-104.04618,,,POD,,,,,NE,,,,,,,,,,0.16,Active,2,Steve & Pam Dickey,1933-05-29T00:00:00,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
3,neD3,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,,Surface Water,,,,4326,,101800090402.0,,42.00158,-104.04618,,,POD,,,,,NE,,,,,,,,,,0.16,Active,2,Steve & Pam Dickey,1933-05-29T00:00:00,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
4,neD4,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,,Surface Water,,,,4326,,101800090402.0,,42.00158,-104.04618,,,POD,,,,,NE,,,,,,,,,,0.16,Active,2,Steve & Pam Dickey,1933-05-29T00:00:00,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...


# POU Surface Water Data
- will extract and share some elements from above POD surface water data via RightID

In [15]:
# Input File - PoU Shapefile Data
# export dataframe as zipped csv
pouInput = 'RawInputData/shapefile/BND_SurfaceWaterRights_DNR.zip'
dfPOU = gpd.read_file(pouInput).replace(np.nan, "").replace("nan,nan", "") #geodataframe read
dfPOU = dfPOU.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfPOU:
    dfPOU['WaDEUUID'] = "waU" + dfPOU.index.astype(str)
    dfPOU.to_csv('RawInputData/BND_SurfaceWaterRights_DNR.zip', compression=dict(method='zip', archive_name='BND_SurfaceWaterRights_DNR.csv'), index=False)

print(len(dfPOU))
dfPOU.head(1)

7344


Unnamed: 0,OBJECTID,RightID,RightUse,Applicatio,Acres_Orde,WaterDivis,FieldOffic,CarrierA,CarrierB,SourceName,ProGrant,Units,GPM,Rate,CurrentTot,HUC12,PriorityDa,Beneficial,ApprovalDa,Irrigation,Notice_Sta,Notice_Eff,Notice_Typ,Reason_Adm,GlobalID,ProjectID,wadeID,Longitude,Latitude,GlobalID_1,Shape_Leng,Shape_Le_1,Shape_Area,WaDEUUID
0,1,4035,Irrigation from Natural Stream,A-15164,72.0,1D,Lincoln,Pump,,"Indian Creek, Big",1.03,CFS,462,70,72.0,102702020505.0,1978-01-17,,1978-11-15,,Open,2022-09-06,Open,Blue River Compact,{18D626A8-1BCF-43DA-9655-0003FBD63F15},0,wade1,-96.81605,40.04782,{FA10E0EF-71A5-4169-9CC2-33D006254082},4419.99441,0.03462,3e-05,waU0


In [16]:
# assign VariableSpecificUUID

def assignVariableSpecificUUID(colvA):
    outList = ''
    colvA = str(colvA).strip()
    if colvA == "CFS":
        outList = "NEwr_V1"
    if colvA == "AF":
        outList = "NEwr_V2"

    return outList

dfPOU['in_VariableSpecificUUID'] = dfPOU.apply(lambda row: assignVariableSpecificUUID(row['Units']), axis=1)
dfPOU['in_VariableSpecificUUID'].unique()

array(['NEwr_V1', 'NEwr_V2', ''], dtype=object)

In [17]:
# AllocationFlow_CFS - based on reporeted Unit

def assignAllocationFlow_CFS(colvA, colvB):
    if colvA == '' or pd.isnull(colvA):
        outList = ''
    else:
        colvB = colvB.strip()
        if colvB == "CFS":
            outList = colvA
        else:
            outList = ''
    return outList

dfPOU['AllocationFlow_CFS'] = dfPOU.apply(lambda row: assignAllocationFlow_CFS(row['ProGrant'], row['Units']), axis=1)
dfPOU['AllocationFlow_CFS'].unique()

array([1.03, 0.67, 0.0, 0.14, 0.57, '', 2.14, 1.15, 1.37, 0.24, 2.84,
       1.13, 1.09, 1.14, 1.26, 0.21, 0.9, 0.54, 0.28, 0.6, 3.07, 2.19,
       0.69, 58.08, 0.71, 0.91, 0.35, 0.66, 1.46, 0.17, 1.0, 4.65, 0.41,
       0.16, 1.93, 2.92, 3.29, 1.51, 2.75, 1.22, 0.1, 0.39, 0.52, 0.43,
       0.76, 0.23, 230.07, 0.77, 0.7, 0.53, 0.2, 1.59, 2.33, 0.31, 0.11,
       1.85, 0.94, 1.49, 0.37, 0.38, 0.86, 1.06, 0.36, 1.56, 0.32, 2.71,
       1.58, 0.46, 0.18, 0.13, 0.3, 1.48, 0.05, 0.48, 780.59, 0.74, 0.73,
       1.79, 0.85, 0.44, 4.14, 7.24, 3.35, 0.04, 1.52, 1.71, 1.31, 1.43,
       1.77, 2.02, 2.36, 0.06, 0.29, 3.4, 0.47, 0.81, 2.86, 1.5, 0.12,
       2.09, 6.55, 0.55, 3.21, 0.09, 0.08, 4.26, 1.64, 3.57, 0.82, 0.75,
       0.88, 1.07, 1.02, 0.83, 0.64, 2.29, 0.02, 1.23, 0.19, 0.78, 1.97,
       0.15, 0.68, 0.63, 1.89, 0.33, 1.47, 1.74, 0.79, 1.29, 5.0, 1.9,
       0.45, 3.26, 1.65, 0.27, 0.34, 14.43, 0.56, 0.49, 2.53, 1.08, 2.01,
       1.53, 0.5, 2.1, 0.4, 2.31, 1.32, 1.04, 0.62, 1.2, 1.

In [18]:
# AllocationVolume_AF - based on reporeted Unit

def assignAllocationVolume_AF(colvA, colvB):
    if colvA == '' or pd.isnull(colvA):
        outList = ''
    else:
        colvB = colvB.strip()
        if colvB == "AF":
            outList = colvA
        else:
            outList = ''
    return outList

dfPOU['AllocationVolume_AF'] = dfPOU.apply(lambda row: assignAllocationVolume_AF(row['ProGrant'], row['Units']), axis=1)
dfPOU['AllocationVolume_AF'].unique()

array(['', 0.0, 17.4, 64.8, 85.8, 150.0, 25.25, 20.0, 51.4, 156.79, 177.2,
       49.9, 5.72, 1.36, 44.0, 510.0, 122.0, 3.0, 95.3, 33.3, 1564.0,
       22.2, 10.0, 82.0, 3.5, 42.8, 71.2, 2.22, 148.0, 2.72, 131.8,
       1615.42, 14.77, 211.0, 258.0, 24.0, 9.69, 25.04, 12.4, 48.0, 15.9,
       24.5, 49.6, 191.87, 12.6, 15.0, 59.6, 26.82, 65.0, 45.0, 38.0,
       62.9, 61.6, 59.0, 73.6, 16.0, 21.0, 234.0, 47.2, 30.0, 154.2, 20.6,
       121.8, 14.1, 240.0, 104.1, 1.1, 57.99, 25.61, 57.4, 935.0, 14.0,
       42.1, 38.1, 46.0, 129.0, 360.9, 57.0, 1.9, 39.0, 48.26, 4.0, 28.0,
       58.3, 53.33, 55.0, 7.85, 49.2, 9.74, 63.1, 4.36, 58.8, 19.06, 18.0,
       80.0, 9.35, 50.2, 22.9, 125.0, 431.7, 21.77, 88.0, 0.7, 44.8, 35.8,
       1248.9, 49.1, 36015.0, 55464.2, 0.1, 113.82, 28.4, 79.6, 590.0,
       43.0, 103.9, 28.1, 72.2, 522.0, 3.61, 8.42, 34.39, 7.0, 48.5, 30.6,
       861.25, 67.8, 68.8, 26.0, 1.87, 29.0, 41.5, 8.0, 10.99, 28513.92,
       14.7, 107.0, 36.0, 25.0, 26.1, 93.87, 208.0, 1

In [19]:
dfPOU['WaterAllocationNativeURL'] = 'https://nednr.nebraska.gov/dynamic/WaterRights/WaterRights/SWRDetailPage?RightId=' + dfPOU['RightID'].astype(str)
dfPOU.head(1)

Unnamed: 0,OBJECTID,RightID,RightUse,Applicatio,Acres_Orde,WaterDivis,FieldOffic,CarrierA,CarrierB,SourceName,ProGrant,Units,GPM,Rate,CurrentTot,HUC12,PriorityDa,Beneficial,ApprovalDa,Irrigation,Notice_Sta,Notice_Eff,Notice_Typ,Reason_Adm,GlobalID,ProjectID,wadeID,Longitude,Latitude,GlobalID_1,Shape_Leng,Shape_Le_1,Shape_Area,WaDEUUID,in_VariableSpecificUUID,AllocationFlow_CFS,AllocationVolume_AF,WaterAllocationNativeURL
0,1,4035,Irrigation from Natural Stream,A-15164,72.0,1D,Lincoln,Pump,,"Indian Creek, Big",1.03,CFS,462,70,72.0,102702020505.0,1978-01-17,,1978-11-15,,Open,2022-09-06,Open,Blue River Compact,{18D626A8-1BCF-43DA-9655-0003FBD63F15},0,wade1,-96.81605,40.04782,{FA10E0EF-71A5-4169-9CC2-33D006254082},4419.99441,0.03462,3e-05,waU0,NEwr_V1,1.03,,https://nednr.nebraska.gov/dynamic/WaterRights...


In [20]:
dfPOU['RightUse'].unique()

array(['Irrigation from Natural Stream',
       'Stor-only (irrigation from a reservoir on lands not covered by a natural flow appropriation)',
       'Supplemental Irrigation (irrig. from reservoir on lands also covered by a natural flow appropriation)',
       'Transfer to Instream',
       'Stor-only and Incidental Underground Storage', 'Recharge',
       'Supplemental Irrigation and Incidental Underground Storage',
       'Conducting Surface Water for Irrigation (Uses Water from an Existing Appropriation)',
       'Incidental Underground Storage', 'Domestic',
       'Conducting Groundwater for Irrigation (Source is a Registered Groundwater Well)',
       'Storage', 'Instream Flow',
       'Irrigation and Incidental Underground Storage',
       'Fish and wildlife',
       'Supplemental Storage (an appropriation that has a prior appropriation for storage)',
       'Domestic Storage Use',
       'Irrigation and Storage (an appropriation which was approved for both uses)',
       'Wetl

In [21]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfPOU['WaDEUUID']

# # Method Info
df['in_MethodUUID'] = "NEwr_M1" # for surface water

# Variable Info
df['in_VariableSpecificUUID'] = dfPOU['in_VariableSpecificUUID']

# Organization Info
df['in_OrganizationUUID'] = "NEwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfPOU['SourceName']
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = "Surface Water"

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = "4326"
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = dfPOU['HUC12']
df['in_HUC8'] = ""
df['in_Latitude'] = dfPOU['Latitude']
df['in_Longitude'] = dfPOU['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
df['in_SiteNativeID'] = "POU" + dfPOU['wadeID'].replace("", 0).fillna(0).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "NE"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfPOU['AllocationFlow_CFS']
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] =  dfPOU['RightID'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = ""
df['in_AllocationPriorityDate'] = dfPOU['PriorityDa']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfPOU['AllocationVolume_AF']
df['in_BeneficialUseCategory'] = dfPOU['RightUse']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfPOU['WaterAllocationNativeURL']

outdfPoU = df.copy()
outdfPoU = outdfPoU.drop_duplicates().reset_index(drop=True)
print(len(outdfPoU))
outdfPoU.head()

7344


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,waU0,NEwr_M1,NEwr_V1,NEwr_O1,,,,"Indian Creek, Big",,Surface Water,,,,4326,,102702020505.0,,40.04782,-96.81605,,,POU,,POUwade1,,,NE,,,,,,,,,,1.03,,4035,,1978-01-17,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
1,waU1,NEwr_M1,NEwr_V1,NEwr_O1,,,,Turkey Creek,,Surface Water,,,,4326,,102702040308.0,,40.51107,-97.01462,,,POU,,POUwade3,,,NE,,,,,,,,,,0.67,,10330,,1970-01-07,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
2,waU2,NEwr_M1,NEwr_V1,NEwr_O1,,,,"Loup River, South",,Surface Water,,,,4326,,102100040508.0,,41.03143,-98.82382,,,POU,,POUwade5,,,NE,,,,,,,,,,0.0,,5486,,1970-07-08,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
3,waU3,NEwr_M1,NEwr_V1,NEwr_O1,,,,"Sandy Creek, Big",,Surface Water,,,,4326,,102702060807.0,,40.23899,-97.38548,,,POU,,POUwade6,,,NE,,,,,,,,,,0.14,,10117,,1945-10-29,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
4,waU4,NEwr_M1,NEwr_V1,NEwr_O1,,,,"School Creek, Trib. To",,Surface Water,,,,4326,,102702030203.0,,40.64949,-97.91262,,,POU,,POUwade7,,,NE,,,,,,,,,,0.57,,3116,,1974-12-19,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...


# POD Groundwater Data
- data already pulled from api, used saved csv instead

In [22]:
# %%time
# # API retrieval
# # Checked api website and would allow up to 270 input as page. 

# columns_to_keep = ['WellID','RegistrationNumber','Status','WellUseDescription','NRDName','CountyName','Location','SurfaceWaterAppropriation','Acres','SeriesType','Pump','OwnerContact','RegistrationDate','CompletionDate','LastChangeDate','Latitude','Longitude']

# df = pd.DataFrame(columns=columns_to_keep)

# countPage = 1
# while countPage <= 260:
#    url = "https://nednr.nebraska.gov/IwipApi/api/v1/Wells/AllWells?page=" + str(countPage)
#    print(url)

#    # Store data in DataFrame
#    try:
#        response = requests.get(url)
#        response.raise_for_status()  # Raise an exception for bad status codes (e.g., 404)
#        responseD = json.loads(response.text)
#        DtL = responseD['Results']
#        length = len(DtL)
#        for i in range(length):
#            row = pd.DataFrame([DtL[i]])
           
#            # Check if the 'Status' is 'Active Registered Well' before adding it to the DataFrame
#            if row['Status'].iloc[0] == 'Active Registered Well':
#                # Select only the desired columns
#                row = row[columns_to_keep]
#                df = pd.concat([df, row])
#    except requests.exceptions.RequestException as e:
#        print(f"Request error: {e}")
#    except json.JSONDecodeError as e:
#        print(f"JSON decoding error: {e}")
#    except KeyError as e:
#        print(f"KeyError: {e}")
    
#    countPage = countPage + 1

# # Reset the DataFrame index
# df.reset_index(drop=True, inplace=True)

# print(len(df))
# df.head()

In [23]:
# # already done, skip ahead

# # To unpack column's dictionary value new into separate columns -> contact to existing dataframe -> drop unpacked column
# dftemp = df.copy()
# dftemp = pd.concat([dftemp, dftemp["Pump"].apply(pd.Series)], axis=1).drop(columns=["Pump","PumpColumn_Diameter","PumpDepth","PumpInstallationDate","PumpInstallerContact"])
# dftemp = pd.concat([dftemp, dftemp["OwnerContact"].apply(pd.Series)], axis=1).drop(columns=["OwnerContact","ContactId","ContactType","SeqNum","BeginDate","EndDate","Address1","Address2","City","State","Zip","Phone1","Phone2","Phone3","LicenseNumber","IsDeleted"])

# print(len(dftemp))
# dftemp.head(5)

In [24]:
# # already done, skip ahead

# # export api data
# dftemp.to_csv('RawInputData/AllWells.zip', compression=dict(method='zip', archive_name='AllWells.csv'), index=False)  # The output, save as a zip"

In [25]:
# Input File
FIgw_PoD = "RawInputData/AllWells.zip"
dfgwinPOD = pd.read_csv(FIgw_PoD, encoding = "ISO-8859-1").replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfgwinPOD:
    dfgwinPOD['WaDEUUID'] = "d" + dfgwinPOD.index.astype(str)
    dfgwinPOD.to_csv('RawInputData/AllWells.zip', compression=dict(method='zip', archive_name='AllWells.csv'), index=False)

print(len(dfgwinPOD))
dfgwinPOD.head()

  dfgwinPOD = pd.read_csv(FIgw_PoD, encoding = "ISO-8859-1").replace(np.nan, "")


188893


Unnamed: 0,WellID,RegistrationNumber,Status,WellUseDescription,NRDName,CountyName,Location,SurfaceWaterAppropriation,Acres,SeriesType,RegistrationDate,CompletionDate,LastChangeDate,Latitude,Longitude,PumpRate,FirstName,LastName,WaDEUUID
0,3,A-002200A,Active Registered Well,Irrigation,South Platte,Cheyenne,NW S8-T13-R46West,,46.0,Connected to pump into a common carrier,1931-04-29T00:00:00,1931-10-01T00:00:00,2002-10-28T00:00:00,41.11699,-102.62495,300.0,Henry & Richard,Bruns,d0
1,4,A-002200B,Active Registered Well,Irrigation,South Platte,Cheyenne,NW S8-T13-R46West,,46.0,Connected to pump into a common carrier,1931-04-29T00:00:00,1931-10-01T00:00:00,2011-01-24T11:42:23.48,41.11699,-102.62495,,Henry & Richard,Bruns,d1
2,5,A-002200C,Active Registered Well,Irrigation,South Platte,Cheyenne,NW S8-T13-R46West,,46.0,Connected to pump into a common carrier,1931-04-29T00:00:00,1931-10-01T00:00:00,2002-10-28T00:00:00,41.11699,-102.62495,,Henry & Richard,Bruns,d2
3,7,A-002513,Active Registered Well,Irrigation,Central Platte,Dawson,SWSE S19-T9-R20West,,120.0,Single Project,1935-02-15T00:00:00,1935-02-15T00:00:00,2006-11-08T00:00:00,40.73164,-99.64524,488.0,,Ourada Cattle Company Inc,d3
4,8,A-002770,Active Registered Well,Irrigation,South Platte,Kimball,SWNW S28-T15-R54West,,113.0,Single Project,1937-08-07T00:00:00,1937-08-10T00:00:00,2011-06-15T10:39:43.44,41.24492,-103.52575,1346.0,Gene R & Chari L,Mohr,d4


In [26]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfgwinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NEwr_M2" # for groundwater

# Variable Info
df['in_VariableSpecificUUID'] = "NEwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NEwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = "Fresh"
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = "" # auto fill in below if not provdied
df['in_WaterSourceTypeCV'] = "Groundwater"

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = dfgwinPOD['CountyName']
df['in_EPSGCodeCV'] = "4326"
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfgwinPOD['Latitude']
df['in_Longitude'] = dfgwinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"  # "Point of Diversion"
df['in_SiteName'] = dfgwinPOD['Location']
df['in_SiteNativeID'] = "PODgw" + dfgwinPOD['WellID'].astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfgwinPOD['SeriesType']
df['in_StateCV'] = "NE"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = dfgwinPOD['RegistrationDate']
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfgwinPOD['Status']
df['in_AllocationNativeID'] = dfgwinPOD['RegistrationNumber']
df['in_AllocationOwner'] = dfgwinPOD['FirstName'] + " " + dfPoD['LastName']
df['in_AllocationPriorityDate'] = ""
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfgwinPOD['PumpRate']
df['in_BeneficialUseCategory'] = dfgwinPOD['WellUseDescription']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = "10/04/2023"
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = "1"
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfgwinPOD['Acres']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = "https://nednr.nebraska.gov/Dynamic/Wells/Wells/WellDetails?WellId=" + dfgwinPOD['WellID'].astype(str).str.strip()

outgwPOD = df.copy()
outgwPOD = outgwPOD.drop_duplicates().reset_index(drop=True)
print(len(outgwPOD))
outgwPOD.head()

188893


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,d0,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,,Groundwater,,,Cheyenne,4326,,,,41.11699,-102.62495,,,POD,NW S8-T13-R46West,PODgw3,,Connected to pump into a common carrier,NE,,1931-04-29T00:00:00,,,,,,,,,Active Registered Well,A-002200A,Henry & Richard Central Irrigation District,,,,,,300.0,Irrigation,,,,10/04/2023,,1,,46.0,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
1,d1,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,,Groundwater,,,Cheyenne,4326,,,,41.11699,-102.62495,,,POD,NW S8-T13-R46West,PODgw4,,Connected to pump into a common carrier,NE,,1931-04-29T00:00:00,,,,,,,,,Active Registered Well,A-002200B,Henry & Richard Dickey,,,,,,,Irrigation,,,,10/04/2023,,1,,46.0,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
2,d2,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,,Groundwater,,,Cheyenne,4326,,,,41.11699,-102.62495,,,POD,NW S8-T13-R46West,PODgw5,,Connected to pump into a common carrier,NE,,1931-04-29T00:00:00,,,,,,,,,Active Registered Well,A-002200C,Henry & Richard Dickey,,,,,,,Irrigation,,,,10/04/2023,,1,,46.0,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
3,d3,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,,Groundwater,,,Dawson,4326,,,,40.73164,-99.64524,,,POD,SWSE S19-T9-R20West,PODgw7,,Single Project,NE,,1935-02-15T00:00:00,,,,,,,,,Active Registered Well,A-002513,Dickey,,,,,,488.0,Irrigation,,,,10/04/2023,,1,,120.0,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
4,d4,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,,Groundwater,,,Kimball,4326,,,,41.24492,-103.52575,,,POD,SWNW S28-T15-R54West,PODgw8,,Single Project,NE,,1937-08-07T00:00:00,,,,,,,,,Active Registered Well,A-002770,Gene R & Chari L Dickey,,,,,,1346.0,Irrigation,,,,10/04/2023,,1,,113.0,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...


## Concatenate Data

In [27]:
# Concatenate dataframes
frames = [outdfPoD, outdfPoU, outgwPOD]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

218185


## Clean Data & WaDE Custom Elements

In [28]:
# Making Sure datatype of HUC12 is int.

def assignHUC12(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        outList = int(colrowValue)
    return outList

outdf['in_HUC12'] = outdf.apply(lambda row: assignHUC12(row['in_HUC12']), axis=1)
outdf['in_HUC12'].unique()

array([101800090810, 101800090402, 101800130102, ..., 102100060704,
       101500031701, 102100060805], dtype=object)

In [29]:
# solving a long benefical use
def longBenUseTempFix(val):
    if val == "Supplemental Cooling (an app. for water for cooling through a system that has a prior app. for cooling)":
        outList = "Supplemental Cooling (app for water for cooling through a system that has a prior app for cooling)"
    elif val == "Supplemental Irrigation (irrig. from reservoir on lands also covered by a natural flow appropriation)":
        outList = "Supplemental Irrigation (irrig. from reservoir on lands covered by a natural flow appropriation)"
    else:
        outList = val
    return outList
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: longBenUseTempFix(row['in_BeneficialUseCategory']), axis=1)

for x in outdf['in_BeneficialUseCategory'].sort_values().unique():
    print(f'"' + str(x) + '",')

"Aquaculture",
"Commercial/Industrial",
"Conducting Groundwater for Irrigation (Source is a Registered Groundwater Well)",
"Conducting Surface Water for Irrigation (Uses Water from an Existing Appropriation)",
"Cooling",
"Dewatering (Over 90 Days)",
"Domestic",
"Domestic Storage Use",
"Fish Culture",
"Fish and wildlife",
"Flood Control",
"Ground Heat Exchanger well - Closed Loop Heat Pump well",
"Heat Pump well - Open Loop Heat Pump Well",
"Incidental Underground Storage",
"Induced Ground Water Recharge",
"Injection",
"Instream Basin-Management",
"Instream Flow",
"Intentional Underground Storage",
"Irrigation",
"Irrigation and Incidental Underground Storage",
"Irrigation and Storage (an appropriation which was approved for both uses)",
"Irrigation from Natural Lake",
"Irrigation from Natural Stream",
"Livestock",
"Maintain Level of a Lake",
"Manufacturing",
"Monitoring (Ground Water Quality)",
"Observation (Ground Water Levels)",
"Other",
"Pit (excavation)",
"Power",
"Power and Inciden

In [30]:
# Clean owner name up
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;/\)(-]", "", Val).title().replace("  ", " ").strip()
    return Val

In [31]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Central Irrigation District', 'Steve Pam Dickey',
       'Charlene I Fitzgerald', ..., 'Eric Kate Livingston',
       'Susan Livingston',
       'James L Ella Lower Big Blue Natural Resources District'],
      dtype=object)

In [32]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['North Platte River', 'Glenn Springs', 'Schuppe Creek, West', ...,
       'Prairie Pond', 'Vieselmeyer Reservoir', 'Toohey Drain'],
      dtype=object)

In [33]:
# Ensure Empty String

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [34]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Surface Water', 'Groundwater'], dtype=object)

In [35]:
outdf['in_County'] = outdf.apply(lambda row: ensureEmptyString(row['in_County']), axis=1)
outdf['in_County'].unique()

array(['', 'Cheyenne', 'Dawson', 'Kimball', 'Hayes', 'Hitchcock',
       'Scotts Bluff', 'Hall', 'Keith', 'Sioux', 'Banner', 'Lincoln',
       'Merrick', 'Dodge', 'Hamilton', 'Buffalo', 'Colfax', 'Custer',
       'Harlan', 'Franklin', 'Webster', 'Butler', 'Chase', 'Boone',
       'Platte', 'Polk', 'Box Butte', 'York', 'Saline', 'Seward', 'Adams',
       'Sherman', 'Clay', 'Nuckolls', 'Thayer', 'Gage', 'Valley',
       'Howard', 'Phelps', 'Fillmore', 'Kearney', 'Garden', 'Red Willow',
       'Morrill', 'Greeley', 'Cass', 'Furnas', 'Dawes', 'Holt', 'Grant',
       'Dundy', 'Deuel', 'Frontier', 'Brown', 'Knox', 'Sheridan',
       'Gosper', 'Lancaster', 'Sarpy', 'Antelope', 'Washington', 'Nance',
       'Loup', 'Madison', 'Garfield', 'Perkins', 'Rock', 'Logan',
       'Blaine', 'Saunders', 'Stanton', 'Cherry', 'Arthur', 'Douglas',
       'Wheeler', 'Otoe', 'Jefferson', 'Thomas', 'Pierce', 'Nemaha',
       'Thurston', 'Dixon', 'Cedar', 'Cuming', 'Dakota', 'Burt',
       'Johnson', 'Wayne', 

In [36]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['', 'NW S8-T13-R46West', 'SWSE S19-T9-R20West', ...,
       'NWSW S10-T33-R22West', 'NWNE S4-T29-R20West',
       'SWSW S34-T34-R32West'], dtype=object)

In [37]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['', 'Connected to pump into a common carrier', 'Single Project',
       'Part of a DEQ site plan for spill or underground storage',
       'Monitoring Wells Part of a single site'], dtype=object)

In [38]:
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Active', '', 'Active Registered Well'], dtype=object)

In [39]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Central Irrigation District', 'Steve Pam Dickey',
       'Charlene I Fitzgerald', ..., 'Eric Kate Livingston',
       'Susan Livingston',
       'James L Ella Lower Big Blue Natural Resources District'],
      dtype=object)

In [40]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
uniqueList = list(set([i.strip() for i in ','.join(outdf['in_BeneficialUseCategory'].astype(str)).split(',')]))
uniqueList.sort()
uniqueList

['Aquaculture',
 'Commercial/Industrial',
 'Conducting Groundwater for Irrigation (Source is a Registered Groundwater Well)',
 'Conducting Surface Water for Irrigation (Uses Water from an Existing Appropriation)',
 'Cooling',
 'Dewatering (Over 90 Days)',
 'Domestic',
 'Domestic Storage Use',
 'Fish Culture',
 'Fish and wildlife',
 'Flood Control',
 'Ground Heat Exchanger well - Closed Loop Heat Pump well',
 'Heat Pump well - Open Loop Heat Pump Well',
 'Incidental Underground Storage',
 'Induced Ground Water Recharge',
 'Injection',
 'Instream Basin-Management',
 'Instream Flow',
 'Intentional Underground Storage',
 'Irrigation',
 'Irrigation and Incidental Underground Storage',
 'Irrigation and Storage (an appropriation which was approved for both uses)',
 'Irrigation from Natural Lake',
 'Irrigation from Natural Stream',
 'Livestock',
 'Maintain Level of a Lake',
 'Manufacturing',
 'Monitoring (Ground Water Quality)',
 'Observation (Ground Water Levels)',
 'Other',
 'Pit (excavation

In [41]:
# Ensure Latitude entry is either numireic or a 0
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([41.85584158, 42.00158085, 41.99952353, ..., 42.84489444, 42.51755,
       42.87223055], dtype=object)

In [42]:
# Ensure Longitude entry is either numireic or a 0
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-103.70574234, -104.04617675, -104.04244188, ..., -99.90423888,
       -99.66146944, -101.08125555], dtype=object)

In [43]:
# Changing datatype of Priority Date to date fields entry
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

<DatetimeArray>
['1897-03-15 00:00:00', '1933-05-29 00:00:00', '1946-06-17 00:00:00',
 '1934-08-03 00:00:00', '1933-03-27 00:00:00', '1959-12-01 00:00:00',
 '1962-08-30 00:00:00', '1960-09-28 00:00:00', '1904-12-23 00:00:00',
 '1960-09-22 00:00:00',
 ...
 '2023-07-24 00:00:00', '2023-08-02 00:00:00', '2023-09-07 00:00:00',
 '2023-08-31 00:00:00', '2023-09-11 00:00:00', '2023-08-08 00:00:00',
 '2023-09-13 00:00:00', '2023-09-27 00:00:00', '2023-10-02 00:00:00',
 '2023-11-01 00:00:00']
Length: 5423, dtype: datetime64[ns]

In [44]:
# Ensure Flow entry is either numireic or a 0
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array([0.04, 0.16, 0.01, 0.02, 0.38, 0.06, 1.01, 0.33, 0.23, 1.43, 0.5,
       1.63, 0.12, 0.61, 0.86, 0.11, 2.22, 13.0, 0.21, 0.15, 1.28, '',
       0.48, 0.19, 1572.0, 780.59, 191.86, 15.63, 192.56, 0.14, 861.0,
       45.0, 230.07, 0.27, 0.74, 3.07, 2.0, 0.3, 0.79, 1.14, 0.57, 0.43,
       5.14, 0.46, 111.51, 10.0, 3.0, 1.0, 15.0, 1.59, 2.28, 18.19, 4.0,
       14.38, 24.38, 0.17, 2.24, 5.68, 95.94, 82.48, 3.05, 0.4, 2.26,
       66.18, 1.02, 37.76, 42.5, 60.0, 0.09, 20.06, 2.92, 0.67, 30.99,
       1.07, 2.71, 4.91, 0.22, 90.4, 28.57, 0.31, 42.71, 1.11, 59.88, 0.8,
       9.43, 8.93, 5.11, 3.22, 1.48, 0.51, 0.89, 0.88, 1.41, 9.76, 1.8,
       0.26, 0.82, 1.03, 0.29, 1.77, 1.68, 2.57, 1.6, 2.4, 0.64, 1.57,
       0.71, 19.12, 0.08, 0.2, 1.19, 3.99, 1.5, 4.8, 14.19, 2.39, 0.6,
       0.76, 2.3, 0.07, 2.97, 18.57, 12.44, 1.3, 8.69, 2.74, 0.63, 4.29,
       8.77, 14.43, 1.25, 0.85, 11.8, 1.37, 2.19, 31.88, 5.0, 0.1, 11.64,
       15.24, 27.62, 0.96, 11.17, 0.28, 0.69, 9.59, 4.98, 1.49,

In [45]:
# Ensure Volume entry is either numireic or a 0
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array(['', 10.0, 1.7, ..., 278.0, 1108.0, 648.0], dtype=object)

In [46]:
# Creating WaDE Custom water source native ID for easy water source identification
# use unique WaterSourceName and WaterSourceType values
# ----------------------------------------------------------------------------------------------------

# Create temp in_WaterSourceNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_WaterSourceName'] = outdf['in_WaterSourceName'].astype(str).str.strip()
dfTempID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_WaterSourceNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_WaterSourceName'].astype(str) + dfTempID['in_WaterSourceTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_WaterSourceNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_WaterSourceNativeID'], 
                                                                              row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeId1', 'wadeId2', 'wadeId3', ..., 'wadeId1533', 'wadeId1534',
       'wadeId1535'], dtype=object)

In [47]:
# Creating WaDE Custom site native ID for easy site identification
# use Unique Latitude, Longitude, SiteName and SiteTypeCV values
# ----------------------------------------------------------------------------------------------------

# Create temp in_SiteNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_Latitude'] = outdf['in_Latitude'].astype(str).str.strip()
dfTempID['in_Longitude'] = outdf['in_Longitude'].astype(str).str.strip()
dfTempID['in_SiteName'] = outdf['in_SiteName'].astype(str).str.strip()
dfTempID['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_SiteNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_Latitude'].astype(str) + dfTempID['in_Longitude'].astype(str) + dfTempID['in_SiteName'].astype(str)+ dfTempID['in_SiteTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_SiteNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB, valC, valD):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip() + str(valC).strip() + str(valD).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_SiteNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_SiteNativeID'], 
                                                                       row['in_Latitude'], row['in_Longitude'],
                                                                       row['in_SiteName'], row['in_SiteTypeCV']), axis=1)
outdf['in_SiteNativeID'].unique()

array(['wadeId1', 'wadeId2', 'wadeId3', ..., 'PODgw276203', 'PODgw276204',
       'PODgw276205'], dtype=object)

## Drop non-Active AllocationLegalStatusCV Water Rights
- For this {state name / organization}, we don't want water rights that are considered: Cancelled

In [48]:
# drop non-active AllocationLegalStatusCV values specific to that state.

# drop the list
dropLegalStatusList = ["Cancelled"] # enter string entries here

# drop rows from above list
outdf = outdf[outdf.in_AllocationLegalStatusCV.isin(dropLegalStatusList) == False].reset_index(drop=True)

print(len(outdf))
outdf['in_AllocationLegalStatusCV'].unique()

218185


array(['Active', '', 'Active Registered Well'], dtype=object)

## Shapefile Data
- For attaching geometry to csv inputs.

In [49]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('RawInputData/shapefile/BND_SurfaceWaterRights_DNR.zip')
print(len(dfPoUshapetemp))
dfPoUshapetemp.head()

7344


Unnamed: 0,OBJECTID,RightID,RightUse,Applicatio,Acres_Orde,WaterDivis,FieldOffic,CarrierA,CarrierB,SourceName,ProGrant,Units,GPM,Rate,CurrentTot,HUC12,PriorityDa,Beneficial,ApprovalDa,Irrigation,Notice_Sta,Notice_Eff,Notice_Typ,Reason_Adm,GlobalID,ProjectID,wadeID,Longitude,Latitude,GlobalID_1,Shape_Leng,Shape_Le_1,Shape_Area,geometry
0,1,4035,Irrigation from Natural Stream,A-15164,72.0,1D,Lincoln,Pump,,"Indian Creek, Big",1.03,CFS,462,70,72.0,102702020505.0,1978-01-17,,1978-11-15,,Open,2022-09-06,Open,Blue River Compact,{18D626A8-1BCF-43DA-9655-0003FBD63F15},0,wade1,-96.81605,40.04782,{FA10E0EF-71A5-4169-9CC2-33D006254082},4419.99441,0.03462,3e-05,"MULTIPOLYGON (((-96.81954 40.04925, -96.81951 ..."
1,3,10330,Irrigation from Natural Stream,A-11847B,46.7,1D,Lincoln,Pump,,Turkey Creek,0.67,CFS,300,70,46.7,102702040308.0,1970-01-07,,1970-06-05,,Open,2022-09-06,Open,Blue River Compact,{CFA8A10C-25B7-48C8-95BA-000F18E3A9D3},0,wade3,-97.01462,40.51107,{549984F2-6647-4625-9F6A-12F7A5C4134D},2318.35156,0.01856,2e-05,"POLYGON ((-97.01224 40.51280, -97.01224 40.512..."
2,5,5486,Irrigation from Natural Stream,A-11994,0.0,2A,Ord,Pump,,"Loup River, South",0.0,CFS,318,70,0.0,102100040508.0,1970-07-08,,,,,,,,{BFB820E1-081D-4418-BE5F-0028547A4F6F},0,wade5,-98.82382,41.03143,{3394FD90-5715-4315-BD55-8A60CB727D69},5900.61664,0.05139,2e-05,"MULTIPOLYGON (((-98.82727 41.03020, -98.82950 ..."
3,6,10117,Irrigation from Natural Stream,A-3857A,16.6,1C,Lincoln,Pump,,"Sandy Creek, Big",0.14,CFS,62,140,16.6,102702060807.0,1945-10-29,,1945-11-19,,Open,2019-04-23,Open,Natural Flow (Reminder),{8972DBF2-9D48-4791-AE74-00290D3ACE30},0,wade6,-97.38548,40.23899,{0C0D8DBF-F3D5-49AB-963F-B51263666B79},1746.90699,0.01436,1e-05,"POLYGON ((-97.38431 40.24029, -97.38408 40.240..."
4,7,3116,Irrigation from Natural Stream,A-13323,40.0,1D,Lincoln,Pump,,"School Creek, Trib. To",0.57,CFS,255,70,40.0,102702030203.0,1974-12-19,,1976-09-28,,Open,2016-04-17,Open,Natural Flow (Reminder),{6E3A3D1E-5509-4459-9D4C-002B533F49C6},0,wade7,-97.91262,40.64949,{70C8CA7B-973D-4FA9-B37F-E05563AEEED0},3281.57362,0.02684,2e-05,"MULTIPOLYGON (((-97.91352 40.65004, -97.91335 ..."


In [50]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['wadeID'].replace("", 0).fillna(0).astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head()

Unnamed: 0,in_SiteNativeID,geometry
0,POUwade1,"MULTIPOLYGON (((-96.81954 40.04925, -96.81951 ..."
1,POUwade3,"POLYGON ((-97.01224 40.51280, -97.01224 40.512..."
2,POUwade5,"MULTIPOLYGON (((-98.82727 41.03020, -98.82950 ..."
3,POUwade6,"POLYGON ((-97.38431 40.24029, -97.38408 40.240..."
4,POUwade7,"MULTIPOLYGON (((-97.91352 40.65004, -97.91335 ..."


### Exporting to Finished File

In [51]:
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 218185 entries, 0 to 218184
Data columns (total 63 columns):
 #   Column                                        Non-Null Count   Dtype         
---  ------                                        --------------   -----         
 0   WaDEUUID                                      218185 non-null  object        
 1   in_MethodUUID                                 218185 non-null  object        
 2   in_VariableSpecificUUID                       218185 non-null  object        
 3   in_OrganizationUUID                           218185 non-null  object        
 4   in_Geometry                                   218185 non-null  object        
 5   in_GNISFeatureNameCV                          218185 non-null  object        
 6   in_WaterQualityIndicatorCV                    218185 non-null  object        
 7   in_WaterSourceName                            218185 non-null  object        
 8   in_WaterSourceNativeID                        218185 n

In [52]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,neD0,NEwr_M1,NEwr_V1,NEwr_O1,,,,North Platte River,wadeId1,Surface Water,,,,4326,,101800090810,,41.85584,-103.70574,,,POD,,wadeId1,,,NE,,,,,,,,,,0.04000,Active,1,Central Irrigation District,1897-03-15,,,,,,Incidental Underground Storage,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
1,neD1,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,wadeId2,Surface Water,,,,4326,,101800090402,,42.00158,-104.04618,,,POD,,wadeId2,,,NE,,,,,,,,,,0.16000,Active,2,Steve Pam Dickey,1933-05-29,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
2,neD2,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,wadeId2,Surface Water,,,,4326,,101800090402,,42.00158,-104.04618,,,POD,,wadeId2,,,NE,,,,,,,,,,0.16000,Active,2,Steve Pam Dickey,1933-05-29,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
3,neD3,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,wadeId2,Surface Water,,,,4326,,101800090402,,42.00158,-104.04618,,,POD,,wadeId2,,,NE,,,,,,,,,,0.16000,Active,2,Steve Pam Dickey,1933-05-29,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
4,neD4,NEwr_M1,NEwr_V1,NEwr_O1,,,,Glenn Springs,wadeId2,Surface Water,,,,4326,,101800090402,,42.00158,-104.04618,,,POD,,wadeId2,,,NE,,,,,,,,,,0.16000,Active,2,Steve Pam Dickey,1933-05-29,,,,,,Irrigation from Natural Stream,,,,,,0,,,,,,,,,,https://nednr.nebraska.gov/dynamic/WaterRights...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218180,d188888,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,wadeId1535,Groundwater,,,Washington,4326,,,,41.54450,-96.19074,,,POD,NWSW S9-T18-R11East,PODgw276201,,Connected to pump into a common carrier,NE,,2024-01-04T00:00:00,,,,,,,,,Active Registered Well,G-199137,,NaT,,,,,,Ground Heat Exchanger well - Closed Loop Heat ...,,,,10/04/2023,,1,,,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
218181,d188889,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,wadeId1535,Groundwater,,,Lancaster,4326,,,,40.87147,-96.68617,,,POD,NENE S1-T10-R6East,PODgw276202,,Connected to pump into a common carrier,NE,,2024-01-05T00:00:00,,,,,,,,,Active Registered Well,G-199138,,NaT,,,,,,Ground Heat Exchanger well - Closed Loop Heat ...,,,,10/04/2023,,1,,,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
218182,d188890,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,wadeId1535,Groundwater,,,Keya Paha,4326,,,,42.84489,-99.90424,,,POD,NWSW S10-T33-R22West,PODgw276203,,Single Project,NE,,2024-01-05T00:00:00,,,,,,,,,Active Registered Well,G-199139,,NaT,,,,,15.00000,Domestic,,,,10/04/2023,,1,,,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...
218183,d188891,NEwr_M2,NEwr_V1,NEwr_O1,,,Fresh,,wadeId1535,Groundwater,,,Rock,4326,,,,42.51755,-99.66147,,,POD,NWNE S4-T29-R20West,PODgw276204,,Single Project,NE,,2024-01-05T00:00:00,,,,,,,,,Active Registered Well,G-199141,,NaT,,,,,850.00000,Irrigation,,,,10/04/2023,,1,,67.00000,,,,,,,,https://nednr.nebraska.gov/Dynamic/Wells/Wells...


In [53]:
# Export the output dataframe
outdf.to_csv('RawInputData/Pwr_neMain.zip', compression=dict(method='zip', archive_name='Pwr_neMain.csv'), index=False)  # The output, save as a zip
dfPoUshape.to_csv('RawInputData/P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.

In [None]:
https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json

In [None]:
# already done, skip ahead

%%time
# API retrieval
# shoot for 30 pages, that seemed to be all that AllSurfaceWaterPoints offered.
df = pd.DataFrame()
countPage = 1
while countPage < 30:
    url = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=" + str(countPage)
    print(url)

    # store in dataframe
    try:
        responseD = json.loads(requests.get(url).text)
        DtL = responseD['Results']
        length = len(DtL)
        for i in range(length):
            row = pd.DataFrame([DtL[i]])
            df = pd.concat([df, row])
    except:
        print("Error, issue with API return.")
    
    countPage = countPage + 1

# # Use only NeNDR Active provided sites
# df = df[df['SourceName'] == 'NeDNR']

print(len(df))
df.head()

In [None]:
    url = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=" + str(countPage)
    print(url)

    # store in dataframe
    try:
        responseD = json.loads(requests.get(url).text)
        DtL = responseD['Results']
        length = len(DtL)
        for i in range(length):
            row = pd.DataFrame([DtL[i]])
            df = pd.concat([df, row])