# Pre-processing Nebraska Allocation data for WaDEQA upload.
Date Updated: 10/04/2023
Purpose:  To pre-process the Nebraska data into one master file for simple DataFrame creation and extraction

### Notes:
- None at this time.

In [5]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [6]:
# ---- working directory ----
# NOTE: this is a machine-specific absolute path; every relative path used later
# in the notebook is resolved against it, so edit it before running elsewhere.
workingDirString = "G:/Shared drives/WaDE Data/Nebraska/WaterAllocation" # set working directory folder string here
os.chdir(workingDirString)
# Plain string literal: the previous f-string had no placeholders. Output is unchanged.
print('The working Directory is:', workingDirString)

The working Directory is: G:/Shared drives/WaDE Data/Nebraska/WaterAllocation


# POD Surface Water Data
- The data were already pulled from the API; the saved CSV is read instead.

In [None]:
# # already done, skip ahead

# %%time
# # API retrieval
# # shoot for 30 pages, that seemed to be all that AllSurfaceWaterPoints offered.
# df = pd.DataFrame()
# countPage = 1
# while countPage < 30:
#     url = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=" + str(countPage)
#     print(url)

#     # store in dataframe
#     try:
#         responseD = json.loads(requests.get(url).text)
#         DtL = responseD['Results']
#         length = len(DtL)
#         for i in range(length):
#             row = pd.DataFrame([DtL[i]])
#             df = pd.concat([df, row])
#     except:
#         print("Error, issue with API return.")
    
#     countPage = countPage + 1

# # # Use only NeNDR Active provided sites
# # df = df[df['SourceName'] == 'NeDNR']

# print(len(df))
# df.head()

In [None]:
# # already done, skip ahead

# # explode these list....
# dftemp = df.copy()
# dftemp = dftemp.explode('NoticeExemptions')
# dftemp = dftemp.explode('Notices')
# dftemp = dftemp.explode('PointOfDiversions')
# dftemp = dftemp.explode('Contacts')

# print(len(dftemp))
# dftemp.head(1)

In [None]:
# # already done, skip ahead

# # To unpack column's dictionary value new into separate columns -> contact to existing dataframe -> drop unpacked column
# dftemp = pd.concat([dftemp, dftemp["RightUse"].apply(pd.Series)], axis=1).drop(columns="RightUse")
# dftemp = pd.concat([dftemp, dftemp["NoticeExemptions"].apply(pd.Series)], axis=1).drop(columns="NoticeExemptions")
# dftemp = pd.concat([dftemp, dftemp["Notices"].apply(pd.Series)], axis=1).drop(columns="Notices")
# dftemp = pd.concat([dftemp, dftemp["PointOfDiversions"].apply(pd.Series)], axis=1).drop(columns="PointOfDiversions")
# dftemp = pd.concat([dftemp, dftemp["Contacts"].apply(pd.Series)], axis=1).drop(columns="Contacts")

# print(len(dftemp))
# dftemp.head(1)

In [None]:
# # already done, skip ahead

# # Clean Data
# # we don't really need the 'NoticeExemptions' at this time
# dropList = ['PumpSheets', 'SpecialConditions', 'NoticeID', 'NoticeType', 'NoticeDate', 'EffectiveDate', 'ReasonForAdminAction', 'Notes', 'DeleteNotice']
# dftemp = dftemp.drop(dropList, axis=1).drop_duplicates().reset_index(drop=True)
# #dftemp = dftemp.drop(dropList, axis=1)
# print(len(dftemp))
# dftemp.head()

In [None]:
# # already done, skip ahead

# # export api data
# dftemp.to_csv('RawInputData/AllSurfaceWaterPoints.zip', compression=dict(method='zip', archive_name='AllSurfaceWaterPoints.csv'), index=False)  # The output, save as a zip

In [None]:
# POD Data
# Read the API data that was saved to file by the (now commented-out) cells above.
# Missing values are replaced with empty strings.
PoDAAInput = "RawInputData/AllSurfaceWaterPoints.zip"
dfPoD = pd.read_csv(PoDAAInput).replace(np.nan, "")

# WaDE UUID tracker for data assessment.
# If the file has no WaDEUUID column yet, build one from the row index ("neD0", "neD1", ...)
# and overwrite the input file so the same IDs are present on the next read.
if 'WaDEUUID' not in dfPoD:
    dfPoD['WaDEUUID'] = "neD" + dfPoD.index.astype(str)
    dfPoD.to_csv('RawInputData/AllSurfaceWaterPoints.zip', compression=dict(method='zip', archive_name='AllSurfaceWaterPoints.csv'), index=False)
    
print(len(dfPoD))
dfPoD.head(1)

In [None]:
# Keep only records whose water right AND point of diversion are both 'Active'.
isActiveRight = dfPoD['RightStatus'] == 'Active'
isActivePOD = dfPoD['PODStatus'] == 'Active'
dfPoD = dfPoD[isActiveRight & isActivePOD].reset_index(drop=True)
print(len(dfPoD))
dfPoD.head(1)

In [None]:
# assign VariableSpecificUUID

def assignVariableSpecificUUID(colvA):
    """Map a reported unit label to its VariableSpecificUUID.

    The input is converted to a string and stripped before matching.
    Returns "NEwr_V1" for "CFS", "NEwr_V2" for "AF", and '' for anything else.
    """
    unitLabel = str(colvA).strip()
    uuidByUnit = {"CFS": "NEwr_V1", "AF": "NEwr_V2"}
    return uuidByUnit.get(unitLabel, '')

# Derive the VariableSpecificUUID of each POD record from its 'Units' value (row-wise).
dfPoD['in_VariableSpecificUUID'] = dfPoD.apply(lambda row: assignVariableSpecificUUID(row['Units']), axis=1)
dfPoD['in_VariableSpecificUUID'].unique()  # display distinct results as a quick check

In [None]:
# AllocationFlow_CFS - based on reported Unit

def assignAllocationFlow_CFS(colvA, colvB):
    """Return the granted amount as a flow only when its unit is CFS.

    Parameters
    ----------
    colvA : the granted amount (called with the 'ProGrant' column value).
    colvB : the unit label (called with the 'Units' column value).

    Returns
    -------
    colvA unchanged when the stripped unit label equals "CFS"; '' otherwise,
    including when colvA itself is '' or null.
    """
    if colvA == '' or pd.isnull(colvA):
        return ''
    # str() keeps non-string unit values (None, NaN, numbers) from raising
    # AttributeError on .strip(); this matches assignVariableSpecificUUID.
    if str(colvB).strip() == "CFS":
        return colvA
    return ''

# Populate the flow column from 'ProGrant' for rows whose 'Units' is CFS; other rows get ''.
dfPoD['AllocationFlow_CFS'] = dfPoD.apply(lambda row: assignAllocationFlow_CFS(row['ProGrant'], row['Units']), axis=1)
dfPoD['AllocationFlow_CFS'].unique()  # display distinct results as a quick check

In [None]:
# AllocationVolume_AF - based on reported Unit

def assignAllocationVolume_AF(colvA, colvB):
    """Return the granted amount as a volume only when its unit is AF.

    Parameters
    ----------
    colvA : the granted amount (called with the 'ProGrant' column value).
    colvB : the unit label (called with the 'Units' column value).

    Returns
    -------
    colvA unchanged when the stripped unit label equals "AF"; '' otherwise,
    including when colvA itself is '' or null.
    """
    if colvA == '' or pd.isnull(colvA):
        return ''
    # str() keeps non-string unit values (None, NaN, numbers) from raising
    # AttributeError on .strip(); this matches assignVariableSpecificUUID.
    if str(colvB).strip() == "AF":
        return colvA
    return ''

# Populate the volume column from 'ProGrant' for rows whose 'Units' is AF; other rows get ''.
dfPoD['AllocationVolume_AF'] = dfPoD.apply(lambda row: assignAllocationVolume_AF(row['ProGrant'], row['Units']), axis=1)
dfPoD['AllocationVolume_AF'].unique()  # display distinct results as a quick check

In [None]:
# Build a per-right URL by appending the RightID (as text) to the SWRDetailPage address.
dfPoD['WaterAllocationNativeURL'] = 'https://nednr.nebraska.gov/dynamic/WaterRights/WaterRights/SWRDetailPage?RightId=' + dfPoD['RightID'].astype(str)
dfPoD.head(1)

In [None]:
# Create the output POD (surface water) dataframe.
# Columns are added one at a time, so the order of the statements below is the
# column order of the result. The first assignment (a Series) sets the row index;
# the constant assignments that follow are broadcast to every row.
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfPoD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NEwr_M1" # for surface water

# Variable Info
df['in_VariableSpecificUUID'] = dfPoD['in_VariableSpecificUUID']

# Organization Info
df['in_OrganizationUUID'] = "NEwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfPoD['SourceName']
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = "Surface Water"

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = "4326"
df['in_Geometry'] = ""  # already created under WaterSource Info; this re-assigns the same column
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = dfPoD['HUC12']
df['in_HUC8'] = ""
df['in_Latitude'] = dfPoD['LatitudeDecimalDegrees']
df['in_Longitude'] = dfPoD['LongitudeDecimalDegrees']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
#df['in_SiteNativeID'] = "POD" + dfPoD['PointOfDiversionID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SiteNativeID'] = ""  # left blank here; blank IDs are filled in by a later cell
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "NE"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfPoD['AllocationFlow_CFS']
df['in_AllocationLegalStatusCV'] = dfPoD['RightStatus']
# RightID as lower-cased, stripped text; blank or missing IDs become "0".
df['in_AllocationNativeID'] =  dfPoD['RightID'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = dfPoD['FirstName'] + " " + dfPoD['LastName']
df['in_AllocationPriorityDate'] = dfPoD['PriorityDate']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfPoD['AllocationVolume_AF']
df['in_BeneficialUseCategory'] = dfPoD['UseDescription']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfPoD['WaterAllocationNativeURL']

# Snapshot the result under its own name (df is reused by later cells) and drop exact duplicate rows.
outdfPoD = df.copy()
outdfPoD = outdfPoD.drop_duplicates().reset_index(drop=True)
print(len(outdfPoD))
outdfPoD.head()

# POU Surface Water Data
- will extract and share some elements from above POD surface water data via RightID

In [None]:
# Input File - PoU Shapefile Data
# Read the zipped shapefile, blank out missing values and the literal text "nan,nan",
# then drop the geometry column so only the attribute table remains.
pouInput = 'RawInputData/shapefile/BND_SurfaceWaterRights_DNR.zip'
dfPOU = gpd.read_file(pouInput).replace(np.nan, "").replace("nan,nan", "") #geodataframe read
dfPOU = dfPOU.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment.
# If there is no WaDEUUID column, build one from the row index ("waU0", "waU1", ...)
# and export the attribute table as a zipped csv.
# NOTE(review): the csv is written to RawInputData/, not to the RawInputData/shapefile/
# file read above, so the shapefile itself is not updated by this step - confirm intent.
if 'WaDEUUID' not in dfPOU:
    dfPOU['WaDEUUID'] = "waU" + dfPOU.index.astype(str)
    dfPOU.to_csv('RawInputData/BND_SurfaceWaterRights_DNR.zip', compression=dict(method='zip', archive_name='BND_SurfaceWaterRights_DNR.csv'), index=False)

print(len(dfPOU))
dfPOU.head(1)

In [None]:
# assign VariableSpecificUUID
# (this cell re-defines the function of the same name from the POD section)

def assignVariableSpecificUUID(colvA):
    """Map a reported unit label to its VariableSpecificUUID.

    The input is converted to a string and stripped before matching.
    Returns "NEwr_V1" for "CFS", "NEwr_V2" for "AF", and '' for anything else.
    """
    unitLabel = str(colvA).strip()
    if unitLabel == "AF":
        return "NEwr_V2"
    return "NEwr_V1" if unitLabel == "CFS" else ''

# Derive the VariableSpecificUUID of each POU record from its 'Units' value (row-wise).
dfPOU['in_VariableSpecificUUID'] = dfPOU.apply(lambda row: assignVariableSpecificUUID(row['Units']), axis=1)
dfPOU['in_VariableSpecificUUID'].unique()  # display distinct results as a quick check

In [None]:
# AllocationFlow_CFS - based on reported Unit
# (this cell re-defines the function of the same name from the POD section)

def assignAllocationFlow_CFS(colvA, colvB):
    """Return the granted amount as a flow only when its unit is CFS.

    Parameters
    ----------
    colvA : the granted amount (called with the 'ProGrant' column value).
    colvB : the unit label (called with the 'Units' column value).

    Returns
    -------
    colvA unchanged when the stripped unit label equals "CFS"; '' otherwise,
    including when colvA itself is '' or null.
    """
    if colvA == '' or pd.isnull(colvA):
        return ''
    # str() keeps non-string unit values (None, NaN, numbers) from raising
    # AttributeError on .strip(); this matches assignVariableSpecificUUID.
    if str(colvB).strip() == "CFS":
        return colvA
    return ''

# Populate the flow column from 'ProGrant' for rows whose 'Units' is CFS; other rows get ''.
dfPOU['AllocationFlow_CFS'] = dfPOU.apply(lambda row: assignAllocationFlow_CFS(row['ProGrant'], row['Units']), axis=1)
dfPOU['AllocationFlow_CFS'].unique()  # display distinct results as a quick check

In [None]:
# AllocationVolume_AF - based on reported Unit
# (this cell re-defines the function of the same name from the POD section)

def assignAllocationVolume_AF(colvA, colvB):
    """Return the granted amount as a volume only when its unit is AF.

    Parameters
    ----------
    colvA : the granted amount (called with the 'ProGrant' column value).
    colvB : the unit label (called with the 'Units' column value).

    Returns
    -------
    colvA unchanged when the stripped unit label equals "AF"; '' otherwise,
    including when colvA itself is '' or null.
    """
    if colvA == '' or pd.isnull(colvA):
        return ''
    # str() keeps non-string unit values (None, NaN, numbers) from raising
    # AttributeError on .strip(); this matches assignVariableSpecificUUID.
    if str(colvB).strip() == "AF":
        return colvA
    return ''

# Populate the volume column from 'ProGrant' for rows whose 'Units' is AF; other rows get ''.
dfPOU['AllocationVolume_AF'] = dfPOU.apply(lambda row: assignAllocationVolume_AF(row['ProGrant'], row['Units']), axis=1)
dfPOU['AllocationVolume_AF'].unique()  # display distinct results as a quick check

In [None]:
# Build a per-right URL by appending the RightID (as text) to the SWRDetailPage address.
dfPOU['WaterAllocationNativeURL'] = 'https://nednr.nebraska.gov/dynamic/WaterRights/WaterRights/SWRDetailPage?RightId=' + dfPOU['RightID'].astype(str)
dfPOU.head(1)

In [None]:
# Inspect the distinct RightUse values (used as the beneficial use category in the next cell).
dfPOU['RightUse'].unique()

In [None]:
# Create the output POU (surface water) dataframe.
# Columns are added one at a time, so the order of the statements below is the
# column order of the result. The first assignment (a Series) sets the row index;
# the constant assignments that follow are broadcast to every row.
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfPOU['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NEwr_M1" # for surface water

# Variable Info
df['in_VariableSpecificUUID'] = dfPOU['in_VariableSpecificUUID']

# Organization Info
df['in_OrganizationUUID'] = "NEwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfPOU['SourceName']
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = "Surface Water"

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = "4326"
df['in_Geometry'] = ""  # already created under WaterSource Info; this re-assigns the same column
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = dfPOU['HUC12']
df['in_HUC8'] = ""
df['in_Latitude'] = dfPOU['Latitude']
df['in_Longitude'] = dfPOU['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
# Site ID is "POU" + the shapefile's wadeID; blank or missing wadeID values become "POU0".
df['in_SiteNativeID'] = "POU" + dfPOU['wadeID'].replace("", 0).fillna(0).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "NE"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfPOU['AllocationFlow_CFS']
df['in_AllocationLegalStatusCV'] = ""
# RightID as lower-cased, stripped text; blank or missing IDs become "0".
df['in_AllocationNativeID'] =  dfPOU['RightID'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = ""
df['in_AllocationPriorityDate'] = dfPOU['PriorityDa']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfPOU['AllocationVolume_AF']
df['in_BeneficialUseCategory'] = dfPOU['RightUse']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfPOU['WaterAllocationNativeURL']

# Snapshot the result under its own name (df is reused by later cells) and drop exact duplicate rows.
outdfPoU = df.copy()
outdfPoU = outdfPoU.drop_duplicates().reset_index(drop=True)
print(len(outdfPoU))
outdfPoU.head()

# POD Groundwater Data
- The data were already pulled from the API; the saved CSV is read instead.

In [None]:
# %%time
# # API retrieval
# # Checked api website and would allow up to 270 input as page. 

# columns_to_keep = ['WellID','RegistrationNumber','Status','WellUseDescription','NRDName','CountyName','Location','SurfaceWaterAppropriation','Acres','SeriesType','Pump','OwnerContact','RegistrationDate','CompletionDate','LastChangeDate','Latitude','Longitude']

# df = pd.DataFrame(columns=columns_to_keep)

# countPage = 1
# while countPage <= 260:
#    url = "https://nednr.nebraska.gov/IwipApi/api/v1/Wells/AllWells?page=" + str(countPage)
#    print(url)

#    # Store data in DataFrame
#    try:
#        response = requests.get(url)
#        response.raise_for_status()  # Raise an exception for bad status codes (e.g., 404)
#        responseD = json.loads(response.text)
#        DtL = responseD['Results']
#        length = len(DtL)
#        for i in range(length):
#            row = pd.DataFrame([DtL[i]])
           
#            # Check if the 'Status' is 'Active Registered Well' before adding it to the DataFrame
#            if row['Status'].iloc[0] == 'Active Registered Well':
#                # Select only the desired columns
#                row = row[columns_to_keep]
#                df = pd.concat([df, row])
#    except requests.exceptions.RequestException as e:
#        print(f"Request error: {e}")
#    except json.JSONDecodeError as e:
#        print(f"JSON decoding error: {e}")
#    except KeyError as e:
#        print(f"KeyError: {e}")
    
#    countPage = countPage + 1

# # Reset the DataFrame index
# df.reset_index(drop=True, inplace=True)

# print(len(df))
# df.head()

In [None]:
# # already done, skip ahead

# # To unpack column's dictionary value new into separate columns -> contact to existing dataframe -> drop unpacked column
# dftemp = df.copy()
# dftemp = pd.concat([dftemp, dftemp["Pump"].apply(pd.Series)], axis=1).drop(columns=["Pump","PumpColumn_Diameter","PumpDepth","PumpInstallationDate","PumpInstallerContact"])
# dftemp = pd.concat([dftemp, dftemp["OwnerContact"].apply(pd.Series)], axis=1).drop(columns=["OwnerContact","ContactId","ContactType","SeqNum","BeginDate","EndDate","Address1","Address2","City","State","Zip","Phone1","Phone2","Phone3","LicenseNumber","IsDeleted"])

# print(len(dftemp))
# dftemp.head(5)

In [None]:
# # already done, skip ahead

# # export api data
# dftemp.to_csv('RawInputData/AllWells.zip', compression=dict(method='zip', archive_name='AllWells.csv'), index=False)  # The output, save as a zip"

In [None]:
# Input File
# Read the saved groundwater well data (ISO-8859-1 encoded) and replace missing values with "".
FIgw_PoD = "RawInputData/AllWells.zip"
dfgwinPOD = pd.read_csv(FIgw_PoD, encoding = "ISO-8859-1").replace(np.nan, "")

# WaDE UUID tracker for data assessment.
# If the file has no WaDEUUID column yet, build one from the row index ("d0", "d1", ...)
# and overwrite the input file so the same IDs are present on the next read.
if 'WaDEUUID' not in dfgwinPOD:
    dfgwinPOD['WaDEUUID'] = "d" + dfgwinPOD.index.astype(str)
    dfgwinPOD.to_csv('RawInputData/AllWells.zip', compression=dict(method='zip', archive_name='AllWells.csv'), index=False)

print(len(dfgwinPOD))
dfgwinPOD.head()

In [None]:
# Create the output POD (groundwater) dataframe.
# Columns are added one at a time, so the order of the statements below is the
# column order of the result. The first assignment (a Series) sets the row index;
# the constant assignments that follow are broadcast to every row.
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfgwinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NEwr_M2" # for groundwater

# Variable Info
df['in_VariableSpecificUUID'] = "NEwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NEwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = "Fresh"
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = "" # auto fill in below if not provided
df['in_WaterSourceTypeCV'] = "Groundwater"

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = dfgwinPOD['CountyName']
df['in_EPSGCodeCV'] = "4326"
df['in_Geometry'] = ""  # already created under WaterSource Info; this re-assigns the same column
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfgwinPOD['Latitude']
df['in_Longitude'] = dfgwinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"  # "Point of Diversion"
df['in_SiteName'] = dfgwinPOD['Location']
df['in_SiteNativeID'] = "PODgw" + dfgwinPOD['WellID'].astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfgwinPOD['SeriesType']
df['in_StateCV'] = "NE"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = dfgwinPOD['RegistrationDate']
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfgwinPOD['Status']
df['in_AllocationNativeID'] = dfgwinPOD['RegistrationNumber']
# FIX: the last name previously came from dfPoD (the surface-water POD frame). Series
# addition aligns on the index, so wells were paired with the last name of an unrelated
# surface-water record, or with NaN past the end of dfPoD. Both name parts now come
# from the well data.
# NOTE(review): assumes dfgwinPOD has a 'LastName' column next to 'FirstName' - confirm.
df['in_AllocationOwner'] = dfgwinPOD['FirstName'] + " " + dfgwinPOD['LastName']
df['in_AllocationPriorityDate'] = ""
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
# NOTE(review): a pump rate is stored in the volume (AF) column - confirm the units are intended.
df['in_AllocationVolume_AF'] = dfgwinPOD['PumpRate']
df['in_BeneficialUseCategory'] = dfgwinPOD['WellUseDescription']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = "10/04/2023"
df['in_DataPublicationDOI'] = ""
# NOTE(review): string "1" here, whereas the surface-water frames use the integer 0 - confirm dtype.
df['in_ExemptOfVolumeFlowPriority'] = "1"
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfgwinPOD['Acres']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = "https://nednr.nebraska.gov/Dynamic/Wells/Wells/WellDetails?WellId=" + dfgwinPOD['WellID'].astype(str).str.strip()

# Snapshot the result under its own name and drop exact duplicate rows.
outgwPOD = df.copy()
outgwPOD = outgwPOD.drop_duplicates().reset_index(drop=True)
print(len(outgwPOD))
outgwPOD.head()

## Concatenate Data

In [None]:
# Stack the three per-source outputs, drop exact duplicate rows, renumber the
# index, and turn any remaining NaN into empty strings.
frames = [outdfPoD, outdfPoU, outgwPOD]
outdf = (
    pd.concat(frames)
    .drop_duplicates()
    .reset_index(drop=True)
    .replace(np.nan, "")
)
print(len(outdf))

## Clean Data & WaDE Custom Elements

In [None]:
# Make sure HUC12 values are stored as int (blank / null stay as empty string).

def assignHUC12(colrowValue):
    """Return the value converted with int(), or '' when it is '' or null."""
    isBlank = colrowValue == '' or pd.isnull(colrowValue)
    return '' if isBlank else int(colrowValue)

# Apply the HUC12 conversion row by row.
outdf['in_HUC12'] = outdf.apply(lambda row: assignHUC12(row['in_HUC12']), axis=1)
outdf['in_HUC12'].unique()  # display distinct results as a quick check

In [None]:
# Shorten two long beneficial use names.
def longBenUseTempFix(val):
    """Return the shortened wording for two specific long beneficial use names.

    Any other value is returned unchanged.
    """
    shortenedNames = (
        ("Supplemental Cooling (an app. for water for cooling through a system that has a prior app. for cooling)",
         "Supplemental Cooling (app for water for cooling through a system that has a prior app for cooling)"),
        ("Supplemental Irrigation (irrig. from reservoir on lands also covered by a natural flow appropriation)",
         "Supplemental Irrigation (irrig. from reservoir on lands covered by a natural flow appropriation)"),
    )
    for longName, shortName in shortenedNames:
        if val == longName:
            return shortName
    return val
# Apply the shortening row by row.
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: longBenUseTempFix(row['in_BeneficialUseCategory']), axis=1)

# Print each distinct value as a quoted, comma-terminated line.
for x in outdf['in_BeneficialUseCategory'].sort_values().unique():
    # One real f-string placeholder replaces the placeholder-less f-string plus
    # manual concatenation; !s keeps the explicit str() conversion.
    print(f'"{x!s}",')

In [None]:
# Clean owner name up
def removeSpecialCharsFunc(Val):
    """Strip punctuation from a name and normalise its casing.

    The value is converted to a string, the characters $ @ & . ; / ) ( - are
    removed, the text is title-cased, each double space is replaced by a single
    space (one pass only), and leading/trailing whitespace is stripped.
    """
    Val = str(Val)
    # Raw string: in a normal literal "\)" is an invalid escape sequence, which newer
    # Python versions warn about. The compiled pattern is unchanged.
    Val = re.sub(r"[$@&.;/\)(-]", "", Val).title().replace("  ", " ").strip()
    return Val

In [None]:
# Clean the owner names row by row.
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()  # display distinct results as a quick check

In [None]:
# Apply the same clean-up to the water source names.
outdf['in_WaterSourceName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()  # display distinct results as a quick check

In [None]:
# Ensure Empty String

def ensureEmptyString(val):
    """Return the stripped string form of val, with blanks and "nan" mapped to "".

    The value is converted with str() and stripped. If the result is empty or is
    the literal text "nan" (the string form of a float NaN), "" is returned;
    otherwise the stripped text is returned.
    """
    outString = str(val).strip()
    # After str() + strip() the text can never equal " " or be a null object, so
    # those two former checks were unreachable and have been removed.
    if outString in ("", "nan"):
        return ""
    return outString

In [None]:
# Normalise blank / "nan" water source types to empty strings.
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()  # display distinct results as a quick check

In [None]:
# Normalise blank / "nan" county values to empty strings.
outdf['in_County'] = outdf.apply(lambda row: ensureEmptyString(row['in_County']), axis=1)
outdf['in_County'].unique()  # display distinct results as a quick check

In [None]:
# Normalise blank / "nan" site names to empty strings.
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()  # display distinct results as a quick check

In [None]:
# Normalise blank / "nan" site types to empty strings.
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()  # display distinct results as a quick check

In [None]:
# Normalise blank / "nan" legal status values to empty strings.
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()  # display distinct results as a quick check

In [None]:
# Normalise blank / "nan" owner names to empty strings.
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()  # display distinct results as a quick check

In [None]:
# Normalise blank / "nan" beneficial use values to empty strings.
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
# Join every value with commas, split on commas again, strip each piece and de-duplicate:
# this lists the individual uses even where one cell holds a comma-separated list.
uniqueList = list(set([i.strip() for i in ','.join(outdf['in_BeneficialUseCategory'].astype(str)).split(',')]))
uniqueList.sort()
uniqueList

In [None]:
# Ensure Latitude is numeric: non-numeric entries are coerced to NaN, then zeros and NaN become "".
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

In [None]:
# Ensure Longitude is numeric: non-numeric entries are coerced to NaN, then zeros and NaN become "".
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

In [None]:
# Convert Priority Date to datetime; entries that cannot be parsed become NaT.
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
# Format as mm/dd/YYYY text and parse it back: any time-of-day part is dropped and the
# column is again datetime (not text) afterwards.
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

In [None]:
# Ensure Flow is numeric and rounded to 2 decimals: non-numeric entries are coerced to NaN,
# then zeros and NaN become "".
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

In [None]:
# Ensure Volume is numeric and rounded to 2 decimals: non-numeric entries are coerced to NaN,
# then zeros and NaN become "".
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# use unique WaterSourceName and WaterSourceType values
# ----------------------------------------------------------------------------------------------------

# Create temp in_WaterSourceNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    """Build a WaDE custom ID: the text "wadeId" followed by the value's string form."""
    return "wadeId" + str(colRowValue)

# Collect the distinct (name, type) water source combinations, as stripped text.
dfTempID = pd.DataFrame()
dfTempID['in_WaterSourceName'] = outdf['in_WaterSourceName'].astype(str).str.strip()
dfTempID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

# Number the distinct combinations 1..N (on the same index as dfTempID) and turn each
# number into a "wadeId<N>" identifier.
dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_WaterSourceNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
# Lookup key = name + type concatenated with no separator; IdDict maps key -> identifier.
dfTempID['linkKey'] = dfTempID['in_WaterSourceName'].astype(str) + dfTempID['in_WaterSourceTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_WaterSourceNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB):
    """Return the existing ID if there is one, otherwise look one up in IdDict.

    checkVal is converted to a stripped string; when it is non-empty it is
    returned as-is. When it is empty, the stripped string forms of valA and valB
    are concatenated and used as the key into the module-level IdDict
    (a missing key raises KeyError).
    """
    existingId = str(checkVal).strip()
    if existingId != "":
        return existingId
    return IdDict[str(valA).strip() + str(valB).strip()]

# Fill blank water source IDs from the lookup; existing IDs are kept (stripped).
outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_WaterSourceNativeID'], 
                                                                              row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()  # display distinct results as a quick check

In [None]:
# Creating WaDE Custom site native ID for easy site identification
# use Unique Latitude, Longitude, SiteName and SiteTypeCV values
# ----------------------------------------------------------------------------------------------------

# Create temp in_SiteNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    """Build a WaDE custom ID: the text "wadeId" followed by the value's string form."""
    return "wadeId" + str(colRowValue)

# Collect the distinct (latitude, longitude, site name, site type) combinations, as stripped text.
dfTempID = pd.DataFrame()
dfTempID['in_Latitude'] = outdf['in_Latitude'].astype(str).str.strip()
dfTempID['in_Longitude'] = outdf['in_Longitude'].astype(str).str.strip()
dfTempID['in_SiteName'] = outdf['in_SiteName'].astype(str).str.strip()
dfTempID['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

# Number the distinct combinations 1..N (on the same index as dfTempID) and turn each
# number into a "wadeId<N>" identifier.
dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_SiteNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
# Lookup key = the four fields concatenated with no separator. IdDict is rebound here,
# replacing the water source lookup built in the previous cell.
dfTempID['linkKey'] = dfTempID['in_Latitude'].astype(str) + dfTempID['in_Longitude'].astype(str) + dfTempID['in_SiteName'].astype(str)+ dfTempID['in_SiteTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_SiteNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE custom site native ID
# NOTE: this 5-argument definition shadows the 3-argument retrieveIdValueFunc
# defined earlier in the notebook; re-run that earlier definition before
# re-running its water source lookup.
def retrieveIdValueFunc(checkVal, valA, valB, valC, valD):
    """Return the existing ID when present, otherwise look one up in ``IdDict``.

    Parameters
    ----------
    checkVal : any
        Current ID value. It is converted with ``str`` and stripped of
        surrounding whitespace before being tested.
    valA, valB, valC, valD : any
        Values whose stripped string forms are concatenated, in that order and
        with no separator, to form the ``IdDict`` lookup key.

    Returns
    -------
    The stripped string form of ``checkVal`` when it is not empty; otherwise
    the value stored in the module-level ``IdDict`` under the built key.

    Raises
    ------
    KeyError
        If ``checkVal`` is empty and the built key is not in ``IdDict``.
    """
    currentId = str(checkVal).strip()
    if currentId:
        return currentId
    keyParts = (valA, valB, valC, valD)
    return IdDict["".join(str(part).strip() for part in keyParts)]

# Fill blank in_SiteNativeID values from the latitude + longitude + name + type
# lookup; non-blank values are kept (as stripped strings).
outdf['in_SiteNativeID'] = outdf.apply(
    lambda rec: retrieveIdValueFunc(
        rec['in_SiteNativeID'],
        rec['in_Latitude'],
        rec['in_Longitude'],
        rec['in_SiteName'],
        rec['in_SiteTypeCV'],
    ),
    axis=1,
)
outdf['in_SiteNativeID'].unique()

## Drop non-Active AllocationLegalStatusCV Water Rights
- For Nebraska, we don't want water rights whose legal status is: Cancelled

In [None]:
# Remove water rights whose AllocationLegalStatusCV is not wanted for this state.

# legal status values to remove
dropLegalStatusList = ["Cancelled"] # enter string entries here

# keep only rows whose legal status is NOT in the list above, then renumber the index
outdf = outdf.loc[~outdf['in_AllocationLegalStatusCV'].isin(dropLegalStatusList)].reset_index(drop=True)

print(len(outdf.index))
outdf['in_AllocationLegalStatusCV'].unique()

## Shapefile Data
- For attaching geometry to csv inputs.

In [None]:
# PoU shapefile input: read the zipped surface water rights boundary shapefile.
dfPoUshapetemp = gpd.read_file("RawInputData/shapefile/BND_SurfaceWaterRights_DNR.zip")
print(len(dfPoUshapetemp.index))  # number of shapefile records read
dfPoUshapetemp.head()

In [None]:
# Keep only a site ID and the geometry from the shapefile, one row per unique pair.
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(
    {
        # blank / missing wadeID values become 0 before the "POU" prefix is added
        'in_SiteNativeID': "POU" + dfPoUshapetemp['wadeID'].replace("", 0).fillna(0).astype(str),
        'geometry': dfPoUshapetemp['geometry'],
    },
    columns=columnsList,
)
dfPoUshape = dfPoUshape.drop_duplicates()  # defaults: all columns, keep first, original index
dfPoUshape.head()

### Exporting to Finished File

In [None]:
# Summarize the output dataframe (columns, non-null counts, dtypes) before export.
outdf.info()

In [None]:
# Display the output dataframe for a visual check.
outdf

In [None]:
# Export the results as zipped CSV files (row index not written).

# main output dataframe
outdf.to_csv(
    'RawInputData/Pwr_neMain.zip',
    index=False,
    compression={'method': 'zip', 'archive_name': 'Pwr_neMain.csv'},
)

# output geometry
dfPoUshape.to_csv(
    'RawInputData/P_Geometry.zip',
    index=False,
    compression={'method': 'zip', 'archive_name': 'P_Geometry.csv'},
)

In [8]:
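# # already done, skip ahead (one-time pull of the SurfaceWaterRightsDiversionsExternal_DNR feature service; outputs below are from that earlier run)
# %%time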
# df = pd.DataFrame()
# countPage = 1
# resultOffset = 0
# while countPage < 300:
#     url = "https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=" + str(resultOffset)
#     print(url)

#     # store in dataframe
#     try:
#         responseD = json.loads(requests.get(url).text)
#         DtL = responseD['features']
#         length = len(DtL)
#         for i in range(length):
#             row = pd.DataFrame([DtL[i]])
#             df = pd.concat([df, row])
#     except:
#         print("Error, issue with API return.")
    
#     countPage = countPage + 1
#     resultOffset = resultOffset + 2000


# print(len(df))
# df.head()

https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=0
https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=2000
https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=4000
https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=6000
https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=8000
https://gis.ne.gov/Enterprise/rest/services/SurfaceWaterRightsDiversionsExternal_DNR/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json&resultOffset=10000


Unnamed: 0,attributes,geometry
0,"{'OBJECTID': 23, 'GlobalID': '{F5294716-BEC7-4...","{'x': -103.92501289372676, 'y': 41.93842978290..."
0,"{'OBJECTID': 24, 'GlobalID': '{E7F3BE15-63A7-4...","{'x': -103.92501289372676, 'y': 41.93842978290..."
0,"{'OBJECTID': 25, 'GlobalID': '{F9E61E13-17F1-4...","{'x': -98.44713379086971, 'y': 40.44817767278397}"
0,"{'OBJECTID': 26, 'GlobalID': '{F06C7349-355D-4...","{'x': -98.44007578933547, 'y': 40.45935079494864}"
0,"{'OBJECTID': 27, 'GlobalID': '{308E93CF-5885-4...","{'x': -97.03523863985306, 'y': 40.04469841045417}"


In [9]:
# dftemp = df.copy()
# dftemp = pd.concat([dftemp, dftemp["attributes"].apply(pd.Series)], axis=1).drop(columns="attributes")
# dftemp = pd.concat([dftemp, dftemp["geometry"].apply(pd.Series)], axis=1).drop(columns="geometry")
# print(len(dftemp))
# dftemp.head()

12368


Unnamed: 0,OBJECTID,GlobalID,RightId,RightStatus,PointOfDiversionStatus,PrimaryIndicator,Upstream,DownstreamOrder,ApplicationNumber,LegalDescription,WaterDivision,FieldOffice,SourceName,CarrierA,GallonsPerMinute,InstantaneousGrantCounted,VolumetricGrantCounted,ReservoirStorageGrantCounted,AcresCounted,Rate,AnnualReportRequired,Annotation,CountyName,NRDName,SpecialConditions1,SpecialConditions2,SpecialConditions3,NoticeExemptions,PriorityDate,BeneficialDate,ApprovalDate,FirstName,LastName,LastOrderDate,IrrigationDistrict,CanalSystem,PermitGroup,PumpsheetId,InspectedBy,DateInspected,PumpSiteSetup,NoticeId,NoticeType,NoticeEffectiveDate,ReasonAdministrativeAction,PurposeOfUse,FlowType,IsSupplemental,IsIncidental,IsOffChannelStorage,InstantaneousCurrentGrant,VolumetricCurrentGrant,ReservoirStorageCurrentGrant,CurrentTotalAcres,HUC12,x,y,0
0,23,{F5294716-BEC7-4ABE-A110-195CEBF0BD4B},72,Active,Active,Primary,Yes,7300.0,D-920,SW NW S27 T23N-R57W,1A,Bridgeport,North Platte River,Enterprise Canal,51526.0,Yes,No,No,Yes,43.0,No,M-1,Scotts Bluff,North Platte,,,,,-2548584000000.0,,-2303035200000.0,,Enterprise Irrigation District,882532800000.0,Enterprise Irrigation District,Enterprise Canal,Public,31734.0,jeff.nichols@Nebraska.gov,1632916800000.0,No,2332.0,Open,1662724800000.0,Canal Administration,Irrigation,Direct Flow,No,No,No,111.51,,,4826.96,101800090605,-103.92501,41.93843,
0,24,{E7F3BE15-63A7-48CF-ACC3-1A2750075CCF},92,Active,Active,Primary,Yes,7305.0,D-920,SW NW S27 T23N-R57W,1A,Bridgeport,North Platte River,Enterprise Canal,8163.0,Yes,No,No,No,,No,"U-31, M-1",Scotts Bluff,North Platte,,,,,-2548584000000.0,,-2303035200000.0,,Enterprise Irrigation District,882532800000.0,Enterprise Irrigation District,Enterprise Canal,Public,,,,,2332.0,Open,1662724800000.0,Canal Administration,Incidental Underground Storage,Direct Flow,No,Yes,No,18.19,,,0.0,101800090605,-103.92501,41.93843,
0,25,{F9E61E13-17F1-4FF9-91A7-A00D1937EE44},1889,Active,Active,Primary,Yes,214100.0,A-8582,SW NE S33 T06N-R10W,1C,Lincoln,"Blue River, Little",Pump,49.0,Yes,No,No,Yes,70.0,No,,Adams,Little Blue,,,,,-429364800000.0,,-418478400000.0,Royce,Rehtus,-19483200000.0,,,Private,36254.0,jordan.koerwitz@Nebraska.gov,1686139200000.0,No,681.0,Open,1556020800000.0,Natural Flow (Reminder),Irrigation,Direct Flow,No,No,No,0.11,,,15.0,102702060307,-98.44713,40.44818,
0,26,{F06C7349-355D-478F-A050-F97E9CEC189E},1911,Active,Active,Primary,Yes,216000.0,A-1904,SE NE S28 T06N-R10W,1C,Lincoln,"Blue River, Little",Pump,453.0,Yes,No,No,Yes,70.0,No,,Adams,Little Blue,,,,,-1352376000000.0,,-1349438400000.0,,Triple AAA Ranch Inc,-19483200000.0,,,Private,36255.0,jordan.koerwitz@Nebraska.gov,1686139200000.0,No,681.0,Open,1556020800000.0,Natural Flow (Reminder),Irrigation,Direct Flow,No,No,No,1.01,,,71.0,102702060410,-98.44008,40.45935,
0,27,{308E93CF-5885-49D4-A54D-21AEB2B84310},2550,Active,Active,Primary,Yes,279700.0,A-11454,NW NE S24 T01N-R03E,1C,Lincoln,"Blue River, Little",Pump,372.0,Yes,No,No,Yes,70.0,No,,Jefferson,Little Blue,,,,,-48340800000.0,,,Steve R & Beth A,Block,,,,Private,36247.0,Jacob.Coon@nebraska.gov,1686052800000.0,No,681.0,Open,1556020800000.0,Natural Flow (Reminder),Irrigation,Direct Flow,No,No,No,0.83,,,58.0,102702070302,-97.03524,40.0447,


In [10]:
# dftemp.to_csv('RawInputData/SurfaceWaterRightsDiversionsExternal_DNR.zip', compression=dict(method='zip', archive_name='SurfaceWaterRightsDiversionsExternal_DNR.csv'), index=False)  # The output, save as a zip