# Pre-processing Nebraska Allocation data for WaDEQA upload.
Date Updated: 05/25/2023
Purpose:  To pre-process the Nebraska data into one master file for simple DataFrame creation and extraction

### Notes:
- asfd

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
#Working Directory
workingDir = "G:/Shared drives/WaDE Data/Nebraska/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Input Data

In [3]:
%%time
# API retrieval
# shoot for 10 pages
df = pd.DataFrame()
countPage = 1
while countPage < 11:
    url = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=" + str(countPage)
    print(url)
    responseD = json.loads(requests.get(url).text)
    DtL = responseD['Results']
    length = len(DtL)

    # store in dataframe
    for i in range(length):
        row = pd.DataFrame([DtL[i]])
        df = pd.concat([df, row])
    
    countPage = countPage + 1

# # Use only NeNDR Active provided sites
# df = df[df['SourceName'] == 'NeDNR']

print(len(df))
df.head()

https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=1
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=2
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=3
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=4
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=5
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=6
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=7
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=8
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=9
https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page=10
4635
Wall time: 9min 59s


Unnamed: 0,RightID,RightStatus,ApplicationName,StartDownStream,WaterDivision,FieldOffice,CarrierA,SourceName,ProGrant,Units,InstantaneousGrant,VolumetricGrant,ReservoirCapacityGrant,GPM,GrantCounted,AcresCounted,Rate,CurrentTotalAcres,AnnualReportRequired,Annotation,PriorityDate,BeneficialDate,ApprovalDate,LastOrderDate,IrrigationDistrict,IrrigationProject,PermitGroup,IsDeleted,LatitudeDecimalDegrees,LongitudeDecimalDegrees,RightUse,PumpSheets,NoticeExemptions,SpecialConditions,Notices,PointOfDiversions,Contacts
0,1,Active,A-365R,8955,1A,Bridgeport,Central Canal,North Platte River,0.04,CFS,,,,17.0,Yes,No,,0.0,No,U-29,1897-03-15T00:00:00,,1996-03-05T00:00:00,2011-04-26T00:00:00,Central Irrigation District,Central Canal,Public,False,41.85584,-103.70574,"{'UseCode': 'US', 'UseDescription': 'Incidenta...",[],[],[],"[{'NoticeID': 2440, 'NoticeType': 'Regulating'...","[{'PointOfDiversionID': 14166, 'Section': 28, ...","[{'ContactId': 754, 'FirstName': None, 'LastNa..."
0,2,Active,A-2324,100,1A,Bridgeport,Glenn Canal,Glenn Springs,0.16,CFS,,,,71.0,Yes,Yes,70.0,11.5,No,,1933-05-29T00:00:00,,1934-02-05T00:00:00,1934-02-05T00:00:00,,,Private,False,42.00158,-104.04618,"{'UseCode': 'IR', 'UseDescription': 'Irrigatio...",[],[{'ExemptionDescription': 'Does not flow to ri...,[],"[{'NoticeID': 359, 'NoticeType': 'Open', 'Noti...","[{'PointOfDiversionID': 10982, 'Section': 3, '...","[{'ContactId': 6459, 'FirstName': 'Steve & Pam..."
0,3,Active,A-3917,200,1A,Bridgeport,Schuppe Canal No. 1,"Schuppe Creek, West",0.01,CFS,,,,4.0,Yes,Yes,70.0,1.0,No,,1946-06-17T00:00:00,,1947-02-26T00:00:00,1947-02-26T00:00:00,,,Private,False,41.99952,-104.04244,"{'UseCode': 'IR', 'UseDescription': 'Irrigatio...",[],[{'ExemptionDescription': 'Does not flow to ri...,[],"[{'NoticeID': 621, 'NoticeType': 'Open', 'Noti...","[{'PointOfDiversionID': 10772, 'Section': 3, '...","[{'ContactId': 1421, 'FirstName': 'Charlene I'..."
0,4,Active,A-3918,300,1A,Bridgeport,Schuppe Canal No. 2,"Schuppe Creek, East",0.02,CFS,,,,8.0,Yes,Yes,70.0,1.77,No,,1946-06-17T00:00:00,,1946-09-13T00:00:00,1947-02-26T00:00:00,,,Private,False,41.99853,-104.04042,"{'UseCode': 'IR', 'UseDescription': 'Irrigatio...",[],[{'ExemptionDescription': 'Does not flow to ri...,[],"[{'NoticeID': 621, 'NoticeType': 'Open', 'Noti...","[{'PointOfDiversionID': 10777, 'Section': 3, '...","[{'ContactId': 1421, 'FirstName': 'Charlene I'..."
0,5,Cancelled,A-9867,400,1A,Bridgeport,Petsch Canal,Rogers Spring,,,,,,,Yes,Yes,70.0,,No,,1960-09-28T00:00:00,,1962-03-19T00:00:00,2006-12-20T00:00:00,,,Private,False,41.97998,-104.03679,"{'UseCode': 'IR', 'UseDescription': 'Irrigatio...",[],[{'ExemptionDescription': 'Does not flow to ri...,[],[],"[{'PointOfDiversionID': 6483, 'Section': 10, '...","[{'ContactId': 7805, 'FirstName': None, 'LastN..."


In [4]:
# %%time

# # API retrieval
# url = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints"
# responseD = json.loads(requests.get(url).text)
# DtL = responseD['Results']
# length = len(DtL)

# # create dataframe and store
# df = pd.DataFrame()
# for i in range(length):
#     row = pd.DataFrame([DtL[i]])
#     df = pd.concat([df, row])

# # # Use only NeNDR Active provided sites
# # df = df[df['SourceName'] == 'NeDNR']

# # # Exporting output files.
# # df.to_csv('StreamGageGetStationList.csv', index=False)  # The output.

In [5]:
# explode these list....
dftemp = df.copy()
dftemp = dftemp.explode('NoticeExemptions')
dftemp = dftemp.explode('Notices')
dftemp = dftemp.explode('PointOfDiversions')
dftemp = dftemp.explode('Contacts')

print(len(dftemp))
dftemp.head(1)

85181


Unnamed: 0,RightID,RightStatus,ApplicationName,StartDownStream,WaterDivision,FieldOffice,CarrierA,SourceName,ProGrant,Units,InstantaneousGrant,VolumetricGrant,ReservoirCapacityGrant,GPM,GrantCounted,AcresCounted,Rate,CurrentTotalAcres,AnnualReportRequired,Annotation,PriorityDate,BeneficialDate,ApprovalDate,LastOrderDate,IrrigationDistrict,IrrigationProject,PermitGroup,IsDeleted,LatitudeDecimalDegrees,LongitudeDecimalDegrees,RightUse,PumpSheets,NoticeExemptions,SpecialConditions,Notices,PointOfDiversions,Contacts
0,1,Active,A-365R,8955,1A,Bridgeport,Central Canal,North Platte River,0.04,CFS,,,,17,Yes,No,,0.0,No,U-29,1897-03-15T00:00:00,,1996-03-05T00:00:00,2011-04-26T00:00:00,Central Irrigation District,Central Canal,Public,False,41.85584,-103.70574,"{'UseCode': 'US', 'UseDescription': 'Incidenta...",[],,[],"{'NoticeID': 2440, 'NoticeType': 'Regulating',...","{'PointOfDiversionID': 14166, 'Section': 28, '...","{'ContactId': 754, 'FirstName': None, 'LastNam..."


In [6]:
# To unpack column's dictionary value new into separate columns -> contact to existing dataframe -> drop unpacked column
dftemp = pd.concat([dftemp, dftemp["RightUse"].apply(pd.Series)], axis=1).drop(columns="RightUse")
dftemp = pd.concat([dftemp, dftemp["NoticeExemptions"].apply(pd.Series)], axis=1).drop(columns="NoticeExemptions")
dftemp = pd.concat([dftemp, dftemp["Notices"].apply(pd.Series)], axis=1).drop(columns="Notices")
dftemp = pd.concat([dftemp, dftemp["PointOfDiversions"].apply(pd.Series)], axis=1).drop(columns="PointOfDiversions")
dftemp = pd.concat([dftemp, dftemp["Contacts"].apply(pd.Series)], axis=1).drop(columns="Contacts")

print(len(dftemp))
dftemp.head(1)

85181


Unnamed: 0,RightID,RightStatus,ApplicationName,StartDownStream,WaterDivision,FieldOffice,CarrierA,SourceName,ProGrant,Units,InstantaneousGrant,VolumetricGrant,ReservoirCapacityGrant,GPM,GrantCounted,AcresCounted,Rate,CurrentTotalAcres,AnnualReportRequired,Annotation,PriorityDate,BeneficialDate,ApprovalDate,LastOrderDate,IrrigationDistrict,IrrigationProject,PermitGroup,IsDeleted,LatitudeDecimalDegrees,LongitudeDecimalDegrees,PumpSheets,SpecialConditions,UseCode,UseDescription,SendNotices,UseTypeDescription,0,ExemptionDescription,NoticeID,NoticeType,NoticeDate,EffectiveDate,ReasonForAdminAction,Notes,DeleteNotice,0.1,PointOfDiversionID,Section,SubSection,Township,Range,RangeDirection,CountyName,NrdName,HUC12,Upstream,PrimaryIndicator,PODStatus,LegalDescription,LatitudeDecimalDegrees.1,LongitudeDecimalDegrees.1,ContactId,FirstName,LastName,ContactType,SeqNum,BeginDate,EndDate,Address1,Address2,City,State,Zip,Phone1,Phone2,Phone3,LicenseNumber,IsDeleted.1
0,1,Active,A-365R,8955,1A,Bridgeport,Central Canal,North Platte River,0.04,CFS,,,,17,Yes,No,,0.0,No,U-29,1897-03-15T00:00:00,,1996-03-05T00:00:00,2011-04-26T00:00:00,Central Irrigation District,Central Canal,Public,False,41.85584,-103.70574,[],[],US,Incidental Underground Storage,Yes,Natural Flow,,,2440.0,Regulating,2023-05-25T14:10:55.1666667,2023-05-25T00:00:00,Gage Height Posting,Natural Flow Limit. Updated shift from measur...,No,,14166.0,28,NE NW,22,55,West,Scotts Bluff,North Platte,101800090810.0,Yes,Primary,Active,NE NW S28 T22N-R55W,41.85584,-103.70574,754,,Central Irrigation District,Owner,1.0,,,230450 Crow Road,,Gering,Nebraska,69341,3086413425,,,,No


In [7]:
# Clean Data
# we don't really need the 'NoticeExemptions' at this time
dropList = ['NoticeID', 'NoticeType', 'NoticeDate', 'EffectiveDate', 'ReasonForAdminAction', 'Notes', 'DeleteNotice']
dftemp = dftemp.drop(dropList, axis=1).drop_duplicates().reset_index(drop=True)
print(len(dftemp))
dftemp.head()

TypeError: unhashable type: 'list'

In [None]:
dftemp['BeginDate'].unique()

In [None]:
dftemp.to_csv('checkdf.zip', compression=dict(method='zip', archive_name='checkdf.csv'), index=False)  # The output, save as a zip

In [None]:
# POD sites Data
PoDAAInput = "POD AllApps_2_input.zip"
dfPoD = pd.read_csv(PoDAAInput).replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfPoD:
    dfPoD['WaDEUUID'] = "nvD" + dfPoD.index.astype(str)
    dfPoD.to_csv('POD AllApps_2_input.zip', compression=dict(method='zip', archive_name='POD AllApps_2_input.csv'), index=False)
    
dfPoD['in_PODorPOUSite'] = "POD"
dfPoD['in_SiteNativeID'] = "POD" + dfPoD.index.astype(str) # creating custom site Native iD for POD

print(len(dfPoD))
dfPoD.head(1)

In [None]:
# Input File - PoU Shapefile Data
# export dataframe as zipped csv
PoUAAInput = 'shapefile/NVwr_POU.zip'
dfPoU = gpd.read_file(PoUAAInput).replace(np.nan, "")
dfPoU = dfPoU.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfPoU:
    dfPoU['WaDEUUID'] = "nvU" + dfPoU.index.astype(str)
    dfPoU.to_csv('NVwr_POU.zip', compression=dict(method='zip', archive_name='NVwr_POU.csv'), index=False)
    
dfPoU['in_PODorPOUSite'] = "POU"
dfPoU['in_SiteNativeID'] = "POU" + dfPoU['wadeSiteID']

print(len(dfPoU))
dfPoU.head(1)

In [None]:
# Owner Data
OwnTemp = "Permit_Owners_5temp.zip"
dfown = pd.read_csv(OwnTemp).replace(np.nan, "")

# With owner sort and merge columns by 'app' field.
dfown = dfown.groupby('app', sort=False).agg(lambda x: ', '.join([str(elem) for elem in (list(set(x)))]))
dfown = dfown.drop_duplicates().reset_index()
print(len(dfown))
dfown.head(1)

# POD Data

In [None]:
# Merge owner data with POD data 
dfPoD = pd.merge(dfPoD, dfown, left_on='app', right_on='app', how='left')
dfPoD.head()

In [None]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfPoD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NVwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "NVwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NVwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = " "# auto fill in below
df['in_WaterSourceTypeCV'] = dfPoD['source']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = "Digitized"
df['in_County'] = dfPoD['county_x']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfPoD['y']
df['in_Longitude'] = dfPoD['x']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = dfPoD['in_PODorPOUSite']
df['in_SiteName'] = dfPoD['site_name']
df['in_SiteNativeID'] = dfPoD['in_SiteNativeID']
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfPoD['source']
df['in_StateCV'] = "NV"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfPoD['app_status']
df['in_AllocationNativeID'] =  dfPoD['app'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = dfPoD['owner_name']
df['in_AllocationPriorityDate'] = dfPoD['prior_dt']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfPoD['duty_balance']
df['in_BeneficialUseCategory'] = dfPoD['mou']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfPoD['permit_info']

outdfPoD = df.copy()
outdfPoD = outdfPoD.drop_duplicates().reset_index(drop=True)
print(len(outdfPoD))
outdfPoD.head()

# POU Data

In [None]:
# Merge owner data with POU data 
dfPoU = pd.merge(dfPoU, dfown, left_on='app', right_on='app', how='left')
dfPoU.head()

In [None]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfPoU['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NVwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "NVwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NVwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = " "# auto fill in below
df['in_WaterSourceTypeCV'] = dfPoU['source']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = "Digitized"
df['in_County'] = dfPoU['county_x']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfPoU['Latitude']
df['in_Longitude'] = dfPoU['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = dfPoU['in_PODorPOUSite']
df['in_SiteName'] = dfPoU['site_name']
df['in_SiteNativeID'] = dfPoU['in_SiteNativeID']
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfPoU['source']
df['in_StateCV'] = "NV"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfPoU['app_status']
df['in_AllocationNativeID'] =  dfPoU['app'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = dfPoU['owner_name']
df['in_AllocationPriorityDate'] = dfPoU['prior_dt']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfPoU['duty_balan']
df['in_BeneficialUseCategory'] = dfPoU['mou']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfPoU['permit_inf']

outdfPoU = df.copy()
outdfPoU = outdfPoU.drop_duplicates().reset_index(drop=True)
print(len(outdfPoU))
outdfPoU.head()

## Concatenate POD & POU

In [None]:
# Concatenate dataframes
frames = [outdfPoD, outdfPoU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

In [None]:
# For creating County
CountyDict = {
    "HU" : "Humboldt",
    "CC" : "Carson City",
    "CH" : "Churchill",
    "CL" : "Clark",
    "DO" : "Douglas",
    "EL" : "Elko",
    "ES" : "Esmerelda",
    "EU" : "Eureka",
    "LA" : "Lander",
    "LI" : "Lincoln",
    "LY": "Lyon",
    "MI": "Mineral",
    "NY": "Nye",
    "PE": "Pershing",
    "ST": "Storey",
     "": "Unknown",
    "WA": "Washoe",
    "WP": "White Pine",
    "UK": "Unknown"}
def assignCounty(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()
        try:
            outList = CountyDict[String1]
        except:
            outList = ""
    return outList

outdf['in_County'] = outdf.apply(lambda row: assignCounty(row['in_County']), axis=1)
outdf['in_County'].unique() 

In [None]:
# For creating AllocationLegalStatusCV
LegalDict = {
"ABN" : "Abandoned",
"ABR" : "Abrogated",
"APP" : "Application",
"CAN" : "Canceled",
"CER" : "Certificate",
"CUR" : "Curtailed",
"DEC" : "Decreed",
"DEN": "Denied",
"EXP": "Expired",
"FOR": "Forfeited",
"PER": "Permit",
"REJ": "Rejected",
"REL": "Relinquished",
"RES": "Reserved",
"RFA": "Ready For Action",
"RFP": "Ready for Action (Protested)",
"RLP": "Relinquish a Portion",
"RSC": "Rescinded",
"RVK": "Revoked",
"RVP": "Revocable Permit",
"SUP": "Supersceded",
"SUS": "Suspended",
"VST": "Vested Rights",
"WDR": "Withdrawn"}
def assignAllocationLegalStatusCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue) == True :
        outList = ""
    else:
        String1 = colrowValue.strip()
        try:
            outList = LegalDict[String1]
        except:
            outList = ""
    return outList
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: assignAllocationLegalStatusCV(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

In [None]:
# For creating BeneficialUse
BeneficialUseDict = {
"COM" : "Commercial",
"CON" : "Construction",
"DEC" : "As Decreed",
"DOM" : "Domestic",
"DWR" : "Dewatering",
"ENV" : "Environmental",
"EVP" : "Evaporation",
"IND": "Industrial",
"IRC": "Irrigation-Carey Act",
"IRD": "Irrigation-DLE",
"IRR": "Irrigation",
"MM": "Mining and Milling",
"MMD": "Mining Milling and Dewatering",
"MUN": "Municipal",
"OTH": "Other",
"PWR": "Power",
"QM": "Quasi-Municipal",
"REC": "Recreational",
"STK": "Stockwatering",
"STO": "Storage",
"UKN": "Unknown",
"WLD": "Wildlife"}
def assignBeneficialUse(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()  # remove whitespace chars
        try:
            outList = BeneficialUseDict[String1]
        except:
            outList = ""

    return outList
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: assignBeneficialUse(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

In [None]:
# For creating WaterSourceTypeCV
UnknownWSCVDict = {
"EFF" : "Reuse",
"GEO" : "Groundwater",
"LAK" : "Surface Water",
"OGW" : "Groundwater",
"OSW" : "Surface Water",
"RES" : "Reservoir",
"SPR" : "Surface Water",
"STO" : "Storage",
"STR" : "Surface Water",
"UG" : "Groundwater",
"UKN" : "Unknown"}
def assignWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()  # remove whitespace chars
        try:
            outList = UnknownWSCVDict[String1]
        except:
            outList = ""
    return outList

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: assignWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

In [None]:
# For creating SiteTypeCV
UnknownSTCVDict = {
    "EFF":"Effluent",
    "GEO":"Geothermal",
    "LAK":"lake",
    "OGW":"Other Ground Water",
    "OSW":"Other Surface Water",
    "RES":"Reservoir",
    "SPR":"Spring",
    "STO":"Storage",
    "STR":"stream",
    "UG":"Underground",
    "UKN":"Unknown"}
def assignSiteTypeCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()  # remove whitespace chars
        try:
            outList = UnknownSTCVDict[String1]
        except:
            outList = ""
    return outList

outdf['in_SiteTypeCV'] = outdf.apply(lambda row: assignSiteTypeCV(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

## Clean Data & WaDE Custom Elements

In [None]:
# Clean owner name up
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;,/\)(-]", "", Val).title().replace("  ", " ").strip()
    return Val

In [None]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

In [None]:
# some POD source data has a few names that contain a ',' in them, but should still be okay
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

In [None]:
# Ensure Empty String

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [None]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

In [None]:
outdf['in_County'] = outdf.apply(lambda row: ensureEmptyString(row['in_County']), axis=1)
outdf['in_County'].unique()

In [None]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

In [None]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

In [None]:
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

In [None]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

In [None]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

In [None]:
# in_Latitude
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').fillna("")
outdf['in_Latitude'].unique()

In [None]:
# in_Longitude
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').fillna("")
outdf['in_Longitude'].unique()

In [None]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

In [None]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

In [None]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "wadeID" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
WaterSourceNativeIDdict = pd.Series(dfWaterSourceNativeID.in_WaterSourceNativeID.values, index=dfWaterSourceNativeID.linkKey.astype(str)).to_dict()
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        colrowValue = str(A).strip() + str(B).strip()
        try:
            outList = WaterSourceNativeIDdict[colrowValue]
        except:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

## Drop non-Active AllocationLegalStatusCV Water Rights
- For NV, we don't want water rights that are considered: Abandoned, Abrogated, Application, Canceled, Denied, Expired, Forfeited, Ready For Action, Ready for Action (Protested), Rejected, Revoked, Supersceded, Withdrawn

In [None]:
# drop non-active AllocationLegalStatusCV values specific to that state.

# drop the list
dropLegalStatusList = ["Abandoned", "Abrogated", "Application", "Canceled", "Denied", "Expired", "Forfeited", "Ready For Action", "Ready for Action (Protested)", "Rejected", "Revoked", "Supersceded", "Withdrawn"]

# drop rows from above list
outdf = outdf[outdf.in_AllocationLegalStatusCV.isin(dropLegalStatusList) == False].reset_index(drop=True)

print(len(outdf))
outdf['in_AllocationLegalStatusCV'].unique()

## Shapefile Data
- For attaching gemetry to csv inputs.

In [None]:
# PoU Shapefile Data, Shapefile input
dfPoUshapetemp = gpd.read_file('shapefile/NVwr_POU.zip')
#dfPoUshapetemp = pd.DataFrame(dfPoUshapetemp)
dfPoUshapetemp.head(3)

In [None]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['wadeSiteID']
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

### Exporting to Finished File

In [None]:
outdf.info()

In [None]:
outdf

In [None]:
# Export the output dataframe
outdf.to_csv('Pwr_nvMain.zip', compression=dict(method='zip', archive_name='Pwr_nvMain.csv'), index=False)  # The output, save as a zip
dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.