# Preprocessing Nebraska Allocation data for WaDEQA upload.
- Date Updated: 08/26/2020
- Purpose:  To preprocess the Nebraska data into one master file for simple DataFrame creation and extraction.
- Joins the NeDNR API surface water data to the POD shapefile data via **RightID**.

In [None]:
# Needed libraries
import os
import numpy as np
import pandas as pd
import requests
import json
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [None]:
# Working Directory
# Every relative file name used below (the input CSV and the exported CSVs) resolves against this folder.
workingDir = "G:/Shared drives/WaDE Data/Nebraska/WaterAllocation/RawInputData"
os.chdir(workingDir)

In [None]:
# Load the POD shapefile export and keep only the join key, HUC and coordinate columns.
sfInput = "ActiveSWPODs08242020_input.csv"
dfpodsftemp = pd.read_csv(sfInput)
podColumns = ['RightID', 'HUC_12', 'Longitude', 'Latitude']  # columns of interest
dfpodsf = dfpodsftemp[podColumns]
dfpodsf.head()

In [None]:
%%time
# Get all surface water points from NeDNR API.
# Note: API has lots of NULL values, have to put a hard stop of what to search.
# May get duplicates, should be roughly 120,000 good records to work with (according to NE).
apiUrl = "https://nednr.nebraska.gov/IwipApi/api/v1/WaterRights/AllSurfaceWaterPoints?page="
lastPage = 19  # hard stop: pages 1..19 are requested, the same range as before

# Gather the raw record dicts first and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame for every record.
records = []
for page in range(1, lastPage + 1):
    response = requests.get(apiUrl + str(page), timeout=120)
    response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
    records.extend(response.json()['Results'])

dfsw = pd.DataFrame(records)

# Drop rows that are exact duplicates across every column, keeping the first occurrence.
dfapisw = dfsw.drop_duplicates()
dfapisw.to_csv('NESWPData.csv', index=False)  # The output.

In [None]:
# Combine the API records with the POD shapefile attributes on RightID.
# how='inner' keeps only the RightIDs that appear in both tables.
# NOTE(review): this comment used to say "left-join"; switch to how='left' if API rows
# without a shapefile match are meant to be kept.

df = dfapisw.merge(dfpodsf, how='inner', left_on='RightID', right_on='RightID')
print(len(df))
df.head()

In [None]:
# Changing datatype of used date fields.

# Parse PriorityDate; errors='coerce' turns values that cannot be parsed into NaT instead of raising.
df['PriorityDate'] = pd.to_datetime(df['PriorityDate'], errors = 'coerce')
# Round-trip through a month/day/year string and re-parse, so only the calendar date is carried forward.
df['PriorityDate'] = pd.to_datetime(df["PriorityDate"].dt.strftime('%m/%d/%Y'))
df.head(3)

In [None]:
# Making Sure datatype of HUC12 is int.

def assignHUC12(colrowValue):
    """Return the HUC12 value as an int, or '' when the value is blank or null."""
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    return int(colrowValue)

# Normalize HUC12 one value at a time; a column-wise apply avoids building a Series for every row,
# which is what DataFrame.apply(axis=1) does.
# NOTE(review): the POD shapefile column selected earlier is named 'HUC_12', while this reads 'HUC12' —
# confirm which column is intended.
df['HUC12'] = df['HUC12'].apply(assignHUC12)
df.head(3)

In [None]:
# Creating Beneficial Use from NeDNR provided code.  See metaData_SurfaceWaterWebSimpleSearch.pdf for details.
# NOTE(review): "OU" and "US" carry the same description — confirm against the metadata PDF.

NebraskaBenUseCodeDict = {
"CO" : "Cooling",
"DG" : "Dredge",
"DI" : "Domestic, Irrigation and Manufacturing",
"DO" : "Domestic",
"DS" : "Domestic Storage",
"FC" : "Fish Culture",
"FL" : "Flood Control",
"FW" : "Fish and Wildlife",
"IF" : "Instream Flow",
"IG" : "Induced Ground Water Recharge",
"IN" : "Intentional Underground Storage",
"IR" : "Irrigation from Natural Stream",
"IS" : "Irrigation and Storage (an appropriation approved for both uses)",
"IU" : "Irrigation and Incidental Underground Storage",
"MF" : "Manufacturing",
"ML" : "Maintain Level of a Lake",
"MU" : "Municipal",
"OU" : "Incidental Underground Storage",
"PI" : "Power and Incidental Underground Storage",
"PR" : "Power",
"PS" : "Supplemental Power and Incidental Underground Storage",
"PW" : "Public Water Supply",
"RC" : "Groundwater Recharge",
"RD" : "Raise Dam (for increase in head for power production)",
"SC" : "Supplemental Cooling",
"SD" : "Supplemental Domestic",
"SF" : "Supplemental Fish Culture",
"SI" : "Supplemental Irrigation",
"SO" : "Storage Use-only",
"SP" : "Supplemental Power",
"SS" : "Supplemental Storage",
"ST" : "Storage",
"SU" : "Storage and Incidental Underground Storage",
"TI" : "Temporary Transfer to In-Stream Use",
"UI" : "Supplemental Irrigation and Incidental Underground Storage",
"US" : "Incidental Underground Storage",
"WS" : "Waste Storage",
"WT" : "Wetlands"}

def assignRightUse(colrowValue):
    """Translate a right-use code into its beneficial use description.

    Args:
        colrowValue: Use code as reported (surrounding whitespace is ignored);
            may be blank or null.

    Returns:
        The description from NebraskaBenUseCodeDict, or "Unspecified" when the
        code is blank, null, or not a key of the dictionary.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get replaces the former bare `except:`, which would also have hidden unrelated errors.
    return NebraskaBenUseCodeDict.get(colrowValue.strip(), "Unspecified")

# Map each RightUse code to its description; a column-wise apply skips the per-row Series
# that DataFrame.apply(axis=1) builds.
df['BeneficialUseCategory'] = df['RightUse'].apply(assignRightUse)
df.head(3)

In [None]:
# AllocationFlow_CFS - based on reported Unit

def assignAllocationFlow_CFS(colvA, colvB):
    """Return the granted amount when its unit is CFS, otherwise ''.

    Args:
        colvA: Granted amount; may be blank or null.
        colvB: Unit label for the amount; surrounding whitespace is ignored.

    Returns:
        colvA unchanged when the unit is "CFS"; '' when the amount is blank or
        null, or when the unit is missing or anything other than "CFS".
    """
    if colvA == '' or pd.isnull(colvA):
        return ''
    # A null or non-string unit cannot be "CFS"; previously this raised AttributeError on .strip().
    if not isinstance(colvB, str):
        return ''
    return colvA if colvB.strip() == "CFS" else ''

# Pair each grant with its unit column-wise instead of building a Series per row
# with DataFrame.apply(axis=1).
df['AllocationFlow_CFS'] = [
    assignAllocationFlow_CFS(grant, unit)
    for grant, unit in zip(df['ProGrant'], df['Units'])
]
df.head(3)

In [None]:
# AllocationVolume_AF - based on reported Unit

def assignAllocationVolume_AF(colvA, colvB):
    """Return the granted amount when its unit is AF, otherwise ''.

    Args:
        colvA: Granted amount; may be blank or null.
        colvB: Unit label for the amount; surrounding whitespace is ignored.

    Returns:
        colvA unchanged when the unit is "AF"; '' when the amount is blank or
        null, or when the unit is missing or anything other than "AF".
    """
    if colvA == '' or pd.isnull(colvA):
        return ''
    # A null or non-string unit cannot be "AF"; previously this raised AttributeError on .strip().
    if not isinstance(colvB, str):
        return ''
    return colvA if colvB.strip() == "AF" else ''

# Pair each grant with its unit column-wise instead of building a Series per row
# with DataFrame.apply(axis=1).
df['AllocationVolume_AF'] = [
    assignAllocationVolume_AF(grant, unit)
    for grant, unit in zip(df['ProGrant'], df['Units'])
]
df.head(3)

In [None]:
# For creating WaterSourceName

def assignWaterSourceName(colrowValue):
    """Return the trimmed water source name, or "Unspecified" when there is none.

    Args:
        colrowValue: Source name as reported; may be blank or null.

    Returns:
        The name with surrounding whitespace removed, or "Unspecified" when the
        value is blank, null, or consists only of whitespace.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return "Unspecified"
    # Whitespace-only names used to come back as '' and slip past the "Unspecified" default.
    return colrowValue.strip() or "Unspecified"

# Clean SourceName column-wise; avoids the per-row Series that DataFrame.apply(axis=1) builds.
df['in_WaterSourceName'] = df['SourceName'].apply(assignWaterSourceName)
df['in_WaterSourceName'].unique()

In [None]:
# Build the detail-page link for each right by appending its RightID (converted to text) to the base URL.
df['WaterAllocationNativeURL'] = 'https://nednr.nebraska.gov/dynamic/WaterRights/WaterRights/SWRDetailPage?RightId=' + df['RightID'].astype(str)
df.head(3)

## WaDE Custom Elements (due to missing state site info)

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the WaDE custom water source ID: "WaDENE_WS" followed by the given value as text."""
    return "WaDENE_WS" + str(colrowValue)

# One row per distinct water source name, keeping the index label of its first occurrence in df.
dfWaterSourceNativeID = df[['in_WaterSourceName']].drop_duplicates()

# Number the distinct names 1..N in order of appearance and turn each number into a custom ID.
# The counter shares the lookup table's index so the resulting IDs line up with their names.
dftemp = pd.DataFrame(
    {"Count": range(1, len(dfWaterSourceNativeID) + 1)},
    index=dfWaterSourceNativeID.index,
)
# Column-wise apply: no per-row Series as with DataFrame.apply(axis=1).
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp['Count'].apply(assignWaterSourceNativeID)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Look up the custom native ID assigned to water source name A.

    Returns '' when A is blank or null, or when no row of
    dfWaterSourceNativeID carries that name; otherwise the ID of the first
    matching row.
    """
    if (A == '') or (pd.isnull(A)):
        return ''
    nameMatches = dfWaterSourceNativeID['in_WaterSourceName'] == A
    matchedIDs = dfWaterSourceNativeID.loc[nameMatches, 'in_WaterSourceNativeID']
    return '' if matchedIDs.empty else matchedIDs.iloc[0]

# Attach the custom native ID to every record by its water source name.
# Column-wise apply avoids the per-row Series that DataFrame.apply(axis=1) builds.
df['in_WaterSourceNativeID'] = df['in_WaterSourceName'].apply(retrieveWaterSourceNativeID)
df

In [None]:
# Exporting output files.
# Write the processed frame to CSV (relative path, so it lands in the current working directory); the index is not written.
df.to_csv('P_NebraskaMaster.csv', index=False)  # The output.