# Pre-processing Nevada Allocation data for WaDEQA upload.
Date Updated: 08/03/2020
Purpose:  To pre-process the Nevada data into one master file for simple DataFrame creation and extraction

### Notes:
To include owners, a temporary Permit_Owners_5temp.csv file was made by removing previous owners from the list.  The plan is to left-join it to POD AllApps_2.csv on the 'app' field.


https://ndwr.maps.arcgis.com/home/item.html?id=0d050f7b79724404b80bf29589f67363
https://arcgis.shpo.nv.gov/arcgis/rest/services/Water_Resources_Public_Data/WaterRights_POD_POU/FeatureServer
https://arcgis.shpo.nv.gov/arcgis/rest/services/Water_Resources_Public_Data/SE_HydrographicBasins/FeatureServer
http://water.nv.gov/CodeDefinitions.aspx

In [None]:
#Needed Libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
# Display settings for interactive inspection of DataFrames.
pd.set_option('display.max_columns', 999)  # show up to 999 columns instead of truncating wide frames
pd.set_option('display.float_format', lambda x: '%.5f' % x) # render floats with 5 fixed decimals (suppresses scientific notation)

In [None]:
# Working Directory
# Every relative file path used below (csv inputs, shapefile, zip outputs) is resolved against this folder.
workingDir = "G:/Shared drives/WaDE Data/Nevada/WaterAllocation/RawInputData"
os.chdir(workingDir)  # changes the current directory for the whole Python process

## Input Data

In [None]:
# POD sites Data
PoDAAInput = "POD AllApps_2_input.csv"
dfPoD = pd.read_csv(PoDAAInput)

# WaDE UUID tracker for data assessment.
# When the input has no WaDEUUID column yet, tag each row with "nvD" + its row label
# and save a zipped copy of the tagged input.
if 'WaDEUUID' not in dfPoD.columns:
    dfPoD['WaDEUUID'] = [f"nvD{rowLabel}" for rowLabel in dfPoD.index]
    dfPoD.to_csv(
        'POD AllApps_2_input.zip',
        index=False,
        compression=dict(method='zip', archive_name='POD AllApps_2_input.csv'),
    )

# Mark every record as a point of diversion and give it a custom site native ID ("POD" + row label).
dfPoD['in_PODorPOUSite'] = "POD"
dfPoD['in_SiteNativeID'] = [f"POD{rowLabel}" for rowLabel in dfPoD.index]

print(len(dfPoD))
dfPoD.head(1)

In [None]:
# PoU Data
PoUAAInput = "PoU AllApps_3_input.csv"
dfPoU = pd.read_csv(PoUAAInput)

# WaDE UUID tracker for data assessment.
# When the input has no WaDEUUID column yet, tag each row with "nvU" + its row label
# and save a zipped copy of the tagged input.
if 'WaDEUUID' not in dfPoU.columns:
    dfPoU['WaDEUUID'] = [f"nvU{rowLabel}" for rowLabel in dfPoU.index]
    dfPoU.to_csv(
        'PoU AllApps_3_input.zip',
        index=False,
        compression=dict(method='zip', archive_name='PoU AllApps_3_input.csv'),
    )

# Mark every record as a place of use and give it a custom site native ID ("POU" + row label).
dfPoU['in_PODorPOUSite'] = "POU"
dfPoU['in_SiteNativeID'] = [f"POU{rowLabel}" for rowLabel in dfPoU.index]

print(len(dfPoU))
dfPoU.head(1)

In [None]:
# concat POD and POU data
# Stack the two inputs, drop exact duplicate rows, renumber the rows 0..n-1 and blank out missing values.
# drop=True discards the old row labels instead of storing them in an extra 'index' column.
dfin = pd.concat([dfPoD, dfPoU], ignore_index=True)
dfin = dfin.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(dfin))
dfin.head(1)

In [None]:
# Owner Data
# Read the temporary owner file; it is joined to the POD/POU records on the 'app' column further down.
OwnTemp = "Permit_Owners_5temp.csv"
dfown = pd.read_csv(OwnTemp)
print(len(dfown))  # number of owner rows read
dfown.head(1)

In [None]:
# Clean Owner info. Remove special characters
import re

def cleanOwnerDataFunc(Val):
    """Strip punctuation from an owner name and normalise its casing.

    Deletes every occurrence of the characters ``$ @ & . ; , / ) ( -``,
    title-cases what is left and trims leading/trailing whitespace.
    Whitespace inside the name is left untouched, so removing a character
    between two spaces leaves a double space.

    Args:
        Val: Raw owner name. Missing values (None / NaN) give an empty
            string; any other non-string value is converted with ``str``.

    Returns:
        The cleaned owner name as a string.
    """
    if not isinstance(Val, str):
        # re.sub only accepts text; a blank csv cell arrives as NaN.
        if pd.isnull(Val):
            return ""
        Val = str(Val)
    # Raw string so that "\)" is a regex escape and not an invalid Python string escape.
    return re.sub(r"[$@&.;,/\)(-]", "", Val).title().strip()

# Clean every owner name. Series.apply walks only the one column that is needed,
# instead of building a row object per record just to read a single field.
dfown['owner_name'] = dfown['owner_name'].apply(cleanOwnerDataFunc)
dfown['owner_name'].unique()

In [None]:
# Collapse the owner table to one row per 'app': within each app, the distinct values of every
# other column are joined into one comma-separated string.
# dict.fromkeys keeps the values in first-seen order, so the joined text is the same on every run
# (a set gives no ordering guarantee).
dfown = dfown.groupby('app', sort=False).agg(lambda x: ', '.join(str(elem) for elem in dict.fromkeys(x)))
# 'app' is the index at this point. Bring it back as a column without de-duplicating first:
# drop_duplicates() ignores the index, so it would discard apps whose owner columns happen to match
# another app. After the groupby every 'app' is already unique, so no de-duplication is needed.
dfown = dfown.reset_index()
print(len(dfown))
dfown.head(1)

In [None]:
# Attach owner information: left-join the per-app owner table onto the combined POD/POU records
# using the shared 'app' key, keeping every POD/POU row.
dfin = dfin.merge(dfown, how='left', on='app')
dfin.head()

# Data

In [None]:
# For creating County
# Lookup table: county code -> county name.
CountyDict = {
    "HU": "Humboldt",
    "CC": "Carson City",
    "CH": "Churchill",
    "CL": "Clark",
    "DO": "Douglas",
    "EL": "Elko",
    "ES": "Esmeralda",  # spelling corrected (was "Esmerelda")
    "EU": "Eureka",
    "LA": "Lander",
    "LI": "Lincoln",
    "LY": "Lyon",
    "MI": "Mineral",
    "NY": "Nye",
    "PE": "Pershing",
    "ST": "Storey",
    "": "Unknown",  # only reached by whitespace-only values; a truly empty string is handled before the lookup
    "WA": "Washoe",
    "WP": "White Pine",
    "UK": "Unknown",
}

def assignCounty(colrowValue):
    """Translate a county code into a county name.

    Args:
        colrowValue: County code. Surrounding whitespace is ignored and
            non-string values are converted with ``str`` before the lookup.

    Returns:
        The name from ``CountyDict``, or "WaDE Unspecified" when the value
        is empty, missing (None / NaN) or not a known code.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return "WaDE Unspecified"
    return CountyDict.get(str(colrowValue).strip(), "WaDE Unspecified")

# Translate the county code of every record into a county name.
# Series.apply walks only the 'county_x' column instead of building a row object per record.
dfin['County'] = dfin['county_x'].apply(assignCounty)

In [None]:
# For creating AllocationLegalStatusCV
# Lookup table: application status code -> legal status name.
LegalDict = {
    "ABN": "Abandoned",
    "ABR": "Abrogated",
    "APP": "Application",
    "CAN": "Canceled",
    "CER": "Certificate",
    "CUR": "Curtailed",
    "DEC": "Decreed",
    "DEN": "Denied",
    "EXP": "Expired",
    "FOR": "Forfeited",
    "PER": "Permit",
    "REJ": "Rejected",
    "REL": "Relinquished",
    "RES": "Reserved",
    "RFA": "Ready For Action",
    "RFP": "Ready for Action (Protested)",
    "RLP": "Relinquish a Portion",
    "RSC": "Rescinded",
    "RVK": "Revoked",
    "RVP": "Revocable Permit",
    "SUP": "Superseded",  # spelling corrected (was "Supersceded")
    "SUS": "Suspended",
    "VST": "Vested Rights",
    "WDR": "Withdrawn",
}

def assignAllocationLegalStatusCV(colrowValue):
    """Translate an application status code into a legal status name.

    Args:
        colrowValue: Status code. Surrounding whitespace is ignored and
            non-string values are converted with ``str`` before the lookup.

    Returns:
        The name from ``LegalDict``, or "WaDE Unspecified" when the value
        is empty, missing (None / NaN) or not a known code.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return "WaDE Unspecified"
    return LegalDict.get(str(colrowValue).strip(), "WaDE Unspecified")
# Translate the application status code of every record into a legal status name.
# Series.apply walks only the 'app_status' column instead of building a row object per record.
dfin['AllocationLegalStatusCV'] = dfin['app_status'].apply(assignAllocationLegalStatusCV)

In [None]:
# For creating BeneficialUse
# Lookup table: use code -> beneficial use name.
BeneficialUseDict = {
    "COM": "Commercial",
    "CON": "Construction",
    "DEC": "As Decreed",
    "DOM": "Domestic",
    "DWR": "Dewatering",
    "ENV": "Environmental",
    "EVP": "Evaporation",
    "IND": "Industrial",
    "IRC": "Irrigation-Carey Act",
    "IRD": "Irrigation-DLE",
    "IRR": "Irrigation",
    "MM": "Mining and Milling",
    "MMD": "Mining Milling and Dewatering",
    "MUN": "Municipal",
    "OTH": "Other",
    "PWR": "Power",
    "QM": "Quasi-Municipal",
    "REC": "Recreational",
    "STK": "Stockwatering",
    "STO": "Storage",
    "UKN": "Unknown",
    "WLD": "Wildlife",
}

def assignBeneficialUse(colrowValue):
    """Translate a use code into a beneficial use name.

    Returns the matching name from ``BeneficialUseDict`` after trimming
    surrounding whitespace from the code, or 'WaDE Unspecified' when the
    value is empty, missing (None / NaN) or not a known code.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return 'WaDE Unspecified'
    useCode = colrowValue.strip()  # codes may carry padding
    return BeneficialUseDict.get(useCode, "WaDE Unspecified")
# Translate the use code of every record into a beneficial use name.
# Series.apply walks only the 'mou' column instead of building a row object per record.
dfin['BeneficialUseCategory'] = dfin['mou'].apply(assignBeneficialUse)

In [None]:
# For creating WaterSourceTypeCV
# Lookup table: source code -> water source type.
UnknownWSCVDict = {
    "EFF": "Reuse",
    "GEO": "Groundwater",
    "LAK": "Surface Water",
    "OGW": "Groundwater",
    "OSW": "Surface Water",
    "RES": "Reservoir",
    "SPR": "Surface Water",
    "STO": "Storage",
    "STR": "Surface Water",
    "UG": "Groundwater",
    "UKN": "Unknown",
}

def assignWaterSourceTypeCV(colrowValue):
    """Translate a source code into a water source type.

    Returns the matching entry of ``UnknownWSCVDict`` after trimming
    surrounding whitespace from the code, or "WaDE Unspecified" when the
    value is empty, missing (None / NaN) or not a known code.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "WaDE Unspecified"
    sourceCode = colrowValue.strip()  # codes may carry padding
    if sourceCode in UnknownWSCVDict:
        return UnknownWSCVDict[sourceCode]
    return "WaDE Unspecified"

# Translate the source code of every record into a water source type.
# Series.apply walks only the 'source' column instead of building a row object per record.
dfin['in_WaterSourceTypeCV'] = dfin['source'].apply(assignWaterSourceTypeCV)

In [None]:
# For creating SiteTypeCV
# Lookup table: source code -> site type.
UnknownSTCVDict = {
    "EFF": "Effluent",
    "GEO": "Geothermal",
    "LAK": "lake",
    "OGW": "Other Ground Water",
    "OSW": "Other Surface Water",
    "RES": "Reservoir",
    "SPR": "Spring",
    "STO": "Storage",
    "STR": "stream",
    "UG": "Underground",
    "UKN": "Unknown",
}

def assignSiteTypeCV(colrowValue):
    """Translate a source code into a site type.

    Returns the matching entry of ``UnknownSTCVDict`` after trimming
    surrounding whitespace from the code, or "WaDE Unspecified" when the
    value is empty, missing (None / NaN) or not a known code.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return "WaDE Unspecified"
    siteCode = colrowValue.strip()  # codes may carry padding
    try:
        return UnknownSTCVDict[siteCode]
    except KeyError:
        return "WaDE Unspecified"

# Translate the source code of every record into a site type.
# Series.apply walks only the 'source' column instead of building a row object per record.
dfin['in_SiteTypeCV'] = dfin['source'].apply(assignSiteTypeCV)
dfin.head(1)

In [None]:
# Create the output dataframe from the combined POD + POU records in dfin.
# Columns appear in the output in the order they are first assigned below.
df = pd.DataFrame()

# Data Assessment UUID
# Assigned first: this Series gives df its row index, so the constant values
# assigned afterwards are repeated on every row.
df['WaDEUUID'] = dfin['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NVwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "NVwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NVwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = "WaDE Unspecified"
df['in_WaterSourceNativeID'] = " "  # placeholder; filled in further down with the custom "wadeID" values
df['in_WaterSourceTypeCV'] = dfin['in_WaterSourceTypeCV']

# Site Info
df['in_CoordinateAccuracy'] = "WaDE Unspecified"
df['in_CoordinateMethodCV'] = "Digitized"
df['in_County'] = dfin['County']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""  # column already exists (WaterSource Info above); re-assigning keeps its position
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfin['y']
df['in_Longitude'] = dfin['x']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = dfin['in_PODorPOUSite']
df['in_SiteName'] = dfin['site_name']
df['in_SiteNativeID'] = dfin['in_SiteNativeID']
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfin['in_SiteTypeCV']
df['in_StateCV'] = "NV"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfin['AllocationLegalStatusCV']
# Native ID is the 'app' value as text; blank or missing apps become "0".
df['in_AllocationNativeID'] =  dfin['app'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfin['owner_name']
df['in_AllocationPriorityDate'] = dfin['prior_dt']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfin['duty_balance']
df['in_BeneficialUseCategory'] = dfin['BeneficialUseCategory']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfin['permit_info']

# POD and POU records were already merged above for NV, so a single output frame is built here.
outdf = df.copy()
outdf = outdf.drop_duplicates().reset_index(drop=True)  # remove exact duplicate rows and renumber 0..n-1
print(len(outdf))
outdf.head()

## WaDE Custom Elements (due to missing state site info)

In [None]:
# Fixing empty string names

def fixEmptyString(val):
    """Replace blank-like values with the WaDE placeholder text.

    Returns "WaDE Unspecified" when ``val`` is "", a single space, the text
    "nan", or a missing value (None / NaN); any other value is returned
    unchanged.
    """
    if val in ("", " ", "nan") or pd.isnull(val):
        return "WaDE Unspecified"
    return val

In [None]:
# Replace blank-like water source types with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].apply(fixEmptyString)
outdf['in_WaterSourceTypeCV'].unique()

In [None]:
# Replace blank-like county names with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_County'] = outdf['in_County'].apply(fixEmptyString)
outdf['in_County'].unique()

In [None]:
# Replace blank-like site names with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_SiteName'] = outdf['in_SiteName'].apply(fixEmptyString)
outdf['in_SiteName'].unique()

In [None]:
# Replace blank-like site types with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].apply(fixEmptyString)
outdf['in_SiteTypeCV'].unique()

In [None]:
# Replace blank-like legal status values with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_AllocationLegalStatusCV'] = outdf['in_AllocationLegalStatusCV'].apply(fixEmptyString)
outdf['in_AllocationLegalStatusCV'].unique()

In [None]:
# Replace blank-like owner names (including records with no matching owner row) with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_AllocationOwner'] = outdf['in_AllocationOwner'].apply(fixEmptyString)
outdf['in_AllocationOwner'].unique()

In [None]:
# Replace blank-like beneficial use values with "WaDE Unspecified".
# Series.apply walks only this column instead of building a row object per record.
outdf['in_BeneficialUseCategory'] = outdf['in_BeneficialUseCategory'].apply(fixEmptyString)
outdf['in_BeneficialUseCategory'].unique()

In [None]:
# in_Latitude & in_Longitude
# Coerce both coordinate columns to numbers; values that are missing or cannot be parsed become 0.
for coordColumn in ('in_Latitude', 'in_Longitude'):
    outdf[coordColumn] = pd.to_numeric(outdf[coordColumn], errors='coerce').fillna(0)
outdf.head(1)

In [None]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
# Step 1: parse the raw priority-date values into pandas datetimes.
# NOTE(review): no errors='coerce' here, so a value that cannot be parsed raises — confirm that is intended.
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
# Step 2: render each date as mm/dd/YYYY text and parse that text again, so the column ends up as
# datetimes built from the date part only.
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

In [None]:
# Fixing in_AllocationFlow_CFS datatype
# Convert to numbers; values that are missing or cannot be parsed become 0.
flowAsNumber = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce')
outdf['in_AllocationFlow_CFS'] = flowAsNumber.fillna(0)
outdf['in_AllocationFlow_CFS'].unique()

In [None]:
# Fixing in_AllocationVolume_AF datatype
# Convert to numbers; values that are missing or cannot be parsed become 0.
volumeAsNumber = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce')
outdf['in_AllocationVolume_AF'] = volumeAsNumber.fillna(0)
outdf['in_AllocationVolume_AF'].unique()

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Build a custom water source ID: the text "wadeID" followed by ``str(colrowValue)``."""
    return f"wadeID{colrowValue!s}"

# Lookup table with one row per distinct water source type found in the output.
dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

# Number the distinct types 1..n in order of first appearance and turn each number into a "wadeID<n>" label.
# dftemp shares the lookup table's index so the labels line up when assigned.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
# Series.apply walks only the 'Count' column instead of building a row object per record.
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp["Count"].apply(assignWaterSourceNativeID)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Look up the custom water source ID for a water source type.

    Searches ``dfWaterSourceNativeID`` for rows whose 'in_WaterSourceTypeCV'
    equals ``A`` and returns the 'in_WaterSourceNativeID' of the first match.
    Returns '' when ``A`` is empty, missing (None / NaN) or has no match.
    """
    if (A == '') or (pd.isnull(A)):
        return ''
    isSameType = dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A
    matchingIDs = dfWaterSourceNativeID.loc[isSameType, 'in_WaterSourceNativeID']
    return '' if matchingIDs.empty else matchingIDs.iloc[0]

# Give every record the custom ID that belongs to its water source type.
# Series.apply walks only the 'in_WaterSourceTypeCV' column instead of building a row object per record.
outdf['in_WaterSourceNativeID'] = outdf['in_WaterSourceTypeCV'].apply(retrieveWaterSourceNativeID)
outdf['in_WaterSourceNativeID'].unique()

## Shapefile Data
- For attaching geometry to csv inputs.

In [None]:
# PoU Shapefile Data, Shapefile input
# Read the place-of-use shapefile with geopandas; only its row index and 'geometry' column are used below.
dfPoUshapetemp = gpd.read_file('shapefile/NVwr_POU.shp')
#dfPoUshapetemp = pd.DataFrame(dfPoUshapetemp)
dfPoUshapetemp.head(3)

In [None]:
# Build the geometry table: one row per shapefile record, keyed by a custom site native ID.
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
# ID is "POU" + the shapefile row index, the same pattern used for the POU csv records.
# NOTE(review): this assumes the shapefile rows are in the same order as the POU csv rows — confirm.
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp.index.astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
# All arguments below are spelled out explicitly: compare on every column, keep the first of each duplicate set.
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

### Exporting to Finished File

In [None]:
# Print the column names, dtypes and non-null counts of the output frame as a last check before export.
outdf.info()

In [None]:
# Display the output frame in the notebook for a visual check.
outdf

In [None]:
# Export the output dataframe
# Both outputs are written as zip archives holding a single csv. archive_name is set explicitly,
# as for the input backups, so the file inside the archive is named <name>.csv; when it is left out
# the member name is derived from the zip file name instead.
outdf.to_csv('Pwr_nvMain.zip', compression=dict(method='zip', archive_name='Pwr_nvMain.csv'), index=False)  # The output, save as a zip
dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.