# Pre-processing South Dakota Allocation data for WaDEQA upload.
Date Updated: 01/19/2021
Purpose: To pre-process the South Dakota data into one master file for simple DataFrame creation and extraction

Notes: N/A

In [None]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [None]:
#Working Directory and Input File
# Folder holding the raw South Dakota input, and the name of the CSV inside it.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/SouthDakota/WaterAllocation/RawInputData"
Input = "waterights_input.csv"

# Make the raw-input folder the current directory so the relative file names
# used in this notebook (the input here, the exported CSV at the end) resolve there.
os.chdir(workingDir)

# Load the whole input file into the master DataFrame and preview it.
df = pd.read_csv(Input)
df.head(3)

In [None]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
# Parse the raw values, render them as month/day/year text, then parse that
# text again so the stored column holds datetimes rebuilt from '%m/%d/%Y'.
priorDateText = pd.to_datetime(df['PRIORDATE']).dt.strftime('%m/%d/%Y')
df['PRIORDATE'] = pd.to_datetime(priorDateText)
df.head(3)

In [None]:
#Creating WaDE Owner Field.  

def retrieveOwner(FN, LN):
    """Build the WaDE owner string from a first-name and a last-name value.

    Each value is converted to text and stripped of surrounding whitespace.
    Missing values (None / NaN) and blank strings are skipped, so the result
    never contains the literal text "nan" or a dangling ", " separator.

    Args:
        FN: first-name value; may be missing.
        LN: last-name value; may be missing.

    Returns:
        "FN, LN" when both names are present, the single present name when
        only one is, and "" when neither is.
    """
    parts = []
    for value in (FN, LN):
        # str(NaN) would otherwise leak "nan" into the owner name.
        if pd.isnull(value):
            continue
        text = str(value).strip()
        if text:
            parts.append(text)
    return ", ".join(parts)
# Build the owner string for every row from its two name columns.
df['WaDEOwner'] = [retrieveOwner(firstName, lastName)
                   for firstName, lastName in zip(df['FIRST_NAME'], df['LAST_NAME'])]
df.head(3)

In [None]:
#Creating Beneficial Use.
#Need to translate SD abbreviations to a workable format.

# South Dakota beneficial-use abbreviation -> readable beneficial-use name.
BenUseDict = {
    "COM": "Commercial",
    "DOM": "Domestic",
    "FCP": "Flood Control Permit",
    "FWP": "Fish And Wildlife Propagation",
    "GEO": "Geothermal",
    "GWR": "Ground Water Remediation",
    "IND": "Industrial",
    "INS": "Institutional",
    "IRR": "Irrigation",
    "MUN": "Municipal",
    "REC": "Recreation",
    "RWS": "Rural Water System",
    "SHD": "Suburban Housing Development",
}


def retrieveBenUse(A, B, C, D):
    """Translate up to four use-type codes into one comma-separated string.

    Each code is converted to text and stripped. Missing values (None / NaN)
    and blank codes are skipped; a code that is present but not a key of
    BenUseDict is reported as "Unspecified". Skipping missing values keeps
    empty cells from being turned into the text "nan" and then reported as
    "Unspecified", and joining only the kept names avoids a leading ", "
    when the first code is blank.

    Args:
        A, B, C, D: use-type codes, in output order; any may be missing.

    Returns:
        The translated names joined with ", ", or "" when no code is present.
    """
    names = []
    for code in (A, B, C, D):
        if pd.isnull(code):
            continue
        key = str(code).strip()
        if not key:
            continue
        names.append(BenUseDict.get(key, "Unspecified"))
    return ", ".join(names)

# Translate the use-type codes of every row into one beneficial-use string.
# NOTE(review): the columns read are USE_TYPE1, 2, 4 and 5 - USE_TYPE3 is not
# used. Confirm against the input file whether that column exists and was
# skipped unintentionally.
useTypeColumns = zip(df['USE_TYPE1'], df['USE_TYPE2'], df['USE_TYPE4'], df['USE_TYPE5'])
df['input_Benuse'] = [retrieveBenUse(use1, use2, use4, use5)
                      for use1, use2, use4, use5 in useTypeColumns]
df

In [None]:
#Creating input_SiteType field from the SOURCE code

# SOURCE code -> readable site type.
SiteTypeDict = {
    "S": "Surface Water",
    "G": "Groundwater",
    "B": "Both",
}


def retrieveSiteType(colrowValue):
    """Translate a SOURCE code into its site-type name.

    Args:
        colrowValue: the raw code; may be missing (None / NaN) or blank.

    Returns:
        The matching SiteTypeDict value, or "Unspecified" when the value is
        missing, blank, or not a key of SiteTypeDict.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get replaces a bare `except:` that would have hidden any error,
    # not only the unknown-code case; a blank code simply is not a key.
    return SiteTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Translate the SOURCE code of every row into its site type.
df['input_SiteType'] = [retrieveSiteType(sourceCode) for sourceCode in df['SOURCE']]
df.head(3)

In [None]:
#Creating allocation status

# STATUS code -> readable allocation status.
AlloStatusDict = {
    "CA": "Cancelled",
    "DF": "Deferred",
    "DN": "Denied",
    "FU": "Future Use",
    "HD": "Hold",
    "IP": "Incorporated",
    "LC": "License",
    "OC": "Owner Change",
    "PE": "Permit",
    "WI": "Withdrawn",
}


def retrieveStatus(colrowValue):
    """Translate a STATUS code into its allocation-status name.

    Args:
        colrowValue: the raw code; may be missing (None / NaN) or blank.

    Returns:
        The matching AlloStatusDict value, or "Unspecified" when the value is
        missing, blank, or not a key of AlloStatusDict.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get replaces a bare `except:` that would have hidden any error,
    # not only the unknown-code case; a blank code simply is not a key.
    return AlloStatusDict.get(str(colrowValue).strip(), "Unspecified")

# Translate the STATUS code of every row into its allocation status.
df['input_Status'] = [retrieveStatus(statusCode) for statusCode in df['STATUS']]
df.head(3)

## WaDE Custom Elements (due to missing state info)

In [None]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the WaDE custom site native ID for a running site number.

    The prefix carries the South Dakota state code ("SD"), matching the
    state this script processes.

    Args:
        colrowValue: the site's running number (converted with str()).

    Returns:
        "WaDESD_S" followed by the text of colrowValue.
    """
    return "WaDESD_S" + str(colrowValue)

# One row per unique (latitude, longitude, site name) combination in df,
# keeping the index label of the first row each combination appears on.
siteColumnNames = {'LATITUDE': 'in_Latitude',
                   'LONGITUDE': 'in_Longitude',
                   'DIVERSION1': 'in_SiteName'}
dfSiteNativeID = df[list(siteColumnNames)].rename(columns=siteColumnNames).drop_duplicates()

# Number the unique sites 1..N in table order and turn each number into an ID.
dftemp = pd.DataFrame({"Count": range(1, len(dfSiteNativeID.index) + 1)},
                      index=dfSiteNativeID.index)
dfSiteNativeID['in_SiteNativeID'] = [assignSiteUUID(siteNumber) for siteNumber in dftemp["Count"]]

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B, D):
    """Look up the WaDE custom site native ID for one site.

    The lookup table is the module-level dfSiteNativeID, matched on its
    in_Latitude, in_Longitude and in_SiteName columns.

    A missing value (None / NaN) in A, B or D is matched against missing
    values in the table. A plain `==` never matches NaN, which would leave
    every site with a missing name (or a single missing coordinate) without
    an ID even though the table holds a row for it.

    Args:
        A: latitude of the site.
        B: longitude of the site.
        D: site name.

    Returns:
        The in_SiteNativeID of the first matching row, or '' when both
        coordinates are blank or missing, or when no row matches.
    """
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        return ''

    def _matches(column, value):
        # Boolean mask of table rows whose `column` equals `value`,
        # treating missing as equal to missing.
        series = dfSiteNativeID[column]
        if pd.isnull(value):
            return series.isnull()
        return series == value

    ml = dfSiteNativeID.loc[_matches('in_Latitude', A) &
                            _matches('in_Longitude', B) &
                            _matches('in_SiteName', D), 'in_SiteNativeID']
    if ml.empty:
        return ''
    return ml.iloc[0]

# Attach the custom site native ID to every row by looking up its
# latitude / longitude / site-name combination.
siteKeys = zip(df['LATITUDE'], df['LONGITUDE'], df['DIVERSION1'])
df['in_SiteNativeID'] = [retrieveSiteNativeID(latitude, longitude, siteName)
                         for latitude, longitude, siteName in siteKeys]
df

In [None]:
# Creating WaDE Custom water source native ID for easy watersource identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the WaDE custom water source native ID for a running number.

    The prefix carries the South Dakota state code ("SD"), matching the
    state this script processes.

    Args:
        colrowValue: the water source's running number (converted with str()).

    Returns:
        "WaDESD_WS" followed by the text of colrowValue.
    """
    return "WaDESD_WS" + str(colrowValue)

# One row per unique BASIN value in df, keeping the index label of the
# first row each value appears on.
dfWaterSourceNativeID = df[['BASIN']].rename(columns={'BASIN': 'in_WaterSourceName'}).drop_duplicates()

# Number the unique water sources 1..N in table order and turn each number into an ID.
dftemp = pd.DataFrame({"Count": range(1, len(dfWaterSourceNativeID.index) + 1)},
                      index=dfWaterSourceNativeID.index)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = [assignWaterSourceNativeID(sourceNumber)
                                                   for sourceNumber in dftemp["Count"]]

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Look up the WaDE custom water source native ID for a water source name.

    The lookup table is the module-level dfWaterSourceNativeID, matched on
    its in_WaterSourceName column.

    Args:
        A: water source name.

    Returns:
        The in_WaterSourceNativeID of the first matching row, or '' when A is
        blank or missing, or when no row matches.
    """
    nothingToLookUp = (A == '') or (pd.isnull(A))
    if nothingToLookUp:
        return ''

    nameMatches = dfWaterSourceNativeID['in_WaterSourceName'] == A
    matchedIDs = dfWaterSourceNativeID.loc[nameMatches, 'in_WaterSourceNativeID']
    if matchedIDs.empty:
        return ''
    return matchedIDs.iloc[0]

# Attach the custom water source native ID to every row by looking up its BASIN value.
df['in_WaterSourceNativeID'] = [retrieveWaterSourceNativeID(basinName) for basinName in df['BASIN']]
df

## Export Outputs

In [None]:
# Lift the row and column display limits only while printing, so the
# dtype of every column is listed in full.
showEverything = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*showEverything):
    print(df.dtypes)

In [None]:
#Exporting to Finished File
# Write the processed master table to CSV without the DataFrame index column.
outputFile = 'P_SouthDakotaMaster.csv'  # The output
df.to_csv(outputFile, index=False)