# Pre-processing South Dakota Allocation data for WaDEQA upload.
Date Updated: 01/19/2021
Purpose: To pre-process the South Dakota data into one master file for simple DataFrame creation and extraction

Notes: N/A

In [1]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # Raise the column display limit to 999 so wide DataFrames are shown in full

In [2]:
# Working Directory
# Folder holding the raw input files. Defaults to the shared-drive location; set the
# WADE_SD_RAW_DIR environment variable to point at a different copy of the data
# without editing the notebook.
workingDir = os.environ.get(
    "WADE_SD_RAW_DIR",
    "G:/Shared drives/WaDE Data/SouthDakota/WaterAllocation/RawInputData")
os.chdir(workingDir)  # later relative paths (input and output CSV) resolve against this folder

In [3]:
# Input file: read the raw CSV (path is relative to the current working directory) into df
Input = "waterights_input.csv"
df = pd.read_csv(Input)
df.head(3)  # preview the first three rows

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK
0,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550.0,MC,JR,10160010,12/8/1988 0:00,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,7/21/1989 0:00,LC,S,,,BEAVER CREEK,FCP,,,,,0.0,0.0,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,4/1/1980 0:00,LC,S,,,BAD RIVER,FCP,,,,,0.0,0.0,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf


In [4]:
# Update datatype of Priority Date to fit WaDE 2.0 structure
# Parse the raw date strings once and zero out the time-of-day with .dt.normalize(),
# rather than formatting back to text with strftime and parsing a second time.
# Missing dates stay NaT either way.
df['PRIORDATE'] = pd.to_datetime(df['PRIORDATE']).dt.normalize()
df.head(3)

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK
0,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550.0,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.0,0.0,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.0,0.0,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf


In [5]:
# first & last name function
def assignownerName(fName, lName):
    """Build a single owner name from separate first- and last-name values.

    Each value is cleaned by removing every "*" character and stripping
    surrounding whitespace. Missing values (None / NaN) are treated as empty.

    Parameters
    ----------
    fName : first-name value (string or missing).
    lName : last-name value (string or missing).

    Returns
    -------
    str
        "<first> <last>" when both parts are present, the single non-empty
        part when only one is present, or "" when both are empty.
    """

    def _cleanName(value):
        # Test for missing values BEFORE str(): str(NaN) is the literal text "nan",
        # which would otherwise be emitted as part of the owner name.
        if pd.isnull(value):
            return ""
        return str(value).replace("*", "").strip()

    # Join the cleaned (stripped) parts so padding in the raw fields never
    # leaks into the combined name.
    nameParts = [part for part in (_cleanName(fName), _cleanName(lName)) if part != ""]
    return " ".join(nameParts)

# Build the owner name for every row from its FIRST_NAME / LAST_NAME pair.
df['in_AllocationOwner'] = [
    assignownerName(firstName, lastName)
    for firstName, lastName in zip(df['FIRST_NAME'], df['LAST_NAME'])
]
df

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner
0,1,FC10-3,43.71384,-97.60780,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.00,0.00,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.00,0.00,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.00,0.00,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON
3,4,FC12-3,45.08301,-97.63190,RAYMER,BURTON,RR 1 BOX 1A,BRADLEY,SD,57217,,CK,JR,10170201,1989-08-04,LC,S,,,RUNOFF,FCP,,,,,0.00,0.00,0.0,0.0,9/28/1994 0:00,8/6/1992 0:00,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC12-3.pdf,BURTON RAYMER
4,5,FC13-3,43.60704,-96.63436,NORTHERN STATES POWER CO,,414 NICOLLET MALL,MINNEAPOLIS,MN,55401,1927,MA,SR,10170203,1992-04-30,CA,S,,,BIG SIOUX RIVER,FCP,,,,,0.00,0.00,0.0,0.0,4/28/1997 0:00,,7/21/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC13-3.pdf,NORTHERN STATES POWER CO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19825,19826,8410-3,42.91329,-97.55569,HARMELINK,JASON,55334 HWY 121,CROFTON,NE,68730,,YA,MI,10170101,2005-07-28,LC,G,DKOT,,GROUNDWATER,COM,,,,LCO,0.29,0.42,0.0,0.0,,2/5/2020 0:00,9/5/2019 0:00,KD,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div3\8410-3.pdf,JASON HARMELINK
19826,19827,2808-2,43.99818,-103.79458,MT MEADOWS STORE & CAMPGROUND LLC,,PO BOX 129,HILL CITY,SD,57745,,PE,UC,10120110,1992-09-25,LC,G,CRSL,,GROUNDWATER,COM,,,,,0.06,0.11,0.0,0.0,,1/10/2020 0:00,6/17/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\2808-2.pdf,MT MEADOWS STORE & CAMPGROUND LLC
19827,19828,2808-2,44.00311,-103.79004,MT MEADOWS STORE & CAMPGROUND LLC,,PO BOX 129,HILL CITY,SD,57745,,PE,UC,10120110,1992-09-25,LC,G,CRSL,,GROUNDWATER,COM,,,,,0.06,0.11,0.0,0.0,,1/10/2020 0:00,6/17/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\2808-2.pdf,MT MEADOWS STORE & CAMPGROUND LLC
19828,19829,8412-3,42.97987,-97.41150,CONCRETE MATERIALS COMPANY,,1500 N SWEETMAN PLACE,SIOUX FALLS,SD,57107,,YA,JR,10160011,2019-10-21,LC,G,LJM,,GROUNDWATER,IND,,,,,3.80,7.80,0.0,0.0,,2/6/2020 0:00,8/29/2019 0:00,KD,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div3\8412-3.pdf,CONCRETE MATERIALS COMPANY


In [6]:
# Creating Beneficial Use.
# Need to translate SD abbreviations to a workable format.

BenUseDict = {
"COM" : "Commercial",
"DOM" : "Domestic",
"FCP" : "Flood Control Permit",
"FWP" : "Fish And Wildlife Propagation",
"GEO" : "Geothermal",
"GWR" : "Ground Water Remediation",
"IND" : "Industrial",
"INS" : "Institutional",
"IRR" : "Irrigation",
"MUN" : "Municipal",
"REC" : "Recreation",
"RWS" : "Rural Water System",
"SHD" : "Suburban Housing Development"}

def retrieveBenUse(A, B, C, D):
    """Translate up to four use-type codes into one comma-separated label string.

    Each argument is a use-type code (or blank / missing). Codes are looked up
    in BenUseDict after stripping whitespace; a non-blank code that is not in
    the dictionary contributes "Unspecified". Blank and missing (None / NaN)
    slots contribute nothing.

    Returns
    -------
    str
        The labels in argument order joined by ", ", or "" when every slot is
        blank.
    """
    labels = []
    for rawCode in (A, B, C, D):
        # Skip missing values before str(): str(NaN) would become the code "nan"
        # and be reported as a spurious "Unspecified" use.
        if pd.isnull(rawCode):
            continue
        code = str(rawCode).strip()
        if code == "":
            continue
        labels.append(BenUseDict.get(code, "Unspecified"))

    # Joining the collected labels (instead of prefixing ", " to slots 2-4)
    # avoids a leading ", " when the first slot is blank but a later one is not.
    return ", ".join(labels)

# Translate each row's use-type codes into a readable beneficial-use string.
# NOTE(review): the input also has a USE_TYPE6 column that is not passed here - confirm that is intentional.
df['input_Benuse'] = [
    retrieveBenUse(use1, use2, use4, use5)
    for use1, use2, use4, use5 in zip(df['USE_TYPE1'],
                                      df['USE_TYPE2'],
                                      df['USE_TYPE4'],
                                      df['USE_TYPE5'])
]
df

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner,input_Benuse
0,1,FC10-3,43.71384,-97.60780,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.00,0.00,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY,Flood Control Permit
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.00,0.00,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA,Flood Control Permit
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.00,0.00,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON,Flood Control Permit
3,4,FC12-3,45.08301,-97.63190,RAYMER,BURTON,RR 1 BOX 1A,BRADLEY,SD,57217,,CK,JR,10170201,1989-08-04,LC,S,,,RUNOFF,FCP,,,,,0.00,0.00,0.0,0.0,9/28/1994 0:00,8/6/1992 0:00,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC12-3.pdf,BURTON RAYMER,Flood Control Permit
4,5,FC13-3,43.60704,-96.63436,NORTHERN STATES POWER CO,,414 NICOLLET MALL,MINNEAPOLIS,MN,55401,1927,MA,SR,10170203,1992-04-30,CA,S,,,BIG SIOUX RIVER,FCP,,,,,0.00,0.00,0.0,0.0,4/28/1997 0:00,,7/21/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC13-3.pdf,NORTHERN STATES POWER CO,Flood Control Permit
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19825,19826,8410-3,42.91329,-97.55569,HARMELINK,JASON,55334 HWY 121,CROFTON,NE,68730,,YA,MI,10170101,2005-07-28,LC,G,DKOT,,GROUNDWATER,COM,,,,LCO,0.29,0.42,0.0,0.0,,2/5/2020 0:00,9/5/2019 0:00,KD,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div3\8410-3.pdf,JASON HARMELINK,Commercial
19826,19827,2808-2,43.99818,-103.79458,MT MEADOWS STORE & CAMPGROUND LLC,,PO BOX 129,HILL CITY,SD,57745,,PE,UC,10120110,1992-09-25,LC,G,CRSL,,GROUNDWATER,COM,,,,,0.06,0.11,0.0,0.0,,1/10/2020 0:00,6/17/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\2808-2.pdf,MT MEADOWS STORE & CAMPGROUND LLC,Commercial
19827,19828,2808-2,44.00311,-103.79004,MT MEADOWS STORE & CAMPGROUND LLC,,PO BOX 129,HILL CITY,SD,57745,,PE,UC,10120110,1992-09-25,LC,G,CRSL,,GROUNDWATER,COM,,,,,0.06,0.11,0.0,0.0,,1/10/2020 0:00,6/17/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\2808-2.pdf,MT MEADOWS STORE & CAMPGROUND LLC,Commercial
19828,19829,8412-3,42.97987,-97.41150,CONCRETE MATERIALS COMPANY,,1500 N SWEETMAN PLACE,SIOUX FALLS,SD,57107,,YA,JR,10160011,2019-10-21,LC,G,LJM,,GROUNDWATER,IND,,,,,3.80,7.80,0.0,0.0,,2/6/2020 0:00,8/29/2019 0:00,KD,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div3\8412-3.pdf,CONCRETE MATERIALS COMPANY,Industrial


In [7]:
# Creating WaterSourceTypeCV field

WSTypeDict = {
    "S" : "Surface Water",
    "G" : "Groundwater",
    "B" : "Surface and Groundwater"}

def retrieveWSType(colrowValue):
    """Map a source code to its water source type label.

    The value is converted to text and stripped, then looked up in WSTypeDict.
    Anything that is not a known code - including blank text and missing
    values, whose text form ("nan", "None") is not a key - gives "Unspecified".
    """
    # dict.get replaces the former bare try/except around the lookup; the
    # fallback value is the same.
    return WSTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Translate every SOURCE code into its water source type label.
df['in_WaterSourceTypeCV'] = df['SOURCE'].map(retrieveWSType)
df.head(3)

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner,input_Benuse,in_WaterSourceTypeCV
0,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550.0,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY,Flood Control Permit,Surface Water
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.0,0.0,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA,Flood Control Permit,Surface Water
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.0,0.0,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON,Flood Control Permit,Surface Water


In [8]:
# Creating allocation status

AlloStatusDict = {
"CA" : "Cancelled",
"DF" : "Deferred",
"DN" : "Denied",
"FU" : "Future Use",
"HD" : "Hold",
"IP" : "Incorporated",
"LC" : "License",
"OC" : "Owner Change",
"PE" : "Permit",
"WI" : "Withdrawn"}

def retrieveStatus(colrowValue):
    """Map a status code to its allocation status label.

    Missing values (None / NaN), blank text, and codes that are not keys of
    AlloStatusDict all give "Unspecified". Surrounding whitespace on the code
    is ignored.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get replaces the former bare try/except around the lookup; an empty
    # string simply misses the dictionary and takes the same fallback.
    return AlloStatusDict.get(str(colrowValue).strip(), "Unspecified")

# Translate every STATUS code into its allocation status label.
df['input_Status'] = df['STATUS'].map(retrieveStatus)
df.head(3)

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner,input_Benuse,in_WaterSourceTypeCV,input_Status
0,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550.0,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY,Flood Control Permit,Surface Water,Permit
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.0,0.0,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA,Flood Control Permit,Surface Water,License
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.0,0.0,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON,Flood Control Permit,Surface Water,License


In [9]:
# Remove rows that are identical in every column.
# reset_index(drop=True) gives the result a fresh, gap-free 0..n-1 index and makes it
# an independent frame, so the column assignments in later cells do not risk a
# SettingWithCopyWarning on a filtered view of the earlier frame.
# NOTE(review): every column is compared, including OID_; if OID_ is unique per row
# nothing can be dropped here - confirm whether a subset= of columns was intended.
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner,input_Benuse,in_WaterSourceTypeCV,input_Status
0,1,FC10-3,43.71384,-97.60780,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.00,0.00,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY,Flood Control Permit,Surface Water,Permit
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.00,0.00,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA,Flood Control Permit,Surface Water,License
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.00,0.00,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON,Flood Control Permit,Surface Water,License
3,4,FC12-3,45.08301,-97.63190,RAYMER,BURTON,RR 1 BOX 1A,BRADLEY,SD,57217,,CK,JR,10170201,1989-08-04,LC,S,,,RUNOFF,FCP,,,,,0.00,0.00,0.0,0.0,9/28/1994 0:00,8/6/1992 0:00,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC12-3.pdf,BURTON RAYMER,Flood Control Permit,Surface Water,License
4,5,FC13-3,43.60704,-96.63436,NORTHERN STATES POWER CO,,414 NICOLLET MALL,MINNEAPOLIS,MN,55401,1927,MA,SR,10170203,1992-04-30,CA,S,,,BIG SIOUX RIVER,FCP,,,,,0.00,0.00,0.0,0.0,4/28/1997 0:00,,7/21/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC13-3.pdf,NORTHERN STATES POWER CO,Flood Control Permit,Surface Water,Cancelled
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19825,19826,8410-3,42.91329,-97.55569,HARMELINK,JASON,55334 HWY 121,CROFTON,NE,68730,,YA,MI,10170101,2005-07-28,LC,G,DKOT,,GROUNDWATER,COM,,,,LCO,0.29,0.42,0.0,0.0,,2/5/2020 0:00,9/5/2019 0:00,KD,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div3\8410-3.pdf,JASON HARMELINK,Commercial,Groundwater,License
19826,19827,2808-2,43.99818,-103.79458,MT MEADOWS STORE & CAMPGROUND LLC,,PO BOX 129,HILL CITY,SD,57745,,PE,UC,10120110,1992-09-25,LC,G,CRSL,,GROUNDWATER,COM,,,,,0.06,0.11,0.0,0.0,,1/10/2020 0:00,6/17/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\2808-2.pdf,MT MEADOWS STORE & CAMPGROUND LLC,Commercial,Groundwater,License
19827,19828,2808-2,44.00311,-103.79004,MT MEADOWS STORE & CAMPGROUND LLC,,PO BOX 129,HILL CITY,SD,57745,,PE,UC,10120110,1992-09-25,LC,G,CRSL,,GROUNDWATER,COM,,,,,0.06,0.11,0.0,0.0,,1/10/2020 0:00,6/17/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\2808-2.pdf,MT MEADOWS STORE & CAMPGROUND LLC,Commercial,Groundwater,License
19828,19829,8412-3,42.97987,-97.41150,CONCRETE MATERIALS COMPANY,,1500 N SWEETMAN PLACE,SIOUX FALLS,SD,57107,,YA,JR,10160011,2019-10-21,LC,G,LJM,,GROUNDWATER,IND,,,,,3.80,7.80,0.0,0.0,,2/6/2020 0:00,8/29/2019 0:00,KD,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div3\8412-3.pdf,CONCRETE MATERIALS COMPANY,Industrial,Groundwater,License


## WaDE Custom Elements (due to missing state info)

In [10]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the custom site identifier: the prefix "WaDESD_S" followed by the text form of colrowValue."""
    return "WaDESD_S" + str(colrowValue)

# Lookup table of distinct (latitude, longitude) pairs; each distinct pair gets one ID.
dfSiteNativeID = (
    df[['LATITUDE', 'LONGITUDE']]
    .rename(columns={'LATITUDE': 'in_Latitude', 'LONGITUDE': 'in_Longitude'})
    .drop_duplicates()
)

# Running counter 1..n in table order, carried on the lookup table's own index so
# the generated IDs align with the rows they belong to.
dftemp = pd.DataFrame({"Count": list(range(1, len(dfSiteNativeID.index) + 1))},
                      index=dfSiteNativeID.index)
dfSiteNativeID['in_SiteNativeID'] = dftemp["Count"].map(assignSiteUUID)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Look up the custom site ID for a latitude (A) / longitude (B) pair.

    Returns '' when both values are empty strings or both are missing, or when
    dfSiteNativeID has no row whose in_Latitude and in_Longitude equal A and B.
    Otherwise returns the in_SiteNativeID of the first matching row.
    """
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        return ''

    sameLatitude = dfSiteNativeID['in_Latitude'] == A
    sameLongitude = dfSiteNativeID['in_Longitude'] == B
    matches = dfSiteNativeID.loc[sameLatitude & sameLongitude, 'in_SiteNativeID']
    return '' if matches.empty else matches.iloc[0]

# Attach the custom site ID to every row by its LATITUDE / LONGITUDE pair.
df['in_SiteNativeID'] = [
    retrieveSiteNativeID(latitude, longitude)
    for latitude, longitude in zip(df['LATITUDE'], df['LONGITUDE'])
]
df.head(3)

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner,input_Benuse,in_WaterSourceTypeCV,input_Status,in_SiteNativeID
0,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550.0,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY,Flood Control Permit,Surface Water,Permit,WaDESD_S1
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.0,0.0,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA,Flood Control Permit,Surface Water,License,WaDESD_S2
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.0,0.0,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON,Flood Control Permit,Surface Water,License,WaDESD_S3


In [11]:
# Creating WaDE Custom water source native ID for easy watersource identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the custom water source identifier: the prefix "WaDESD_WS" followed by the text form of colrowValue."""
    return "WaDESD_WS" + str(colrowValue)

# Lookup table of distinct (water source name, water source type) pairs; each distinct pair gets one ID.
dfWaterSourceNativeID = (
    df[['DIVERSION1', 'in_WaterSourceTypeCV']]
    .rename(columns={'DIVERSION1': 'in_WaterSourceName'})
    .drop_duplicates()
)

# Running counter 1..n in table order, carried on the lookup table's own index so
# the generated IDs align with the rows they belong to.
dftemp = pd.DataFrame({"Count": list(range(1, len(dfWaterSourceNativeID.index) + 1))},
                      index=dfWaterSourceNativeID.index)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp["Count"].map(assignWaterSourceNativeID)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    """Look up the custom water source ID for a source name (A) / source type (B) pair.

    Returns '' when both values are empty strings or both are missing, or when
    dfWaterSourceNativeID has no row whose in_WaterSourceName and
    in_WaterSourceTypeCV equal A and B. Otherwise returns the
    in_WaterSourceNativeID of the first matching row.
    """
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        return ''

    sameName = dfWaterSourceNativeID['in_WaterSourceName'] == A
    sameType = dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B
    matches = dfWaterSourceNativeID.loc[sameName & sameType, 'in_WaterSourceNativeID']
    return '' if matches.empty else matches.iloc[0]

# Attach the custom water source ID to every row by its DIVERSION1 / in_WaterSourceTypeCV pair.
df['in_WaterSourceNativeID'] = [
    retrieveWaterSourceNativeID(sourceName, sourceType)
    for sourceName, sourceType in zip(df['DIVERSION1'], df['in_WaterSourceTypeCV'])
]
df.head(3)

Unnamed: 0,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK,in_AllocationOwner,input_Benuse,in_WaterSourceTypeCV,input_Status,in_SiteNativeID,in_WaterSourceNativeID
0,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550.0,MC,JR,10160010,1988-12-08,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf,MCCOOK COUNTY,Flood Control Permit,Surface Water,Permit,WaDESD_S1,WaDESD_WS1
1,2,FC11-3,43.31191,-96.60393,DYKSTRA,HAROLD,102 W 5TH # 201,CANTON,SD,57013,,LN,SR,10170203,1989-07-21,LC,S,,,BEAVER CREEK,FCP,,,,,0.0,0.0,0.0,0.0,9/28/1994 0:00,10/8/1997 0:00,7/22/1997 0:00,LB,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC11-3.pdf,HAROLD DYKSTRA,Flood Control Permit,Surface Water,License,WaDESD_S2,WaDESD_WS2
2,3,FC1-2,44.03808,-101.67197,JOHNSON,M C,607 W BRIDGE,PHILIP,SD,57567,,HK,BD,10140102,1980-04-01,LC,S,,,BAD RIVER,FCP,,,,,0.0,0.0,0.0,0.0,,8/20/2019 0:00,6/9/2019 0:00,SQ,GPS,DIVERSION POINT,10,R:\work\wr\imaging\wrinfo\wr_div2\FC1-2.pdf,M C JOHNSON,Flood Control Permit,Surface Water,License,WaDESD_S3,WaDESD_WS3


## Export Outputs

In [12]:
# List the dtype of every column before export; option_context lifts the pandas
# row/column display limits for this print only, so no entry is elided.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

OID_                               int64
PERMIT_NO                         object
LATITUDE                         float64
LONGITUDE                        float64
LAST_NAME                         object
FIRST_NAME                        object
ADDRESS2                          object
CITY                              object
STATE                             object
ZIP                               object
PLUS4                             object
COUNTY_1                          object
BASIN                             object
HYDROUNIT1                        object
PRIORDATE                 datetime64[ns]
STATUS                            object
SOURCE                            object
AQUIFER                           object
MNG_UNIT                          object
DIVERSION1                        object
USE_TYPE1                         object
USE_TYPE2                         object
USE_TYPE4                         object
USE_TYPE5                         object
USE_TYPE6       

In [13]:
# Exporting to Finished File
# Written relative to the current working directory; index=False leaves the DataFrame's row index out of the file.
df.to_csv('P_SouthDakotaMaster.csv', index=False)  # The output