# Pre-processing South Dakota Allocation data for WaDEQA upload.
Purpose: To pre-process the South Dakota data into one master file for simple DataFrame creation and extraction

Notes: N/A

In [1]:
# Needed libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
# NOTE(review): absolute, machine-specific path. os.chdir() changes the working directory
# for the whole kernel, so the relative file names used in later cells resolve against it.
workingDir = "G:/Shared drives/WaDE Data/SouthDakota/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Data: waterights

In [3]:
# Input File
fileInput = "waterights_input.csv"
df = pd.read_csv(fileInput).replace(np.nan, "")  # missing cells become "" for the string checks below

# WaDE UUID tracker for data assessment
# Runs only when the column is absent: tags each row with "sdwr" + its row index and
# writes the tagged table back over the file that was just read.
if 'WaDEUUID' not in df:
    df['WaDEUUID'] = "sdwr" + df.index.astype(str)
    df.to_csv(fileInput, index=False)  # reuse fileInput so the read and write targets cannot drift apart

print(len(df))
df.head(1)

19238


  df = pd.read_csv(fileInput).replace(np.nan, "")


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,OID_,PERMIT_NO,LATITUDE,LONGITUDE,LAST_NAME,FIRST_NAME,ADDRESS2,CITY,STATE,ZIP,PLUS4,COUNTY_1,BASIN,HYDROUNIT1,PRIORDATE,STATUS,SOURCE,AQUIFER,MNG_UNIT,DIVERSION1,USE_TYPE1,USE_TYPE2,USE_TYPE4,USE_TYPE5,USE_TYPE6,PER_CFS,LIC_CFS,PER_ACRES,LIC_ACRES,CMPLTN_DTE,LIC_DTE,INSPT_DATE,INSPECTOR,METHODCODE,REFERENCE,ACCURACY,LINK
0,sdwr0,,,1,FC10-3,43.71384,-97.6078,MCCOOK COUNTY,,BOX 550,SALEM,SD,57058,550,MC,JR,10160010,12/8/1988 0:00,PE,S,,,WOLF CREEK,FCP,,,,,0.0,0.0,0.0,0.0,3/1/1994 0:00,,,,TRSOTH100,DIVERSION POINT,140,R:\work\wr\imaging\wrinfo\wr_div3\FC10-3.pdf


In [4]:
# first & last name function
def assignownerName(fName, lName):
    """Combine a first and last name into a single owner string.

    Each part has "*" characters removed and surrounding whitespace trimmed.
    Missing parts (None / NaN / blank) are skipped, so the result is
    "FIRST LAST", just one of the two, or "" when both are missing.

    Parameters
    ----------
    fName : first-name value (any type; converted with str()).
    lName : last-name value (any type; converted with str()).

    Returns
    -------
    str
    """

    def _cleanPart(part):
        # Test for null BEFORE str(): str(None) / str(nan) are the non-empty
        # strings "None" / "nan" and would otherwise end up in the owner name.
        if pd.isnull(part):
            return ""
        return str(part).replace("*", "").strip()

    # Join the cleaned (stripped) parts, not the raw inputs, so padding in the
    # source columns does not leave extra spaces inside the combined name.
    cleanedParts = [_cleanPart(fName), _cleanPart(lName)]
    return " ".join(part for part in cleanedParts if part != "")

# Build the owner name for every record from its FIRST_NAME / LAST_NAME pair.
df['in_AllocationOwner'] = [
    assignownerName(firstName, lastName)
    for firstName, lastName in zip(df['FIRST_NAME'], df['LAST_NAME'])
]


import re
def cleanOwnerDataFunc(Val):
    """Delete the characters $ @ & . ; , / ) ( - from Val and trim both ends.

    Parameters
    ----------
    Val : str
        Owner name text.

    Returns
    -------
    str
        Val with those characters removed (not replaced by spaces) and
        leading/trailing whitespace stripped.
    """
    # Raw string: the backslash before ")" is an invalid escape sequence in a
    # normal string literal. The pattern itself is unchanged.
    Val = re.sub(r"[$@&.;,/\)(-]", "", Val).strip()
    return Val

# Strip punctuation from every owner name, then list the distinct results.
df['in_AllocationOwner'] = df['in_AllocationOwner'].map(cleanOwnerDataFunc)
df['in_AllocationOwner'].unique()

array(['MCCOOK COUNTY', 'HAROLD DYKSTRA', 'MERIDIAN MINERALS CO', ...,
       'AMBROSE HEIMER', 'RAY MASON', 'CHARLES CAPP'], dtype=object)

In [5]:
#Creating Beneficial Use.
#Need to translate SD abbreviations to a workable format.

BenUseDict = {
"COM" : "Commercial",
"DOM" : "Domestic",
"FCP" : "Flood Control Permit",
"FWP" : "Fish And Wildlife Propagation",
"GEO" : "Geothermal",
"GWR" : "Ground Water Remediation",
"IND" : "Industrial",
"INS" : "Institutional",
"IRR" : "Irrigation",
"MUN" : "Municipal",
"REC" : "Recreation",
"RWS" : "Rural Water System",
"SHD" : "Suburban Housing Development"}

def retrieveBenUse(A, B, C, D):
    """Translate up to four use-type codes into one comma-separated label string.

    Each argument is converted with str() and stripped. Blank codes are
    skipped; codes missing from BenUseDict become "WaDE Unspecified". The
    labels are joined with ", " in argument order.

    Parameters
    ----------
    A, B, C, D : use-type codes (e.g. "IRR"), or "" when not set.

    Returns
    -------
    str
        e.g. "Irrigation, Domestic"; "" when all four codes are blank.
    """
    labels = []
    for code in (A, B, C, D):
        code = str(code).strip()
        if code == "":
            continue
        labels.append(BenUseDict.get(code, "WaDE Unspecified"))

    # Joining only the labels that exist means a blank first code can no longer
    # leave a dangling ", " at the start of the result.
    return ", ".join(labels)

# Translate the four use-type code columns of every record into one label string.
# NOTE(review): the input header also lists USE_TYPE6, which is not passed here — confirm that is intended.
df['in_BeneficialUseCategory'] = [
    retrieveBenUse(use1, use2, use4, use5)
    for use1, use2, use4, use5 in zip(df['USE_TYPE1'],
                                      df['USE_TYPE2'],
                                      df['USE_TYPE4'],
                                      df['USE_TYPE5'])
]
df['in_BeneficialUseCategory'].unique()

array(['Flood Control Permit', 'Irrigation', 'Commercial, Industrial',
       'Industrial', 'Municipal', 'Rural Water System',
       'Irrigation, Rural Water System, Fish And Wildlife Propagation, Domestic',
       'Commercial', 'Commercial, Domestic',
       'Fish And Wildlife Propagation, Domestic',
       'Commercial, Recreation', 'Ground Water Remediation',
       'Fish And Wildlife Propagation', 'Recreation',
       'Suburban Housing Development, Commercial', 'Irrigation, Domestic',
       'Domestic', 'Suburban Housing Development',
       'Municipal, Industrial, Rural Water System, Suburban Housing Development',
       'Recreation, Domestic', 'Domestic, Commercial',
       'Fish And Wildlife Propagation, Recreation',
       'Fish And Wildlife Propagation, Irrigation',
       'Recreation, Municipal', 'Industrial, Domestic',
       'Municipal, Industrial, Suburban Housing Development, Commercial',
       'Commercial, Irrigation', 'Geothermal', 'Institutional',
       'Domestic, Re

In [6]:
#Creating WaterSourceTypeCV field

WSTypeDict = {
    "S" : "Surface Water",
    "G" : "Groundwater",
    "B" : "Surface Water and Groundwater"}

def retrieveWSType(colrowValue):
    """Map a source code ("S", "G", "B") to its water source type label.

    The value is converted with str() and stripped before the lookup. Blank
    values and any code not in WSTypeDict return "WaDE Unspecified".

    Parameters
    ----------
    colrowValue : source code value from one record.

    Returns
    -------
    str
    """
    sourceCode = str(colrowValue).strip()
    # dict.get covers the blank and unknown-code cases without a bare except.
    return WSTypeDict.get(sourceCode, "WaDE Unspecified")

# Translate every SOURCE code, then list the distinct labels produced.
df['in_WaterSourceTypeCV'] = df['SOURCE'].map(retrieveWSType)
df['in_WaterSourceTypeCV'].unique()

array(['Surface Water', 'Groundwater', 'Surface Water and Groundwater',
       'WaDE Unspecified'], dtype=object)

In [7]:
#Creating allocation status

AlloStatusDict = {
"CA" : "Cancelled",
"DF" : "Deferred",
"DN" : "Denied",
"FU" : "Future Use",
"HD" : "Hold",
"IP" : "Incorporated",
"LC" : "License",
"OC" : "Owner Change",
"PE" : "Permit",
"WI" : "Withdrawn"}

def retrieveStatus(colrowValue):
    """Map a two-letter status code to its allocation legal status label.

    The value is converted with str() and stripped before the lookup. Blank,
    missing, and unrecognised codes all return "WaDE Unspecified".

    Parameters
    ----------
    colrowValue : status code value from one record.

    Returns
    -------
    str
    """
    statusCode = str(colrowValue).strip()
    # dict.get covers blank, null ("nan"/"None" after str()) and unknown codes
    # without a bare except.
    return AlloStatusDict.get(statusCode, "WaDE Unspecified")

# Translate every STATUS code, then list the distinct labels produced.
df['in_AllocationLegalStatusCV'] = df['STATUS'].map(retrieveStatus)
df['in_AllocationLegalStatusCV'].unique()

array(['Permit', 'License', 'Cancelled', 'Deferred', 'Denied',
       'Withdrawn', 'WaDE Unspecified', 'Future Use', 'Incorporated',
       'Owner Change'], dtype=object)

In [8]:
# Creating the output Dataframe for PODs.
# One output row per input row; constant columns are broadcast to every row.

dfPOD = pd.DataFrame(index=df.index)

# Data Assessment UUID
dfPOD['WaDEUUID'] = df['WaDEUUID']

# Water Source
dfPOD["in_WaterSourceName"] = df['DIVERSION1']
dfPOD["in_WaterSourceTypeCV"] = df['in_WaterSourceTypeCV']

# Site
dfPOD["in_CoordinateAccuracy"] = "WaDE Unspecified"
dfPOD["in_CoordinateMethodCV"] = "WaDE Unspecified"
dfPOD['in_HUC12'] = ""
dfPOD['in_HUC8'] = df['HYDROUNIT1']
dfPOD['in_County'] = "WaDE Unspecified"
dfPOD["in_Latitude"] = df['LATITUDE']
dfPOD["in_Longitude"] = df['LONGITUDE']
dfPOD["in_PODorPOUSite"] = "POD"
dfPOD["in_SiteName"] = "WaDE Unspecified"
dfPOD["in_SiteNativeID"] = ""  #make custom id below
dfPOD["in_SiteTypeCV"] = "WaDE Unspecified"
dfPOD["in_StateCV"] = "SD"

# Allocation
dfPOD["in_AllocationApplicationDate"] = ""
dfPOD["in_AllocationExpirationDate"] = ""
# The input frame has NaN replaced by "", and a plain .astype(float) raises on "".
# Coerce first so blank / non-numeric cells become NaN instead of stopping the cell.
dfPOD["in_AllocationFlow_CFS"] = pd.to_numeric(df['PER_CFS'], errors='coerce').astype(float)
# NOTE(review): PER_ACRES is loaded into the acre-feet volume field — confirm the source unit.
dfPOD["in_AllocationVolume_AF"] = pd.to_numeric(df['PER_ACRES'], errors='coerce').astype(float)
dfPOD['in_AllocationLegalStatusCV'] = df['in_AllocationLegalStatusCV']
dfPOD["in_AllocationNativeID"] = df['PERMIT_NO']
dfPOD['in_AllocationOwner'] = df['in_AllocationOwner']
dfPOD['in_AllocationPriorityDate'] = df['PRIORDATE']
dfPOD['in_AllocationTimeframeEnd'] = ""
dfPOD['in_AllocationTimeframeStart'] = ""
dfPOD['in_AllocationTypeCV'] = "WaDE Unspecified"
dfPOD["in_BeneficialUseCategory"] = df['in_BeneficialUseCategory']
dfPOD['in_CommunityWaterSupplySystem'] = ""
dfPOD['in_ExemptOfVolumeFlowPriority'] = "0"
dfPOD["in_IrrigatedAcreage"] = ""
dfPOD["in_IrrigationMethodCV"] = ""
# NOTE(review): every URL uses the wr_div3 folder regardless of permit — confirm against the LINK column.
dfPOD["in_WaterAllocationNativeURL"] = "https://danr.sd.gov/wrimage/wrinfo/wr_div3/" + df['PERMIT_NO'] + ".pdf"

dfPOD = dfPOD.drop_duplicates().reset_index(drop=True)
print(len(dfPOD))
dfPOD.head(1)

19238


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL
0,sdwr0,WOLF CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10160010,WaDE Unspecified,43.71384,-97.6078,POD,WaDE Unspecified,,WaDE Unspecified,SD,,,0.0,0.0,Permit,FC10-3,MCCOOK COUNTY,12/8/1988 0:00,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...


## WaDE Custom Elements (due to missing state info)

In [9]:
# Fixing empty string names

def fixEmptyString(val):
    """Return val as a stripped string, or "WaDE Unspecified" when it is missing.

    Missing means None / NaN, an empty string, or a string of only whitespace.

    Parameters
    ----------
    val : any value; non-null values are converted with str().

    Returns
    -------
    str
    """
    # Null test must come before str(): str(nan) is the non-empty text "nan",
    # which would otherwise be returned as if it were a real name.
    if pd.isnull(val):
        return "WaDE Unspecified"
    outString = str(val).strip()
    if outString == "":
        return "WaDE Unspecified"
    return outString

In [10]:
# Replace blank water source names with the WaDE placeholder and list the distinct names.
dfPOD['in_WaterSourceName'] = dfPOD['in_WaterSourceName'].map(fixEmptyString)
dfPOD['in_WaterSourceName'].unique()

array(['WOLF CREEK', 'BEAVER CREEK', 'DRY DRAW', ..., 'LAKE',
       'W BRANCH BULL CREEK', 'UNNAMED DRY CREEK'], dtype=object)

In [11]:
# Replace blank legal status values with the WaDE placeholder and list the distinct values.
dfPOD['in_AllocationLegalStatusCV'] = dfPOD['in_AllocationLegalStatusCV'].map(fixEmptyString)
dfPOD['in_AllocationLegalStatusCV'].unique()

array(['Permit', 'License', 'Cancelled', 'Deferred', 'Denied',
       'Withdrawn', 'WaDE Unspecified', 'Future Use', 'Incorporated',
       'Owner Change'], dtype=object)

In [12]:
# Replace blank beneficial use values with the WaDE placeholder and list the distinct values.
dfPOD['in_BeneficialUseCategory'] = dfPOD['in_BeneficialUseCategory'].map(fixEmptyString)
dfPOD['in_BeneficialUseCategory'].unique()

array(['Flood Control Permit', 'Irrigation', 'Commercial, Industrial',
       'Industrial', 'Municipal', 'Rural Water System',
       'Irrigation, Rural Water System, Fish And Wildlife Propagation, Domestic',
       'Commercial', 'Commercial, Domestic',
       'Fish And Wildlife Propagation, Domestic',
       'Commercial, Recreation', 'Ground Water Remediation',
       'Fish And Wildlife Propagation', 'Recreation',
       'Suburban Housing Development, Commercial', 'Irrigation, Domestic',
       'Domestic', 'Suburban Housing Development',
       'Municipal, Industrial, Rural Water System, Suburban Housing Development',
       'Recreation, Domestic', 'Domestic, Commercial',
       'Fish And Wildlife Propagation, Recreation',
       'Fish And Wildlife Propagation, Irrigation',
       'Recreation, Municipal', 'Industrial, Domestic',
       'Municipal, Industrial, Suburban Housing Development, Commercial',
       'Commercial, Irrigation', 'Geothermal', 'Institutional',
       'Domestic, Re

In [13]:
# Changing datatype of date fields to fit WaDE.
dfPOD['in_AllocationPriorityDate'] = (
    pd.to_datetime(dfPOD['in_AllocationPriorityDate'], errors = 'coerce')  # unparseable text -> NaT
    .dt.normalize()  # keep the calendar date only (time set to midnight)
)
dfPOD.head()

Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL
0,sdwr0,WOLF CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10160010,WaDE Unspecified,43.71384,-97.6078,POD,WaDE Unspecified,,WaDE Unspecified,SD,,,0.0,0.0,Permit,FC10-3,MCCOOK COUNTY,1988-12-08,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...
1,sdwr1,BEAVER CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10170203,WaDE Unspecified,43.31191,-96.60393,POD,WaDE Unspecified,,WaDE Unspecified,SD,,,0.0,0.0,License,FC11-3,HAROLD DYKSTRA,1989-07-21,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...
2,sdwr10,DRY DRAW,Surface Water,WaDE Unspecified,WaDE Unspecified,,10170203,WaDE Unspecified,43.63269,-96.56461,POD,WaDE Unspecified,,WaDE Unspecified,SD,,,0.0,0.0,Cancelled,FC7-3,MERIDIAN MINERALS CO,1986-12-29,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC7...
3,sdwr100,GROUNDWATER,Groundwater,WaDE Unspecified,WaDE Unspecified,,10170202,WaDE Unspecified,44.42416,-96.90175,POD,WaDE Unspecified,,WaDE Unspecified,SD,,,1.79,125.0,License,1017-3,NATASHA SWIER,1963-02-11,,,WaDE Unspecified,Irrigation,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/101...
4,sdwr1000,BULL CREEK TRIBUTARY,Surface Water,WaDE Unspecified,WaDE Unspecified,,10140101,WaDE Unspecified,43.26114,-99.58091,POD,WaDE Unspecified,,WaDE Unspecified,SD,,,1.89,279.9,License,1422-2,GERALD E GERGEN,1976-08-20,,,WaDE Unspecified,Irrigation,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/142...


In [14]:
# Fixing in_AllocationFlow_CFS datatype
dfPOD['in_AllocationFlow_CFS'] = (
    dfPOD['in_AllocationFlow_CFS']
    .pipe(pd.to_numeric, errors='coerce')  # non-numeric entries become NaN
    .fillna(0)                             # and NaN is stored as 0
)
dfPOD['in_AllocationFlow_CFS'].unique()

array([0.000e+00, 1.790e+00, 1.890e+00, 1.190e+00, 9.000e-01, 5.260e+00,
       1.880e+00, 4.000e+00, 3.410e+00, 2.000e+00, 1.000e+00, 7.800e-01,
       2.220e+00, 5.500e-02, 7.000e-01, 2.200e-02, 1.780e+00, 3.500e-02,
       1.100e-01, 5.000e-01, 1.910e+00, 8.900e-02, 2.200e-01, 2.700e-01,
       6.000e-02, 3.550e-01, 4.500e-02, 2.070e+00, 2.400e-01, 7.100e-01,
       1.550e-01, 6.700e-01, 1.800e-01, 5.600e-01, 1.220e+00, 1.660e+00,
       1.000e-01, 6.600e-02, 8.900e-01, 7.000e-02, 2.120e+00, 5.900e-01,
       1.670e+00, 1.770e+00, 1.450e+00, 4.300e-01, 8.500e-02, 2.890e+00,
       1.110e+00, 2.900e+00, 9.300e-01, 3.700e-01, 3.350e+00, 1.330e-01,
       2.450e-01, 1.830e+00, 7.500e-01, 7.860e-01, 2.100e-01, 1.370e+00,
       5.400e-01, 7.100e-02, 9.700e-01, 3.340e+00, 1.560e+00, 1.310e+00,
       2.670e+00, 4.400e-01, 4.400e-02, 2.080e+00, 3.400e-01, 1.400e-01,
       1.410e+00, 3.330e-01, 4.600e-01, 1.670e-01, 3.610e+00, 1.560e-01,
       3.300e-02, 8.340e+00, 1.503e+01, 2.110e+00, 

In [15]:
# Fixing in_AllocationVolume_AF datatype
dfPOD['in_AllocationVolume_AF'] = (
    dfPOD['in_AllocationVolume_AF']
    .pipe(pd.to_numeric, errors='coerce')  # non-numeric entries become NaN
    .fillna(0)                             # and NaN is stored as 0
)
dfPOD['in_AllocationVolume_AF'].unique()

array([  0.  , 125.  , 279.9 , ..., 111.7 , 159.4 ,  89.95])

In [16]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the custom site ID "WaDESD_S" followed by str(colrowValue)."""
    string1 = str(colrowValue)
    outstring = "WaDESD_S" + string1
    return outstring

# One row per distinct (latitude, longitude) pair found in dfPOD.
dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = dfPOD['in_Latitude']
dfSiteNativeID['in_Longitude'] = dfPOD['in_Longitude']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

# Number the distinct pairs 1..n in order of first appearance.
dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# (latitude, longitude) -> site ID. Built once so each record below costs a single
# dictionary lookup instead of a scan of the whole dfSiteNativeID table.
siteNativeIDLookup = dict(zip(zip(dfSiteNativeID['in_Latitude'], dfSiteNativeID['in_Longitude']),
                              dfSiteNativeID['in_SiteNativeID']))

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Return the custom site ID for latitude A and longitude B.

    Returns '' when both values are '', when either value is null, or when the
    pair is not present in the lookup.
    """
    # Null coordinates never matched under == in a table scan; keep that explicit here.
    if (A == '' and B == '') or pd.isnull(A) or pd.isnull(B):
        return ''
    return siteNativeIDLookup.get((A, B), '')

dfPOD['in_SiteNativeID'] = [retrieveSiteNativeID(lat, lon)
                            for lat, lon in zip(dfPOD['in_Latitude'], dfPOD['in_Longitude'])]
dfPOD.head(3)

Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL
0,sdwr0,WOLF CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10160010,WaDE Unspecified,43.71384,-97.6078,POD,WaDE Unspecified,WaDESD_S1,WaDE Unspecified,SD,,,0.0,0.0,Permit,FC10-3,MCCOOK COUNTY,1988-12-08,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...
1,sdwr1,BEAVER CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10170203,WaDE Unspecified,43.31191,-96.60393,POD,WaDE Unspecified,WaDESD_S2,WaDE Unspecified,SD,,,0.0,0.0,License,FC11-3,HAROLD DYKSTRA,1989-07-21,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...
2,sdwr10,DRY DRAW,Surface Water,WaDE Unspecified,WaDE Unspecified,,10170203,WaDE Unspecified,43.63269,-96.56461,POD,WaDE Unspecified,WaDESD_S3,WaDE Unspecified,SD,,,0.0,0.0,Cancelled,FC7-3,MERIDIAN MINERALS CO,1986-12-29,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC7...


In [17]:
# Creating WaDE Custom water source native ID for easy watersource identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the custom water source ID "WaDESD_WS" followed by str(colrowValue)."""
    string1 = str(colrowValue)
    outstring = "WaDESD_WS" + string1
    return outstring

# One row per distinct (water source name, water source type) pair found in dfPOD.
dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = dfPOD['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfPOD['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

# Number the distinct pairs 1..n in order of first appearance.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# (name, type) -> water source ID. Built once so each record below costs a single
# dictionary lookup instead of a scan of the whole dfWaterSourceNativeID table.
waterSourceNativeIDLookup = dict(zip(zip(dfWaterSourceNativeID['in_WaterSourceName'],
                                         dfWaterSourceNativeID['in_WaterSourceTypeCV']),
                                     dfWaterSourceNativeID['in_WaterSourceNativeID']))

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    """Return the custom water source ID for name A and type B.

    Returns '' when both values are '', when either value is null, or when the
    pair is not present in the lookup.
    """
    # Null values never matched under == in a table scan; keep that explicit here.
    if (A == '' and B == '') or pd.isnull(A) or pd.isnull(B):
        return ''
    return waterSourceNativeIDLookup.get((A, B), '')

dfPOD['in_WaterSourceNativeID'] = [retrieveWaterSourceNativeID(name, sourceType)
                                   for name, sourceType in zip(dfPOD['in_WaterSourceName'],
                                                               dfPOD['in_WaterSourceTypeCV'])]
dfPOD.head(3)

Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL,in_WaterSourceNativeID
0,sdwr0,WOLF CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10160010,WaDE Unspecified,43.71384,-97.6078,POD,WaDE Unspecified,WaDESD_S1,WaDE Unspecified,SD,,,0.0,0.0,Permit,FC10-3,MCCOOK COUNTY,1988-12-08,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...,WaDESD_WS1
1,sdwr1,BEAVER CREEK,Surface Water,WaDE Unspecified,WaDE Unspecified,,10170203,WaDE Unspecified,43.31191,-96.60393,POD,WaDE Unspecified,WaDESD_S2,WaDE Unspecified,SD,,,0.0,0.0,License,FC11-3,HAROLD DYKSTRA,1989-07-21,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC1...,WaDESD_WS2
2,sdwr10,DRY DRAW,Surface Water,WaDE Unspecified,WaDE Unspecified,,10170203,WaDE Unspecified,43.63269,-96.56461,POD,WaDE Unspecified,WaDESD_S3,WaDE Unspecified,SD,,,0.0,0.0,Cancelled,FC7-3,MERIDIAN MINERALS CO,1986-12-29,,,WaDE Unspecified,Flood Control Permit,,0,,,https://danr.sd.gov/wrimage/wrinfo/wr_div3/FC7...,WaDESD_WS3


## Export Outputs

In [18]:
# Print the dtype of every column with pandas' row/column display limits lifted for this block only.
# NOTE(review): this inspects the input frame `df`, while the frame exported below is `dfPOD` — confirm which was intended.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

WaDEUUID                       object
ReasonRemoved                  object
IncompleteField                object
OID_                            int64
PERMIT_NO                      object
LATITUDE                      float64
LONGITUDE                     float64
LAST_NAME                      object
FIRST_NAME                     object
ADDRESS2                       object
CITY                           object
STATE                          object
ZIP                            object
PLUS4                          object
COUNTY_1                       object
BASIN                          object
HYDROUNIT1                     object
PRIORDATE                      object
STATUS                         object
SOURCE                         object
AQUIFER                        object
MNG_UNIT                       object
DIVERSION1                     object
USE_TYPE1                      object
USE_TYPE2                      object
USE_TYPE4                      object
USE_TYPE5   

In [19]:
# Export the output dataframe
# Written as a zip-compressed CSV, without the index column, into the current working directory.
dfPOD.to_csv('Pwr_SouthDakotaMain.zip', index=False, compression="zip")  # The output, save as a zip