# Pre-processing North Dakaota Allocation data for WaDEQA upload.
Date Updated: 03/28/2023
Purpose:  To pre-process the ND data into one master file for simple DataFrame creation and extraction

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/NorthDakota/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Data: Permit

In [3]:
# Input File
fileInput = "Permits_input.csv"
df = pd.read_csv(fileInput, encoding = "ISO-8859-1").replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in df:
    df['WaDEUUID'] = "ndwr" + df.index.astype(str)
    df.to_csv('Permits_input.csv', index=False)

print(len(df))
df.head(1)

12181


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,permit_ind,permit_num,pod_index,pod,permit_hol,priority_d,use_type,status,date_issue,date_cance,beneficial,county,hu_sub_bas,aquifer,subaquifer,req_acft,req_acre,req_rate,req_storag,app_acft,app_acre,app_rate,app_storag,pod_status,source,irrigation,source_nam,mainstem,impound_lo,impound_na,return_des,discharge_,period_sta,period_end,return_qua,held_acft,held_acre,held_rate,held_stora,longitude,latitude
0,ndwr0,,,1,4407,1,13007302B,"KETTERLING, ROLAND & LORRAINE",3/4/1991,Irrigation,Denied,,,1111-11-11,McIntosh,Beaver,,,204.0,135.2,1000.0,0.0,0.0,0.0,0.0,0.0,Denied,Ground Water,Sprinkler,,0,,,,,,,0,0.0,0.0,0.0,0.0,-99.78988,46.1113


In [4]:
# swapping order owner name
import re

def createOwnerName(val):
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        val = str(val)
        val = val.strip()
        if "," in val:
            x = val.split(",")
            outString = str(x[0]).strip() + " " + str(x[1]).strip()
        else:
            outString = val     
    return outString
df['in_AllocationOwner'] = df.apply(lambda row: createOwnerName(row['permit_hol']), axis=1)


def cleanOwnerDataFunc(Val):
    Val = re.sub("[$@&.;,/\)(-]", "", Val).strip()
    return Val
df['in_AllocationOwner'] = df.apply(lambda row: cleanOwnerDataFunc(row['in_AllocationOwner']), axis=1)
df['in_AllocationOwner'].unique()

array(['KETTERLING ROLAND  LORRAINE', 'HYDE GEORGE H', 'SENERIUS FRED',
       ..., 'IGLEHART DOUG', 'BROTEN ERIC', 'NEW ROCKFORD CITY OF'],
      dtype=object)

In [5]:
# Changing format to Title to clean up text.
df['source_nam'] = df['source_nam'].str.title()
df['source'] = df['source'].str.title()
df['permit_hol'] = df['permit_hol'].str.title()

# Removing white space from certain text fields to help clean up text.
df['source_nam'] = df['source_nam'].str.strip()
df['source'] = df['source'].str.strip()
df['permit_hol'] = df['permit_hol'].str.strip()

df['county'] = df['county'].str.strip()
df['aquifer'] = df['aquifer'].str.strip()
df['pod'] = df['pod'].str.strip()
df['status'] = df['status'].str.strip()
df['use_type'] = df['use_type'].str.strip()
df['permit_num'] = df['permit_num'].str.strip()
df.head()

Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,permit_ind,permit_num,pod_index,pod,permit_hol,priority_d,use_type,status,date_issue,date_cance,beneficial,county,hu_sub_bas,aquifer,subaquifer,req_acft,req_acre,req_rate,req_storag,app_acft,app_acre,app_rate,app_storag,pod_status,source,irrigation,source_nam,mainstem,impound_lo,impound_na,return_des,discharge_,period_sta,period_end,return_qua,held_acft,held_acre,held_rate,held_stora,longitude,latitude,in_AllocationOwner
0,ndwr0,,,1,4407,1,13007302B,"Ketterling, Roland & Lorraine",3/4/1991,Irrigation,Denied,,,1111-11-11,McIntosh,Beaver,,,204.0,135.2,1000.0,0.0,0.0,0.0,0.0,0.0,Denied,Ground Water,Sprinkler,,0,,,,,,,0,0.0,0.0,0.0,0.0,-99.78988,46.1113,KETTERLING ROLAND LORRAINE
1,ndwr1,,,2,1E,2,15310236CC,"Hyde, George H.",8/15/1901,Irrigation,Cancelled,,,,McKenzie,Lake Sakakawea,,,80.0,80.0,448.8,0.0,0.0,0.0,0.0,0.0,Cancelled,Surface Water,Combination,,1,,,,,,,0,0.0,0.0,0.0,0.0,-103.75216,48.02622,HYDE GEORGE H
2,ndwr10,,,11,8E,11,14408820BB,"Senerius, Fred",12/22/1937,Irrigation,Cancelled,,,6/12/1938,Mercer,Knife,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cancelled,Surface Water,Flooding,,0,,,,,,,0,0.0,0.0,0.0,0.0,-101.86756,47.28288,SENERIUS FRED
3,ndwr100,,,72,91C,102,15608415BD,"Kuda, Alan P. And Hughes/Kuda, Kathleen A.",6/9/1915,Irrigation,Perfected,6/9/1915,,6/9/1919,Ward,Moose Mountain Creek-Souris River,,,17.0,8.5,46.0,0.0,17.0,8.5,46.1,0.0,InActive,Surface Water,Flooding,,1,,,,,,,0,0.0,0.0,0.0,0.0,-101.46082,48.33611,KUDA ALAN P and HUGHESKUDA
4,ndwr1000,,,679,902,1047,15810110AC,"Grev, Henry",4/10/1961,Irrigation,Cancelled,,2/9/1994,7/1/1967,Williams,Little Muddy,,,50.0,31.0,4488.0,0.0,0.0,0.0,0.0,0.0,Cancelled,Surface Water,Flooding,,0,,,,,,,0,0.0,0.0,0.0,0.0,-103.71934,48.52694,GREV HENRY


In [6]:
df['in_WaterAllocationNativeURL'] = "https://www.swc.nd.gov/info_edu/map_data_resources/waterpermits/single.php?id=" + df['permit_ind'].astype(str)
df.head(1)

Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,permit_ind,permit_num,pod_index,pod,permit_hol,priority_d,use_type,status,date_issue,date_cance,beneficial,county,hu_sub_bas,aquifer,subaquifer,req_acft,req_acre,req_rate,req_storag,app_acft,app_acre,app_rate,app_storag,pod_status,source,irrigation,source_nam,mainstem,impound_lo,impound_na,return_des,discharge_,period_sta,period_end,return_qua,held_acft,held_acre,held_rate,held_stora,longitude,latitude,in_AllocationOwner,in_WaterAllocationNativeURL
0,ndwr0,,,1,4407,1,13007302B,"Ketterling, Roland & Lorraine",3/4/1991,Irrigation,Denied,,,1111-11-11,McIntosh,Beaver,,,204.0,135.2,1000.0,0.0,0.0,0.0,0.0,0.0,Denied,Ground Water,Sprinkler,,0,,,,,,,0,0.0,0.0,0.0,0.0,-99.78988,46.1113,KETTERLING ROLAND LORRAINE,https://www.swc.nd.gov/info_edu/map_data_resou...


In [7]:
# Creating the output Dataframe for PODs.

dfPOD = pd.DataFrame(index=df.index)

# Data Assessment UUID
dfPOD['WaDEUUID'] = df['WaDEUUID']

# Water Source
dfPOD["in_WaterSourceName"] = df['source_nam']
dfPOD["in_WaterSourceTypeCV"] = df['source']

# Site
dfPOD["in_CoordinateAccuracy"] = "WaDE Unspecified"
dfPOD["in_CoordinateMethodCV"] = "Centroid of Area"
dfPOD['in_HUC12'] = ""
dfPOD['in_HUC8'] = ""
dfPOD['in_County'] = df['county']
dfPOD["in_Latitude"] = df['latitude']
dfPOD["in_Longitude"] = df['longitude']
dfPOD["in_PODorPOUSite"] = "POD"
dfPOD["in_SiteName"] = "WaDE Unspecified"
dfPOD["in_SiteNativeID"] = "POD" + df['pod'].astype(str)
dfPOD["in_SiteTypeCV"] = "WaDE Unspecified"
dfPOD["in_StateCV"] = "ND"

# Allocation
dfPOD["in_AllocationApplicationDate"] = df['date_issue']
dfPOD["in_AllocationExpirationDate"] = df['date_cance']
dfPOD["in_AllocationFlow_CFS"] = df['req_rate'].astype(float)
dfPOD["in_AllocationVolume_AF"] = df['req_acft'].astype(float)
dfPOD['in_AllocationLegalStatusCV'] = df['pod_status'].astype(str)
dfPOD["in_AllocationNativeID"] = df['permit_num'].astype(str)
dfPOD['in_AllocationOwner'] = df['in_AllocationOwner']
dfPOD['in_AllocationPriorityDate'] = df['priority_d']
dfPOD['in_AllocationTimeframeEnd'] = df['period_end']
dfPOD['in_AllocationTimeframeStart'] = df['period_sta']
dfPOD['in_AllocationTypeCV'] = ""
dfPOD["in_BeneficialUseCategory"] = df['use_type']
dfPOD['in_CommunityWaterSupplySystem'] = ""
dfPOD['in_ExemptOfVolumeFlowPriority'] = "0"
dfPOD["in_IrrigatedAcreage"] = ""
dfPOD["in_IrrigationMethodCV"] = ""
dfPOD["in_WaterAllocationNativeURL"] = df['in_WaterAllocationNativeURL']

dfPOD = dfPOD.drop_duplicates().reset_index(drop=True)
print(len(dfPOD))
dfPOD.head(1)

12181


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL
0,ndwr0,,Ground Water,WaDE Unspecified,Centroid of Area,,,McIntosh,46.1113,-99.78988,POD,WaDE Unspecified,POD13007302B,WaDE Unspecified,ND,,,1000.0,204.0,Denied,4407,KETTERLING ROLAND LORRAINE,3/4/1991,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...


## WaDE Custom Elements (due to missing sate info)

In [8]:
# Fixing empty string names

def fixEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or pd.isnull(val):
        outString = "WaDE Unspecified"
    else:
        outString = val
    return outString

In [9]:
dfPOD['in_WaterSourceName'] = dfPOD.apply(lambda row: fixEmptyString(row['in_WaterSourceName']), axis=1)
dfPOD['in_WaterSourceName'].unique()

array(['WaDE Unspecified', 'Unnamed Stream Trib. Of Little Muddy',
       'Missouri River', 'Goschke Dam On Tongue River',
       'Goshke Dam On Busse Coulee', 'Goschke Dam On Busse Coulee',
       'Unnamed Tributary', 'Unamed Coulee Trib. Of Lower Des Lacs',
       'Souris River', 'Flat Creek Trib. Of N. Fork Grand River',
       'Cannonball River', 'Chimney Butte Creek',
       'Unnamed Coulee Trib. Of The Heart River',
       'Unnamed Coulee A Trib. Of T Heart River',
       'Unnamed Creek, Trib Of Wolf Creek', 'Lake Sakakawea',
       'Forest River', "Gibb'S Spring, Trib. Of Little Knife",
       'White Earth Creek', 'Spring Creek, Trib. Of Knife River',
       'Willow Creek', 'Little Bull Creek',
       'Unnamed Tributary Of North Branch Park',
       'Unnamed Creek, Trib.Of Stony Slough', 'Sheyene River',
       'Unnamed Waterway, Trib Of Des Lacs River', 'Goschke Dam',
       'Unnamed Creek, Trib. Of The Tongue River',
       'Bussee Coulee, Trib. Of The Tongue River',
       'W

In [10]:
dfPOD['in_WaterSourceTypeCV'] = dfPOD.apply(lambda row: fixEmptyString(row['in_WaterSourceTypeCV']), axis=1)
dfPOD['in_WaterSourceTypeCV'].unique()

array(['Ground Water', 'Surface Water', 'WaDE Unspecified'], dtype=object)

In [11]:
dfPOD['in_County'] = dfPOD.apply(lambda row: fixEmptyString(row['in_County']), axis=1)
dfPOD['in_County'].unique()

array(['McIntosh', 'McKenzie', 'Mercer', 'Ward', 'Williams', 'Oliver',
       'Renville', 'Dickey', 'Sargent', 'Dunn', 'Grand Forks', 'McHenry',
       'Logan', 'Grant', 'Pembina', 'McLean', 'Kidder', 'Bowman',
       'Stutsman', 'Adams', 'LaMoure', 'Sioux', 'Morton', 'Mountrail',
       'Griggs', 'Burleigh', 'Ransom', 'Bottineau', 'Steele', 'Billings',
       'Divide', 'Burke', 'Benson', 'Cavalier', 'Wells', 'Emmons',
       'Stark', 'Nelson', 'Cass', 'Eddy', 'Barnes', 'Towner', 'Walsh',
       'Foster', 'Richland', 'Slope', 'Golden Valley', 'Traill',
       'Hettinger', 'Rolette', 'Ramsey', 'Sheridan', 'Pierce'],
      dtype=object)

In [12]:
dfPOD['in_SiteTypeCV'] = dfPOD.apply(lambda row: fixEmptyString(row['in_SiteTypeCV']), axis=1)
dfPOD['in_SiteTypeCV'].unique()

array(['WaDE Unspecified'], dtype=object)

In [13]:
dfPOD['in_AllocationLegalStatusCV'] = dfPOD.apply(lambda row: fixEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
dfPOD['in_AllocationLegalStatusCV'].unique()

array(['Denied', 'Cancelled', 'InActive', 'Active', 'Deferred', 'Void',
       'Held In Abeyance', 'WaDE Unspecified'], dtype=object)

In [14]:
dfPOD['in_BeneficialUseCategory'] = dfPOD.apply(lambda row: fixEmptyString(row['in_BeneficialUseCategory']), axis=1)
dfPOD['in_BeneficialUseCategory'].unique()

array(['Irrigation', 'Industrial', 'Rural Water', 'Fish and Wildlife',
       'Recreation', 'Stock', 'Municipal', 'Multiple Use',
       'Flood Control', 'Undefined', 'Power Generation', 'Domestic'],
      dtype=object)

In [15]:
# Changing datatype of date fields to fit WaDE.
dfPOD['in_AllocationApplicationDate'] = pd.to_datetime(dfPOD['in_AllocationApplicationDate'], errors = 'coerce')
dfPOD['in_AllocationApplicationDate'] = pd.to_datetime(dfPOD["in_AllocationApplicationDate"].dt.strftime('%m/%d/%Y'))

dfPOD['in_AllocationExpirationDate'] = pd.to_datetime(dfPOD['in_AllocationExpirationDate'], errors = 'coerce')
dfPOD['in_AllocationExpirationDate'] = pd.to_datetime(dfPOD["in_AllocationExpirationDate"].dt.strftime('%m/%d/%Y'))

dfPOD['in_AllocationPriorityDate'] = pd.to_datetime(dfPOD['in_AllocationPriorityDate'], errors = 'coerce')
dfPOD['in_AllocationPriorityDate'] = pd.to_datetime(dfPOD["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
dfPOD.head()

Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL
0,ndwr0,WaDE Unspecified,Ground Water,WaDE Unspecified,Centroid of Area,,,McIntosh,46.1113,-99.78988,POD,WaDE Unspecified,POD13007302B,WaDE Unspecified,ND,NaT,NaT,1000.0,204.0,Denied,4407,KETTERLING ROLAND LORRAINE,1991-03-04,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...
1,ndwr1,WaDE Unspecified,Surface Water,WaDE Unspecified,Centroid of Area,,,McKenzie,48.02622,-103.75216,POD,WaDE Unspecified,POD15310236CC,WaDE Unspecified,ND,NaT,NaT,448.8,80.0,Cancelled,1E,HYDE GEORGE H,1901-08-15,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...
2,ndwr10,WaDE Unspecified,Surface Water,WaDE Unspecified,Centroid of Area,,,Mercer,47.28288,-101.86756,POD,WaDE Unspecified,POD14408820BB,WaDE Unspecified,ND,NaT,NaT,0.0,0.0,Cancelled,8E,SENERIUS FRED,1937-12-22,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...
3,ndwr100,WaDE Unspecified,Surface Water,WaDE Unspecified,Centroid of Area,,,Ward,48.33611,-101.46082,POD,WaDE Unspecified,POD15608415BD,WaDE Unspecified,ND,1915-06-09,NaT,46.0,17.0,InActive,91C,KUDA ALAN P and HUGHESKUDA,1915-06-09,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...
4,ndwr1000,WaDE Unspecified,Surface Water,WaDE Unspecified,Centroid of Area,,,Williams,48.52694,-103.71934,POD,WaDE Unspecified,POD15810110AC,WaDE Unspecified,ND,NaT,1994-02-09,4488.0,50.0,Cancelled,902,GREV HENRY,1961-04-10,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...


In [16]:
dfPOD['in_AllocationTimeframeEnd'] = pd.to_datetime(dfPOD['in_AllocationTimeframeEnd'], errors = 'coerce')
dfPOD['in_AllocationTimeframeEnd'] = dfPOD["in_AllocationTimeframeEnd"].dt.strftime('%m/%d')
dfPOD['in_AllocationTimeframeEnd'].unique()

array([nan, '10/15', '06/15', '04/01', '05/15', '05/01'], dtype=object)

In [17]:
dfPOD['in_AllocationTimeframeStart'] = pd.to_datetime(dfPOD['in_AllocationTimeframeStart'], errors = 'coerce')
dfPOD['in_AllocationTimeframeStart'] = dfPOD["in_AllocationTimeframeStart"].dt.strftime('%m/%d')
dfPOD['in_AllocationTimeframeStart'].unique()

array([nan, '04/01'], dtype=object)

In [18]:
# Fixing in_AllocationFlow_CFS datatype
dfPOD['in_AllocationFlow_CFS'] = pd.to_numeric(dfPOD['in_AllocationFlow_CFS'], errors='coerce').fillna(0)
dfPOD['in_AllocationFlow_CFS'].unique()

array([1.000000e+03, 4.488000e+02, 0.000000e+00, 4.600000e+01,
       4.488000e+03, 5.000000e+02, 6.500000e+03, 1.750000e+02,
       1.500000e+03, 2.000000e+03, 3.000000e+02, 8.750000e+01,
       9.000000e+02, 2.000000e+02, 1.175000e+03, 1.174000e+03,
       1.671000e+03, 1.500000e+01, 2.000000e+01, 2.445000e+03,
       8.500000e+02, 4.000000e+02, 2.730000e+02, 4.500000e+01,
       9.500000e+02, 7.000000e+02, 1.600000e+03, 4.178400e+04,
       7.500000e+01, 1.050000e+03, 3.500000e+02, 7.500000e+03,
       2.400000e+03, 2.450000e+03, 6.000000e+02, 1.250000e+02,
       8.010000e+03, 4.000000e+01, 3.750000e+02, 1.800000e+03,
       6.000000e+00, 5.000000e+01, 5.000000e+03, 1.346400e+04,
       4.250000e+02, 3.500000e+03, 8.000000e+02, 4.380000e+02,
       2.301000e+03, 2.500000e+02, 3.600000e+02, 1.850000e+03,
       2.240000e+03, 1.890000e+03, 1.211700e+03, 1.240000e+03,
       2.500000e+03, 1.550000e+03, 1.300000e+03, 8.750000e+02,
       2.550000e+03, 4.500000e+03, 7.300000e+01, 4.4800

In [19]:
# Fixing in_AllocationVolume_AF datatype
dfPOD['in_AllocationVolume_AF'] = pd.to_numeric(dfPOD['in_AllocationVolume_AF'], errors='coerce').fillna(0)
dfPOD['in_AllocationVolume_AF'].unique()

array([ 204.  ,   80.  ,    0.  , ..., 4480.  ,  391.65, 2420.  ])

In [20]:
%%time

# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEND_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = dfPOD['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfPOD['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceName'] == A) & 
                                       (dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfPOD['in_WaterSourceNativeID'] = dfPOD.apply(lambda row: retrieveWaterSourceNativeID(row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
dfPOD.head(3)

Wall time: 4.89 s


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_HUC12,in_HUC8,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_StateCV,in_AllocationApplicationDate,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ExemptOfVolumeFlowPriority,in_IrrigatedAcreage,in_IrrigationMethodCV,in_WaterAllocationNativeURL,in_WaterSourceNativeID
0,ndwr0,WaDE Unspecified,Ground Water,WaDE Unspecified,Centroid of Area,,,McIntosh,46.1113,-99.78988,POD,WaDE Unspecified,POD13007302B,WaDE Unspecified,ND,NaT,NaT,1000.0,204.0,Denied,4407,KETTERLING ROLAND LORRAINE,1991-03-04,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...,WaDEND_WS1
1,ndwr1,WaDE Unspecified,Surface Water,WaDE Unspecified,Centroid of Area,,,McKenzie,48.02622,-103.75216,POD,WaDE Unspecified,POD15310236CC,WaDE Unspecified,ND,NaT,NaT,448.8,80.0,Cancelled,1E,HYDE GEORGE H,1901-08-15,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...,WaDEND_WS2
2,ndwr10,WaDE Unspecified,Surface Water,WaDE Unspecified,Centroid of Area,,,Mercer,47.28288,-101.86756,POD,WaDE Unspecified,POD14408820BB,WaDE Unspecified,ND,NaT,NaT,0.0,0.0,Cancelled,8E,SENERIUS FRED,1937-12-22,,,,Irrigation,,0,,,https://www.swc.nd.gov/info_edu/map_data_resou...,WaDEND_WS2


# Export the Output 

In [21]:
#technique to check datatype of long dataframes.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

WaDEUUID                        object
ReasonRemoved                   object
IncompleteField                 object
permit_ind                       int64
permit_num                      object
pod_index                        int64
pod                             object
permit_hol                      object
priority_d                      object
use_type                        object
status                          object
date_issue                      object
date_cance                      object
beneficial                      object
county                          object
hu_sub_bas                      object
aquifer                         object
subaquifer                      object
req_acft                       float64
req_acre                       float64
req_rate                       float64
req_storag                     float64
app_acft                       float64
app_acre                       float64
app_rate                       float64
app_storag               

In [22]:
# Export the output dataframe
dfPOD.to_csv('P_NorthDakotaMaster.zip', index=False, compression="zip")  # The output, save as a zip