# Pre-processing Montana Water Right data for WaDEQA upload.

Date Updated: 08/05/2022

Purpose:  To pre-process the Montana data into one master file for simple DataFrame creation and extraction.

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Montana/WaterAllocation/RawInputData"
os.chdir(workingDir)

## POD Water Budget Data

In [3]:
# Input File
fileInput = "WaDE_PODs_input.zip"
dfinPOD = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "mtD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('WaDE_PODs_input.zip', compression=dict(method='zip', archive_name='WaDE_PODs_input.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head()

  dfinPOD = pd.read_csv(fileInput)


631581


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,OID_,OBJECTID,WR_NUMBER,WR_TYPE,WR_STATUS,ALL_OWNERS,PURPOSES,POD_NO,SOURCE_TYP,SOURCE_NAM,HUC_12_SOU,HUC_12,MEANS_OF_D,ENF_PRIORI,DITCH_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PER_USE_AL,PER_DIV_AL,PER_DIV_BG,PER_DIV_EN,ABST_LINK,VERS_TYPE,WRKEY,LLDS_GOVT_,LLDS_QTRS,LLDS_TRS,LLDS_COUNT,X_METERS,Y_METERS,Y_MAPPED,X_METERS_C,Y_METERS_C,LLDS_TWP_R,LLDS_SCTN_,LLDSIDSEQ,TRSSIDSEQ,OP_AUTH_DT,BASIN,WR_NO,EXT,WRGT_ID_SE,VERSIDSEQ,PODV_ID_SE,DTM_CREATE,WR_PROGRAM,Lat,Long
0,mtD0,,,0,1,41I 1 00,STATEMENT OF CLAIM,ACTIVE,JEROME F CROTEAU; KATHERINE P CROTEAU,MULTIPLE DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,1/11/1966 0:00:00,,10.0,0.02,0.5,0.51,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,1-1,0,NWNESW,10N1W 2,LEWIS AND CLARK,0.0,0.0,N,431903.4951,269353.8514,10N1W,2,1417159,240277,7/1/1973 0:00:00,41I,1,0,1,1,124705,3/5/2021 0:00:00,ADJ,46.65274,-111.69757
1,mtD1,,,1,2,41QJ 2 00,STATEMENT OF CLAIM,ACTIVE,PETER M MELOY; JANICE S VAN RIPER,IRRIGATION,1,SURFACE,MISSOURI RIVER,Prewett Creek-Missouri River,100301020601,PUMP,12/31/1935 0:00:00,,85.0,0.18,0.0,5.2,04/01 to 10/15,04/01 to 10/15,04/01,10/15,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,POST DECREE,4-2,0,NESWSW,16N2W 10,CASCADE,0.0,0.0,N,421952.6298,325261.0558,16N2W,10,694927,242562,6/21/2017 0:00:00,41QJ,2,0,4,2,767521,3/5/2021 0:00:00,ADJ,47.15313,-111.8495
2,mtD10,,,10,11,42J 7 00,POWDER RIVER DECLARATION,ACTIVE,BILL R LAMBERT; KARA L LAMBERT,DOMESTIC; IRRIGATION,1,GROUNDWATER,GROUNDWATER,Coyote Creek-Powder River,100902070306,WELL,10/31/1946 0:00:00,,10.0,0.02,2.0,0.5,01/01 to 12/31; 01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,17-1,0,,4S51E 34,POWDER RIVER,0.0,0.0,N,919687.6526,140877.9697,4S51E,34,669757,270495,7/1/1973 0:00:00,42J,7,0,17,1,124715,3/5/2021 0:00:00,PR,45.44247,-105.41108
3,mtD100,,,100,101,41M 45 00,PROVISIONAL PERMIT,ACTIVE,HOLDEN HEREFORDS,IRRIGATION; STOCK,1,SURFACE,UNNAMED TRIBUTARY OF DUPUYER CREEK,Lower Dupuyer Creek,100302010504,DITCH,7/30/1973 0:00:00,,1795.2,4.0,300.0,100.0,05/01 to 10/15; 01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,135-1,0,SENW,29N6W 21,PONDERA,0.0,0.0,N,385232.5386,449571.8782,29N6W,21,1670704,279334,6/20/1974 0:00:00,41M,45,0,135,1,384496,3/5/2021 0:00:00,,48.26009,-112.39425
4,mtD1000,,,1000,1001,40J 590 00,STATEMENT OF CLAIM,ACTIVE,LOIS B SIMPSON; TEDDY L SIMPSON,STOCK,1,SURFACE,CLEAR CREEK,Lower Clear Creek,100500040303,LIVESTOCK DIRECT FROM SOURCE,12/29/1896 0:00:00,,0.0,0.0,0.0,0.0,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,POST DECREE,1643-2,0,NESENE,32N18E 9,BLAINE,0.0,0.0,N,608103.8363,477801.3624,32N18E,9,1581448,177151,6/1/2015 0:00:00,40J,590,0,1643,2,710117,3/5/2021 0:00:00,ADJ,48.54968,-109.39022


In [4]:
# Clean Owner info.  Remove special characters
# need to separate out by ; into list
# remove special characters
# convert list to string, separate by ,

def cleanOwnerDataFunc(Val):
    Val = Val.strip()
    ValList = Val.split(';')
    for i, s in enumerate(ValList):
        ValList[i] = re.sub("[$@&.,;/\)(-]", "", s).title().strip()
    outString = ','.join(ValList)
    return outString

dfinPOD['ALL_OWNERS'] = dfinPOD.apply(lambda row: cleanOwnerDataFunc(row['ALL_OWNERS']), axis=1)
dfinPOD['ALL_OWNERS'].unique()

array(['Jerome F Croteau,Katherine P Croteau',
       'Peter M Meloy,Janice S Van Riper',
       'Bill R Lambert,Kara L Lambert', ...,
       'Karen M Sparacino,Vincent J Sparacino', 'Dan Hammond',
       'Irma P Dwyer,Kelly T Dwyer'], dtype=object)

In [5]:
def createNativeLandingURLMTFunct(xVal):
    # convert to list
    xList = xVal.split(' ')
    
    # add '20' value to 2nd & 3rd positoin
    try:
        xList[1] = "20" + xList[1]
        xList[2] = "20" + xList[2]
        outVal = ' '.join(xList).replace(' ', '%')
    except:
        outVal = ' '.join(xList).replace(' ', '%')
    
    # concatenate with url
    outstring = "http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=" + outVal + "&status=ACTV!SEVR"
    
    return outstring

dfinPOD['in_WaterAllocationNativeURL'] = dfinPOD.apply(lambda row: createNativeLandingURLMTFunct(row['WR_NUMBER']), axis=1)
dfinPOD['in_WaterAllocationNativeURL'].unique()

array(['http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=41I%201%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=41QJ%202%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=42J%207%2000&status=ACTV!SEVR',
       ...,
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=41Q%2094356%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=41Q%2094358%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=76N%2094358%2000&status=ACTV!SEVR'],
      dtype=object)

In [6]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "MTwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "MTwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "MTwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOD['SOURCE_NAM'].str.title()
df['in_WaterSourceNativeID'] = "" #auto fill in below
df['in_WaterSourceTypeCV'] = dfinPOD['SOURCE_TYP'].str.title()

# Site Info
df['in_CoordinateAccuracy'] = "WaDE Unspecified"
df['in_CoordinateMethodCV'] = "WaDE Unspecified"
df['in_County'] = dfinPOD['LLDS_COUNT'].str.title()
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = dfinPOD['HUC_12']
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Lat']
df['in_Longitude'] = dfinPOD['Long']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = dfinPOD['DITCH_NAME'].str.title()
df['in_SiteNativeID'] = "POD" + dfinPOD['PODV_ID_SE'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfinPOD['MEANS_OF_D'].str.title()
df['in_StateCV'] = "MT"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['FLW_RT_CFS'].astype(float)
df['in_AllocationLegalStatusCV'] = dfinPOD['WR_STATUS'].str.title()
df['in_AllocationNativeID'] =  dfinPOD['WR_NUMBER'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfinPOD['ALL_OWNERS']
df['in_AllocationPriorityDate'] = dfinPOD['ENF_PRIORI']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = dfinPOD['PER_DIV_EN']
df['in_AllocationTimeframeStart'] = dfinPOD['PER_DIV_BG']
#df['in_AllocationTypeCV'] = dfinPOD['WR_TYPE'] # skip for now
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['VOLUME']
df['in_BeneficialUseCategory'] = dfinPOD['PURPOSES'].str.replace("; ", ",").str.replace(", ", ",").str.title()
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfinPOD['MAX_ACRES']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['in_WaterAllocationNativeURL']

outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True)
print(len(outPOD))
outPOD.head()

631581


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,mtD0,MTwr_M1,MTwr_V1,MTwr_O1,,,,Groundwater,,Groundwater,WaDE Unspecified,WaDE Unspecified,Lewis And Clark,4326,,100301011205,,46.65274,-111.69757,,,POD,,POD124705,,Well,MT,,,,,,,,,,0.02,Active,41I 1 00,"Jerome F Croteau,Katherine P Croteau",1/11/1966 0:00:00,,12/31,01/01,,0.5,Multiple Domestic,,,,,,0,,0.51,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
1,mtD1,MTwr_M1,MTwr_V1,MTwr_O1,,,,Missouri River,,Surface,WaDE Unspecified,WaDE Unspecified,Cascade,4326,,100301020601,,47.15313,-111.8495,,,POD,,POD767521,,Pump,MT,,,,,,,,,,0.18,Active,41QJ 2 00,"Peter M Meloy,Janice S Van Riper",12/31/1935 0:00:00,,10/15,04/01,,0.0,Irrigation,,,,,,0,,5.2,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
2,mtD10,MTwr_M1,MTwr_V1,MTwr_O1,,,,Groundwater,,Groundwater,WaDE Unspecified,WaDE Unspecified,Powder River,4326,,100902070306,,45.44247,-105.41108,,,POD,,POD124715,,Well,MT,,,,,,,,,,0.02,Active,42J 7 00,"Bill R Lambert,Kara L Lambert",10/31/1946 0:00:00,,12/31,01/01,,2.0,"Domestic,Irrigation",,,,,,0,,0.5,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
3,mtD100,MTwr_M1,MTwr_V1,MTwr_O1,,,,Unnamed Tributary Of Dupuyer Creek,,Surface,WaDE Unspecified,WaDE Unspecified,Pondera,4326,,100302010504,,48.26009,-112.39425,,,POD,,POD384496,,Ditch,MT,,,,,,,,,,4.0,Active,41M 45 00,Holden Herefords,7/30/1973 0:00:00,,12/31,01/01,,300.0,"Irrigation,Stock",,,,,,0,,100.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
4,mtD1000,MTwr_M1,MTwr_V1,MTwr_O1,,,,Clear Creek,,Surface,WaDE Unspecified,WaDE Unspecified,Blaine,4326,,100500040303,,48.54968,-109.39022,,,POD,,POD710117,,Livestock Direct From Source,MT,,,,,,,,,,0.0,Active,40J 590 00,"Lois B Simpson,Teddy L Simpson",12/29/1896 0:00:00,,12/31,01/01,,0.0,Stock,,,,,,0,,0.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...


## PoU Water Budget Data

In [7]:
# Input File
fileInput = "WaDE_PoUs_input.zip"
dfinPOU = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "mtU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('WaDE_PoUs_input.zip', compression=dict(method='zip', archive_name='WaDE_PoUs_input.csv'), index=False)

print(len(dfinPOU))
dfinPOU.head()

14571


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,OID_,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue
0,mtU0,"Unused Site Record,Incomplete or bad entry for...","Statement Of Claim,STATEMENT OF CLAIM",0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2592224.897,6440.12851,-115.08591,47.9525
1,mtU1,"Unused Site Record,Incomplete or bad entry for...","Statement Of Claim,STATEMENT OF CLAIM",1,2155835,25N27W3,,0,,,,,25N27W,3,547296,148034,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,4,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2595401.386,6443.8213,-115.06442,47.95246
2,mtU10,"Unused Site Record,Incomplete or bad entry for...","Statement Of Claim,STATEMENT OF CLAIM",10,1886433,25N27W17,,0,,,,,25N27W,17,547323,148061,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,18,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2587654.217,6434.43209,-115.1074,47.92358
3,mtU100,"Unused Site Record,Incomplete or bad entry for...","Statement Of Claim,STATEMENT OF CLAIM",100,153150,22N26W5,,0,,,,,22N26W,5,547612,148350,76N116399 00,;204782-1;,76N,76N 116399 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,LITTLE THOMPSON RIVER,50.0,0.11,1.0,0.0,FIRE PROTECTION,28,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204782-1,10/30/2019 0:00,N,2580734.955,6425.72717,-114.95731,47.69436
4,mtU1000,"Unused Site Record,Incomplete or bad entry for...","Statement Of Claim,STATEMENT OF CLAIM",1000,1549316,11N33E5,,0,,,,,11N33E,5,556431,157169,40C30106416,;443332-1;,40C,40C 30106416,STATEMENT OF CLAIM,ACTIVE,1/22/1927 0:00,"MONTANA, STATE OF BOARD OF LAND COMMISSIONERS",ORIG,SURFACE,ANTELOPE CREEK,0.0,0.0,0.0,0.0,STOCK,1,0.0,ROSEBUD,MT,,01/01 to 12/31,1-Jan,31-Dec,232.0,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,443332-1,10/30/2019 0:00,N,2587837.905,6434.14591,-107.59879,46.73537


In [8]:
def createNativeLandingURLMTFunct(xVal):
    # convert to list
    xList = xVal.split(' ')
    
    # add '20' value to 2nd & 3rd positoin
    try:
        xList[1] = "20" + xList[1]
        xList[2] = "20" + xList[2]
        outVal = ' '.join(xList).replace(' ', '%')
    except:
        outVal = ' '.join(xList).replace(' ', '%')
    
    # concatenate with url
    outstring = "http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=" + outVal + "&status=ACTV!SEVR"
    
    return outstring

dfinPOU['in_WaterAllocationNativeURL'] = dfinPOU.apply(lambda row: createNativeLandingURLMTFunct(row['WRNUMBER']), axis=1)
dfinPOU['in_WaterAllocationNativeURL'].unique()

array(['http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=76N%20116400%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=76N%20116399%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=40C%2030106416&status=ACTV!SEVR',
       ...,
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=40B%20186214%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=40B%20186230%2000&status=ACTV!SEVR',
       'http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=simple&index=8&wrnumber=40B%20186257%2000&status=ACTV!SEVR'],
      dtype=object)

In [9]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOU['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "MTwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "MTwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "MTwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOU['SOURC_NAME'].str.title()
df['in_WaterSourceNativeID'] = "" #auto fill in below
df['in_WaterSourceTypeCV'] = dfinPOU['SRCTYPE']

# Site Info
df['in_CoordinateAccuracy'] = "WaDE Unspecified"
df['in_CoordinateMethodCV'] = "Centroid of Area"
df['in_County'] = dfinPOU['COUNTY']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOU['Latitdue']
df['in_Longitude'] = dfinPOU['Longitdue']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = "WaDE Unspecified"
df['in_SiteNativeID'] = "POU" + dfinPOU['OBJECTID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] =  "WaDE Unspecified"
df['in_StateCV'] = "MT"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOU['FLW_RT_CFS'].astype(float)
df['in_AllocationLegalStatusCV'] = dfinPOU['STATUS'].str.title()
df['in_AllocationNativeID'] =  dfinPOU['WRNUMBER'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfinPOU['ALL_OWNERS'].str.title()
df['in_AllocationPriorityDate'] = dfinPOU['ENF_PRIORI']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = dfinPOU['PER_USE_EN']
df['in_AllocationTimeframeStart'] = dfinPOU['PER_USE_BG']
#df['in_AllocationTypeCV'] = dfinPOU['WRTYPE'].str.title() # skip for now
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOU['VOLUME']
df['in_BeneficialUseCategory'] = dfinPOU['PURPOSE'].str.replace("; ", ",").str.replace(", ", ",").str.title()
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfinPOU['MAX_ACRES']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOU['in_WaterAllocationNativeURL']

outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True)
print(len(outPOU))
outPOU.head()

14571


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,mtU0,MTwr_M1,MTwr_V1,MTwr_O1,,,,Indian Creek,,SURFACE,WaDE Unspecified,Centroid of Area,SANDERS,4326,,,,47.9525,-115.08591,,,POU,WaDE Unspecified,POU2301927,,WaDE Unspecified,MT,,,,,,,,,,0.11,Active,76N 116400 00,Weyerhaeuser Company,12/31/1955 0:00,,31-Oct,1-May,,1.0,Fire Protection,,,,,,0,,0.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
1,mtU1,MTwr_M1,MTwr_V1,MTwr_O1,,,,Indian Creek,,SURFACE,WaDE Unspecified,Centroid of Area,SANDERS,4326,,,,47.95246,-115.06442,,,POU,WaDE Unspecified,POU2155835,,WaDE Unspecified,MT,,,,,,,,,,0.11,Active,76N 116400 00,Weyerhaeuser Company,12/31/1955 0:00,,31-Oct,1-May,,1.0,Fire Protection,,,,,,0,,0.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
2,mtU10,MTwr_M1,MTwr_V1,MTwr_O1,,,,Indian Creek,,SURFACE,WaDE Unspecified,Centroid of Area,SANDERS,4326,,,,47.92358,-115.1074,,,POU,WaDE Unspecified,POU1886433,,WaDE Unspecified,MT,,,,,,,,,,0.11,Active,76N 116400 00,Weyerhaeuser Company,12/31/1955 0:00,,31-Oct,1-May,,1.0,Fire Protection,,,,,,0,,0.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
3,mtU100,MTwr_M1,MTwr_V1,MTwr_O1,,,,Little Thompson River,,SURFACE,WaDE Unspecified,Centroid of Area,SANDERS,4326,,,,47.69436,-114.95731,,,POU,WaDE Unspecified,POU153150,,WaDE Unspecified,MT,,,,,,,,,,0.11,Active,76N 116399 00,Weyerhaeuser Company,12/31/1955 0:00,,31-Oct,1-May,,1.0,Fire Protection,,,,,,0,,0.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
4,mtU1000,MTwr_M1,MTwr_V1,MTwr_O1,,,,Antelope Creek,,SURFACE,WaDE Unspecified,Centroid of Area,ROSEBUD,4326,,,,46.73537,-107.59879,,,POU,WaDE Unspecified,POU1549316,,WaDE Unspecified,MT,,,,,,,,,,0.0,Active,40C 30106416,"Montana, State Of Board Of Land Commissioners",1/22/1927 0:00,,31-Dec,1-Jan,,0.0,Stock,,,,,,0,,0.0,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...


## Concatenate POD and POU
## Fix Elements

In [10]:
# Merge dataframes
frames = [outPOD, outPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True)
print(len(outdf))

646152


In [11]:
# Fixing empty string names

def fixEmptyString(val):
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = "WaDE Unspecified"
    else:
        outString = val
    return outString

In [12]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: fixEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Groundwater', 'Missouri River',
       'Unnamed Tributary Of Dupuyer Creek', ...,
       'Unnamed Tributary Of Unnamed Tributary Of West Fork Basin Creek',
       'Unnamed Tributary Of Unnamed Tributary Of South Fork Big Timber Creek',
       'Unnamed Tributary Of Little Thompson Creek'], dtype=object)

In [13]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: fixEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Well', 'Pump', 'Ditch', 'Livestock Direct From Source',
       'Headgate', 'Dam', 'Flowing', 'Spring Box', 'Infiltration Gallery',
       'Developed Spring', 'Instream', 'Pipeline', 'Diversion Dam',
       'Dike', 'Drain Ditch', 'Multiple', 'Electric Pump', 'Pit/Dam',
       'Pit', 'Bucket', 'Undeveloped Spring', 'Fueled Pump', 'Other',
       'Subirrigation',
       'Pump/Headgate W/Ditch Or Pipeline/Flood And Dike',
       'Pump/Headgate W/Ditch Or Pipeline', 'Other Diversion',
       'Direct From Source', 'Natural Overflow',
       'Headgate W/Ditch Or Pipeline/Flood And Dike',
       'Gravity Flow/Direct', 'Ditch/Gravity Flow', 'Inlake', 'Sump',
       'Pump/Flood And Dike', 'Unknown', 'Dam/Pit', 'Pump/Gravity Flow',
       'Hand Pump', 'Wildlife Direct From Source', 'Redundant Well',
       'Natural Carrier', 'Windmill', 'Inwetland', 'Any Means',
       'WaDE Unspecified'], dtype=object)

In [14]:
outdf['in_AllocationTypeCV'] = outdf.apply(lambda row: fixEmptyString(row['in_AllocationTypeCV']), axis=1)
outdf['in_AllocationTypeCV'].unique()

array(['WaDE Unspecified'], dtype=object)

In [15]:
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: fixEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Active'], dtype=object)

In [16]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: fixEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Jerome F Croteau,Katherine P Croteau',
       'Peter M Meloy,Janice S Van Riper',
       'Bill R Lambert,Kara L Lambert', ...,
       'Kori L Mccormick, Thomas E Mccormick',
       'Helland Farms Inc, Helland, Lawrence & Sons Inc',
       'Christina P Stieber, Henry Stieber, Jr'], dtype=object)

In [17]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: fixEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['Multiple Domestic', 'Irrigation', 'Domestic,Irrigation',
       'Irrigation,Stock', 'Stock', 'Power Generation',
       'Domestic,Lawn And Garden,Stock,Wildlife/Waterfowl,Irrigation',
       'Lawn And Garden', 'Domestic', 'Storage', 'Domestic,Stock',
       'Domestic,Lawn And Garden', 'Domestic,Lawn And Garden,Stock',
       'Commercial', 'Lawn And Garden,Multiple Domestic', 'Fishery',
       'Instream Fishery', 'Unknown,Unknown', 'Lawn And Garden,Domestic',
       'Fish And Wildlife,Stock', 'Industrial', 'Multiple Domestic,Stock',
       'Municipal,Industrial', 'Commercial,Domestic', 'Fire Protection',
       'Municipal', 'Domestic,Other Purpose', 'Stock,Domestic',
       'Domestic,Irrigation,Lawn And Garden,Stock',
       'Domestic,Irrigation,Lawn And Garden', 'Other Purpose,Stock',
       'Lawn And Garden,Multiple Domestic,Stock,Irrigation',
       'Agricultural Spraying,Domestic,Lawn And Garden,Stock',
       'Domestic,Lawn And Garden,Stock,Irrigation', 'Fish And Wildlife',

In [18]:
# in_Latitude & in_Longitude
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').fillna(0)
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').fillna(0)
outdf.head(1)

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,mtD0,MTwr_M1,MTwr_V1,MTwr_O1,,,,Groundwater,,Groundwater,WaDE Unspecified,WaDE Unspecified,Lewis And Clark,4326,,100301011205,,46.65274,-111.69757,,,POD,,POD124705,,Well,MT,,,,,,,,,,0.02,Active,41I 1 00,"Jerome F Croteau,Katherine P Croteau",1/11/1966 0:00:00,,12/31,01/01,WaDE Unspecified,0.5,Multiple Domestic,,,,,,0,,0.51,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...


In [19]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array(['1966-01-11T00:00:00.000000000', '1935-12-31T00:00:00.000000000',
       '1946-10-31T00:00:00.000000000', ...,
       '1879-01-24T00:00:00.000000000', '1882-04-14T00:00:00.000000000',
       '1955-05-28T00:00:00.000000000'], dtype='datetime64[ns]')

In [20]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').fillna(0)
outdf['in_AllocationFlow_CFS'].unique()

array([2.000e-02, 1.800e-01, 4.000e+00, ..., 8.607e+01, 5.300e+02,
       1.327e+01])

In [21]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').fillna(0)
outdf['in_AllocationVolume_AF'].unique()

array([5.0000e-01, 0.0000e+00, 2.0000e+00, ..., 4.6830e+02, 2.7636e+02,
       2.0250e+03])

In [22]:
# Fixing in_IrrigatedAcreage datatype
outdf['in_IrrigatedAcreage'] = pd.to_numeric(outdf['in_IrrigatedAcreage'], errors='coerce').fillna(0)
outdf['in_IrrigatedAcreage'].unique()

array([5.10000e-01, 5.20000e+00, 5.00000e-01, ..., 1.72926e+03,
       1.68700e+02, 1.41200e+03])

In [23]:
# fix WaterSourceTypeCV
# use WaDE specific terms only

waterSourceTypeDict = {
"SURFACE" : "Surface Water",
"GROUNDWATER" : "Groundwater",
"ALL NATURALLY OCCURING WATER" : "Surface Water"
}

def fixWaterSourceTypeCV(val):
    if val == "" or pd.isnull(val):
        outString = "WaDE Unspecified"
    else:
        String1 = val.strip()
        try:
            outString = waterSourceTypeDict[String1]
        except:
            outString = "WaDE Unspecified"
    return outString

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: fixWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['WaDE Unspecified', 'Surface Water', 'Groundwater'], dtype=object)

In [24]:
# Creating TimeframeStart & TimeframeEnd
# Spliting string, returning WaDE friendly format.

MonthNumbDict = {
"Jan" : "01",
"Feb" : "02",
"Mar" : "03",
"May" : "04",
"Apr" : "05",
"Jun" : "06",
"Jul" : "07",
"Aug" : "08",
"Sep" : "09",
"Oct" : "10",
"Nov" : "11",
"Dec" : "12"}

def createTimeframe(ColRowVal):
    val = str(ColRowVal)
    day = val.split('-')[0]
    try:
        month = val.split('-')[1]
        month = MonthNumbDict[month] + "/"
    except:
        day = ""
        month = ""
    outlist = month + day
    return outlist

outdf['in_AllocationTimeframeEnd'] = outdf.apply(lambda row: createTimeframe(row['in_AllocationTimeframeEnd']), axis=1)
outdf['in_AllocationTimeframeStart'] = outdf.apply(lambda row: createTimeframe(row['in_AllocationTimeframeStart']), axis=1)

outdf['in_AllocationTimeframeEnd'].unique()
outdf['in_AllocationTimeframeStart'].unique()

array(['', '04/1', '01/1', '05/1', '05/15', '03/15', '07/1', '05/25',
       '04/15', '06/1', '03/1', '11/1', '05/20', '02/1', '02/15', '05/5',
       '10/15', '07/5', '10/1', '10/31', '03/31', '12/1', '09/1', '04/31',
       '01/31', '05/10', '04/25', '05/28', '05/30', '08/15', '08/1',
       '05/16', '03/21', '04/10', '04/28', '07/16', '07/15', '06/15',
       '07/20', '06/28', '09/15', '04/16', '06/16', '03/28', '12/31',
       '01/28', '06/25', '04/20', '03/20', '09/30', '02/10', '09/28',
       '12/15', '08/28', '11/10'], dtype=object)

In [25]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "wadeID" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceName'] == A) & 
                                       (dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1', 'wadeID2', 'wadeID3', ..., 'wadeID19847', 'wadeID19848',
       'wadeID19849'], dtype=object)

## Shapefile Data
- For attaching gemetry to POU csv inputs.

In [26]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('PoUShp/MT_PoU2.shp')
dfPoUshapetemp.head(3)

Unnamed: 0,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue,geometry
0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,05/01,10/31,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,2019-10-30,N,2592224.89731,6440.12851,-115.08591,47.9525,"POLYGON ((-115.09665 47.94526, -115.09667 47.9..."
1,2155835,25N27W3,,0,,,,,25N27W,3,547296,148034,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,4,0.0,SANDERS,MT,,05/01 to 10/31,05/01,10/31,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,2019-10-30,N,2595401.38596,6443.8213,-115.06442,47.95246,"POLYGON ((-115.07516 47.94522, -115.07517 47.9..."
2,2942909,25N26W6,,0,,,,,25N26W,6,547299,148037,76N116396 00,;204777-1;,76N,76N 116396 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,"MONTANA, STATE OF DEPT OF FISH WILDLIFE & PARK...",ORIG,SURFACE,THOMPSON RIVER,50.0,0.11,2.0,0.0,FIRE PROTECTION,53,0.0,SANDERS,MT,,04/01 to 10/31,04/01,10/31,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204777-1,2019-10-30,N,2548555.43327,6384.60915,-114.99998,47.95244,"POLYGON ((-115.01055 47.94518, -115.01058 47.9..."


In [27]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['OBJECTID'].replace("", 0).fillna(0).astype(int).astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,POU2301927,"POLYGON ((-115.09665 47.94526, -115.09667 47.9..."
1,POU2155835,"POLYGON ((-115.07516 47.94522, -115.07517 47.9..."
2,POU2942909,"POLYGON ((-115.01055 47.94518, -115.01058 47.9..."


## Export Data

In [28]:
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 646152 entries, 0 to 646151
Data columns (total 63 columns):
 #   Column                                        Non-Null Count   Dtype         
---  ------                                        --------------   -----         
 0   WaDEUUID                                      646152 non-null  object        
 1   in_MethodUUID                                 646152 non-null  object        
 2   in_VariableSpecificUUID                       646152 non-null  object        
 3   in_OrganizationUUID                           646152 non-null  object        
 4   in_Geometry                                   646152 non-null  object        
 5   in_GNISFeatureNameCV                          646152 non-null  object        
 6   in_WaterQualityIndicatorCV                    646152 non-null  object        
 7   in_WaterSourceName                            646152 non-null  object        
 8   in_WaterSourceNativeID                        646152 n

In [29]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,mtD0,MTwr_M1,MTwr_V1,MTwr_O1,,,,Groundwater,wadeID1,WaDE Unspecified,WaDE Unspecified,WaDE Unspecified,Lewis And Clark,4326,,100301011205,,46.65274,-111.69757,,,POD,,POD124705,,Well,MT,,,,,,,,,,0.02000,Active,41I 1 00,"Jerome F Croteau,Katherine P Croteau",1966-01-11,,,,WaDE Unspecified,0.50000,Multiple Domestic,,,,,,0,,0.51000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
1,mtD1,MTwr_M1,MTwr_V1,MTwr_O1,,,,Missouri River,wadeID2,WaDE Unspecified,WaDE Unspecified,WaDE Unspecified,Cascade,4326,,100301020601,,47.15313,-111.84950,,,POD,,POD767521,,Pump,MT,,,,,,,,,,0.18000,Active,41QJ 2 00,"Peter M Meloy,Janice S Van Riper",1935-12-31,,,,WaDE Unspecified,0.00000,Irrigation,,,,,,0,,5.20000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
2,mtD10,MTwr_M1,MTwr_V1,MTwr_O1,,,,Groundwater,wadeID1,WaDE Unspecified,WaDE Unspecified,WaDE Unspecified,Powder River,4326,,100902070306,,45.44247,-105.41108,,,POD,,POD124715,,Well,MT,,,,,,,,,,0.02000,Active,42J 7 00,"Bill R Lambert,Kara L Lambert",1946-10-31,,,,WaDE Unspecified,2.00000,"Domestic,Irrigation",,,,,,0,,0.50000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
3,mtD100,MTwr_M1,MTwr_V1,MTwr_O1,,,,Unnamed Tributary Of Dupuyer Creek,wadeID3,WaDE Unspecified,WaDE Unspecified,WaDE Unspecified,Pondera,4326,,100302010504,,48.26009,-112.39425,,,POD,,POD384496,,Ditch,MT,,,,,,,,,,4.00000,Active,41M 45 00,Holden Herefords,1973-07-30,,,,WaDE Unspecified,300.00000,"Irrigation,Stock",,,,,,0,,100.00000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
4,mtD1000,MTwr_M1,MTwr_V1,MTwr_O1,,,,Clear Creek,wadeID4,WaDE Unspecified,WaDE Unspecified,WaDE Unspecified,Blaine,4326,,100500040303,,48.54968,-109.39022,,,POD,,POD710117,,Livestock Direct From Source,MT,,,,,,,,,,0.00000,Active,40J 590 00,"Lois B Simpson,Teddy L Simpson",1896-12-29,,,,WaDE Unspecified,0.00000,Stock,,,,,,0,,0.00000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646147,mtU9995,MTwr_M1,MTwr_V1,MTwr_O1,,,,Spring Creek,wadeID17795,Surface Water,WaDE Unspecified,Centroid of Area,FERGUS,4326,,,,46.81153,-108.78259,,,POU,WaDE Unspecified,POU141801,,WaDE Unspecified,MT,,,,,,,,,,0.00000,Active,40B 186163 00,Wilks Ranch Montana Ltd,1882-12-31,,12/31,01/1,WaDE Unspecified,0.00000,Stock,,,,,,0,,0.00000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
646148,mtU9996,MTwr_M1,MTwr_V1,MTwr_O1,,,,Potter Creek,wadeID19846,Surface Water,WaDE Unspecified,Centroid of Area,FERGUS,4326,,,,46.86089,-108.99528,,,POU,WaDE Unspecified,POU358349,,WaDE Unspecified,MT,,,,,,,,,,0.00000,Active,40B 186168 00,Wilks Ranch Montana Ltd,1882-12-31,,12/31,01/1,WaDE Unspecified,0.00000,Stock,,,,,,0,,0.00000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
646149,mtU9997,MTwr_M1,MTwr_V1,MTwr_O1,,,,"Spring, Unnamed Tributary Of Spring Creek",wadeID19847,Surface Water,WaDE Unspecified,Centroid of Area,FERGUS,4326,,,,46.84340,-108.85348,,,POU,WaDE Unspecified,POU141967,,WaDE Unspecified,MT,,,,,,,,,,0.00000,Active,40B 186214 00,Wilks Ranch Montana Ltd,1882-12-31,,12/31,01/1,WaDE Unspecified,0.00000,Stock,,,,,,0,,0.00000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...
646150,mtU9998,MTwr_M1,MTwr_V1,MTwr_O1,,,,"Spring, Unnamed Tributary Of Lucky Ford Coulee",wadeID19848,Surface Water,WaDE Unspecified,Centroid of Area,FERGUS,4326,,,,46.80004,-108.93419,,,POU,WaDE Unspecified,POU365264,,WaDE Unspecified,MT,,,,,,,,,,0.00000,Active,40B 186230 00,Wilks Ranch Montana Ltd,1882-12-31,,12/31,01/1,WaDE Unspecified,0.00000,Stock,,,,,,0,,0.00000,,,,,,,,http://wrqs.dnrc.mt.gov/ResultsWS.aspx?search=...


In [30]:
# Export the output dataframe
outdf.to_csv('Pwr_mtMain.zip', index=False, compression="zip")  # The output, save as a zip
dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.