# Pre-processing Montana Water Right data for WaDEQA upload.

Date Updated: 08/05/2022

Purpose:  To pre-process the Montana data into one master file for simple DataFrame creation and extraction.

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook



In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Montana/WaterAllocation/RawInputData"
os.chdir(workingDir)

## POD Water Budget Data

In [3]:
# Input File
fileInput = "WaDE_PODs_input.csv"
dfpod = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfpod:
    dfpod['WaDEUUID'] = "mtD" + dfpod.index.astype(str)
    dfpod.to_csv('WaDE_PODs_input.csv', index=False)

dfpod.head()

  dfpod = pd.read_csv(fileInput)


Unnamed: 0,OID_,OBJECTID,WR_NUMBER,WR_TYPE,WR_STATUS,ALL_OWNERS,PURPOSES,POD_NO,SOURCE_TYP,SOURCE_NAM,HUC_12_SOU,HUC_12,MEANS_OF_D,ENF_PRIORI,DITCH_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PER_USE_AL,PER_DIV_AL,PER_DIV_BG,PER_DIV_EN,ABST_LINK,VERS_TYPE,WRKEY,LLDS_GOVT_,LLDS_QTRS,LLDS_TRS,LLDS_COUNT,X_METERS,Y_METERS,Y_MAPPED,X_METERS_C,Y_METERS_C,LLDS_TWP_R,LLDS_SCTN_,LLDSIDSEQ,TRSSIDSEQ,OP_AUTH_DT,BASIN,WR_NO,EXT,WRGT_ID_SE,VERSIDSEQ,PODV_ID_SE,DTM_CREATE,WR_PROGRAM,Lat,Long,WaDEUUID
0,0,1,41I 1 00,STATEMENT OF CLAIM,ACTIVE,JEROME F CROTEAU; KATHERINE P CROTEAU,MULTIPLE DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,1/11/1966 0:00:00,,10.0,0.02,0.5,0.51,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,1-1,0,NWNESW,10N1W 2,LEWIS AND CLARK,0.0,0.0,N,431903.4951,269353.8514,10N1W,2,1417159,240277,7/1/1973 0:00:00,41I,1,0,1,1,124705,3/5/2021 0:00:00,ADJ,46.652738,-111.697572,mtD0
1,1,2,41QJ 2 00,STATEMENT OF CLAIM,ACTIVE,PETER M MELOY; JANICE S VAN RIPER,IRRIGATION,1,SURFACE,MISSOURI RIVER,Prewett Creek-Missouri River,100301020601,PUMP,12/31/1935 0:00:00,,85.0,0.18,0.0,5.2,04/01 to 10/15,04/01 to 10/15,04/01,10/15,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,POST DECREE,4-2,0,NESWSW,16N2W 10,CASCADE,0.0,0.0,N,421952.6298,325261.0558,16N2W,10,694927,242562,6/21/2017 0:00:00,41QJ,2,0,4,2,767521,3/5/2021 0:00:00,ADJ,47.153133,-111.849504,mtD1
2,2,3,41Q 2 00,PROVISIONAL PERMIT,ACTIVE,NANCY VALENTINE,STOCK,1,SURFACE,BIG OTTER CREEK,Upper Big Otter Creek,100301050203,LIVESTOCK DIRECT FROM SOURCE,7/3/1973 0:00:00,,0.0,0.0,2.5,0.0,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,5-1,0,NENE,17N8E 13,JUDITH BASIN,0.0,0.0,N,513596.6849,332897.4719,17N8E,13,1682499,285481,4/15/1974 0:00:00,41Q,2,0,5,1,396497,3/5/2021 0:00:00,,47.24023,-110.641911,mtD2
3,3,4,41I 3 00,STATEMENT OF CLAIM,ACTIVE,PAULETTE S ETCHART; JON O SATRE,DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,12/31/1961 0:00:00,,40.0,0.08,1.0,0.9,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,7-1,0,SWNENW,10N1W 11,LEWIS AND CLARK,0.0,0.0,N,431856.3707,268325.8862,10N1W,11,1099269,183404,7/1/1973 0:00:00,41I,3,0,7,1,124708,3/5/2021 0:00:00,ADJ,46.643477,-111.697811,mtD3
4,4,5,41I 4 00,STATEMENT OF CLAIM,ACTIVE,TERESA HUTTON,IRRIGATION,1,GROUNDWATER,GROUNDWATER,Upper Silver Creek,100301011502,WELL,12/31/1918 0:00:00,,100.0,0.22,0.0,12.5,04/01 to 10/31,04/01 to 10/31,04/01,10/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,9-1,0,SESESW,11N4W 14,LEWIS AND CLARK,0.0,0.0,N,403182.2712,276003.7856,11N4W,14,1417164,261487,7/1/1973 0:00:00,41I,4,0,9,1,124710,3/5/2021 0:00:00,ADJ,46.70469,-112.075658,mtD4


In [4]:
# Clean Owner info.  Remove special characters
# need to separate out by ; into list
# remove special characters
# convert list to string, separate by ,

import re

def cleanOwnerDataFunc(Val):
    Val = Val.strip()
    ValList = Val.split(';')
    for i, s in enumerate(ValList):
        ValList[i] = re.sub("[$@&.,;/\)(-]", "", s).strip()
    outString = ','.join(ValList)
    return outString

dfpod['ALL_OWNERS'] = dfpod.apply(lambda row: cleanOwnerDataFunc(row['ALL_OWNERS']), axis=1)
dfpod['ALL_OWNERS'].unique()

array(['JEROME F CROTEAU,KATHERINE P CROTEAU',
       'PETER M MELOY,JANICE S VAN RIPER', 'NANCY VALENTINE', ...,
       'CHRISTINE H DOWNEY,JEFFREY M DOWNEY', 'MICHAEL C PECK',
       'COLBY E MILLER,SHEILA D MILLER'], dtype=object)

In [5]:
#Creating the output Dataframe for PODs.
# dfPOD = pd.DataFrame(columns=columnsList)
dfPOD = pd.DataFrame()

# Data Assessment UUID
dfPOD['WaDEUUID'] = dfpod['WaDEUUID']

# Water Source
dfPOD["in_WaterSourceName"] = dfpod['SOURCE_NAM']
dfPOD["in_WaterSourceTypeCV"] = dfpod['SOURCE_TYP']

# Site
dfPOD["in_CoordinateAccuracy"] = "Unspecified"
dfPOD["in_CoordinateMethodCV"] = "Unspecified"
dfPOD["in_County"] = dfpod['LLDS_COUNT']
dfPOD["in_HUC12"] = dfpod['HUC_12']
dfPOD["in_Latitude"] = dfpod['Lat']
dfPOD["in_Longitude"] = dfpod['Long']
dfPOD["in_PODorPOUSite"] = "POD"
dfPOD["in_SiteName"] = dfpod['DITCH_NAME']
dfPOD["in_SiteNativeID"] = dfpod['PODV_ID_SE']
dfPOD["in_SiteTypeCV"] = dfpod['MEANS_OF_D']

# Allocation
dfPOD["in_AllocationFlow_CFS"] = dfpod['FLW_RT_CFS']
dfPOD["in_AllocationLegalStatusCV"] = dfpod['WR_STATUS']
dfPOD["in_AllocationNativeID"] = dfpod['WR_NUMBER']
dfPOD["in_AllocationOwner"] = dfpod['ALL_OWNERS']
dfPOD["in_AllocationPriorityDate"] = dfpod['ENF_PRIORI'] 
dfPOD["in_AllocationTimeframeEnd"] = dfpod['PER_DIV_EN']
dfPOD["in_AllocationTimeframeStart"] = dfpod['PER_DIV_BG']
dfPOD["in_AllocationTypeCV"] = dfpod['WR_TYPE']
dfPOD["in_AllocationVolume_AF"] = dfpod['VOLUME']
dfPOD["in_BeneficialUseCategory"] = dfpod['PURPOSES'].str.replace("; ", ",").str.replace(", ", ",")
dfPOD["in_DataPublicationDOI"] = dfpod['ABST_LINK']
dfPOD["in_IrrigatedAcreage"] = dfpod['MAX_ACRES']

print(len(dfPOD))
dfPOD.head(1)

631581


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_HUC12,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_DataPublicationDOI,in_IrrigatedAcreage
0,mtD0,GROUNDWATER,GROUNDWATER,Unspecified,Unspecified,LEWIS AND CLARK,100301011205,46.652738,-111.697572,POD,,124705,WELL,0.02,ACTIVE,41I 1 00,"JEROME F CROTEAU,KATHERINE P CROTEAU",1/11/1966 0:00:00,12/31,01/01,STATEMENT OF CLAIM,0.5,MULTIPLE DOMESTIC,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,0.51


## PoU Water Budget Data

In [6]:
# Input File
fileInput = "WaDE_PoUs_input.csv"
dfpu = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfpu:
    dfpu['WaDEUUID'] = "mtU" + dfpu.index.astype(str)
    dfpu.to_csv('WaDE_PoUs_input.csv', index=False)

dfpu.head()

Unnamed: 0,OID_,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue,WaDEUUID
0,0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2592224.897,6440.128506,-115.085915,47.952505,mtU0
1,1,2155835,25N27W3,,0,,,,,25N27W,3,547296,148034,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,4,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2595401.386,6443.821302,-115.064419,47.952462,mtU1
2,2,2942909,25N26W6,,0,,,,,25N26W,6,547299,148037,76N116396 00,;204777-1;,76N,76N 116396 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,"MONTANA, STATE OF DEPT OF FISH WILDLIFE & PARK...",ORIG,SURFACE,THOMPSON RIVER,50.0,0.11,2.0,0.0,FIRE PROTECTION,53,0.0,SANDERS,MT,,04/01 to 10/31,1-Apr,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204777-1,10/30/2019 0:00,N,2548555.433,6384.60915,-114.999982,47.952443,mtU2
3,3,2228890,25N27W7,,0,,,,,25N27W,7,547302,148040,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,8,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2577852.604,6422.247563,-115.128935,47.938099,mtU3
4,4,2301925,25N27W9,,0,,,,,25N27W,9,547305,148043,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,10,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2578769.47,6423.36738,-115.085903,47.938008,mtU4


In [7]:
# Creating the output Dataframe for POUs.
#dfPOU = pd.DataFrame(columns=columnsList)
dfPOU = pd.DataFrame()

# Data Assessment UUID
dfPOU['WaDEUUID'] = dfpu['WaDEUUID']

# Water Source
dfPOU["in_WaterSourceName"] = dfpu['SOURC_NAME']
dfPOU["in_WaterSourceTypeCV"] = dfpu['SRCTYPE']

# Site
dfPOU["in_CoordinateAccuracy"] = "Unspecified"
dfPOU["in_CoordinateMethodCV"] = "Centroid"
dfPOU["in_County"] = dfpu['COUNTY']
dfPOU["in_Latitude"] = dfpu['Latitdue']
dfPOU["in_Longitude"] = dfpu['Longitdue']
dfPOU["in_PODorPOUSite"] = "POU"
dfPOU["in_SiteName"] = "Unspecified"
dfPOU["in_SiteNativeID"] = "POU" + dfpu['OBJECTID'].astype(str)
dfPOU["in_SiteTypeCV"] = "Unspecified" 

# Allocation
dfPOU["in_AllocationFlow_CFS"] = dfpu['FLW_RT_CFS']
dfPOU["in_AllocationLegalStatusCV"] = dfpu['STATUS']
dfPOU["in_AllocationNativeID"] = dfpu['WRNUMBER']
dfPOU["in_AllocationOwner"] = dfpu['ALL_OWNERS'] # as is
dfPOU["in_AllocationPriorityDate"] = dfpu['ENF_PRIORI']
dfPOU["in_AllocationTimeframeEnd"] = dfpu['PER_USE_EN']
dfPOU["in_AllocationTimeframeStart"] = dfpu['PER_USE_BG'] 
dfPOU["in_AllocationTypeCV"] = dfpu['WRTYPE']
dfPOU["in_AllocationVolume_AF"] = dfpu['VOLUME']
dfPOU["in_BeneficialUseCategory"] = dfpu['PURPOSE'].str.replace("; ", ",").str.replace(", ", ",")
dfPOU["in_DataPublicationDOI"] = dfpu['NRIS_LINK']
dfPOU["in_IrrigatedAcreage"] = dfpu['MAX_ACRES']

print(len(dfPOU))
dfPOU.head(1)

14571


Unnamed: 0,WaDEUUID,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_DataPublicationDOI,in_IrrigatedAcreage
0,mtU0,INDIAN CREEK,SURFACE,Unspecified,Centroid,SANDERS,47.952505,-115.085915,POU,Unspecified,POU2301927,Unspecified,0.11,ACTIVE,76N 116400 00,WEYERHAEUSER COMPANY,12/31/1955 0:00,31-Oct,1-May,STATEMENT OF CLAIM,1.0,FIRE PROTECTION,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,0.0


## Concatenate POD and POU
## Fix Elements

In [8]:
# Merge dataframes
frames = [dfPOD, dfPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True)
print(len(outdf))

646152


In [9]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
outdf['in_AllocationPriorityDate'] = outdf['in_AllocationPriorityDate'].astype(str)
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array(['1966-01-11T00:00:00.000000000', '1935-12-31T00:00:00.000000000',
       '1973-07-03T00:00:00.000000000', ...,
       '1882-04-14T00:00:00.000000000', '1955-05-28T00:00:00.000000000',
       '1940-02-24T00:00:00.000000000'], dtype='datetime64[ns]')

In [11]:
# Creating easy MethodTypeCV retreival for AllocationsAmounts_fact sheet.

x = datetime(1973, 7, 1)
print(x)

def createMethodTypeCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        if colrowValue < x:
            outList = "Adjudication"
        else:
            outList = "Appropriations"
    return outList

outdf['in_MethodTypeCV'] = outdf.apply(lambda row: createMethodTypeCV(row['in_AllocationPriorityDate']), axis=1)
outdf['in_MethodTypeCV'].unique()

1973-07-01 00:00:00


array(['Adjudication', 'Appropriations'], dtype=object)

In [12]:
# Creating TimeframeStart & TimeframeEnd
# Spliting string, returning WaDE friendly format.

MonthNumbDict = {
"Jan" : "01",
"Feb" : "02",
"Mar" : "03",
"May" : "04",
"Apr" : "05",
"Jun" : "06",
"Jul" : "07",
"Aug" : "08",
"Sep" : "09",
"Oct" : "10",
"Nov" : "11",
"Dec" : "12"}

def createTimeframe(ColRowVal):
    val = str(ColRowVal)
    day = val.split('-')[0]
    try:
        month = val.split('-')[1]
        month = MonthNumbDict[month] + "/"
    except:
        day = ""
        month = ""
    outlist = month + day
    return outlist

outdf['in_AllocationTimeframeEnd'] = outdf.apply(lambda row: createTimeframe(row['in_AllocationTimeframeEnd']), axis=1)
outdf['in_AllocationTimeframeStart'] = outdf.apply(lambda row: createTimeframe(row['in_AllocationTimeframeStart']), axis=1)

outdf['in_AllocationTimeframeEnd'].unique()
outdf['in_AllocationTimeframeStart'].unique()

array(['', '04/1', '05/1', '01/1', '06/1', '04/10', '04/15', '05/15',
       '03/1', '05/10', '07/1', '05/28', '10/1', '09/1', '11/1', '03/15',
       '05/20', '08/1', '05/16', '07/16', '02/15', '07/15', '06/15',
       '02/1', '05/25', '12/1', '07/20', '04/28', '06/28', '10/15',
       '09/15', '04/16', '06/16', '03/28', '12/31', '01/28', '08/15',
       '05/30', '06/25', '04/20', '03/20', '09/30', '02/10', '05/5',
       '09/28', '12/15', '03/31', '03/21', '04/31', '08/28', '11/10',
       '07/5', '10/31', '01/31', '04/25'], dtype=object)

In [13]:
# fix site name

def fixSiteName(val):
    val = str(val).strip()
    if val == '' or pd.isnull(val):
        outString = "Unspecified"
    else:
        outString = val
    return outString

outdf['in_SiteName'] = outdf.apply(lambda row: fixSiteName(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['Unspecified', 'CAMPBELL DITCH', 'HARRER DITCH', ...,
       'CAVAINE DITCH', 'SAM BIRD (HAMILTON) DITCH', 'MADELINE DITCH'],
      dtype=object)

In [14]:
# fix benefical use

def fixBenUse(val):
    val = str(val).strip()
    if val == '' or pd.isnull(val):
        outString = "Unspecified"
    else:
        outString = val
    return outString

outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: fixBenUse(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['MULTIPLE DOMESTIC', 'IRRIGATION', 'STOCK', 'DOMESTIC',
       'DOMESTIC,STOCK', 'DOMESTIC,IRRIGATION', 'MUNICIPAL',
       'IRRIGATION,STOCK', 'INSTITUTIONAL', 'LAWN AND GARDEN',
       'FISH AND WILDLIFE,IRRIGATION,STOCK', 'FISH AND WILDLIFE',
       'COMMERCIAL', 'STOCK,DOMESTIC', 'INDUSTRIAL', 'MINING',
       'DOMESTIC,IRRIGATION,STOCK', 'IRRIGATION,RECREATION',
       'DOMESTIC,LAWN AND GARDEN', 'IRRIGATION,STOCK,WILDLIFE/WATERFOWL',
       'IRRIGATION,STOCK,DOMESTIC', 'POWER GENERATION',
       'COMMERCIAL,DOMESTIC,IRRIGATION', 'COMMERCIAL,FIRE PROTECTION',
       'FISH AND WILDLIFE,STOCK', 'COMMERCIAL,DOMESTIC',
       'DOMESTIC,STOCK,IRRIGATION',
       'FISH AND WILDLIFE,IRRIGATION,RECREATION', 'FIRE PROTECTION',
       'STOCK,FISH AND WILDLIFE',
       'FISH AND WILDLIFE,IRRIGATION,LAWN AND GARDEN,RECREATION',
       'OTHER PURPOSE',
       'FLOOD CONTROL,IRRIGATION,RECREATION,SEDIMENT CONTROL,FISH AND WILDLIFE',
       'DOMESTIC,STOCK,DOMESTIC', 'FIRE PROTECTION,MUNI

In [15]:
# fix WaterSourceTypeCV
# use WaDE specific terms only

waterSourceTypeDict = {
"SURFACE" : "Surface Water",
"GROUNDWATER" : "Groundwater",
"ALL NATURALLY OCCURING WATER" : "Surface Water"
}

def fixWaterSourceTypeCV(val):
    if val == "" or pd.isnull(val):
        outString = "Unspecified"
    else:
        String1 = val.strip()
        try:
            outString = waterSourceTypeDict[String1]
        except:
            outString = "Unspecified"
    return outString

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: fixWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'Unspecified'], dtype=object)

In [16]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEMT_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceName'] == A) & 
                                       (dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['WaDEMT_WS1', 'WaDEMT_WS2', 'WaDEMT_WS3', ..., 'WaDEMT_WS19862',
       'WaDEMT_WS19863', 'WaDEMT_WS19864'], dtype=object)

## Shapefile Data
- For attaching gemetry to POU csv inputs.

In [17]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('PoUShp/MT_PoU2.shp')
dfPoUshapetemp.head(3)

NotImplementedError: A polygon does not itself provide the array interface. Its rings do.

In [None]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['OBJECTID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

In [None]:
# # Concatenate dataframes
# frames = [dfPODshape, dfPoUshape]
# dfshape = pd.concat(frames)
# print(len(dfshape))

## Export Data

In [18]:
# Export out to CSV.
outdf.to_csv('P_MontanaMaster.csv', index=False) # The output.
#dfPoUshape.to_csv('P_MontanaGeometry.csv', index=False) # The output geometry.

In [19]:
arrX = outdf['in_BeneficialUseCategory'].unique()
str1 = ','.join(arrX)
def Convert(string):
    li = list(string.split(","))
    return li
list1 = Convert(str1)

for i in list1:
    print(i)


MULTIPLE DOMESTIC
IRRIGATION
STOCK
DOMESTIC
DOMESTIC
STOCK
DOMESTIC
IRRIGATION
MUNICIPAL
IRRIGATION
STOCK
INSTITUTIONAL
LAWN AND GARDEN
FISH AND WILDLIFE
IRRIGATION
STOCK
FISH AND WILDLIFE
COMMERCIAL
STOCK
DOMESTIC
INDUSTRIAL
MINING
DOMESTIC
IRRIGATION
STOCK
IRRIGATION
RECREATION
DOMESTIC
LAWN AND GARDEN
IRRIGATION
STOCK
WILDLIFE/WATERFOWL
IRRIGATION
STOCK
DOMESTIC
POWER GENERATION
COMMERCIAL
DOMESTIC
IRRIGATION
COMMERCIAL
FIRE PROTECTION
FISH AND WILDLIFE
STOCK
COMMERCIAL
DOMESTIC
DOMESTIC
STOCK
IRRIGATION
FISH AND WILDLIFE
IRRIGATION
RECREATION
FIRE PROTECTION
STOCK
FISH AND WILDLIFE
FISH AND WILDLIFE
IRRIGATION
LAWN AND GARDEN
RECREATION
OTHER PURPOSE
FLOOD CONTROL
IRRIGATION
RECREATION
SEDIMENT CONTROL
FISH AND WILDLIFE
DOMESTIC
STOCK
DOMESTIC
FIRE PROTECTION
MUNICIPAL
LAWN AND GARDEN
MULTIPLE DOMESTIC
AGRICULTURAL SPRAYING
DOMESTIC
FIRE PROTECTION
STOCK
COMMERCIAL
DOMESTIC
STOCK
STOCK
IRRIGATION
STOCK
LAWN AND GARDEN
UNKNOWN
IRRIGATION
STOCK
FISH AND WILDLIFE
FISHERY
MITIGATION WA

FISH AND WILDLIFE
WILDLIFE/WATERFOWL
FISHERY
STOCK
IRRIGATION
DOMESTIC
LAWN AND GARDEN
STOCK
LAWN AND GARDEN
FISH AND WILDLIFE
STOCK
DOMESTIC
STOCK
FIRE PROTECTION
LAWN AND GARDEN
FIRE PROTECTION
FISHERY
LAWN AND GARDEN
STOCK
FIRE PROTECTION
RECREATION
STOCK
FIRE PROTECTION
STOCK
RECREATION
LAWN AND GARDEN
FISH AND WILDLIFE
RECREATION
COMMERCIAL
DOMESTIC
COMMERCIAL
DOMESTIC
LAWN AND GARDEN
INDUSTRIAL
MUNICIPAL
MUNICIPAL
MUNICIPAL
NAVIGATION
RECREATION
IRRIGATION
MINING
FIRE PROTECTION
DOMESTIC
STOCK
OTHER PURPOSE
IRRIGATION
FISH AND WILDLIFE
LAWN AND GARDEN
STOCK
LAWN AND GARDEN
DOMESTIC
IRRIGATION
COMMERCIAL
FISH AND WILDLIFE
RECREATION
DOMESTIC
COMMERCIAL
IRRIGATION
LAWN AND GARDEN
IRRIGATION
DOMESTIC
MULTIPLE DOMESTIC
DOMESTIC
STOCK
DOMESTIC
STOCK
FISH AND WILDLIFE
FIRE PROTECTION
IRRIGATION
STOCK
FIRE PROTECTION
STOCK
WATERFOWL
STOCK
STOCK
WETLAND MITIGATION CREDIT
DOMESTIC
IRRIGATION
STOCK
OTHER PURPOSE
DOMESTIC
STOCK
FISH AND WILDLIFE
RECREATION
IRRIGATION
STOCK
RECREATION
FISH A

LAWN AND GARDEN
STOCK
FISH AND WILDLIFE
MULTIPLE DOMESTIC
LAWN AND GARDEN
IRRIGATION
STOCK
MULTIPLE DOMESTIC
LAWN AND GARDEN
MULTIPLE DOMESTIC
RECREATION
FISHERY
LAWN AND GARDEN
MULTIPLE DOMESTIC
RECREATION
DOMESTIC
OBSERVATION AND TESTING
DOMESTIC
EROSION CONTROL
WILDLIFE/WATERFOWL
IRRIGATION
STOCK
POWER GENERATION
FISH AND WILDLIFE
MULTIPLE DOMESTIC
DOMESTIC
LAWN AND GARDEN
LAWN AND GARDEN
DOMESTIC
RECREATION
DOMESTIC
FISHERY
IRRIGATION
STOCK
WILDLIFE/WATERFOWL
FISHERY
STOCK
IRRIGATION
DOMESTIC
LAWN AND GARDEN
IRRIGATION
FISHERY
POWER GENERATION
DOMESTIC
STOCK
FISH AND WILDLIFE
IRRIGATION
LAWN AND GARDEN
IRRIGATION
FISH AND WILDLIFE
DOMESTIC
STOCK
STOCK
OIL WELL FLOODING
INSTITUTIONAL
MULTIPLE DOMESTIC
AUGMENTATION
IRRIGATION
MULTIPLE DOMESTIC
IRRIGATION
STOCK
LAWN AND GARDEN
MITIGATION WATER
OTHER PURPOSE
AUGMENTATION
GEOTHERMAL
GEOTHERMAL
MULTIPLE DOMESTIC
LAWN AND GARDEN
LAWN AND GARDEN
COMMERCIAL
INDUSTRIAL
GEOTHERMAL
OTHER PURPOSE
DOMESTIC
STOCK
DOMESTIC
LAWN AND GARDEN
STOCK
GE

In [20]:
str1 = ','.join(arrX)
str1

'MULTIPLE DOMESTIC,IRRIGATION,STOCK,DOMESTIC,DOMESTIC,STOCK,DOMESTIC,IRRIGATION,MUNICIPAL,IRRIGATION,STOCK,INSTITUTIONAL,LAWN AND GARDEN,FISH AND WILDLIFE,IRRIGATION,STOCK,FISH AND WILDLIFE,COMMERCIAL,STOCK,DOMESTIC,INDUSTRIAL,MINING,DOMESTIC,IRRIGATION,STOCK,IRRIGATION,RECREATION,DOMESTIC,LAWN AND GARDEN,IRRIGATION,STOCK,WILDLIFE/WATERFOWL,IRRIGATION,STOCK,DOMESTIC,POWER GENERATION,COMMERCIAL,DOMESTIC,IRRIGATION,COMMERCIAL,FIRE PROTECTION,FISH AND WILDLIFE,STOCK,COMMERCIAL,DOMESTIC,DOMESTIC,STOCK,IRRIGATION,FISH AND WILDLIFE,IRRIGATION,RECREATION,FIRE PROTECTION,STOCK,FISH AND WILDLIFE,FISH AND WILDLIFE,IRRIGATION,LAWN AND GARDEN,RECREATION,OTHER PURPOSE,FLOOD CONTROL,IRRIGATION,RECREATION,SEDIMENT CONTROL,FISH AND WILDLIFE,DOMESTIC,STOCK,DOMESTIC,FIRE PROTECTION,MUNICIPAL,LAWN AND GARDEN,MULTIPLE DOMESTIC,AGRICULTURAL SPRAYING,DOMESTIC,FIRE PROTECTION,STOCK,COMMERCIAL,DOMESTIC,STOCK,STOCK,IRRIGATION,STOCK,LAWN AND GARDEN,UNKNOWN,IRRIGATION,STOCK,FISH AND WILDLIFE,FISHERY,MITIGATION W

In [21]:
def Convert(string):
    li = list(string.split(","))
    return li
  
list1 = Convert(str1)

for i in list1:
    print(i)

MULTIPLE DOMESTIC
IRRIGATION
STOCK
DOMESTIC
DOMESTIC
STOCK
DOMESTIC
IRRIGATION
MUNICIPAL
IRRIGATION
STOCK
INSTITUTIONAL
LAWN AND GARDEN
FISH AND WILDLIFE
IRRIGATION
STOCK
FISH AND WILDLIFE
COMMERCIAL
STOCK
DOMESTIC
INDUSTRIAL
MINING
DOMESTIC
IRRIGATION
STOCK
IRRIGATION
RECREATION
DOMESTIC
LAWN AND GARDEN
IRRIGATION
STOCK
WILDLIFE/WATERFOWL
IRRIGATION
STOCK
DOMESTIC
POWER GENERATION
COMMERCIAL
DOMESTIC
IRRIGATION
COMMERCIAL
FIRE PROTECTION
FISH AND WILDLIFE
STOCK
COMMERCIAL
DOMESTIC
DOMESTIC
STOCK
IRRIGATION
FISH AND WILDLIFE
IRRIGATION
RECREATION
FIRE PROTECTION
STOCK
FISH AND WILDLIFE
FISH AND WILDLIFE
IRRIGATION
LAWN AND GARDEN
RECREATION
OTHER PURPOSE
FLOOD CONTROL
IRRIGATION
RECREATION
SEDIMENT CONTROL
FISH AND WILDLIFE
DOMESTIC
STOCK
DOMESTIC
FIRE PROTECTION
MUNICIPAL
LAWN AND GARDEN
MULTIPLE DOMESTIC
AGRICULTURAL SPRAYING
DOMESTIC
FIRE PROTECTION
STOCK
COMMERCIAL
DOMESTIC
STOCK
STOCK
IRRIGATION
STOCK
LAWN AND GARDEN
UNKNOWN
IRRIGATION
STOCK
FISH AND WILDLIFE
FISHERY
MITIGATION WA

LAWN AND GARDEN
IRRIGATION
OTHER PURPOSE
IRRIGATION
STOCK
GEOTHERMAL
MITIGATION WATER
IRRIGATION
IRRIGATION
LAWN AND GARDEN
AGRICULTURAL SPRAYING
DOMESTIC
STOCK
DOMESTIC
OTHER PURPOSE
STOCK
IRRIGATION
LAWN AND GARDEN
GEOTHERMAL HEATING
LAWN AND GARDEN
FIRE PROTECTION
FISHERY
MULTIPLE DOMESTIC
OTHER PURPOSE
STOCK
IRRIGATION
LAWN AND GARDEN
COMMERCIAL
DOMESTIC
FIRE PROTECTION
RECREATION
DOMESTIC
STOCK
FISHERY
FIRE PROTECTION
STOCK
FISHERY
FIRE PROTECTION
FISH AND WILDLIFE
DOMESTIC
LAWN AND GARDEN
DOMESTIC
LAWN AND GARDEN
IRRIGATION
STOCK
FIRE PROTECTION
DOMESTIC
COMMERCIAL
LAWN AND GARDEN
LAWN AND GARDEN
POWER GENERATION
NONCONSUMPTIVE
COMMERCIAL
LAWN AND GARDEN
FIRE PROTECTION
DOMESTIC
OTHER PURPOSE
STOCK
OTHER PURPOSE
FISH AND WILDLIFE
DOMESTIC
LAWN AND GARDEN
STOCK
DOMESTIC
STOCK
MINING
DOMESTIC
INSTITUTIONAL
IRRIGATION
LAWN AND GARDEN
IRRIGATION
STOCK
FISH AND WILDLIFE
RECREATION
DOMESTIC
STOCK
RECREATION
FISH AND WILDLIFE
DOMESTIC
INSTITUTIONAL
STOCK
INSTITUTIONAL
LAWN AND GARDEN
ST