# Pre-processing Montana Water Right data for WaDEQA upload.

Date Updated: 11/30/2020

Purpose:  To pre-process the Montana data into one master file for simple DataFrame creation and extraction.

In [1]:
# Needed Libararies
import os
import numpy as np
import pandas as pd
import datetime
import geopandas as gpd
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Montana/WaterAllocation/RawInputData"
os.chdir(workingDir)

In [3]:
# columns needed for combined output Dataframe
columnsList = ["in_GNISFeatureNameCV",	"in_WaterQualityIndicatorCV",	"in_WaterSourceName",	"in_WaterSourceNativeID",	"in_WaterSourceTypeCV",
               "in_CoordinateAccuracy",	"in_CoordinateMethodCV",	"in_County",	"in_EPSGCodeCV",	"in_Geometry",	"in_GNISCodeCV",	"in_HUC12",	"in_HUC8",	"in_Latitude",	"in_Longitude",	"in_NHDNetworkStatusCV",	"in_NHDProductCV",	"in_PODorPOUSite",	"in_SiteName",	"in_SiteNativeID",	"in_SitePoint",	"in_SiteTypeCV",	"in_StateCV",	"in_USGSSiteID",
               "in_AllocationApplicationDate",	"in_AllocationAssociatedConsumptiveUseSiteIDs",	"in_AllocationAssociatedWithdrawalSiteIDs",	"in_AllocationBasisCV",	"in_AllocationChangeApplicationIndicator",	"in_AllocationCommunityWaterSupplySystem",	"in_AllocationCropDutyAmount",	"in_AllocationExpirationDate",	"in_AllocationFlow_CFS",	"in_AllocationLegalStatusCV",	"in_AllocationNativeID",	"in_AllocationOwner",	"in_AllocationPriorityDate",	"in_AllocationSDWISIdentifierCV",	"in_AllocationTimeframeEnd",	"in_AllocationTimeframeStart",	"in_AllocationTypeCV",	"in_AllocationVolume_AF",	"in_BeneficialUseCategory",	"in_CommunityWaterSupplySystem",	"in_CropTypeCV",	"in_CustomerTypeCV",	"in_DataPublicationDate",	"in_DataPublicationDOI",	"in_ExemptOfVolumeFlowPriority",	"in_GeneratedPowerCapacityMW",	"in_IrrigatedAcreage",	"in_IrrigationMethodCV",	"in_LegacyAllocationIDs",	"in_OrganizationUUID",	"in_PopulationServed",	"in_PowerType",	"in_PrimaryUseCategory",	"in_VariableSpecificUUID",	"in_WaterAllocationNativeURL"]

## POD Water Budget Data

In [4]:
# CSV input file
fileInput = "WaDE_PODs_input.csv"
df = pd.read_csv(fileInput)
print(len(df))
df.head(1)

  df = pd.read_csv(fileInput)


631581


Unnamed: 0,OID_,OBJECTID,WR_NUMBER,WR_TYPE,WR_STATUS,ALL_OWNERS,PURPOSES,POD_NO,SOURCE_TYP,SOURCE_NAM,HUC_12_SOU,HUC_12,MEANS_OF_D,ENF_PRIORI,DITCH_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PER_USE_AL,PER_DIV_AL,PER_DIV_BG,PER_DIV_EN,ABST_LINK,VERS_TYPE,WRKEY,LLDS_GOVT_,LLDS_QTRS,LLDS_TRS,LLDS_COUNT,X_METERS,Y_METERS,Y_MAPPED,X_METERS_C,Y_METERS_C,LLDS_TWP_R,LLDS_SCTN_,LLDSIDSEQ,TRSSIDSEQ,OP_AUTH_DT,BASIN,WR_NO,EXT,WRGT_ID_SE,VERSIDSEQ,PODV_ID_SE,DTM_CREATE,WR_PROGRAM,Lat,Long
0,0,1,41I 1 00,STATEMENT OF CLAIM,ACTIVE,JEROME F CROTEAU; KATHERINE P CROTEAU,MULTIPLE DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,1/11/1966 0:00:00,,10.0,0.02,0.5,0.51,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,1-1,0,NWNESW,10N1W 2,LEWIS AND CLARK,0.0,0.0,N,431903.4951,269353.8514,10N1W,2,1417159,240277,7/1/1973 0:00:00,41I,1,0,1,1,124705,3/5/2021 0:00:00,ADJ,46.652738,-111.697572


In [5]:
# WaterSourceTypeCV
waterSourceTypeDict = {
"SURFACE" : "Surface Water",
"GROUNDWATER" : "Groundwater",
"ALL NATURALLY OCCURING WATER" : "Surface Water"
}

def retrieveWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        String1 = colrowValue.strip()
        try:
            outList = waterSourceTypeDict[String1]
        except:
            outList = "Unspecified"
    return outList

df['WaterSourceTypeCV'] = df.apply(lambda row: retrieveWaterSourceTypeCV(row['SOURCE_TYP']), axis=1)
df['WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'Unspecified'], dtype=object)

In [6]:
# Update datatype of Priority Date to fit WaDE 2.0 structure

df['ENF_PRIORI'] = pd.to_datetime(df['ENF_PRIORI'])
df['ENF_PRIORI'] = pd.to_datetime(df["ENF_PRIORI"].dt.strftime('%m/%d/%Y'))
df.head(1)

Unnamed: 0,OID_,OBJECTID,WR_NUMBER,WR_TYPE,WR_STATUS,ALL_OWNERS,PURPOSES,POD_NO,SOURCE_TYP,SOURCE_NAM,HUC_12_SOU,HUC_12,MEANS_OF_D,ENF_PRIORI,DITCH_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PER_USE_AL,PER_DIV_AL,PER_DIV_BG,PER_DIV_EN,ABST_LINK,VERS_TYPE,WRKEY,LLDS_GOVT_,LLDS_QTRS,LLDS_TRS,LLDS_COUNT,X_METERS,Y_METERS,Y_MAPPED,X_METERS_C,Y_METERS_C,LLDS_TWP_R,LLDS_SCTN_,LLDSIDSEQ,TRSSIDSEQ,OP_AUTH_DT,BASIN,WR_NO,EXT,WRGT_ID_SE,VERSIDSEQ,PODV_ID_SE,DTM_CREATE,WR_PROGRAM,Lat,Long,WaterSourceTypeCV
0,0,1,41I 1 00,STATEMENT OF CLAIM,ACTIVE,JEROME F CROTEAU; KATHERINE P CROTEAU,MULTIPLE DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,1966-01-11,,10.0,0.02,0.5,0.51,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,1-1,0,NWNESW,10N1W 2,LEWIS AND CLARK,0.0,0.0,N,431903.4951,269353.8514,10N1W,2,1417159,240277,7/1/1973 0:00:00,41I,1,0,1,1,124705,3/5/2021 0:00:00,ADJ,46.652738,-111.697572,Groundwater


In [7]:
# Creating easy MethodTypeCV retreival for AllocationsAmounts_fact sheet.

x = datetime.datetime(1973, 7, 1)
x

def createMethodTypeCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        if colrowValue < x:
            outList = "Adjudication"
        else:
            outList = "Appropriations"
    return outList

df['MethodTypeCV'] = df.apply(lambda row: createMethodTypeCV(row['ENF_PRIORI']), axis=1)
df['MethodTypeCV'].unique()

array(['Adjudication', 'Appropriations'], dtype=object)

In [8]:
# Creating TimeframeStart.
# Spliting string, returning WaDE friendly format.

MonthNumbDict = {
"Jan" : "01",
"Feb" : "02",
"Mar" : "03",
"May" : "04",
"Apr" : "05",
"Jun" : "06",
"Jul" : "07",
"Aug" : "08",
"Sep" : "09",
"Oct" : "10",
"Nov" : "11",
"Dec" : "12"}

def createTimeframeStart(ColRowVal):
    val = str(ColRowVal)
    day = val.split('-')[0]
    try:
        month = val.split('-')[1]
        month = MonthNumbDict[month] + "/"
    except:
        day = ""
        month = ""
    outlist = month + day
    return outlist

df['TimeframeStart'] = df.apply(lambda row: createTimeframeStart(row['PER_DIV_BG']), axis=1)
df.head(1)

Unnamed: 0,OID_,OBJECTID,WR_NUMBER,WR_TYPE,WR_STATUS,ALL_OWNERS,PURPOSES,POD_NO,SOURCE_TYP,SOURCE_NAM,HUC_12_SOU,HUC_12,MEANS_OF_D,ENF_PRIORI,DITCH_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PER_USE_AL,PER_DIV_AL,PER_DIV_BG,PER_DIV_EN,ABST_LINK,VERS_TYPE,WRKEY,LLDS_GOVT_,LLDS_QTRS,LLDS_TRS,LLDS_COUNT,X_METERS,Y_METERS,Y_MAPPED,X_METERS_C,Y_METERS_C,LLDS_TWP_R,LLDS_SCTN_,LLDSIDSEQ,TRSSIDSEQ,OP_AUTH_DT,BASIN,WR_NO,EXT,WRGT_ID_SE,VERSIDSEQ,PODV_ID_SE,DTM_CREATE,WR_PROGRAM,Lat,Long,WaterSourceTypeCV,MethodTypeCV,TimeframeStart
0,0,1,41I 1 00,STATEMENT OF CLAIM,ACTIVE,JEROME F CROTEAU; KATHERINE P CROTEAU,MULTIPLE DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,1966-01-11,,10.0,0.02,0.5,0.51,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,1-1,0,NWNESW,10N1W 2,LEWIS AND CLARK,0.0,0.0,N,431903.4951,269353.8514,10N1W,2,1417159,240277,7/1/1973 0:00:00,41I,1,0,1,1,124705,3/5/2021 0:00:00,ADJ,46.652738,-111.697572,Groundwater,Adjudication,


In [9]:
# Creating TimeframeEnd.
# Spliting string, returning WaDE friendly format.

MonthNumbDict = {
"Jan" : "01",
"Feb" : "02",
"Mar" : "03",
"May" : "04",
"Apr" : "05",
"Jun" : "06",
"Jul" : "07",
"Aug" : "08",
"Sep" : "09",
"Oct" : "10",
"Nov" : "11",
"Dec" : "12"}

def createTimeframeEnd(ColRowVal):
    val = str(ColRowVal)
    day = val.split('-')[0]
    try:
        month = val.split('-')[1]
        month = MonthNumbDict[month] + "/"
    except:
        day = ""
        month = ""
    outlist = month + day
    return outlist

df['TimeframeEnd'] = df.apply(lambda row: createTimeframeEnd(row['PER_DIV_EN']), axis=1)
df.head(1)

Unnamed: 0,OID_,OBJECTID,WR_NUMBER,WR_TYPE,WR_STATUS,ALL_OWNERS,PURPOSES,POD_NO,SOURCE_TYP,SOURCE_NAM,HUC_12_SOU,HUC_12,MEANS_OF_D,ENF_PRIORI,DITCH_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PER_USE_AL,PER_DIV_AL,PER_DIV_BG,PER_DIV_EN,ABST_LINK,VERS_TYPE,WRKEY,LLDS_GOVT_,LLDS_QTRS,LLDS_TRS,LLDS_COUNT,X_METERS,Y_METERS,Y_MAPPED,X_METERS_C,Y_METERS_C,LLDS_TWP_R,LLDS_SCTN_,LLDSIDSEQ,TRSSIDSEQ,OP_AUTH_DT,BASIN,WR_NO,EXT,WRGT_ID_SE,VERSIDSEQ,PODV_ID_SE,DTM_CREATE,WR_PROGRAM,Lat,Long,WaterSourceTypeCV,MethodTypeCV,TimeframeStart,TimeframeEnd
0,0,1,41I 1 00,STATEMENT OF CLAIM,ACTIVE,JEROME F CROTEAU; KATHERINE P CROTEAU,MULTIPLE DOMESTIC,1,GROUNDWATER,GROUNDWATER,Cave Bay-Canyon Ferry Lake,100301011205,WELL,1966-01-11,,10.0,0.02,0.5,0.51,01/01 to 12/31,01/01 to 12/31,01/01,12/31,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,ORIGINAL RIGHT,1-1,0,NWNESW,10N1W 2,LEWIS AND CLARK,0.0,0.0,N,431903.4951,269353.8514,10N1W,2,1417159,240277,7/1/1973 0:00:00,41I,1,0,1,1,124705,3/5/2021 0:00:00,ADJ,46.652738,-111.697572,Groundwater,Adjudication,,


In [10]:
#Creating the output Dataframe for PODs.

dfPOD = pd.DataFrame(columns=columnsList)

#Method
dfPOD["in_MethodTypeCV"] = df['MethodTypeCV']

# Water Source
dfPOD["in_WaterSourceName"] = df['SOURCE_NAM']
dfPOD["in_WaterSourceTypeCV"] = df['WaterSourceTypeCV']

# Site
dfPOD["in_CoordinateAccuracy"] = "Unspecified"
dfPOD["in_CoordinateMethodCV"] = "Unspecified"
dfPOD["in_County"] = df['LLDS_COUNT']
dfPOD["in_HUC12"] = df['HUC_12']
dfPOD["in_Latitude"] = df['Lat']
dfPOD["in_Longitude"] = df['Long']
dfPOD["in_PODorPOUSite"] = "POD"
dfPOD["in_SiteName"] = df['DITCH_NAME']
dfPOD["in_SiteNativeID"] = df['PODV_ID_SE']
dfPOD["in_SiteTypeCV"] = df['MEANS_OF_D']

# Allocation
dfPOD["in_AllocationFlow_CFS"] = df['FLW_RT_CFS']
dfPOD["in_AllocationLegalStatusCV"] = df['WR_STATUS']
dfPOD["in_AllocationNativeID"] = df['WR_NUMBER']
dfPOD["in_AllocationOwner"] = df['ALL_OWNERS']
dfPOD["in_AllocationPriorityDate"] = df['ENF_PRIORI'] 
dfPOD["in_AllocationTimeframeEnd"] = df['TimeframeEnd']
dfPOD["in_AllocationTimeframeStart"] = df['TimeframeStart']
dfPOD["in_AllocationTypeCV"] = df['WR_TYPE']
dfPOD["in_AllocationVolume_AF"] = df['VOLUME']
dfPOD["in_BeneficialUseCategory"] = df['PURPOSES']
dfPOD["in_DataPublicationDOI"] = df['ABST_LINK']
dfPOD["in_IrrigatedAcreage"] = df['MAX_ACRES']

print(len(dfPOD))
dfPOD.head(1)

631581


Unnamed: 0,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_Geometry,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OrganizationUUID,in_PopulationServed,in_PowerType,in_PrimaryUseCategory,in_VariableSpecificUUID,in_WaterAllocationNativeURL,in_MethodTypeCV
0,,,GROUNDWATER,,Groundwater,Unspecified,Unspecified,LEWIS AND CLARK,,,,100301011205,,46.652738,-111.697572,,,POD,,124705,,WELL,,,,,,,,,,,0.02,ACTIVE,41I 1 00,JEROME F CROTEAU; KATHERINE P CROTEAU,1966-01-11,,,,STATEMENT OF CLAIM,0.5,MULTIPLE DOMESTIC,,,,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,,,0.51,,,,,,,,,Adjudication


## PoU Water Budget Data

In [11]:
# CSV input file
fileInput = "WaDE_PoUs_input.csv"
df = pd.read_csv(fileInput)
print(len(df))
df.head(1)

14571


Unnamed: 0,OID_,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue
0,0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,12/31/1955 0:00,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2592224.897,6440.128506,-115.085915,47.952505


In [12]:
# Fixing datatypes of inputs.
df['ENF_PRIORI'] = pd.to_datetime(df['ENF_PRIORI'])

In [13]:
# WaterSourceTypeCV

waterSourceTypeDict = {
    "GROUNDWATER" : "Groundwater",
    "SURFACE" : "Surface Water"
}

def retrieveWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        String1 = colrowValue.strip()
        try:
            outList = waterSourceTypeDict[String1]
        except:
            outList = "Unspecified"
    return outList

df['WaterSourceTypeCV'] = df.apply(lambda row: retrieveWaterSourceTypeCV(row['SRCTYPE']), axis=1)
df['WaterSourceTypeCV'].unique()

array(['Surface Water', 'Groundwater'], dtype=object)

In [14]:
# Creating easy MethodTypeCV retreival for AllocationsAmounts_fact sheet.

x = datetime.datetime(1973, 7, 1)
x

def createMethodTypeCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        if colrowValue < x:
            outList = "Adjudication"
        else:
            outList = "Appropriations"
    return outList

df['MethodTypeCV'] = df.apply(lambda row: createMethodTypeCV(row['ENF_PRIORI']), axis=1)
df['MethodTypeCV'].unique()

array(['Adjudication', 'Appropriations'], dtype=object)

In [15]:
# Creating TimeframeStart.
# Spliting string, returning WaDE friendly format.

MonthNumbDict = {
"Jan" : "01",
"Feb" : "02",
"Mar" : "03",
"May" : "04",
"Apr" : "05",
"Jun" : "06",
"Jul" : "07",
"Aug" : "08",
"Sep" : "09",
"Oct" : "10",
"Nov" : "11",
"Dec" : "12"}

def createTimeframeStart(ColRowVal):
    val = str(ColRowVal)
    day = val.split('-')[0]
    try:
        month = val.split('-')[1]
        month = MonthNumbDict[month] + "/"
    except:
        day = ""
        month = ""
    outlist = month + day
    return outlist

df['TimeframeStart'] = df.apply(lambda row: createTimeframeStart(row['PER_USE_BG']), axis=1)
df.head(1)

Unnamed: 0,OID_,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue,WaterSourceTypeCV,MethodTypeCV,TimeframeStart
0,0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2592224.897,6440.128506,-115.085915,47.952505,Surface Water,Adjudication,04/1


In [16]:
# Creating TimeframeEnd.
# Spliting string, returning WaDE friendly format.

MonthNumbDict = {
"Jan" : "01",
"Feb" : "02",
"Mar" : "03",
"May" : "04",
"Apr" : "05",
"Jun" : "06",
"Jul" : "07",
"Aug" : "08",
"Sep" : "09",
"Oct" : "10",
"Nov" : "11",
"Dec" : "12"}

def createTimeframeEnd(ColRowVal):
    val = str(ColRowVal)
    day = val.split('-')[0]
    try:
        month = val.split('-')[1]
        month = MonthNumbDict[month] + "/"
    except:
        day = ""
        month = ""
    outlist = month + day
    return outlist

df['TimeframeEnd'] = df.apply(lambda row: createTimeframeEnd(row['PER_USE_EN']), axis=1)
df.head(1)

Unnamed: 0,OID_,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue,WaterSourceTypeCV,MethodTypeCV,TimeframeStart,TimeframeEnd
0,0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,1-May,31-Oct,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,10/30/2019 0:00,N,2592224.897,6440.128506,-115.085915,47.952505,Surface Water,Adjudication,04/1,10/31


In [17]:
# Creating the output Dataframe for POUs.

dfPOU = pd.DataFrame(columns=columnsList)

# Method
dfPOU["in_MethodTypeCV"] = df['MethodTypeCV']

# Water Source
dfPOU["in_WaterSourceName"] = df['SOURC_NAME']
dfPOU["in_WaterSourceTypeCV"] = df['WaterSourceTypeCV']

# Site
dfPOU["in_CoordinateAccuracy"] = "Unspecified"
dfPOU["in_CoordinateMethodCV"] = "Centroid"
dfPOU["in_County"] = df['COUNTY']
dfPOU["in_Latitude"] = df['Latitdue']
dfPOU["in_Longitude"] = df['Longitdue']
dfPOU["in_PODorPOUSite"] = "POU"
dfPOU["in_SiteName"] = "Unspecified"
dfPOU["in_SiteNativeID"] = "POU" + df['OBJECTID'].astype(str)
dfPOU["in_SiteTypeCV"] = "Unspecified" 

# Allocation
dfPOU["in_AllocationFlow_CFS"] = df['FLW_RT_CFS']
dfPOU["in_AllocationLegalStatusCV"] = df['STATUS']
dfPOU["in_AllocationNativeID"] = df['WRNUMBER']
dfPOU["in_AllocationOwner"] = df['ALL_OWNERS']
dfPOU["in_AllocationPriorityDate"] = df['ENF_PRIORI']
dfPOU["in_AllocationTimeframeEnd"] = df['TimeframeEnd']
dfPOU["in_AllocationTimeframeStart"] = df['TimeframeStart'] 
dfPOU["in_AllocationTypeCV"] = df['WRTYPE']
dfPOU["in_AllocationVolume_AF"] = df['VOLUME']
dfPOU["in_BeneficialUseCategory"] = df['PURPOSE']
dfPOU["in_DataPublicationDOI"] = df['NRIS_LINK']
dfPOU["in_IrrigatedAcreage"] = df['MAX_ACRES']

print(len(dfPOU))
dfPOU.head(1)

14571


Unnamed: 0,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_Geometry,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OrganizationUUID,in_PopulationServed,in_PowerType,in_PrimaryUseCategory,in_VariableSpecificUUID,in_WaterAllocationNativeURL,in_MethodTypeCV
0,,,INDIAN CREEK,,Surface Water,Unspecified,Centroid,SANDERS,,,,,,47.952505,-115.085915,,,POU,Unspecified,POU2301927,,Unspecified,,,,,,,,,,,0.11,ACTIVE,76N 116400 00,WEYERHAEUSER COMPANY,1955-12-31,,10/31,04/1,STATEMENT OF CLAIM,1.0,FIRE PROTECTION,,,,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,,,0.0,,,,,,,,,Adjudication


### Concatenate

In [18]:
# Merge dataframes
frames = [dfPOD, dfPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index()
print(len(outdf))

550635


In [19]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEMT_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceName'] == A) & 
                                       (dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf.head(1)

Unnamed: 0,index,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_Geometry,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OrganizationUUID,in_PopulationServed,in_PowerType,in_PrimaryUseCategory,in_VariableSpecificUUID,in_WaterAllocationNativeURL,in_MethodTypeCV
0,0,,,GROUNDWATER,WaDEMT_WS1,Groundwater,Unspecified,Unspecified,LEWIS AND CLARK,,,,100301011205,,46.652738,-111.697572,,,POD,,124705,,WELL,,,,,,,,,,,0.02,ACTIVE,41I 1 00,JEROME F CROTEAU; KATHERINE P CROTEAU,1966-01-11,,,,STATEMENT OF CLAIM,0.5,MULTIPLE DOMESTIC,,,,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,,,0.51,,,,,,,,,Adjudication


## Shapefile Data
- For attaching gemetry to POU csv inputs.

In [20]:
# PoU Shapefile Data
# Shapefile input
ShapeFileInput = gpd.read_file('PoUShp/MT_PoU.shp')
dfPoUshapetemp = pd.DataFrame(ShapeFileInput)
dfPoUshapetemp.head(3)

Unnamed: 0,OBJECTID,TRS,QTRS,GOVT_LOT,LLDS_320_1,LLDS_160_4,LLDS_80_10,LLDS_40_2_,TWP_RNG,SECTION_NU,LLDS_ID_SE,TRSS_ID_SE,WTR_RIGHTS,WRKEY_NUMB,BASIN,WRNUMBER,WRTYPE,STATUS,ENF_PRIORI,ALL_OWNERS,VER_TYP,SRCTYPE,SOURC_NAME,FLW_RT_GPM,FLW_RT_CFS,VOLUME,MAX_ACRES,PURPOSE,POU_NO,ACREAGE,COUNTY,STT_CD,IRR_TYP,PER_USE_AL,PER_USE_BG,PER_USE_EN,ANIMAL_UNI,NRIS_LINK,WRKEY,DTM_CREATE,XY_SHAPE_M,Shape__Are,Shape__Len,Longitdue,Latitdue,geometry
0,2301927,25N27W4,,0,,,,,25N27W,4,547295,148033,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,5,0.0,SANDERS,MT,,05/01 to 10/31,05/01,10/31,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,2019-10-30,N,2592225.0,6440.128506,-115.085915,47.952505,"POLYGON ((-115.09665 47.94526, -115.09667 47.9..."
1,2155835,25N27W3,,0,,,,,25N27W,3,547296,148034,76N116400 00,;204783-1;,76N,76N 116400 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,WEYERHAEUSER COMPANY,ORIG,SURFACE,INDIAN CREEK,50.0,0.11,1.0,0.0,FIRE PROTECTION,4,0.0,SANDERS,MT,,05/01 to 10/31,05/01,10/31,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204783-1,2019-10-30,N,2595401.0,6443.821302,-115.064419,47.952462,"POLYGON ((-115.07516 47.94522, -115.07517 47.9..."
2,2942909,25N26W6,,0,,,,,25N26W,6,547299,148037,76N116396 00,;204777-1;,76N,76N 116396 00,STATEMENT OF CLAIM,ACTIVE,1955-12-31,"MONTANA, STATE OF DEPT OF FISH WILDLIFE & PARK...",ORIG,SURFACE,THOMPSON RIVER,50.0,0.11,2.0,0.0,FIRE PROTECTION,53,0.0,SANDERS,MT,,04/01 to 10/31,04/01,10/31,,http://wr.dnrc.mt.gov/reports/rwservlet?dnrcwr...,204777-1,2019-10-30,N,2548555.0,6384.60915,-114.999982,47.952443,"POLYGON ((-115.01055 47.94518, -115.01058 47.9..."


In [21]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['OBJECTID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,POU2301927,"POLYGON ((-115.09665 47.94526, -115.09667 47.9..."
1,POU2155835,"POLYGON ((-115.07516 47.94522, -115.07517 47.9..."
2,POU2942909,"POLYGON ((-115.01055 47.94518, -115.01058 47.9..."


In [22]:
# # Concatenate dataframes
# frames = [dfPODshape, dfPoUshape]
# dfshape = pd.concat(frames)
# print(len(dfshape))

## Export Data

In [23]:
# Export out to CSV.
outdf.to_csv('P_MontanaMaster.csv', index=False) # The output.
dfPoUshape.to_csv('P_MontanaGeometry.csv', index=False) # The output geometry.