# Pre-processing Oregon Allocation data for WaDEQA upload.
Date Updated: 11/03/2020
Purpose:  To pre-process the Oregon data into one master file for simple DataFrame creation and extraction

Useful Links to Data:

1) Data Available (use 'Statewide Water Right Spatial Data with Metadata'): https://www.oregon.gov/OWRD/access_Data/Pages/Data.aspx

2) POD metadata: https://arcgis.wrd.state.or.us/data/wr_pod_metadata.pdf

3) PoU metadata: https://arcgis.wrd.state.or.us/data/wr_pou_metadata.pdf

In [1]:
# Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working Directory and Input File
# NOTE(review): workingDir is an absolute, machine-specific Windows path; it must be
# edited before this notebook can run on another machine.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Oregon/WaterAllocation/RawInputData"
# Every relative file name used by later cells (input and output) resolves against this directory.
os.chdir(workingDir)
PoDFile = "ORwr_v_pod_public_input.csv"  # contains PoD info

In [3]:
# Dataframe creation
# Reads the PoD CSV, decoding the file as ISO-8859-1 text.
# NOTE(review): the saved output under this cell looks like the tail of a pandas
# DtypeWarning (mixed-type column) - TODO confirm; if so, consider passing dtype= or
# low_memory=False to read_csv.
df = pd.read_csv(PoDFile, encoding = "ISO-8859-1") # Input

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Display the freshly loaded DataFrame as a quick visual check of the input.
df

Unnamed: 0,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,app_nbr,permit_char,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_title,transfer_nbr,wr_type,name_last,name_first,name_company,pod_nbr,pod_char,source_type,use_code,use_category,use_code_description,priority_date,duty,rate_cfs,rate_cfs_est,max_rate_cfs,acre_feet,acre_feet_est,max_rate_acre_feet,source,tributary_to,streamcode,stream_name,supplemental,begin_month,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,POINT_X,POINT_Y
0,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,11987.0,G,10961.0,,,,,,GW,,,FORMOSA EXPLORATION INC.,1,,WE,MI,0,MINING,11/21/1989 0:00:00,,0.040,0,0.040,,0,,FORMOSA 1 ADIT ...,CANYON CREEK,16125009000400430220,UNN STR > MIDDLE CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/1996 0:00:00,6/1/1996 0:00:00,,0 G 10961 1,5.394125e+05,4.167058e+05
1,2,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6910,26860,G,11987.0,G,10961.0,,,,,,GW,,,FORMOSA EXPLORATION INC.,2,,WE,MI,0,MINING,11/21/1989 0:00:00,,0.005,0,0.005,,0,,SILVER BUTTE 1 ADIT ...,CANYON CREEK,16125009000400430220,UNN STR > MIDDLE CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/1996 0:00:00,6/1/1996 0:00:00,,0 G 10961 2,5.392329e+05,4.162520e+05
2,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,13684.0,G,12684.0,,,,,,GW,,,DUTCH MINING LLC,1,,WE,MI,0,MINING,5/6/1994 0:00:00,,0.110,1,0.220,,0,,A WELL ...,ROGUE RIVER,15168010500040,DRAIN CR > WHISKY CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/2001 0:00:00,6/1/2001 0:00:00,,0 G 12684 1,4.709328e+05,3.559155e+05
3,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,13944.0,G,12750.0,,,,,,GW,STEYAERT,JEFF,KNIFE RIVER CORP.,1,,WE,MI,0,MINING,1/18/1995 0:00:00,,0.670,0,0.670,,0,,A WELL,TROUT CREEK BASIN,05198012200060050,TROUT CR > INDIAN FORD CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,5/1/2001 0:00:00,5/1/2001 0:00:00,,0 G 12750 1,1.010124e+06,9.319935e+05
4,5,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29836,G,13944.0,G,12750.0,,,,,,GW,STEYAERT,JEFF,KNIFE RIVER CORP.,1,,WE,MI,0,MINING,1/18/1995 0:00:00,,0.110,0,0.110,,0,,A WELL,TROUT CREEK BASIN,05198012200060050,TROUT CR > INDIAN FORD CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,5/1/2001 0:00:00,5/1/2001 0:00:00,,0 G 12750 1,1.010124e+06,9.319935e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189817,189818,Permit: R 15430 * WI,R 15430,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,202390,344799,786691,R,88456.0,R,15430.0,,,,,,ST,URSTADT,ERIC ANTHONY,,1,,WR,WI,W,WILDLIFE,9/5/2017 0:00:00,,,0,,1.5375,1,12.3,RUNOFF,EAST FORK DAIRY CREEK,02114003000480080,E FK DAIRY CR > DAIRY CR,0,12.0,1.0,12.0,31.0,SLB,OWRD,2/19/2020 0:00:00,2/20/2020 15:10:24,30.0,MEASURED DISTANCE; MOVED RELATIVE TO POU,6.532996e+05,1.447329e+06
189818,189819,Permit: R 15430 * WI,R 15430,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,202390,344799,786692,R,88456.0,R,15430.0,,,,,,ST,URSTADT,ERIC ANTHONY,,1,,WR,WI,W,WILDLIFE,9/5/2017 0:00:00,,,0,,1.5375,1,12.3,RUNOFF,EAST FORK DAIRY CREEK,02114003000480080,E FK DAIRY CR > DAIRY CR,0,1.0,1.0,4.0,30.0,SLB,OWRD,2/19/2020 0:00:00,2/20/2020 15:10:24,30.0,MEASURED DISTANCE; MOVED RELATIVE TO POU,6.532996e+05,1.447329e+06
189819,189820,Permit: R 15430 * WI,R 15430,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,202390,344799,786699,R,88456.0,R,15430.0,,,,,,ST,URSTADT,ERIC ANTHONY,,1,,WR,WI,W,WILDLIFE,9/5/2017 0:00:00,,,0,,4.1000,1,12.3,RUNOFF,EAST FORK DAIRY CREEK,02114003000480080,E FK DAIRY CR > DAIRY CR,0,5.0,1.0,11.0,30.0,SLB,OWRD,2/19/2020 0:00:00,2/20/2020 15:10:24,30.0,MEASURED DISTANCE; MOVED RELATIVE TO POU,6.532996e+05,1.447329e+06
189820,189821,Permit: R 13003 * WI,R 13003,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,33683,34240,787503,R,84451.0,R,13003.0,,,,,,ST,MADDOX,ALTON,,2,,WE,WI,W,WILDLIFE,6/10/2000 0:00:00,,,0,,0.2187,1,3.5,A WELL,ABERNATHY CREEK,02114002500150,UNN STR > ABERNETHY CR,0,1.0,1.0,6.0,30.0,SLB,OWRD,3/11/2020 0:00:00,3/11/2020 14:04:21,30.0,PLACED USING 2018 IMAGERY,8.075046e+05,1.302937e+06


In [6]:
# Print the dtype pandas inferred for every column. The display limits are lifted
# only inside this block, so the listing is not truncated for this wide frame.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

OBJECTID                  int64
pod_display              object
pod_display_short        object
wris_link                object
snp_id                    int64
pod_location_id           int64
pod_use_id                int64
app_char                 object
app_nbr                 float64
permit_char              object
permit_nbr              float64
cert_nbr                float64
claim_char               object
claim_nbr               float64
decree_title             object
transfer_nbr             object
wr_type                  object
name_last                object
name_first               object
name_company             object
pod_nbr                   int64
pod_char                 object
source_type              object
use_code                 object
use_category             object
use_code_description     object
priority_date            object
duty                    float64
rate_cfs                float64
rate_cfs_est              int64
max_rate_cfs            float64
acre_fee

In [7]:
# Create Purge dataframe to note data that was removed for WaDE purposes.
# It starts out empty, with the same columns as df plus a 'ReasonRemoved' column
# that records why each row was dropped.
columnslist = list(df.columns)
dfpurge = pd.DataFrame(columns=columnslist).assign(ReasonRemoved='')  # purge DataFrame
dfpurge

Unnamed: 0,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,app_nbr,permit_char,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_title,transfer_nbr,wr_type,name_last,name_first,name_company,pod_nbr,pod_char,source_type,use_code,use_category,use_code_description,priority_date,duty,rate_cfs,rate_cfs_est,max_rate_cfs,acre_feet,acre_feet_est,max_rate_acre_feet,source,tributary_to,streamcode,stream_name,supplemental,begin_month,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,POINT_X,POINT_Y,ReasonRemoved


In [8]:
# Changing datatype of used date fields.
# Parse once (values that cannot be parsed become NaT), then normalize to midnight so
# only the calendar date is kept. This replaces the earlier
# datetime -> 'MM/DD/YYYY' string -> datetime round trip, which produced the same
# result but re-parsed every value a second time.
df['priority_date'] = pd.to_datetime(df['priority_date'], errors = 'coerce').dt.normalize()

In [10]:
# Removing NaN, and missing (999) values from max_rate_acre_feet.  Need an amount value for WaDE.
# NOTE(review): rows are removed here whenever max_rate_acre_feet is missing, regardless
# of whether rate_cfs holds a value - confirm this is the intended rule.
missingAcreFeet = (df['max_rate_acre_feet'] == 999) | (df['max_rate_acre_feet'].isnull())

# reset_index() (without drop) keeps each removed row's original row label in an
# 'index' column of the purge frame.
mask = df.loc[missingAcreFeet].assign(ReasonRemoved='Null max_rate_acre_feet').reset_index()
if len(mask.index) > 0:
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    dfpurge = pd.concat([dfpurge, mask])  # Append to purge DataFrame
    df = df.loc[~missingAcreFeet].reset_index(drop=True)
df.head(3)

Unnamed: 0,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,app_nbr,permit_char,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_title,transfer_nbr,wr_type,name_last,name_first,name_company,pod_nbr,pod_char,source_type,use_code,use_category,use_code_description,priority_date,duty,rate_cfs,rate_cfs_est,max_rate_cfs,acre_feet,acre_feet_est,max_rate_acre_feet,source,tributary_to,streamcode,stream_name,supplemental,begin_month,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,POINT_X,POINT_Y
0,42,Permit: S 53648 * MI,S 53648,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,51887,47388,56626,S,84101.0,S,53648.0,,,,,,SW,,,"KNAPP RANCHES INC.; KNAPP, JEFF",1,,RS,MI,0,MINING,1999-03-24,2.5,0.0,0,0.0,40.0,0,40.0,A RESERVOIR ...,ELK RIVER,171590,ELK R > PACIFIC OCEAN,0,1.0,1.0,12.0,31.0,KLS,OWRD,10/26/2007 12:42:34,,10.0,Automapped as center of the envelope for the T...,236258.299869,403105.375
1,94,Cert:3592 OR * MI,3592,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,55967,52723,61998,R,7012.0,R,467.0,3592.0,,,,,ST,LEHMAN,DAVID,,1,,ST,MI,0,MINING,1920-01-22,,0.0,0,0.0,3.0,0,3.0,WOLF CREEK ...,GRAVE CREEK,15168011100150,WOLF CR > GRAVE CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/2001 0:00:00,6/1/2001 0:00:00,,3592 R 467 1,566863.041667,365284.141732
2,385,Cert:12006 OR * MI,12006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,64395,64098,74743,R,15462.0,R,675.0,12006.0,,,,,ST,MAGERLE,CARLOS A,,1,,ST,MI,0,MINING,1934-08-07,,0.0,0,0.0,5.7,0,5.7,WARD CREEK ...,ROGUE RIVER,1516801750,WARD CR > ROGUE R,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/2001 0:00:00,6/1/2001 0:00:00,,12006 R 675 1,608290.608268,281559.799869


In [11]:
# Removing NaN, and missing (999) values from rate_cfs.  Need an amount value for WaDE.
missingRateCfs = (df['rate_cfs'] == 999) | (df['rate_cfs'].isnull())

# reset_index() (without drop) keeps each removed row's original row label in an
# 'index' column of the purge frame.
mask = df.loc[missingRateCfs].assign(ReasonRemoved='Null rate_cfs').reset_index()
if len(mask.index) > 0:
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    dfpurge = pd.concat([dfpurge, mask])  # Append to purge DataFrame
    df = df.loc[~missingRateCfs].reset_index(drop=True)
df.head(3)

Unnamed: 0,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,app_nbr,permit_char,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_title,transfer_nbr,wr_type,name_last,name_first,name_company,pod_nbr,pod_char,source_type,use_code,use_category,use_code_description,priority_date,duty,rate_cfs,rate_cfs_est,max_rate_cfs,acre_feet,acre_feet_est,max_rate_acre_feet,source,tributary_to,streamcode,stream_name,supplemental,begin_month,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,POINT_X,POINT_Y
0,42,Permit: S 53648 * MI,S 53648,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,51887,47388,56626,S,84101.0,S,53648.0,,,,,,SW,,,"KNAPP RANCHES INC.; KNAPP, JEFF",1,,RS,MI,0,MINING,1999-03-24,2.5,0.0,0,0.0,40.0,0,40.0,A RESERVOIR ...,ELK RIVER,171590,ELK R > PACIFIC OCEAN,0,1.0,1.0,12.0,31.0,KLS,OWRD,10/26/2007 12:42:34,,10.0,Automapped as center of the envelope for the T...,236258.299869,403105.375
1,94,Cert:3592 OR * MI,3592,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,55967,52723,61998,R,7012.0,R,467.0,3592.0,,,,,ST,LEHMAN,DAVID,,1,,ST,MI,0,MINING,1920-01-22,,0.0,0,0.0,3.0,0,3.0,WOLF CREEK ...,GRAVE CREEK,15168011100150,WOLF CR > GRAVE CR,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/2001 0:00:00,6/1/2001 0:00:00,,3592 R 467 1,566863.041667,365284.141732
2,385,Cert:12006 OR * MI,12006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,64395,64098,74743,R,15462.0,R,675.0,12006.0,,,,,ST,MAGERLE,CARLOS A,,1,,ST,MI,0,MINING,1934-08-07,,0.0,0,0.0,5.7,0,5.7,WARD CREEK ...,ROGUE RIVER,1516801750,WARD CR > ROGUE R,0,1.0,1.0,12.0,31.0,MIGRT,OWRD,6/1/2001 0:00:00,6/1/2001 0:00:00,,12006 R 675 1,608290.608268,281559.799869


In [12]:
#Determining Company vs Individual. Concatenating name of individual.
def assignownerName(colrowValue1, colrowValue2):
    """Combine two name parts into a single owner string.

    A part counts as missing when it is null (per ``pd.isnull``) or blank once
    surrounding whitespace has been removed.

    Args:
        colrowValue1: First name part (the caller in this notebook passes ``name_last``).
        colrowValue2: Second name part (the caller in this notebook passes ``name_first``).

    Returns:
        str: '' when both parts are missing, the single stripped part when only
        one is present, otherwise both stripped parts joined as 'part1, part2'.
    """
    def cleanPart(value):
        # Null collapses to ''; str() keeps .strip() from failing on non-string cells.
        if pd.isnull(value):
            return ''
        return str(value).strip()

    cleanedParts = [cleanPart(colrowValue1), cleanPart(colrowValue2)]
    # Join the stripped parts. The previous version joined the raw inputs here,
    # so padding whitespace survived whenever both parts were present.
    return ", ".join(part for part in cleanedParts if part)


def assignownerNameORCompany(colrowValue1, colrowValue2, colrowValue3):
    """Pick the company name when there is one, else build the individual's name.

    Args:
        colrowValue1: Company name; used as-is when it is neither '' nor null.
        colrowValue2: First name part handed to ``assignownerName``.
        colrowValue3: Second name part handed to ``assignownerName``.

    Returns:
        ``colrowValue1`` unchanged when present, otherwise the result of
        ``assignownerName(colrowValue2, colrowValue3)``.
    """
    companyMissing = colrowValue1 == '' or pd.isnull(colrowValue1)
    if not companyMissing:
        return colrowValue1
    return assignownerName(colrowValue2, colrowValue3)

# Build the 'Owner' column row by row: name_company when present, otherwise the
# name assembled from name_last and name_first.
df['Owner'] = df.apply(lambda row: assignownerNameORCompany(row['name_company'], row['name_last'], row['name_first']), axis=1)

In [13]:
#Determining AllocationTimeframe Start & End time for each site.

def formatDateString(inString1, inString2):
    """Build a 'month/day' string from two numeric values.

    Both values are converted with ``int()`` before formatting, so floats such
    as 1.0 and 31.0 become '1/31' (no zero padding).

    Args:
        inString1: Month value (number or numeric string).
        inString2: Day value (number or numeric string).

    Returns:
        str: '<month>/<day>', or '' when either value cannot be converted to an
        integer (for example None or NaN).
    """
    try:
        return str(int(inString1)) + '/' + str(int(inString2))
    except (TypeError, ValueError, OverflowError):
        # int() raises TypeError for None, ValueError for NaN or non-numeric text,
        # and OverflowError for infinity. Only those mean "no usable date"; anything
        # else (e.g. KeyboardInterrupt) is no longer swallowed.
        return ''

# Build 'month/day' strings for the start and end of each row's allocation timeframe
# from the begin_* and end_* columns; formatDateString returns '' when a value cannot
# be converted.
df['AllocationTimeframeStart'] = df.apply(lambda row: formatDateString(row['begin_month'], row['begin_day']), axis=1)
df['AllocationTimeframeEnd'] = df.apply(lambda row: formatDateString(row['end_month'], row['end_day']), axis=1)

In [14]:
#Fixing Beneficial Uses (use_code_description)
def fixBenUse(colrowValue):
    """Return the comma-free form of one specific beneficial-use description.

    Only the exact value 'IRRIGATION, LIVESTOCK AND DOMESTIC' is rewritten (its
    comma is dropped); every other value is returned unchanged.
    """
    needsFix = colrowValue == 'IRRIGATION, LIVESTOCK AND DOMESTIC'
    return 'IRRIGATION LIVESTOCK AND DOMESTIC' if needsFix else colrowValue

# Apply fixBenUse to every row's use_code_description, overwriting the column in place.
df['use_code_description'] = df.apply(lambda row: fixBenUse(row['use_code_description']), axis=1)

In [17]:
# Exporting to Finished File
# Both CSVs are written with relative names, so they land in the directory set by
# os.chdir(workingDir) earlier in the notebook.
df.to_csv('P_OregonMaster.csv', index=False)  # The output
dfpurge.to_csv('inputOregonDataRemoved.csv', index=False)  # Error check for states to see why we removed certain data.