# Pre-processing Idaho Allocation data for WaDEQA upload.
Date Updated: 02/28/2020
Purpose:  To pre-process the Idaho data into one master file for simple DataFrame creation and extraction.  Working Idaho data for WaDEQA 2.0 is mostly composed of point of diversion data.

Useful Links to Data:
Point of Diversion (POD): (download spreadsheet)
https://data-idwr.opendata.arcgis.com/datasets/water-right-pods

Place of Use (PoU): Water Right (download the zipped files). See the metadata in the View. Open WaterRightPOUs.dbf in Excel, or load it into pandas in Python.
https://data-idwr.opendata.arcgis.com/pages/gis-data#WaterRights

Seasons of Use (SoU): contains the begin and end dates of water use, split by water basin.
https://data-idwr.opendata.arcgis.com/datasets/282b354f586144e596e309e09dd16a00_2

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
# Raise the display limit to 999 columns so wide DataFrames are shown in full in the notebook.
pd.set_option('display.max_columns', 999)

In [2]:
#Working Directory and Input Files
# The data folder can be overridden with the WADE_ID_DATA_DIR environment variable so the
# notebook can run on another machine; when it is unset the original folder is used.
workingDir = os.environ.get(
    "WADE_ID_DATA_DIR",
    "C:/Users/rjame/Documents/WSWC Documents/ID Project/data_dont open",
)
# Every later read_csv / to_csv call uses a bare file name, so they all resolve against this folder.
os.chdir(workingDir)
FI_POD = "Water_Right_PODs.csv"  # point of diversion export
FI_PoU = "Water_Right_PoUs.csv"  # place of use export
FI_PODSOU = "WaterRights_POD_SOU_SpatialJoin.csv"  # POD / season-of-use spatial join

In [3]:
#Dataframe creation
# low_memory=False makes pandas infer each column's dtype from the whole file instead of
# chunk by chunk, which avoids mixed-type columns.
# NOTE(review): the stored output of this cell looks like the tail of a DtypeWarning - confirm.
df_POD = pd.read_csv(filepath_or_buffer=FI_POD, low_memory=False)
df_PoU = pd.read_csv(FI_PoU, low_memory=False)
df_PODSOU = pd.read_csv(FI_PODSOU, low_memory=False)
# Work on a copy: the cells below edit df column by column, and a plain `df = df_POD`
# would apply every one of those edits to the raw POD frame as well.
df = df_POD.copy()

  interactivity=interactivity, compiler=compiler, result=result)


In [14]:
#Changing datatype of Priority Date
# Parse the raw values, render them as month/day/year text (which carries no time-of-day),
# then parse that text again so the column ends up as dates only.
# (Passing errors='coerce' to the first to_datetime would turn unparseable values into NaT
# instead of raising; it is intentionally not used here.)
parsedPriorityDate = pd.to_datetime(df['PriorityDate'])
priorityDateText = parsedPriorityDate.dt.strftime('%m/%d/%Y')
df['PriorityDate'] = pd.to_datetime(priorityDateText)

In [6]:
#Adding BeneficialUse Column
def assignBenUse(colrowValue, df_PoU):
    """Collect the water uses recorded for one water right.

    Parameters
    ----------
    colrowValue : value compared against df_PoU['RightID'] with ==.
    df_PoU : DataFrame with 'RightID' and 'WaterUse' columns.

    Returns
    -------
    list
        The 'WaterUse' value of every matching row, title-cased, in row order.
        Empty when no row matches.
    """
    isSameRight = df_PoU['RightID'] == colrowValue
    matchedUses = df_PoU.loc[isSameRight, 'WaterUse']
    return matchedUses.str.title().tolist()

# Build the RightID -> [WaterUse, ...] lookup in one pass over df_PoU. A row-wise apply
# would compare every POD row against every PoU row, which is needlessly slow on
# frames of this size; grouping once yields the same lists.
benUseByRightID = (
    df_PoU['WaterUse'].str.title()
    .groupby(df_PoU['RightID'], sort=False)
    .agg(list)
    .to_dict()
)
# A RightID with no PoU record gets an empty list; list(...) gives every row its own copy.
df['BeneficialUseCategoryCV'] = df['RightID'].map(
    lambda rightID: list(benUseByRightID.get(rightID, []))
)

In [7]:
# Compiling 'AllocationTimeframeStart' & 'AllocationTimeframeEnd'
# Both can have a string format for WaDE 2.0.

# Cast each month/day column to int64 in place and store a text twin next to it as '<name>_str'.
for datePart in ('STARTMONTH', 'STARTDAY', 'ENDMONTH', 'ENDDAY'):
    asInteger = df_PODSOU[datePart].values.astype(np.int64)
    df_PODSOU[datePart] = asInteger
    df_PODSOU[datePart + '_str'] = asInteger.astype(str)

# Glue the text pieces together as "month/day" (no zero padding).
for boundary, timeframeColumn in (('START', 'AllocationTimeframeStart'),
                                  ('END', 'AllocationTimeframeEnd')):
    df_PODSOU[timeframeColumn] = df_PODSOU[boundary + 'MONTH_str'] + "/" + df_PODSOU[boundary + 'DAY_str']

# Copy both timeframe columns onto the master frame.
# NOTE(review): this assignment lines rows up by index label, not by a water-right key, so it
# is only correct if df_PODSOU and df share the same row index - confirm against the join file.
for timeframeColumn in ('AllocationTimeframeStart', 'AllocationTimeframeEnd'):
    df[timeframeColumn] = df_PODSOU[timeframeColumn]

In [8]:
# Changing the string text of various columns to .title() format.
titleCaseColumns = [
    'DiversionName',
    'Owner',
    'Source',
    'SourceQualifier',
    'TributaryOf',
    'TributaryOfQualifier',
]
for columnName in titleCaseColumns:
    df[columnName] = df[columnName].str.title()

In [9]:
#Sort the DataFrame rows by RightID and its columns alphabetically.
alphabeticalColumns = sorted(df.columns)
df = df.sort_values(by=['RightID']).reindex(columns=alphabeticalColumns)

In [10]:
# Check: list the column names of the master frame after the alphabetical reordering.
df.columns

Index(['AllocationTimeframeEnd', 'AllocationTimeframeStart', 'BasinNumber',
       'Basis', 'BeneficialUseCategoryCV', 'DataSource', 'DiversionName',
       'DiversionType', 'MetalTagNumber', 'OBJECTID',
       'OverallMaxDiversionRate', 'Owner', 'PointOfDiversionID',
       'PriorityDate', 'RightID', 'SequenceNumber', 'Source',
       'SourceQualifier', 'SpatialDataID', 'SplitSuffix', 'Status',
       'TributaryOf', 'TributaryOfQualifier', 'VersionNumber', 'WRDocs',
       'WRMap', 'WRReport', 'WaterDistrictNumber', 'WaterRightNumber', 'X',
       'Y'],
      dtype='object')

In [11]:
# Check: show the dtype of every column (e.g. PriorityDate should be datetime64[ns]).
df.dtypes

AllocationTimeframeEnd              object
AllocationTimeframeStart            object
BasinNumber                          int64
Basis                               object
BeneficialUseCategoryCV             object
DataSource                          object
DiversionName                       object
DiversionType                       object
MetalTagNumber                      object
OBJECTID                             int64
OverallMaxDiversionRate            float64
Owner                               object
PointOfDiversionID                   int64
PriorityDate                datetime64[ns]
RightID                              int64
SequenceNumber                       int64
Source                              object
SourceQualifier                     object
SpatialDataID                      float64
SplitSuffix                         object
Status                              object
TributaryOf                         object
TributaryOfQualifier                object
VersionNumb

In [15]:
# Visual check of the finished frame; pandas shows only the first and last rows of a long frame.
df

Unnamed: 0,AllocationTimeframeEnd,AllocationTimeframeStart,BasinNumber,Basis,BeneficialUseCategoryCV,DataSource,DiversionName,DiversionType,MetalTagNumber,OBJECTID,OverallMaxDiversionRate,Owner,PointOfDiversionID,PriorityDate,RightID,SequenceNumber,Source,SourceQualifier,SpatialDataID,SplitSuffix,Status,TributaryOf,TributaryOfQualifier,VersionNumber,WRDocs,WRMap,WRReport,WaterDistrictNumber,WaterRightNumber,X,Y
237177,10/31,4/1,63,License,[Irrigation],QQ,,,,237178,0.19,Maurice Mc Clue,16,1989-06-21,22,10824,Ground Water,,482579.0,,Active,,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,TBD,63-10824,2.305278e+06,1.390599e+06
27749,11/15,3/1,67,License,[Stockwater],QQ,,,,27750,0.05,United States Of America Acting Through,122,1992-05-27,130,7744,Spring,,50203.0,,Active,Sinks,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,TBD,67-7744,2.301183e+06,1.453358e+06
7480,11/15,3/1,63,License,[Commercial],QQ,,,,7481,0.13,James P Mertz,160,1990-02-28,175,11206,Ground Water,,8421.0,,Active,,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,NWD,63-11206,2.268087e+06,1.383476e+06
72892,11/15,3/15,36,License,[Irrigation],GPS - Downloaded,,,A0003417,72893,1.20,Southfield Land & Livestock Llc,185,1986-08-20,207,8313,Ground Water,,137260.0,A,Active,,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,130,36-8313A,2.435239e+06,1.284883e+06
72893,10/31,5/1,36,License,[Irrigation],GPS - Downloaded,,,A0003409,72894,1.20,Southfield Land & Livestock Llc,186,1986-08-20,207,8313,Ground Water,,137261.0,A,Active,,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,130,36-8313A,2.435239e+06,1.284708e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193106,10/31,4/1,75,Decreed,[],GPS - Downloaded,,,A0015049,193107,0.11,William C Bernt,940300,1899-06-29,696604,14972,Tower Creek,,365647.0,,Active,Salmon Creek,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,NWD,75-14972,2.510776e+06,1.572809e+06
193045,10/31,4/1,75,Decreed,[],GPS - Downloaded,,,A0015054,193046,0.11,William C Bernt,940301,1899-06-29,696604,14972,Tower Creek,,365597.0,,Active,Salmon Creek,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,NWD,75-14972,2.510922e+06,1.573219e+06
193112,10/31,4/1,75,Decreed,[],GPS - Downloaded,,,A0015050,193113,0.04,William C Bernt,940305,1909-12-10,696605,14973,Tower Creek,,365648.0,,Active,Salmon Creek,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,NWD,75-14973,2.510770e+06,1.572753e+06
193046,10/31,4/1,75,Decreed,[],GPS - Downloaded,,,A0015054,193047,0.04,William C Bernt,940304,1909-12-10,696605,14973,Tower Creek,,365597.0,,Active,Salmon Creek,,0,https://www.idwr.idaho.gov/apps/ExtSearch/Rela...,https://maps.idwr.idaho.gov/Map/IDWRlayout?Bas...,https://www.idwr.idaho.gov/apps/ExtSearch/Righ...,NWD,75-14973,2.510922e+06,1.573219e+06


In [16]:
#Exporting to Finished File
# Written relative to the current working directory; the row index is left out of the file.
outputFile = 'P_IdahoMaster.csv'  # The output
df.to_csv(outputFile, index=False)