# Pre-processing Utah Allocation data for WaDEQA upload.
Date Updated: 03/13/2020
Purpose:  To pre-process the Utah data into one master file for simple DataFrame creation and extraction

Useful Links to Data:
The Utah Division of Water Rights (UTDWR) publishes its water right data on-demand through the PUBDUMP Database table dump Utility available at (Download the WATER_MASTER table) 
https://www.waterrights.utah.gov/cgi-bin/pubdump.exe?DBNAME=WRDB&SECURITYKEY=wrt2012access

Contact infomratoin can be found at:
https://www.waterrights.utah.gov/contact.asp

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
#Working Directory and Input Files
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Utah\WaterAllocation/RawInputData"
os.chdir(workingDir)
FI_Master = "WRCHEX_WATER_MASTER.csv"
FI_PoD = "WRCHEX_POINTS_OF_DIVERSION.csv"
FI_Own = "OWNERS.csv"
FI_Irr = "IRRIGATION_MASTER.csv"
FI_Mun = "WTRUSE_MUNICIPAL.csv"
FI_Pow = "WTRUSE_POWER.csv"

In [3]:
#Dataframe creation
df_MS = pd.read_csv(FI_Master, encoding = "ISO-8859-1") #Input
df_PoD = pd.read_csv(FI_PoD, encoding = "ISO-8859-1") #Input
df_Own = pd.read_csv(FI_Own, encoding = "ISO-8859-1") #Input
df_Irr = pd.read_csv(FI_Irr, encoding = "ISO-8859-1") #Input
df_Mun = pd.read_csv(FI_Mun, encoding = "ISO-8859-1") #Input
df_Pow = pd.read_csv(FI_Pow, encoding = "ISO-8859-1") #Inpu
df = pd.DataFrame() #Output

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
#Merging dataframes into one, using left-join.
df = pd.merge(df_MS, df_PoD, left_on='WRNUM', right_on='WRCHEX', how='outer') # Joinning PoD data
df = pd.merge(df,    df_Own, left_on='WRNUM', right_on='WRCHEX', how='outer') # Joinning Owner data
df = pd.merge(df,    df_Irr, left_on='WRNUM', right_on='WRNUM', how='outer') #Joinning Irrigaton data
df = pd.merge(df,    df_Mun, left_on='WRNUM', right_on='WRNUM', how='outer')  #Joinning Municipal data
df = pd.merge(df,    df_Pow, left_on='WRNUM', right_on='WRNUM', how='outer')  #Joinning Power data

In [7]:
#Changing datatype of used date fields. 
df['DATE_PRIORITY'] = pd.to_datetime(df['DATE_PRIORITY'], errors = 'coerce')
df['DATE_PRIORITY'] = pd.to_datetime(df["DATE_PRIORITY"].dt.strftime('%m/%d/%Y'))

df['DATE_TERMINATED'] = pd.to_datetime(df['DATE_TERMINATED'], errors = 'coerce')
df['DATE_TERMINATED'] = pd.to_datetime(df["DATE_TERMINATED"].dt.strftime('%m/%d/%Y'))

In [None]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

In [8]:
#Making Sure datatype of Long, Lat, Wrex, Irrigation are Float
df['X_UTM'] = pd.to_numeric(df['X_UTM'], errors='coerce')
df['Y_UTM'] = pd.to_numeric(df['Y_UTM'], errors='coerce')
df['WREX_CFS'] = pd.to_numeric(df['WREX_CFS'], errors='coerce')
df['WREX_ACFT'] = pd.to_numeric(df['WREX_ACFT'], errors='coerce')
df['IRRIGATION_DEPLETION'] = pd.to_numeric(df['IRRIGATION_DEPLETION'], errors='coerce')
df['IRRIGATION_ACREAGE'] = pd.to_numeric(df['IRRIGATION_ACREAGE'], errors='coerce')

In [9]:
#Removing Empty Y_UTM and X_UTM rows.
dropIndex = df.loc[(df['X_UTM'].isnull()) | (df['X_UTM'] == '') |
                      (df['Y_UTM'].isnull()) | (df['Y_UTM'] == '')].index
if len(dropIndex) > 0:
    df = df.drop(dropIndex)
    df = df.reset_index(drop=True)

  res_values = method(rvalues)


In [10]:
#Removing empty DATE_PRIORITY fields.
dropIndex = df.loc[(df['DATE_PRIORITY'].isnull()) | (df['DATE_PRIORITY'] == '')].index

if len(dropIndex) > 0:
    df = df.drop(dropIndex)
    df = df.reset_index(drop=True)

In [11]:
#Removing empty AllocationAmount and AllocationMaxium fields.
dropIndex = df.loc[((df['WREX_CFS'].isnull()) | (df['WREX_CFS'] == '')) &
                   ((df['WREX_ACFT'].isnull()) | (df['WREX_ACFT'] == ''))].index

if len(dropIndex) > 0:
    df = df.drop(dropIndex)
    df = df.reset_index(drop=True)

In [13]:
#Compiling 'AllocationTimeframeStart' & 'AllocationTimeframeEnd'
#Both can have a string format for WaDE 2.0.
    
def assignTime(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outlist = ""
    else:
        colrowValue = str(colrowValue)
        colrowValue = colrowValue.strip()
        if len(colrowValue) == 4:
            startmonth = colrowValue[0:1]
            startday = colrowValue[1:2]
            outlist = "0" + startmonth + "/" + "0" + startday
        elif len(colrowValue) == 5:
            startmonth = colrowValue[0:1]
            startday = colrowValue[1:3]
            outlist = "0" + startmonth + "/" + startday
        elif len(colrowValue) == 6:
            startmonth = colrowValue[0:2]
            startday = colrowValue[2:4]
            outlist = startmonth + "/" + startday
        else:
            outlist = ""
  
    return outlist


df['AllocationTimeframeStart'] = df.apply(lambda row: assignTime(row['USE_BEG_DATE_x']), axis=1)
df['AllocationTimeframeEnd'] = df.apply(lambda row: assignTime(row['USE_END_DATE_x']), axis=1)

In [None]:
#Creating Singular location value by concatinating other cells.

def assignSiteLocation(vWRNUM, vA, vB, vC, vD, vE):
    vWRNUM = str(vWRNUM)
    vWRNUM = vWRNUM.strip()
    
    if vA == "" or pd.isnull(vA):
        vA = ""
    else:
        vA = str(vA)
        vA = vA.strip()

    if vB == "" or pd.isnull(vB):
        vB = "0"
    else:
        vB = int(vB)
        vB = str(vB)
        vB = vB.strip()
    
    if vC == "" or pd.isnull(vC):
        vC = ""
    else:
        vC = str(vC)
        vC = vC.strip()

    if vD == "" or pd.isnull(vD):
        vD = "0"
    else:
        vD = int(vD)
        vD = str(vD)
        vD = vD.strip()
        
    if vE == "" or pd.isnull(vE):
        vE = ""
    else:
        vE = str(vE)
        vE = vE.strip()
    
    outlist = vWRNUM + "_" + vA + vB + vC + vD + vE
    outlist = outlist.strip()
    
    return outlist

df['SiteLocation'] = df.apply(lambda row: assignSiteLocation(row['WRNUM'], 
                                                             row['NS_DIRECTION'], 
                                                             row['NS_DISTANCE'], 
                                                             row['EW_DIRECTION'], 
                                                             row['EW_DISTANCE'], 
                                                             row['SECTION_CORNER']), axis=1).reset_index()

In [None]:
#Sort Dataframe by RightID and columns alphebeticallly.
df = df.sort_values(by=['WRNUM'])
df = df.reindex(sorted(df.columns), axis=1)

In [None]:
#Removing all NaN Values and replacing with blank
df = df.replace(np.nan, '', regex=True)

In [None]:
df.columns

In [None]:
df.dtypes

In [None]:
df

In [None]:
#Exporting to Finished File
df.to_csv('P_UtahMaster.csv', index=False)  # The output