# Pre-processing Utah Allocation data for WaDEQA upload.
Date Updated: 03/13/2020
Purpose:  To pre-process the Utah data into one master file for simple DataFrame creation and extraction

Useful Links to Data:
The Utah Division of Water Rights (UTDWR) publishes its water right data on-demand through the PUBDUMP Database table dump Utility available at (Download the WATER_MASTER table) 
https://www.waterrights.utah.gov/cgi-bin/pubdump.exe?DBNAME=WRDB&SECURITYKEY=wrt2012access

Contact infomratoin can be found at:
https://www.waterrights.utah.gov/contact.asp

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
#Working Directory and Input Files
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Utah/WaterAllocation/RawInputData"
os.chdir(workingDir)
FI_Master = "WRCHEX_WATER_MASTER.csv"
FI_PoD = "WRCHEX_POINTS_OF_DIVERSION.csv"
FI_Own = "OWNERS.csv"
FI_Irr = "IRRIGATION_MASTER.csv"
FI_Mun = "WTRUSE_MUNICIPAL.csv"
FI_Pow = "WTRUSE_POWER.csv"

In [3]:
#Dataframe creation
df_MS = pd.read_csv(FI_Master, encoding = "ISO-8859-1") #Input
df_PoD = pd.read_csv(FI_PoD, encoding = "ISO-8859-1") #Input
df_Own = pd.read_csv(FI_Own, encoding = "ISO-8859-1") #Input
df_Irr = pd.read_csv(FI_Irr, encoding = "ISO-8859-1") #Input
df_Mun = pd.read_csv(FI_Mun, encoding = "ISO-8859-1") #Input
df_Pow = pd.read_csv(FI_Pow, encoding = "ISO-8859-1") #Inpu
df = pd.DataFrame() #Output

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
#Merging dataframes into one, using left-join.
df = pd.merge(df_MS, df_PoD, left_on='WRNUM', right_on='WRCHEX', how='outer') # Joinning PoD data
df = pd.merge(df,    df_Own, left_on='WRNUM', right_on='WRCHEX', how='outer') # Joinning Owner data
df = pd.merge(df,    df_Irr, left_on='WRNUM', right_on='WRNUM', how='outer') # Joinning Irrigaton data
df = pd.merge(df,    df_Mun, left_on='WRNUM', right_on='WRNUM', how='outer')  # Joinning Municipal data
df = pd.merge(df,    df_Pow, left_on='WRNUM', right_on='WRNUM', how='outer')  # Joinning Power data

In [5]:
#Changing datatype of used date fields. 
df['DATE_PRIORITY'] = pd.to_datetime(df['DATE_PRIORITY'], errors = 'coerce')
df['DATE_PRIORITY'] = pd.to_datetime(df["DATE_PRIORITY"].dt.strftime('%m/%d/%Y'))

df['DATE_TERMINATED'] = pd.to_datetime(df['DATE_TERMINATED'], errors = 'coerce')
df['DATE_TERMINATED'] = pd.to_datetime(df["DATE_TERMINATED"].dt.strftime('%m/%d/%Y'))

In [6]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

WRNUM                                object
APPL_CLAIM_NUMBER                    object
CERTIFICATE_NUMBER                   object
LAND_OWNERSHIP                       object
WREX_CFS                             object
WREX_ANDOR                           object
WREX_ACFT                           float64
WREX_SOURCE                          object
WREX_COUNTY                          object
COMMON_DESCRIPTION                   object
DATE_FILED                           object
TAKEN_BY_INITIALS                    object
DATE_PRIORITY                datetime64[ns]
ADV_NEWSPAPER                        object
DATE_PUB_BEGAN                      float64
DATE_PROOF_PUB                       object
PROTESTED                            object
DATE_PROTEST_END                     object
MEMO_DECISION                        object
DATE_APPROVED_REJECTED               object
APPROVED_REJECTED                    object
DATE_PROOF_DUE                       object
DATE_EXTENSION_FILED            

In [7]:
#Making Sure datatype of Long, Lat, Wrex, Irrigation are Float
df['X_UTM'] = pd.to_numeric(df['X_UTM'], errors='coerce')
df['Y_UTM'] = pd.to_numeric(df['Y_UTM'], errors='coerce')
df['WREX_CFS'] = pd.to_numeric(df['WREX_CFS'], errors='coerce')
df['WREX_ACFT'] = pd.to_numeric(df['WREX_ACFT'], errors='coerce')
df['IRRIGATION_DEPLETION'] = pd.to_numeric(df['IRRIGATION_DEPLETION'], errors='coerce')
df['IRRIGATION_ACREAGE'] = pd.to_numeric(df['IRRIGATION_ACREAGE'], errors='coerce')

In [8]:
#Compiling 'AllocationTimeframeStart' & 'AllocationTimeframeEnd'
#Both can have a string format for WaDE 2.0.
    
def assignTime(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outlist = ""
    else:
        colrowValue = str(colrowValue)
        colrowValue = colrowValue.strip()
        if len(colrowValue) == 4:
            startmonth = colrowValue[0:1]
            startday = colrowValue[1:2]
            outlist = "0" + startmonth + "/" + "0" + startday
        elif len(colrowValue) == 5:
            startmonth = colrowValue[0:1]
            startday = colrowValue[1:3]
            outlist = "0" + startmonth + "/" + startday
        elif len(colrowValue) == 6:
            startmonth = colrowValue[0:2]
            startday = colrowValue[2:4]
            outlist = startmonth + "/" + startday
        else:
            outlist = ""
  
    return outlist


df['AllocationTimeframeStart'] = df.apply(lambda row: assignTime(row['USE_BEG_DATE_x']), axis=1)
df['AllocationTimeframeEnd'] = df.apply(lambda row: assignTime(row['USE_END_DATE_x']), axis=1)

In [9]:
#SiteName
#Creating Singular location value by concatinating other cells.

def assignSiteLocation(vWRNUM, vA, vB, vC, vD, vE):
    
    #### vWRNUM ####
    vWRNUM = str(vWRNUM)
    vWRNUM = vWRNUM.strip()
    
    #### vA ####
    vA = str(vA)
    vA = vA.strip()

    #### vB ####
    vB = str(vB)
    vB = vB.strip()
        
    #### vC ####
    vC = str(vC)
    vC = vC.strip()

    #### vD ####
    vD = str(vD)
    vD = vD.strip()
    
    #### vE ####
    vE = str(vE)
    vE = vE.strip()
    
    #### outlist
    outlist = vWRNUM + "_" + vA + vB + vC + vD + vE
    outlist = outlist.strip()
    
    return outlist


df['SiteLocation'] = df.apply(lambda row: assignSiteLocation(row['WRNUM'], 
                                                             row['NS_DIRECTION'], 
                                                             row['NS_DISTANCE'], 
                                                             row['EW_DIRECTION'], 
                                                             row['EW_DISTANCE'], 
                                                             row['SECTION_CORNER']), axis=1)

In [10]:
#Sort Dataframe by RightID and columns alphebeticallly.
df = df.sort_values(by=['WRNUM'])
df = df.reindex(sorted(df.columns), axis=1)

In [11]:
#Removing all NaN Values and replacing with blank
df = df.replace(np.nan, '', regex=True)

In [12]:
df.columns

Index(['ACRE_FEET_EXPORTED', 'ACTIVE_LITIGATION', 'ADV_NEWSPAPER',
       'APPL_CLAIM_NUMBER', 'APPROVED_REJECTED', 'AREA_CODE',
       'ATTORNEY_RECORD_ID', 'AllocationTimeframeEnd',
       'AllocationTimeframeStart', 'BAD_ADDRESS',
       ...
       'WREX_CFS', 'WREX_COUNTY', 'WREX_SOURCE', 'WREX_STATUS', 'WRNUM',
       'WR_SOLE_SUPPLY', 'X_UTM', 'Y_UTM', 'recordId_x', 'recordId_y'],
      dtype='object', length=199)

In [13]:
df.dtypes

ACRE_FEET_EXPORTED    object
ACTIVE_LITIGATION     object
ADV_NEWSPAPER         object
APPL_CLAIM_NUMBER     object
APPROVED_REJECTED     object
                       ...  
WR_SOLE_SUPPLY        object
X_UTM                 object
Y_UTM                 object
recordId_x            object
recordId_y            object
Length: 199, dtype: object

In [14]:
df

Unnamed: 0,ACRE_FEET_EXPORTED,ACTIVE_LITIGATION,ADV_NEWSPAPER,APPL_CLAIM_NUMBER,APPROVED_REJECTED,AREA_CODE,ATTORNEY_RECORD_ID,AllocationTimeframeEnd,AllocationTimeframeStart,BAD_ADDRESS,CERTIFICATE_NUMBER,CERT_SIGNATURE,COMMON_DESCRIPTION,COUNTY_TAX_ID,DAM_PERMIT_REQUIRED,DATE_50YEAR_PERIOD,DATE_APPROVED_REJECTED,DATE_CERT_WUC_ISSUED,DATE_ELECTION_PROOF,DATE_EXTENSION_FILED,DATE_FIELD_REVIEW,DATE_FILED,DATE_HEARING_HELD,DATE_LAPSING_LETTER,DATE_PRIORITY,DATE_PROOF_DUE,DATE_PROOF_PUB,DATE_PROTEST_END,DATE_PUB_BEGAN,DATE_PUB_ENDED,DATE_RECON_REQUESTED,DATE_RENOVATED,DATE_RUSH_LETTER,DATE_TERMINATED,DATE_VERIFIED,DATE_WUCS_RECEIVED,DATE_WUC_SIGNED,DECREE_CLASS,DEPLETION_DUTY,DIVDEP_DOCUMENT_ID,DIVERSION_DUTY,DIVERSION_LIMIT,DIVERTING_WORKS,DOCUMENT_ENTERED,DOMESTIC_DEPLETION,DOMESTIC_DIVERSION,DSYSDB_NUMBER,ELECTION_PROOF,ELEVATION,ENDORSEMENT_COMMENT1,ENDORSEMENT_COMMENT2,ENDORSEMENT_COMMENT3,EW_DIRECTION,EW_DISTANCE,EXCHANGE_ACFT,EXCHANGE_ANDOR,EXCHANGE_BASE_WRNUM,EXCHANGE_BEG_DATE,EXCHANGE_CFS,EXCHANGE_CONTRACT_NUMBER,EXCHANGE_COUNTY,EXCHANGE_END_DATE,EXCHANGE_EVIDENCED1,EXCHANGE_EVIDENCED2,EXCHANGE_EVIDENCED3,EXCHANGE_PORELEASE_SAME,EXCHANGE_RELEASE_ACFT,EXCHANGE_RELEASE_ANDOR,EXCHANGE_RELEASE_BEG_DATE,EXCHANGE_RELEASE_CFS,EXCHANGE_RELEASE_END_DATE,EXCHANGE_SOURCE,EXNUM,FILE_DESTROYED,FULL_SUPPLY_EQUIVALENCE,GROUP_IDNUM,GROUP_IDNUM_x,GROUP_IDNUM_y,GROWING_SEASON,IRRIGATION_ACREAGE,IRRIGATION_COMMENTS,IRRIGATION_DEPLETION,IRRIGATION_DIVERSION,LABELS_PRINTED,LAND_OWNERSHIP,LAST_MODIFIED_TABLE,LAST_MODIFIED_TYPE,MANUAL,MANUAL_EVALUATION,MAP_NUMBER,MEMO_DECISION,MINING_DEPLETION,MINING_DIVERSION,MODIFIED_LOGON,MUNICIPALITY,MUNICIPAL_COMMENTS,MUNICIPAL_DEPLETION,MUNICIPAL_DIVERSION,NS_DIRECTION,NS_DISTANCE,OSE_SIGNATURE,OTHER_DEPLETION,OTHER_DIVERSION,OWNER_ADDRESS,OWNER_ADDRESS1,OWNER_ADDRESS2,OWNER_ADDRESS3,OWNER_CARE_OF,OWNER_CITY,OWNER_EMAIL_ADDRESS,OWNER_FIRST_NAME,OWNER_INTEREST,OWNER_LAST_NAME,OWNER_NAME,OWNER_PHONE,OWNER_REMARKS,OWNER_STATE,OWNER_TITLE,OWNER_TYPE,OWNER_ZIPCODE,PDBOOK_NUMBER,POD_COMMENT,POD_TYPE,POWER_CAPACITY,POWER_COMMENTS,POWER_DEPLETION,POWER_DIVERSION,POWER_PLANT_NAME,POWER_TYPE,POWER_UNITS,PROOF_DUE_YEARS,PROOF_PROF_LICENSE_NUMBER,PROTESTED,PUBLIC_VIEW,PUBLIC_WATER_SUPPLIER,RECONSIDER_TYPE,RECORD_ID,REPORT_TYPE,SECTION_CORNER,SHARES_DISTRIBUTION,SHARES_OF_STOCK,SOLE_SUPPLY,STOCK_COMPANY_ID,STOCK_DEPLETION,STOCK_DIVERSION,STR,STREAM_ALT_REQUIRED,SUB_BASIN_NAME,SUB_BASIN_NUMBER,SiteLocation,TAKEN_BY_INITIALS,TITLE_CONFLICT,TITLE_CONFLICT_COMMENTS,TYPE_OF_RIGHT,USE_BEG_DATE,USE_BEG_DATE_x,USE_BEG_DATE_y,USE_END_DATE,USE_END_DATE_x,USE_END_DATE_y,USE_IDNUM,USE_IDNUM_x,USE_IDNUM_y,Unnamed: 10,Unnamed: 11,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25_x,Unnamed: 25_y,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 9,VERIFIED_BY_INITIALS,WATER_USES,WATER_USE_REPORTING,WELL_DEPTH1,WELL_DEPTH2,WELL_DIAMETER,WELL_LOG,WELL_YEAR_DRILLED,WIN,WRCHEX_x,WRCHEX_y,WREX_ACFT,WREX_ANDOR,WREX_CFS,WREX_COUNTY,WREX_SOURCE,WREX_STATUS,WRNUM,WR_SOLE_SUPPLY,X_UTM,Y_UTM,recordId_x,recordId_y
295286,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,_nannannannannan,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
242726,,,,cert 460),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10-22-40 and 55-11091 (a1171_nannannannannan,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10-22-40 and 55-11091 (a1171,,,,,
295287,,,0,1937 81.13 DEC 31,0,,,,,,1940 80.97 AUG 05,,0,,,,0,...,,,,0,,,,0,0,0,0,,,...,,,,0,,,,,,,,,215,1.99333,,,,,,,,,,,,,,,,MODIFY,,0,0,,0,,-1,,48960,TRANSFER-FROM-ADJUDICATION,,,,,,,,,,0,N,,"1941""",,,,Y,,0,0,,,,,57,,,,,0,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,0,,,,,,,,,,,5,,,,,,JUL 23_nannannannannan,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,GBRIMLEY,,0,,,JUL 23,,,,,
306582,,,13,'wrprint.exe?wrnum=35-11696'>35-11696</a> <a h...,0,,,,,,0,,,,,,,,35,0,,0,,,,0,D,Y,13,0,MODIFY,,,,,,,,,,,,,1,-1,0,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,,,,,,,0,Y,0,,,,,167541,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OWNERS,,,,,,,,,,,,,,"* * * * REPLACED BY <a href =""_nannannannannan",,,,,,,,,,,,,,,,,,,,,,,,,,,218,,,,,,,,,,,13,13,1000,SU,Underground Water Wells,,"* * * * REPLACED BY <a href =""",,,,,
227225,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",_nannannannannan",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",",,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3604897,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,c/o: Gordon Steven Neff,P.O. Box 447,"Oakley, UT 84055",,,sneff@neffandjensen.com,,100,C7 Family LLC,,4357835828,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,nan_nannannannannan,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,E5645,,,,,,,,,,,,288910
3604898,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,c/o: Gordon Steven Neff,P.O. Box 447,"Oakley, UT 84055",,,sneff@neffandjensen.com,,100,C7 Family LLC,,4357835828,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,nan_nannannannannan,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,E5645,,,,,,,,,,,,288910
3604899,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,c/o: Gordon Steven Neff,P.O. Box 447,"Oakley, UT 84055",,,sneff@neffandjensen.com,,100,C7 Family LLC,,4357835828,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,nan_nannannannannan,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,E5645,,,,,,,,,,,,288910
3604900,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,c/o: Gordon Steven Neff,P.O. Box 447,"Oakley, UT 84055",,,sneff@neffandjensen.com,,100,C7 Family LLC,,4357835828,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,nan_nannannannannan,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,E5645,,,,,,,,,,,,288910


In [15]:
#Exporting to Finished File
df.to_csv('P_UtahMaster.csv', index=False)  # The output

PermissionError: [Errno 13] Permission denied: 'P_UtahMaster.csv'