# Pre-processing Texas TCEQ Allocation data for WaDEQA upload.
Date Updated: 06/24/2020
Purpose:  To pre-process the Texas data into one master file for simple DataFrame creation and extraction

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
from pyproj import Transformer, transform # for transforming coordinates to a new projection
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
#Working Directory
# NOTE(review): absolute, machine-specific shared-drive path. After the chdir,
# every relative file name read or written by this notebook resolves against it.
workingDir = "G:/Shared drives/WaDE Data/Texas/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Assign Owner Name
- will use a dictionary to assign owner name for output main file.

In [3]:
#Dataframe creation - owners
# Only the 'Water Right ID' and 'Owner' columns are loaded; the file is decoded
# as ISO-8859-1. One row per (water right, owner) pair, so IDs may repeat.
ownerInput = "WaterRightOwner.csv"
df_owner = pd.read_csv(ownerInput, usecols=['Water Right ID', 'Owner'], encoding="ISO-8859-1")
df_owner.head()

Unnamed: 0,Water Right ID,Owner
0,P5235,RANDALL BOLTON
1,P5235,RANNY MCLAIN
2,P5235,RICHARD W SPARKS JR
3,P5235,RODNEY NICHOLAS
4,P5235,RONALD WALTER BOLTON


In [4]:
# Clean Owner info.  Remove special characters

import re

def cleanOwnerDataFunc(Val):
    """Strip punctuation and surrounding whitespace from an owner name.

    Every occurrence of the characters ``$ @ & . ; , / ) ( -`` is removed and
    the result is trimmed of leading/trailing whitespace.

    Args:
        Val: Raw owner value. Missing values (None / NaN) are accepted.

    Returns:
        str: The cleaned name, or "" when ``Val`` is missing.
    """
    # A missing owner is read by pandas as NaN (a float), which re.sub rejects.
    if pd.isnull(Val):
        return ""
    # Raw string: the previous non-raw literal relied on the invalid escape "\)".
    return re.sub(r"[$@&.;,/)(-]", "", str(Val)).strip()

# Clean each owner name in place, then show the distinct cleaned values.
df_owner['Owner'] = df_owner['Owner'].map(cleanOwnerDataFunc)
df_owner['Owner'].unique()

array(['RANDALL BOLTON', 'RANNY MCLAIN', 'RICHARD W SPARKS JR', ...,
       'LAKE AUSTIN LAND AND CATTLE LTD', 'MINI ME MANAGEMENT LTD',
       'SELMA HUGHES INVESTMENT LTD'], dtype=object)

In [5]:
# Merge owners, assign ID
# Issue of multiple owners per water Right ID

def retrieveNames(df):
    """Collapse the owner table to one row per water right.

    Args:
        df: DataFrame with 'Water Right ID' and 'Owner' columns; an ID may
            appear on several rows (one per owner).

    Returns:
        DataFrame indexed by 'Water Right ID' (first-appearance order) with
        columns 'Water Right ID' and 'owners', where 'owners' is the
        ', '-joined list of that right's owner names in row order. Null owner
        values are skipped instead of breaking the join.
    """
    ids = df['Water Right ID'].drop_duplicates()

    # One pass over the frame instead of re-filtering it once per ID.
    joined = (
        df.groupby('Water Right ID', sort=False)['Owner']
        .agg(lambda names: ', '.join(names.dropna().astype(str)))
    )

    # reindex keeps exactly the IDs found above; an ID that groupby dropped
    # (a missing key) gets an empty owner string.
    owners = joined.reindex(ids.to_numpy()).fillna('')

    outdf = pd.DataFrame({
        'Water Right ID': ids.to_numpy(),
        'owners': owners.to_numpy(),
    })
    # Index by the ID while also keeping it as a regular column.
    return outdf.set_index(outdf['Water Right ID'])

# One row per water right; the ID is mirrored under the space-free column
# name 'WaterRightID' for attribute-style access later on.
df_owners = retrieveNames(df_owner).assign(
    WaterRightID=lambda frame: frame['Water Right ID']
)
df_owners.head()

Unnamed: 0_level_0,Water Right ID,owners,WaterRightID
Water Right ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
P5235,P5235,"RANDALL BOLTON, RANNY MCLAIN, RICHARD W SPARKS...",P5235
P5239,P5239,HOLY TRINITY CATHOLIC CHURCH,P5239
P5240,P5240,SHANKLIN H B,P5240
P5241,P5241,BARKAT LAND AND CATTLE COMPANY,P5241
P5243,P5243,DANIEL RAY BOLF,P5243


## Assign Ben Use
- will use a dictionary to assign beneficial use for output main file.

In [6]:
#Dataframe creation - ben use
# Only the 'Water Right ID' and 'Use' columns are loaded; an ID may repeat,
# with the same or different use values on each row.
useInput = "WaterUse.csv"
df_use = pd.read_csv(useInput, usecols=['Water Right ID', 'Use'])
df_use.head()

Unnamed: 0,Water Right ID,Use
0,C1000,IRRIGATION
1,C1000,IRRIGATION
2,C1000,IRRIGATION
3,C1000,IRRIGATION
4,C1000,IRRIGATION


In [7]:
def retrieveUses(df):
    """Collapse the use table to one row per water right.

    Args:
        df: DataFrame with 'Water Right ID' and 'Use' columns; an ID may
            appear on several rows.

    Returns:
        DataFrame indexed by 'Water Right ID' (first-appearance order) with
        columns 'Water Right ID' and 'uses', where 'uses' is the ','-joined
        list of that right's distinct use values in first-seen order. Null
        use values are skipped instead of breaking the join.
    """
    ids = df['Water Right ID'].drop_duplicates()

    # One pass over the frame instead of re-filtering it once per ID;
    # Series.unique() keeps first-seen order, matching a manual "not in" check.
    joined = (
        df.groupby('Water Right ID', sort=False)['Use']
        .agg(lambda vals: ','.join(vals.dropna().astype(str).unique()))
    )

    # reindex keeps exactly the IDs found above; an ID that groupby dropped
    # (a missing key) gets an empty uses string.
    uses = joined.reindex(ids.to_numpy()).fillna('')

    outdf = pd.DataFrame({
        'Water Right ID': ids.to_numpy(),
        'uses': uses.to_numpy(),
    })
    # Index by the ID while also keeping it as a regular column.
    return outdf.set_index(outdf['Water Right ID'])

# One row per water right; the ID is mirrored under the space-free column
# name 'WaterRightID' for attribute-style access later on.
df_uses = retrieveUses(df_use).assign(
    WaterRightID=lambda frame: frame['Water Right ID']
)
df_uses.head()

Unnamed: 0_level_0,Water Right ID,uses,WaterRightID
Water Right ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C1000,C1000,IRRIGATION,C1000
C1001,C1001,"MUNICIPAL/DOMESTIC AND LIVESTOCK,RECREATION",C1001
C1002,C1002,"MUNICIPAL/DOMESTIC AND LIVESTOCK,INDUSTRIAL,IR...",C1002
C1003,C1003,IRRIGATION,C1003
C1004,C1004,IRRIGATION,C1004


## Water Right Points and Output file
- main output

In [8]:
# Input File
fileInput = "WaterRightPoint.csv"
df = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
# Runs only when the file has no 'WaDEUUID' column yet: the ID is built as
# "tx" + the row index, and the frame is written back under the same file name
# that was just read (i.e. the raw input file is overwritten).
if 'WaDEUUID' not in df:
    df['WaDEUUID'] = "tx" + df.index.astype(str)
    df.to_csv('WaterRightPoint.csv', index=False)

print(len(df))
df.head()

14067


Unnamed: 0,OBJECTID,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,SHAPE,WaDEUUID
0,14068,11305156302,On-channel Reservoir,1,29.651976,-96.275803,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx0
1,14069,11305156301,On-channel Reservoir,1,29.660384,-96.285681,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx1
2,14070,11303887001,Diversion Point,2,29.520679,-96.154075,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx2
3,14071,11303887501,Discharge Point,9,29.519565,-96.152952,OTHER,0,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx3
4,14072,11303847001,Diversion Point,2,29.541696,-96.12169,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3847,WRPERM3847,Point,tx4


In [9]:
# Inspect the column labels of the point table.
df.columns

Index(['OBJECTID', 'TCEQ_ID', 'TYPE', 'VERIFIED', 'LAT_DD', 'LONG_DD',
       'HORZ_METH', 'HORZ_ACC', 'HORZ_REF', 'HORZ_DATE', 'HORZ_ORG',
       'HORZ_DATUM', 'WR_ID', 'WR_TYPE_NO', 'SHAPE', 'WaDEUUID'],
      dtype='object')

In [10]:
# Print every column's dtype; the display limits are lifted only inside this
# block so nothing is truncated.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

OBJECTID        int64
TCEQ_ID         int64
TYPE           object
VERIFIED        int64
LAT_DD        float64
LONG_DD       float64
HORZ_METH      object
HORZ_ACC        int64
HORZ_REF       object
HORZ_DATE      object
HORZ_ORG       object
HORZ_DATUM     object
WR_ID          object
WR_TYPE_NO     object
SHAPE          object
WaDEUUID       object
dtype: object


In [11]:
# Assigning owner name to output file

# Lookup dictionary: WaterRightID -> joined owner names
OwnerDict = dict(zip(df_owners.WaterRightID, df_owners.owners.values))

def retrieveOwner(val, ownerLookup=None):
    """Return the joined owner names for a water right ID.

    Args:
        val: Water right ID; surrounding whitespace is ignored for the lookup.
        ownerLookup: Optional mapping of ID -> owner string. Defaults to the
            notebook-level ``OwnerDict``.

    Returns:
        str: The mapped owner string, or "Unspecified" when ``val`` is
        missing, empty, or not present in the mapping.
    """
    # Null check first: comparing pd.NA with '' has no usable truth value.
    if pd.isnull(val) or val == '':
        return "Unspecified"
    if ownerLookup is None:
        ownerLookup = OwnerDict
    # dict.get rather than a bare except, so real errors (e.g. the lookup
    # table never having been built) surface instead of yielding "Unspecified".
    return ownerLookup.get(str(val).strip(), "Unspecified")

# Look up each point's owner string from its water right ID.
df['in_AllocationOwner'] = df['WR_ID'].map(retrieveOwner)
df

Unnamed: 0,OBJECTID,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,SHAPE,WaDEUUID,in_AllocationOwner
0,14068,11305156302,On-channel Reservoir,1,29.651976,-96.275803,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx0,US DEPARTMENT OF THE INTERIOR FISH AND WILDLIF...
1,14069,11305156301,On-channel Reservoir,1,29.660384,-96.285681,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx1,US DEPARTMENT OF THE INTERIOR FISH AND WILDLIF...
2,14070,11303887001,Diversion Point,2,29.520679,-96.154075,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx2,"RABIUS JO MARIE, RABIUS RAYMOND A"
3,14071,11303887501,Discharge Point,9,29.519565,-96.152952,OTHER,0,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx3,"RABIUS JO MARIE, RABIUS RAYMOND A"
4,14072,11303847001,Diversion Point,2,29.541696,-96.121690,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3847,WRPERM3847,Point,tx4,"HLAVINKA COMPANY, S W K LAND CO"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14062,28130,11005726714,WWTP Release Point,1,29.596343,-95.207717,DOQ,5,OTHER,11/8/2011,TCEQ,NAD83,P5726,WRPERM5726,Point,tx14062,TEXAS PARKS AND WILDLIFE DEPARTMENT
14063,28131,11005726717,WWTP Release Point,1,29.602586,-95.236294,DOQ,5,OTHER,11/8/2011,TCEQ,NAD83,P5726,WRPERM5726,Point,tx14063,TEXAS PARKS AND WILDLIFE DEPARTMENT
14064,28132,11104201401,Off-channel Reservoir,1,29.294438,-95.363397,DOQ,5,OTHER,4/29/2015,TCEQ,NAD83,P4201,WRPERM4201,Point,tx14064,"GARRETT JOHN TRAVIS JACKO JR, GARRETT LAND COM..."
14065,28133,11104201003,Diversion Point,1,29.295798,-95.363254,DOQ,5,OTHER,4/29/2015,TCEQ,NAD83,P4201,WRPERM4201,Point,tx14065,"GARRETT JOHN TRAVIS JACKO JR, GARRETT LAND COM..."


In [12]:
# Assigning ben use to output file

# Lookup dictionary: WaterRightID -> joined beneficial uses
BenuseDict = dict(zip(df_uses.WaterRightID, df_uses.uses.values))

def retrieveBenUse(val, useLookup=None):
    """Return the joined beneficial-use string for a water right ID.

    Args:
        val: Water right ID; surrounding whitespace is ignored for the lookup.
        useLookup: Optional mapping of ID -> use string. Defaults to the
            notebook-level ``BenuseDict``.

    Returns:
        str: The mapped use string, or "Unspecified" when ``val`` is missing,
        empty, or not present in the mapping.
    """
    # Null check first: comparing pd.NA with '' has no usable truth value.
    if pd.isnull(val) or val == '':
        return "Unspecified"
    if useLookup is None:
        useLookup = BenuseDict
    # dict.get rather than a bare except, so real errors (e.g. the lookup
    # table never having been built) surface instead of yielding "Unspecified".
    return useLookup.get(str(val).strip(), "Unspecified")

# Look up each point's beneficial-use string from its water right ID.
df['in_BeneficialUseCategory'] = df['WR_ID'].map(retrieveBenUse)
df

Unnamed: 0,OBJECTID,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,SHAPE,WaDEUUID,in_AllocationOwner,in_BeneficialUseCategory
0,14068,11305156302,On-channel Reservoir,1,29.651976,-96.275803,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx0,US DEPARTMENT OF THE INTERIOR FISH AND WILDLIF...,"OTHER,WILDLIFE MANAGEMENT"
1,14069,11305156301,On-channel Reservoir,1,29.660384,-96.285681,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx1,US DEPARTMENT OF THE INTERIOR FISH AND WILDLIF...,"OTHER,WILDLIFE MANAGEMENT"
2,14070,11303887001,Diversion Point,2,29.520679,-96.154075,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx2,"RABIUS JO MARIE, RABIUS RAYMOND A",IRRIGATION
3,14071,11303887501,Discharge Point,9,29.519565,-96.152952,OTHER,0,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx3,"RABIUS JO MARIE, RABIUS RAYMOND A",IRRIGATION
4,14072,11303847001,Diversion Point,2,29.541696,-96.121690,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3847,WRPERM3847,Point,tx4,"HLAVINKA COMPANY, S W K LAND CO",IRRIGATION
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14062,28130,11005726714,WWTP Release Point,1,29.596343,-95.207717,DOQ,5,OTHER,11/8/2011,TCEQ,NAD83,P5726,WRPERM5726,Point,tx14062,TEXAS PARKS AND WILDLIFE DEPARTMENT,"RECREATION,RECREATION, WETLANDS"
14063,28131,11005726717,WWTP Release Point,1,29.602586,-95.236294,DOQ,5,OTHER,11/8/2011,TCEQ,NAD83,P5726,WRPERM5726,Point,tx14063,TEXAS PARKS AND WILDLIFE DEPARTMENT,"RECREATION,RECREATION, WETLANDS"
14064,28132,11104201401,Off-channel Reservoir,1,29.294438,-95.363397,DOQ,5,OTHER,4/29/2015,TCEQ,NAD83,P4201,WRPERM4201,Point,tx14064,"GARRETT JOHN TRAVIS JACKO JR, GARRETT LAND COM...",IRRIGATION
14065,28133,11104201003,Diversion Point,1,29.295798,-95.363254,DOQ,5,OTHER,4/29/2015,TCEQ,NAD83,P4201,WRPERM4201,Point,tx14065,"GARRETT JOHN TRAVIS JACKO JR, GARRETT LAND COM...",IRRIGATION


In [13]:
# Tx projection = EPSG:4269.  WGS84 projection used by WaDE 2.0 = epsg:4326.
# One shared transformer is built here and reused by assignLat / assignLong.
# NOTE(review): the helpers pass (lat, long) and read back (lat, long); this
# presumably relies on pyproj honouring each CRS's own axis order - confirm.
transformer = Transformer.from_proj(4269, 4326)  

def assignLat(colrowValueLat, colrowValueLong):
    """Transform one (lat, long) pair and return the first output component."""
    projected = transformer.transform(colrowValueLat, colrowValueLong)
    newLat, _newLong = projected
    return newLat

def assignLong(colrowValueLat, colrowValueLong):
    """Transform one (lat, long) pair and return the second output component."""
    projected = transformer.transform(colrowValueLat, colrowValueLong)
    _newLat, newLong = projected
    return newLong



# Re-project every (LAT_DD, LONG_DD) pair, storing the two output components
# in their own columns.
df['in_Latitude'] = [
    assignLat(latVal, longVal)
    for latVal, longVal in zip(df['LAT_DD'], df['LONG_DD'])
]
df['in_Longitude'] = [
    assignLong(latVal, longVal)
    for latVal, longVal in zip(df['LAT_DD'], df['LONG_DD'])
]
df.head()

Unnamed: 0,OBJECTID,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,SHAPE,WaDEUUID,in_AllocationOwner,in_BeneficialUseCategory,in_Latitude,in_Longitude
0,14068,11305156302,On-channel Reservoir,1,29.651976,-96.275803,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx0,US DEPARTMENT OF THE INTERIOR FISH AND WILDLIF...,"OTHER,WILDLIFE MANAGEMENT",29.651976,-96.275803
1,14069,11305156301,On-channel Reservoir,1,29.660384,-96.285681,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point,tx1,US DEPARTMENT OF THE INTERIOR FISH AND WILDLIF...,"OTHER,WILDLIFE MANAGEMENT",29.660384,-96.285681
2,14070,11303887001,Diversion Point,2,29.520679,-96.154075,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx2,"RABIUS JO MARIE, RABIUS RAYMOND A",IRRIGATION,29.520679,-96.154075
3,14071,11303887501,Discharge Point,9,29.519565,-96.152952,OTHER,0,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point,tx3,"RABIUS JO MARIE, RABIUS RAYMOND A",IRRIGATION,29.519565,-96.152952
4,14072,11303847001,Diversion Point,2,29.541696,-96.12169,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3847,WRPERM3847,Point,tx4,"HLAVINKA COMPANY, S W K LAND CO",IRRIGATION,29.541696,-96.12169


In [14]:
#fixing Spelling issues in TCEQ TYPE field
# Keys are the exact misspelled / double-spaced variants to replace; values
# are the corrected spellings. Matching is exact, so the doubled spaces inside
# some keys are significant.
TYPEdict = {
"Dischrage Point" : "Discharge Point",
"Dishcharge Point" : "Discharge Point",
"IBT -  Diversion Point" : "IBT - Diversion Point",
"On-channel  Reservoir" : "On-channel Reservoir",
"On-channel Reservior" : "On-channel Reservoir",
"GW -  Release Point" : "GW - Release Point"
}

def updateTYPE(colrowValue, typeLookup=None):
    """Replace a known misspelling of a TYPE value with its corrected form.

    Args:
        colrowValue: Raw TYPE value.
        typeLookup: Optional mapping of misspelled -> corrected value.
            Defaults to the notebook-level ``TYPEdict``.

    Returns:
        The corrected value when ``colrowValue`` is a key of the mapping;
        otherwise ``colrowValue`` unchanged (including missing/empty values).
    """
    # Null check first: comparing pd.NA with '' has no usable truth value.
    if pd.isnull(colrowValue) or colrowValue == '':
        return colrowValue
    if typeLookup is None:
        typeLookup = TYPEdict
    # Exact-match lookup (no whitespace trimming). dict.get replaces the bare
    # except so real errors are not silently swallowed.
    return typeLookup.get(colrowValue, colrowValue)

# Normalise the spelling of every TYPE value.
df['TYPE'] = df['TYPE'].map(updateTYPE)

In [15]:
#Exporting to Finished File
# Written relative to the current working directory, without the index column.
df.to_csv('P_TexasWRP.csv', index=False)  # The output