# Pre-processing Texas TCEQ Allocation data for WaDEQA upload.
Date Updated: 06/24/2020
Purpose: To pre-process the Texas data into one master file for simple DataFrame creation and extraction.

In [9]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # Raise pandas' column display limit to 999 so wide DataFrames are shown without column truncation

In [10]:
#Working Directory and Input File
# NOTE(review): this is an absolute, user-specific path; the notebook only runs
# on a machine where this exact folder exists.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Texas/WaterAllocation/RawInputData"
os.chdir(workingDir)  # make workingDir the current directory so relative file names resolve inside it

WRP = "WaterRightPoint.csv"  # input CSV file name (relative, so it is looked up in the current directory)

In [11]:
#Dataframe creation
# Read the CSV named by WRP into a DataFrame using pandas' default parsing options.
df = pd.read_csv(WRP)
df  # bare last expression: displays the DataFrame as the cell output

Unnamed: 0,OBJECTID,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,SHAPE
0,14068,11305156302,On-channel Reservoir,1,29.651976,-96.275803,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point
1,14069,11305156301,On-channel Reservoir,1,29.660384,-96.285681,DOQ,5,Other,1/15/2010,TCEQ,NAD83,P5156,WRPERM5156,Point
2,14070,11303887001,Diversion Point,2,29.520679,-96.154075,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point
3,14071,11303887501,Discharge Point,9,29.519565,-96.152952,OTHER,0,Other,1/15/2010,TCEQ,NAD83,P3887,WRPERM3887,Point
4,14072,11303847001,Diversion Point,2,29.541696,-96.121690,DRG,12,Other,1/15/2010,TCEQ,NAD83,P3847,WRPERM3847,Point
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14062,28130,11005726714,WWTP Release Point,1,29.596343,-95.207717,DOQ,5,OTHER,11/8/2011,TCEQ,NAD83,P5726,WRPERM5726,Point
14063,28131,11005726717,WWTP Release Point,1,29.602586,-95.236294,DOQ,5,OTHER,11/8/2011,TCEQ,NAD83,P5726,WRPERM5726,Point
14064,28132,11104201401,Off-channel Reservoir,1,29.294438,-95.363397,DOQ,5,OTHER,4/29/2015,TCEQ,NAD83,P4201,WRPERM4201,Point
14065,28133,11104201003,Diversion Point,1,29.295798,-95.363254,DOQ,5,OTHER,4/29/2015,TCEQ,NAD83,P4201,WRPERM4201,Point


In [12]:
# Show the column labels of df.
df.columns

Index(['OBJECTID', 'TCEQ_ID', 'TYPE', 'VERIFIED', 'LAT_DD', 'LONG_DD',
       'HORZ_METH', 'HORZ_ACC', 'HORZ_REF', 'HORZ_DATE', 'HORZ_ORG',
       'HORZ_DATUM', 'WR_ID', 'WR_TYPE_NO', 'SHAPE'],
      dtype='object')

In [13]:
# Print the dtype of every column. The option_context lifts the row/column
# display limits only inside this block, so the listing is not truncated and
# the notebook-wide display settings are left as they were.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

OBJECTID        int64
TCEQ_ID         int64
TYPE           object
VERIFIED        int64
LAT_DD        float64
LONG_DD       float64
HORZ_METH      object
HORZ_ACC        int64
HORZ_REF       object
HORZ_DATE      object
HORZ_ORG       object
HORZ_DATUM     object
WR_ID          object
WR_TYPE_NO     object
SHAPE          object
dtype: object


In [14]:
#Fixing spelling issues in the TCEQ TYPE field
# Maps each known misspelled or mis-spaced TYPE value to its corrected form.
TYPEdict = {
"Dischrage Point" : "Discharge Point",
"Dishcharge Point" : "Discharge Point",
"IBT -  Diversion Point" : "IBT - Diversion Point",
"On-channel  Reservoir" : "On-channel Reservoir",
"On-channel Reservior" : "On-channel Reservoir"
}

def updateTYPE(colrowValue):
    """Return the corrected spelling of a single TYPE value.

    Parameters
    ----------
    colrowValue : object
        One cell value from the TYPE column. Usually a string, but may be a
        missing value (NaN / None) or any other object.

    Returns
    -------
    object
        The replacement from ``TYPEdict`` when the value, with leading and
        trailing whitespace removed, is a known misspelling. Otherwise the
        input is returned unchanged (including empty strings, missing values
        and non-string inputs).
    """
    # Missing values and any other non-string input have nothing to correct.
    if not isinstance(colrowValue, str):
        return colrowValue
    # Strip surrounding whitespace before the lookup so padded misspellings
    # still match; values with no entry in TYPEdict pass through untouched.
    return TYPEdict.get(colrowValue.strip(), colrowValue)

# Apply the spelling corrections element-wise to the TYPE column only.
# Series.map calls updateTYPE once per value, without building a row object
# for every record the way DataFrame.apply(axis=1) does.
df['TYPE'] = df['TYPE'].map(updateTYPE)

In [15]:
#Exporting to Finished File
# The file name is relative, so the CSV is written to the current working directory.
# index=False leaves the DataFrame index out of the file.
df.to_csv('P_TexasWRP.csv', index=False)  # The output