# Pre-processing Water Right and Time Series Water Use data for WaDE Upload
- Purpose:  To pre-process the data into one main file for simple DataFrame creation and extraction

In [1]:
import os
import sys
print(os.environ['CONDA_DEFAULT_ENV'])
print(sys.version)

base
3.12.3 | packaged by conda-forge | (main, Apr 15 2024, 18:20:11) [MSC v.1938 64 bit (AMD64)]


In [2]:
# Needed Libraries / Modules

# ---- working with data ----
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [3]:
# ---- working directory ----
workingDirString = "G:/Shared drives/WaDE Data/WaDE Data Folder/Texas/WaterAllocation_WaterUse_TCEQ" # set working directory folder string here
os.chdir(workingDirString)
print(f'The working Directory is:', workingDirString)

The working Directory is: G:/Shared drives/WaDE Data/WaDE Data Folder/Texas/WaterAllocation_WaterUse_TCEQ


## Data Input 1 - timeseries WaterUse
- clean up ben use values
- explode / separate out non-timeseries info & re-attach timseries info with specific month value

In [4]:
# Input File - WaterUse
fileInput = "RawInputData/WaterUse.zip"
dfin1 = pd.read_csv(fileInput).replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfin1:
    dfin1['WaDEUUID'] = "in1" + dfin1.index.astype(str)
    dfin1.to_csv("RawInputData/WaterUse.zip", compression=dict(method='zip', archive_name='WaterUse.csv'), index=False)

print(len(dfin1))
dfin1.head(1)

87636


Unnamed: 0,OBJECTID,Water Right ID,Owner,Use,Year,JAN_DIV,FEB_DIV,MAR_DIV,APR_DIV,MAY_DIV,JUN_DIV,JUL_DIV,AUG_DIV,SEPT_DIV,OCT_DIV,NOV_DIV,DEC_DIV,TOTAL,WaDEUUID
0,92960689,P3942,"PEACH SPRINGS NURSERY, LLC",AGRICULTURE,2019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,in10


In [5]:
# clean ben use info
# replace "&" with ",", remove white space
dfin1['Use'] = dfin1['Use'].str.strip().str.replace("  ", " ").str.title()
dfin1['Use'] = dfin1['Use'].str.replace(" And ", ", ").str.strip()
dfin1['Use'] = dfin1['Use'].str.replace(". ", ", ").str.strip()

def fixBenUse(val):
    val = str(val).strip()
    if val == "Domestic And Livestock":
        outString = "Domestic, Livestock"
    elif val == "Domestic And Livestock & Livestock":
        outString = "Domestic, Livestock"
    elif val == "Non-Consumptive":
        outString = "Non Consumptive"
    elif val == "Instraem":
        outString = "Instream"
    elif val == "Wilflife Management":
        outString = "Wildlife Management"
    elif val == "Watwe Quality":
        outString = "Water Quality"
    elif val == "Minng":
        outString = "Mining"
    elif val == "Muncipal":
        outString = "Municipal"
    else:
        outString = val
    return outString

dfin1['Use'] = dfin1.apply(lambda row: fixBenUse(row['Use']), axis=1)
for x in dfin1['Use'].sort_values().unique():
    print(f'"' + x + '",')

"Agriculture",
"Agriculture, Agriculture",
"Agriculture, Domestic, Livestock",
"Agriculture, Domestic, Livestock, Industrial, Mining",
"Agriculture, Domestic, Livestock, Industrial, Municipal, Recreation",
"Agriculture, Domestic, Livestock, Recreation",
"Agriculture, Flood Control, Municipal",
"Agriculture, Game Preserves, Industrial, Municipal, Public Parks, Recreation",
"Agriculture, Game Preserves, Public Parks, Recreation",
"Agriculture, Game Preserves, Recreation",
"Agriculture, Industrial",
"Agriculture, Industrial, Instream",
"Agriculture, Industrial, Instream, Mining, Municipal",
"Agriculture, Industrial, Mariculture",
"Agriculture, Industrial, Mining",
"Agriculture, Industrial, Mining, Municipal",
"Agriculture, Industrial, Mining, Municipal, Recreation",
"Agriculture, Industrial, Municipal",
"Agriculture, Industrial, Municipal, Recreation",
"Agriculture, Industrial, Recreation",
"Agriculture, Instream",
"Agriculture, Instream, Mining, Recreation",
"Agriculture, Instream, Recre

In [6]:
# temp dataframe of non-timseries info
dfin1_b = dfin1[["OBJECTID", "Water Right ID", "Owner", "Use", "Year"]]
print(len(dfin1_b))
dfin1_b.head(1)

87636


Unnamed: 0,OBJECTID,Water Right ID,Owner,Use,Year
0,92960689,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2019


In [7]:
# extract timeseries data / month values, attach to non-timseries info

divColList = ["JAN_DIV", "FEB_DIV", "MAR_DIV", "APR_DIV", "MAY_DIV", "JUN_DIV", "JUL_DIV", 
              "AUG_DIV", "SEPT_DIV", "OCT_DIV", "NOV_DIV", "DEC_DIV"] # list of column names with Amount values
monthNumList = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] # list of month num values
lastDayMonNumLisdt = ["31", "28", "31", "30", "31", "30", "31", "31", "30", "31", "30", "31"]
dfin1_c =pd.DataFrame() # empty dataframe

for x in range(11):
    # divColName = 
    # divMonthNumVal = 
    # divLastDayMontNumVal = 
    
    dftemp = dfin1_b.copy()
    dftemp['in_Amount'] = dfin1[divColList[x]]
    dftemp['in_TimeframeEnd'] = monthNumList[x] + "/" + lastDayMonNumLisdt[x] + "/" + dftemp['Year'].astype(str)
    dftemp['in_TimeframeStart'] = monthNumList[x] + "/" + "01" + "/" + dftemp['Year'].astype(str)
    
    dfin1_c = pd.concat([dfin1_c, dftemp])

print(len(dfin1_c))
dfin1_c.head(1)

963996


Unnamed: 0,OBJECTID,Water Right ID,Owner,Use,Year,in_Amount,in_TimeframeEnd,in_TimeframeStart
0,92960689,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2019,0.0,01/31/2019,01/01/2019


## Data Input 2 - owner info
- remove special characters
- group by "Water Right ID"

In [8]:
#Dataframe creation - owners
ownerInput = "RawinputData/WaterRightOwner.zip"
dfowner = pd.read_csv(ownerInput).replace(np.nan, "")
dfowner = dfowner.rename(columns={"Water Right ID": "WaterRightID"})

print(len(dfowner))
dfowner.head(1)

11347


Unnamed: 0,OBJECTID,WaterRightID,Owner
0,23251368,P5574,LSF Development Corp.


In [9]:
# Clean Owner info.  Remove special characters. Change to title format.
def cleanOwnerDataFunc(Val):
    Val = str(Val).strip()
    Val = re.sub("[$'\"'@&.;,/\)(-]", "", Val).title().strip()
    return Val

dfowner['Owner'] = dfowner.apply(lambda row: cleanOwnerDataFunc(row['Owner']), axis=1)
# for x in dfowner['Owner'].sort_values().unique():
#     print(f'"' + x + '",')
dfowner.head()

Unnamed: 0,OBJECTID,WaterRightID,Owner
0,23251368,P5574,Lsf Development Corp
1,23251369,P5575,John B Hollyer
2,23251370,P5576,Lake Windcrest Property Owners Association
3,23251371,P5577,Watson Robert L G
4,23251372,P5578,City Of Henderson


In [10]:
# group owner info by WR_ID
dfowner = dfowner.drop(['OBJECTID'], axis=1) # drop unused 'OBJECTID' columns
dfowner = dfowner.groupby('WaterRightID').agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()
print(len(dfowner))
dfowner.head(1)

6232


Unnamed: 0,WaterRightID,Owner
0,C10,Edward Mathers Farms Lp


## Data Input 3 - WaterRightsAsSinglePoints
- clean up Type
- attach owner info to sites
- merge site/owner info to ben use

In [11]:
# Input File
fileInput = "RawinputData/WaterRightsAsSinglePoints.zip"
dfinPOD = pd.read_csv(fileInput).replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "in3" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('RawinputData/WaterRightsAsSinglePoints.zip', compression=dict(method='zip', archive_name='WaterRightsAsSinglePoints.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head(1)

15680


Unnamed: 0,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,geometry,WaDEUUID
0,61902159001.0,Diversion Point,3,29.24134,-98.40784,MAP_MAN,70.0,OTHER,2009-11-04,TCEQ,NAD 1983,C2159,ADJ2159,POINT Z (-98.40784952699994 29.24134420300004 0),in30


In [12]:
# clean TYPE info
dfinPOD['TYPE'] = dfinPOD['TYPE'].str.strip().str.replace("  ", " ")

def fixTypeFunc(val):
    val = str(val).strip()
    if val == "On-channel Reservior":
        outString = "On-channel Reservoir"
    else:
        outString = val
    return outString

dfinPOD['TYPE'] = dfinPOD.apply(lambda row: fixTypeFunc(row['TYPE']), axis=1)   
for x in dfinPOD['TYPE'].sort_values().unique():
    print(f'"' + x + '",')

"D/S Limit - Discharge Segment",
"D/S Limit - Diversion",
"D/S Limit - Diversion Segment",
"D/S Limit - Release Segment",
"Discharge Point",
"Diversion POint",
"Diversion Point",
"Diversion Point - Well",
"GW - Release Point",
"IBT - Discharge Point",
"IBT - Diversion Point",
"IBT - Off-channel Diversion Pt",
"IBT - Off-channel Reservoir",
"IBT - On-channel Reservoir",
"IBT - Release Point",
"Off-channel Diversion Point",
"Off-channel Release Point",
"Off-channel Reservoir",
"Off-channel Reservoir Complex",
"On-channel Reservoir",
"On-channel Reservoir Complex",
"Release Point",
"Return Flow Point",
"U/S Dam",
"U/S Limit - Discharge Segment",
"U/S Limit - Diversion Segment",
"U/S Limit - Release Segment",
"WWTP Release Point",


In [13]:
# attach owner info to Site info

# Loop up dictonary using owner dataframe
OwnerDict = pd.Series(dfowner.Owner.values, index=dfowner.WaterRightID).to_dict()

def retrieveOwner(val):
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        String1 = str(val).strip()
        try:
            outString = OwnerDict[String1]
        except:
            outString = ""
    return outString

dfinPOD['in_AllocationOwner'] = dfinPOD.apply(lambda row: retrieveOwner(row['WR_ID']), axis=1)
dfinPOD['in_AllocationOwner'].unique()

array(['City Of San Antonio', 'Estate Of Ben B Morris',
       'Watson Robert L G', ...,
       'Geraldine Wallace,Jeannie Duncan,Jerry Duncan,Joyce Weishuhn,Keith Wallace,Gary Weishuhn',
       'City Of Paint Rock',
       'Ratha K Lelek,Rhonda Lelek,Cory Allen Lelek,Jerry Lelek'],
      dtype=object)

### merge site info with time series info

In [14]:
# merge site/owner info to ben use

dfin1_c = dfin1_c.merge(dfinPOD, how='left', left_on='Water Right ID', right_on='WR_ID')
print(len(dfin1_c))
dfin1_c.head()

2922766


Unnamed: 0,OBJECTID,Water Right ID,Owner,Use,Year,in_Amount,in_TimeframeEnd,in_TimeframeStart,TCEQ_ID,TYPE,VERIFIED,LAT_DD,LONG_DD,HORZ_METH,HORZ_ACC,HORZ_REF,HORZ_DATE,HORZ_ORG,HORZ_DATUM,WR_ID,WR_TYPE_NO,geometry,WaDEUUID,in_AllocationOwner
0,92960689,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2019,0.0,01/31/2019,01/01/2019,10503942001.0,Diversion Point,2.0,32.79544,-95.2061,DRG,12.0,OTHER,2008-08-25,TCEQ,NAD83,P3942,WRPERM3942,POINT Z (-95.20610291799994 32.79544370800005 0),in3512,Peach Springs Nursery Llc
1,92960689,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2019,0.0,01/31/2019,01/01/2019,10503942401.0,Off-channel Reservoir,1.0,32.80018,-95.20694,DOQ,5.0,OTHER,2008-08-25,TCEQ,NAD83,P3942,WRPERM3942,POINT Z (-95.20802694899999 32.80023185300007 0),in3641,Peach Springs Nursery Llc
2,92960690,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2020,0.0,01/31/2020,01/01/2020,10503942001.0,Diversion Point,2.0,32.79544,-95.2061,DRG,12.0,OTHER,2008-08-25,TCEQ,NAD83,P3942,WRPERM3942,POINT Z (-95.20610291799994 32.79544370800005 0),in3512,Peach Springs Nursery Llc
3,92960690,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2020,0.0,01/31/2020,01/01/2020,10503942401.0,Off-channel Reservoir,1.0,32.80018,-95.20694,DOQ,5.0,OTHER,2008-08-25,TCEQ,NAD83,P3942,WRPERM3942,POINT Z (-95.20802694899999 32.80023185300007 0),in3641,Peach Springs Nursery Llc
4,92960691,P3942,"PEACH SPRINGS NURSERY, LLC",Agriculture,2021,0.0,01/31/2021,01/01/2021,10503942001.0,Diversion Point,2.0,32.79544,-95.2061,DRG,12.0,OTHER,2008-08-25,TCEQ,NAD83,P3942,WRPERM3942,POINT Z (-95.20610291799994 32.79544370800005 0),in3512,Peach Springs Nursery Llc


In [15]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfin1_c['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "TCEQwrwu_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "TCEQwrwu_V1" # for wr records portion only, will create sa portion below
df['in_AggregationIntervalUnitCV'] = "Monthly"
df['in_VariableCV'] = "Water Right Use"

# Organization Info
df['in_OrganizationUUID'] = "TCEQwrwu_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = "WaDE Blank" # need this for auto fill below
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = "Surface Water" # need this for auto fill below

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfin1_c['LAT_DD']
df['in_Longitude'] = dfin1_c['LONG_DD']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
df['in_SiteNativeID'] = "s" + dfin1_c['TCEQ_ID'].replace("", 0).fillna(0).astype('int64').astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfin1_c['TYPE']
df['in_StateCV'] = "TX"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] = dfin1_c['Water Right ID']
df['in_AllocationOwner'] = dfin1_c['in_AllocationOwner']
df['in_AllocationPriorityDate'] = ""
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = ""
df['in_BeneficialUseCategory'] = dfin1_c['Use']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 1 # 1 or 0, if we want this data exempt
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = "https://gisweb.tceq.texas.gov/WRRetrieveRights/?ID=" + dfin1_c['WR_ID'].replace("", 0).fillna(0).str.strip().astype(str)

# Site VariableAmounts Info
df['in_Amount'] = dfin1_c['in_Amount']
df['in_AssociatedNativeAllocationIDs'] = dfin1_c['Water Right ID']
df['in_PowerGeneratedGWh'] = ""
df['in_PrimaryUseCategory'] = ""
df['in_ReportYearCV'] = dfin1_c['Year']
df['in_SDWISIdentifier'] = ""
df['in_TimeframeEnd'] = dfin1_c['in_TimeframeEnd']
df['in_TimeframeStart'] = dfin1_c['in_TimeframeStart']
# df['in_AllocationCropDutyAmount'] = "" see above AllocationAmount Info
# df['in_BeneficialUseCategory'] = "" see above AllocationAmount Info
# df['in_CommunityWaterSupplySystem'] = "" see above AllocationAmount Info
# df['in_CropTypeCV'] = "" see above AllocationAmount Info
# df['in_CustomerTypeCV'] = "" see above AllocationAmount Info
# df['in_DataPublicationDate'] = "" see above AllocationAmount Info
# df['in_DataPublicationDOI'] = "" see above AllocationAmount Info
# df['in_Geometry'] = "" see above Site Info
# df['in_IrrigatedAcreage'] = "" see above AllocationAmount Info
# df['in_IrrigationMethodCV'] = "" see above AllocationAmount Info
# df['in_PopulationServed'] = "" see above AllocationAmount Info
# df['in_PowerType'] = "" see above AllocationAmount Info
# df['in_SDWISIdentifier'] = "" see above AllocationAmount Info

outdf1 = df.copy()
outdf1 = outdf1.drop_duplicates().reset_index(drop=True)
print(len(outdf1))
outdf1.head()

2600635


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_AggregationIntervalUnitCV,in_VariableCV,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL,in_Amount,in_AssociatedNativeAllocationIDs,in_PowerGeneratedGWh,in_PrimaryUseCategory,in_ReportYearCV,in_SDWISIdentifier,in_TimeframeEnd,in_TimeframeStart
0,in3512,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,WaDE Blank,,Surface Water,,,,4326,,,,32.79544,-95.2061,,,POD,,s10503942001,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,0.0,P3942,,,2019,,01/31/2019,01/01/2019
1,in3641,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,WaDE Blank,,Surface Water,,,,4326,,,,32.80018,-95.20694,,,POD,,s10503942401,,Off-channel Reservoir,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,0.0,P3942,,,2019,,01/31/2019,01/01/2019
2,in3512,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,WaDE Blank,,Surface Water,,,,4326,,,,32.79544,-95.2061,,,POD,,s10503942001,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,0.0,P3942,,,2020,,01/31/2020,01/01/2020
3,in3641,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,WaDE Blank,,Surface Water,,,,4326,,,,32.80018,-95.20694,,,POD,,s10503942401,,Off-channel Reservoir,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,0.0,P3942,,,2020,,01/31/2020,01/01/2020
4,in3512,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,WaDE Blank,,Surface Water,,,,4326,,,,32.79544,-95.2061,,,POD,,s10503942001,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,0.0,P3942,,,2021,,01/31/2021,01/01/2021


## Concatenate POD and POU Data.  Make needed changes

In [16]:
# etc etc,
# outdf2

## Concatenate DataFrames together

In [17]:
# Concatenate dataframes
frames = [outdf1]  # list all out dataframes here
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

2600635


## Clean Data / data types

In [18]:
# Clean name entries of spcial characters
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;/\)(-]", "", Val).title().replace("  ", " ").strip().rstrip(',')
    return Val

In [19]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Wade Blank'], dtype=object)

In [20]:
outdf['in_County'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_County']), axis=1)
outdf['in_County'].unique()

array([''], dtype=object)

In [21]:
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array([''], dtype=object)

In [22]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Peach Springs Nursery Llc', 'Irwin Belinda,Irwin Don',
       'May Youmans Trust', ..., 'T L Ranch Co',
       'Marcial Sorrel Ii Trust', 'Jendrusch Alice P'], dtype=object)

In [23]:
# Ensure Empty String / remove string value of "nan"

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [24]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Wade Blank'], dtype=object)

In [25]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Surface Water'], dtype=object)

In [26]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Diversion Point', 'Off-channel Reservoir', 'On-channel Reservoir',
       'Discharge Point', 'D/S Limit - Discharge Segment',
       'U/S Limit - Discharge Segment', 'U/S Limit - Diversion Segment',
       'D/S Limit - Diversion Segment', '', 'U/S Limit - Release Segment',
       'D/S Limit - Release Segment', 'Off-channel Diversion Point',
       'Off-channel Release Point', 'Release Point',
       'Off-channel Reservoir Complex', 'Return Flow Point',
       'WWTP Release Point', 'GW - Release Point',
       'Diversion Point - Well', 'IBT - Diversion Point',
       'IBT - On-channel Reservoir', 'Diversion POint',
       'IBT - Release Point', 'IBT - Off-channel Reservoir', 'U/S Dam',
       'On-channel Reservoir Complex', 'IBT - Discharge Point',
       'IBT - Off-channel Diversion Pt', 'D/S Limit - Diversion'],
      dtype=object)

In [27]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array([''], dtype=object)

In [28]:
outdf['in_County'] = outdf.apply(lambda row: ensureEmptyString(row['in_County']), axis=1)
outdf['in_County'].unique()

array([''], dtype=object)

In [29]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Peach Springs Nursery Llc', 'Irwin Belinda,Irwin Don',
       'May Youmans Trust', ..., 'T L Ranch Co',
       'Marcial Sorrel Ii Trust', 'Jendrusch Alice P'], dtype=object)

In [30]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
uniqueList = list(set([i.strip() for i in ','.join(outdf['in_BeneficialUseCategory'].astype(str)).split(',')]))
uniqueList.sort()
uniqueList

['Agriculture',
 'Aquaculture',
 'Chloride Control',
 'Domestic',
 'Fire Fighting',
 'Flood Control',
 'Game Preserves',
 'Hydroelectric',
 'Industrial',
 'Instream',
 'Livestock',
 'Mariculture',
 'Mining',
 'Multi Use',
 'Municipal',
 'Navigation',
 'Non Consumptive',
 'Other',
 'Public Parks',
 'Recharge',
 'Recreation',
 'Reuse',
 'Saltwater Barrier',
 'Sediment Control',
 'Storage',
 'Voluntary Environmental Flow',
 'Water Quality',
 'Wetlands',
 'Wildlife Management']

In [31]:
# Ensure Latitude entry is numireic, replace '0' values for removal
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([32.795438, 32.800183, 31.708575, ..., 29.369402, 28.966674,
       28.967308], dtype=object)

In [32]:
# Ensure Longitude entry is numireic, replace '0' values for removal
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-95.206096, -95.206937, -94.621274, ..., -96.165255, -98.037331,
       -98.030496], dtype=object)

In [33]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array([''], dtype=object)

In [34]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array([''], dtype=object)

In [35]:
# Ensure Amount entry is either numireic or blank, no 0 entries
outdf['in_Amount'] = pd.to_numeric(outdf['in_Amount'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_Amount'].unique()

array(['', 300.0, 3600.0, ..., 5071.0, 2185.0, 184.5], dtype=object)

In [36]:
# Ensure PopulationServed entry is numireic WITH 0 entries (no blank strings)
outdf['in_PopulationServed'] = pd.to_numeric(outdf['in_PopulationServed'], errors='coerce').round().replace("",0).fillna(0).astype('int64').replace(0,"").fillna("")
outdf['in_PopulationServed'].unique()

array([''], dtype=object)

In [37]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

<DatetimeArray>
['NaT']
Length: 1, dtype: datetime64[ns]

In [38]:
# Convert TimeframeEnd to YYYY-MM-DD format.
outdf['in_TimeframeEnd'] = pd.to_datetime(outdf['in_TimeframeEnd'], utc=True, errors = 'coerce').fillna("")
outdf['in_TimeframeEnd'] = pd.to_datetime(outdf["in_TimeframeEnd"].dt.strftime('%m/%d/%Y'))
outdf['in_TimeframeEnd'].unique()

<DatetimeArray>
['2019-01-31 00:00:00', '2020-01-31 00:00:00', '2021-01-31 00:00:00',
 '2022-01-31 00:00:00', '2023-01-31 00:00:00', '2015-01-31 00:00:00',
 '2009-01-31 00:00:00', '2010-01-31 00:00:00', '2011-01-31 00:00:00',
 '2012-01-31 00:00:00',
 ...
 '2015-11-30 00:00:00', '2009-11-30 00:00:00', '2010-11-30 00:00:00',
 '2011-11-30 00:00:00', '2012-11-30 00:00:00', '2013-11-30 00:00:00',
 '2014-11-30 00:00:00', '2016-11-30 00:00:00', '2017-11-30 00:00:00',
 '2018-11-30 00:00:00']
Length: 165, dtype: datetime64[ns]

In [39]:
# Convert TimeframeStart to YYYY-MM-DD format.
outdf['in_TimeframeStart'] = pd.to_datetime(outdf['in_TimeframeStart'], utc=True, errors = 'coerce').fillna("")
outdf['in_TimeframeStart'] = pd.to_datetime(outdf["in_TimeframeStart"].dt.strftime('%m/%d/%Y'))
outdf['in_TimeframeStart'].unique()

<DatetimeArray>
['2019-01-01 00:00:00', '2020-01-01 00:00:00', '2021-01-01 00:00:00',
 '2022-01-01 00:00:00', '2023-01-01 00:00:00', '2015-01-01 00:00:00',
 '2009-01-01 00:00:00', '2010-01-01 00:00:00', '2011-01-01 00:00:00',
 '2012-01-01 00:00:00',
 ...
 '2015-11-01 00:00:00', '2009-11-01 00:00:00', '2010-11-01 00:00:00',
 '2011-11-01 00:00:00', '2012-11-01 00:00:00', '2013-11-01 00:00:00',
 '2014-11-01 00:00:00', '2016-11-01 00:00:00', '2017-11-01 00:00:00',
 '2018-11-01 00:00:00']
Length: 165, dtype: datetime64[ns]

In [40]:
# extract year out
outdf['in_ReportYearCV'] = outdf['in_ReportYearCV'].replace("", 0).fillna(0).astype('int64').astype(str)
outdf['in_ReportYearCV'].unique()

array(['2019', '2020', '2021', '2022', '2023', '2015', '2009', '2010',
       '2011', '2012', '2013', '2014', '2016', '2017', '2018'],
      dtype=object)

In [41]:
# Assign Primary Use Category

import sys
sys.path.append("C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/5_CustomFunctions/AssignPrimaryUseCategory")
import AssignPrimaryUseCategoryFile # Use Custom import file

outdf['in_PrimaryUseCategory'] = outdf.apply(lambda row: AssignPrimaryUseCategoryFile.retrievePrimaryUseCategory(row['in_BeneficialUseCategory']), axis=1)
outdf['in_PrimaryUseCategory'].unique()

array(['Agriculture Irrigation', 'Recreation', 'Livestock', 'Mining',
       'Domestic', 'Commercial/Industrial', 'Public Supply', 'Other',
       'Aquaculture', 'Municipal Irrigation', 'In-stream Flow',
       'Treated Wastewater/Reuse', 'Reservoir Storage', 'Hydroelectric',
       'Aquifer Recharge', 'Fire'], dtype=object)

In [42]:
# Creating WaDE Custom VariableSpecificCV
# ----------------------------------------------------------------------------------------------------
def createVariableSpecificCV(inV, inAIU, inPU, inWST):
    inV = str(inV).strip()
    inAIU = str(inAIU).strip()
    inPU = str(inPU).strip().title()
    inWST = str(inWST).strip()
    outString = inV + "_" + inAIU + "_" + inPU + "_" + inWST
    return outString

outdf['in_VariableSpecificCV'] = outdf.apply(lambda row: createVariableSpecificCV(row['in_VariableCV'], 
                                                                                  row['in_AggregationIntervalUnitCV'],
                                                                                  row['in_PrimaryUseCategory'],
                                                                                  row['in_WaterSourceTypeCV']), axis=1)
outdf['in_VariableSpecificCV'].unique()

array(['Water Right Use_Monthly_Agriculture Irrigation_Surface Water',
       'Water Right Use_Monthly_Recreation_Surface Water',
       'Water Right Use_Monthly_Livestock_Surface Water',
       'Water Right Use_Monthly_Mining_Surface Water',
       'Water Right Use_Monthly_Domestic_Surface Water',
       'Water Right Use_Monthly_Commercial/Industrial_Surface Water',
       'Water Right Use_Monthly_Public Supply_Surface Water',
       'Water Right Use_Monthly_Other_Surface Water',
       'Water Right Use_Monthly_Aquaculture_Surface Water',
       'Water Right Use_Monthly_Municipal Irrigation_Surface Water',
       'Water Right Use_Monthly_In-Stream Flow_Surface Water',
       'Water Right Use_Monthly_Treated Wastewater/Reuse_Surface Water',
       'Water Right Use_Monthly_Reservoir Storage_Surface Water',
       'Water Right Use_Monthly_Hydroelectric_Surface Water',
       'Water Right Use_Monthly_Aquifer Recharge_Surface Water',
       'Water Right Use_Monthly_Fire_Surface Water'], dt

In [43]:
# Creating WaDE Custom water source native ID for easy water source identification
# use unique WaterSourceName and WaterSourceType values
# ----------------------------------------------------------------------------------------------------

# Create temp in_WaterSourceNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_WaterSourceName'] = outdf['in_WaterSourceName'].astype(str).str.strip()
dfTempID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_WaterSourceNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_WaterSourceName'].astype(str) + dfTempID['in_WaterSourceTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_WaterSourceNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_WaterSourceNativeID'], 
                                                                              row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeId1'], dtype=object)

In [44]:
# Creating WaDE Custom site native ID for easy site identification
# use Unique Latitude, Longitude, SiteName and SiteTypeCV values
# ----------------------------------------------------------------------------------------------------

# Create temp in_SiteNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_Latitude'] = outdf['in_Latitude'].astype(str).str.strip()
dfTempID['in_Longitude'] = outdf['in_Longitude'].astype(str).str.strip()
dfTempID['in_SiteName'] = outdf['in_SiteName'].astype(str).str.strip()
dfTempID['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_SiteNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_Latitude'].astype(str) + dfTempID['in_Longitude'].astype(str) + dfTempID['in_SiteName'].astype(str)+ dfTempID['in_SiteTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_SiteNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB, valC, valD):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip() + str(valC).strip() + str(valD).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_SiteNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_SiteNativeID'], 
                                                                       row['in_Latitude'], row['in_Longitude'],
                                                                       row['in_SiteName'], row['in_SiteTypeCV']), axis=1)
outdf['in_SiteNativeID'].unique()

array(['s10503942001', 's10503942401', 's10603943001', ...,
       's11304229802', 's11904230001', 's11904230002'], dtype=object)

## Drop non-Active AllocationLegalStatusCV Water Rights
- For this {state name / organization}, we don't want water rights that are considered: {enter string entries here}

In [45]:
# not applied here for TCEQ

# # drop non-active AllocationLegalStatusCV values specific to that state.

# # drop the list
# dropLegalStatusList = [""] # enter string entries here

# # drop rows from above list
# outdf = outdf[outdf.in_AllocationLegalStatusCV.isin(dropLegalStatusList) == False].reset_index(drop=True)

# print(len(outdf))
# outdf['in_AllocationLegalStatusCV'].unique()

## Shapefile Data
- For attaching geometry to POU csv inputs.

In [46]:
# not applied here for TCEQ

# # PoU Shapefile Data
# shapefileInput = "RawInputData/shapefiles/{enter file name here}.zip" # ziped folder of the shp file

# dfPoUshapetemp = gpd.read_file(shapefileInput)
# dfPoUshapetemp['geometry'] = dfPoUshapetemp['geometry'].to_crs(epsg=4326) # Realign Geometry Projection
# print(len(dfPoUshapetemp))
# dfPoUshapetemp.head()

In [47]:
# # create temp dataframe to hold native ID and geometry from shapefile input
# columnsList = ['in_SiteNativeID', 'geometry']
# dfPoUshape = pd.DataFrame(columns=columnsList)

# # assing values to temp dataframe based on shapefile input
# # for in_SiteNativeID assure ID value is the same as that listed above for POU info.
# dfPoUshape['in_SiteNativeID'] = "POU" + ""
# dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
# dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
# print(len(dfPoUshape))
# dfPoUshape.head()

## Export Outputs

In [48]:
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2600635 entries, 0 to 2600634
Data columns (total 74 columns):
 #   Column                                        Dtype         
---  ------                                        -----         
 0   WaDEUUID                                      object        
 1   in_MethodUUID                                 object        
 2   in_VariableSpecificUUID                       object        
 3   in_AggregationIntervalUnitCV                  object        
 4   in_VariableCV                                 object        
 5   in_OrganizationUUID                           object        
 6   in_Geometry                                   object        
 7   in_GNISFeatureNameCV                          object        
 8   in_WaterQualityIndicatorCV                    object        
 9   in_WaterSourceName                            object        
 10  in_WaterSourceNativeID                        object        
 11  in_WaterSourceTypeCV    

In [49]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_AggregationIntervalUnitCV,in_VariableCV,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL,in_Amount,in_AssociatedNativeAllocationIDs,in_PowerGeneratedGWh,in_PrimaryUseCategory,in_ReportYearCV,in_SDWISIdentifier,in_TimeframeEnd,in_TimeframeStart,in_VariableSpecificCV
0,in3512,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,32.79544,-95.20610,,,POD,,s10503942001,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P3942,,Agriculture Irrigation,2019,,2019-01-31,2019-01-01,Water Right Use_Monthly_Agriculture Irrigation...
1,in3641,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,32.80018,-95.20694,,,POD,,s10503942401,,Off-channel Reservoir,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P3942,,Agriculture Irrigation,2019,,2019-01-31,2019-01-01,Water Right Use_Monthly_Agriculture Irrigation...
2,in3512,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,32.79544,-95.20610,,,POD,,s10503942001,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P3942,,Agriculture Irrigation,2020,,2020-01-31,2020-01-01,Water Right Use_Monthly_Agriculture Irrigation...
3,in3641,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,32.80018,-95.20694,,,POD,,s10503942401,,Off-channel Reservoir,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P3942,,Agriculture Irrigation,2020,,2020-01-31,2020-01-01,Water Right Use_Monthly_Agriculture Irrigation...
4,in3512,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,32.79544,-95.20610,,,POD,,s10503942001,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P3942,,Agriculture Irrigation,2021,,2021-01-31,2021-01-01,Water Right Use_Monthly_Agriculture Irrigation...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2600630,in313032,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,29.65840,-95.98695,,,POD,,s11204232305,,On-channel Reservoir,TX,,,,,,,,,,,,P4232,"Twinwood Corporation Nv,Twinwood Us Inc",NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P4232,,Agriculture Irrigation,2010,,2010-11-30,2010-11-01,Water Right Use_Monthly_Agriculture Irrigation...
2600631,in314407,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,29.65846,-95.98930,,,POD,,s11204232001,,Diversion Point,TX,,,,,,,,,,,,P4232,"Twinwood Corporation Nv,Twinwood Us Inc",NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P4232,,Agriculture Irrigation,2010,,2010-11-30,2010-11-01,Water Right Use_Monthly_Agriculture Irrigation...
2600632,in314408,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,29.66243,-95.99638,,,POD,,s11204232002,,Diversion Point,TX,,,,,,,,,,,,P4232,"Twinwood Corporation Nv,Twinwood Us Inc",NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P4232,,Agriculture Irrigation,2010,,2010-11-30,2010-11-01,Water Right Use_Monthly_Agriculture Irrigation...
2600633,in314409,TCEQwrwu_M1,TCEQwrwu_V1,Monthly,Water Right Use,TCEQwrwu_O1,,,,Wade Blank,wadeId1,Surface Water,,,,4326,,,,29.66138,-95.98878,,,POD,,s11204232003,,Diversion Point,TX,,,,,,,,,,,,P4232,"Twinwood Corporation Nv,Twinwood Us Inc",NaT,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...,,P4232,,Agriculture Irrigation,2010,,2010-11-30,2010-11-01,Water Right Use_Monthly_Agriculture Irrigation...


In [50]:
# Export the output dataframe
outdf.to_csv('RawInputData/Pwrwu_Main.zip', compression=dict(method='zip', archive_name='Pwr_wu_Main.csv'), index=False)  # The output, save as a zip
#dfPoUshape.to_csv('RawInputData/P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.