# Pre-processing Texas TCEQ Allocation data for WaDEQA upload.
Purpose:  To pre-process the Texas data into one master file for simple DataFrame creation and extraction

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
#Working Directory
workingDir = "G:/Shared drives/WaDE Data/Texas/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Input Data

#### Onwer Info

In [3]:
#Dataframe creation - owners
ownerInput = "WaterRightOwner.zip"
df_owner = pd.read_csv(ownerInput).replace(np.nan, "")
print(len(df_owner))
df_owner.head(1)

11327


Unnamed: 0,OID_,WR_ID,OWNERS
0,1,P5264,"River Bend, L.L.P."


In [4]:
# Clean Owner info.  Remove special characters
def cleanOwnerDataFunc(Val):
    Val = str(Val).strip()
    Val = re.sub("[$'\"'@&.;,/\)(-]", "", Val).title().strip()
    return Val

df_owner['OWNERS'] = df_owner.apply(lambda row: cleanOwnerDataFunc(row['OWNERS']), axis=1)
for x in df_owner['OWNERS'].sort_values().unique():
    print(f'"' + x + '",')

"",
"1031 Facilitators No 21 Llc",
"141 Ranch Company Llc",
"177 Lake Estates Assn Inc",
"1802 Main Street Partners Lp",
"1951 Interests Lp",
"1970 Childress Lp",
"2006 Brazoria Venture Llc",
"2016 Shahan Family Partnership Lp",
"2017 Pg Investments Llc",
"2219 Kaufman Partners Lp",
"2350 Senator Partners Llc",
"2Br Land Lp",
"2Gl Llc",
"2Mb Land And Cattle Llc",
"3 H Lake Inc",
"3276 Medina Nstanton Llc",
"3Bu Family Limited Partnership",
"3K River Ranch Llc",
"3S Real Estate Investments Llc",
"46 Skyline Drive Llc",
"4C Ranch Properties Llc",
"4Gs Ranch Partnership",
"4L Family Lp",
"4Leaf Properties Llc",
"4Ses Llc",
"4W Trust",
"5653041 Acre Ranch Lp",
"5D Inc",
"7 M Ranch Trust",
"759 Cr 144 Llc",
"7D Ue Llc",
"85 Jacaranda Limited Partnership",
"A  H Developers Inc",
"A A Martin",
"A D D Corp",
"A Dean Mabry Et Al",
"A Farrer Et Al",
"A G Spanos Construction Inc",
"A Jabler Rodriguez",
"A L Martinez",
"A R E Property Owners Association Inc",
"A R Galloway Inc",
"A Staude Family L

In [5]:
# group owner info by WR_ID
df_owner = df_owner.drop(['OID_'], axis=1) # drop unused columns
df_owner = df_owner.groupby('WR_ID').agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()
print(len(df_owner))
df_owner.head()

6228


Unnamed: 0,WR_ID,OWNERS
0,,
1,C10,Edward Mathers Farms Lp
2,C100,"Carlota Coy Leal,Guzman Amelia Coy,Alfonso Est..."
3,C1000,"Wilson Dorothy,Wilson Robert W,Wilson Woodrow"
4,C1001,Borden County


#### Ben Use Info

In [6]:
#Dataframe creation - ben use
useInput = "WaterUse.zip"
df_use = pd.read_csv(useInput).replace(np.nan, "")
print(len(df_use))
df_use.head(1)

60313


Unnamed: 0,OID_,WR_ID,OWN_NAME,USE_NAME,YEAR,JAN_DIV,FEB_DIV,MAR_DIV,APR_DIV,MAY_DIV,JUN_DIV,JUL_DIV,AUG_DIV,SEPT_DIV,OCT_DIV,NOV_DIV,DEC_DIV,TOTAL
0,1,C1010,CHERYL J. HOYLE; NATHAN C. HOYLE,AGRICULTURE,2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# clean ben use info
df_use = df_use[['WR_ID', 'USE_NAME']].copy()
df_use['USE_NAME'] = df_use['USE_NAME'].str.strip().str.replace("  ", " ").str.title()

def fixBenUse(val):
    val = str(val).strip()
    if val == "Domestic And Livestock":
        outString = "Domestic, Livestock"
    elif val == "Domestic And Livestock & Livestock":
        outString = "Domestic, Livestock"
    elif val == "Non-Consumptive":
        outString = "Non Consumptive"
    elif val == "Instraem":
        outString = "Instream"
    elif val == "Wilflife Management":
        outString = "Wildlife Management"
    elif val == "Watwe Quality":
        outString = "Water Quality"
    elif val == "Minng":
        outString = "Mining"
    elif val == "Muncipal":
        outString = "Municipal"
    else:
        outString = val
    return outString

df_use['USE_NAME'] = df_use.apply(lambda row: fixBenUse(row['USE_NAME']), axis=1)
for x in df_use['USE_NAME'].sort_values().unique():
    print(f'"' + x + '",')

"Agriculture",
"Aquaculture",
"Chloride Control",
"Domestic",
"Domestic, Livestock",
"Fire Fighting",
"Flood Control",
"Flood Control, Recreation",
"Game Preserves",
"Hydroelectric",
"Industrial",
"Instream",
"Mariculture",
"Mining",
"Multi Use",
"Municipal",
"Navigation",
"Non Consumptive",
"Other",
"Recharge",
"Recreation",
"Recreation, Water Quality",
"Recreation, Wetlands",
"Reuse",
"Saltwater Barrier",
"Storage",
"Water Quality",
"Wetlands",
"Wildlife Management",


In [8]:
# group owner info by WR_ID
df_use = df_use.groupby('WR_ID').agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()
print(len(df_use))
df_use.head()

4559


Unnamed: 0,WR_ID,USE_NAME
0,C1000,Agriculture
1,C1001,"Recreation,Municipal"
2,C1002,"Agriculture,Mining,Industrial,Recreation,Munic..."
3,C1003,Agriculture
4,C1004,Agriculture


#### POD Surface water points info

In [9]:
# Input File
fileInput = "Water_Rights_As_Single_Points.zip"
dfinPOD = pd.read_csv(fileInput).replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "txD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('Water_Rights_As_Single_Points.zip', compression=dict(method='zip', archive_name='Water_Rights_As_Single_Points.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head()

15541


Unnamed: 0,OID_,TCEQ_ID,TYPE,VERIFIED,wadeSiteID,Latitude,Longitude,LocationMethod,LocationAccuracy,Reference,LocationDate,LocationOrganization,Datum,WR_ID,WR_TYPE_NO,WaDEUUID
0,1,10503942001.0,Diversion Point,2,wade1,32.79544,-95.2061,DRG,12.0,OTHER,8/25/2008 0:00:00,TCEQ,NAD83,P3942,WRPERM3942,txD0
1,2,10503969001.0,Diversion Point,2,wade2,32.60875,-95.09037,DRG,12.0,OTHER,8/25/2008 0:00:00,TCEQ,NAD83,P3969,WRPERM3969,txD1
2,3,10503969501.0,Discharge Point,2,wade3,32.60286,-95.09196,DRG,12.0,OTHER,8/25/2008 0:00:00,TCEQ,NAD83,P3969,WRPERM3969,txD2
3,4,10504202201.0,U/S Limit - Diversion Segment,2,wade4,32.72922,-95.43441,DRG,12.0,OTHER,8/25/2008 0:00:00,TCEQ,NAD83,P4202,WRPERM4202,txD3
4,5,10504202101.0,D/S Limit - Diversion Segment,2,wade5,32.71574,-95.41702,DRG,12.0,OTHER,8/25/2008 0:00:00,TCEQ,NAD83,P4202,WRPERM4202,txD4


In [10]:
# Assign owner name to POD

# Loop up dictonary using owner dataframe
OwnerDict = pd.Series(df_owner.OWNERS.values, index=df_owner.WR_ID).to_dict()

def retrieveOwner(val):
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        String1 = str(val).strip()
        try:
            outString = OwnerDict[String1]
        except:
            outString = ""
    return outString

dfinPOD['in_AllocationOwner'] = dfinPOD.apply(lambda row: retrieveOwner(row['WR_ID']), axis=1)
dfinPOD['in_AllocationOwner'].unique()

array(['Peach Springs Nursery Llc', 'Tyler Sand Company', 'Kay H Walker',
       ..., 'Our Savior Lutheran Church',
       'Cinco Residential Property Association Inc',
       'Cove Creek Corporation'], dtype=object)

In [11]:
# Assign ben use to POD

# Loop up dictonary using ben use dataframe
BenuseDict = pd.Series(df_use.USE_NAME.values, index=df_use.WR_ID).to_dict()

def retrieveBenUse(val):
    if val == "" or pd.isnull(val):
        outString = ""
    else:
        String1 = str(val).strip()
        try:
            outString = BenuseDict[String1]
        except:
            outString = ""
    return outString

dfinPOD['in_BeneficialUseCategory'] = dfinPOD.apply(lambda row: retrieveBenUse(row['WR_ID']), axis=1)
dfinPOD['in_BeneficialUseCategory'].unique()

array(['Agriculture', 'Mining', 'Agriculture,Industrial', 'Municipal',
       'Domestic, Livestock', 'Industrial,Aquaculture', 'Recreation',
       'Industrial,Recreation,Municipal', 'Industrial,Mining',
       'Domestic, Livestock,Recreation', 'Agriculture,Recreation',
       'Fire Fighting,Recreation,Municipal', 'Industrial,Recreation',
       'Agriculture,Mining,Industrial,Recreation,Municipal',
       'Industrial,Municipal', 'Recreation,Municipal',
       'Agriculture,Mining', 'Industrial',
       'Hydroelectric,Agriculture,Mining,Industrial,Recreation,Municipal',
       'Agriculture,Industrial,Municipal',
       'Agriculture,Mining,Wildlife Management,Industrial,Recreation',
       'Domestic, Livestock,Mining', 'Agriculture,Industrial,Recreation',
       'Agriculture,Recreation,Municipal',
       'Wildlife Management,Industrial,Recreation,Aquaculture',
       'Hydroelectric,Municipal', 'Flood Control',
       'Hydroelectric,Agriculture,Industrial,Recreation,Municipal',
       'Agr

In [12]:
# clean TYPE info
dfinPOD['TYPE'] = dfinPOD['TYPE'].str.strip().str.replace("  ", " ")

def fixBenUse(val):
    val = str(val).strip()
    if val == "On-channel Reservior":
        outString = "On-channel Reservoir"
    else:
        outString = val
    return outString

dfinPOD['TYPE'] = dfinPOD.apply(lambda row: fixBenUse(row['TYPE']), axis=1)   
for x in dfinPOD['TYPE'].sort_values().unique():
    print(f'"' + x + '",')

"D/S Limit - Discharge Segment",
"D/S Limit - Diversion Segment",
"D/S Limit - Release Segment",
"Discharge Point",
"Diversion Point",
"Diversion Point - Well",
"GW - Release Point",
"IBT - Discharge Point",
"IBT - Diversion Point",
"IBT - Off-channel Diversion Pt",
"IBT - Off-channel Reservoir",
"IBT - On-channel Reservoir",
"IBT - Release Point",
"Off-channel Diversion Point",
"Off-channel Release Point",
"Off-channel Reservoir",
"Off-channel Reservoir Complex",
"On-channel Reservoir",
"On-channel Reservoir Complex",
"Other - Rec Use",
"Release Point",
"Return Flow Point",
"U/S Dam",
"U/S Limit - Discharge Segment",
"U/S Limit - Diversion Segment",
"U/S Limit - Release Segment",
"WWTP Release Point",


In [13]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "TXwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "TXwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "TXwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = "WaDE Blank" # need this for auto fill below
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = "WaDE Blank" # need this for auto fill below

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
df['in_SiteNativeID'] = "POD" + dfinPOD['wadeSiteID'].replace("", 0).fillna(0).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfinPOD['TYPE']
df['in_StateCV'] = "TX"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = "" #empty
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] =  dfinPOD['WR_ID'].replace("", 0).fillna(0).str.strip().astype(str)
df['in_AllocationOwner'] = dfinPOD['in_AllocationOwner']
df['in_AllocationPriorityDate'] = "" #empty
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = "" #empty
df['in_BeneficialUseCategory'] = dfinPOD['in_BeneficialUseCategory']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 1 # we want this data excempt
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = "https://gisweb.tceq.texas.gov/WRRetrieveRights/?ID=" + dfinPOD['WR_TYPE_NO'].replace("", 0).fillna(0).str.strip().astype(str)

outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True)
print(len(outPOD))
outPOD.head()

15541


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,txD0,TXwr_M1,TXwr_V1,TXwr_O1,,,,,,,,,,4326,,,,32.79544,-95.2061,,,POD,,PODwade1,,Diversion Point,TX,,,,,,,,,,,,P3942,Peach Springs Nursery Llc,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...
1,txD1,TXwr_M1,TXwr_V1,TXwr_O1,,,,,,,,,,4326,,,,32.60875,-95.09037,,,POD,,PODwade2,,Diversion Point,TX,,,,,,,,,,,,P3969,Tyler Sand Company,,,,,,,Mining,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...
2,txD2,TXwr_M1,TXwr_V1,TXwr_O1,,,,,,,,,,4326,,,,32.60286,-95.09196,,,POD,,PODwade3,,Discharge Point,TX,,,,,,,,,,,,P3969,Tyler Sand Company,,,,,,,Mining,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...
3,txD3,TXwr_M1,TXwr_V1,TXwr_O1,,,,,,,,,,4326,,,,32.72922,-95.43441,,,POD,,PODwade4,,U/S Limit - Diversion Segment,TX,,,,,,,,,,,,P4202,Kay H Walker,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...
4,txD4,TXwr_M1,TXwr_V1,TXwr_O1,,,,,,,,,,4326,,,,32.71574,-95.41702,,,POD,,PODwade5,,D/S Limit - Diversion Segment,TX,,,,,,,,,,,,P4202,Kay H Walker,,,,,,,Agriculture,,,,,,1,,,,,,,,,,https://gisweb.tceq.texas.gov/WRRetrieveRights...


In [14]:
# Concatenate dataframes
frames = [outPOD]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

15541


## Inspect and Clean Data

In [15]:
# Ensure Empty String

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [16]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Diversion Point', 'Discharge Point',
       'U/S Limit - Diversion Segment', 'D/S Limit - Diversion Segment',
       'On-channel Reservoir', 'IBT - Discharge Point', 'Release Point',
       'Off-channel Reservoir', 'Off-channel Diversion Point',
       'Off-channel Reservoir Complex', 'U/S Limit - Discharge Segment',
       'D/S Limit - Discharge Segment', 'IBT - Diversion Point',
       'On-channel Reservoir Complex', 'Return Flow Point',
       'IBT - Off-channel Reservoir', 'IBT - Release Point',
       'IBT - Off-channel Diversion Pt', 'Diversion Point - Well',
       'U/S Dam', 'IBT - On-channel Reservoir', 'GW - Release Point',
       'U/S Limit - Release Segment', 'D/S Limit - Release Segment',
       'Off-channel Release Point', 'WWTP Release Point',
       'Other - Rec Use'], dtype=object)

In [17]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Peach Springs Nursery Llc', 'Tyler Sand Company', 'Kay H Walker',
       ..., 'Our Savior Lutheran Church',
       'Cinco Residential Property Association Inc',
       'Cove Creek Corporation'], dtype=object)

In [18]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['Agriculture', 'Mining', 'Agriculture,Industrial', 'Municipal',
       'Domestic, Livestock', 'Industrial,Aquaculture', 'Recreation',
       'Industrial,Recreation,Municipal', 'Industrial,Mining',
       'Domestic, Livestock,Recreation', 'Agriculture,Recreation',
       'Fire Fighting,Recreation,Municipal', 'Industrial,Recreation',
       'Agriculture,Mining,Industrial,Recreation,Municipal',
       'Industrial,Municipal', 'Recreation,Municipal',
       'Agriculture,Mining', 'Industrial',
       'Hydroelectric,Agriculture,Mining,Industrial,Recreation,Municipal',
       'Agriculture,Industrial,Municipal',
       'Agriculture,Mining,Wildlife Management,Industrial,Recreation',
       'Domestic, Livestock,Mining', 'Agriculture,Industrial,Recreation',
       'Agriculture,Recreation,Municipal',
       'Wildlife Management,Industrial,Recreation,Aquaculture',
       'Hydroelectric,Municipal', 'Flood Control',
       'Hydroelectric,Agriculture,Industrial,Recreation,Municipal',
       'Agr

In [19]:
# in_Latitude
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').fillna("")
outdf['in_Latitude'].unique()

array([32.79544371, 32.60875158, 32.60286437, ..., 29.66044217,
       29.62854717, 29.62761517])

In [20]:
# in_Longitude
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').fillna("")
outdf['in_Longitude'].unique()

array([-95.20610292, -95.09037256, -95.09195996, ..., -95.55312805,
       -95.51472403, -95.51399603])

In [21]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array(['NaT'], dtype='datetime64[ns]')

In [22]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array([''], dtype=object)

In [23]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array([''], dtype=object)

In [24]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "wadeID" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
WaterSourceNativeIDdict = pd.Series(dfWaterSourceNativeID.in_WaterSourceNativeID.values, index=dfWaterSourceNativeID.linkKey.astype(str)).to_dict()
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        colrowValue = str(A).strip() + str(B).strip()
        try:
            outList = WaterSourceNativeIDdict[colrowValue]
        except:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array([''], dtype=object)

## Export Data

In [25]:
# Export the output dataframe
outdf.to_csv('Pwr_txMain.zip', compression=dict(method='zip', archive_name='Pwr_txMain.csv'), index=False) # The output, save as a zip
#dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.