# Pre-processing Colorado Allocation data for WaDEQA upload.
Date Updated: 04/13/2022
Purpose:  To pre-process the Colorado data into one master file for simple DataFrame creation and extraction

Notes:
asdf

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Colorado/WaterAllocation/RawInputData"
os.chdir(workingDir)

In [3]:
# Input File
fileInput = "DWR_Water_Right_-_Net_Amounts_input.csv"
dfinPOD = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "coD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('DWR_Water_Right_-_Net_Amounts_input.zip', compression=dict(method='zip', archive_name='DWR_Water_Right_-_Net_Amounts_input.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head(1)

  dfinPOD = pd.read_csv(fileInput)


171732


Unnamed: 0,WDID,Structure Name,Structure Type,Water Source,GNIS ID,Stream Mile,DIV,WD,County,Q10,Q40,Q160,Section,Township,Range,PM,CoordsEW,CoordsEW Dir,CoordsNS,CoordsNS Dir,UTM x,UTM y,Latitude,Longitude,Location Accuracy,Adjudication Date,Previous Adj Date,Appropriation Date,Admin No,Order No,Priority No,Associated Case Numbers,Decreed Uses,Net Absolute,Net Conditional,Net APEX Absolute,Net APEX Conditional,Decreed Units,Seasonal Limits,Comments,Modified,More Information,Location,WaDEUUID
0,6801433,QUAKING ASPENS DITCH,Ditch,COAL CREEK,188598.0,0.81,4,68,OURAY,,SE,NE,4,44.0 N,8.0 W,N,,,,,259910.0,4220032.0,38.09623,-107.73793,GPS,12/31/2002,12/31/2001,9/27/2002,55787.0,0,,"16CW0049, 02CW0246",158PW,0.033,0.078,0.0,0.0,C,No,50 GPM CUMULATIVE WITH PATSY'S & BRITTNEY'S SP...,4/6/2017 16:53,https://dwr.state.co.us/Tools/WaterRights/NetA...,"(38.096228, -107.737935)",coD0


In [4]:
#Creating Beneficial Use.
#Need to split CO abbreviatoin strings to a workable format.

BenUseDict = {
"0" : "Storage",
"1" : "Irrigation",
"2" : "Municipal",
"3" : "Commercial",
"4" : "Industrial",
"5" : "Recreation",
"6" : "Fishery",
"7" : "Fire",
"8" : "Domestic",
"9" : "Stock",
"A" : "Augmentation",
"B" : "Export from Basin",
"C" : "Cumulative Accretion to River",
"D" : "Cumulative Depletion from River",
"E" : "Evaporative",
"F" : "Federal Reserved",
"G" : "Geothermal",
"H" : "Household Use Only",
"K" : "Snow Making",
"M" : "Minimum Streamflow",
"N" : "Net Effect on River",
"P" : "Power Generation",
"Q" : "Other",
"R" : "Recharge",
"S" : "Export from State",
"T" : "Transmountain Export",
"W" : "Wildlife",
"X" : "All Beneficial Uses"}

def retrieveBenUse(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outString = "Unspecified"
    else:
        String1 = str(colrowValue).strip()
        x=[]
        x[:] = String1
        try:
            outList = []
            for i in range(len(x)):
                y = x[i].strip()
                y = BenUseDict[y]
                outList.append(y)
            outString = ",".join(str(e) for e in outList)
        except:
            outString = "WaDE Unspecified"
    return outString

dfinPOD['in_BeneficialUseCategory'] = dfinPOD.apply(lambda row: retrieveBenUse(row['Decreed Uses']), axis=1)
dfinPOD['in_BeneficialUseCategory'].unique()

array(['Irrigation,Recreation,Domestic,Power Generation,Wildlife',
       'Domestic,Stock',
       'Recreation,Domestic,Stock,Augmentation,Wildlife', ...,
       'Irrigation,Municipal,Industrial,Recreation,Fishery,Domestic,Stock,Augmentation,Power Generation,Other',
       'Storage,Irrigation,Domestic,Wildlife',
       'Storage,Fishery,Fire,Domestic,Stock,Wildlife'], dtype=object)

In [5]:
#Determining WaterSourceTypeCV

def determineWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outString = "WaDE Unspecified"
    else:
        String1 = str(colrowValue).strip()
        if "GROUNDWATER:" in String1:
            outString = "Groundwater"
        else:
            outString = "Surface Water"
    return outString

dfinPOD['in_WaterSourceTypeCV'] = dfinPOD.apply(lambda row: determineWaterSourceTypeCV(row['Water Source']), axis=1)
dfinPOD['in_WaterSourceTypeCV'].unique()

array(['Surface Water', 'Groundwater'], dtype=object)

In [6]:
#Determining WaterSourceName
#Want to remove the "GROUNDWATER: " from the name.

def determineWaterSourceName(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outString = "WaDE Unspecified"
    else:
        String1 = str(colrowValue).strip()
        if "GROUNDWATER:" in String1:
            outString = colrowValue.replace("GROUNDWATER: ", "").strip()
        else:
            outString = colrowValue
    return outString

dfinPOD['in_WaterSourceName'] = dfinPOD.apply(lambda row: determineWaterSourceName(row['Water Source']), axis=1)
dfinPOD['in_WaterSourceName'].unique()

array(['COAL CREEK', 'ONION CREEK', 'MCKENZIE CREEK', ...,
       'BEAVER DAMS CREEK', 'PINE ARROYO', 'WILDHORSE CREEK'],
      dtype=object)

In [7]:
# Allocation_CFS
# If Decreed Units = "C" and Net Absolute != 0, then return Net Absolute
# Elif Decreed Units = "C" and Net Conditional != 0, then return Net Conditional
# Else return blank

# For creating Allocation_CFS
def assignAllocation_CFS(valA, valB, valC):
    valA = str(valA).strip()
    if (valB != 0) and (valC != 0):
        outString = 0
    else:
        if (valA == "C") and (valB != 0):
            outString = valB
        elif (valA  == "C") and (valC != 0):
            outString = valC
        else:
            outString = 0
    return outString

dfinPOD['in_AllocationFlow_CFS'] = dfinPOD.apply(lambda row: assignAllocation_CFS(row["Decreed Units"], row["Net Absolute"], row["Net Conditional"]), axis=1)
dfinPOD['in_AllocationFlow_CFS'].unique()

array([0.000e+00, 2.700e-01, 2.200e-02, ..., 3.813e+01, 2.538e-01,
       3.002e+01])

In [8]:
# AllocationVolume_AF
# If Decreed Units = "A" and Net Absolute != 0, then return Net Absolute
# Elif Decreed Units = "A" and Net Conditional != 0, then return Net Conditional
# Else return blank

# For creating AllocationVolume_AF
def assignAllocationVolume_AF(valA, valB, valC):
    valA = str(valA).strip()
    if (valB != 0) and (valC != 0):
        outString = 0
    else:
        if (valA == "A") and (valB != 0):
            outString = valB
        elif (valA  == "A") and (valC != 0):
            outString = valC
        else:
            outString = 0
    return outString

dfinPOD['in_AllocationVolume_AF'] = dfinPOD.apply(lambda row: assignAllocationVolume_AF(row["Decreed Units"], row["Net Absolute"], row["Net Conditional"]), axis=1)
dfinPOD['in_AllocationVolume_AF'].unique()

array([0.0000e+00, 1.3500e-01, 1.2250e+01, ..., 2.4740e+01, 8.2302e+02,
       7.8750e+01])

In [9]:
# For creating AllocationLegalStatusCV
# If Net Absolute = 0 and Net Condontial = 0, then Condtional Aboslute
# Elif Net Absolute = 0 and Net Condontial != 0, then Condtional
# Else, Aboslute

def assignAllocationLegalStatusCV(valA, valB):
    if (valA == 0) and (valB == 0):
        outString = "Conditional Absolute"
    elif (valA == 0) and (valB != 0):
        outString = "Conditional"
    else:
        outString = "Absolute"
    return outString

dfinPOD['in_AllocationLegalStatusCV'] = dfinPOD.apply(lambda row: assignAllocationLegalStatusCV(row['Net Absolute'], row['Net Conditional']), axis=1)
dfinPOD['in_AllocationLegalStatusCV'].unique()

array(['Absolute', 'Conditional Absolute', 'Conditional'], dtype=object)

In [10]:
# Need a unique identifier for WaDE AllocationNativeID.  Combine combine **Admin No**, **Order No**, **Decreed Units**, & **WDID** into single string entry.

# For creating AllocationAmount
def assignAllocationNativeID(colrowValueA, colrowValueB, colrowValueC, colrowValueD):
    colrowValueA = str(int(colrowValueA))
    colrowValueB = str(int(colrowValueB))
    colrowValueD = str(int(colrowValueD))  
    outString = "-".join(map(str, [colrowValueA, colrowValueB, colrowValueC, colrowValueD]))
    return outString

dfinPOD['in_AllocationNativeID'] = dfinPOD.apply(lambda row: assignAllocationNativeID(row['Admin No'], row['Order No'], row['Decreed Units'], row['WDID']), axis=1)
dfinPOD['in_AllocationNativeID'].unique()

array(['55787-0-C-6801433', '44559-0-C-6800885', '52595-0-C-6801282', ...,
       '54624-0-C-6801392', '55087-0-C-6801414', '22922-0-C-7000528'],
      dtype=object)

In [11]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "COwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "COwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "COwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOD['in_WaterSourceName']
df['in_WaterSourceNativeID'] = dfinPOD['GNIS ID'].replace("", 0).fillna(0).astype(float).astype(int).astype(str)
df['in_WaterSourceTypeCV'] = dfinPOD['in_WaterSourceTypeCV']

# Site Info
df['in_RegulatoryOverlayUUIDs'] = ""
df['in_WaterSourceUUID'] = "" # ???
df['in_CoordinateAccuracy'] = "WaDE Unspecified"
df['in_CoordinateMethodCV'] = dfinPOD['Location Accuracy']
df['in_County'] = dfinPOD['County']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = dfinPOD['Structure Name']
df['in_SiteNativeID'] = dfinPOD['WDID'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfinPOD['Structure Type'].astype(str)
df['in_StateCV'] = "CO"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['in_AllocationFlow_CFS'].astype(float) # see above for conversion
df['in_AllocationLegalStatusCV'] = dfinPOD['in_AllocationLegalStatusCV']
df['in_AllocationNativeID'] =  dfinPOD['in_AllocationNativeID'].astype(str)
df['in_AllocationOwner'] = "WaDE Unspecified"
df['in_AllocationPriorityDate'] = dfinPOD['Appropriation Date']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = "12/31"
df['in_AllocationTimeframeStart'] = "01/01"
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['in_AllocationVolume_AF'].astype(float) # see above for conversion
df['in_BeneficialUseCategory'] = dfinPOD['in_BeneficialUseCategory']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['More Information']

df = df.drop_duplicates().reset_index(drop=True)
print(len(df))
df.head()

171732


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_RegulatoryOverlayUUIDs,in_WaterSourceUUID,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,coD0,COwr_M1,COwr_V1,COwr_O1,,,,COAL CREEK,188598,Surface Water,,,WaDE Unspecified,GPS,OURAY,4326,,,,38.09623,-107.73793,,,POD,QUAKING ASPENS DITCH,6801433,,Ditch,CO,,,,,,,,,,0.0,Absolute,55787-0-C-6801433,WaDE Unspecified,9/27/2002,,12/31,01/01,,0.0,"Irrigation,Recreation,Domestic,Power Generatio...",,,,,,0,,,,,,,,,,https://dwr.state.co.us/Tools/WaterRights/NetA...
1,coD1,COwr_M1,COwr_V1,COwr_O1,,,,ONION CREEK,188246,Surface Water,,,WaDE Unspecified,Spotted from quarters,OURAY,4326,,,,38.32317,-107.67245,,,POD,JUTTEN SPRING & PIPELINE,6800885,,Spring,CO,,,,,,,,,,0.27,Absolute,44559-0-C-6800885,WaDE Unspecified,7/26/1968,,12/31,01/01,,0.0,"Domestic,Stock",,,,,,0,,,,,,,,,,https://dwr.state.co.us/Tools/WaterRights/NetA...
2,coD2,COwr_M1,COwr_V1,COwr_O1,,,,MCKENZIE CREEK,188237,Surface Water,,,WaDE Unspecified,Spotted from quarters,OURAY,4326,,,,38.17991,-107.93089,,,POD,S. J. SPRING R,6801282,,Spring,CO,,,,,,,,,,0.022,Absolute,52595-0-C-6801282,WaDE Unspecified,6/1/1967,,12/31,01/01,,0.0,"Recreation,Domestic,Stock,Augmentation,Wildlife",,,,,,0,,,,,,,,,,https://dwr.state.co.us/Tools/WaterRights/NetA...
3,coD3,COwr_M1,COwr_V1,COwr_O1,,,,COAL CREEK,188598,Surface Water,,,WaDE Unspecified,Spotted from quarters,OURAY,4326,,,,38.05895,-107.75189,,,POD,ZATTONI DITCH,6800782,,Ditch,CO,,,,,,,,,,1.234,Absolute,20255-0-C-6800782,WaDE Unspecified,6/1/1901,,12/31,01/01,,0.0,"Irrigation,Domestic",,,,,,0,,,,,,,,,,https://dwr.state.co.us/Tools/WaterRights/NetA...
4,coD4,COwr_M1,COwr_V1,COwr_O1,,,,FISHER CREEK,188396,Surface Water,,,WaDE Unspecified,GPS,OURAY,4326,,,,38.1756,-107.88057,,,POD,WALSH CABIN SPG & POND,6803755,,Reservoir,CO,,,,,,,,,,0.0,Absolute,54421-0-A-6803755,WaDE Unspecified,5/24/1920,,12/31,01/01,,0.135,"Recreation,Stock,Wildlife",,,,,,0,,,,,,,,,,https://dwr.state.co.us/Tools/WaterRights/NetA...


In [12]:
# Concatenate dataframes
frames = [df] # add dataframes here
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

171732


## Clean output dataframes

In [13]:
# Fixing empty string names

def fixEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or pd.isnull(val):
        outString = "WaDE Unspecified"
    else:
        outString = val
    return outString

In [14]:
outdf['in_WaterSourceName'] = df.apply(lambda row: fixEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['COAL CREEK', 'ONION CREEK', 'MCKENZIE CREEK', ...,
       'BEAVER DAMS CREEK', 'PINE ARROYO', 'WILDHORSE CREEK'],
      dtype=object)

In [15]:
outdf['in_WaterSourceTypeCV'] = df.apply(lambda row: fixEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Surface Water', 'Groundwater'], dtype=object)

In [16]:
outdf['in_County'] = df.apply(lambda row: fixEmptyString(row['in_County']), axis=1)
outdf['in_County'].unique()

array(['OURAY', 'GARFIELD', 'DOLORES', 'SAN MIGUEL', 'MESA', 'SAN JUAN',
       'EL PASO', 'DELTA', 'LA PLATA', 'GUNNISON', 'MONTEZUMA', 'MORGAN',
       'WELD', 'ADAMS', 'ARCHULETA', 'ELBERT', 'ARAPAHOE', 'LARIMER',
       'WASHINGTON', 'HINSDALE', 'RIO BLANCO', 'MONTROSE', 'DOUGLAS',
       'HUERFANO', 'MINERAL', 'PARK', 'JEFFERSON', 'BENT', 'PUEBLO',
       'CLEAR CREEK', 'CUSTER', 'LINCOLN', 'BOULDER', 'BROOMFIELD',
       'DENVER', 'SAGUACHE', 'ALAMOSA', 'RIO GRANDE', 'CONEJOS',
       'UNKNOWN', 'GRAND', 'GILPIN', 'TELLER', 'COSTILLA', 'LAS ANIMAS',
       'CHAFFEE', 'FREMONT', 'LAKE', 'PITKIN', 'CROWLEY', 'OTERO',
       'KIOWA', 'nan', 'PHILLIPS', 'SUMMIT', 'EAGLE', 'JACKSON', 'ROUTT',
       'MOFFAT', 'YUMA', 'CHEYENNE', 'KIT CARSON', 'LOGAN', 'SEDGWICK',
       'PROWERS', 'BACA'], dtype=object)

In [17]:
outdf['in_SiteName'] = df.apply(lambda row: fixEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['QUAKING ASPENS DITCH', 'JUTTEN SPRING & PIPELINE',
       'S. J. SPRING R', ..., 'LESLIE SPRING NO. 1', 'MICAH SPRING',
       'CORCORAN KEYES DITCH'], dtype=object)

In [18]:
outdf['in_SiteTypeCV'] = df.apply(lambda row: fixEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Ditch', 'Spring', 'Reservoir', 'Pipeline', 'Pump', 'Reach',
       'Well', 'Minimum Flow', 'Mine', 'Well Field', 'Recharge Area',
       'Well Group', 'Other', 'Seep', 'Reach (Aggregating)',
       'Measuring Point', 'Power Plant', 'Augmentation/Replacement Plan',
       'Reservoir System', 'Stream Gage', 'Exchange Plan',
       'Aquifer NNT/NT Reservation', 'Recharge Area Group',
       'Ditch System'], dtype=object)

In [19]:
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: fixEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Absolute', 'Conditional Absolute', 'Conditional'], dtype=object)

In [20]:
# Clean owner name up
def cleanOwnerDataFunc(Val):
    Val = re.sub("[$@&.;,/\)(-]", "", Val).strip()
    return Val
outdf['in_AllocationOwner'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['WaDE Unspecified'], dtype=object)

In [21]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'])
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array(['2002-09-27T00:00:00.000000000', '1968-07-26T00:00:00.000000000',
       '1967-06-01T00:00:00.000000000', ...,
       '1970-01-28T00:00:00.000000000', '1978-08-27T00:00:00.000000000',
       '1999-07-22T00:00:00.000000000'], dtype='datetime64[ns]')

In [22]:
# Fixing in_AllocationFlow_CFS datatype
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').fillna(0)
outdf['in_AllocationFlow_CFS'].unique()

array([0.000e+00, 2.700e-01, 2.200e-02, ..., 3.813e+01, 2.538e-01,
       3.002e+01])

In [23]:
# Fixing in_AllocationVolume_AF datatype
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').fillna(0)
outdf['in_AllocationVolume_AF'].unique()

array([0.0000e+00, 1.3500e-01, 1.2250e+01, ..., 2.4740e+01, 8.2302e+02,
       7.8750e+01])

In [24]:
# Convert in_Latitude & in_Longitude to numeric
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').fillna(0).astype(float)
print(outdf['in_Latitude'].unique())
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').fillna(0).astype(float)
print(outdf['in_Longitude'].unique())

[38.096228 38.323168 38.179911 ... 38.218898 38.079351 39.362257]
[-107.737935 -107.672448 -107.930891 ... -107.606825 -107.713538
 -108.476679]


## Export Outputs

In [25]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(outdf.dtypes)

WaDEUUID                                                object
in_MethodUUID                                           object
in_VariableSpecificUUID                                 object
in_OrganizationUUID                                     object
in_Geometry                                             object
in_GNISFeatureNameCV                                    object
in_WaterQualityIndicatorCV                              object
in_WaterSourceName                                      object
in_WaterSourceNativeID                                  object
in_WaterSourceTypeCV                                    object
in_RegulatoryOverlayUUIDs                               object
in_WaterSourceUUID                                      object
in_CoordinateAccuracy                                   object
in_CoordinateMethodCV                                   object
in_County                                               object
in_EPSGCodeCV                                          

In [26]:
# Export the output dataframe
outdf.to_csv('Pwr_coMain.zip', index=False, compression="zip")  # The output, save as a zip
#dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.