# Preprocessing Arizona Allocation data for WaDEQA upload.
Purpose: To preprocess the Arizona groundwater data into one main file for simple DataFrame creation and extraction.

In [1]:
# Needed libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
# Defaults to the shared-drive location used by the project. Set the
# WADE_AZ_GW_DIR environment variable to run the notebook against a copy of
# the data stored somewhere else (different drive letter, OS, or checkout).
workingDir = os.environ.get("WADE_AZ_GW_DIR", "G:/Shared drives/WaDE Data/Arizona/WaterAllocation_GW")
# All relative paths below (RawInputData/...) are resolved against this folder.
os.chdir(workingDir)

## Groundwater Data (POD)

In [3]:
# Input file: zipped export of Well_Registry.csv
fileInput = "RawInputData/Well_Registry.zip"
dfgw = pd.read_csv(fileInput, compression='zip').replace(np.nan, "")

# First run only: stamp every record with a WaDEUUID built from its row index
# and write the file back so the identifier stays attached to the raw record.
if 'WaDEUUID' not in dfgw.columns:
    dfgw['WaDEUUID'] = ["azGW" + str(rowLabel) for rowLabel in dfgw.index]
    dfgw.to_csv(fileInput, compression='zip', index=False)

print(len(dfgw))
dfgw.head()

  dfgw = pd.read_csv(fileInput, compression='zip').replace(np.nan, "")


229051


Unnamed: 0,WaDEUUID,ReasonRemoved,IncompleteField,OID_,OBJECTID,PROGRAM,REGISTRY_I,OWNER_NAME,RGR_PUMP_D,WELLTYPE,WELL_TYPE_,DLIC_NUM,APPROVED,INSTALLED,WELL_DEPTH,WATER_LEVE,CASING_DEP,CASING_DIA,CASING_TYP,PUMP_TYPE,PUMP_POWER,PUMPRATE,TESTEDRATE,DRAW_DOWN,COMPLETION,DRILL_LOG,WELL_CANCE,CADASTRAL,COUNTY,WATERSHED,BASIN_NAME,SUBBASIN_N,AMA,QUAD_CODE,WHOLE_TOWN,HALF_TOWNS,NORTHSOUTH,WHOLE_RANG,HALF_RANGE,EASTWEST,SECTION,QUARTER_16,QACRE160DI,QUARTER_40,QACRE40DIR,QUARTER_10,QACRE10DIR,UTM_X_METE,UTM_Y_METE,APPLICATIO,ADDRESS1,ADDRESS2,CITY,STATE,ZIP,ZIP4,WATER_USE,latitude,longitude
0,azGW0,,,1,1,55,60000,ARIZONA SONORAN COPPER COMPANY (USA) INC,NO,NON-EXEMPT,NON-EXEMPT,0,,,1790,257,60,20,OPEN HOLE IN AQUIFER,NO PUMP CODE LISTED,NO POWER CODE LISTED,0,0,0,,,N,D05005035ABA,PINAL,SANTA CRUZ RIVER,PINAL AMA,ELOY,PINAL,D,5,0,S,5,0,E,35,A,NE,B,NW,A,NE,423912.1,3646244.0,1/1/1900 0:00:00,ATTN: TRAVIS SNYDER,850 W. ELLIOT RD. STE 106,TEMPE,AZ,85284,,DEWATERING,32.95186,-111.81407
1,azGW1,,,2,2,55,60001,ROUTE 14 INVESTMENT PARTNERS LLC,YES,NON-EXEMPT - NON-SERVICE,NON-EXEMPT,611,1/24/2003 0:00:00,7/2/1984 0:00:00,1000,520,1000,12,STEEL - PERFORATED OR SLOTTED CASING,SUBMERSIBLE,ELECTRIC MOTOR 16 - 100 HP,200,400,75,,,,B05002008CCB,MARICOPA,AGUA FRIA RIVER,PHOENIX AMA,WEST SALT RIVER VALLEY,PHOENIX,B,5,0,N,2,0,W,8,C,SW,C,SW,B,NW,361297.1,3739323.0,2/25/1985 0:00:00,ATTN: MARK REPANICH,33040 N 203RD AVE,WITTMANN,AZ,85361,,INDUSTRIAL,33.78498,-112.49814
2,azGW10,,,11,11,55,84580,"H & R HENRY FARMS,",NO,NON-EXEMPT - REPLACEMENT WELL IN NEW LOCATION,NON-EXEMPT,0,,,0,0,0,0,NO CASING CODE LISTED,NO PUMP CODE LISTED,NO POWER CODE LISTED,0,0,0,,,Y,B01003026BBB,MARICOPA,LOWER GILA RIVER,PHOENIX AMA,WEST SALT RIVER VALLEY,PHOENIX,B,1,0,N,3,0,W,26,B,NW,B,NW,B,NW,356989.9,3697339.0,6/30/1980 0:00:00,PO BOX 66,,BUCKEYE,AZ,85326,,IRRIGATION,33.40587,-112.53792
3,azGW100,,,101,101,55,84955,"GEDRGE, LAUREL,",NO,EXEMPT,EXEMPT,35,,8/16/1980 0:00:00,180,90,180,5,PLASTIC OR PVC,NO PUMP CODE LISTED,NO POWER CODE LISTED,0,0,0,,,,A06003007ACC,MARICOPA,AGUA FRIA RIVER,PHOENIX AMA,EAST SALT RIVER VALLEY,PHOENIX,A,6,0,N,3,0,E,7,A,NE,C,SW,C,SW,399170.1,3749371.0,7/8/1980 0:00:00,43442 N 13TH AVE,,PHOENIX,AZ,85027,,DOMESTIC,33.87987,-112.0903
4,azGW1000,,,1001,1001,55,86084,"ANAMAX MINIG CO,",NO,NON-EXEMPT,NON-EXEMPT,0,,11/14/1980 0:00:00,390,260,387,10,STEEL - PERFORATED OR SLOTTED CASING,SUBMERSIBLE,ELECTRIC MOTOR 16 - 100 HP,240,240,30,,,,D18013005ABA,PIMA,SANTA CRUZ RIVER,TUCSON AMA,UPPER SANTA CRUZ,TUCSON,D,18,0,S,13,0,E,5,A,NE,B,NW,A,NE,496769.6,3529202.0,12/15/1980 0:00:00,PO BOX 127,,SAHUARITA,AZ,85629,,DRAINAGE,31.89865,-111.03417


In [4]:
# create output POD dataframe
# Each assignment below adds one WaDE input column; the order of the statements
# defines the column order of the exported file, so keep it stable.
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfgw['WaDEUUID']

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = "" # placeholder; filled in by the custom wadeID lookup later in the notebook
df['in_WaterSourceTypeCV'] = "Groundwater"

# Site Info
df['in_RegulatoryOverlayUUIDs'] = ""
df['in_WaterSourceUUID'] = "" # ???
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = dfgw['COUNTY'].str.title()
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = "" # NOTE(review): redundant, in_Geometry was already created in the WaterSource section above
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfgw['latitude']
df['in_Longitude'] = dfgw['longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
# Blank CADASTRAL values are replaced with 0 first, so such records get the site ID "POD0".
df['in_SiteNativeID'] = "POD" + dfgw['CADASTRAL'].replace("", 0).fillna(0).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = "Well" # these should all be well records
df['in_StateCV'] = "AZ"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_MethodUUID'] = "AZwr_M1" # for groundwater (separate from SW project)
df['in_OrganizationUUID'] = "AZwr_O1"
df['in_SiteUUID'] = "" # ???
df['in_VariableSpecificUUID'] =  "AZwr_V1" # for CFS (separate from SW project)
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
# PUMPRATE is still in GPM at this point; the clean-up section converts this column to CFS.
df['in_AllocationFlow_CFS'] = dfgw['PUMPRATE'].astype(float)
df['in_AllocationLegalStatusCV'] = ""
# Blank REGISTRY_I values become "0" here and in the native URL below.
df['in_AllocationNativeID'] =  dfgw['REGISTRY_I'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_AllocationOwner'] = dfgw['OWNER_NAME']
df['in_AllocationPriorityDate'] = ""
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = "12/31"
df['in_AllocationTimeframeStart'] = "01/01"
#df['in_AllocationTypeCV'] = dfgw['WELL_TYPE_'] # skip for now
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = ""
df['in_BeneficialUseCategory'] = dfgw['WATER_USE'].str.title()
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 1 # all these gw records should be considered exempt for us.
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = "https://app.azwater.gov/WellRegistry/Detail.aspx?" + dfgw['REGISTRY_I'].replace("", 0).fillna(0).astype(int).astype(str)

# Keep an independent copy so later edits to the output do not touch df.
dfgwOut = df.copy()
print(len(dfgwOut))
dfgwOut.head()

229051


Unnamed: 0,WaDEUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_RegulatoryOverlayUUIDs,in_WaterSourceUUID,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_MethodUUID,in_OrganizationUUID,in_SiteUUID,in_VariableSpecificUUID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,azGW0,,,,,,Groundwater,,,,,Pinal,4326,,,,32.95186,-111.81407,,,POD,,PODD05005035ABA,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,0.0,,60000,ARIZONA SONORAN COPPER COMPANY (USA) INC,,,12/31,01/01,,,Dewatering,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
1,azGW1,,,,,,Groundwater,,,,,Maricopa,4326,,,,33.78498,-112.49814,,,POD,,PODB05002008CCB,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,200.0,,60001,ROUTE 14 INVESTMENT PARTNERS LLC,,,12/31,01/01,,,Industrial,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
2,azGW10,,,,,,Groundwater,,,,,Maricopa,4326,,,,33.40587,-112.53792,,,POD,,PODB01003026BBB,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,0.0,,84580,"H & R HENRY FARMS,",,,12/31,01/01,,,Irrigation,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
3,azGW100,,,,,,Groundwater,,,,,Maricopa,4326,,,,33.87987,-112.0903,,,POD,,PODA06003007ACC,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,0.0,,84955,"GEDRGE, LAUREL,",,,12/31,01/01,,,Domestic,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
4,azGW1000,,,,,,Groundwater,,,,,Pima,4326,,,,31.89865,-111.03417,,,POD,,PODD18013005ABA,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,240.0,,86084,"ANAMAX MINIG CO,",,,12/31,01/01,,,Drainage,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...


In [5]:
frames = [dfgwOut]  # add dataframes here

# Stack the frames, drop exact duplicate rows, renumber the index, and blank
# out any NaN so the export contains empty strings instead of "nan".
outdf = (
    pd.concat(frames)
    .drop_duplicates()
    .reset_index(drop=True)
    .replace(np.nan, "")
)
print(len(outdf))

229051


## Clean output dataframes

In [6]:
# AZwr groundwater PUMPRATE is reported in GPM and must be converted to CFS.
# 1 CFS = 448.8 GPM

def ConvertGPMToCFSFunc(Val):
    """Convert a flow in gallons per minute to cubic feet per second.

    Val: numeric flow in GPM.
    Returns Val divided by 448.8 (the GPM-per-CFS factor used by this notebook).
    """
    return Val / 448.8

# Convert the column element by element instead of with a row-wise
# DataFrame.apply(axis=1), which builds a full Series per record.
# NOTE: this cell is not idempotent; re-running it divides the values again.
outdf['in_AllocationFlow_CFS'] = outdf['in_AllocationFlow_CFS'].map(ConvertGPMToCFSFunc)
outdf['in_AllocationFlow_CFS'].unique()

array([ 0.        ,  0.4456328 ,  0.53475936, ...,  4.89527629,
        0.67959002, 51.24777184])

In [7]:
# Clean free-text name fields (owner, site, water source, county)
def cleanOwnerDataFunc(Val):
    """Strip punctuation from a name and return it in title case.

    Val: any value; it is converted with str() first.
    Returns the text with the characters $ @ & . ; , / ) ( - removed,
    title-cased, and with leading/trailing whitespace trimmed. Interior
    whitespace is left as-is, so removing a character between two spaces
    leaves a double space.
    """
    Val = str(Val)
    # Raw string: "\)" in a normal string literal is an invalid escape sequence.
    Val = re.sub(r"[$@&.;,/\)(-]", "", Val).title().strip()
    return Val

In [8]:
# Clean the owner names column-wise; a row-wise apply(axis=1) would build a
# full Series for every record just to read one value.
outdf['in_AllocationOwner'] = outdf['in_AllocationOwner'].map(cleanOwnerDataFunc)
outdf['in_AllocationOwner'].unique()

array(['Arizona Sonoran Copper Company Usa Inc',
       'Route 14 Investment Partners Llc', 'H  R Henry Farms', ...,
       'Guy Freeman', 'Mohr And Leap', 'All Star Gas Inc Of Az'],
      dtype=object)

In [9]:
# Clean the water source names column-wise rather than with a row-wise apply.
outdf['in_WaterSourceName'] = outdf['in_WaterSourceName'].map(cleanOwnerDataFunc)
outdf['in_WaterSourceName'].unique()

array([''], dtype=object)

In [10]:
# Clean the site names column-wise rather than with a row-wise apply.
outdf['in_SiteName'] = outdf['in_SiteName'].map(cleanOwnerDataFunc)
outdf['in_SiteName'].unique()

array([''], dtype=object)

In [11]:
# Clean the county names column-wise rather than with a row-wise apply.
outdf['in_County'] = outdf['in_County'].map(cleanOwnerDataFunc)
outdf['in_County'].unique()

array(['Pinal', 'Maricopa', 'Pima', 'Yavapai', 'Cochise', 'Navajo',
       'Santa Cruz', 'Mohave', 'Gila', 'La Paz', 'Apache', 'Yuma',
       'Coconino', 'Graham', 'Greenlee'], dtype=object)

In [12]:
# Ensure Empty String

def ensureEmptyString(val):
    """Return val as trimmed text, mapping every "missing" value to "".

    val: any scalar value.
    Returns "" for None / NaN / NaT / pd.NA, for blank or whitespace-only
    text, and for the literal text "nan"; otherwise str(val) with
    surrounding whitespace removed.
    """
    # The missing-value test must run before str(): once converted, None and
    # NaT become the ordinary strings "None" and "NaT" and can no longer be
    # detected by pd.isnull.
    if pd.api.types.is_scalar(val) and pd.isnull(val):
        return ""
    outString = str(val).strip()
    if outString == "nan":
        outString = ""
    return outString

In [13]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_WaterSourceName'] = outdf['in_WaterSourceName'].map(ensureEmptyString)
outdf['in_WaterSourceName'].unique()

array([''], dtype=object)

In [14]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].map(ensureEmptyString)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater'], dtype=object)

In [15]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_County'] = outdf['in_County'].map(ensureEmptyString)
outdf['in_County'].unique()

array(['Pinal', 'Maricopa', 'Pima', 'Yavapai', 'Cochise', 'Navajo',
       'Santa Cruz', 'Mohave', 'Gila', 'La Paz', 'Apache', 'Yuma',
       'Coconino', 'Graham', 'Greenlee'], dtype=object)

In [16]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_SiteName'] = outdf['in_SiteName'].map(ensureEmptyString)
outdf['in_SiteName'].unique()

array([''], dtype=object)

In [17]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].map(ensureEmptyString)
outdf['in_SiteTypeCV'].unique()

array(['Well'], dtype=object)

In [18]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_AllocationLegalStatusCV'] = outdf['in_AllocationLegalStatusCV'].map(ensureEmptyString)
outdf['in_AllocationLegalStatusCV'].unique()

array([''], dtype=object)

In [19]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_AllocationOwner'] = outdf['in_AllocationOwner'].map(ensureEmptyString)
outdf['in_AllocationOwner'].unique()

array(['Arizona Sonoran Copper Company Usa Inc',
       'Route 14 Investment Partners Llc', 'H  R Henry Farms', ...,
       'Guy Freeman', 'Mohr And Leap', 'All Star Gas Inc Of Az'],
      dtype=object)

In [20]:
# Normalize blanks column-wise rather than with a row-wise apply(axis=1).
outdf['in_BeneficialUseCategory'] = outdf['in_BeneficialUseCategory'].map(ensureEmptyString)
outdf['in_BeneficialUseCategory'].unique()

array(['Dewatering', 'Industrial', 'Irrigation', 'Domestic', 'Drainage',
       'Stock', 'Municipal Uses, Commercial', 'Test', 'Municipal Uses',
       'Commercial', '', 'Mining', 'Reserved', 'Monitoring',
       'Other - Production', 'No Water Use', 'Test, Monitoring',
       'Domestic, Stock', 'No Use Code On Noi', 'Test, No Water Use',
       'Other - Mineral Explore', 'Monitoring, No Water Use',
       'No Water Use, Monitoring', 'No Use Code On Noi, Recovery',
       'Recovery, Municipal Uses, Industrial', 'Remediation',
       'Irrigation, Commercial, Domestic, Municipal Uses, Mining, Stock',
       'Utility (Water Co)', 'Test, Industrial', 'Domestic, Irrigation',
       'Recovery', 'Municipal Uses, Recovery', 'Domestic, Monitoring',
       'Municipal Uses, No Water Use', 'Test, Remediation',
       'Test, Reserved', 'Monitoring, Monitoring',
       'Irrigation, Municipal Uses, Recovery', 'Irrigation, Industrial',
       'Industrial, Municipal Uses', 'Domestic, Irrigation, Stock'

In [21]:
# in_Latitude: coerce to numbers; anything unparseable becomes an empty string.
latitudeNumeric = pd.to_numeric(outdf['in_Latitude'], errors='coerce')
outdf['in_Latitude'] = latitudeNumeric.fillna("")
outdf['in_Latitude'].unique()

array([32.95186342, 33.78497696, 33.40586557, ..., 35.49350294,
       32.54821538, 34.61574124])

In [22]:
# in_Longitude: coerce to numbers; anything unparseable becomes an empty string.
longitudeNumeric = pd.to_numeric(outdf['in_Longitude'], errors='coerce')
outdf['in_Longitude'] = longitudeNumeric.fillna("")
outdf['in_Longitude'].unique()

array([-111.81406538, -112.49814261, -112.53791578, ..., -113.59392918,
       -114.62650306, -111.91439603])

In [23]:
# Update datatype of Priority Date to fit WaDE 2.0 structure.
# Step 1: parse whatever is in the column; unparseable values become NaT.
priorityDateParsed = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
# Step 2: round-trip through mm/dd/yyyy text, which discards any time-of-day part.
priorityDateText = priorityDateParsed.dt.strftime('%m/%d/%Y')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(priorityDateText)
outdf['in_AllocationPriorityDate'].unique()

array(['NaT'], dtype='datetime64[ns]')

In [24]:
# Fixing in_AllocationFlow_CFS datatype: numeric where possible, with zero and
# unparseable values both written out as empty strings.
flowNumeric = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce')
outdf['in_AllocationFlow_CFS'] = flowNumeric.replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array(['', 0.44563279857397503, 0.5347593582887701, ...,
       4.895276292335116, 0.6795900178253119, 51.24777183600713],
      dtype=object)

In [25]:
# Fixing in_AllocationVolume_AF datatype: numeric where possible, with zero and
# unparseable values both written out as empty strings.
volumeNumeric = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce')
outdf['in_AllocationVolume_AF'] = volumeNumeric.replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array([''], dtype=object)

In [26]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Helper used to label each unique water source in the temp lookup table.
def assignWaterSourceNativeID(colrowValue):
    """Return the custom native ID: the text "wadeID" followed by str(colrowValue)."""
    return "wadeID" + str(colrowValue)

# One row per unique (water source name, water source type) pair.
dfWaterSourceNativeID = pd.DataFrame({
    'in_WaterSourceName': outdf['in_WaterSourceName'],
    'in_WaterSourceTypeCV': outdf['in_WaterSourceTypeCV'],
}).drop_duplicates()

# Running counter 1..N, aligned on the index of the unique pairs.
dftemp = pd.DataFrame(
    {"Count": range(1, len(dfWaterSourceNativeID.index) + 1)},
    index=dfWaterSourceNativeID.index,
)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp["Count"].map(assignWaterSourceNativeID)

# Lookup key: name and type concatenated with no separator.
dfWaterSourceNativeID['linkKey'] = (
    dfWaterSourceNativeID['in_WaterSourceName'].astype(str)
    + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)
)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
# Mapping of linkKey (name + type) -> custom native ID.
WaterSourceNativeIDdict = dict(zip(
    dfWaterSourceNativeID.linkKey.astype(str),
    dfWaterSourceNativeID.in_WaterSourceNativeID.values,
))
def retrieveWaterSourceNativeID(A, B):
    """Look up the custom native ID for a water source.

    A: water source name.
    B: water source type.
    Returns the ID stored in WaterSourceNativeIDdict under the key
    str(A).strip() + str(B).strip(), or '' when both inputs are blank/null
    or the key is not in the table.
    """
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        return ''
    colrowValue = str(A).strip() + str(B).strip()
    # dict.get covers the only expected failure (unknown key) without the
    # bare except that used to hide every other error as an empty ID.
    return WaterSourceNativeIDdict.get(colrowValue, '')

# Walk the two input columns in lockstep instead of using a row-wise
# apply(axis=1), which builds a full Series for every record.
outdf['in_WaterSourceNativeID'] = [
    retrieveWaterSourceNativeID(sourceName, sourceType)
    for sourceName, sourceType in zip(outdf['in_WaterSourceName'], outdf['in_WaterSourceTypeCV'])
]
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1'], dtype=object)

## Review and Export

In [27]:
# Review column names, non-null counts and dtypes before exporting.
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229051 entries, 0 to 229050
Data columns (total 66 columns):
 #   Column                                        Non-Null Count   Dtype         
---  ------                                        --------------   -----         
 0   WaDEUUID                                      229051 non-null  object        
 1   in_Geometry                                   229051 non-null  object        
 2   in_GNISFeatureNameCV                          229051 non-null  object        
 3   in_WaterQualityIndicatorCV                    229051 non-null  object        
 4   in_WaterSourceName                            229051 non-null  object        
 5   in_WaterSourceNativeID                        229051 non-null  object        
 6   in_WaterSourceTypeCV                          229051 non-null  object        
 7   in_RegulatoryOverlayUUIDs                     229051 non-null  object        
 8   in_WaterSourceUUID                            229051 n

In [28]:
# Visual check of the finished frame (the notebook display truncates the middle rows).
outdf

Unnamed: 0,WaDEUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_RegulatoryOverlayUUIDs,in_WaterSourceUUID,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_MethodUUID,in_OrganizationUUID,in_SiteUUID,in_VariableSpecificUUID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,azGW0,,,,,wadeID1,Groundwater,,,,,Pinal,4326,,,,32.95186,-111.81407,,,POD,,PODD05005035ABA,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,60000,Arizona Sonoran Copper Company Usa Inc,NaT,,12/31,01/01,,,Dewatering,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
1,azGW1,,,,,wadeID1,Groundwater,,,,,Maricopa,4326,,,,33.78498,-112.49814,,,POD,,PODB05002008CCB,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,0.44563,,60001,Route 14 Investment Partners Llc,NaT,,12/31,01/01,,,Industrial,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
2,azGW10,,,,,wadeID1,Groundwater,,,,,Maricopa,4326,,,,33.40587,-112.53792,,,POD,,PODB01003026BBB,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,84580,H R Henry Farms,NaT,,12/31,01/01,,,Irrigation,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
3,azGW100,,,,,wadeID1,Groundwater,,,,,Maricopa,4326,,,,33.87987,-112.09030,,,POD,,PODA06003007ACC,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,84955,Gedrge Laurel,NaT,,12/31,01/01,,,Domestic,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
4,azGW1000,,,,,wadeID1,Groundwater,,,,,Pima,4326,,,,31.89865,-111.03417,,,POD,,PODD18013005ABA,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,0.53476,,86084,Anamax Minig Co,NaT,,12/31,01/01,,,Drainage,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229046,azGW99995,,,,,wadeID1,Groundwater,,,,,Cochise,4326,,,,31.90773,-109.91406,,,POD,,PODD17024033CCB,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,568422,James R Linda S Blake,NaT,,12/31,01/01,,,Domestic,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
229047,azGW99996,,,,,wadeID1,Groundwater,,,,,La Paz,4326,,,,33.63776,-114.11419,,,POD,,PODB03018004AAA,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,568423,Delford D Hyslip,NaT,,12/31,01/01,,,Domestic,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
229048,azGW99997,,,,,wadeID1,Groundwater,,,,,Pinal,4326,,,,32.65012,-111.10336,,,POD,,PODD09012016ADD,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,568424,P L 183 Llc,NaT,,12/31,01/01,,,Domestic,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...
229049,azGW99998,,,,,wadeID1,Groundwater,,,,,Yuma,4326,,,,32.54822,-114.62650,,,POD,,PODC10023021BDB,,Well,AZ,,AZwr_M1,AZwr_O1,,AZwr_V1,,,,,,,,,,,568425,Agtech,NaT,,12/31,01/01,,,Irrigation,,,,,,1,,,,,,,,,,https://app.azwater.gov/WellRegistry/Detail.as...


In [29]:
# Export the output dataframe as a zipped CSV.
# NOTE(review): the archive is named Pwr_azgwMain.zip but the CSV inside it is
# Pwr_azMain.csv (no "gw") - confirm which name the downstream loader expects.
exportCompression = dict(method='zip', archive_name='Pwr_azMain.csv')
outdf.to_csv('RawInputData/Pwr_azgwMain.zip', compression=exportCompression, index=False)  # The output, save as a zip
#dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.