# Pre-processing (state / organization Name) Allocation data for WaDE upload.
- Purpose:  To pre-process the data into one master file for simple DataFrame creation and extraction

In [1]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [2]:
# Working Directory
#print(os.getcwd()) # see the current working directory

# # set working directory, if need be
# workingDir = "A:/WSWC/Nevada/WaterAllocation" # file location
# os.chdir(workingDir)
print(os.getcwd())

C:\Users\rjame\Documents\WSWC Documents\MappingStatesDataToWaDE2.0\Nevada\WaterAllocation


## Point of Diversion Data

In [3]:
# Input File
FI_PoD = "RawInputData/PointsofDiversion.zip"
dfinPOD = pd.read_csv(FI_PoD).replace(np.nan, "")
# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "utD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('RawInputData/PointsofDiversion.zip', compression=dict(method='zip', archive_name='PointsofDiversion.csv'), index=False)

FI_POwner="RawInputData/PermitOwners.zip"
dfinPOwner = pd.read_csv(FI_POwner).replace(np.nan, "")
dfinPOwner['app'] = dfinPOwner['app'].str.replace('\s+','')
dfinPOwner= dfinPOwner.groupby('app').agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()   
# WaDE UUID tracker for data assessment
# if 'WaDEUUID' not in dfinPOwner:
#     dfinPOwner['WaDEUUID'] = "utD" + dfinPOwner.index.astype(str)
#     dfinPOwner.to_csv('RawInputData/MergedPodPo.zip', compression=dict(method='zip', archive_name='MergedPodPo.csv'), index=False)

mergedData = pd.merge(dfinPOD, dfinPOwner, left_on='app', right_on='app',how='left' )#merging pod and permit owner tables

#pd.reset_option('max_columns')
pd.set_option('display.max_columns', None)
print(len(dfinPOwner))
#dfinPOD.head()
mergedData.head()
#dfinPOwner.head()


  dfinPOwner['app'] = dfinPOwner['app'].str.replace('\s+','')


108072


Unnamed: 0,ï»¿OID_,app,app_status,site_name,poly_id,cert,mou,basin,county_x,source,source_desc,duty_balance,diversion_balance,diversion_rate,pou_acre_total,priority_date,permit_date,publication_sent_date,poc_filed_date,decree_name,project_name,monitor_plan,reporting_required,meter_required,interbasin_transfer,hydro_assessment,secondary_use,permit_record,latitude,longitude,record_last_updated,GlobalID,WaDEUUID,OID_,owner_name,owner_type,owner_pct,owner_duty,owner_div_rate,owner_acre,owner_chg_app,county_y,apn,owner_remark
0,1,13503,CER,001 N47 E30 04A 1,2884.0,4044.0,IRR,1,HU,UG,WELL NO. 1,200.0,1.5,1.5,50.0,9/25/1950 8:00:00,2/16/1951 8:00:00,10/16/1950 8:00:00,7/21/1952 8:00:00,,,,,0.0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.98573,-118.62207,6/30/2023 8:00:00,{51D0646E-1F4E-415E-8063-B9787AB58523},utD0,89608961,"DEPAOLI, ROBERT R. AND DEBRA M.,ERQUIAGA, JOE","C,O",0.0,"0.0,200.0","0.0,1.5","0.0,50.0",,,,ENCUMBERED BY DEED OF TRUST DOC. #2022-04516
1,2,15809,DEN,,,,IRR,1,HU,STR,WILDER CREEK,746.67,4.0,4.0,320.0,9/23/1954 8:00:00,,9/29/1954 8:00:00,,,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...,41.98833,-118.52723,,{6958262B-EAC8-47BB-8FB6-6FE21BE74922},utD1,14901,"HOLLOWAY, DARLENE L.",B,0.0,0.0,0.0,0.0,,,,
2,3,18001,CER,001 N47 E30 04B 2,,5536.0,IRR,1,HU,UG,,32.58,0.045,0.045,0.0,5/14/1959 8:00:00,11/12/1959 8:00:00,6/4/1959 8:00:00,6/7/1961 8:00:00,,,,,0.0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.98868,-118.63691,1/3/2023 8:00:00,{45E43581-DED0-4B0F-BF39-AA74C0694B63},utD2,1998619987,"SMITH, IRWIN E. AND MARIE A.,HUMBOLDT COUNTY","C,O",0.0,"32.58,0.0","0.045,0.0","0.0,11.48",,,,
3,4,19510,CAN,,,,IRR,1,HU,UG,,0.0,0.0,6.0,320.0,1/30/1961 8:00:00,8/28/1962 8:00:00,3/23/1962 8:00:00,,,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...,41.96278,-118.61611,,{D288E6E7-81CC-467C-93EF-9E64564E8E23},utD3,23822,"BLOHM, SOPHUS T.",B,0.0,0.0,0.0,0.0,,,,
4,5,19511,CAN,,,,IRR,1,HU,UG,,0.0,0.0,6.0,320.0,1/30/1961 8:00:00,8/28/1962 8:00:00,3/23/1962 8:00:00,,,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...,41.95139,-118.61639,,{5CF9E4D4-A1AC-4223-B51B-C6A9EDEDE4F3},utD4,23823,"BLOHM, CHARLES E.",B,0.0,0.0,0.0,0.0,,,,


In [4]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = mergedData['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NVwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "NVwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NVwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = "" # auto fill in below
df['in_WaterSourceTypeCV'] = mergedData['source']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = "Digitized"
df['in_County'] = mergedData['county_x']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = mergedData['latitude']
df['in_Longitude'] = mergedData['longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = mergedData['site_name']
df['in_SiteNativeID'] = "SitePODwadeID" + mergedData['ï»¿OID_'].astype(str) ##########################
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = mergedData['source_desc']
df['in_StateCV'] = "NV"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = mergedData['diversion_rate']
df['in_AllocationLegalStatusCV'] = mergedData['app_status']
df['in_AllocationNativeID'] =  mergedData['app'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = mergedData['owner_name']
df['in_AllocationPriorityDate'] = mergedData['priority_date']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = mergedData['duty_balance']
df['in_BeneficialUseCategory'] = mergedData['mou']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0 # either a 1 or 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = mergedData['pou_acre_total']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = mergedData['permit_record']

outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True)
print(len(outPOD))
outPOD.head()

106834


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utD0,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.98573,-118.62207,,,POD,001 N47 E30 04A 1,SitePODwadeID1,,WELL NO. 1,NV,,,,,,,,,,1.5,CER,13503,"DEPAOLI, ROBERT R. AND DEBRA M.,ERQUIAGA, JOE",9/25/1950 8:00:00,,,,,200.0,IRR,,,,,,0,,50.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
1,utD1,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,STR,,Digitized,HU,4326,,,,41.98833,-118.52723,,,POD,,SitePODwadeID2,,WILDER CREEK,NV,,,,,,,,,,4.0,DEN,15809,"HOLLOWAY, DARLENE L.",9/23/1954 8:00:00,,,,,746.67,IRR,,,,,,0,,320.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
2,utD2,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.98868,-118.63691,,,POD,001 N47 E30 04B 2,SitePODwadeID3,,,NV,,,,,,,,,,0.045,CER,18001,"SMITH, IRWIN E. AND MARIE A.,HUMBOLDT COUNTY",5/14/1959 8:00:00,,,,,32.58,IRR,,,,,,0,,0.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
3,utD3,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.96278,-118.61611,,,POD,,SitePODwadeID4,,,NV,,,,,,,,,,6.0,CAN,19510,"BLOHM, SOPHUS T.",1/30/1961 8:00:00,,,,,0.0,IRR,,,,,,0,,320.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
4,utD4,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.95139,-118.61639,,,POD,,SitePODwadeID5,,,NV,,,,,,,,,,6.0,CAN,19511,"BLOHM, CHARLES E.",1/30/1961 8:00:00,,,,,0.0,IRR,,,,,,0,,320.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...


## Place of Use Data

In [5]:
# Input File - place of use data
FI_POU = "RawInputData/shapefile/POU_Permits_All.zip"
dfinPOU = gpd.read_file(FI_POU).replace(np.nan, "") 

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "utU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('RawInputData/POU_Permits_All.zip', compression=dict(method='zip', archive_name='POU_Permits_All.csv'), index=False)

dfinPOwner = pd.read_csv(FI_POwner, encoding = "ISO-8859-1").replace(np.nan, "")
dfinPOwner['app'] = dfinPOwner['app'].str.replace('\s+','')
dfinPOwner= dfinPOwner.groupby('app').agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()   
PouMergedData = pd.merge(dfinPOU, dfinPOwner, left_on='app', right_on='app',how='left' )

print(len(PouMergedData))
dfinPOU.head() 
#PouMergedData.head()

  dfinPOwner['app'] = dfinPOwner['app'].str.replace('\s+','')


32339


Unnamed: 0,OBJECTID,app,app_status,site_name,poly_id,cert,mou,basin,county,source,source_des,duty_balan,diversion_,diversio_1,pou_acre_t,priority_d,permit_dat,publicatio,poc_filed_,decree_nam,project_na,monitor_pl,reporting_,meter_requ,interbasin,hydro_asse,secondary_,permit_rec,latitude,longitude,record_las,GlobalID,Shape_Leng,Shape_Le_1,Shape_Area,geometry,WaDEUUID
0,1,24466,ABR,001 N47 E30 22ABCC2,2893,8091.0,IRR,1,HU,UG,,0.0,0.0,4.75,766.2,1966-05-26,1969-11-26,1968-07-24,1969-03-05,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.94112,-118.61158,2008-12-05,{B0D32958-E0AD-4E71-A110-2566F4AE245F},11298.2921,0.1214,0.00034,"POLYGON ((-118.60192 41.94461, -118.59707 41.9...",utU0
1,2,27292,ABR,001 N47 E30 15CDCD1,2893,8219.0,IRR,1,HU,UG,,0.0,0.0,4.5,766.2,1966-05-27,1973-06-05,1973-03-15,1973-09-18,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.94493,-118.61496,2013-04-15,{EE6D27A8-89D5-4DE7-A442-7C99626BFED4},11298.2921,0.1214,0.00034,"POLYGON ((-118.60192 41.94461, -118.59707 41.9...",utU1
2,3,27293,ABR,001 N47 E30 22ADDC1,2893,8203.0,IRR,1,HU,UG,,0.0,0.0,5.4,766.2,1964-10-06,1973-06-05,1973-03-15,1973-09-18,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.93748,-118.60367,2008-12-05,{CD4854F3-A81C-436E-B2F8-A7167E8CB92A},11298.2921,0.1214,0.00034,"POLYGON ((-118.60192 41.94461, -118.59707 41.9...",utU2
3,4,69076,RFP,021 N31 E20 04ACBA1,23633,,IRR,21,WA,UG,,0.0,5.0,5.0,0.0,2002-08-15,,2002-09-19,,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,40.59651,-119.73408,2011-08-09,{E9F415BF-456A-4D0A-BE83-028E49B07267},9144.90698,0.09412,0.00024,"POLYGON ((-119.74039 40.60351, -119.73043 40.6...",utU3
4,5,23168,CER,024 N35 E23 13ADBB1,3388,6820.0,IRR,24,WA,UG,WELL #4,1280.0,5.4,5.4,320.0,1966-06-07,1967-03-20,1966-09-09,1967-10-17,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,40.92284,-119.31408,2022-06-20,{D671108A-CBBF-4B56-B221-F7A5E10BC522},4737.10149,0.05162,0.00013,"POLYGON ((-119.31891 40.91926, -119.31892 40.9...",utU4


In [6]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = PouMergedData['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "NVwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "NVwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "NVwr_O1"

#dfinPOU['']

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = ""
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = PouMergedData['source']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = "Digitized"
df['in_County'] = PouMergedData['county_x']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = PouMergedData['latitude']
df['in_Longitude'] = PouMergedData['longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"  # "Place of Use"
df['in_SiteName'] = PouMergedData['site_name']
df['in_SiteNativeID'] = "SitePOUwadeID" + PouMergedData['OBJECTID'].astype(str) ###################
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = PouMergedData['source_des']
df['in_StateCV'] = "NV"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = PouMergedData['diversio_1']
df['in_AllocationLegalStatusCV'] =PouMergedData['app_status']
df['in_AllocationNativeID'] =  PouMergedData['app'].replace("", 0).fillna(0).astype(str).str.lower().str.strip()
df['in_AllocationOwner'] = PouMergedData['owner_name']
df['in_AllocationPriorityDate'] = PouMergedData['priority_d']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = PouMergedData['duty_balan']
df['in_BeneficialUseCategory'] = PouMergedData['mou']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0 # either a 1 or 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = PouMergedData['pou_acre_t']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = PouMergedData['permit_rec']

outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True)
print(len(outPOU))
outPOU.head()

32339


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utU0,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.94112,-118.61158,,,POU,001 N47 E30 22ABCC2,SitePOUwadeID1,,,NV,,,,,,,,,,4.75,ABR,24466,"PINE FOREST FARMS, INC.,DENIO FARMS/G&L CURTI ...",1966-05-26,,,,,0.0,IRR,,,,,,0,,766.2,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
1,utU1,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.94493,-118.61496,,,POU,001 N47 E30 15CDCD1,SitePOUwadeID2,,,NV,,,,,,,,,,4.5,ABR,27292,"CONNECTICUT MUTUAL LIFE INSURANCE,DENIO FARMS/...",1966-05-27,,,,,0.0,IRR,,,,,,0,,766.2,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
2,utU2,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,HU,4326,,,,41.93748,-118.60367,,,POU,001 N47 E30 22ADDC1,SitePOUwadeID3,,,NV,,,,,,,,,,5.4,ABR,27293,"THE CONNECTICUT MUTUAL LIFE INS. CO.,DENIO FAR...",1964-10-06,,,,,0.0,IRR,,,,,,0,,766.2,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
3,utU3,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,WA,4326,,,,40.59651,-119.73408,,,POU,021 N31 E20 04ACBA1,SitePOUwadeID4,,,NV,,,,,,,,,,5.0,RFP,69076,BRIGHT-HOLLAND CORPORATION,2002-08-15,,,,,0.0,IRR,,,,,,0,,0.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
4,utU4,NVwr_M1,NVwr_V1,NVwr_O1,,,,,,UG,,Digitized,WA,4326,,,,40.92284,-119.31408,,,POU,024 N35 E23 13ADBB1,SitePOUwadeID5,,WELL #4,NV,,,,,,,,,,5.4,CER,23168,"FRERES, T.G.,BLACKSTONE REALTY INVESTORS LLC,M...",1966-06-07,,,,,1280.0,IRR,,,,,,0,,320.0,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...


## Concatenate POD and POU Data.  Make needed changes

In [7]:
# Concatenate dataframes
frames = [outPOD, outPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

139173


In [8]:
# For creating County
CountyDict = {
    "HU" : "Humboldt",
    "CC" : "Carson City",
    "CH" : "Churchill",
    "CL" : "Clark",
    "DO" : "Douglas",
    "EL" : "Elko",
    "ES" : "Esmerelda",
    "EU" : "Eureka",
    "LA" : "Lander",
    "LI" : "Lincoln",
    "LY": "Lyon",
    "MI": "Mineral",
    "NY": "Nye",
    "PE": "Pershing",
    "ST": "Storey",
     "": "Unknown",
    "WA": "Washoe",
    "WP": "White Pine",
    "UK": "Unknown"}
def assignCounty(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()
        try:
            outList = CountyDict[String1]
        except:
            outList = ""
    return outList

outdf['in_County'] = outdf.apply(lambda row: assignCounty(row['in_County']), axis=1)
outdf['in_County'].unique() 

array(['Humboldt', 'Washoe', 'Pershing', 'Elko', 'Unknown', 'White Pine',
       'Eureka', 'Lander', 'Nye', 'Esmerelda', 'Churchill', 'Lyon',
       'Storey', 'Douglas', 'Carson City', '', 'Mineral', 'Lincoln',
       'Clark'], dtype=object)

In [9]:
# For creating AllocationLegalStatusCV
LegalDict = {
"ABN" : "Abandoned",
"ABR" : "Abrogated",
"APP" : "Application",
"CAN" : "Canceled",
"CER" : "Certificate",
"CUR" : "Curtailed",
"DEC" : "Decreed",
"DEN": "Denied",
"EXP": "Expired",
"FOR": "Forfeited",
"PER": "Permit",
"REJ": "Rejected",
"REL": "Relinquished",
"RES": "Reserved",
"RFA": "Ready For Action",
"RFP": "Ready for Action (Protested)",
"RLP": "Relinquish a Portion",
"RSC": "Rescinded",
"RVK": "Revoked",
"RVP": "Revocable Permit",
"SUP": "Supersceded",
"SUS": "Suspended",
"VST": "Vested Rights",
"WDR": "Withdrawn"}
def assignAllocationLegalStatusCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue) == True :
        outList = ""
    else:
        String1 = colrowValue.strip()
        try:
            outList = LegalDict[String1]
        except:
            outList = ""
    return outList
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: assignAllocationLegalStatusCV(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Certificate', 'Denied', 'Canceled', 'Forfeited', 'Withdrawn',
       'Permit', 'Vested Rights', 'Decreed', 'Abrogated', 'Expired',
       'Ready For Action', 'Rejected', 'Ready for Action (Protested)',
       'Reserved', 'Abandoned', 'Supersceded', 'Application',
       'Relinquished', 'Relinquish a Portion', 'Rescinded', 'Suspended',
       'Revoked', 'Revocable Permit'], dtype=object)

In [10]:
# For creating BeneficialUse
BeneficialUseDict = {
"COM" : "Commercial",
"CON" : "Construction",
"DEC" : "As Decreed",
"DOM" : "Domestic",
"DWR" : "Dewatering",
"ENV" : "Environmental",
"EVP" : "Evaporation",
"IND": "Industrial",
"IRC": "Irrigation-Carey Act",
"IRD": "Irrigation-DLE",
"IRR": "Irrigation",
"MM": "Mining and Milling",
"MMD": "Mining Milling and Dewatering",
"MUN": "Municipal",
"OTH": "Other",
"PWR": "Power",
"QM": "Quasi-Municipal",
"REC": "Recreational",
"STK": "Stockwatering",
"STO": "Storage",
"UKN": "Unknown",
"WLD": "Wildlife"}
def assignBeneficialUse(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        String1 = colrowValue.strip()  # remove whitespace chars
        try:
            outList = BeneficialUseDict[String1]
        except:
            outList = ""

    return outList
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: assignBeneficialUse(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['Irrigation', 'Domestic', 'Commercial', 'Stockwatering',
       'Mining and Milling', 'Irrigation-DLE', 'Storage',
       'Quasi-Municipal', 'Municipal', 'Recreational', 'Construction',
       'Wildlife', 'Other', 'Industrial', 'Power', 'Irrigation-Carey Act',
       'Unknown', 'As Decreed', 'Mining Milling and Dewatering',
       'Dewatering', 'Environmental', 'Evaporation'], dtype=object)

In [11]:
# For creating WaterSourceTypeCV
UnknownWSCVDict = {
"EFF" : "Reuse",
"GEO" : "Groundwater",
"LAK" : "Surface Water",
"OGW" : "Groundwater",
"OSW" : "Surface Water",
"RES" : "Reservoir",
"SPR" : "Surface Water",
"STO" : "Storage",
"STR" : "Surface Water",
"UG" : "Groundwater",
"UKN" : "Unknown"}
def assignWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()  # remove whitespace chars
        try:
            outList = UnknownWSCVDict[String1]
        except:
            outList = ""
    return outList

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: assignWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'Reservoir', 'Reuse', 'Storage',
       'Unknown'], dtype=object)

In [12]:
# For creating SiteTypeCV
UnknownSTCVDict = {
    "EFF":"Effluent",
    "GEO":"Geothermal",
    "LAK":"lake",
    "OGW":"Other Ground Water",
    "OSW":"Other Surface Water",
    "RES":"Reservoir",
    "SPR":"Spring",
    "STO":"Storage",
    "STR":"stream",
    "UG":"Underground",
    "UKN":"Unknown"}
def assignSiteTypeCV(colrowValue):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ""
    else:
        String1 = colrowValue.strip()  # remove whitespace chars
        try:
            outList = UnknownSTCVDict[String1]
        except:
            outList = ""
    return outList

outdf['in_SiteTypeCV'] = outdf.apply(lambda row: assignSiteTypeCV(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['', 'Geothermal', 'Spring', 'Underground'], dtype=object)

## Clean Data / data types

In [13]:
# Clean name entries of spcial characters
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;/\)(-]", "", Val).title().replace("  ", " ").strip()
    return Val

In [14]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array([''], dtype=object)

In [15]:
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['001 N47 E30 04A  1', '', '001 N47 E30 04B  2', ...,
       '207 N11 E62 33Acdb2', '177 N35 E62 35Acbb1',
       '065 N33 E39 35Dbbb1'], dtype=object)

In [16]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Depaoli, Robert R And Debra M,Erquiaga, Joe',
       'Holloway, Darlene L',
       'Smith, Irwin E And Marie A,Humboldt County', ...,
       'Thomas, Adam J',
       'Theisen, William L,Grady, Jonathan And Jennifer Udi Â½ Interest And The Craig And Kacie Nelson Trust Udi Â½ Interest',
       'Mcdonald, Robert'], dtype=object)

In [17]:
# Ensure Empty String / remove string value of "nan"

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [18]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array([''], dtype=object)

In [19]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'Reservoir', 'Reuse', 'Storage',
       'Unknown'], dtype=object)

In [20]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['', 'Geothermal', 'Spring', 'Underground'], dtype=object)

In [21]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['001 N47 E30 04A  1', '', '001 N47 E30 04B  2', ...,
       '207 N11 E62 33Acdb2', '177 N35 E62 35Acbb1',
       '065 N33 E39 35Dbbb1'], dtype=object)

In [22]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Depaoli, Robert R And Debra M,Erquiaga, Joe',
       'Holloway, Darlene L',
       'Smith, Irwin E And Marie A,Humboldt County', ...,
       'Thomas, Adam J',
       'Theisen, William L,Grady, Jonathan And Jennifer Udi Â½ Interest And The Craig And Kacie Nelson Trust Udi Â½ Interest',
       'Mcdonald, Robert'], dtype=object)

In [23]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['Irrigation', 'Domestic', 'Commercial', 'Stockwatering',
       'Mining and Milling', 'Irrigation-DLE', 'Storage',
       'Quasi-Municipal', 'Municipal', 'Recreational', 'Construction',
       'Wildlife', 'Other', 'Industrial', 'Power', 'Irrigation-Carey Act',
       'Unknown', 'As Decreed', 'Mining Milling and Dewatering',
       'Dewatering', 'Environmental', 'Evaporation'], dtype=object)

In [24]:
# Ensure Latitude entry is either numireic or blank, no 0 entries
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([41.985729, 41.988335, 41.988679, ..., 37.829444, 39.321389,
       40.685556])

In [25]:
# Ensure Longitude entry is either numireic or blank, no 0 entries
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-118.622074, -118.527229, -118.636906, ...,  114.997778,
        115.246944,  115.237778])

In [26]:
# Changing datatype of Priority Date to date fields entry
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array(['1950-09-25T00:00:00.000000000', '1954-09-23T00:00:00.000000000',
       '1959-05-14T00:00:00.000000000', ...,
       '1989-04-12T00:00:00.000000000', '1980-12-23T00:00:00.000000000',
       '2022-08-29T00:00:00.000000000'], dtype='datetime64[ns]')

In [27]:
# Ensure Flow entry is either numireic or blank, no 0 entries
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array([1.5, 4.0, 0.045, ..., 2.2205, 0.00823, 23.21], dtype=object)

In [28]:
# Ensure Volume entry is either numireic or blank, no 0 entries
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array([200.0, 746.6699999999998, 32.58, ..., 863.28, 790.8, 876.242],
      dtype=object)

In [29]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "wadeID" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
WaterSourceNativeIDdict = pd.Series(dfWaterSourceNativeID.in_WaterSourceNativeID.values, index=dfWaterSourceNativeID.linkKey.astype(str)).to_dict()
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        colrowValue = str(A).strip() + str(B).strip()
        try:
            outList = WaterSourceNativeIDdict[colrowValue]
        except:
            outList = ''
    return outList

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1', 'wadeID2', 'wadeID3', 'wadeID4', 'wadeID5', 'wadeID6'],
      dtype=object)

## Drop non-Active AllocationLegalStatusCV Water Rights
- For this {state name / organization}, we don't want water rights that are considered: {enter string entries here}

In [30]:
# drop non-active AllocationLegalStatusCV values specific to that state.

# drop the list
dropLegalStatusList = ["Abandoned", "Abrogated", "Application", "Canceled", "Denied", "Expired", "Forfeited", "Ready For Action", "Ready for Action (Protested)", "Rejected", "Revoked", "Supersceded", "Withdrawn"]

# drop rows from above list
outdf = outdf[outdf.in_AllocationLegalStatusCV.isin(dropLegalStatusList) == False].reset_index(drop=True)

print(len(outdf))
outdf['in_AllocationLegalStatusCV'].unique()

63107


array(['Certificate', 'Permit', 'Vested Rights', 'Decreed', 'Reserved',
       'Relinquished', 'Relinquish a Portion', 'Rescinded', 'Suspended',
       'Revocable Permit'], dtype=object)

## Shapefile Data
- For attaching geometry to POU csv inputs.

In [31]:
# PoU Shapefile Data
shapefileInput = "RawInputData/shapefile/POU_Permits_All.zip" # ziped folder of the shp file
dfPoUshapetemp = gpd.read_file(shapefileInput)
print(len(dfPoUshapetemp))
dfPoUshapetemp.head()

32339


Unnamed: 0,OBJECTID,app,app_status,site_name,poly_id,cert,mou,basin,county,source,source_des,duty_balan,diversion_,diversio_1,pou_acre_t,priority_d,permit_dat,publicatio,poc_filed_,decree_nam,project_na,monitor_pl,reporting_,meter_requ,interbasin,hydro_asse,secondary_,permit_rec,latitude,longitude,record_las,GlobalID,Shape_Leng,Shape_Le_1,Shape_Area,geometry
0,1,24466,ABR,001 N47 E30 22ABCC2,2893,8091.0,IRR,1,HU,UG,,0.0,0.0,4.75,766.2,1966-05-26,1969-11-26,1968-07-24,1969-03-05,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.94112,-118.61158,2008-12-05,{B0D32958-E0AD-4E71-A110-2566F4AE245F},11298.2921,0.1214,0.00034,"POLYGON ((-118.60192 41.94461, -118.59707 41.9..."
1,2,27292,ABR,001 N47 E30 15CDCD1,2893,8219.0,IRR,1,HU,UG,,0.0,0.0,4.5,766.2,1966-05-27,1973-06-05,1973-03-15,1973-09-18,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.94493,-118.61496,2013-04-15,{EE6D27A8-89D5-4DE7-A442-7C99626BFED4},11298.2921,0.1214,0.00034,"POLYGON ((-118.60192 41.94461, -118.59707 41.9..."
2,3,27293,ABR,001 N47 E30 22ADDC1,2893,8203.0,IRR,1,HU,UG,,0.0,0.0,5.4,766.2,1964-10-06,1973-06-05,1973-03-15,1973-09-18,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,41.93748,-118.60367,2008-12-05,{CD4854F3-A81C-436E-B2F8-A7167E8CB92A},11298.2921,0.1214,0.00034,"POLYGON ((-118.60192 41.94461, -118.59707 41.9..."
3,4,69076,RFP,021 N31 E20 04ACBA1,23633,,IRR,21,WA,UG,,0.0,5.0,5.0,0.0,2002-08-15,,2002-09-19,,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,40.59651,-119.73408,2011-08-09,{E9F415BF-456A-4D0A-BE83-028E49B07267},9144.90698,0.09412,0.00024,"POLYGON ((-119.74039 40.60351, -119.73043 40.6..."
4,5,23168,CER,024 N35 E23 13ADBB1,3388,6820.0,IRR,24,WA,UG,WELL #4,1280.0,5.4,5.4,320.0,1966-06-07,1967-03-20,1966-09-09,1967-10-17,,,,,0,,,,http://water.nv.gov/permitinformation.aspx?app...,40.92284,-119.31408,2022-06-20,{D671108A-CBBF-4B56-B221-F7A5E10BC522},4737.10149,0.05162,0.00013,"POLYGON ((-119.31891 40.91926, -119.31892 40.9..."


In [32]:
# create temp dataframe to hold native ID and geometry from shapefile input
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)

# assing values to temp dataframe based on shapefile input
# for in_SiteNativeID assure ID value is the same as that listed above for POU info.
dfPoUshape['in_SiteNativeID'] = "SitePOUwadeID" + dfPoUshapetemp['OBJECTID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
print(len(dfPoUshape))
dfPoUshape.head()

32339


Unnamed: 0,in_SiteNativeID,geometry
0,SitePOUwadeID1,"POLYGON ((-118.60192 41.94461, -118.59707 41.9..."
1,SitePOUwadeID2,"POLYGON ((-118.60192 41.94461, -118.59707 41.9..."
2,SitePOUwadeID3,"POLYGON ((-118.60192 41.94461, -118.59707 41.9..."
3,SitePOUwadeID4,"POLYGON ((-119.74039 40.60351, -119.73043 40.6..."
4,SitePOUwadeID5,"POLYGON ((-119.31891 40.91926, -119.31892 40.9..."


## Export Data

In [33]:
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63107 entries, 0 to 63106
Data columns (total 63 columns):
 #   Column                                        Non-Null Count  Dtype         
---  ------                                        --------------  -----         
 0   WaDEUUID                                      63107 non-null  object        
 1   in_MethodUUID                                 63107 non-null  object        
 2   in_VariableSpecificUUID                       63107 non-null  object        
 3   in_OrganizationUUID                           63107 non-null  object        
 4   in_Geometry                                   63107 non-null  object        
 5   in_GNISFeatureNameCV                          63107 non-null  object        
 6   in_WaterQualityIndicatorCV                    63107 non-null  object        
 7   in_WaterSourceName                            63107 non-null  object        
 8   in_WaterSourceNativeID                        63107 non-null  obje

In [34]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,utD0,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID1,Groundwater,,Digitized,Humboldt,4326,,,,41.98573,-118.62207,,,POD,001 N47 E30 04A 1,SitePODwadeID1,,,NV,,,,,,,,,,1.50000,Certificate,13503,"Depaoli, Robert R And Debra M,Erquiaga, Joe",1950-09-25,,,,,200.00000,Irrigation,,,,,,0,,50.00000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
1,utD2,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID1,Groundwater,,Digitized,Humboldt,4326,,,,41.98868,-118.63691,,,POD,001 N47 E30 04B 2,SitePODwadeID3,,,NV,,,,,,,,,,0.04500,Certificate,18001,"Smith, Irwin E And Marie A,Humboldt County",1959-05-14,,,,,32.58000,Irrigation,,,,,,0,,0.00000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
2,utD6,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID1,Groundwater,,Digitized,Humboldt,4326,,,,41.98984,-118.63263,,,POD,001 N47 E30 03B 2,SitePODwadeID7,,,NV,,,,,,,,,,0.08900,Certificate,21176,Humboldt County School District,1963-04-04,,,,,64.41621,Domestic,,,,,,0,,0.00000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
3,utD8,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID1,Groundwater,,Digitized,Humboldt,4326,,,,41.99294,-118.64341,,,POD,001 N47 E30 05Aa 1,SitePODwadeID9,,,NV,,,,,,,,,,1.50000,Certificate,23502,"English, Brian L And Jill Ja,Peters, Donald G,...",1966-11-17,,,,,201.68000,Irrigation,,,,,,0,,52.50000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
4,utD15,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID1,Groundwater,,Digitized,Humboldt,4326,,,,41.94493,-118.61496,,,POD,001 N47 E30 15Cdcd1,SitePODwadeID16,,,NV,,,,,,,,,,4.50000,Permit,83098,"Denio Farms,Denio Farms And Gl Curti Ranches L...",1966-05-27,,,,,3064.80000,Irrigation,,,,,,0,,0.00000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63102,utU32324,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID2,Surface Water,,Digitized,White Pine,4326,,,,38.95726,-115.20288,,,POU,207 N13 E60 26Cbdb1,SitePOUwadeID32325,,,NV,,,,,,,,,,0.25000,Certificate,13273,"W Bar None Cattle Company, Llc,Gardner, Jess",1950-02-20,,,,,270.64000,Irrigation,,,,,,0,,67.66000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
63103,utU32325,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID2,Surface Water,,Digitized,White Pine,4326,,,,39.06068,-114.96208,,,POU,207 N14 E62 24Cbba1,SitePOUwadeID32326,,,NV,,,,,,,,,,0.29500,Certificate,1634,"Rowe, C L",1910-03-25,,,,,105.32000,Irrigation,,,,,,0,,29.54000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
63104,utU32326,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID2,Surface Water,,Digitized,Clark,4326,,,,36.71152,-114.69595,,,POU,219 S14 E65 15Ddcd1,SitePOUwadeID32327,,,NV,,,,,,,,,,3.50000,Certificate,50734,Nevada Power Company Lessee,1905-01-01,,,,,1000.00000,Industrial,,,,,,0,,0.00000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...
63105,utU32327,NVwr_M1,NVwr_V1,NVwr_O1,,,,,wadeID1,Groundwater,,Digitized,Clark,4326,,,,36.73396,-114.74836,,,POU,219 S14 E65 07Adda1,SitePOUwadeID32328,,,NV,,,,,,,,,,5.00000,Permit,58269,Moapa Valley Water District,1992-10-27,,,,,1085.94000,Municipal,,,,,,0,,0.00000,,,,,,,,http://water.nv.gov/permitinformation.aspx?app...


In [35]:
# Export the output dataframe
# change output name / abbreviation to match native state provdier and wade data type 
outdf.to_csv('RawInputData/Pwr_nvMain.zip', compression=dict(method='zip', archive_name='Pwr_nvMain.csv'), index=False)  # The output, save as a zip
dfPoUshape.to_csv('RawInputData/P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.
print("Done")

Done
