# Pre-processing IdahoAllocation data for WaDE upload.
- Purpose:  To pre-process the data into one main file for simple DataFrame creation and extraction

In [1]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Idaho/WaterAllocation"
os.chdir(workingDir)

## Point of Diversion Data

In [3]:
# Input File
FI_POD = "RawInputData/shapefiles/Water_Right_PODs.zip"
dfinPOD = gpd.read_file(FI_POD).replace(np.nan, "")
dfinPOD = dfinPOD.drop(['geometry'], axis=1) # don't want geometry for POD sites.

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "idD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('RawInputData/Water_Right_PODs.zip', compression=dict(method='zip', archive_name='Water_Right_PODs.csv'), index=False)

print(len(dfinPOD))
dfinPOD.head(1)

263784


Unnamed: 0,OBJECTID,WaterRight,BasinNumbe,SequenceNu,SplitSuffi,VersionNum,Status,Basis,PriorityDa,Owner,OverallMax,Source,SourceQual,TributaryO,Tributar_1,WaterDistr,DiversionN,DataSource,MetalTagNu,DiversionT,RightID,PointOfDiv,WRReport,WRDocs,WRMap,SpatialDat,TrustOrNon,Uses,OverallM_1,Longitude,Latitude,WaDEUUID
0,1,63-29423,63,29423,,0,Active,Decreed,1906-03-01,SAND SPRINGS RANCH & CO,0.02,UNNAMED STREAM,,COW CREEK,,63,,,,E,572156,603830,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,0,,STOCKWATER,0.0,-115.84801,43.45637,idD0


In [4]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "IDwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "IDwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "IDwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOD['Source'].replace(np.nan, "").astype(str).astype(str).str.title()
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = "" # autfo fill in below

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = dfinPOD['DataSource']
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = dfinPOD['DiversionN']
df['in_SiteNativeID'] = "POD" + dfinPOD['PointOfDiv'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "ID"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = dfinPOD['Basis']
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['OverallMax'].astype(float)
df['in_AllocationLegalStatusCV'] = dfinPOD['Status']
df['in_AllocationNativeID'] =  dfinPOD['WaterRight'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfinPOD['Owner']
df['in_AllocationPriorityDate'] = dfinPOD['PriorityDa']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['OverallM_1'].astype(float)
df['in_BeneficialUseCategory'] = dfinPOD['Uses']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['WRReport']

outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True).replace(np.nan, '')
print(len(outPOD))
outPOD.head()

263784


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,idD0,IDwr_M1,IDwr_V1,IDwr_O1,,,,Unnamed Stream,,,,,,4326,,,,43.45637,-115.84801,,,POD,,POD603830,,,ID,,,,,Decreed,,,,,0.02,Active,63-29423,SAND SPRINGS RANCH & CO,1906-03-01,,,,,0.0,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
1,idD1,IDwr_M1,IDwr_V1,IDwr_O1,,,,Badger Creek,,,,,,4326,,,,43.40412,-111.84819,,,POD,,POD626789,,,ID,,,,,Decreed,,,,,0.02,Active,25-14121,JUDYS IDA MON RANCHES INC,1914-05-01,,,,,0.0,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
2,idD2,IDwr_M1,IDwr_V1,IDwr_O1,,,,Henry Creek,,,,,,4326,,,,43.37611,-111.90628,,,POD,,POD657093,,,ID,,,,,Decreed,,,,,0.02,Active,25-13956,HENRY CREEK RANCH,1922-02-25,,,,,0.0,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
3,idD3,IDwr_M1,IDwr_V1,IDwr_O1,,,,Henry Creek,,,,,,4326,,,,43.36705,-111.90395,,,POD,,POD657092,,,ID,,,,,Decreed,,,,,0.02,Active,25-13956,HENRY CREEK RANCH,1922-02-25,,,,,0.0,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
4,idD4,IDwr_M1,IDwr_V1,IDwr_O1,,,,Unnamed Slough,,,,,,4326,,,,43.43029,-111.30606,,,POD,,POD604944,,,ID,,,,,Decreed,,,,,0.0,Active,23-11213,STEPHEN J MC GRATH,1957-06-20,,,,,10.8,WILDLIFE STORAGE,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...


## Place of Use Data

In [5]:
# Input File - place of use data
FI_POU = "RawInputData/shapefiles/WaterRightPOUs.zip"
dfinPOU = gpd.read_file(FI_POU).replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "idU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('RawInputData/WaterRightPOUs.zip', compression=dict(method='zip', archive_name='WaterRightPOUs.csv'), index=False)

print(len(dfinPOU))
dfinPOU.head()

215675


Unnamed: 0,WaterRight,BasinNumbe,SequenceNu,SplitSuffi,Status,PriorityDa,DecreedDat,Owner,WaterUse,WaterUseCo,TotalAcres,AcreLimit,Source,SourceQual,TributaryO,Tributar_1,WaterDistr,LargePOU,RightID,PlaceOfUse,WRReport,WRDocs,WRMap,TrustOrNon,cent_Latit,cent_Longi,SHAPE_STAr,SHAPE_STLe,Shape_Leng,Shape_Area,geometry,WaDEUUID
0,33-10900,33,10900,,Active,1934-06-28,2001-10-11,UNITED STATES OF AMERICA ACTING THROUGH,STOCKWATER,4,0.0,0.0,LITTLE LOST RIVER,,SINKS,,33,0,222600,2,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,,43.9814,-113.16981,8321866.5321,54110.17968,0.55442,0.00093,"MULTIPOLYGON (((-113.09907 43.88546, -113.0940...",idU0
1,57-10609,57,10609,,Active,1941-07-01,1996-11-15,LACHELLE WOOD,STOCKWATER,4,0.0,0.0,GROUND WATER,,,,NWD,0,361130,7,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,,43.51844,-116.91302,160610.054,1603.13059,0.01714,2e-05,"POLYGON ((-116.91055 43.52022, -116.91051 43.5...",idU1
2,63-30274,63,30274,,Active,1988-08-31,2000-05-26,MARY ANN HOFMANN,DOMESTIC,43,0.0,0.0,GROUND WATER,,,,TBD,0,376520,9,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,,43.58281,-116.17519,651.7807,103.72826,0.00114,0.0,"POLYGON ((-116.17500 43.58271, -116.17537 43.5...",idU2
3,36-12737,36,12737,,Active,1958-12-31,1998-05-19,HERBERT W POTEET,DOMESTIC,43,0.0,0.0,GROUND WATER,,,,EXC,0,362295,12,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,,42.73658,-113.51396,161398.92655,1607.1943,0.01705,2e-05,"POLYGON ((-113.51150 42.73474, -113.51641 42.7...",idU3
4,36-12736,36,12736,,Active,1958-12-31,1998-03-20,HERBERT W POTEET,DOMESTIC,43,0.0,0.0,GROUND WATER,,,,EXC,0,362294,15,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,,42.73986,-113.48937,160118.02655,1600.61972,0.01698,2e-05,"POLYGON ((-113.49018 42.73806, -113.49019 42.7...",idU4


In [6]:
# create output POU dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfinPOU['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "IDwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "IDwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "IDwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOU['Source'].replace(np.nan, "").astype(str).str.title()
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = "" # autfo fill in below

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = "Centroid"
df['in_County'] = "WaDE Unspecified"
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOU['cent_Latit']
df['in_Longitude'] = dfinPOU['cent_Longi']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
df['in_SiteNativeID'] = "POU" + dfinPOU['PlaceOfUse'].replace("", 0).fillna(0).astype(int).astype(str)
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "ID"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfinPOU['Status']
df['in_AllocationNativeID'] =  dfinPOU['WaterRight'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfinPOU['Owner']
df['in_AllocationPriorityDate'] = dfinPOU['PriorityDa']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = ""
df['in_BeneficialUseCategory'] = dfinPOU['WaterUse']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfinPOU['AcreLimit']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOU['WRReport']

outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True)
print(len(outPOU))
outPOU.head()

215675


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,idU0,IDwr_M1,IDwr_V1,IDwr_O1,,,,Little Lost River,,,,Centroid,WaDE Unspecified,4326,,,,43.9814,-113.16981,,,POU,,POU2,,,ID,,,,,,,,,,,Active,33-10900,UNITED STATES OF AMERICA ACTING THROUGH,1934-06-28,,,,,,STOCKWATER,,,,,,0,,0.0,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
1,idU1,IDwr_M1,IDwr_V1,IDwr_O1,,,,Ground Water,,,,Centroid,WaDE Unspecified,4326,,,,43.51844,-116.91302,,,POU,,POU7,,,ID,,,,,,,,,,,Active,57-10609,LACHELLE WOOD,1941-07-01,,,,,,STOCKWATER,,,,,,0,,0.0,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
2,idU2,IDwr_M1,IDwr_V1,IDwr_O1,,,,Ground Water,,,,Centroid,WaDE Unspecified,4326,,,,43.58281,-116.17519,,,POU,,POU9,,,ID,,,,,,,,,,,Active,63-30274,MARY ANN HOFMANN,1988-08-31,,,,,,DOMESTIC,,,,,,0,,0.0,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
3,idU3,IDwr_M1,IDwr_V1,IDwr_O1,,,,Ground Water,,,,Centroid,WaDE Unspecified,4326,,,,42.73658,-113.51396,,,POU,,POU12,,,ID,,,,,,,,,,,Active,36-12737,HERBERT W POTEET,1958-12-31,,,,,,DOMESTIC,,,,,,0,,0.0,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
4,idU4,IDwr_M1,IDwr_V1,IDwr_O1,,,,Ground Water,,,,Centroid,WaDE Unspecified,4326,,,,42.73986,-113.48937,,,POU,,POU15,,,ID,,,,,,,,,,,Active,36-12736,HERBERT W POTEET,1958-12-31,,,,,,DOMESTIC,,,,,,0,,0.0,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...


## Concatenate POD and POU Data.  Make needed changes

In [7]:
# Concatenate dataframes
frames = [outPOD, outPOU]  # list all out dataframes here
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

479459


## Clean Data / data types

In [8]:
# Fix ID Owner Name
# Given ID Owner is not full concatenated record. Use provided xlsx from Danielle Favreau to fix.

fileInput = "RawInputData/ownername_USactingThrough_WaDE_05112023.xlsx"
df_idown = pd.read_excel(fileInput)
df_idown['WRNO'] = df_idown['WRNO'].replace(" ", "").replace("", 0).fillna(0).astype(str).str.strip()
df_idown['OrgName2'] = df_idown['OrgName2'].replace("DIRECTOR PN CODE-", "").astype(str).str.strip()
IdahoOwnerNameFixdict = pd.Series(df_idown.OrgName2.values, index=df_idown.WRNO.astype(str)).to_dict()


# Retreive WaDE Custom water source native ID
def retrieveOwnerName(valID, valOwn):
    try:
        outString = IdahoOwnerNameFixdict[valID]
    except:
        outString = valOwn
    return outString

outdf['in_AllocationOwner'] = outdf.apply(lambda row: retrieveOwnerName(row['in_AllocationNativeID'], row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['SAND SPRINGS RANCH & CO', 'JUDYS IDA MON RANCHES INC',
       'HENRY CREEK RANCH', ..., 'PAUL SCHLOTTERBECK',
       'LAURA CHIEZE DAVIS', 'ROSS WORTHINGTON'], dtype=object)

In [9]:
# Clean name entries of spcial characters
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;/\)(-]", "", Val).title().replace("  ", " ").strip()
    return Val

In [10]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Unnamed Stream', 'Badger Creek', 'Henry Creek', ...,
       'Lashbrook Pond', 'Black Pine Creek', 'North Canyon Hollow Creek'],
      dtype=object)

In [11]:
outdf['in_County'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_County']), axis=1)
outdf['in_County'].unique()

array(['', 'Wade Unspecified'], dtype=object)

In [12]:
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['', 'Patch Canal', 'Surmeier, Daniel E And Judy L', ...,
       'P16 Pipeline', 'Chuck Slough Submersible Pump',
       'River 2" Intake To Pump'], dtype=object)

In [13]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Sand Springs Ranch Co', 'Judys Ida Mon Ranches Inc',
       'Henry Creek Ranch', ..., 'Paul Schlotterbeck',
       'Laura Chieze Davis', 'Ross Worthington'], dtype=object)

In [14]:
# Ensure Empty String / remove string value of "nan"

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [15]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Unnamed Stream', 'Badger Creek', 'Henry Creek', ...,
       'Lashbrook Pond', 'Black Pine Creek', 'North Canyon Hollow Creek'],
      dtype=object)

In [16]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array([''], dtype=object)

In [17]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array([''], dtype=object)

In [18]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['', 'Patch Canal', 'Surmeier, Daniel E And Judy L', ...,
       'P16 Pipeline', 'Chuck Slough Submersible Pump',
       'River 2" Intake To Pump'], dtype=object)

In [19]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Sand Springs Ranch Co', 'Judys Ida Mon Ranches Inc',
       'Henry Creek Ranch', ..., 'Paul Schlotterbeck',
       'Laura Chieze Davis', 'Ross Worthington'], dtype=object)

In [20]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
uniqueList = list(set([i.strip() for i in ','.join(outdf['in_BeneficialUseCategory'].astype(str)).split(',')]))
uniqueList.sort()
uniqueList

['',
 'ADMINISTRATIVE',
 'AESTHETIC',
 'AESTHETIC FROM STORAGE',
 'AESTHETIC STORAGE',
 'COMMERCIAL',
 'COMMERCIAL FROM STORAGE',
 'COMMERCIAL STORAGE',
 'COOLING',
 'COOLING FROM STORAGE',
 'COOLING STORAGE',
 'DIVERSION TO STORAGE',
 'DOMESTIC',
 'DOMESTIC FROM STORAGE',
 'DOMESTIC STORAGE',
 'FEDERAL RESERVED USE',
 'FIRE PROTECTION',
 'FIRE PROTECTION FROM STORAGE',
 'FIRE PROTECTION STORAGE',
 'FISH HABITAT',
 'FISH HABITAT STORAGE',
 'FISH PROPAGATION',
 'FISH PROPAGATION FROM STORAGE',
 'FISH PROPAGATION STORAGE',
 'FLOOD CONTROL STORAGE',
 'GROUND WATER RECHARGE',
 'GROUND WATER RECHARGE FROM STORAGE',
 'GROUND WATER RECHARGE STORAGE',
 'HEATING',
 'INDUSTRIAL',
 'INDUSTRIAL FROM STORAGE',
 'INDUSTRIAL STORAGE',
 'IRRIGATION',
 'IRRIGATION FROM STORAGE',
 'IRRIGATION STORAGE',
 'LAKE LEVEL MAINTENANCE',
 'MINIMUM STREAM FLOW',
 'MINING',
 'MINING FROM STORAGE',
 'MINING STORAGE',
 'MITIGATION',
 'MITIGATION BY NON-USE',
 'MUNICIPAL',
 'MUNICIPAL FROM STORAGE',
 'MUNICIPAL STORA

In [21]:
# WaterSourceType
# searach water source name for keywords

WaterSourceTypeDict = {
"sub" : "Groundwater",
"ground water" : "Groundwater",
"canal" :  "Surface Water",
"channel" : "Surface Water",
"creek" : "Surface Water",
"ditch" : "Surface Water",
"drain" : "Surface Water",
"drains" : "Surface Water",
"draw" : "Surface Water",
"dry" : "Surface Water",
"fork" : "Surface Water",
"gluch" : "Surface Water",
"gulch": "Surface Water",
"hole" : "Surface Water",
"holes" : "Surface Water",
"hollow"  : "Surface Water",
"lake" :  "Surface Water",
"lakes" :  "Surface Water",
"pond" :  "Surface Water",
"reservoir" : "Surface Water",
"river" : "Surface Water",
"runoff" : "Surface Water",
"seep" : "Surface Water",
"slough" : "Surface Water",
"spring" :  "Surface Water",
"springs" :  "Surface Water",
"spr" :  "Surface Water",
"stream" : "Surface Water",
"streams" : "Surface Water",
"surface" : "Surface Water",
"swamp" : "Surface Water",
"swamps" : "Surface Water",
"wash" : "Surface Water",
"fargo wasteway" : "Surface Water",
"frozen dog wasteway" : "Surface Water",
"tunnel no 7 wasteway" : "Surface Water",
"waste water" : "Reuse",
"wastewater" : "Reuse",
"treated municipal wastewater" : "Reuse"}

def assignWaterSourceType(val):
    val = val.lower().strip()
    if val == "" or pd.isnull(val):
        outList = ""
    elif val == "ground water": 
        outList = "Groundwater"
    else:
        for i in WaterSourceTypeDict.keys():
            if i in val:
                outList = WaterSourceTypeDict[i]
                break
            else:
                outList = ""
    return outList

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: assignWaterSourceType(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Surface Water', 'Groundwater', 'Reuse', ''], dtype=object)

In [22]:
# Ensure Latitude entry is either numireic or a 0
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([43.45636835, 43.404115  , 43.37611246, ..., 43.34448762,
       43.48400252, 44.73104969])

In [23]:
# Ensure Longitude entry is either numireic or a 0
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-115.84801203, -111.84818566, -111.90628416, ..., -114.29687671,
       -114.27674501, -116.04609688])

In [24]:
# Changing datatype of Priority Date to date fields entry
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf["in_AllocationPriorityDate"].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

<DatetimeArray>
['1906-03-01 00:00:00', '1914-05-01 00:00:00', '1922-02-25 00:00:00',
 '1957-06-20 00:00:00', '1957-04-01 00:00:00', '1994-09-01 00:00:00',
 '1972-12-24 00:00:00', '1914-04-13 00:00:00', '1934-06-28 00:00:00',
 '1960-06-01 00:00:00',
 ...
 '1872-05-25 00:00:00', '1897-05-05 00:00:00', '1969-11-26 00:00:00',
 '1893-05-15 00:00:00', '2009-01-06 00:00:00', '1884-09-15 00:00:00',
 '2016-07-07 00:00:00', '2019-05-13 00:00:00', '2021-05-18 00:00:00',
 '2022-04-19 00:00:00']
Length: 27681, dtype: datetime64[ns]

In [25]:
# Ensure Flow entry is either numireic or a 0
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

array([0.02, '', 0.06, ..., 11.06, 15.7, 14.66], dtype=object)

In [26]:
# Ensure Volume entry is either numireic or a 0
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

array(['', 10.8, 14.6, ..., 3532.0, 118.7, 3298.5], dtype=object)

In [27]:
# Creating WaDE Custom water source native ID for easy water source identification
# use unique WaterSourceName and WaterSourceType values
# ----------------------------------------------------------------------------------------------------

# Create temp in_WaterSourceNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_WaterSourceName'] = outdf['in_WaterSourceName'].astype(str).str.strip()
dfTempID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_WaterSourceNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_WaterSourceName'].astype(str) + dfTempID['in_WaterSourceTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_WaterSourceNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_WaterSourceNativeID'], 
                                                                              row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeId1', 'wadeId2', 'wadeId3', ..., 'wadeId6792', 'wadeId6793',
       'wadeId6794'], dtype=object)

In [28]:
# Creating WaDE Custom site native ID for easy site identification
# use Unique Latitude, Longitude, SiteName and SiteTypeCV values
# ----------------------------------------------------------------------------------------------------

# Create temp in_SiteNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_Latitude'] = outdf['in_Latitude'].astype(str).str.strip()
dfTempID['in_Longitude'] = outdf['in_Longitude'].astype(str).str.strip()
dfTempID['in_SiteName'] = outdf['in_SiteName'].astype(str).str.strip()
dfTempID['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_SiteNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_Latitude'].astype(str) + dfTempID['in_Longitude'].astype(str) + dfTempID['in_SiteName'].astype(str)+ dfTempID['in_SiteTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_SiteNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB, valC, valD):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip() + str(valC).strip() + str(valD).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_SiteNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_SiteNativeID'], 
                                                                       row['in_Latitude'], row['in_Longitude'],
                                                                       row['in_SiteName'], row['in_SiteTypeCV']), axis=1)
outdf['in_SiteNativeID'].unique()

array(['POD603830', 'POD626789', 'POD657093', ..., 'POU882087',
       'POU882488', 'POU882882'], dtype=object)

## Drop non-Active AllocationLegalStatusCV Water Rights
- For this {state name / organization}, we don't want water rights that are considered: {enter string entries here}

In [29]:
# drop non-active AllocationLegalStatusCV values specific to that state.

# drop the list
dropLegalStatusList = [""] # enter string entries here

# drop rows from above list
outdf = outdf[outdf.in_AllocationLegalStatusCV.isin(dropLegalStatusList) == False].reset_index(drop=True)

print(len(outdf))
outdf['in_AllocationLegalStatusCV'].unique()

479459


array(['Active', 'active'], dtype=object)

## Shapefile Data
- For attaching geometry to POU csv inputs.

In [32]:
# Shapefile input
# use same as input above

dfPoUshapetemp = dfinPOU.copy()
print(len(dfPoUshapetemp))
dfPoUshapetemp.head(1)

215675


Unnamed: 0,WaterRight,BasinNumbe,SequenceNu,SplitSuffi,Status,PriorityDa,DecreedDat,Owner,WaterUse,WaterUseCo,TotalAcres,AcreLimit,Source,SourceQual,TributaryO,Tributar_1,WaterDistr,LargePOU,RightID,PlaceOfUse,WRReport,WRDocs,WRMap,TrustOrNon,cent_Latit,cent_Longi,SHAPE_STAr,SHAPE_STLe,Shape_Leng,Shape_Area,geometry,WaDEUUID
0,33-10900,33,10900,,Active,1934-06-28,2001-10-11,UNITED STATES OF AMERICA ACTING THROUGH,STOCKWATER,4,0.0,0.0,LITTLE LOST RIVER,,SINKS,,33,0,222600,2,https://research.idwr.idaho.gov/apps/waterrigh...,https://research.idwr.idaho.gov/apps/Shared/Lf...,https://maps.idwr.idaho.gov/apps/Shared/PrintM...,,43.9814,-113.16981,8321866.5321,54110.17968,0.55442,0.00093,"MULTIPOLYGON (((-113.09907 43.88546, -113.0940...",idU0


In [33]:
# create temp dataframe to hold native ID and geometry from shapefile input
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)

# assing values to temp dataframe based on shapefile input
# for in_SiteNativeID assure ID value is the same as that listed above for POU info.
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['PlaceOfUse'].replace("", 0).fillna(0).astype(int).astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
print(len(dfPoUshape))
dfPoUshape.head()

213632


Unnamed: 0,in_SiteNativeID,geometry
0,POU2,"MULTIPOLYGON (((-113.09907 43.88546, -113.0940..."
1,POU7,"POLYGON ((-116.91055 43.52022, -116.91051 43.5..."
2,POU9,"POLYGON ((-116.17500 43.58271, -116.17537 43.5..."
3,POU12,"POLYGON ((-113.51150 42.73474, -113.51641 42.7..."
4,POU15,"POLYGON ((-113.49018 42.73806, -113.49019 42.7..."


## Export Data

In [34]:
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 479459 entries, 0 to 479458
Data columns (total 63 columns):
 #   Column                                        Non-Null Count   Dtype         
---  ------                                        --------------   -----         
 0   WaDEUUID                                      479459 non-null  object        
 1   in_MethodUUID                                 479459 non-null  object        
 2   in_VariableSpecificUUID                       479459 non-null  object        
 3   in_OrganizationUUID                           479459 non-null  object        
 4   in_Geometry                                   479459 non-null  object        
 5   in_GNISFeatureNameCV                          479459 non-null  object        
 6   in_WaterQualityIndicatorCV                    479459 non-null  object        
 7   in_WaterSourceName                            479459 non-null  object        
 8   in_WaterSourceNativeID                        479459 n

In [35]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,idD0,IDwr_M1,IDwr_V1,IDwr_O1,,,,Unnamed Stream,wadeId1,Surface Water,,,,4326,,,,43.45637,-115.84801,,,POD,,POD603830,,,ID,,,,,Decreed,,,,,0.02000,Active,63-29423,Sand Springs Ranch Co,1906-03-01,,,,,,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
1,idD1,IDwr_M1,IDwr_V1,IDwr_O1,,,,Badger Creek,wadeId2,Surface Water,,,,4326,,,,43.40412,-111.84819,,,POD,,POD626789,,,ID,,,,,Decreed,,,,,0.02000,Active,25-14121,Judys Ida Mon Ranches Inc,1914-05-01,,,,,,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
2,idD2,IDwr_M1,IDwr_V1,IDwr_O1,,,,Henry Creek,wadeId3,Surface Water,,,,4326,,,,43.37611,-111.90628,,,POD,,POD657093,,,ID,,,,,Decreed,,,,,0.02000,Active,25-13956,Henry Creek Ranch,1922-02-25,,,,,,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
3,idD3,IDwr_M1,IDwr_V1,IDwr_O1,,,,Henry Creek,wadeId3,Surface Water,,,,4326,,,,43.36705,-111.90395,,,POD,,POD657092,,,ID,,,,,Decreed,,,,,0.02000,Active,25-13956,Henry Creek Ranch,1922-02-25,,,,,,STOCKWATER,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
4,idD4,IDwr_M1,IDwr_V1,IDwr_O1,,,,Unnamed Slough,wadeId4,Surface Water,,,,4326,,,,43.43029,-111.30606,,,POD,,POD604944,,,ID,,,,,Decreed,,,,,,Active,23-11213,Stephen J Mc Grath,1957-06-20,,,,,10.80000,WILDLIFE STORAGE,,,,,,0,,,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
479454,idU215670,IDwr_M1,IDwr_V1,IDwr_O1,,,,Big Wood River,wadeId28,Surface Water,,Centroid,Wade Unspecified,4326,,,,43.34449,-114.29688,,,POU,,POU882085,,,ID,,,,,,,,,,,Active,37-22459,Heart Rock Ranch Llc,2010-01-19,,,,,,WILDLIFE STORAGE,,,,,,0,,0.00000,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
479455,idU215671,IDwr_M1,IDwr_V1,IDwr_O1,,,,Big Wood River,wadeId28,Surface Water,,Centroid,Wade Unspecified,4326,,,,43.34449,-114.29688,,,POU,,POU882086,,,ID,,,,,,,,,,,Active,37-22459,Heart Rock Ranch Llc,2010-01-19,,,,,,RECREATION STORAGE,,,,,,0,,0.00000,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
479456,idU215672,IDwr_M1,IDwr_V1,IDwr_O1,,,,Big Wood River,wadeId28,Surface Water,,Centroid,Wade Unspecified,4326,,,,43.34449,-114.29688,,,POU,,POU882087,,,ID,,,,,,,,,,,Active,37-22459,Heart Rock Ranch Llc,2010-01-19,,,,,,AESTHETIC STORAGE,,,,,,0,,0.00000,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...
479457,idU215673,IDwr_M1,IDwr_V1,IDwr_O1,,,,Big Wood River,wadeId28,Surface Water,,Centroid,Wade Unspecified,4326,,,,43.48400,-114.27675,,,POU,,POU882488,,,ID,,,,,,,,,,,Active,37-23393,Eccles Window Rock Ranch Llc,1883-03-24,,,,,,MITIGATION,,,,,,0,,0.00000,,,,,,,,https://research.idwr.idaho.gov/apps/waterrigh...


In [36]:
# Export the output dataframe
# change output name / abbreviation to match native state provdier and wade data type 
outdf.to_csv('RawInputData/Pwr_idMain.zip', compression=dict(method='zip', archive_name='Pwr_idMain.csv'), index=False) # The output, save as a zip
dfPoUshape.to_csv('RawInputData/P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.