# Pre-processing Oklahoma Allocation data for WaDEQA upload.
Date Updated: 04/07/2020
Purpose: To pre-process the Oklahoma data into one master file for simple DataFrame creation and extraction, and to validate datatypes and other data-related information.

In [1]:
# Needed Libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # show up to 999 columns when a DataFrame is displayed, instead of truncating wide tables
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # render floats with 5 decimal places rather than scientific notation

In [2]:
# Working directory: every relative input path used below is resolved against this folder.
workingDir = "G:/Shared drives/WaDE Data/Oklahoma/WaterAllocation/RawInputData"
os.chdir(workingDir)  # raises OSError if the folder cannot be reached

## POD Diversion Data
- groundwater wells
- surface water diversions

In [3]:
# Groundwater (permitted wells)
# Input file: a zipped CSV, read directly by pandas.
PGW_Input = "Permitted_Groundwater_Wells_input.zip"
df_PGW = pd.read_csv(PGW_Input)

# WaDE UUID tracker for data assessment.
# Only when the column is absent: tag each row with "okGD" + its row index and write the
# tagged table back to the same archive name that was read above, so a later read finds the column.
if 'WaDEUUID' not in df_PGW:
    df_PGW['WaDEUUID'] = "okGD" + df_PGW.index.astype(str)
    df_PGW.to_csv('Permitted_Groundwater_Wells_input.zip', compression=dict(method='zip', archive_name='Permitted_Groundwater_Wells_input.csv'), index=False)

# Row count, then a preview of the first rows.
print(len(df_PGW))
df_PGW.head()

21076


Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,WaDEUUID
0,-101.89635,36.57473,561,9753,19980623,36.57473,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20T00:00:00.000Z,1999-09-14T00:00:00.000Z,,,9753,okGD0
1,-101.57512,36.51634,752,50052,20020591,36.51634,-101.57511,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20T00:00:00.000Z,2003-05-03T00:00:00.000Z,,,50052,okGD1
2,-99.05251,34.58286,944,53324,20040578,34.58285,-99.0525,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53324,okGD2
3,-99.05032,34.59012,954,53325,20040578,34.59012,-99.05031,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,SW,NW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53325,okGD3
4,-99.05032,34.58649,945,53326,20040578,34.58649,-99.05031,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,NW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53326,okGD4


In [4]:
# Surface water (permitted diversion points)
# Input file: a plain CSV.
PSWDP_Input = "Permitted_Surface_Water_Diversion_Points_input.csv"
df_PSWDP = pd.read_csv(PSWDP_Input)

# WaDE UUID tracker for data assessment.
# Only when the column is absent: tag each row with "okSD" + its row index and save a zipped copy.
# NOTE(review): the tagged copy is written to "..._input.zip" while the data is read from
# "..._input.csv", so the file read above never gains the WaDEUUID column. The other inputs in
# this notebook read the .zip they write. Confirm which file is meant to be the input.
if 'WaDEUUID' not in df_PSWDP:
    df_PSWDP['WaDEUUID'] = "okSD" + df_PSWDP.index.astype(str)
    df_PSWDP.to_csv('Permitted_Surface_Water_Diversion_Points_input.zip', compression=dict(method='zip', archive_name='Permitted_Surface_Water_Diversion_Points_input.csv'), index=False)

# Row count, then a preview of the first rows.
print(len(df_PSWDP))
df_PSWDP.head()

3411


Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,WaDEUUID
0,-95.34167,34.61633,3452,18546,19980032,34.61633,-95.34167,Permit,Surface Water,Active,"Ralston, Leo",SE,SE,NE,30,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21T00:00:00.000Z,1998-11-10T00:00:00.000Z,11140105.0,1030.0,18546,okSD0
1,-94.64993,34.98829,10252,16743,19980036,34.98829,-94.64992,Permit,Surface Water,Active,"Neff, Tommy Gene",NW,SE,SE,15,06N,25EI,Le Flore,Regular,317.0,Irrigation,1998-09-02T00:00:00.000Z,1998-12-08T00:00:00.000Z,11110105.0,2010.0,16743,okSD1
2,-96.2121,33.96548,11360,37886,19990006,33.96547,-96.21209,Permit,Surface Water,Active,"Brewer, C J",SE,NW,NW,12,07S,10EI,Bryan,Regular,66.0,Irrigation,1999-02-01T00:00:00.000Z,1999-04-13T00:00:00.000Z,11140102.0,1060.0,37886,okSD2
3,-98.44861,35.43416,15855,13868,19990014,35.43416,-98.44861,Permit,Surface Water,Active,"Duncan, George",NE,NE,NE,15,11N,12WI,Caddo,Regular,125.0,"Recreation, Fish, Wildlife",1999-04-08T00:00:00.000Z,1999-06-08T00:00:00.000Z,11130302.0,1083.0,13868,okSD3
4,-95.84907,34.36722,4237,26136,19990035,34.36721,-95.84906,Permit,Surface Water,Active,Stream Natural Resources L C,NE,NE,SE,20,02S,14EI,Atoka,Regular,396.0,"Recreation, Fish, Wildlife",1999-09-27T00:00:00.000Z,2003-01-14T00:00:00.000Z,11140103.0,1041.0,26136,okSD4


In [5]:
# Concatenate - stack the groundwater rows on top of the surface water rows.
# ignore_index=True already renumbers the combined rows from 0, so the trailing
# reset_index(drop=True) leaves the index as it is.
dfPOD = pd.concat([df_PGW, df_PSWDP], ignore_index=True).reset_index(drop=True)

# Row count, then a one-row preview.
print(len(dfPOD))
dfPOD.head(1)

24487


Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,WaDEUUID
0,-101.89635,36.57473,561,9753,19980623,36.57473,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20T00:00:00.000Z,1999-09-14T00:00:00.000Z,,,9753,okGD0


In [6]:
# PODorPOUSite insert: flag every row of this frame with the constant "POD".
dfPOD['in_PODorPOUSite'] = "POD"
dfPOD.head(3)

Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,WaDEUUID,in_PODorPOUSite
0,-101.89635,36.57473,561,9753,19980623,36.57473,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20T00:00:00.000Z,1999-09-14T00:00:00.000Z,,,9753,okGD0,POD
1,-101.57512,36.51634,752,50052,20020591,36.51634,-101.57511,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20T00:00:00.000Z,2003-05-03T00:00:00.000Z,,,50052,okGD1,POD
2,-99.05251,34.58286,944,53324,20040578,34.58285,-99.0525,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53324,okGD2,POD


In [7]:
# Changing datatype of used date fields.
# Each column is parsed to datetime (values that cannot be parsed become NaT), rendered as
# month/day/year text, and parsed again, which keeps only the calendar date.
for date_col in ('DATE_FILED', 'DATE_ISSUED'):
    parsed_dates = pd.to_datetime(dfPOD[date_col], errors='coerce')
    dfPOD[date_col] = pd.to_datetime(parsed_dates.dt.strftime('%m/%d/%Y'))

In [8]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the site label for a counter value: "wadeID" followed by str(colrowValue)."""
    return f"wadeID{colrowValue!s}"

# One row per distinct (latitude, longitude) pair found in dfPOD.
dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = dfPOD['LATITUDE']
dfSiteNativeID['in_Longitude'] = dfPOD['LONGITUDE']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

# Number the distinct pairs 1..N in their existing order and turn each number into a label.
# dftemp reuses dfSiteNativeID's index so the labels line up row-for-row when assigned back.
dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Return the site label stored in dfSiteNativeID for latitude A and longitude B.

    Returns '' when both values are empty strings, when both are null,
    or when no row of the module-level dfSiteNativeID matches the pair.
    If several rows match, the first one is used.
    """
    both_blank = A == '' and B == ''
    if both_blank or (pd.isnull(A) and pd.isnull(B)):
        return ''

    same_site = (dfSiteNativeID['in_Latitude'] == A) & (dfSiteNativeID['in_Longitude'] == B)
    matches = dfSiteNativeID.loc[same_site, 'in_SiteNativeID']
    return '' if matches.empty else matches.iloc[0]

# Look up each row's site label from its coordinates, then prefix it with "POD".
# A row whose lookup returns '' ends up with the bare value "POD".
# NOTE(review): the lookup filters dfSiteNativeID once per row, so run time grows with
# (rows x distinct sites).
dfPOD['in_SiteNativeID'] = dfPOD.apply(lambda row: retrieveSiteNativeID( row['LATITUDE'], row['LONGITUDE']), axis=1)
dfPOD['in_SiteNativeID'] = "POD" + dfPOD['in_SiteNativeID'].astype(str)
dfPOD.head(2)

Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,WaDEUUID,in_PODorPOUSite,in_SiteNativeID
0,-101.89635,36.57473,561,9753,19980623,36.57473,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20,1999-09-14,,,9753,okGD0,POD,PODwadeID1
1,-101.57512,36.51634,752,50052,20020591,36.51634,-101.57511,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20,2003-05-03,,,50052,okGD1,POD,PODwadeID2


## Place of Use Data

In [9]:
# Input file: areas of use, a zipped CSV read directly by pandas.
AOU_Input = "OK_AreasofUse_input.zip"
dfPOU = pd.read_csv(AOU_Input)

# WaDE UUID tracker for data assessment.
# Only when the column is absent: tag each row with "okU" + its row index and write the
# tagged table back to the same archive name that was read above.
if 'WaDEUUID' not in dfPOU:
    dfPOU['WaDEUUID'] = "okU" + dfPOU.index.astype(str)
    dfPOU.to_csv('OK_AreasofUse_input.zip', compression=dict(method='zip', archive_name='OK_AreasofUse_input.csv'), index=False)

# Row count, then a preview of the first rows.
print(len(dfPOU))
dfPOU.head()

4349


Unnamed: 0,OID_,OBJECTID,RECORD_ID,PERMIT_NUMBER,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,LATITUDE,LONGITUDE,WaDEUUID
0,359,69,15030,19760098,Permit,Surface Water,Active,"Cline, Judy",,W2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.51415,-97.21902,okU0
1,358,70,15501,19760098,Permit,Surface Water,Active,"Cline, Judy",,E2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.51414,-97.21464,okU1
2,279,80,52093,19580175,Permit,Surface Water,Active,"Arbuckle Enterprises, LLC",,SE,NE,23,01N,01WI,Garvin,Vested,200.0,Irrigation,4/16/1958 0:00,8/12/1969 0:00,11130303,1081,34.5449,-97.26688,okU2
3,280,81,52094,19580175,Permit,Surface Water,Active,"Arbuckle Enterprises, LLC",,,SE,23,01N,01WI,Garvin,Vested,200.0,Irrigation,4/16/1958 0:00,8/12/1969 0:00,11130303,1081,34.53949,-97.26909,okU3
4,277,82,52091,19580175,Permit,Surface Water,Active,"Arbuckle Enterprises, LLC",,SW,NW,24,01N,01WI,Garvin,Vested,200.0,Irrigation,4/16/1958 0:00,8/12/1969 0:00,11130303,1081,34.54484,-97.26262,okU4


In [10]:
# PODorPOUSite insert: flag every row of this frame with the constant "POU".
dfPOU['in_PODorPOUSite'] = "POU"
dfPOU.head(3)

Unnamed: 0,OID_,OBJECTID,RECORD_ID,PERMIT_NUMBER,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,LATITUDE,LONGITUDE,WaDEUUID,in_PODorPOUSite
0,359,69,15030,19760098,Permit,Surface Water,Active,"Cline, Judy",,W2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.51415,-97.21902,okU0,POU
1,358,70,15501,19760098,Permit,Surface Water,Active,"Cline, Judy",,E2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.51414,-97.21464,okU1,POU
2,279,80,52093,19580175,Permit,Surface Water,Active,"Arbuckle Enterprises, LLC",,SE,NE,23,01N,01WI,Garvin,Vested,200.0,Irrigation,4/16/1958 0:00,8/12/1969 0:00,11130303,1081,34.5449,-97.26688,okU2,POU


In [11]:
# Changing datatype of used date fields.
# Parse, render as month/day/year text, and parse again so only the calendar date remains;
# values that cannot be parsed become NaT.
for date_col in ('DATE_FILED', 'DATE_ISSUED'):
    parsed_dates = pd.to_datetime(dfPOU[date_col], errors='coerce')
    dfPOU[date_col] = pd.to_datetime(parsed_dates.dt.strftime('%m/%d/%Y'))

In [12]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Build a site label by appending the string form of colrowValue to "wadeID"."""
    return "".join(("wadeID", str(colrowValue)))

# One row per distinct (latitude, longitude) pair found in dfPOU.
# This rebinds dfSiteNativeID, so lookups from here on use the POU pairs.
dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = dfPOU['LATITUDE']
dfSiteNativeID['in_Longitude'] = dfPOU['LONGITUDE']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

# Number the distinct pairs 1..N in their existing order and turn each number into a label.
# dftemp reuses dfSiteNativeID's index so the labels line up row-for-row when assigned back.
dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Find the label recorded in dfSiteNativeID for the coordinate pair (A, B).

    A is compared against 'in_Latitude' and B against 'in_Longitude' of the
    module-level dfSiteNativeID. The result is '' when both inputs are empty
    strings, when both are null, or when nothing matches; otherwise it is the
    first matching 'in_SiteNativeID' value.
    """
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        return ''

    lat_hit = dfSiteNativeID['in_Latitude'] == A
    lon_hit = dfSiteNativeID['in_Longitude'] == B
    found = dfSiteNativeID.loc[lat_hit & lon_hit, 'in_SiteNativeID']
    if found.empty:
        return ''
    return found.iloc[0]

# Look up each row's site label from its coordinates, then prefix it with "POU".
# A row whose lookup returns '' ends up with the bare value "POU".
dfPOU['in_SiteNativeID'] = dfPOU.apply(lambda row: retrieveSiteNativeID( row['LATITUDE'], row['LONGITUDE']), axis=1)
dfPOU['in_SiteNativeID'] = "POU" + dfPOU['in_SiteNativeID'].astype(str)
dfPOU.head(2)

Unnamed: 0,OID_,OBJECTID,RECORD_ID,PERMIT_NUMBER,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,LATITUDE,LONGITUDE,WaDEUUID,in_PODorPOUSite,in_SiteNativeID
0,359,69,15030,19760098,Permit,Surface Water,Active,"Cline, Judy",,W2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",1976-08-11,1976-12-14,11130303,1081,34.51415,-97.21902,okU0,POU,POUwadeID1
1,358,70,15501,19760098,Permit,Surface Water,Active,"Cline, Judy",,E2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",1976-08-11,1976-12-14,11130303,1081,34.51414,-97.21464,okU1,POU,POUwadeID2


## Concatenate POD and POU

In [13]:
# Concatenate the POD rows and the POU rows into one frame.
# Columns present in only one of the two frames are filled with NaN by concat;
# replace(np.nan, '') then turns every NaN in the result into an empty string.
dfin = pd.concat([dfPOD, dfPOU], ignore_index=True).reset_index(drop=True).replace(np.nan, '')

# Row count, then a preview of the first rows.
print(len(dfin))
dfin.head()

28836


Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,WaDEUUID,in_PODorPOUSite,in_SiteNativeID,OID_
0,-101.89635,36.57473,561,9753,19980623,36.57473,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20,1999-09-14,,,9753.0,okGD0,POD,PODwadeID1,
1,-101.57512,36.51634,752,50052,20020591,36.51634,-101.57511,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20,2003-05-03,,,50052.0,okGD1,POD,PODwadeID2,
2,-99.05251,34.58286,944,53324,20040578,34.58285,-99.0525,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53324.0,okGD2,POD,PODwadeID3,
3,-99.05032,34.59012,954,53325,20040578,34.59012,-99.05031,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,SW,NW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53325.0,okGD3,POD,PODwadeID4,
4,-99.05032,34.58649,945,53326,20040578,34.58649,-99.05031,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,NW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53326.0,okGD4,POD,PODwadeID5,


In [14]:
# Fixing Beneficial Uses PRIMARY_PURPOSE
def fixRecFishWild(colrowValue):
    """Return the value as a stripped string, with the commas removed from one known label.

    'Recreation, Fish, Wildlife' becomes 'Recreation Fish Wildlife';
    any other value is returned as str(colrowValue).strip().
    """
    purpose = str(colrowValue).strip()
    renamed = {'Recreation, Fish, Wildlife': 'Recreation Fish Wildlife'}
    return renamed.get(purpose, purpose)

# Apply the label fix to every row, then list the distinct values that remain.
dfin['PRIMARY_PURPOSE'] = dfin.apply(lambda row: fixRecFishWild(row['PRIMARY_PURPOSE']), axis=1)
dfin['PRIMARY_PURPOSE'].unique()

array(['Agriculture', 'Irrigation', 'Industrial', 'Public Supply',
       'Commercial', 'Power', 'Recreation Fish Wildlife', 'Mining',
       'Other', ''], dtype=object)

In [15]:
# Create the output dataframe from dfin (the combined POD + POU rows).
# Columns appear in the output in the order they are first assigned below.
df = pd.DataFrame()

# Data Assessment UUID
# Assigned first: this Series gives df its rows, so the constant values below are repeated on every row.
df['WaDEUUID'] = dfin['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "OKwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "OKwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "OKwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = "WaDE Unspecified"
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = dfin['WATER']

# Site Info
df['in_CoordinateAccuracy'] = "WaDE Unspecified"
df['in_CoordinateMethodCV'] = "WaDE Unspecified"
df['in_County'] = dfin['COUNTY']
df['in_EPSGCodeCV'] = 4326
# NOTE(review): in_Geometry was already created under WaterSource Info; this second
# assignment overwrites it with the same empty string and does not move the column.
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = dfin['HYDRO_UNIT']
df['in_Latitude'] = dfin['LATITUDE']
df['in_Longitude'] = dfin['LONGITUDE']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = dfin['in_PODorPOUSite'] # see above
df['in_SiteName'] = "WaDE Unspecified"
df['in_SiteNativeID'] = dfin['in_SiteNativeID']
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = "WaDE Unspecified"
df['in_StateCV'] = "OK"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = dfin['DATE_FILED']
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = ""
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = ""
df['in_AllocationLegalStatusCV'] = dfin['STATUS']
# Blank or missing permit numbers become the string "0".
df['in_AllocationNativeID'] =  dfin['PERMIT_NUMBER'].replace("", 0).fillna(0).astype(str)
df['in_AllocationOwner'] = dfin['ENTITY_NAME']
df['in_AllocationPriorityDate'] = dfin['DATE_ISSUED']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = ""
df['in_AllocationTimeframeStart'] = ""
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfin['TOTAL_PERMITTED_ACRE_FEET']
df['in_BeneficialUseCategory'] = dfin['PRIMARY_PURPOSE']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = ""

# Work on a copy: drop fully identical rows, renumber the index, and blank out any NaN.
outdf = df.copy()
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, '')
print(len(outdf))
outdf.head()

28836


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,okGD0,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,,Groundwater,WaDE Unspecified,WaDE Unspecified,Texas,4326,,,,36.57473,-101.89634,,,POD,WaDE Unspecified,PODwadeID1,,WaDE Unspecified,OK,,1998-11-20,,,,,,,,,Active,19980623,"Prestage Farms of Oklahoma, LLC",1999-09-14,,,,,10.0,Agriculture,,,,,,0,,,,,,,,,,
1,okGD1,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,,Groundwater,WaDE Unspecified,WaDE Unspecified,Texas,4326,,,,36.51634,-101.57511,,,POD,WaDE Unspecified,PODwadeID2,,WaDE Unspecified,OK,,2002-09-20,,,,,,,,,Active,20020591,Long Family Partnership,2003-05-03,,,,,1280.0,Irrigation,,,,,,0,,,,,,,,,,
2,okGD2,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,,Groundwater,WaDE Unspecified,WaDE Unspecified,Tillman,4326,,,,34.58285,-99.0525,,,POD,WaDE Unspecified,PODwadeID3,,WaDE Unspecified,OK,,2004-09-07,,,,,,,,,Active,20040578,"McElroy, Johnny R and Dana D",2005-05-10,,,,,314.0,Irrigation,,,,,,0,,,,,,,,,,
3,okGD3,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,,Groundwater,WaDE Unspecified,WaDE Unspecified,Tillman,4326,,,,34.59012,-99.05031,,,POD,WaDE Unspecified,PODwadeID4,,WaDE Unspecified,OK,,2004-09-07,,,,,,,,,Active,20040578,"McElroy, Johnny R and Dana D",2005-05-10,,,,,314.0,Irrigation,,,,,,0,,,,,,,,,,
4,okGD4,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,,Groundwater,WaDE Unspecified,WaDE Unspecified,Tillman,4326,,,,34.58649,-99.05031,,,POD,WaDE Unspecified,PODwadeID5,,WaDE Unspecified,OK,,2004-09-07,,,,,,,,,Active,20040578,"McElroy, Johnny R and Dana D",2005-05-10,,,,,314.0,Irrigation,,,,,,0,,,,,,,,,,


## Data Fixes

In [16]:
# Clean owner name up
def cleanOwnerDataFunc(Val):
    """Return Val as a title-cased string with selected punctuation removed.

    The value is converted with str(), every occurrence of the characters
    $ @ & . ; , / ) ( - is deleted, the result is title-cased, and leading and
    trailing whitespace is stripped. Spaces that surrounded a removed
    character are kept, so "A & B" becomes "A  B" with two spaces.
    """
    Val = str(Val)
    # Raw string: in a normal string literal "\)" is an invalid escape sequence, which newer
    # Python versions warn about. The regex seen by re.sub is unchanged.
    Val = re.sub(r"[$@&.;,/\)(-]", "", Val).title().strip()
    return Val
# Clean every owner name, then list the distinct cleaned names.
outdf['in_AllocationOwner'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Prestage Farms Of Oklahoma Llc', 'Long Family Partnership',
       'Mcelroy Johnny R And Dana D', ...,
       'Adams Revocable Trusts Thomas L  Susan R', 'Gossett Stan  Deanna',
       'Kinchen Casey'], dtype=object)

In [17]:
# Fixing empty string names

def fixEmptyString(val):
    """Return "WaDE Unspecified" for a blank or missing value, otherwise return val unchanged.

    A value counts as blank or missing when it equals "", " " or "nan",
    or when pd.isnull(val) is true.
    """
    is_blank = val in ("", " ", "nan") or pd.isnull(val)
    return "WaDE Unspecified" if is_blank else val

In [18]:
# Replace blank or missing legal status values with "WaDE Unspecified", then list the distinct values.
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: fixEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array(['Active', 'Pending'], dtype=object)

In [19]:
# Replace blank or missing owner names with "WaDE Unspecified", then list the distinct values.
outdf['in_AllocationOwner'] = outdf.apply(lambda row: fixEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Prestage Farms Of Oklahoma Llc', 'Long Family Partnership',
       'Mcelroy Johnny R And Dana D', ...,
       'Adams Revocable Trusts Thomas L  Susan R', 'Gossett Stan  Deanna',
       'Kinchen Casey'], dtype=object)

In [20]:
# Replace blank or missing beneficial use values with "WaDE Unspecified", then list the distinct values.
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: fixEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['Agriculture', 'Irrigation', 'Industrial', 'Public Supply',
       'Commercial', 'Power', 'Recreation Fish Wildlife', 'Mining',
       'Other', 'WaDE Unspecified'], dtype=object)

In [21]:
# in_Latitude & in_Longitude
# Coerce both coordinate columns to numbers; anything that is not numeric becomes 0.
for coord_col in ('in_Latitude', 'in_Longitude'):
    outdf[coord_col] = pd.to_numeric(outdf[coord_col], errors='coerce').fillna(0)
outdf.head(1)

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,okGD0,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,,Groundwater,WaDE Unspecified,WaDE Unspecified,Texas,4326,,,,36.57473,-101.89634,,,POD,WaDE Unspecified,PODwadeID1,,WaDE Unspecified,OK,,1998-11-20,,,,,,,,,Active,19980623,Prestage Farms Of Oklahoma Llc,1999-09-14,,,,,10.0,Agriculture,,,,,,0,,,,,,,,,,


In [22]:
# Update datatype of Priority Date to fit WaDE 2.0 structure
# Parse to datetime (unparseable values become NaT), render as month/day/year text,
# and parse again so the column holds date-only datetimes.
priority_dates = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors='coerce')
outdf['in_AllocationPriorityDate'] = pd.to_datetime(priority_dates.dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

array(['1999-09-14T00:00:00.000000000', '2003-05-03T00:00:00.000000000',
       '2005-05-10T00:00:00.000000000', ...,
       '2020-09-15T00:00:00.000000000', '2021-03-16T00:00:00.000000000',
       '2021-04-20T00:00:00.000000000'], dtype='datetime64[ns]')

In [23]:
# Fixing in_AllocationFlow_CFS datatype
# Values that cannot be read as numbers are coerced to NaN and then replaced with 0.
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').fillna(0)
outdf['in_AllocationFlow_CFS'].unique()

array([0.])

In [24]:
# Fixing in_AllocationVolume_AF datatype
# Values that cannot be read as numbers are coerced to NaN and then replaced with 0.
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').fillna(0)
outdf['in_AllocationVolume_AF'].unique()

array([  10. , 1280. ,  314. , ..., 1968. ,  295.5,  473.4])

In [25]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the water source label for a counter value: "wadeID" followed by str(colrowValue)."""
    return "wadeID{}".format(str(colrowValue))

# One row per distinct water source type found in outdf.
dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

# Number the distinct types 1..N in their existing order and turn each number into a label.
# dftemp reuses dfWaterSourceNativeID's index so the labels line up row-for-row when assigned back.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Return the label recorded in dfWaterSourceNativeID for water source type A.

    Looks A up in the 'in_WaterSourceTypeCV' column of the module-level
    dfWaterSourceNativeID and returns the first matching
    'in_WaterSourceNativeID' value, or '' when nothing matches.
    """
    type_hit = dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A
    candidates = dfWaterSourceNativeID.loc[type_hit, 'in_WaterSourceNativeID']
    if candidates.empty:
        return ''
    return candidates.iloc[0]

# Look up the label for each row's water source type, then list the distinct labels.
outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1', 'wadeID2'], dtype=object)

## Shapefile Data
- For attaching geometry to csv inputs.

In [26]:
# PoU Shapefile Data
# Shapefile input, read with geopandas; the path is relative to the working directory.
dfPoUshapetemp = gpd.read_file('shapefile/OK_PoU2.shp')
dfPoUshapetemp.head(3)

Unnamed: 0,OBJECTID,RECORD_ID,PERMIT_NUM,RECORD_TYP,WATER,STATUS,ENTITY_NAM,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYP,TOTAL_PERM,PRIMARY_PU,DATE_FILED,DATE_ISSUE,HYDRO_UNIT,STREAM_SYS,Lattitude,Longitude,geometry
0,3793,18547,19980032,Permit,Surface Water,Active,"Ralston, Leo",,SE,NE,30,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.61758,-95.34225,"POLYGON ((-95.34225 34.61576, -95.34444 34.615..."
1,3792,20509,19980032,Permit,Surface Water,Active,"Ralston, Leo",,E2,NW,29,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.61944,-95.33352,"POLYGON ((-95.33572 34.61944, -95.33572 34.623..."
2,3791,20003,19980032,Permit,Surface Water,Active,"Ralston, Leo",E2,NW,NW,29,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.62126,-95.33682,"POLYGON ((-95.33572 34.61944, -95.33792 34.619..."


In [27]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Prefix the string form of colrowValue with "wadeID" and return the result."""
    suffix = str(colrowValue)
    return "wadeID%s" % (suffix,)

# One row per distinct (latitude, longitude) pair found in the shapefile attributes.
# The shapefile's latitude column is spelled 'Lattitude'.
dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = dfPoUshapetemp['Lattitude']
dfSiteNativeID['in_Longitude'] = dfPoUshapetemp['Longitude']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

# Number the distinct pairs 1..N in their existing order and turn each number into a label.
# dftemp reuses dfSiteNativeID's index so the labels line up row-for-row when assigned back.
dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Look up the custom site ID registered for a coordinate pair.

    Parameters
    ----------
    A
        Latitude value, compared against ``dfSiteNativeID['in_Latitude']``.
    B
        Longitude value, compared against ``dfSiteNativeID['in_Longitude']``.

    Returns
    -------
    The ``in_SiteNativeID`` of the first row of the module-level
    ``dfSiteNativeID`` whose latitude and longitude both equal ``A`` and ``B``,
    or ``''`` when both inputs are empty strings, both are null, or no row
    matches.
    """
    # Nothing to look up when the pair is entirely blank or entirely missing.
    if A == '' and B == '':
        return ''
    if pd.isnull(A) and pd.isnull(B):
        return ''

    sameLatitude = dfSiteNativeID['in_Latitude'] == A
    sameLongitude = dfSiteNativeID['in_Longitude'] == B
    matches = dfSiteNativeID.loc[sameLatitude & sameLongitude, 'in_SiteNativeID']

    # Only the first match is used.
    return '' if matches.empty else matches.iloc[0]

# Tag every shapefile row with the site ID found for its coordinate pair
# (an empty string when retrieveSiteNativeID finds no match).
dfPoUshapetemp['in_SiteNativeID'] = [
    retrieveSiteNativeID(lat, lon)
    for lat, lon in zip(dfPoUshapetemp['Lattitude'], dfPoUshapetemp['Longitude'])
]
dfPoUshapetemp.head(2)

Unnamed: 0,OBJECTID,RECORD_ID,PERMIT_NUM,RECORD_TYP,WATER,STATUS,ENTITY_NAM,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYP,TOTAL_PERM,PRIMARY_PU,DATE_FILED,DATE_ISSUE,HYDRO_UNIT,STREAM_SYS,Lattitude,Longitude,geometry,in_SiteNativeID
0,3793,18547,19980032,Permit,Surface Water,Active,"Ralston, Leo",,SE,NE,30,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.61758,-95.34225,"POLYGON ((-95.34225 34.61576, -95.34444 34.615...",wadeID1
1,3792,20509,19980032,Permit,Surface Water,Active,"Ralston, Leo",,E2,NW,29,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.61944,-95.33352,"POLYGON ((-95.33572 34.61944, -95.33572 34.623...",wadeID2


In [28]:
# Geometry table for export: one row per distinct (prefixed site ID, geometry) pair.
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(
    {
        # Prefix the custom ID with "POU".
        'in_SiteNativeID': "POU" + dfPoUshapetemp['in_SiteNativeID'].astype(str),
        'geometry': dfPoUshapetemp['geometry'],
    },
    columns=columnsList,
).drop_duplicates()  # defaults: all columns, keep first occurrence, keep index
print(len(dfPoUshape))
dfPoUshape.head(3)

4151


Unnamed: 0,in_SiteNativeID,geometry
0,POUwadeID1,"POLYGON ((-95.34225 34.61576, -95.34444 34.615..."
1,POUwadeID2,"POLYGON ((-95.33572 34.61944, -95.33572 34.623..."
2,POUwadeID3,"POLYGON ((-95.33572 34.61944, -95.33792 34.619..."


## Export Data

In [29]:
# Print a summary of outdf: index range, columns, non-null counts and dtypes.
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28836 entries, 0 to 28835
Data columns (total 63 columns):
 #   Column                                        Non-Null Count  Dtype         
---  ------                                        --------------  -----         
 0   WaDEUUID                                      28836 non-null  object        
 1   in_MethodUUID                                 28836 non-null  object        
 2   in_VariableSpecificUUID                       28836 non-null  object        
 3   in_OrganizationUUID                           28836 non-null  object        
 4   in_Geometry                                   28836 non-null  object        
 5   in_GNISFeatureNameCV                          28836 non-null  object        
 6   in_WaterQualityIndicatorCV                    28836 non-null  object        
 7   in_WaterSourceName                            28836 non-null  object        
 8   in_WaterSourceNativeID                        28836 non-null  obje

In [30]:
# Show outdf as the cell output.
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,okGD0,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID1,Groundwater,WaDE Unspecified,WaDE Unspecified,Texas,4326,,,,36.57473,-101.89634,,,POD,WaDE Unspecified,PODwadeID1,,WaDE Unspecified,OK,,1998-11-20,,,,,,,,0.00000,Active,19980623,Prestage Farms Of Oklahoma Llc,1999-09-14,,,,,10.00000,Agriculture,,,,,,0,,,,,,,,,,
1,okGD1,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID1,Groundwater,WaDE Unspecified,WaDE Unspecified,Texas,4326,,,,36.51634,-101.57511,,,POD,WaDE Unspecified,PODwadeID2,,WaDE Unspecified,OK,,2002-09-20,,,,,,,,0.00000,Active,20020591,Long Family Partnership,2003-05-03,,,,,1280.00000,Irrigation,,,,,,0,,,,,,,,,,
2,okGD2,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID1,Groundwater,WaDE Unspecified,WaDE Unspecified,Tillman,4326,,,,34.58285,-99.05250,,,POD,WaDE Unspecified,PODwadeID3,,WaDE Unspecified,OK,,2004-09-07,,,,,,,,0.00000,Active,20040578,Mcelroy Johnny R And Dana D,2005-05-10,,,,,314.00000,Irrigation,,,,,,0,,,,,,,,,,
3,okGD3,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID1,Groundwater,WaDE Unspecified,WaDE Unspecified,Tillman,4326,,,,34.59012,-99.05031,,,POD,WaDE Unspecified,PODwadeID4,,WaDE Unspecified,OK,,2004-09-07,,,,,,,,0.00000,Active,20040578,Mcelroy Johnny R And Dana D,2005-05-10,,,,,314.00000,Irrigation,,,,,,0,,,,,,,,,,
4,okGD4,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID1,Groundwater,WaDE Unspecified,WaDE Unspecified,Tillman,4326,,,,34.58649,-99.05031,,,POD,WaDE Unspecified,PODwadeID5,,WaDE Unspecified,OK,,2004-09-07,,,,,,,,0.00000,Active,20040578,Mcelroy Johnny R And Dana D,2005-05-10,,,,,314.00000,Irrigation,,,,,,0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28831,okU4344,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID2,Surface Water,WaDE Unspecified,WaDE Unspecified,Grant,4326,,,,36.73521,-97.88601,,,POU,WaDE Unspecified,POUwadeID4147,,WaDE Unspecified,OK,,2020-09-29,,,,,,,,0.00000,Active,20200018,Kinchen Casey,2021-03-16,,,,,32.00000,Irrigation,,,,,,0,,,,,,,,,,
28832,okU4345,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID2,Surface Water,WaDE Unspecified,WaDE Unspecified,Grant,4326,,,,36.73340,-97.88102,,,POU,WaDE Unspecified,POUwadeID4148,,WaDE Unspecified,OK,,2020-09-29,,,,,,,,0.00000,Active,20200018,Kinchen Casey,2021-03-16,,,,,32.00000,Irrigation,,,,,,0,,,,,,,,,,
28833,okU4346,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID2,Surface Water,WaDE Unspecified,WaDE Unspecified,Grant,4326,,,,36.73459,-97.88387,,,POU,WaDE Unspecified,POUwadeID4149,,WaDE Unspecified,OK,,2020-09-29,,,,,,,,0.00000,Active,20200018,Kinchen Casey,2021-03-16,,,,,32.00000,Irrigation,,,,,,0,,,,,,,,,,
28834,okU4347,OKwr_M1,OKwr_V1,OKwr_O1,,,,WaDE Unspecified,wadeID2,Surface Water,WaDE Unspecified,WaDE Unspecified,Grant,4326,,,,36.73309,-97.88340,,,POU,WaDE Unspecified,POUwadeID4150,,WaDE Unspecified,OK,,2020-09-29,,,,,,,,0.00000,Active,20200018,Kinchen Casey,2021-03-16,,,,,32.00000,Irrigation,,,,,,0,,,,,,,,,,


In [31]:
# Export the output dataframe
# Both archives name their inner CSV explicitly via archive_name. With a bare
# compression="zip" pandas derives the member name from the zip file's own name,
# so the file inside Pwr_okMain.zip would not carry a .csv extension.
outdf.to_csv('Pwr_okMain.zip', compression=dict(method='zip', archive_name='Pwr_okMain.csv'), index=False)  # The output, saved as a zip
dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.