# Pre-processing Oklahoma Allocation data for WaDEQA upload.
Date Updated: 04/07/2020
Purpose:  To pre-process the Oklahoma data into one master file for simple DataFrame creation and extraction, and to validate datatypes and other data-related information.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
import geopandas as gpd # the library that lets us read in shapefiles
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working directory. The input CSV/shapefile names and the exported file names used
# below are relative paths, so they resolve against this directory after the chdir.
workingDir = "G:/Shared drives/WaDE Data/Oklahoma/WaterAllocation/RawInputData"
os.chdir(workingDir)

## POD Division Data

In [3]:
# Source CSVs: permitted groundwater wells and permitted surface-water diversion points.
PGW_Input = "Permitted_Groundwater_Wells_input.csv"
PSWDP_Input = "Permitted_Surface_Water_Diversion_Points_input.csv"

# Load each source file into its own DataFrame.
df_PGW, df_PSWDP = (pd.read_csv(csvPath) for csvPath in (PGW_Input, PSWDP_Input))

# Stack the two tables into one; ignore_index=True already renumbers the rows 0..n-1.
dfPOD = pd.concat([df_PGW, df_PSWDP], ignore_index=True)

print(len(dfPOD))
dfPOD.head(1)

24487


Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2
0,-101.896349,36.574734,561,9753,19980623,36.574728,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20T00:00:00.000Z,1999-09-14T00:00:00.000Z,,,9753


In [4]:
# PODorPOUSite insert: every row of dfPOD gets the constant tag "POD".
dfPOD['in_PODorPOUSite'] = "POD"
dfPOD.head(3)

Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,in_PODorPOUSite
0,-101.896349,36.574734,561,9753,19980623,36.574728,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20T00:00:00.000Z,1999-09-14T00:00:00.000Z,,,9753,POD
1,-101.57512,36.516345,752,50052,20020591,36.516338,-101.575112,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20T00:00:00.000Z,2003-05-03T00:00:00.000Z,,,50052,POD
2,-99.052511,34.582855,944,53324,20040578,34.582849,-99.052503,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53324,POD


In [5]:
# Convert the date fields that are used later to datetime.
# Pass 1 parses the raw text (values that cannot be parsed become NaT).
# Pass 2 round-trips through an mm/dd/YYYY string, so only the calendar date survives.
for dateField in ('DATE_FILED', 'DATE_ISSUED'):
    parsedDates = pd.to_datetime(dfPOD[dateField], errors='coerce')
    dfPOD[dateField] = pd.to_datetime(parsedDates.dt.strftime('%m/%d/%Y'))

In [6]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the custom site ID text: "WaDEOK_S" followed by str(colrowValue)."""
    return f"WaDEOK_S{colrowValue!s}"

# One row per distinct (latitude, longitude) pair in dfPOD. drop_duplicates keeps the
# first occurrence of each pair together with its original row label.
dfSiteNativeID = pd.DataFrame({
    'in_Latitude': dfPOD['LATITUDE'],
    'in_Longitude': dfPOD['LONGITUDE'],
}).drop_duplicates()

# Number the distinct pairs 1..n in order of first appearance, then turn each number
# into its "WaDEOK_S<n>" ID.
dftemp = pd.DataFrame({"Count": range(1, len(dfSiteNativeID.index) + 1)},
                      index=dfSiteNativeID.index)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda counterRow: assignSiteUUID(counterRow['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Look up the custom site ID for a latitude/longitude pair.

    A is compared with the in_Latitude column and B with the in_Longitude column
    of the cell-level dfSiteNativeID table.

    Returns the first matching in_SiteNativeID value, or '' when both inputs are
    empty strings, both inputs are null, or no row matches.
    """
    bothBlank = A == '' and B == ''
    if bothBlank or (pd.isnull(A) and pd.isnull(B)):
        return ''

    sameSite = (dfSiteNativeID['in_Latitude'] == A) & (dfSiteNativeID['in_Longitude'] == B)
    matches = dfSiteNativeID.loc[sameSite, 'in_SiteNativeID']
    return '' if matches.empty else matches.iloc[0]

# Look up each row's site ID from its coordinates, then prefix the result with "POD".
# retrieveSiteNativeID returns '' when it finds nothing, so such rows end up as just "POD".
dfPOD['in_SiteNativeID'] = dfPOD.apply(lambda row: retrieveSiteNativeID( row['LATITUDE'], row['LONGITUDE']), axis=1)
dfPOD['in_SiteNativeID'] = "POD" + dfPOD['in_SiteNativeID'].astype(str)
dfPOD.head(2)

Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,in_PODorPOUSite,in_SiteNativeID
0,-101.896349,36.574734,561,9753,19980623,36.574728,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20,1999-09-14,,,9753,POD,PODWaDEOK_S1
1,-101.57512,36.516345,752,50052,20020591,36.516338,-101.575112,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20,2003-05-03,,,50052,POD,PODWaDEOK_S2


## Place of Use Data

In [7]:
# Input file: areas-of-use (place of use) CSV.
AOU_Input = "OK_AreasofUse_input.csv"

# Dataframe creation
# Alternative kept for reference (explicit encoding):
# dfPOU = pd.read_csv(AOU_Input, encoding = "ISO-8859-1")
dfPOU = pd.read_csv(AOU_Input)

print(len(dfPOU))
dfPOU.head(3)

4349


Unnamed: 0,OID_,OBJECTID,RECORD_ID,PERMIT_NUMBER,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,LATITUDE,LONGITUDE
0,359,69,15030,19760098,Permit,Surface Water,Active,"Cline, Judy",,W2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.514148,-97.219024
1,358,70,15501,19760098,Permit,Surface Water,Active,"Cline, Judy",,E2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.514137,-97.21464
2,279,80,52093,19580175,Permit,Surface Water,Active,"Arbuckle Enterprises, LLC",,SE,NE,23,01N,01WI,Garvin,Vested,200.0,Irrigation,4/16/1958 0:00,8/12/1969 0:00,11130303,1081,34.544898,-97.266883


In [8]:
# PODorPOUSite insert: every row of dfPOU gets the constant tag "POU".
dfPOU['in_PODorPOUSite'] = "POU"
dfPOU.head(3)

Unnamed: 0,OID_,OBJECTID,RECORD_ID,PERMIT_NUMBER,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,LATITUDE,LONGITUDE,in_PODorPOUSite
0,359,69,15030,19760098,Permit,Surface Water,Active,"Cline, Judy",,W2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.514148,-97.219024,POU
1,358,70,15501,19760098,Permit,Surface Water,Active,"Cline, Judy",,E2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",8/11/1976 0:00,12/14/1976 0:00,11130303,1081,34.514137,-97.21464,POU
2,279,80,52093,19580175,Permit,Surface Water,Active,"Arbuckle Enterprises, LLC",,SE,NE,23,01N,01WI,Garvin,Vested,200.0,Irrigation,4/16/1958 0:00,8/12/1969 0:00,11130303,1081,34.544898,-97.266883,POU


In [9]:
# Convert the date fields that are used later to datetime.
# Pass 1 parses the raw text (values that cannot be parsed become NaT).
# Pass 2 round-trips through an mm/dd/YYYY string, so only the calendar date survives.
for dateField in ('DATE_FILED', 'DATE_ISSUED'):
    parsedDates = pd.to_datetime(dfPOU[dateField], errors='coerce')
    dfPOU[dateField] = pd.to_datetime(parsedDates.dt.strftime('%m/%d/%Y'))

In [10]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the custom site ID text: "WaDEOK_S" followed by str(colrowValue)."""
    return f"WaDEOK_S{colrowValue!s}"

# One row per distinct (latitude, longitude) pair in dfPOU. drop_duplicates keeps the
# first occurrence of each pair together with its original row label.
dfSiteNativeID = pd.DataFrame({
    'in_Latitude': dfPOU['LATITUDE'],
    'in_Longitude': dfPOU['LONGITUDE'],
}).drop_duplicates()

# Number the distinct pairs 1..n in order of first appearance, then turn each number
# into its "WaDEOK_S<n>" ID.
dftemp = pd.DataFrame({"Count": range(1, len(dfSiteNativeID.index) + 1)},
                      index=dfSiteNativeID.index)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda counterRow: assignSiteUUID(counterRow['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Look up the custom site ID for a latitude/longitude pair.

    A is compared with the in_Latitude column and B with the in_Longitude column
    of the cell-level dfSiteNativeID table.

    Returns the first matching in_SiteNativeID value, or '' when both inputs are
    empty strings, both inputs are null, or no row matches.
    """
    bothBlank = A == '' and B == ''
    if bothBlank or (pd.isnull(A) and pd.isnull(B)):
        return ''

    sameSite = (dfSiteNativeID['in_Latitude'] == A) & (dfSiteNativeID['in_Longitude'] == B)
    matches = dfSiteNativeID.loc[sameSite, 'in_SiteNativeID']
    return '' if matches.empty else matches.iloc[0]

# Look up each row's site ID from its coordinates, then prefix the result with "POU".
# retrieveSiteNativeID returns '' when it finds nothing, so such rows end up as just "POU".
dfPOU['in_SiteNativeID'] = dfPOU.apply(lambda row: retrieveSiteNativeID( row['LATITUDE'], row['LONGITUDE']), axis=1)
dfPOU['in_SiteNativeID'] = "POU" + dfPOU['in_SiteNativeID'].astype(str)
dfPOU.head(2)

Unnamed: 0,OID_,OBJECTID,RECORD_ID,PERMIT_NUMBER,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,LATITUDE,LONGITUDE,in_PODorPOUSite,in_SiteNativeID
0,359,69,15030,19760098,Permit,Surface Water,Active,"Cline, Judy",,W2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",1976-08-11,1976-12-14,11130303,1081,34.514148,-97.219024,POU,POUWaDEOK_S1
1,358,70,15501,19760098,Permit,Surface Water,Active,"Cline, Judy",,E2,E2,32,01N,01EI,Garvin,Regular,30.0,"Recreation, Fish, Wildlife",1976-08-11,1976-12-14,11130303,1081,34.514137,-97.21464,POU,POUWaDEOK_S2


## Concatenate POD and POU

In [11]:
# Stack the POD rows on top of the POU rows into the master table.
# Columns that exist in only one of the two tables are kept and left empty (NaN) for
# rows that come from the other table. ignore_index=True renumbers the rows 0..n-1.
df = pd.concat([dfPOD, dfPOU], ignore_index=True)

print(len(df))
df.head()

28836


Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,in_PODorPOUSite,in_SiteNativeID,OID_
0,-101.896349,36.574734,561,9753,19980623,36.574728,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20,1999-09-14,,,9753.0,POD,PODWaDEOK_S1,
1,-101.57512,36.516345,752,50052,20020591,36.516338,-101.575112,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20,2003-05-03,,,50052.0,POD,PODWaDEOK_S2,
2,-99.052511,34.582855,944,53324,20040578,34.582849,-99.052503,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53324.0,POD,PODWaDEOK_S3,
3,-99.050317,34.590121,954,53325,20040578,34.590116,-99.050308,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,SW,NW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53325.0,POD,PODWaDEOK_S4,
4,-99.050317,34.586494,945,53326,20040578,34.586489,-99.050308,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,NW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53326.0,POD,PODWaDEOK_S5,


## Data Fix

In [12]:
#Fixing Beneficial Uses PRIMARY_PURPOSE
def fixRecFishWild(colrowValue):
    """Return 'Recreation Fish Wildlife' for the comma-separated spelling.

    Any other value is handed back unchanged.
    """
    return 'Recreation Fish Wildlife' if colrowValue == 'Recreation, Fish, Wildlife' else colrowValue

# Rewrite the PRIMARY_PURPOSE column value by value through fixRecFishWild.
df['PRIMARY_PURPOSE'] = df['PRIMARY_PURPOSE'].apply(fixRecFishWild)

In [13]:
# Flatten comma-separated owner names ("Last, First" -> "Last First").
# The pieces keep their original order; only the commas are removed.
def createOwnerName(val):
    """Return the owner name with its commas replaced by single spaces.

    Parameters
    ----------
    val : the raw owner name; non-string values are converted with str().

    Returns
    -------
    str
        '' when val is an empty string or null. Otherwise every comma-separated
        piece is stripped of surrounding whitespace and the non-empty pieces are
        joined with one space.

    All pieces are kept, so "A, B, C" becomes "A B C"; empty pieces (for
    example after a trailing comma) are skipped so no stray spaces are produced.
    """
    if val == "" or pd.isnull(val):
        return ""

    pieces = (piece.strip() for piece in str(val).split(","))
    return " ".join(piece for piece in pieces if piece)

# Derive the in_AllocationOwner column from ENTITY_NAME, one value at a time.
df['in_AllocationOwner'] = df['ENTITY_NAME'].apply(createOwnerName)
df.head()

Unnamed: 0,X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2,in_PODorPOUSite,in_SiteNativeID,OID_,in_AllocationOwner
0,-101.896349,36.574734,561,9753,19980623,36.574728,-101.89634,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20,1999-09-14,,,9753.0,POD,PODWaDEOK_S1,,Prestage Farms of Oklahoma LLC
1,-101.57512,36.516345,752,50052,20020591,36.516338,-101.575112,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20,2003-05-03,,,50052.0,POD,PODWaDEOK_S2,,Long Family Partnership
2,-99.052511,34.582855,944,53324,20040578,34.582849,-99.052503,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53324.0,POD,PODWaDEOK_S3,,McElroy Johnny R and Dana D
3,-99.050317,34.590121,954,53325,20040578,34.590116,-99.050308,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,SW,NW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53325.0,POD,PODWaDEOK_S4,,McElroy Johnny R and Dana D
4,-99.050317,34.586494,945,53326,20040578,34.586489,-99.050308,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,NW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07,2005-05-10,,,53326.0,POD,PODWaDEOK_S5,,McElroy Johnny R and Dana D


## Shapefile Data
- For attaching geometry to csv inputs.

In [14]:
# PoU Shapefile Data
# Shapefile input: read with geopandas, then wrap the result in a plain pandas DataFrame.
ShapeFileInput = gpd.read_file('shapefile/OK_PoU.shp')
dfPoUshapetemp = pd.DataFrame(ShapeFileInput)
dfPoUshapetemp.head(3)

Unnamed: 0,OBJECTID,RECORD_ID,PERMIT_NUM,RECORD_TYP,WATER,STATUS,ENTITY_NAM,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYP,TOTAL_PERM,PRIMARY_PU,DATE_FILED,DATE_ISSUE,HYDRO_UNIT,STREAM_SYS,Lattitude,Longitude,geometry
0,3793,18547,19980032,Permit,Surface Water,Active,"Ralston, Leo",,SE,NE,30,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.617575,-95.342249,"POLYGON ((-95.34225 34.61576, -95.34444 34.615..."
1,3792,20509,19980032,Permit,Surface Water,Active,"Ralston, Leo",,E2,NW,29,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.619445,-95.333524,"POLYGON ((-95.33572 34.61944, -95.33572 34.623..."
2,3791,20003,19980032,Permit,Surface Water,Active,"Ralston, Leo",E2,NW,NW,29,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.621256,-95.336819,"POLYGON ((-95.33572 34.61944, -95.33792 34.619..."


In [15]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the custom site ID text: "WaDEOK_S" followed by str(colrowValue)."""
    return f"WaDEOK_S{colrowValue!s}"

# One row per distinct (Lattitude, Longitude) pair in the shapefile table.
# drop_duplicates keeps the first occurrence of each pair with its original row label.
dfSiteNativeID = pd.DataFrame({
    'in_Latitude': dfPoUshapetemp['Lattitude'],
    'in_Longitude': dfPoUshapetemp['Longitude'],
}).drop_duplicates()

# Number the distinct pairs 1..n in order of first appearance, then turn each number
# into its "WaDEOK_S<n>" ID.
dftemp = pd.DataFrame({"Count": range(1, len(dfSiteNativeID.index) + 1)},
                      index=dfSiteNativeID.index)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda counterRow: assignSiteUUID(counterRow['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    """Look up the custom site ID for a latitude/longitude pair.

    A is compared with the in_Latitude column and B with the in_Longitude column
    of the cell-level dfSiteNativeID table.

    Returns the first matching in_SiteNativeID value, or '' when both inputs are
    empty strings, both inputs are null, or no row matches.
    """
    bothBlank = A == '' and B == ''
    if bothBlank or (pd.isnull(A) and pd.isnull(B)):
        return ''

    sameSite = (dfSiteNativeID['in_Latitude'] == A) & (dfSiteNativeID['in_Longitude'] == B)
    matches = dfSiteNativeID.loc[sameSite, 'in_SiteNativeID']
    return '' if matches.empty else matches.iloc[0]

# Attach a site ID to every shapefile row by looking up its (Lattitude, Longitude) pair.
# No "POU" prefix is added at this step; the IDs stay in the "WaDEOK_S<n>" form.
# NOTE(review): the lookup table used here is numbered from the shapefile's own row
# order, separately from the numbering built from dfPOU. The displayed outputs show
# "S1" on different coordinates in the two tables (34.514148 / -97.219024 in dfPOU,
# 34.617575 / -95.342249 here), so these IDs may not line up with the master file's
# in_SiteNativeID values -- confirm before joining the geometry export on this column.
dfPoUshapetemp['in_SiteNativeID'] = dfPoUshapetemp.apply(lambda row: retrieveSiteNativeID( row['Lattitude'], row['Longitude']), axis=1)
dfPoUshapetemp.head(2)

Unnamed: 0,OBJECTID,RECORD_ID,PERMIT_NUM,RECORD_TYP,WATER,STATUS,ENTITY_NAM,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYP,TOTAL_PERM,PRIMARY_PU,DATE_FILED,DATE_ISSUE,HYDRO_UNIT,STREAM_SYS,Lattitude,Longitude,geometry,in_SiteNativeID
0,3793,18547,19980032,Permit,Surface Water,Active,"Ralston, Leo",,SE,NE,30,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.617575,-95.342249,"POLYGON ((-95.34225 34.61576, -95.34444 34.615...",WaDEOK_S1
1,3792,20509,19980032,Permit,Surface Water,Active,"Ralston, Leo",,E2,NW,29,02N,19EI,Pushmataha,Regular,228.0,Irrigation,1998-08-21,1998-11-10,11140105,1030,34.619445,-95.333524,"POLYGON ((-95.33572 34.61944, -95.33572 34.623...",WaDEOK_S2


In [16]:
# Two-column geometry table: the "POU"-prefixed site ID and the feature geometry.
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['in_SiteNativeID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']

# Drop exact repeats of (ID, geometry). The defaults compare all columns, keep the
# first occurrence, return a new frame and leave the surviving row labels as they were.
dfPoUshape = dfPoUshape.drop_duplicates()
print(len(dfPoUshape))
dfPoUshape.head(3)

4151


Unnamed: 0,in_SiteNativeID,geometry
0,POUWaDEOK_S1,"POLYGON ((-95.34225 34.61576, -95.34444 34.615..."
1,POUWaDEOK_S2,"POLYGON ((-95.33572 34.61944, -95.33572 34.623..."
2,POUWaDEOK_S3,"POLYGON ((-95.33572 34.61944, -95.33792 34.619..."


In [17]:
# Inspect the distinct site IDs present in the geometry table.
dfPoUshape['in_SiteNativeID'].unique()

array(['POUWaDEOK_S1', 'POUWaDEOK_S2', 'POUWaDEOK_S3', ...,
       'POUWaDEOK_S4149', 'POUWaDEOK_S4150', 'POUWaDEOK_S4151'],
      dtype=object)

## Export Data

In [18]:
# Exporting to finished files. Both are written to the current working directory
# without the DataFrame index column.
df.to_csv('P_OklahomaMaster.csv', index=False)  # The output
dfPoUshape.to_csv('P_OklahomaGeometry.csv', index=False) # The output geometry.