# Pre-processing Oklahoma Allocation data for WaDEQA upload.
Date Updated: 04/07/2020
Purpose:  To pre-process the Oklahoma data into one master file for simple DataFrame creation and extraction.  To validate datatypes and other data related informattion.

Useful Links to Data:

Permitted Surface Water Diversion Points
http://home-owrb.opendata.arcgis.com/datasets/permitted-surface-water-diversion-points?geometry=-119.379%2C31.373%2C-77.565%2C37.701  

Permitted Groundwater Wells (Point coverage)
http://home-owrb.opendata.arcgis.com/datasets/permitted-groundwater-wells  

In [1]:
#Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
#Working Directory and Input File
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Oklahoma/WaterAllocation/RawInputData"
os.chdir(workingDir)

PGW_Input = "Permitted_Groundwater_Wells.csv"
PSWDP_Input = "Permitted_Surface_Water_Diversion_Points.csv"

In [3]:
#Dataframe creation
df_PGW = pd.read_csv(PGW_Input, encoding = "ISO-8859-1")
df_PSWDP = pd.read_csv(PSWDP_Input, encoding = "ISO-8859-1")

#Merge / Concatenate
#Both datasets share the same columns.  We will concatenate both dataframes together into one long dataframe instead of merging.
df = pd.concat([df_PGW, df_PSWDP], ignore_index=True).reset_index()

In [4]:
df

Unnamed: 0,index,ï»¿X,Y,OBJECTID,RECORD_ID,PERMIT_NUMBER,LATITUDE,LONGITUDE,RECORD_TYPE,WATER,STATUS,ENTITY_NAME,QUARTER3,QUARTER2,QUARTER1,SECTION,TOWNSHIP,RANGE,COUNTY,PERMIT_TYPE,TOTAL_PERMITTED_ACRE_FEET,PRIMARY_PURPOSE,DATE_FILED,DATE_ISSUED,HYDRO_UNIT,STREAM_SYSTEM,RECORD_ID2
0,0,-101.896349,36.574734,561,9753,19980623,36.574728,-101.896340,Permit,Groundwater,Active,"Prestage Farms of Oklahoma, LLC",SW,SW,SE,5,01N,11EC,Texas,Regular,10.0,Agriculture,1998-11-20T00:00:00.000Z,1999-09-14T00:00:00.000Z,,,9753
1,1,-101.575120,36.516345,752,50052,20020591,36.516338,-101.575112,Permit,Groundwater,Active,Long Family Partnership,SE,SE,SW,29,01N,14EC,Texas,Regular,1280.0,Irrigation,2002-09-20T00:00:00.000Z,2003-05-03T00:00:00.000Z,,,50052
2,2,-99.052511,34.582855,944,53324,20040578,34.582849,-99.052503,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NW,SW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53324
3,3,-99.050317,34.590121,954,53325,20040578,34.590116,-99.050308,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,SW,NW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53325
4,4,-99.050317,34.586494,945,53326,20040578,34.586489,-99.050308,Permit,Groundwater,Active,"McElroy, Johnny R and Dana D",NE,NW,SW,1,01N,18WI,Tillman,Regular,314.0,Irrigation,2004-09-07T00:00:00.000Z,2005-05-10T00:00:00.000Z,,,53326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24482,24482,-94.758234,33.745804,14060,57263,20080020,33.745797,-94.758227,Permit,Surface Water,Active,OKFA LLC,NW,SE,NW,25,09S,24EI,McCurtain,Regular,300.0,Irrigation,2008-07-18T00:00:00.000Z,2008-11-12T00:00:00.000Z,11140106.0,1010.0,57263
24483,24483,-97.554256,34.717414,4469,57330,19680182,34.717408,-97.554248,Permit Application,Surface Water,Pending,"Duncan, City of",,,,19,03N,03WI,Garvin,,7000.0,Public Supply,1968-04-18T00:00:00.000Z,,11130303.0,1081.0,57330
24484,24484,-99.775547,35.642717,19610,57356,19690112,35.642711,-99.775539,Permit Application,Surface Water,Pending,"Tracy, Dale",,,S2,32,14N,24WI,Roger Mills,,200.0,Irrigation,1969-02-20T00:00:00.000Z,,11130301.0,1084.0,57356
24485,24485,-98.450240,35.157975,11974,57315,19790012,35.157969,-98.450232,Permit Application,Surface Water,Pending,"Chickasha, City of",SW,NW,NE,22,08N,12WI,Caddo,,5000.0,Public Supply,1979-01-26T00:00:00.000Z,,11130302.0,1083.0,57315


In [5]:
df.columns

Index(['index', 'ï»¿X', 'Y', 'OBJECTID', 'RECORD_ID', 'PERMIT_NUMBER',
       'LATITUDE', 'LONGITUDE', 'RECORD_TYPE', 'WATER', 'STATUS',
       'ENTITY_NAME', 'QUARTER3', 'QUARTER2', 'QUARTER1', 'SECTION',
       'TOWNSHIP', 'RANGE', 'COUNTY', 'PERMIT_TYPE',
       'TOTAL_PERMITTED_ACRE_FEET', 'PRIMARY_PURPOSE', 'DATE_FILED',
       'DATE_ISSUED', 'HYDRO_UNIT', 'STREAM_SYSTEM', 'RECORD_ID2'],
      dtype='object')

In [6]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

index                          int64
ï»¿X                         float64
Y                            float64
OBJECTID                       int64
RECORD_ID                      int64
PERMIT_NUMBER                 object
LATITUDE                     float64
LONGITUDE                    float64
RECORD_TYPE                   object
WATER                         object
STATUS                        object
ENTITY_NAME                   object
QUARTER3                      object
QUARTER2                      object
QUARTER1                      object
SECTION                        int64
TOWNSHIP                      object
RANGE                         object
COUNTY                        object
PERMIT_TYPE                   object
TOTAL_PERMITTED_ACRE_FEET    float64
PRIMARY_PURPOSE               object
DATE_FILED                    object
DATE_ISSUED                   object
HYDRO_UNIT                   float64
STREAM_SYSTEM                float64
RECORD_ID2                     int64
d

In [7]:
#Changing datatype of used date fields. 
df['DATE_FILED'] = pd.to_datetime(df['DATE_FILED'], errors = 'coerce')
df['DATE_FILED'] = pd.to_datetime(df["DATE_FILED"].dt.strftime('%m/%d/%Y'))

df['DATE_ISSUED'] = pd.to_datetime(df['DATE_ISSUED'], errors = 'coerce')
df['DATE_ISSUED'] = pd.to_datetime(df["DATE_ISSUED"].dt.strftime('%m/%d/%Y'))

In [8]:
#Fixing Beneficial Uses PRIMARY_PURPOSE
def fixRecFishWild(colrowValue):
    if colrowValue == 'Recreation, Fish, Wildlife':
        outList = 'Recreation Fish Wildlife'
    else:
        outList = colrowValue
    return outList

df['PRIMARY_PURPOSE'] = df.apply(lambda row: fixRecFishWild(row['PRIMARY_PURPOSE']), axis=1)

In [9]:
#Exporting to Finished File
df.to_csv('P_OklahomaMaster.csv', index=False)  # The output