# Pre-processing North Dakota Allocation data for WaDEQA upload.
Date Updated: 04/08/2020
Purpose:  To pre-process the North Dakota data into one master file for simple DataFrame creation and extraction, and to validate datatypes and other data-related information.

Useful Links to Data:

Data obtained from the map services of North Dakota State Water Commission (NDSWC)
https://mapservice.swc.nd.gov/

Downloaded “Water permits” layer displayed by “Use type”.  Open the shapefile in QGIS and export layer to csv file:  Permits.csv

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # Raise the column display limit to 999 so wide DataFrames are shown without truncated columns in the notebook

In [2]:
#Working Directory and Input File
# The default below is the original author's local folder. To run the notebook on
# another machine, set the ND_WORKING_DIR environment variable to the folder that
# holds the raw input file instead of editing this cell.
workingDir = os.environ.get("ND_WORKING_DIR") or "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/NorthDakota/WaterAllocation/RawInputData"
os.chdir(workingDir)  # later cells read and write files relative to this folder

Permit_Input = "dontopen_NDPermitData.csv"

In [3]:
#Dataframe creation
# Load the raw permit CSV, decoding its text as ISO-8859-1.
df = pd.read_csv(
    filepath_or_buffer=Permit_Input,
    encoding="ISO-8859-1",
)

In [4]:
# Preview the loaded permit table (the notebook output shows only the first and last rows).
df

Unnamed: 0,FID,permit_ind,permit_num,pod_index,pod,permit_hol,priority_d,use_type,status,date_issue,date_cance,beneficial,county,hu_sub_bas,aquifer,subaquifer,req_acft,req_acre,req_rate,req_storag,app_acft,app_acre,app_rate,app_storag,pod_status,source,irrigation,source_nam,mainstem,impound_lo,impound_na,return_des,discharge_,period_sta,period_end,return_qua,held_acft,held_acre,held_rate,held_stora,longitude,latitude
0,0,1,4407,1,13007302B,"KETTERLING, ROLAND & LORRAINE",3/4/1991 0:00:00,Irrigation,Denied,,,11/11/1111 0:00:00,McIntosh,Beaver,,,204.0,135.2,1000.0,0.0,0.0,0.0,0.0,0.0,Denied,Ground Water,Sprinkler,,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-99.789880,46.111300
1,1,2,1E,2,15310236CC,"HYDE, GEORGE H.",8/15/1901 0:00:00,Irrigation,Cancelled,,,,McKenzie,Lake Sakakawea,,,80.0,80.0,448.8,0.0,0.0,0.0,0.0,0.0,Cancelled,Surface Water,Combination,,1,,,,,,,0.0,0.0,0.0,0.0,0.0,-103.752160,48.026220
2,2,3,2B,3,15310236BA,"SLATER, A. L.",9/2/1901 0:00:00,Irrigation,Cancelled,,,,McKenzie,Lake Sakakawea,,,160.0,160.0,448.8,0.0,0.0,0.0,0.0,0.0,Cancelled,Surface Water,Combination,,1,,,,,,,0.0,0.0,0.0,0.0,0.0,-103.746910,48.037070
3,3,4,2D,4,14910026AB,"GUDMUNSEN, ROBERT AND LOWRAINE",1/26/1906 0:00:00,Irrigation,Perfected,4/30/1937 0:00:00,,7/1/1937 0:00:00,McKenzie,Lower Little Missouri,,,291.0,291.0,1615.6,0.0,291.0,291.0,1615.6,0.0,Active,Surface Water,Flooding,,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-103.441260,47.701760
4,4,5,3C,5,15009823DC,"HARTEL, LEMOINE",2/3/1906 0:00:00,Irrigation,Cancelled,1/10/1990 0:00:00,6/7/2017 0:00:00,7/1/1984 0:00:00,McKenzie,Lower Little Missouri,,,240.0,240.0,1350.0,0.0,132.0,132.0,1350.0,0.0,Cancelled,Surface Water,Flooding,,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-103.184202,47.791708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12208,12208,44951,7090,92657,15109223C,"CATES EARTH SCIENCE TECHNOLOGIES, INC.",2/6/2020 0:00:00,Industrial,Application In Processing,,,,Mountrail,Lake Sakakawea,,,1000.0,0.0,2500.0,0.0,0.0,0.0,0.0,0.0,Deferred,Ground Water,,,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-102.423254,47.880159
12209,12209,44953,7086,92662,14408619B,"SCHEID, RICHARD, NANCY AND LEE",1/21/2020 0:00:00,Irrigation,Application In Processing,,,,Mercer,Knife,,,130.0,85.4,800.0,0.0,0.0,0.0,0.0,0.0,Deferred,Surface Water,,Knife River,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-101.632591,47.281109
12210,12210,44953,7086,92661,14408619A,"SCHEID, RICHARD, NANCY AND LEE",1/21/2020 0:00:00,Irrigation,Application In Processing,,,,Mercer,Knife,,,130.0,85.4,800.0,0.0,0.0,0.0,0.0,0.0,Deferred,Surface Water,Sprinkler,Knife River,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-101.622469,47.281104
12211,12211,44954,7105,92664,14409703DW,"LAPIERRE, BRENT",4/3/2020 0:00:00,Industrial,Application In Processing,,,,Dunn,Knife,,,200.0,0.0,600.0,0.0,0.0,0.0,0.0,0.0,Deferred,Surface Water,,Little Knife River,0,,,,,,,0.0,0.0,0.0,0.0,0.0,-102.955651,47.317180


In [5]:
# List the column names available in the loaded permit table.
df.columns

Index(['FID', 'permit_ind', 'permit_num', 'pod_index', 'pod', 'permit_hol',
       'priority_d', 'use_type', 'status', 'date_issue', 'date_cance',
       'beneficial', 'county', 'hu_sub_bas', 'aquifer', 'subaquifer',
       'req_acft', 'req_acre', 'req_rate', 'req_storag', 'app_acft',
       'app_acre', 'app_rate', 'app_storag', 'pod_status', 'source',
       'irrigation', 'source_nam', 'mainstem', 'impound_lo', 'impound_na',
       'return_des', 'discharge_', 'period_sta', 'period_end', 'return_qua',
       'held_acft', 'held_acre', 'held_rate', 'held_stora', 'longitude',
       'latitude'],
      dtype='object')

In [6]:
#technique to check datatype of long dataframes.
# Lift the row/column display limits only for the duration of the `with` block
# so that the dtype of every column is printed rather than a truncated list.
show_everything = pd.option_context('display.max_rows', None, 'display.max_columns', None)
with show_everything:
    column_dtypes = df.dtypes
    print(column_dtypes)

FID             int64
permit_ind      int64
permit_num     object
pod_index       int64
pod            object
permit_hol     object
priority_d     object
use_type       object
status         object
date_issue     object
date_cance     object
beneficial     object
county         object
hu_sub_bas     object
aquifer        object
subaquifer     object
req_acft      float64
req_acre      float64
req_rate      float64
req_storag    float64
app_acft      float64
app_acre      float64
app_rate      float64
app_storag    float64
pod_status     object
source         object
irrigation     object
source_nam     object
mainstem        int64
impound_lo     object
impound_na     object
return_des     object
discharge_     object
period_sta     object
period_end     object
return_qua    float64
held_acft     float64
held_acre     float64
held_rate     float64
held_stora    float64
longitude     float64
latitude      float64
dtype: object


In [7]:
#Changing datatype of used date fields.
def to_midnight_datetime(values):
    """Parse a column of date strings into datetimes with the time set to midnight.

    Values that cannot be parsed become NaT (errors='coerce').
    `.dt.normalize()` resets the time component to 00:00:00 directly, which
    replaces the earlier approach of formatting each date back to a
    '%m/%d/%Y' string and parsing it a second time.
    """
    return pd.to_datetime(values, errors='coerce').dt.normalize()


# One shared helper keeps the three date columns converted the same way.
for date_col in ['priority_d', 'date_issue', 'date_cance']:
    df[date_col] = to_midnight_datetime(df[date_col])

In [8]:
#Removing NaN, and missing (999) values from AllocationAmount, AllocationMaximum
# 999 is treated as a "missing" placeholder in these columns and is zeroed out
# together with NaN.
MISSING_SENTINEL = 999

# NOTE(review): the earlier version of this cell applied the req_acft
# replacement twice; the second call had no effect and has been dropped. If a
# different column was meant to be cleaned as well, add it to this list.
for amount_col in ['req_rate', 'req_acft']:
    df[amount_col] = df[amount_col].fillna(0).replace(MISSING_SENTINEL, 0)

In [9]:
#Changing format to title to remove a few duplicates
# Columns that are title-cased first and then trimmed.
title_case_cols = ['source_nam', 'source', 'permit_hol']
# Columns that only have leading/trailing white space removed.
trim_only_cols = ['county', 'aquifer', 'pod', 'status', 'use_type', 'permit_num']

for col_name in title_case_cols:
    df[col_name] = df[col_name].str.title()

#Removing white space from certain text fields.
for col_name in title_case_cols + trim_only_cols:
    df[col_name] = df[col_name].str.strip()

In [10]:
#Exporting to Finished File
# Relative path, so the file is written to the current working directory.
# The DataFrame index is left out of the CSV.
master_csv = 'P_NorthDakotaMaster.csv'
df.to_csv(master_csv, index=False)  # The output