# Pre-processing North Dakota Allocation data for WaDEQA upload.
Date Updated: 04/08/2020

Purpose:  To pre-process the North Dakota data into one master file for simple DataFrame creation and extraction, and to validate datatypes and other data-related information.

Useful Links to Data:  Data obtained from the map services of North Dakota State Water Commission (NDSWC)
https://mapservice.swc.nd.gov/.  Downloaded the “Water Permits” layer displayed by “Use type”.  Open the shapefile in QGIS and export the layer to a CSV file:  Permits.csv

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.options.display.max_columns = 999  # Let pandas display up to 999 columns of a DataFrame in the notebook

In [2]:
# Input file name and working directory.
Permit_Input = "Permits_input.csv"  # name of the permits CSV file

# Make the raw-input folder the current directory of the process.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/NorthDakota/WaterAllocation/RawInputData"
os.chdir(workingDir)

In [3]:
# Load the permits CSV into a DataFrame; the file is decoded as ISO-8859-1 text.
df = pd.read_csv(
    Permit_Input,
    encoding="ISO-8859-1",
)

In [4]:
# Convert the date columns to pandas datetime so they fit WaDE.
for date_col in ('priority_d', 'date_issue', 'date_cance'):
    # Values that cannot be parsed become NaT instead of raising.
    parsed = pd.to_datetime(df[date_col], errors='coerce')
    # Format as mm/dd/YYYY text, then parse that text back into datetimes.
    as_text = parsed.dt.strftime('%m/%d/%Y')
    df[date_col] = pd.to_datetime(as_text)

In [5]:
# Clean up the text columns.
# These three are converted to Title Case and then have surrounding white space removed.
for text_col in ('source_nam', 'source', 'permit_hol'):
    df[text_col] = df[text_col].str.title().str.strip()

# These only have leading/trailing white space removed.
for text_col in ('county', 'aquifer', 'pod', 'status', 'use_type', 'permit_num'):
    df[text_col] = df[text_col].str.strip()

In [6]:
# Print the dtype of every column; the row/column display limits are lifted
# only inside the `with` block so that a long listing is not truncated.
column_types = df.dtypes
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(column_types)

permit_ind             int64
permit_num            object
pod_index              int64
pod                   object
permit_hol            object
priority_d    datetime64[ns]
use_type              object
status                object
date_issue    datetime64[ns]
date_cance    datetime64[ns]
beneficial            object
county                object
hu_sub_bas            object
aquifer               object
subaquifer            object
req_acft             float64
req_acre             float64
req_rate             float64
req_storag           float64
app_acft             float64
app_acre             float64
app_rate             float64
app_storag           float64
pod_status            object
source                object
irrigation            object
source_nam            object
mainstem               int64
impound_lo            object
impound_na            object
return_des            object
discharge_            object
period_sta            object
period_end            object
return_qua    

In [7]:
# Write the processed DataFrame to the master CSV, leaving out the index column.
output_file = 'P_NorthDakotaMaster.csv'
df.to_csv(output_file, index=False)