# Pre-processing Kansas Allocation data for WaDEQA upload.
Date Updated: 01/05/2020
Purpose:  To pre-process the Kansas data into one master file for simple DataFrame creation and extraction

The data comes in two parts: water quantity data (qty) and location data (wimas).  The qty data is combined with the wimas data via a key built from wr_id + pdiv_id.  See 'KS_Allocation Schema Mapping to WaDE_QA.xlsx' for additional mapping information.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
# Raise the column display limit to 999 so wide DataFrames are shown without elided columns.
pd.options.display.max_columns = 999

In [2]:
#Working Directory and Input Files
# The raw-input folder defaults to the original author's local path. Set the
# KS_RAW_INPUT_DIR environment variable to point at another folder when running
# the notebook on a different machine; no edit to this cell is needed.
workingDir = os.environ.get(
    "KS_RAW_INPUT_DIR",
    "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Kansas/WaterAllocation/RawInputData",
)
os.chdir(workingDir)  # the relative file names used below resolve against this folder

# File names of the two raw extracts, relative to workingDir.
qty_Input = "qty_input.csv"
wimas_input = "wimas_input.csv"

In [3]:
# Load the water-quantity (qty) extract and append a "<wr_id>_<pdiv_id>" KeyJoin column.
df_qty = pd.read_csv(qty_Input).assign(
    KeyJoin=lambda qty: qty['wr_id'].astype(str) + "_" + qty['pdiv_id'].astype(str)
)
df_qty

Unnamed: 0,wr_id,right_type,vcnty_code,wr_num,wr_qual,umw_code,source,fo_num,basin,stream,gmd,county,wrf_status,pdiv_id,twp,twp_dir,rng,rng_dir,sect,dwr_id,feet_north,feet_west,qual4,qual3,qual2,qual1,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid,wimas_date,KeyJoin
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1,36,134.000,134.000,AF,1,1043567968,12/07/2020,36_22899
1,37,A,,37,0,IND,S,1,8,8,,JO,NK,41446,12,S,22,E,20,2,2900,2800,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680,2540,,SW,SW,NE,1,119538,480.000,480.000,AF,1,1040286889,12/07/2020,51_37651
3,54,A,,54,0,IRR,S,2,59,59,,HG,NK,21065,21,S,24,W,7,3,,,,NW,SE,SE,,54,260.000,260.000,AF,1,,12/07/2020,54_21065
4,54,A,,54,0,IRR,S,2,59,59,,HG,NK,50470,21,S,24,W,7,1,,,,SW,SW,SE,,54,260.000,260.000,AF,1,,12/07/2020,54_50470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45661,87071,A,,29938,D1,IRR,G,4,33,,3.0,FI,NK,76072,26,S,34,W,11,6,4910,4835,,NW,NW,NW,1,143781,272.000,272.000,AF,2,1040541552,12/07/2020,87071_76072
45662,87072,A,,29938,D2,IRR,G,4,33,,3.0,FI,NK,3374,26,S,34,W,12,3,3960,1320,,,NC,NE,1,143783,272.000,272.000,AF,2,1040132430,12/07/2020,87072_3374
45663,87072,A,,29938,D2,IRR,G,4,33,,3.0,FI,NK,70467,26,S,34,W,12,7,4915,4865,,NW,NW,NW,1,143784,272.000,272.000,AF,2,1040470045,12/07/2020,87072_70467
45664,87130,A,,30431,D3,IRR,G,2,57,,5.0,RN,NK,5005,22,S,10,W,4,6,1320,1320,,,NC,SE,1,143851,20.000,20.000,AF,1,1043804227,12/07/2020,87130_5005


In [4]:
# Load the location (wimas) extract and append a "<wr_id>_<pdiv_id>" KeyJoin column.
df_wimas = pd.read_csv(wimas_input).assign(
    KeyJoin=lambda wimas: wimas['wr_id'].astype(str) + "_" + wimas['pdiv_id'].astype(str)
)
df_wimas

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,wr_id,right_type,vested_county_code,wr_num,wr_qualifier,umw_code,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id,qual1,qual2,qual3,qual4,longitude,latitude,fpdiv_comment,feet_north,feet_west,basin_num,gmd,fo_num,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid,wimas_date,KeyJoin
0,1,A,,1,0,IRR,0,S,FO,06-MAY-1941,66333,1,20,S,17,E,2,1,SE,SE,SW,,-95.49291,38.33276,,336.0,934.0,5,,1,AN,1804.0,,,,,,12/07/2020,1_66333
1,2,A,,2,0,IRR,0,S,NQ,16-JUN-1941,22340,1,15,S,2,W,7,1,SW,W2,E2,NC,-97.59142,38.75795,,1320.0,4290.0,17,,3,SA,17.0,,,,,,12/07/2020,2_22340
2,2,A,,2,0,IRR,0,S,NQ,16-JUN-1941,48165,1,15,S,2,W,7,3,SE,NE,NW,,-97.57887,38.76063,,2300.0,700.0,17,,3,SA,17.0,,,,,,12/07/2020,2_48165
3,2,A,,2,0,IRR,0,S,NQ,16-JUN-1941,50982,0,15,S,2,W,7,4,,,,,-97.58413,38.76310,Created at migration for old water use,3200.0,2200.0,17,,3,SA,17.0,,,,,,12/07/2020,2_50982
4,29,A,,29,0,IRR,0,S,NQ,21-AUG-1942,14928,1,7,S,15,W,17,1,,,,,-99.01219,39.44180,,1760.0,1060.0,25,,3,OB,25.0,,,,,,12/07/2020,29_14928
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84490,87130,A,,30431,D3,IRR,1,G,NK,04-OCT-1977,5005,1,22,S,10,W,4,6,SE,NC,,,-98.42216,38.16226,,1320.0,1320.0,57,5.0,2,RN,,1.0,,,,1.043804e+09,12/07/2020,87130_5005
84491,87131,A,,30431,D4,IRR,1,G,NK,04-OCT-1977,5005,1,22,S,10,W,4,6,SE,NC,,,-98.42216,38.16226,,1320.0,1320.0,57,5.0,2,RN,,1.0,,,,1.043804e+09,12/07/2020,87131_5005
84492,87138,A,,50445,0,IRR,1,G,AY,14-SEP-2020,9102,1,33,S,3,E,22,1,NE,SE,NE,,-97.07749,37.16842,,3750.0,120.0,33,,2,CL,,1.0,,,,1.043950e+09,12/07/2020,87138_9102
84493,87152,A,,50448,0,STK,1,G,AY,28-SEP-2020,86624,1,26,S,25,W,6,3,SW,SW,SW,,-100.10480,37.81049,,398.0,4799.0,33,3.0,2,FO,,1.0,,,,1.046769e+09,12/07/2020,87152_86624


In [5]:
# Combine the qty and wimas rows that share a KeyJoin value. This is an inner join:
# rows whose key is missing from the other frame are dropped. Columns present in both
# frames get the _x (qty) / _y (wimas) suffixes. Fully duplicated rows are then removed.
df = df_qty.merge(df_wimas, on='KeyJoin', how='inner').drop_duplicates()
df

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1,36,134.000,134.000,AF,1,1043567968,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,07-JAN-1943,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.99820,,,,18,,3,OT,,1.0,,,,1.043568e+09,12/07/2020
1,37,A,,37,0,IND,S,1,8,8,,JO,NK,41446,12,S,22,E,20,2,2900,2800,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,09-JAN-1943,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680,2540,,SW,SW,NE,1,119538,480.000,480.000,AF,1,1040286889,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,06-AUG-1945,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.84250,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1.040287e+09,12/07/2020
3,54,A,,54,0,IRR,S,2,59,59,,HG,NK,21065,21,S,24,W,7,3,,,,NW,SE,SE,,54,260.000,260.000,AF,1,,12/07/2020,54_21065,54,A,,54,0,IRR,1,S,NK,18-AUG-1945,21065,1,21,S,24,W,7,3,SE,SE,NW,,-99.99492,38.23579,,,,59,,2,HG,59.0,,,,,,12/07/2020
4,54,A,,54,0,IRR,S,2,59,59,,HG,NK,50470,21,S,24,W,7,1,,,,SW,SW,SE,,54,260.000,260.000,AF,1,,12/07/2020,54_50470,54,A,,54,0,IRR,1,S,NK,18-AUG-1945,50470,1,21,S,24,W,7,1,SE,SW,SW,,-100.00070,38.23389,,,,59,,2,HG,59.0,,,,,,12/07/2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47308,87071,A,,29938,D1,IRR,G,4,33,,3.0,FI,NK,76072,26,S,34,W,11,6,4910,4835,,NW,NW,NW,1,143781,272.000,272.000,AF,2,1040541552,12/07/2020,87071_76072,87071,A,,29938,D1,IRR,1,G,NK,31-MAY-1977,76072,1,26,S,34,W,11,6,NW,NW,NW,,-101.01540,37.80835,,4910.0,4835.0,33,3.0,4,FI,,1.0,,,,1.040542e+09,12/07/2020
47309,87072,A,,29938,D2,IRR,G,4,33,,3.0,FI,NK,3374,26,S,34,W,12,3,3960,1320,,,NC,NE,1,143783,272.000,272.000,AF,2,1040132430,12/07/2020,87072_3374,87072,A,,29938,D2,IRR,1,G,NK,31-MAY-1977,3374,1,26,S,34,W,12,3,NE,NC,,,-100.98470,37.80586,,3960.0,1320.0,33,3.0,4,FI,,1.0,,,,1.040132e+09,12/07/2020
47310,87072,A,,29938,D2,IRR,G,4,33,,3.0,FI,NK,70467,26,S,34,W,12,7,4915,4865,,NW,NW,NW,1,143784,272.000,272.000,AF,2,1040470045,12/07/2020,87072_70467,87072,A,,29938,D2,IRR,1,G,NK,31-MAY-1977,70467,1,26,S,34,W,12,7,NW,NW,NW,,-100.99730,37.80822,,4915.0,4865.0,33,3.0,4,FI,,1.0,,,,1.040470e+09,12/07/2020
47311,87130,A,,30431,D3,IRR,G,2,57,,5.0,RN,NK,5005,22,S,10,W,4,6,1320,1320,,,NC,SE,1,143851,20.000,20.000,AF,1,1043804227,12/07/2020,87130_5005,87130,A,,30431,D3,IRR,1,G,NK,04-OCT-1977,5005,1,22,S,10,W,4,6,SE,NC,,,-98.42216,38.16226,,1320.0,1320.0,57,5.0,2,RN,,1.0,,,,1.043804e+09,12/07/2020


In [6]:
#Changing datatype of used date fields.
# Parse priority_date to datetime64; values that cannot be parsed become NaT
# (errors='coerce'). normalize() sets the time-of-day to midnight, which is the same
# date-only result the previous strftime('%m/%d/%Y') -> to_datetime round trip gave,
# without formatting and re-parsing every value as a string.
df['priority_date'] = pd.to_datetime(df['priority_date'], errors='coerce').dt.normalize()

In [7]:
# Coerce the coordinate and authorized-quantity columns to numeric dtypes; any value
# that cannot be parsed as a number becomes NaN (errors='coerce').
for numeric_col in ('longitude', 'latitude', 'auth_quant'):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

In [8]:
#Right Type Code
# Lookup from the one-letter right-type code to its descriptive label.
rightTypeDict = {
"A" : "Appropriation",
"B" : "Basin Term",
"D" : "Domestic",
"P" : "Temporary",
"T" : "Term",
"V" : "Vested"
}

def retrieveRightType(colrowValue):
    """Translate a right-type code into its descriptive label.

    Args:
        colrowValue: Raw value from one cell; may be a string, None/NaN, or empty.

    Returns:
        The matching rightTypeDict value, or "Unspecified" when the value is
        missing, empty, or not a known code. Surrounding whitespace is ignored.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get supplies the fallback directly; the former bare `except:` would also
    # have hidden unrelated errors (including KeyboardInterrupt).
    return rightTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Decode right_type_x into the 'Right Type' label column. Series.map calls the lookup
# once per value and avoids building a row object for every record, which the previous
# DataFrame.apply(axis=1) did.
df['Right Type'] = df['right_type_x'].map(retrieveRightType)
df.head(3)

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y,Right Type
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1.0,36,134.0,134.0,AF,1,1043567968.0,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,1943-01-07,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.9982,,,,18,,3,OT,,1.0,,,,1043568000.0,12/07/2020,Appropriation
1,37,A,,37,0,IND,S,1,8,8.0,,JO,NK,41446,12,S,22,E,20,2,2900.0,2800.0,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,1943-01-09,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020,Appropriation
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680.0,2540.0,,SW,SW,NE,1.0,119538,480.0,480.0,AF,1,1040286889.0,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,1945-08-06,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.8425,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1040287000.0,12/07/2020,Appropriation


In [9]:
#BenUse Code
# Lookup from the three-letter use code to its descriptive beneficial-use label.
useTypeDict = {
"ART" : "Artificial Recharge",
"CON" : "Contamination Remediation",
"DEW" : "Dewatering",
"DOM" : "Domestic",
"FPR" : "Fire Protection",
"HYD" : "Hydraulic Dredging",
"IND" : "Industrial",
"IRR" : "Irrigation",
"MUN" : "Municipal",
"REC" : "Recreation",
"SED" : "Sediment Storage",
"STK" : "Stockwater",
"THX" : "Thermal Exchange",
"WTR" : "Water Power"
}

def retrieveUseType(colrowValue):
    """Translate a use code into its descriptive beneficial-use label.

    Args:
        colrowValue: Raw value from one cell; may be a string, None/NaN, or empty.

    Returns:
        The matching useTypeDict value, or "Unspecified" when the value is
        missing, empty, or not a known code. Surrounding whitespace is ignored.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get supplies the fallback directly; the former bare `except:` would also
    # have hidden unrelated errors (including KeyboardInterrupt).
    return useTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Decode umw_code_x into the 'BenUse' label column. Series.map calls the lookup once
# per value and avoids building a row object for every record, which the previous
# DataFrame.apply(axis=1) did.
df['BenUse'] = df['umw_code_x'].map(retrieveUseType)
df.head(3)

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y,Right Type,BenUse
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1.0,36,134.0,134.0,AF,1,1043567968.0,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,1943-01-07,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.9982,,,,18,,3,OT,,1.0,,,,1043568000.0,12/07/2020,Appropriation,Irrigation
1,37,A,,37,0,IND,S,1,8,8.0,,JO,NK,41446,12,S,22,E,20,2,2900.0,2800.0,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,1943-01-09,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020,Appropriation,Industrial
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680.0,2540.0,,SW,SW,NE,1.0,119538,480.0,480.0,AF,1,1040286889.0,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,1945-08-06,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.8425,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1040287000.0,12/07/2020,Appropriation,Irrigation


In [10]:
#Watersource Type Code
# Lookup from the one-letter source code to its water-source type label.
wsTypeDict = {
"S" : "Surface Water",
"G" : "Groundwater"}

def retrieveWSType(colrowValue):
    """Translate a source code into its water-source type label.

    Args:
        colrowValue: Raw value from one cell; may be a string, None/NaN, or empty.

    Returns:
        The matching wsTypeDict value, or "Unspecified" when the value is
        missing, empty, or not a known code. Surrounding whitespace is ignored.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get supplies the fallback directly; the former bare `except:` would also
    # have hidden unrelated errors (including KeyboardInterrupt).
    return wsTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Decode source into the 'WatersourceType' label column. Series.map calls the lookup
# once per value and avoids building a row object for every record, which the previous
# DataFrame.apply(axis=1) did.
df['WatersourceType'] = df['source'].map(retrieveWSType)
df.head(3)

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y,Right Type,BenUse,WatersourceType
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1.0,36,134.0,134.0,AF,1,1043567968.0,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,1943-01-07,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.9982,,,,18,,3,OT,,1.0,,,,1043568000.0,12/07/2020,Appropriation,Irrigation,Groundwater
1,37,A,,37,0,IND,S,1,8,8.0,,JO,NK,41446,12,S,22,E,20,2,2900.0,2800.0,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,1943-01-09,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020,Appropriation,Industrial,Surface Water
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680.0,2540.0,,SW,SW,NE,1.0,119538,480.0,480.0,AF,1,1040286889.0,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,1945-08-06,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.8425,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1040287000.0,12/07/2020,Appropriation,Irrigation,Groundwater


In [11]:
#Status Type Code
# Lookup from the two-letter status code to its descriptive label.
# NOTE(review): "Certificated Issued" (NK, NQ) and "Certificate Issued" (NT) are worded
# inconsistently. The strings are left unchanged here because consumers of the output
# may depend on the exact text -- confirm the intended label before editing.
statusTypeDict = {
"AA" : "Vested Active",
"AM" : "Dismissed After Vested",
"AY" : "Pending Initial Review",
"FO" : "Dismissed Prior to Approval",
"GA" : "Denied Prior to approval",
"GM" : "Reinstated Prior to Approval",
"GY" : "Approved Pending Completion",
"HK" : "Extended Time to Complete",
"HW" : "Dismissed Pending Completion",
"II" : "Reinstated Pending Completion",
"IU" : "Partial Completion",
"JG" : "Partial Completion Extended Time to Complete",
"JM" : "Inspected Prior to Completion",
"KE" : "Completed Pending Inspection",
"KK" : "Completed Extended Time to Perfect",
"KQ" : "Dismissed Pending Inspection",
"LC" : "Reinstated Pending inspection",
"LG" : "Completed Partial inspection",
"LK" : "Partial Inspection Extended Time to Perfect",
"LO" : "Inspected Pending Perfection",
"LR" : "Inspected Pending Perfection Extended Time to Perfect",
"LU" : "Dismissed Pending Perfection",
"LZ" : "Reinstated Pending Perfection",
"MM" : "Proposed Certificate",
"MR" : "Proposed Certificate Extended Time to Perfect",
"NK" : "Certificated Issued",
"NQ" : "Dismissed After Certificated Issued",
"NT" : "Reinstated After Certificate Issued",
"NV" : "Reinstated After Vested"
}

def retrieveStatusType(colrowValue):
    """Translate a status code into its descriptive label.

    Args:
        colrowValue: Raw value from one cell; may be a string, None/NaN, or empty.

    Returns:
        The matching statusTypeDict value, or "Unspecified" when the value is
        missing, empty, or not a known code. Surrounding whitespace is ignored.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get supplies the fallback directly; the former bare `except:` would also
    # have hidden unrelated errors (including KeyboardInterrupt).
    return statusTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Decode wrf_status into the 'Status' label column. Series.map calls the lookup once
# per value and avoids building a row object for every record, which the previous
# DataFrame.apply(axis=1) did.
df['Status'] = df['wrf_status'].map(retrieveStatusType)
df.head(3)

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y,Right Type,BenUse,WatersourceType,Status
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1.0,36,134.0,134.0,AF,1,1043567968.0,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,1943-01-07,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.9982,,,,18,,3,OT,,1.0,,,,1043568000.0,12/07/2020,Appropriation,Irrigation,Groundwater,Certificated Issued
1,37,A,,37,0,IND,S,1,8,8.0,,JO,NK,41446,12,S,22,E,20,2,2900.0,2800.0,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,1943-01-09,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020,Appropriation,Industrial,Surface Water,Certificated Issued
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680.0,2540.0,,SW,SW,NE,1.0,119538,480.0,480.0,AF,1,1040286889.0,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,1945-08-06,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.8425,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1040287000.0,12/07/2020,Appropriation,Irrigation,Groundwater,Certificated Issued


In [12]:
#Basin Code
# Lookup from the basin number (as a string key) to the basin name.
basinDict = {
"1" : "Missouri River",
"2" : "S F Big Nemaha River",
"3" : "Marais Des Cygnes River",
"4" : "Sugar Creek",
"5" : "Pottawatomie Creek",
"6" : "Little Osage River",
"7" : "Marmaton River",
"8" : "Kansas River",
"9" : "Stranger Creek",
"10" : "Wakarusa River",
"11" : "Delaware River",
"12" : "Vermillion Creek",
"13" : "Big Blue River",
"14" : "Black Vermillion River",
"15" : "Little Blue River",
"16" : "Mill Creek",
"17" : "Smoky Hill River",
"18" : "Saline River",
"19" : "Big Creek",
"20" : "Hackberry Creek",
"21" : "Ladder Creek",
"22" : "N F Smoky Hill River",
"23" : "Solomon River",
"24" : "Salt Creek",
"25" : "S F Solomon River",
"26" : "N F Solomon River",
"27" : "Republican River",
"28" : "Prairie Dog Creek",
"29" : "Sappa Creek",
"30" : "Beaver Creek",
"31" : "S F Republican River",
"32" : "Arikaree River",
"33" : "Arkansas River",
"34" : "Neosho River",
"35" : "Spring River",
"36" : "Cottonwood River",
"37" : "Verdigris River",
"38" : "Caney River",
"39" : "Elk River",
"40" : "Fall River",
"41" : "Cimarron River",
"42" : "Bluff Creek (cimarron)",
"43" : "Crooked Creek",
"44" : "N F Cimarron River",
"45" : "Bear Creek",
"46" : "Salt Fork Arkansas River",
"47" : "Medicine Lodge River",
"48" : "Chikaskia River",
"49" : "Bluff Creek (chikaskia)",
"50" : "Sandy Creek",
"51" : "Walnut River",
"52" : "Ninnescah River",
"53" : "N F Ninnescah River",
"54" : "S F Ninnescah River",
"55" : "Little Arkansas River",
"56" : "Cow Creek",
"57" : "Rattlesnake Creek",
"58" : "Walnut Creek",
"59" : "Pawnee River",
"60" : "Buckner Creek",
"61" : "Whitewoman Creek",
"62" : "Driftwood Creek"
}

def retrieveBasin(colrowValue):
    """Translate a basin number into its basin name.

    Args:
        colrowValue: Raw value from one cell; may be an int, float, string,
            None/NaN, or empty.

    Returns:
        The matching basinDict value, or "Unspecified" when the value is
        missing, empty, or not a known basin number. Surrounding whitespace is
        ignored, and whole-number floats (18.0 or "18.0") are treated as the
        corresponding integer code.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    basinKey = str(colrowValue).strip()
    if basinKey not in basinDict:
        # A numeric column that contains blanks is read by pandas as float, so basin
        # 18 can arrive as 18.0; collapse whole-number values to "18" before lookup
        # instead of silently reporting them as "Unspecified".
        try:
            numericKey = float(basinKey)
        except ValueError:
            return "Unspecified"
        if numericKey.is_integer():
            basinKey = str(int(numericKey))
    return basinDict.get(basinKey, "Unspecified")

# Decode basin into the 'BasinName' column. Series.map calls the lookup once per value
# and avoids building a row object for every record, which the previous
# DataFrame.apply(axis=1) did.
df['BasinName'] = df['basin'].map(retrieveBasin)
df.head(3)

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y,Right Type,BenUse,WatersourceType,Status,BasinName
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1.0,36,134.0,134.0,AF,1,1043567968.0,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,1943-01-07,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.9982,,,,18,,3,OT,,1.0,,,,1043568000.0,12/07/2020,Appropriation,Irrigation,Groundwater,Certificated Issued,Saline River
1,37,A,,37,0,IND,S,1,8,8.0,,JO,NK,41446,12,S,22,E,20,2,2900.0,2800.0,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,1943-01-09,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020,Appropriation,Industrial,Surface Water,Certificated Issued,Kansas River
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680.0,2540.0,,SW,SW,NE,1.0,119538,480.0,480.0,AF,1,1040286889.0,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,1945-08-06,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.8425,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1040287000.0,12/07/2020,Appropriation,Irrigation,Groundwater,Certificated Issued,Smoky Hill River


In [13]:
#County Code
# Lookup from the two-letter county code to the county name.
countyDict = {
"AL" : "Allen",
"AN" : "Anderson",
"AT" : "Atchison",
"BA" : "Barber",
"BT" : "Barton",
"BB" : "Bourbon",
"BR" : "Brown",
"BU" : "Butler",
"CS" : "Chase",
"CQ" : "Chautauqua",
"CK" : "Cherokee",
"CN" : "Cheyenne",
"CA" : "Clark",
"CY" : "Clay",
"CD" : "Cloud",
"CF" : "Coffey",
"CM" : "Comanche",
"CL" : "Cowley",
"CR" : "Crawford",
"DC" : "Decatur",
"DK" : "Dickinson",
"DP" : "Doniphan",
"DG" : "Douglas",
"ED" : "Edwards",
"EK" : "Elk",
"EL" : "Ellis",
"EW" : "Ellsworth",
"FI" : "Finney",
"FO" : "Ford",
"FR" : "Franklin",
"GE" : "Geary",
"GO" : "Gove",
"GH" : "Graham",
"GT" : "Grant",
"GY" : "Gray",
"GL" : "Greeley",
"GW" : "Greenwood",
"HM" : "Hamilton",
"HP" : "Harper",
"HV" : "Harvey",
"HS" : "Haskell",
"HG" : "Hodgeman",
"JA" : "Jackson",
"JF" : "Jefferson",
"JW" : "Jewell",
"JO" : "Johnson",
"KE" : "Kearny",
"KM" : "Kingman",
"KW" : "Kiowa",
"LB" : "Labette",
"LE" : "Lane",
"LV" : "Leavenworth",
"LC" : "Lincoln",
"LN" : "Linn",
"LG" : "Logan",
"LY" : "Lyon",
"MN" : "Marion",
"MS" : "Marshall",
"MP" : "McPherson",
"ME" : "Meade",
"MI" : "Miami",
"MC" : "Mitchell",
"MG" : "Montgomery",
"MR" : "Morris",
"MT" : "Morton",
"NM" : "Nemaha",
"NO" : "Neosho",
"NS" : "Ness",
"NT" : "Norton",
"OS" : "Osage",
"OB" : "Osborne",
"OT" : "Ottawa",
"PN" : "Pawnee",
"PL" : "Phillips",
"PT" : "Pottawatomie",
"PR" : "Pratt",
"RA" : "Rawlins",
"RN" : "Reno",
"RP" : "Republic",
"RC" : "Rice",
"RL" : "Riley",
"RO" : "Rooks",
"RH" : "Rush",
"RS" : "Russell",
"SA" : "Saline",
"SC" : "Scott",
"SG" : "Sedgwick",
"SW" : "Seward",
"SN" : "Shawnee",
"SD" : "Sheridan",
"SH" : "Sherman",
"SM" : "Smith",
"SF" : "Stafford",
"ST" : "Stanton",
"SV" : "Stevens",
"SU" : "Sumner",
"TH" : "Thomas",
"TR" : "Trego",
"WB" : "Wabaunsee",
"WA" : "Wallace",
"WS" : "Washington",
"WH" : "Wichita",
"WL" : "Wilson",
"WO" : "Woodson",
"WY" : "Wyandotte"
}


def retrieveCounty(colrowValue):
    """Translate a county code into the county name.

    Args:
        colrowValue: Raw value from one cell; may be a string, None/NaN, or empty.

    Returns:
        The matching countyDict value, or "Unspecified" when the value is
        missing, empty, or not a known code. Surrounding whitespace is ignored.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return "Unspecified"
    # dict.get supplies the fallback directly; the former bare `except:` would also
    # have hidden unrelated errors (including KeyboardInterrupt).
    return countyDict.get(str(colrowValue).strip(), "Unspecified")

# Decode county into the 'CountyName' column. Series.map calls the lookup once per
# value and avoids building a row object for every record, which the previous
# DataFrame.apply(axis=1) did.
df['CountyName'] = df['county'].map(retrieveCounty)
df.head(3)

Unnamed: 0,wr_id_x,right_type_x,vcnty_code,wr_num_x,wr_qual,umw_code_x,source,fo_num_x,basin,stream,gmd_x,county,wrf_status,pdiv_id_x,twp,twp_dir,rng,rng_dir,sect,dwr_id_x,feet_north_x,feet_west_x,qual4_x,qual3_x,qual2_x,qual1_x,nwb,quant_id,auth_quant,add_quant,quant_unit,qstor_ind,well_kid_x,wimas_date_x,KeyJoin,wr_id_y,right_type_y,vested_county_code,wr_num_y,wr_qualifier,umw_code_y,wrfile_active_ind,source_of_supply,current_status_code,priority_date,pdiv_id_y,fpdiv_active_ind,township,township_dir,range_num,range_dir,section_num,dwr_id_y,qual1_y,qual2_y,qual3_y,qual4_y,longitude,latitude,fpdiv_comment,feet_north_y,feet_west_y,basin_num,gmd_y,fo_num_y,county_code,stream_num,num_wells,lot_number,lot_qualifier_one,lot_qualifier_two,well_kid_y,wimas_date_y,Right Type,BenUse,WatersourceType,Status,BasinName,CountyName
0,36,A,,36,0,IRR,G,3,18,,,OT,NK,22899,12,S,5,W,22,1,,,,SW,NW,NW,1.0,36,134.0,134.0,AF,1,1043567968.0,12/07/2020,36_22899,36,A,,36,0,IRR,1,G,NK,1943-01-07,22899,1,12,S,5,W,22,1,NW,NW,SW,,-97.87295,38.9982,,,,18,,3,OT,,1.0,,,,1043568000.0,12/07/2020,Appropriation,Irrigation,Groundwater,Certificated Issued,Saline River,Ottawa
1,37,A,,37,0,IND,S,1,8,8.0,,JO,NK,41446,12,S,22,E,20,2,2900.0,2800.0,,,,,,101838,42522.503,42522.503,AF,1,,12/07/2020,37_41446,37,A,,37,0,IND,1,S,NK,1943-01-09,41446,1,12,S,22,E,20,2,,,,,-94.99305,38.99368,,2900.0,2800.0,8,,1,JO,8.0,,3.0,,,,12/07/2020,Appropriation,Industrial,Surface Water,Certificated Issued,Kansas River,Johnson
2,51,A,,51,0,IRR,G,4,17,,1.0,SC,NK,37651,17,S,32,W,27,1,2680.0,2540.0,,SW,SW,NE,1.0,119538,480.0,480.0,AF,1,1040286889.0,12/07/2020,51_37651,51,A,,51,0,IRR,1,G,NK,1945-08-06,37651,1,17,S,32,W,27,1,NE,SW,SW,,-100.8425,38.54818,,2680.0,2540.0,17,1.0,4,SC,,1.0,,,,1040287000.0,12/07/2020,Appropriation,Irrigation,Groundwater,Certificated Issued,Smoky Hill River,Scott


In [14]:
# Print the dtype of every column of df, with the row/column display limits lifted
# for the duration of this block so the listing is not elided.
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    print(df.dtypes)

wr_id_x                         int64
right_type_x                   object
vcnty_code                     object
wr_num_x                        int64
wr_qual                        object
umw_code_x                     object
source                         object
fo_num_x                        int64
basin                           int64
stream                         object
gmd_x                         float64
county                         object
wrf_status                     object
pdiv_id_x                       int64
twp                             int64
twp_dir                        object
rng                             int64
rng_dir                        object
sect                            int64
dwr_id_x                        int64
feet_north_x                   object
feet_west_x                    object
qual4_x                        object
qual3_x                        object
qual2_x                        object
qual1_x                        object
nwb         

In [15]:
# Write df to P_KansasMaster.csv (a relative path, so it lands in the current working
# directory) without the DataFrame index column.
df.to_csv(path_or_buf='P_KansasMaster.csv', index=False)