# Pre-processing Wyoming Allocation data for WaDEQA upload.
Date Updated: 10/11/2020
Purpose:  To pre-process the Wyoming data into one master file for simple DataFrame creation and extraction

Notes:
Merging GW and SW sources into one workable input.

In [None]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

# POD Data

### POD Groundwater

In [None]:
#Working Directory and Input File
# NOTE: absolute, machine-specific path. os.chdir changes the process working
# directory, so every later relative read/write in this notebook resolves here.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Wyoming/WaterAllocation/RawInputData"
os.chdir(workingDir)

# Raw groundwater point-of-diversion (POD) records.
GW_Input = "POD_GW_DepthI_FC_input.csv"
dfwyg = pd.read_csv(GW_Input)
dfwyg.head(10)

In [None]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
# First pass parses the raw column into datetimes.
dfwyg['PriorityDate'] = pd.to_datetime(dfwyg['PriorityDate'])
# Second pass round-trips through an 'mm/dd/YYYY' string, which discards any
# time-of-day component before re-parsing to datetime.
dfwyg['PriorityDate'] = pd.to_datetime(dfwyg["PriorityDate"].dt.strftime('%m/%d/%Y'))
dfwyg.head(3)

In [None]:
#Creating WaDE Owner Field.
#Create from the Company field. If empty, use LastName + FirstName fields.

def retrieveOwner(Com, FN, LN):
    """Return the owner label for one record.

    Parameters
    ----------
    Com : company name; may be missing (NaN/None) or blank.
    FN  : first name; may be missing or blank.
    LN  : last name; may be missing or blank.

    Returns
    -------
    str
        The stripped company name when one is present; otherwise
        "LastName, FirstName" built from whichever name parts are present
        (a single part is returned without the comma); "" when nothing is
        available.
    """
    def _clean(value):
        # The null test must run BEFORE str(): str(NaN) is the non-empty text
        # "nan", which would otherwise be mistaken for a real value.
        return "" if pd.isnull(value) else str(value).strip()

    company = _clean(Com)
    if company:
        return company

    # Join only the name parts that exist so a missing part never shows up
    # as the literal text "nan" or as a dangling ", ".
    return ", ".join(part for part in (_clean(LN), _clean(FN)) if part)
# Build the owner column row by row from the Company / FirstName / LastName columns.
dfwyg['WaDEOwner'] = dfwyg.apply(lambda row: retrieveOwner(row['Company'], row['FirstName'], row['LastName']), axis=1)
dfwyg.head(3)

In [None]:
#Creating Beneficial Use.
#Need to translate WY abbreviations to a workable format.

# Wyoming beneficial-use code -> descriptive label.
BenUseDict = {
"AESCNG" : "Coal Bed Natural Gas",
"AESFIS" : "Fish Propagation (Aesthetics)",
"AESGWR" : "Ground Water Recharge (Aesthetics)",
"AESREC" : "Recreation (Aesthetics)",
"AESSTK" : "Stock (Aesthetics)",
"AESWET" : "Wetlands (Aesthetics)",
"AESWIL" : "Wildlife (Aesthetics)",
"AQU" : "Aquaculture",
"BOT" : "Bottling Water",
"CAG" : "Commercial Agriculture",
"CBM" : "Coal Bed Methane - Ground Water",
"CHE" : "Chemical",
"CIS" : "Consumptive Instream Flow",
"CMU" : "Combined Uses",
"CNG_SW" : "Coal Bed Natural Gas",
"COM" : "Commercial",
"CUL" : "Culinary",
"DAI" : "Dairy",
"DEW" : "Mine Dewatering",
"DOM_GW" : "Domestic - Ground Water",
"DOM_SW" : "Domestic - Surface Water",
"DPA" : "Domestic (Phase 2 Award)",
"DRI" : "Drilling",
"DSP" : "Domestic Supply",
"DTA" : "Dust Abatement",
"ECAP" : "Existing Capacity",
"ERO" : "Erosion Control",
"FIR" : "Fire Protection",
"FIS" : "Fish Propagation",
"FLO" : "Flood Control",
"FTH" : "Flow Through",
"GWR" : "Ground Water Recharge",
"HEX" : "Heat Extraction",
"HWY" : "Highway Construction",
"HYD" : "Hydropower",
"HYT" : "Hydrostatic Testing",
"ICE" : "Ice Cutting",
"IFA" : "Instream Flow (Phase 2 Award)",
"IND_GW" : "Industrial - Ground Water",
"IND_SW" : "Industrial - Surface Water",
"IRR_GW" : "Irrigation - Ground Water",
"IRR_SW" : "Irrigation - Surface Water",
"ISF" : "Instream Flow",
"LAK" : "Maintain Natural Lake Level (Phase 2 Award)",
"LAW" : "Large Scale Landscape",
"MAI" : "Maintenance (Equipment Washing)",
"MAN" : "Manufacturing",
"MEC" : "Mechanical ",  # NOTE(review): label carries a trailing space - confirm whether intentional
"MED" : "Medicinal",
"MEM" : "Municipal (Emergency)",
"MIL" : "Milling",
"MIN" : "Mining",
"MIS" : "Miscellaneous - Ground Water",
"MON" : "Monitor",
"MUN_GW" : "Municipal - Ground Water",
"MUN_SW" : "Municipal - Surface Water",
"NAT" : "Natural Flow (Phase 2 Award)",
"O&G" : "Oil and Gas Well Drilling",
"OIL" : "Oil",
"OTH" : "Other",
"OTH_CM" : "Other - Commercial",
"OTH_IN" : "Other - Industrial",
"OTH_TM" : "Other - Temporary",
"P&S" : "Potable and Sanitary Supply",
"PCT" : "Pollution Control",
"POW" : "Power",
"RAI" : "Railroad",
"RDC" : "Road Construction",
"REC" : "Recreation",
"REF" : "Refining",
"RES" : "Reservoir Supply",
"REW" : "Reclamation Watering",
"S&D" : "Stock and Domestic",
"SDG" : "Gpm For Domestic or Stock",
"SDU" : "Stock and Domestic",
"SED" : "Sediment Control",
"SNO" : "Snow Making",
"STE" : "Stream",
"STK" : "Stock Watering",
"STKNDMS" : "Stock and Domestic",
"STO" : "Stock",
"STS" : "Stock",
"STW" : "Stock Watering",
"SWD" : "Subdivision",
"SWP" : "Stock Water Pipeline",
"TEM" : "Temporary",
"TENL" : "Total Enlargement",
"TRA" : "Transportation",
"TST" : "Test Well",
"TWR" : "Tree Watering",
"UTL" : "Utilities",
"W&S" : "Wild and Scenic",
"WDR" : "Well Drilling",
"WET" : "Wetlands",
"WHL" : "Water Hauls",
"WL" : "Wildlife"}

def retrieveBenUse(colrowValue):
    """Translate a ';'-separated list of WY use codes into a label string.

    Parameters
    ----------
    colrowValue : raw cell value, e.g. "IRR_SW; STK"; may be missing or blank.

    Returns
    -------
    str
        The BenUseDict labels joined with ", " in the order given, or
        "Unspecified" when the value is missing/blank or when ANY code is
        not present in BenUseDict (the translation is all-or-nothing).
    """
    if pd.isnull(colrowValue):
        return "Unspecified"

    # Blank tokens (from a trailing or doubled ';') are skipped so that they
    # do not invalidate an otherwise translatable list.
    codes = [token.strip() for token in str(colrowValue).split(';')]
    codes = [code for code in codes if code]
    if not codes:
        return "Unspecified"

    try:
        labels = [BenUseDict[code] for code in codes]
    except KeyError:
        # Only an unknown code is an expected failure; anything else should surface.
        return "Unspecified"
    return ", ".join(labels)

# Translate each row's 'Uses' codes into a readable beneficial-use string.
dfwyg['WaDEBenUse'] = dfwyg.apply(lambda row: retrieveBenUse(row['Uses']), axis=1)
dfwyg.head(10)

In [None]:
# Create output dataframe for ground water
# Column order here is the column order of the frame built below.
columnslist = [   
    ### Water Source Info ###
    "in_WaterSourceName",
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_Latitude",
    "in_Longitude",
    "in_SiteName",
    "in_SiteTypeCV",
    "in_PODorPOUSite",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationFlow_CFS",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationLegalStatusCV",
    "in_BeneficialUseCategory",
    "in_IrrigatedAcreage"]

# Empty frame sharing dfwyg's index, i.e. one row per groundwater input record.
dfground = pd.DataFrame(columns=columnslist, index=dfwyg.index)

In [None]:
#############################################################################################
# Populate the groundwater output frame from the raw groundwater columns.
#WaterSource
dfground['in_WaterSourceTypeCV'] = 'Groundwater'
dfground['in_WaterSourceName'] = "Unspecified"

#Site
dfground['in_Latitude'] = dfwyg['Latitude_Double'].astype(float)
dfground['in_Longitude'] = dfwyg['Longitude_Double'].astype(float)
# astype(str) turns missing values into the text "nan"; that text is only
# blanked later, in the final concatenate cell's replace("nan", "").
dfground['in_SiteName'] = dfwyg['FacilityName'].astype(str)
dfground['in_SiteTypeCV'] = dfwyg['Facility_type'].astype(str)
dfground['in_PODorPOUSite'] = "POD"

#AllocationAmount_fact
# NOTE(review): the source column name mentions both CFS and GPM - confirm the
# units of these values before treating them as CFS.
dfground['in_AllocationFlow_CFS'] = dfwyg['Total_Flow_CFS___Appropriation_GPM_'].astype(float)
dfground['in_AllocationNativeID'] = dfwyg['WR_Number'].astype(str)
dfground['in_AllocationOwner'] = dfwyg['WaDEOwner'].astype(str)
dfground['in_AllocationPriorityDate'] = dfwyg['PriorityDate']
dfground['in_AllocationLegalStatusCV'] = dfwyg['SummaryWRStatus'].astype(str)
dfground['in_BeneficialUseCategory'] = dfwyg['WaDEBenUse'].astype(str)
# No acreage is taken from the groundwater POD input; left blank.
dfground['in_IrrigatedAcreage'] = ""

In [None]:
# Remove NaN values
# Only real NaN cells are blanked here; the text "nan" produced by astype(str) is untouched.
dfground = dfground.replace(np.nan, '')  # Replaces NaN values with blank.
dfground.head(3)

### POD Surface Water

In [None]:
#Working Directory and Input File
# NOTE: absolute, machine-specific path. os.chdir changes the process working
# directory, so every later relative read/write in this notebook resolves here.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Wyoming/WaterAllocation/RawInputData"
os.chdir(workingDir)

# Raw surface-water point-of-diversion (POD) records.
SW_Input = "POD_SW_DepthI_FC_input.csv"
dfwys = pd.read_csv(SW_Input)
dfwys.head(3)

In [None]:
# #Update datatype of Priority Date to fit WaDE 2.0 structure
# dfwys['PriorityDate'] = pd.to_datetime(dfwys['PriorityDate'])
# dfwys['PriorityDate'] = pd.to_datetime(dfwys["PriorityDate"].dt.strftime('%m/%d/%Y'))
# dfwys.head(3)

In [None]:
#Creating WaDE Owner Field.
#Create from the Company field. If empty, use LastName + FirstName fields.

def retrieveOwner(Com, FN, LN):
    """Return the owner label for one record.

    Parameters
    ----------
    Com : company name; may be missing (NaN/None) or blank.
    FN  : first name; may be missing or blank.
    LN  : last name; may be missing or blank.

    Returns
    -------
    str
        The stripped company name when one is present; otherwise
        "LastName, FirstName" built from whichever name parts are present
        (a single part is returned without the comma); "" when nothing is
        available.
    """
    def _clean(value):
        # The null test must run BEFORE str(): str(NaN) is the non-empty text
        # "nan", which would otherwise be mistaken for a real value.
        return "" if pd.isnull(value) else str(value).strip()

    company = _clean(Com)
    if company:
        return company

    # Join only the name parts that exist so a missing part never shows up
    # as the literal text "nan" or as a dangling ", ".
    return ", ".join(part for part in (_clean(LN), _clean(FN)) if part)
# Build the owner column row by row from the Company / FirstName / LastName columns.
dfwys['WaDEOwner'] = dfwys.apply(lambda row: retrieveOwner(row['Company'], row['FirstName'], row['LastName']), axis=1)
dfwys.head(3)

In [None]:
#Creating Beneficial Use.
#Need to translate WY abbreviations to a workable format.

# Wyoming beneficial-use code -> descriptive label.
BenUseDict = {
"AESCNG" : "Coal Bed Natural Gas",
"AESFIS" : "Fish Propagation (Aesthetics)",
"AESGWR" : "Ground Water Recharge (Aesthetics)",
"AESREC" : "Recreation (Aesthetics)",
"AESSTK" : "Stock (Aesthetics)",
"AESWET" : "Wetlands (Aesthetics)",
"AESWIL" : "Wildlife (Aesthetics)",
"AQU" : "Aquaculture",
"BOT" : "Bottling Water",
"CAG" : "Commercial Agriculture",
"CBM" : "Coal Bed Methane - Ground Water",
"CHE" : "Chemical",
"CIS" : "Consumptive Instream Flow",
"CMU" : "Combined Uses",
"CNG_SW" : "Coal Bed Natural Gas",
"COM" : "Commercial",
"CUL" : "Culinary",
"DAI" : "Dairy",
"DEW" : "Mine Dewatering",
"DOM_GW" : "Domestic - Ground Water",
"DOM_SW" : "Domestic - Surface Water",
"DPA" : "Domestic (Phase 2 Award)",
"DRI" : "Drilling",
"DSP" : "Domestic Supply",
"DTA" : "Dust Abatement",
"ECAP" : "Existing Capacity",
"ERO" : "Erosion Control",
"FIR" : "Fire Protection",
"FIS" : "Fish Propagation",
"FLO" : "Flood Control",
"FTH" : "Flow Through",
"GWR" : "Ground Water Recharge",
"HEX" : "Heat Extraction",
"HWY" : "Highway Construction",
"HYD" : "Hydropower",
"HYT" : "Hydrostatic Testing",
"ICE" : "Ice Cutting",
"IFA" : "Instream Flow (Phase 2 Award)",
"IND_GW" : "Industrial - Ground Water",
"IND_SW" : "Industrial - Surface Water",
"IRR_GW" : "Irrigation - Ground Water",
"IRR_SW" : "Irrigation - Surface Water",
"ISF" : "Instream Flow",
"LAK" : "Maintain Natural Lake Level (Phase 2 Award)",
"LAW" : "Large Scale Landscape",
"MAI" : "Maintenance (Equipment Washing)",
"MAN" : "Manufacturing",
"MEC" : "Mechanical ",  # NOTE(review): label carries a trailing space - confirm whether intentional
"MED" : "Medicinal",
"MEM" : "Municipal (Emergency)",
"MIL" : "Milling",
"MIN" : "Mining",
"MIS" : "Miscellaneous - Ground Water",
"MON" : "Monitor",
"MUN_GW" : "Municipal - Ground Water",
"MUN_SW" : "Municipal - Surface Water",
"NAT" : "Natural Flow (Phase 2 Award)",
"O&G" : "Oil and Gas Well Drilling",
"OIL" : "Oil",
"OTH" : "Other",
"OTH_CM" : "Other - Commercial",
"OTH_IN" : "Other - Industrial",
"OTH_TM" : "Other - Temporary",
"P&S" : "Potable and Sanitary Supply",
"PCT" : "Pollution Control",
"POW" : "Power",
"RAI" : "Railroad",
"RDC" : "Road Construction",
"REC" : "Recreation",
"REF" : "Refining",
"RES" : "Reservoir Supply",
"REW" : "Reclamation Watering",
"S&D" : "Stock and Domestic",
"SDG" : "Gpm For Domestic or Stock",
"SDU" : "Stock and Domestic",
"SED" : "Sediment Control",
"SNO" : "Snow Making",
"STE" : "Stream",
"STK" : "Stock Watering",
"STKNDMS" : "Stock and Domestic",
"STO" : "Stock",
"STS" : "Stock",
"STW" : "Stock Watering",
"SWD" : "Subdivision",
"SWP" : "Stock Water Pipeline",
"TEM" : "Temporary",
"TENL" : "Total Enlargement",
"TRA" : "Transportation",
"TST" : "Test Well",
"TWR" : "Tree Watering",
"UTL" : "Utilities",
"W&S" : "Wild and Scenic",
"WDR" : "Well Drilling",
"WET" : "Wetlands",
"WHL" : "Water Hauls",
"WL" : "Wildlife"}

def retrieveBenUse(colrowValue):
    """Translate a ';'-separated list of WY use codes into a label string.

    Parameters
    ----------
    colrowValue : raw cell value, e.g. "IRR_SW; STK"; may be missing or blank.

    Returns
    -------
    str
        The BenUseDict labels joined with ", " in the order given, or
        "Unspecified" when the value is missing/blank or when ANY code is
        not present in BenUseDict (the translation is all-or-nothing).
    """
    if pd.isnull(colrowValue):
        return "Unspecified"

    # Blank tokens (from a trailing or doubled ';') are skipped so that they
    # do not invalidate an otherwise translatable list.
    codes = [token.strip() for token in str(colrowValue).split(';')]
    codes = [code for code in codes if code]
    if not codes:
        return "Unspecified"

    try:
        labels = [BenUseDict[code] for code in codes]
    except KeyError:
        # Only an unknown code is an expected failure; anything else should surface.
        return "Unspecified"
    return ", ".join(labels)

# Translate each row's 'Uses' codes into a readable beneficial-use string.
dfwys['WaDEBenUse'] = dfwys.apply(lambda row: retrieveBenUse(row['Uses']), axis=1)
dfwys.head(3)

In [None]:
# Create output dataframe for surface water
# Column order here is the column order of the frame built below.
columnslist = [   
    ### Water Source Info ###
    "in_WaterSourceName",
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_Latitude",
    "in_Longitude",
    "in_SiteName",
    "in_SiteTypeCV",
    "in_PODorPOUSite",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationFlow_CFS",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationLegalStatusCV",
    "in_BeneficialUseCategory",
    "in_IrrigatedAcreage"
]

# Empty frame sharing dfwys's index, i.e. one row per surface-water input record.
df_Surface = pd.DataFrame(columns=columnslist, index=dfwys.index)

In [None]:
#############################################################################################
# Populate the surface-water output frame from the raw surface-water columns.
#WaterSource
df_Surface['in_WaterSourceTypeCV'] = 'Surface Water'
df_Surface['in_WaterSourceName'] = dfwys['Stream_Source'].astype(str)

#Site
df_Surface['in_Latitude'] = dfwys['Latitude_Double'].astype(float)
df_Surface['in_Longitude'] = dfwys['Longitude_Double'].astype(float)
# astype(str) turns missing values into the text "nan"; that text is only
# blanked later, in the final concatenate cell's replace("nan", "").
df_Surface['in_SiteName'] = dfwys['FacilityName'].astype(str)
df_Surface['in_SiteTypeCV'] = dfwys['Facility_type'].astype(str)
df_Surface['in_PODorPOUSite'] = "POD"

#AllocationAmount_fact
# NOTE(review): the source column name mentions both CFS and GPM - confirm the
# units of these values before treating them as CFS.
df_Surface['in_AllocationFlow_CFS'] = dfwys['Total_Flow_CFS___Appropriation_GPM_'].astype(float)
df_Surface['in_AllocationNativeID'] = dfwys['WR_Number'].astype(str)
df_Surface['in_AllocationOwner'] = dfwys['WaDEOwner'].astype(str)
# PriorityDate is copied as read from the CSV: the datetime-conversion cell for
# surface water is commented out, unlike the groundwater and POU sections.
df_Surface['in_AllocationPriorityDate'] = dfwys['PriorityDate']
df_Surface['in_AllocationLegalStatusCV'] = dfwys['SummaryWRStatus'].astype(str)
df_Surface['in_BeneficialUseCategory'] = dfwys['WaDEBenUse'].astype(str)
# No acreage is taken from the surface-water POD input; left blank.
df_Surface['in_IrrigatedAcreage'] = ""

In [None]:
# Remove NaN values
# Only real NaN cells are blanked here; the text "nan" produced by astype(str) is untouched.
df_Surface = df_Surface.replace(np.nan, '')  # Replaces NaN values with blank.
df_Surface.head(3)

### POD Concatenate

In [None]:
# Concatenate
# Stack the groundwater rows on top of the surface-water rows. pd.concat keeps
# each frame's own row labels here (ignore_index is not set), so labels from
# the two inputs can repeat in dfPOD.
frames = [dfground, df_Surface]
dfPOD = pd.concat(frames)
dfPOD

# POU Data

In [None]:
#Working Directory and Input File
# NOTE: absolute, machine-specific path. os.chdir changes the process working
# directory, so the relative to_csv at the end of the notebook writes here.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Wyoming/WaterAllocation/RawInputData"
os.chdir(workingDir)

# Raw place-of-use (POU) records.
POU_Input = "POU_input.csv"
df = pd.read_csv(POU_Input)
df.head(10)

In [None]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
# First pass parses the raw column into datetimes.
df['PriorityDate'] = pd.to_datetime(df['PriorityDate'])
# Second pass round-trips through an 'mm/dd/YYYY' string, which discards any
# time-of-day component before re-parsing to datetime.
df['PriorityDate'] = pd.to_datetime(df["PriorityDate"].dt.strftime('%m/%d/%Y'))
df.head(3)

In [None]:
#Creating WaDE Owner Field.
#Create from the Company field. If empty, use LastName + FirstName fields.

def retrieveOwner(Com, FN, LN):
    """Return the owner label for one record.

    Parameters
    ----------
    Com : company name; may be missing (NaN/None) or blank.
    FN  : first name; may be missing or blank.
    LN  : last name; may be missing or blank.

    Returns
    -------
    str
        The stripped company name when one is present; otherwise
        "LastName, FirstName" built from whichever name parts are present
        (a single part is returned without the comma); "" when nothing is
        available.
    """
    def _clean(value):
        # The null test must run BEFORE str(): str(NaN) is the non-empty text
        # "nan", which would otherwise be mistaken for a real value.
        return "" if pd.isnull(value) else str(value).strip()

    company = _clean(Com)
    if company:
        return company

    # Join only the name parts that exist so a missing part never shows up
    # as the literal text "nan" or as a dangling ", ".
    return ", ".join(part for part in (_clean(LN), _clean(FN)) if part)
# Build the owner column row by row from the Company / FirstName / LastName columns.
df['WaDEOwner'] = df.apply(lambda row: retrieveOwner(row['Company'], row['FirstName'], row['LastName']), axis=1)
df.head(3)

In [None]:
#Creating Beneficial Use.
#Need to translate WY abbreviations to a workable format.

# Wyoming beneficial-use code -> descriptive label.
BenUseDict = {
"AESCNG" : "Coal Bed Natural Gas",
"AESFIS" : "Fish Propagation (Aesthetics)",
"AESGWR" : "Ground Water Recharge (Aesthetics)",
"AESREC" : "Recreation (Aesthetics)",
"AESSTK" : "Stock (Aesthetics)",
"AESWET" : "Wetlands (Aesthetics)",
"AESWIL" : "Wildlife (Aesthetics)",
"AQU" : "Aquaculture",
"BOT" : "Bottling Water",
"CAG" : "Commercial Agriculture",
"CBM" : "Coal Bed Methane - Ground Water",
"CHE" : "Chemical",
"CIS" : "Consumptive Instream Flow",
"CMU" : "Combined Uses",
"CNG_SW" : "Coal Bed Natural Gas",
"COM" : "Commercial",
"CUL" : "Culinary",
"DAI" : "Dairy",
"DEW" : "Mine Dewatering",
"DOM_GW" : "Domestic - Ground Water",
"DOM_SW" : "Domestic - Surface Water",
"DPA" : "Domestic (Phase 2 Award)",
"DRI" : "Drilling",
"DSP" : "Domestic Supply",
"DTA" : "Dust Abatement",
"ECAP" : "Existing Capacity",
"ERO" : "Erosion Control",
"FIR" : "Fire Protection",
"FIS" : "Fish Propagation",
"FLO" : "Flood Control",
"FTH" : "Flow Through",
"GWR" : "Ground Water Recharge",
"HEX" : "Heat Extraction",
"HWY" : "Highway Construction",
"HYD" : "Hydropower",
"HYT" : "Hydrostatic Testing",
"ICE" : "Ice Cutting",
"IFA" : "Instream Flow (Phase 2 Award)",
"IND_GW" : "Industrial - Ground Water",
"IND_SW" : "Industrial - Surface Water",
"IRR_GW" : "Irrigation - Ground Water",
"IRR_SW" : "Irrigation - Surface Water",
"ISF" : "Instream Flow",
"LAK" : "Maintain Natural Lake Level (Phase 2 Award)",
"LAW" : "Large Scale Landscape",
"MAI" : "Maintenance (Equipment Washing)",
"MAN" : "Manufacturing",
"MEC" : "Mechanical ",  # NOTE(review): label carries a trailing space - confirm whether intentional
"MED" : "Medicinal",
"MEM" : "Municipal (Emergency)",
"MIL" : "Milling",
"MIN" : "Mining",
"MIS" : "Miscellaneous - Ground Water",
"MON" : "Monitor",
"MUN_GW" : "Municipal - Ground Water",
"MUN_SW" : "Municipal - Surface Water",
"NAT" : "Natural Flow (Phase 2 Award)",
"O&G" : "Oil and Gas Well Drilling",
"OIL" : "Oil",
"OTH" : "Other",
"OTH_CM" : "Other - Commercial",
"OTH_IN" : "Other - Industrial",
"OTH_TM" : "Other - Temporary",
"P&S" : "Potable and Sanitary Supply",
"PCT" : "Pollution Control",
"POW" : "Power",
"RAI" : "Railroad",
"RDC" : "Road Construction",
"REC" : "Recreation",
"REF" : "Refining",
"RES" : "Reservoir Supply",
"REW" : "Reclamation Watering",
"S&D" : "Stock and Domestic",
"SDG" : "Gpm For Domestic or Stock",
"SDU" : "Stock and Domestic",
"SED" : "Sediment Control",
"SNO" : "Snow Making",
"STE" : "Stream",
"STK" : "Stock Watering",
"STKNDMS" : "Stock and Domestic",
"STO" : "Stock",
"STS" : "Stock",
"STW" : "Stock Watering",
"SWD" : "Subdivision",
"SWP" : "Stock Water Pipeline",
"TEM" : "Temporary",
"TENL" : "Total Enlargement",
"TRA" : "Transportation",
"TST" : "Test Well",
"TWR" : "Tree Watering",
"UTL" : "Utilities",
"W&S" : "Wild and Scenic",
"WDR" : "Well Drilling",
"WET" : "Wetlands",
"WHL" : "Water Hauls",
"WL" : "Wildlife"}

def retrieveBenUse(colrowValue):
    """Translate a ';'-separated list of WY use codes into a label string.

    Parameters
    ----------
    colrowValue : raw cell value, e.g. "IRR_SW; STK"; may be missing or blank.

    Returns
    -------
    str
        The BenUseDict labels joined with ", " in the order given, or
        "Unspecified" when the value is missing/blank or when ANY code is
        not present in BenUseDict (the translation is all-or-nothing).
    """
    if pd.isnull(colrowValue):
        return "Unspecified"

    # Blank tokens (from a trailing or doubled ';') are skipped so that they
    # do not invalidate an otherwise translatable list.
    codes = [token.strip() for token in str(colrowValue).split(';')]
    codes = [code for code in codes if code]
    if not codes:
        return "Unspecified"

    try:
        labels = [BenUseDict[code] for code in codes]
    except KeyError:
        # Only an unknown code is an expected failure; anything else should surface.
        return "Unspecified"
    return ", ".join(labels)

# Translate each row's 'Uses' codes into a readable beneficial-use string.
df['WaDEBenUse'] = df.apply(lambda row: retrieveBenUse(row['Uses']), axis=1)
df.head(10)

In [None]:
# Create output dataframe for place of use (POU)
# Column order here is the column order of the frame built below.
columnslist = [   
    ### Water Source Info ###
    "in_WaterSourceName",
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_Latitude",
    "in_Longitude",
    "in_SiteName",
    "in_SiteTypeCV",
    "in_PODorPOUSite",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationFlow_CFS",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationLegalStatusCV",
    "in_BeneficialUseCategory",
    "in_IrrigatedAcreage"]

# Empty frame sharing df's index, i.e. one row per POU input record.
dfPOU = pd.DataFrame(columns=columnslist, index=df.index)

In [None]:
#############################################################################################
# Populate the POU output frame from the raw POU columns. Note the POU input
# uses different column names from the POD inputs (Latitude, FacilityType, WRNumber).
#WaterSource
dfPOU['in_WaterSourceTypeCV'] = "Unspecified"
dfPOU['in_WaterSourceName'] = df['SupplySource'].astype(str)

#Site
dfPOU['in_Latitude'] = df['Latitude'].astype(float)
dfPOU['in_Longitude'] = df['Longitude'].astype(float)
# astype(str) turns missing values into the text "nan"; the next cell's
# replace("nan", "") blanks it.
dfPOU['in_SiteName'] = df['FacilityName'].astype(str)
dfPOU['in_SiteTypeCV'] = df['FacilityType'].astype(str)
dfPOU['in_PODorPOUSite'] = "POU"

#AllocationAmount_fact
# No flow value is taken from the POU input; left blank.
dfPOU['in_AllocationFlow_CFS'] = ""
dfPOU['in_AllocationNativeID'] = df['WRNumber'].astype(str)
dfPOU['in_AllocationOwner'] = df['WaDEOwner'].astype(str)
dfPOU['in_AllocationPriorityDate'] = df['PriorityDate']
dfPOU['in_AllocationLegalStatusCV'] = df['SummaryWRStatus'].astype(str)
dfPOU['in_BeneficialUseCategory'] = df['WaDEBenUse'].astype(str)
dfPOU['in_IrrigatedAcreage'] = df['Acres'].astype(float)

In [None]:
# Remove NaN values
# Blanks both real NaN cells and cells whose entire value is the text "nan"
# (the latter come from astype(str) on missing values).
dfPOU = dfPOU.replace(np.nan, "").replace("nan", "")  # Replaces NaN values with blank.
dfPOU.head(3)

# Concatenate POD & POU

In [None]:
# Concatenate POD and POU records into the single output frame.
frames = [dfPOD, dfPOU]
# ignore_index=True: each input frame carries its own row labels, so a plain
# concat leaves repeated labels in dfout. Index-aligned assignments made on
# dfout later are only safe when every row label is unique.
dfout = pd.concat(frames, ignore_index=True)
# Blank real NaN cells, then cells whose whole value is the text "nan"
# (produced upstream by astype(str)); de-duplicate after each pass because
# blanking can make previously distinct rows identical.
dfout = dfout.replace(np.nan, "").drop_duplicates()
dfout = dfout.replace("nan", "").drop_duplicates()
# drop_duplicates leaves gaps in the labels; renumber to a clean 0..n-1 range.
dfout = dfout.reset_index(drop=True)

print(len(dfout))
dfout

## WaDE Custom Elements (due to missing state site info)

In [None]:
# Creating WaDE Custom site native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    """Return the custom site ID: the prefix "WaDEWY_S" followed by str(colrowValue)."""
    return f"WaDEWY_S{colrowValue!s}"

# Lookup table of sites: one row per distinct (latitude, longitude, site name)
# combination found in dfout.
dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = dfout['in_Latitude']
dfSiteNativeID['in_Longitude'] = dfout['in_Longitude']
dfSiteNativeID['in_SiteName'] = dfout['in_SiteName']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

# Number the distinct sites 1..n in their current row order and turn each
# number into an ID string. dftemp shares dfSiteNativeID's index so the
# resulting Series lines up with it on assignment.
dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom site native ID
def retrieveSiteNativeID(A, B, D):
    """Look up the custom site ID for one (latitude, longitude, site name).

    A is the latitude, B the longitude and D the site name. Returns '' when
    both coordinates are blank strings or both are null, or when the
    dfSiteNativeID table holds no matching row; otherwise returns the
    'in_SiteNativeID' of the first matching row.
    """
    # Rows without coordinates never get an ID.
    if A == '' and B == '':
        return ''
    if pd.isnull(A) and pd.isnull(B):
        return ''

    same_site = ((dfSiteNativeID['in_Latitude'] == A) &
                 (dfSiteNativeID['in_Longitude'] == B) &
                 (dfSiteNativeID['in_SiteName'] == D))
    matches = dfSiteNativeID.loc[same_site, 'in_SiteNativeID']
    return '' if matches.empty else matches.iloc[0]

# Attach the site ID to every output row. Each call filters the whole
# dfSiteNativeID table, so this step does one table scan per row of dfout.
dfout['in_SiteNativeID'] = dfout.apply(lambda row: retrieveSiteNativeID( row['in_Latitude'], row['in_Longitude'], row['in_SiteName']), axis=1)
dfout

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the custom water source ID: the prefix "WaDEWY_WS" followed by str(colrowValue)."""
    return f"WaDEWY_WS{colrowValue!s}"

# Lookup table of water sources: one row per distinct (name, type) combination
# found in dfout.
dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = dfout['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

# Number the distinct water sources 1..n in their current row order and turn
# each number into an ID string. dftemp shares dfWaterSourceNativeID's index
# so the resulting Series lines up with it on assignment.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    """Look up the custom water source ID for one (name, type) pair.

    A is the water source name and B the water source type. Returns '' when
    both are blank strings or both are null, or when the
    dfWaterSourceNativeID table holds no matching row; otherwise returns the
    'in_WaterSourceNativeID' of the first matching row.
    """
    # Rows with neither a name nor a type never get an ID.
    if A == '' and B == '':
        return ''
    if pd.isnull(A) and pd.isnull(B):
        return ''

    same_source = ((dfWaterSourceNativeID['in_WaterSourceName'] == A) &
                   (dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B))
    matches = dfWaterSourceNativeID.loc[same_source, 'in_WaterSourceNativeID']
    return '' if matches.empty else matches.iloc[0]

# Attach the water source ID to every output row. Each call filters the whole
# dfWaterSourceNativeID table, so this step does one table scan per row of dfout.
dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
dfout

## Export Outputs

In [None]:
# Print the dtype of every output column; the option_context lifts pandas'
# display limits for this block only so nothing is truncated.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dfout.dtypes)

In [None]:
#Exporting to Finished File
# Relative path: the file lands in the current working directory, which the
# input cells set with os.chdir. index=False keeps row labels out of the CSV.
dfout.to_csv('P_WyomingMaster.csv', index=False)  # The output