# Preprocessing Utah Site Specific data for WaDEQA upload.
- Date Updated: 12/03/2021
- Purpose:  To preprocess the UDWRi and UDWRe data into one master file for simple DataFrame creation and extraction

Notes:
- Will treat UDWRi System + UDWRe data as POUs, and UDWRi Source data as PODs.  Will tie together with "CommunityWaterSupplySystem" WaDE field.
- For annual data, assume start = 01/01 & end =  12/31 for now.
- Seperate out water use System data data by customer type / benefical use (e.g. Domestic, Commerical, Industrial, Insitutional, Total).
- Seperate out the water use Source data by monthly and again by annual.
- create missing elements (water source type).

In [1]:
# Needed Libararies
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working Directory and Input File
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Utah/SiteSpecificAmounts/RawInputData"
os.chdir(workingDir)

## UDWRi System (POU) Data

In [3]:
# Dataframe Creation - system Data
sys_Input = "UDWRi_SystemData_input.csv"
df_sys = pd.read_csv(sys_Input, encoding = "ISO-8859-1")
df_sys.head(1)

Unnamed: 0,System ID,System Name,System Type,System Status,History Year,Date Received,County,Use Cooling Percent,Use Process Percent,Use Domestic Percent,Use Miscellaneous Percent,Irrigation (Lawn and Garden) Percent,Acres Irrigated,Irrigation (Agriculture),Acres Irrigated.1,DEQ ID,Population,Domestic Use,Commercial Use,Industrial Use,Institutional Use,Total Use,Domestic Connections,Commercial Connections,Industrial Connections,Institutional Connections,Total Connections,Peak Date,Peak Demand,Peak Demand Units,Peak Use Include,Peak Measurement Type,Peak Wholesale Volume,Peak Wholesale Volume Units,Sec Domestic Percent,Sec Industrial Percent,Sec Commercial Percent,Sec Institutional Percent,Sec Agriculture Percent
0,1000,Leeds Domestic Water Users Association,Public,Active,1960,12/31/1960,Washington,,,,,,,,,27010,0.0,0.0,0.0,0.0,,18301270.92,0.0,0.0,0.0,,52.0,,,,,,,,,,,,


In [4]:
# System Data - Domestic
df_sys_Domestic = pd.DataFrame(index=df_sys.index)

# SiteVariableAmounts_fact Info
df_sys_Domestic['linkKey'] = df_sys['System ID']
df_sys_Domestic['in_CommunityWaterSupplySystem'] = df_sys['System Name']
df_sys_Domestic['in_Amount'] = df_sys['Domestic Use']
df_sys_Domestic['in_PopulationServed'] = df_sys['Domestic Connections']
df_sys_Domestic['in_BenUse'] = "Domestic"
df_sys_Domestic['in_VariableCV'] = "Delivered Water Use"
df_sys_Domestic['in_VariableSpecificCV'] = "Delivered Water Use - Annual - Domestic"
df_sys_Domestic['in_CustomerTypeCV'] = "Domestic"
df_sys_Domestic['in_ReportYearCV'] = df_sys['History Year']
df_sys_Domestic['in_TimeframeEnd'] = '12/31/' + df_sys['History Year'].astype(str)
df_sys_Domestic['in_TimeframeStart'] = '01/01/' + df_sys['History Year'].astype(str)

# Water Source Info
df_sys_Domestic['in_WaterSourceTypeCV'] = "Unspecified"

print(len(df_sys_Domestic))
df_sys_Domestic.head(1)

22680


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1960,12/31/1960,01/01/1960,Unspecified


In [5]:
# System Data - Commercial
df_sys_Commercial = pd.DataFrame(index=df_sys.index)

# SiteVariableAmounts_fact Info
df_sys_Commercial['linkKey'] = df_sys['System ID']
df_sys_Commercial['in_CommunityWaterSupplySystem'] = df_sys['System Name']
df_sys_Commercial['in_Amount'] = df_sys['Commercial Use']
df_sys_Commercial['in_PopulationServed'] = df_sys['Commercial Connections']
df_sys_Commercial['in_BenUse'] = "Commercial"
df_sys_Commercial['in_VariableCV'] = "Delivered Water Use"
df_sys_Commercial['in_VariableSpecificCV'] = "Delivered Water Use - Annual - Commercial"
df_sys_Commercial['in_CustomerTypeCV'] = "Commercial"
df_sys_Commercial['in_ReportYearCV'] = df_sys['History Year']
df_sys_Commercial['in_TimeframeEnd'] = '12/31/' + df_sys['History Year'].astype(str)
df_sys_Commercial['in_TimeframeStart'] = '01/01/' + df_sys['History Year'].astype(str)

# Water Source Info
df_sys_Commercial['in_WaterSourceTypeCV'] = "Unspecified"

print(len(df_sys_Commercial))
df_sys_Commercial.head(1)

22680


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Commercial,Delivered Water Use,Delivered Water Use - Annual - Commercial,Commercial,1960,12/31/1960,01/01/1960,Unspecified


In [6]:
# System Data - Industrial
df_sys_Industrial = pd.DataFrame(index=df_sys.index)

# SiteVariableAmounts_fact Info
df_sys_Industrial['linkKey'] = df_sys['System ID']
df_sys_Industrial['in_CommunityWaterSupplySystem'] = df_sys['System Name']
df_sys_Industrial['in_Amount'] = df_sys['Industrial Use']
df_sys_Industrial['in_PopulationServed'] = df_sys['Industrial Connections']
df_sys_Industrial['in_BenUse'] = "Industrial"
df_sys_Industrial['in_VariableCV'] = "Delivered Water Use"
df_sys_Industrial['in_VariableSpecificCV'] = "Delivered Water Use - Annual - Industrial"
df_sys_Industrial['in_CustomerTypeCV'] = "Industrial"
df_sys_Industrial['in_ReportYearCV'] = df_sys['History Year']
df_sys_Industrial['in_TimeframeEnd'] = '12/31/' + df_sys['History Year'].astype(str)
df_sys_Industrial['in_TimeframeStart'] = '01/01/' + df_sys['History Year'].astype(str)

# Water Source Info
df_sys_Industrial['in_WaterSourceTypeCV'] = "Unspecified"

print(len(df_sys_Industrial))
df_sys_Industrial.head(1)

22680


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Industrial,Delivered Water Use,Delivered Water Use - Annual - Industrial,Industrial,1960,12/31/1960,01/01/1960,Unspecified


In [7]:
# System Data - Institutional
df_sys_Institutional = pd.DataFrame(index=df_sys.index)

# SiteVariableAmounts_fact Info
df_sys_Institutional['linkKey'] = df_sys['System ID']
df_sys_Institutional['in_CommunityWaterSupplySystem'] = df_sys['System Name']
df_sys_Institutional['in_Amount'] = df_sys['Institutional Use']
df_sys_Institutional['in_PopulationServed'] = df_sys['Institutional Connections']
df_sys_Institutional['in_BenUse'] = "Institutional"
df_sys_Institutional['in_VariableCV'] = "Delivered Water Use"
df_sys_Institutional['in_VariableSpecificCV'] = "Delivered Water Use - Annual - Institutional"
df_sys_Institutional['in_CustomerTypeCV'] = "Institutional"
df_sys_Institutional['in_ReportYearCV'] = df_sys['History Year']
df_sys_Institutional['in_TimeframeEnd'] = '12/31/' + df_sys['History Year'].astype(str)
df_sys_Institutional['in_TimeframeStart'] = '01/01/' + df_sys['History Year'].astype(str)

# Water Source Info
df_sys_Institutional['in_WaterSourceTypeCV'] = "Unspecified"

print(len(df_sys_Institutional))
df_sys_Institutional.head(1)

22680


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV
0,1000,Leeds Domestic Water Users Association,,,Institutional,Delivered Water Use,Delivered Water Use - Annual - Institutional,Institutional,1960,12/31/1960,01/01/1960,Unspecified


In [8]:
# System Data - DCII
df_sys_DCII = pd.DataFrame(index=df_sys.index)

# SiteVariableAmounts_fact Info
df_sys_DCII['linkKey'] = df_sys['System ID']
df_sys_DCII['in_CommunityWaterSupplySystem'] = df_sys['System Name']
df_sys_DCII['in_Amount'] = df_sys['Total Use']
df_sys_DCII['in_PopulationServed'] = df_sys['Total Connections']
df_sys_DCII['in_BenUse'] = "DCII"
df_sys_DCII['in_VariableCV'] = "Delivered Water Use"
df_sys_DCII['in_VariableSpecificCV'] = "Delivered Water Use - Annual - DCII"
df_sys_DCII['in_CustomerTypeCV'] = "DCII"
df_sys_DCII['in_ReportYearCV'] = df_sys['History Year']
df_sys_DCII['in_TimeframeEnd'] = '12/31/' + df_sys['History Year'].astype(str)
df_sys_DCII['in_TimeframeStart'] = '01/01/' + df_sys['History Year'].astype(str)

# Water Source Info
df_sys_DCII['in_WaterSourceTypeCV'] = "Unspecified"

print(len(df_sys_DCII))
df_sys_DCII.head(1)

22680


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV
0,1000,Leeds Domestic Water Users Association,18301270.92,52.0,DCII,Delivered Water Use,Delivered Water Use - Annual - DCII,DCII,1960,12/31/1960,01/01/1960,Unspecified


In [9]:
# Concatenate System Data into one long dataframe.
frames = [df_sys_Domestic, df_sys_Commercial, df_sys_Industrial, df_sys_Institutional, df_sys_DCII]
df_sys_all = pd.concat(frames)

df_sys_all = df_sys_all.replace(np.nan, "").drop_duplicates().reset_index(drop=True)
print(len(df_sys_all))
df_sys_all.head(1)

113400


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1960,12/31/1960,01/01/1960,Unspecified


#### UDWRe Culinary Service Area Data
- Need to tie the UDWRi system data to the UDWRe site info

In [10]:
# Dataframe Creation - UDWRe site information
csa_Input = "UDWRe_CulinaryWaterServiceAreas_input.csv"
df_csa = pd.read_csv(csa_Input, encoding = "ISO-8859-1")
df_csa.head(1)

Unnamed: 0,FID_1,WRENAME,WRNAME,DWNAME,SYSTEMTYPE,WATERRESID,WRID,DWSYSNUM,WRLINK,WHOLESALER,LABEL,STATE,COUNTY,BASIN,SUBAREA,SUBAREANAM,LANUM,LANAME,ENDYEAR,DATASOURCE,SOURCEDATE,EDITOR,EDITDATE,SHAPE_Leng,Shape__Are,Shape__Len,Shape_Length,Shape_Area,Longitude,Latitude
0,1,Irontown,Irontown,Irontown,C,564,11358,UTAH11070,http://waterrights.utah.gov/cgi-bin/wuseview.e...,,,Utah,Iron,Cedar/Beaver,6/3/2001,Escalante Desert,06-03-01a,Escalante Desert,2019,DWRe/Supplier,5/21/2019 0:00,ADAMCLARK,10/19/2020 0:00,5567.248025,656517.818,5567.248025,5567.248025,656517.8179,-113.44603,37.602583


In [11]:
# Output dataframe for Culinary Service Area Data

df_csaOut = pd.DataFrame(index=df_csa.index)
df_csaOut['linkKey'] = df_csa['WRID']

# Site Info
df_csaOut['in_CoordinateMethodCV'] = "Centroid of Area"
df_csaOut['in_Latitude'] = df_csa['Latitude'].astype(float)
df_csaOut['in_Longitude'] = df_csa['Longitude'].astype(float)
df_csaOut['in_PODorPOUSite'] = "POU"
df_csaOut['in_SiteName'] = df_csa['WRENAME']
df_csaOut['in_SiteNativeID'] = df_csa['WRID']
df_csaOut['in_SiteTypeCV'] = "Unspecified"

df_csaOut = df_csaOut.replace(np.nan, "").drop_duplicates().reset_index(drop=True)
print(len(df_csaOut))
df_csaOut.head(1)

1294


Unnamed: 0,linkKey,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,11358,Centroid of Area,37.602583,-113.44603,POU,Irontown,11358,Unspecified


In [12]:
# Create output dataframe.  Merge the two dataframes into one.
df_sys_out = pd.merge(df_sys_all, df_csaOut, left_on='linkKey', right_on='linkKey', how='left')

df_sys_out = df_sys_out.replace(np.nan, "").drop_duplicates().reset_index(drop=True)
print(len(df_sys_out))
df_sys_out

115270


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1960,12/31/1960,01/01/1960,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified
1,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1962,12/31/1962,01/01/1962,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified
2,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1963,12/31/1963,01/01/1963,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified
3,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1964,12/31/1964,01/01/1964,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified
4,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1965,12/31/1965,01/01/1965,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115265,11794,Nortonville Lot Owners Association Inc,4806308.571,7.0,DCII,Delivered Water Use,Delivered Water Use - Annual - DCII,DCII,2020,12/31/2020,01/01/2020,Unspecified,,,,,,,
115266,11809,Deer Creek Island Beach,1781884.0,1.0,DCII,Delivered Water Use,Delivered Water Use - Annual - DCII,DCII,2020,12/31/2020,01/01/2020,Unspecified,,,,,,,
115267,11810,Deer Creek Wallsburg (Irrigation),,,DCII,Delivered Water Use,Delivered Water Use - Annual - DCII,DCII,2020,12/31/2020,01/01/2020,Unspecified,,,,,,,
115268,11811,Deer Creek Lake,5010196.0,1.0,DCII,Delivered Water Use,Delivered Water Use - Annual - DCII,DCII,2020,12/31/2020,01/01/2020,Unspecified,,,,,,,


## UDWRi Source (POD) Data

In [13]:
# Dataframe Creation - system Data
sour_Input = "UDWRi_SourceData_input.csv"
df_sour = pd.read_csv(sour_Input, encoding = "ISO-8859-1")

# drop rows with a null value
df_sour = df_sour.dropna(subset=['Year'])

# Adjust data type of fields
df_sour['Year'] = df_sour['Year'].astype(int)

print(len(df_sour))
df_sour.head(1)

68946


Unnamed: 0,System Name,System ID,Source Name,Source ID,Source Status,Lat NAD83,Lon NAD83,Source Type,Diversion Type,Use Type,Year,Units,Method of Measurement,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Total
0,Leeds Domestic Water Users Association,1000,Oak Grove Spring (WS001),10000001,Active,37.308953,-113.428771,Spring,Withdrawal,Water Supplier,2020,gallons,Individual Meter,3330920.0,3843000.0,4645710.0,7360400.0,9212910.0,7344000.0,7344000.0,9794040.0,7786320.0,4078500.0,3423770.0,3893150.0,72056720.0


In [14]:
# Creating Water Source Type.
wsTypeDict = {
    "Well" : "Groundwater",
    "Well/Spring" : "Groundwater",
    "Well Field" : "Groundwater",
    "Well/Stream" : "Groundwater",
    "Tunnel" : "Groundwater",
    "Stream" : "Surface Water",
    "Spring" : "Surface Water",
    "Reservoir" : "Surface Water",
    "Drain" : "Surface Water",
    "Lake" : "Surface Water"}
def assignWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        String1 = colrowValue.strip()
        try:
            outList = wsTypeDict[String1]
        except:
            outList = "Unspecified"
    return outList

df_sour['in_WaterSourceTypeCV'] = df_sour.apply(lambda row: assignWaterSourceTypeCV(row['Source Type']), axis=1)
df_sour.head(1)

Unnamed: 0,System Name,System ID,Source Name,Source ID,Source Status,Lat NAD83,Lon NAD83,Source Type,Diversion Type,Use Type,Year,Units,Method of Measurement,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Total,in_WaterSourceTypeCV
0,Leeds Domestic Water Users Association,1000,Oak Grove Spring (WS001),10000001,Active,37.308953,-113.428771,Spring,Withdrawal,Water Supplier,2020,gallons,Individual Meter,3330920.0,3843000.0,4645710.0,7360400.0,9212910.0,7344000.0,7344000.0,9794040.0,7786320.0,4078500.0,3423770.0,3893150.0,72056720.0,Surface Water


In [15]:
# Source Data - Annual
df_sour_Ann = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Ann['linkKey'] = ""
df_sour_Ann['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Ann['in_Amount'] = df_sour['Total']
df_sour_Ann['in_PopulationServed'] = ""
df_sour_Ann['in_BenUse'] = "DCII"
df_sour_Ann['in_VariableCV'] = "Withdrawal"
df_sour_Ann['in_VariableSpecificCV'] = "Withdrawal - Annual - DCII"
df_sour_Ann['in_CustomerTypeCV'] = "DCII"
df_sour_Ann['in_ReportYearCV'] = df_sour['Year']
df_sour_Ann['in_TimeframeEnd'] = '12/31/' + df_sour['Year'].astype(str)
df_sour_Ann['in_TimeframeStart'] = '01/01/' + df_sour['Year'].astype(str)

# Water Source Info
df_sour_Ann['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Ann['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Ann['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Ann['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Ann['in_PODorPOUSite'] = "POD"
df_sour_Ann['in_SiteName'] = df_sour['Source Name']
df_sour_Ann['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Ann['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Ann))
df_sour_Ann.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,72056720.0,,DCII,Withdrawal,Withdrawal - Annual - DCII,DCII,2020,12/31/2020,01/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [16]:
# Source Data - Monthly Jan
df_sour_Jan = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Jan['linkKey'] = ""
df_sour_Jan['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Jan['in_Amount'] = df_sour['Jan'] # <- change here
df_sour_Jan['in_PopulationServed'] = ""
df_sour_Jan['in_BenUse'] = "DCII"
df_sour_Jan['in_VariableCV'] = "Withdrawal"
df_sour_Jan['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Jan['in_CustomerTypeCV'] = "DCII"
df_sour_Jan['in_ReportYearCV'] = df_sour['Year']
df_sour_Jan['in_TimeframeEnd'] = '01/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Jan['in_TimeframeStart'] = '01/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Jan['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Jan['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Jan['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Jan['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Jan['in_PODorPOUSite'] = "POD"
df_sour_Jan['in_SiteName'] = df_sour['Source Name']
df_sour_Jan['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Jan['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Jan))
df_sour_Jan.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,3330920.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,01/31/2020,01/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [17]:
# Source Data - Monthly Feb
df_sour_Feb = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Feb['linkKey'] = ""
df_sour_Feb['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Feb['in_Amount'] = df_sour['Feb'] # <- change here
df_sour_Feb['in_PopulationServed'] = ""
df_sour_Feb['in_BenUse'] = "DCII"
df_sour_Feb['in_VariableCV'] = "Withdrawal"
df_sour_Feb['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Feb['in_CustomerTypeCV'] = "DCII"
df_sour_Feb['in_ReportYearCV'] = df_sour['Year']
df_sour_Feb['in_TimeframeEnd'] = '02/28/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Feb['in_TimeframeStart'] = '02/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Feb['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Feb['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Feb['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Feb['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Feb['in_PODorPOUSite'] = "POD"
df_sour_Feb['in_SiteName'] = df_sour['Source Name']
df_sour_Feb['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Feb['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Feb))
df_sour_Feb.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,3843000.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,02/28/2020,02/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [18]:
df_sour_Feb['in_TimeframeEnd'].unique()

array(['02/28/2020', '02/28/2019', '02/28/2018', '02/28/2017',
       '02/28/2016', '02/28/2015', '02/28/2014', '02/28/2013',
       '02/28/2012', '02/28/2011', '02/28/2010', '02/28/2009',
       '02/28/2008', '02/28/2007', '02/28/2006', '02/28/2005',
       '02/28/2004', '02/28/2003', '02/28/2002', '02/28/2001',
       '02/28/2000', '02/28/1999', '02/28/1998', '02/28/1997',
       '02/28/1996', '02/28/1995', '02/28/1994', '02/28/1993',
       '02/28/1992', '02/28/1991', '02/28/1990', '02/28/1989',
       '02/28/1988', '02/28/1987', '02/28/1986', '02/28/1985',
       '02/28/1984', '02/28/1983', '02/28/1982', '02/28/1981',
       '02/28/1980', '02/28/1979', '02/28/1974', '02/28/1973',
       '02/28/1972', '02/28/1971', '02/28/1970', '02/28/1969',
       '02/28/1968', '02/28/1967', '02/28/1966', '02/28/1965',
       '02/28/1964', '02/28/1963', '02/28/1962', '02/28/1978',
       '02/28/1977', '02/28/1976', '02/28/1975', '02/28/1959',
       '02/28/1960', '02/28/1961'], dtype=object)

In [19]:
# Source Data - Monthly Mar
df_sour_Mar = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Mar['linkKey'] = ""
df_sour_Mar['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Mar['in_Amount'] = df_sour['Mar'] # <- change here
df_sour_Mar['in_PopulationServed'] = ""
df_sour_Mar['in_BenUse'] = "DCII"
df_sour_Mar['in_VariableCV'] = "Withdrawal"
df_sour_Mar['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Mar['in_CustomerTypeCV'] = "DCII"
df_sour_Mar['in_ReportYearCV'] = df_sour['Year']
df_sour_Mar['in_TimeframeEnd'] = '03/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Mar['in_TimeframeStart'] = '03/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Mar['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Mar['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Mar['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Mar['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Mar['in_PODorPOUSite'] = "POD"
df_sour_Mar['in_SiteName'] = df_sour['Source Name']
df_sour_Mar['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Mar['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Mar))
df_sour_Mar.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,4645710.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,03/31/2020,03/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [20]:
# Source Data - Monthly Apr
df_sour_Apr = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Apr['linkKey'] = ""
df_sour_Apr['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Apr['in_Amount'] = df_sour['Apr'] # <- change here
df_sour_Apr['in_PopulationServed'] = ""
df_sour_Apr['in_BenUse'] = "DCII"
df_sour_Apr['in_VariableCV'] = "Withdrawal"
df_sour_Apr['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Apr['in_CustomerTypeCV'] = "DCII"
df_sour_Apr['in_ReportYearCV'] = df_sour['Year']
df_sour_Apr['in_TimeframeEnd'] = '04/30/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Apr['in_TimeframeStart'] = '04/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Apr['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Apr['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Apr['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Apr['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Apr['in_PODorPOUSite'] = "POD"
df_sour_Apr['in_SiteName'] = df_sour['Source Name']
df_sour_Apr['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Apr['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Apr))
df_sour_Apr.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,7360400.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,04/30/2020,04/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [21]:
# Source Data - Monthly May
df_sour_May = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_May['linkKey'] = ""
df_sour_May['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_May['in_Amount'] = df_sour['May'] # <- change here
df_sour_May['in_PopulationServed'] = ""
df_sour_May['in_BenUse'] = "DCII"
df_sour_May['in_VariableCV'] = "Withdrawal"
df_sour_May['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_May['in_CustomerTypeCV'] = "DCII"
df_sour_May['in_ReportYearCV'] = df_sour['Year']
df_sour_May['in_TimeframeEnd'] = '05/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_May['in_TimeframeStart'] = '05/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_May['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_May['in_CoordinateMethodCV'] = "Representation Node"
df_sour_May['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_May['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_May['in_PODorPOUSite'] = "POD"
df_sour_May['in_SiteName'] = df_sour['Source Name']
df_sour_May['in_SiteNativeID'] =df_sour['Source ID']
df_sour_May['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_May))
df_sour_May.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,9212910.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,05/31/2020,05/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [22]:
# Source Data - Monthly Jun
df_sour_Jun = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Jun['linkKey'] = ""
df_sour_Jun['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Jun['in_Amount'] = df_sour['Jun'] # <- change here
df_sour_Jun['in_PopulationServed'] = ""
df_sour_Jun['in_BenUse'] = "DCII"
df_sour_Jun['in_VariableCV'] = "Withdrawal"
df_sour_Jun['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Jun['in_CustomerTypeCV'] = "DCII"
df_sour_Jun['in_ReportYearCV'] = df_sour['Year']
df_sour_Jun['in_TimeframeEnd'] = '06/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Jun['in_TimeframeStart'] = '06/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Jun['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Jun['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Jun['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Jun['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Jun['in_PODorPOUSite'] = "POD"
df_sour_Jun['in_SiteName'] = df_sour['Source Name']
df_sour_Jun['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Jun['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Jun))
df_sour_Jun.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,7344000.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,06/31/2020,06/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [23]:
# Source Data - Monthly Jul
df_sour_Jul = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Jul['linkKey'] = ""
df_sour_Jul['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Jul['in_Amount'] = df_sour['Jul'] # <- change here
df_sour_Jul['in_PopulationServed'] = ""
df_sour_Jul['in_BenUse'] = "DCII"
df_sour_Jul['in_VariableCV'] = "Withdrawal"
df_sour_Jul['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Jul['in_CustomerTypeCV'] = "DCII"
df_sour_Jul['in_ReportYearCV'] = df_sour['Year']
df_sour_Jul['in_TimeframeEnd'] = '07/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Jul['in_TimeframeStart'] = '07/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Jul['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Jul['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Jul['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Jul['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Jul['in_PODorPOUSite'] = "POD"
df_sour_Jul['in_SiteName'] = df_sour['Source Name']
df_sour_Jul['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Jul['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Jul))
df_sour_Jul.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,7344000.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,07/31/2020,07/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [24]:
# Source Data - Monthly Aug
df_sour_Aug = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Aug['linkKey'] = ""
df_sour_Aug['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Aug['in_Amount'] = df_sour['Aug'] # <- change here
df_sour_Aug['in_PopulationServed'] = ""
df_sour_Aug['in_BenUse'] = "DCII"
df_sour_Aug['in_VariableCV'] = "Withdrawal"
df_sour_Aug['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Aug['in_CustomerTypeCV'] = "DCII"
df_sour_Aug['in_ReportYearCV'] = df_sour['Year']
df_sour_Aug['in_TimeframeEnd'] = '08/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Aug['in_TimeframeStart'] = '08/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Aug['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Aug['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Aug['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Aug['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Aug['in_PODorPOUSite'] = "POD"
df_sour_Aug['in_SiteName'] = df_sour['Source Name']
df_sour_Aug['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Aug['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Aug))
df_sour_Aug.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,9794040.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,08/31/2020,08/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [25]:
# Source Data - Monthly Sep
df_sour_Sep = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Sep['linkKey'] = ""
df_sour_Sep['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Sep['in_Amount'] = df_sour['Sep'] # <- change here
df_sour_Sep['in_PopulationServed'] = ""
df_sour_Sep['in_BenUse'] = "DCII"
df_sour_Sep['in_VariableCV'] = "Withdrawal"
df_sour_Sep['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Sep['in_CustomerTypeCV'] = "DCII"
df_sour_Sep['in_ReportYearCV'] = df_sour['Year']
df_sour_Sep['in_TimeframeEnd'] = '09/30/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Sep['in_TimeframeStart'] = '09/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Sep['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Sep['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Sep['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Sep['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Sep['in_PODorPOUSite'] = "POD"
df_sour_Sep['in_SiteName'] = df_sour['Source Name']
df_sour_Sep['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Sep['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Sep))
df_sour_Sep.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,7786320.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,09/30/2020,09/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [26]:
# Source Data - Monthly Oct
df_sour_Oct = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Oct['linkKey'] = ""
df_sour_Oct['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Oct['in_Amount'] = df_sour['Oct'] # <- change here
df_sour_Oct['in_PopulationServed'] = ""
df_sour_Oct['in_BenUse'] = "DCII"
df_sour_Oct['in_VariableCV'] = "Withdrawal"
df_sour_Oct['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Oct['in_CustomerTypeCV'] = "DCII"
df_sour_Oct['in_ReportYearCV'] = df_sour['Year']
df_sour_Oct['in_TimeframeEnd'] = '10/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Oct['in_TimeframeStart'] = '10/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Oct['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Oct['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Oct['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Oct['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Oct['in_PODorPOUSite'] = "POD"
df_sour_Oct['in_SiteName'] = df_sour['Source Name']
df_sour_Oct['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Oct['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Oct))
df_sour_Oct.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,4078500.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,10/31/2020,10/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [27]:
# Source Data - Monthly Nov
df_sour_Nov = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Nov['linkKey'] = ""
df_sour_Nov['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Nov['in_Amount'] = df_sour['Nov'] # <- change here
df_sour_Nov['in_PopulationServed'] = ""
df_sour_Nov['in_BenUse'] = "DCII"
df_sour_Nov['in_VariableCV'] = "Withdrawal"
df_sour_Nov['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Nov['in_CustomerTypeCV'] = "DCII"
df_sour_Nov['in_ReportYearCV'] = df_sour['Year']
df_sour_Nov['in_TimeframeEnd'] = '11/30/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Nov['in_TimeframeStart'] = '11/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Nov['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Nov['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Nov['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Nov['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Nov['in_PODorPOUSite'] = "POD"
df_sour_Nov['in_SiteName'] = df_sour['Source Name']
df_sour_Nov['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Nov['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Nov))
df_sour_Nov.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,3423770.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,11/30/2020,11/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [28]:
# Source Data - Monthly Dec
df_sour_Dec = pd.DataFrame(index=df_sour.index)

# SiteVariableAmounts_fact Info
df_sour_Dec['linkKey'] = ""
df_sour_Dec['in_CommunityWaterSupplySystem'] = df_sour['System Name']
df_sour_Dec['in_Amount'] = df_sour['Dec'] # <- change here
df_sour_Dec['in_PopulationServed'] = ""
df_sour_Dec['in_BenUse'] = "DCII"
df_sour_Dec['in_VariableCV'] = "Withdrawal"
df_sour_Dec['in_VariableSpecificCV'] = "Withdrawal - Monthly - DCII"
df_sour_Dec['in_CustomerTypeCV'] = "DCII"
df_sour_Dec['in_ReportYearCV'] = df_sour['Year']
df_sour_Dec['in_TimeframeEnd'] = '12/31/' + df_sour['Year'].astype(str)  #<- change here
df_sour_Dec['in_TimeframeStart'] = '12/01/' + df_sour['Year'].astype(str) #<- change here

# Water Source Info
df_sour_Dec['in_WaterSourceTypeCV'] = df_sour['in_WaterSourceTypeCV']

# Site Info
df_sour_Dec['in_CoordinateMethodCV'] = "Representation Node"
df_sour_Dec['in_Latitude'] = df_sour['Lat NAD83'].astype(float)
df_sour_Dec['in_Longitude'] = df_sour['Lon NAD83'].astype(float)
df_sour_Dec['in_PODorPOUSite'] = "POD"
df_sour_Dec['in_SiteName'] = df_sour['Source Name']
df_sour_Dec['in_SiteNativeID'] =df_sour['Source ID']
df_sour_Dec['in_SiteTypeCV'] = df_sour['Source Type']

print(len(df_sour_Dec))
df_sour_Dec.head(1)

68946


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,3893150.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,12/31/2020,12/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


In [29]:
# Concatenate Source Data into one long dataframe.
frames = [df_sour_Ann, 
          df_sour_Jan, df_sour_Feb, df_sour_Mar, df_sour_Apr, df_sour_May, df_sour_Jun,
          df_sour_Jul, df_sour_Aug, df_sour_Sep, df_sour_Oct, df_sour_Nov, df_sour_Dec]
df_sour_out = pd.concat(frames)

df_sour_out = df_sour_out.replace(np.nan, "").drop_duplicates().reset_index(drop=True)
print(len(df_sour_out))
df_sour_out.head(1)

896298


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,,Leeds Domestic Water Users Association,72056720.0,,DCII,Withdrawal,Withdrawal - Annual - DCII,DCII,2020,12/31/2020,01/01/2020,Surface Water,Representation Node,37.308953,-113.428771,POD,Oak Grove Spring (WS001),10000001,Spring


# Concatenate System Data (POUs) with Source Data (PODs).

In [30]:
# Concatenate Source Data into one long dataframe.
frames = [df_sys_out, df_sour_out]
dfout = pd.concat(frames)

dfout = dfout.replace(np.nan, "").drop_duplicates().reset_index(drop=True)
print(len(dfout))
dfout.head(1)

1011568


Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1960,12/31/1960,01/01/1960,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified


# Clean data

In [31]:
# Convert History Year to string.

dfout['in_TimeframeEnd'] = pd.to_datetime(dfout['in_TimeframeEnd'], errors = 'coerce')
dfout['in_TimeframeEnd'] = pd.to_datetime(dfout["in_TimeframeEnd"].dt.strftime('%m/%d/%Y'))

dfout['in_TimeframeStart'] = pd.to_datetime(dfout['in_TimeframeStart'], errors = 'coerce')
dfout['in_TimeframeStart'] = pd.to_datetime(dfout["in_TimeframeStart"].dt.strftime('%m/%d/%Y'))

dfout.tail(1)

Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV
1011567,,"Ambro and Son, LLP",2054167.406,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,2020-12-31,2020-12-01,Groundwater,Representation Node,40.268534,-111.856818,POD,Well,108546036,Well


## WaDE Custom Elements (due to missing info)

In [32]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEUT_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    if (A == '') or (pd.isnull(A)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID(row['in_WaterSourceTypeCV']), axis=1)
dfout

Unnamed: 0,linkKey,in_CommunityWaterSupplySystem,in_Amount,in_PopulationServed,in_BenUse,in_VariableCV,in_VariableSpecificCV,in_CustomerTypeCV,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,in_WaterSourceTypeCV,in_CoordinateMethodCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_WaterSourceNativeID
0,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1960,1960-12-31,1960-01-01,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified,WaDEUT_WS1
1,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1962,1962-12-31,1962-01-01,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified,WaDEUT_WS1
2,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1963,1963-12-31,1963-01-01,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified,WaDEUT_WS1
3,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1964,1964-12-31,1964-01-01,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified,WaDEUT_WS1
4,1000,Leeds Domestic Water Users Association,0.0,0.0,Domestic,Delivered Water Use,Delivered Water Use - Annual - Domestic,Domestic,1965,1965-12-31,1965-01-01,Unspecified,Centroid of Area,37.237197,-113.347061,POU,Leeds Domestic Water Users Assoc.,1000.0,Unspecified,WaDEUT_WS1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1011563,,"Magnum Holdings, LLC.",0.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2019,2019-12-31,2019-12-01,Groundwater,Representation Node,39.486279,-112.624303,POD,Well #12,108545958,Well,WaDEUT_WS3
1011564,,"Magnum Holdings, LLC.",0.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,2020-12-31,2020-12-01,Groundwater,Representation Node,39.471903,-112.59327,POD,Well #13,108545959,Well,WaDEUT_WS3
1011565,,"Magnum Holdings, LLC.",0.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2019,2019-12-31,2019-12-01,Groundwater,Representation Node,39.471903,-112.59327,POD,Well #13,108545959,Well,WaDEUT_WS3
1011566,,Ashdown Bros. Construction Inc.,0.0,,DCII,Withdrawal,Withdrawal - Monthly - DCII,DCII,2020,2020-12-31,2020-12-01,Groundwater,Representation Node,37.699086,-113.109553,POD,Ashdown Bros. Well,108545960,Well,WaDEUT_WS3


In [33]:
# Converting in_PopulationServed to float value.
dfout['in_PopulationServed'] = dfout['in_PopulationServed'].replace(np.nan, 0, regex=True)
dfout['in_PopulationServed'] = dfout['in_PopulationServed'].replace('', 0, regex=True)
dfout['in_PopulationServed'] = dfout['in_PopulationServed'].astype(int)
dfout['in_PopulationServed']

0          0
1          0
2          0
3          0
4          0
          ..
1011563    0
1011564    0
1011565    0
1011566    0
1011567    0
Name: in_PopulationServed, Length: 1011568, dtype: int32

In [34]:
#check datatype
print(len(dfout))
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dfout.dtypes)

1011568
linkKey                                  object
in_CommunityWaterSupplySystem            object
in_Amount                                object
in_PopulationServed                       int32
in_BenUse                                object
in_VariableCV                            object
in_VariableSpecificCV                    object
in_CustomerTypeCV                        object
in_ReportYearCV                           int64
in_TimeframeEnd                  datetime64[ns]
in_TimeframeStart                datetime64[ns]
in_WaterSourceTypeCV                     object
in_CoordinateMethodCV                    object
in_Latitude                              object
in_Longitude                             object
in_PODorPOUSite                          object
in_SiteName                              object
in_SiteNativeID                          object
in_SiteTypeCV                            object
in_WaterSourceNativeID                   object
dtype: object


In [35]:
# Exporting output files.
dfout.to_csv('P_MasterUTSiteSpecific.csv', index=False)  # The output.

In [36]:
dfout['in_VariableSpecificCV'].unique()

array(['Delivered Water Use - Annual - Domestic',
       'Delivered Water Use - Annual - Commercial',
       'Delivered Water Use - Annual - Industrial',
       'Delivered Water Use - Annual - Institutional',
       'Delivered Water Use - Annual - DCII',
       'Withdrawal - Annual - DCII', 'Withdrawal - Monthly - DCII'],
      dtype=object)

In [37]:
dfout['in_WaterSourceNativeID'].unique()

array(['WaDEUT_WS1', 'WaDEUT_WS2', 'WaDEUT_WS3'], dtype=object)