## Create podsitetopousiterelationships.csv file
Last Updated: 11/15/2022

Purpose: To create UT site specific public supply site POD and POU relation information and populate dataframe for WaDEQA 2.0.

Notes:    
- read in main processed input file, sites, and sitespecificamounts.
- separate out sites from POU to POD
- attach time info from sitespecificamounts to both POU and POD site dataframes, then attach linking element from main processed input fill.
- Attach POU dataframe to POD datafame if a common linking element.

In [1]:
#Needed Libararies

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visulizaiton
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/NewMexico/SS_PublicSupplyWaterUse"
os.chdir(workingDir)

### Input Files

In [3]:
# Read in main processed input file
fileInput = "RawInputData/P_nmSSPWMain.csv"
dfmp = pd.read_csv(fileInput)
print(len(dfmp))
dfmp.head(1)

2913


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey,in_WaterSourceNativeID
0,Withdrawal,Withdrawal_Annual_Unspecified_Groundwater,Groundwater,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,6.575,Unspecified,100,2010,2010-12-31,2010-01-01,BEARCAT HOMEOWNERS ASSOCIATION,WaDNMD_WS1


In [4]:
# Read in sites file
fileInput = "ProcessedInputData/Sites.csv"
dfs = pd.read_csv(fileInput)
print(len(dfs))
dfs.head(1)

1191


Unnamed: 0,SiteUUID,RegulatoryOverlayUUIDs,WaterSourceUUIDs,CoordinateAccuracy,CoordinateMethodCV,County,EPSGCodeCV,Geometry,GNISCodeCV,HUC12,HUC8,Latitude,Longitude,NHDNetworkStatusCV,NHDProductCV,PODorPOUSite,SiteName,SiteNativeID,SitePoint,SiteTypeCV,StateCV,USGSSiteID
0,NMssps_S1,,"NMssps_WS2,NMssps_WS1",Unspecified,Unspecified,BERNALILLO,4326,,,,13050001.0,34.941213,-106.273851,,,POD,Unspecified,1.0,,Unspecified,NM,


In [5]:
# Read in sitespecificamounts file
fileInput = "ProcessedInputData/sitespecificamounts.csv"
dfsa = pd.read_csv(fileInput)
dfsa = dfsa.assign(SiteUUID=dfsa['SiteUUID'].str.split(',')).explode('SiteUUID').reset_index(drop=True)
print(len(dfsa))
dfsa.head(1)

1102


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,Amount,AllocationCropDutyAmount,AssociatedNativeAllocationIDs,BeneficialUseCategory,CommunityWaterSupplySystem,CropTypeCV,CustomerTypeCV,DataPublicationDate,DataPublicationDOI,Geometry,IrrigatedAcreage,IrrigationMethodCV,PopulationServed,PowerGeneratedGWh,PowerType,PrimaryUseCategory,ReportYearCV,SDWISIdentifier,TimeframeEnd,TimeframeStart
0,NMssps_M1,NMssps_O1,NMssps_S1,NMssps_V1,NMssps_WS1,6.575,,,Unspecified,,,,11/17/2022,,,,,100,,,Unspecified,2010,,2010-12-31,2010-01-01


### Extract Data

In [6]:
# extract unique in_SiteNativeID and linkKey from main processed input file
dfmp2 = dfmp[['in_SiteNativeID', 'linkKey']]
dfmp2 = dfmp2.dropna().drop_duplicates().reset_index(drop=True)
print(len(dfmp2))
dfmp2.head(2)

1190


Unnamed: 0,in_SiteNativeID,linkKey
0,1.0,BEARCAT HOMEOWNERS ASSOCIATION
1,4.0,ROSEDALE MDWCA


In [7]:
#create POU site file
dfspou = dfs[['SiteUUID', 'SiteNativeID', 'PODorPOUSite']]
dfspou = dfspou[dfspou['PODorPOUSite'] == 'POU']
dfspou = dfspou.dropna().drop_duplicates().reset_index(drop=True)
print(len(dfspou))
dfspou.head(2)

605


Unnamed: 0,SiteUUID,SiteNativeID,PODorPOUSite
0,NMssps_S587,CR-SPRINGERTRACT,POU
1,NMssps_S588,NM3500101,POU


In [8]:
#create POD site file
dfspod = dfs[['SiteUUID', 'SiteNativeID', 'PODorPOUSite']]
dfspod = dfspod[dfspod['PODorPOUSite'] == 'POD']
dfspou = dfspou.dropna().drop_duplicates().reset_index(drop=True)
print(len(dfspou))
dfspou.head(2)

605


Unnamed: 0,SiteUUID,SiteNativeID,PODorPOUSite
0,NMssps_S587,CR-SPRINGERTRACT,POU
1,NMssps_S588,NM3500101,POU


In [9]:
# extract useable site info with StartDate & EndDate from sitespecificamounts file
dfsa2 = dfsa[['SiteUUID', 'TimeframeStart', 'TimeframeEnd']]
dfsa2 = dfsa2.dropna().drop_duplicates().reset_index(drop=True)
print(len(dfsa2))
dfsa2.head(2)

1058


Unnamed: 0,SiteUUID,TimeframeStart,TimeframeEnd
0,NMssps_S1,2010-01-01,2010-12-31
1,NMssps_S363,2010-01-01,2010-12-31


In [10]:
# left join POU sites to sitespecificamounts to main processed input file
dfmergeU = pd.DataFrame(index=dfspou.index)

dfmergeU = pd.merge(dfspou, dfsa2, on='SiteUUID', how='left')
dfmergeU = pd.merge(dfmergeU, dfmp2, left_on='SiteNativeID', right_on='in_SiteNativeID', how='left')
dfmergeU = dfmergeU.drop_duplicates().reset_index(drop=True)
print(len(dfmergeU))
dfmergeU.head(2)

605


Unnamed: 0,SiteUUID,SiteNativeID,PODorPOUSite,TimeframeStart,TimeframeEnd,in_SiteNativeID,linkKey
0,NMssps_S587,CR-SPRINGERTRACT,POU,,,CR-SPRINGERTRACT,SPRINGER TRACT
1,NMssps_S588,NM3500101,POU,,,NM3500101,BEARCAT HOMEOWNERS ASSOCIATION


In [11]:
# left join POD sites to sitespecificamounts to main processed input file
dfmergeD = pd.DataFrame(index=dfspod.index)

dfmergeD = pd.merge(dfspod, dfsa2, on='SiteUUID', how='left')
dfmergeD = pd.merge(dfmergeD, dfmp2, left_on='SiteNativeID', right_on='in_SiteNativeID', how='left')
dfmergeD = dfmergeD.drop_duplicates().reset_index(drop=True)
print(len(dfmergeD))
dfmergeD.head(2)

1109


Unnamed: 0,SiteUUID,SiteNativeID,PODorPOUSite,TimeframeStart,TimeframeEnd,in_SiteNativeID,linkKey
0,NMssps_S1,1.0,POD,2010-01-01,2010-12-31,1.0,BEARCAT HOMEOWNERS ASSOCIATION
1,NMssps_S1,1.0,POD,2015-01-01,2015-12-31,1.0,BEARCAT HOMEOWNERS ASSOCIATION


In [26]:
# Left Join POD file with POU file with matching linking elements. Drop NaN rows
dfmergeDU = pd.DataFrame(index=dfmergeD.index)

dfmergeDU = pd.merge(dfmergeD, dfmergeU, on='linkKey', how='left')
dfmergeDU = dfmergeDU.dropna(subset=['SiteUUID_y']).drop_duplicates().reset_index(drop=True)
print(len(dfmergeDU))
dfmergeDU.head(2)

98


Unnamed: 0,SiteUUID_x,SiteNativeID_x,PODorPOUSite_x,TimeframeStart_x,TimeframeEnd_x,in_SiteNativeID_x,linkKey,SiteUUID_y,SiteNativeID_y,PODorPOUSite_y,TimeframeStart_y,TimeframeEnd_y,in_SiteNativeID_y
0,NMssps_S5,103.0,POD,,,103.0,MOONGATE WEST,NMssps_S671,NM3501207,POU,,,NM3501207
1,NMssps_S13,111.0,POD,,,111.0,SOUTHWEST TINY HOMES AND RV PARK,NMssps_S679,NM3501327,POU,,,NM3501327


### Create Output Dataframe

In [27]:
columnslist = ["PODSiteUUID", "POUSiteUUID", "StartDate", "EndDate"]
outdf = pd.DataFrame(index=dfmergeDU.index, columns=columnslist)  # The output dataframe
print(len(outdf))
outdf.head(1)

98


Unnamed: 0,PODSiteUUID,POUSiteUUID,StartDate,EndDate
0,,,,


In [28]:
outdf['PODSiteUUID'] = dfmergeDU['SiteUUID_x']
outdf['POUSiteUUID'] = dfmergeDU['SiteUUID_y']
outdf['StartDate'] = dfmergeDU['TimeframeStart_x']
outdf['EndDate'] = dfmergeDU['TimeframeEnd_x']
outdf = outdf.dropna().drop_duplicates().reset_index(drop=True).sort_values(by=['PODSiteUUID', 'POUSiteUUID', 'StartDate'])
print(len(outdf))
outdf.head()

66


Unnamed: 0,PODSiteUUID,POUSiteUUID,StartDate,EndDate
4,NMssps_S102,NMssps_S773,2010-01-01,2010-12-31
5,NMssps_S102,NMssps_S773,2015-01-01,2015-12-31
6,NMssps_S105,NMssps_S604,2010-01-01,2010-12-31
7,NMssps_S105,NMssps_S604,2015-01-01,2015-12-31
8,NMssps_S118,NMssps_S790,2010-01-01,2010-12-31


In [29]:
# The working output DataFrame for WaDE 2.0 input.
if not outdf.empty:
    outdf.to_csv('ProcessedInputData/podsitetopousiterelationships.csv', index=False)