# Removing Non-Unique Wyoming State Geological Survey Allocation Records
- Purpose: Temporary fix. Remove Wyoming State Geological Survey (WSGS) records that are already posted by the existing WYwr project.

In [1]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
# ---- pandas display options ----
pd.set_option("display.max_columns", 999)  # render (up to 999) columns instead of eliding the middle ones
pd.set_option("display.float_format", "{:.5f}".format)  # fixed-point with 5 decimals; no scientific notation

In [2]:
# Project-local helper modules (not installable packages)
import sys

## Custom Libraries
# The entry added to sys.path is a RELATIVE path, so it is resolved against the
# current working directory at import time. The two imports below therefore have
# to run before the notebook changes directory with os.chdir().
# NOTE(review): assumes the kernel starts in this notebook's own folder -- confirm.
sys.path.append("../../5_CustomFunctions/MappingFunctions")
import RemoveUnusedRecordsFile
import CreatePODSiteToPOUSiteRelationshipsFile

In [3]:
# ---- working directory ----
# Root folder of the WSGS water-allocation project; edit this string to point the run elsewhere.
workingDirString = "G:/Shared drives/WaDE Data/Wyoming/WaterAllocation_WSGS"

# Relative paths used further down resolve against this folder.
os.chdir(workingDirString)
print('The working Directory is:', workingDirString)

## Input

In [4]:
# working file - WSGSwr (the allocations this notebook filters)
inputFile = "G:/Shared drives/WaDE Data/Wyoming/WaterAllocation_WSGS/ProcessedInputData/waterallocations.csv"

# Read the CSV, then turn missing values into empty strings.
df = pd.read_csv(inputFile)
df = df.replace(np.nan, "")

# Row count before filtering, followed by a one-row preview.
print(len(df))
df.head(1)

In [5]:
# file that holds records to delete - WYwr (allocations already posted by that project)
inputFile = "G:/Shared drives/WaDE Data/Wyoming/WaterAllocation/ProcessedInputData/waterallocations.csv"

# Read the CSV, then turn missing values into empty strings.
df_remove = pd.read_csv(inputFile)
df_remove = df_remove.replace(np.nan, "")

# Row count of the reference file, followed by a one-row preview.
print(len(df_remove))
df_remove.head(1)

## Remove duplicate records

In [6]:
# Sorted, de-duplicated AllocationNativeID values taken from the WYwr file.
# The next step drops every WSGS row whose AllocationNativeID appears in this list.
removeList = df_remove['AllocationNativeID'].sort_values().unique().tolist()

# Show only the size and a short preview: echoing the whole list would flood
# the cell output and bloat the saved notebook.
print(len(removeList))
removeList[:10]

In [7]:
# Flag the WSGS rows whose AllocationNativeID also appears in removeList,
# then keep only the rows that are NOT flagged.
# NOTE(review): both inputs had NaN replaced by "", so a blank ID in the WYwr
# file would also match blank IDs here -- confirm that is intended.
isAlreadyPosted = df['AllocationNativeID'].isin(removeList)
df = df.loc[~isAlreadyPosted]

# Row count after filtering, followed by a one-row preview.
print(len(df))
df.head(1)

In [8]:
# Export waterallocations with the removed records.
# The path is relative to the working directory set earlier, so this overwrites
# the same WSGS waterallocations.csv that was read as the working file.
outputFile = 'ProcessedInputData/waterallocations.csv'
df.to_csv(outputFile, index=False)

In [9]:
# ---- Remove unused records ----
# Print a two-line banner, then hand the project folder to the custom helper module.
print(
    "Remove unused Water Sources and Sites records not found within AllocationsAmounts_facts input csv...",
    "############################################################################",
    sep="\n",
)
RemoveUnusedRecordsFile.RemoveUnusedAllocationsAmountRecordsFileFunction(workingDirString)

In [10]:
# ---- Create POD and POU joins (podsitetopousiterelationships.csv) ----
# Print a two-line banner, then hand the project folder to the custom helper module.
print(
    "Creating Pod site -to- Pou site relationships input csv...",
    "############################################################################",
    sep="\n",
)
CreatePODSiteToPOUSiteRelationshipsFile.CreatePODSiteToPOUSiteRelationshipsFunction(workingDirString)