# Pre-processing Montana Site Specific Reservoir and Observation Site data for WaDE upload.

### Goal
- Create sites_input.csv, contains location and site information.
- Create cleaned input_timeseries.csv with a native site ID field.

In [None]:
# Needed Libraries

# Working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Working with API
import requests
import io
import json

# Cleanup
import re
import time
from datetime import datetime
# Raise the pandas display limit to 999 columns so wide DataFrames are shown in full in the notebook.
pd.set_option('display.max_columns', 999)

In [None]:
# Working Directory
# After os.chdir, relative file names used for reading and writing resolve against this folder.
workingDir = "G:/Shared drives/WaDE Data/Montana/SS_ReservoirsGages/RawInputData"
os.chdir(workingDir)

### DataFrame Creation

In [None]:
# Read the location CSV into df_loc, report its row count, and preview the first row.
inputFile1 = "MGS_locations.csv"
df_loc = pd.read_csv(filepath_or_buffer=inputFile1)
print(df_loc.shape[0])
df_loc.head(n=1)

In [None]:
# Read the datasets CSV into df_dase, report its row count, and preview the first row.
inputFile2 = "MGS_datasets.csv"
df_dase = pd.read_csv(filepath_or_buffer=inputFile2)
print(df_dase.shape[0])
df_dase.head(n=1)

In [None]:
# Read the timeseries CSV into df_ts, report its row count, and preview the first row.
inputFile3 = "MGS_timeseries.csv"
df_ts = pd.read_csv(filepath_or_buffer=inputFile3)
print(df_ts.shape[0])
df_ts.head(n=1)

### Location & Site Information

In [None]:
# Only working with SensorLabel = Daily Average, & ParameterLabel = Discharge or Stage, to recreate their plots.
# The three conditions are combined into one boolean mask, and .copy() makes df_dasetemp an
# independent frame so that adding columns to it later does not write into a slice of df_dase
# (which is what triggers pandas' SettingWithCopyWarning).
isDailyAverage = df_dase['SensorLabel'] == 'Daily Average'
isDischarge = df_dase['ParameterLabel'] == 'Discharge'
isStage = df_dase['ParameterLabel'] == 'Stage'
df_dasetemp = df_dase[isDailyAverage & (isDischarge | isStage)].copy()

print(len(df_dasetemp))
df_dasetemp.head()

In [None]:
# Create VariableSpecificCV

def createVariableSpecificCV(A):
    """Map a ParameterLabel value to its VariableSpecificCV string.

    Args:
        A: ParameterLabel value; 'Discharge' and 'Stage' are the recognised labels.

    Returns:
        The VariableSpecificCV string that corresponds to A.

    Raises:
        ValueError: if A is not one of the recognised labels. Raising explicitly
            replaces the UnboundLocalError the unmatched case used to produce and
            names the offending value.
    """
    variableSpecificByLabel = {
        'Discharge': "Discharge / Flow_Daily_Discharge_Surface Water",
        'Stage': "Reservoir Level_Daily_Stage_Surface Water",
    }
    try:
        return variableSpecificByLabel[A]
    except (KeyError, TypeError):
        # KeyError: unknown label; TypeError: unhashable value such as a list.
        raise ValueError(
            f"No VariableSpecificCV defined for ParameterLabel {A!r}"
        ) from None

# Only ParameterLabel is needed, so map over that one column instead of building a
# row object for every record with DataFrame.apply(axis=1).
df_dasetemp['in_VariableSpecificCV'] = df_dasetemp['ParameterLabel'].map(createVariableSpecificCV)

In [None]:
# Reduce the dataset table to the columns that are needed from here on.
neededColumns = [
    'LocationCode',
    'SensorID',
    'SensorLabel',
    'TimeSeriesType',
    'Parameter',
    'UnitOfMeasure',
    'ComputationMethod',
    'ComputationPeriod',
    'LastModifiedTime',
    'ParameterLabel',
    'in_VariableSpecificCV',
]
df_dasetemp_sub = df_dasetemp[neededColumns]
print(df_dasetemp_sub.shape[0])
df_dasetemp_sub.head()

In [None]:
# Merge location info with the trimmed-down dataset data.
# how='left' keeps every row of df_loc; locations with no matching LocationCode in
# df_dasetemp_sub get NaN in the dataset columns.
# (The empty-DataFrame placeholder that used to precede the merge was overwritten
# immediately, so it has been removed.)
df_loctemp = pd.merge(df_loc, df_dasetemp_sub, on='LocationCode', how='left')
print(len(df_loctemp))
df_loctemp.head()

### Cleaned Timeseries

In [None]:
# List of SensorID values to find and preserve in the timeseries data.
# Taken from the trimmed dataset table, so it reflects the rows kept in df_dasetemp_sub.
SensorIDList = df_dasetemp_sub['SensorID'].tolist()
SensorIDList

In [None]:
# Keep only the timeseries rows whose SensorID is in SensorIDList,
# then drop exact duplicate rows and renumber the index from 0.
inSensorList = df_ts['SensorID'].isin(SensorIDList)
df_tstemp = df_ts[inSensorList].drop_duplicates().reset_index(drop=True)
print(df_tstemp.shape[0])
df_tstemp.head()

In [None]:
# Extract date, year and time-of-day values from the Timestamp field
# -------------------------------------------------

# Convert from string to datetime.
df_tstemp['Timestamp'] = pd.to_datetime(df_tstemp['Timestamp'])

# Calendar date as a datetime value at midnight.
# tz_localize(None) drops any timezone while keeping the local wall-clock time, so the
# result is tz-naive like the previous .dt.date round-trip; normalize() then zeroes the
# time part without formatting every row to text and parsing it back.
df_tstemp['Timestamp_Date'] = df_tstemp['Timestamp'].dt.tz_localize(None).dt.normalize()

# Year (taken through .dt so the values stay aligned on the DataFrame index).
df_tstemp['Timestamp_Year'] = df_tstemp['Timestamp_Date'].dt.year

# Time of day
df_tstemp['Timestamp_Time'] = df_tstemp['Timestamp'].dt.time

print(len(df_tstemp))
df_tstemp.head()

In [None]:
# Drop the fields that are not used, then remove rows that are exact duplicates
# and renumber the index from 0.
# -------------------------------------------------
unusedFields = ['OID_', 'GradeCode', 'GradeName', 'ApprovalLevel']
df_tstemp = (
    df_tstemp
    .drop(columns=unusedFields)
    .drop_duplicates()
    .reset_index(drop=True)
)
df_tstemp

### Export out csv

In [None]:
# Join every timeseries row to the location / dataset table on SensorID.
# A left join keeps all rows of df_tstemp.
dfsupertemp = pd.merge(left=df_tstemp, right=df_loctemp, how='left', on='SensorID')
print(dfsupertemp.shape[0])
dfsupertemp.head()

In [None]:
# Export out to CSV.
# The file name is relative, so the file is written to the current working directory;
# index=False leaves the DataFrame index out of the file.
dfsupertemp.to_csv('P_mtOSMaster.csv', index=False) # The output.