## Get monthly surface area for 10 reservoirs

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.gridspec as gs
import matplotlib.pyplot as plt
from matplotlib import pyplot

import warnings
warnings.filterwarnings('ignore')

## Match surface areas to depth gauge data
The reference table `Depth_to_Surface_all_gauges.csv` contains some 0 values that still need to be fixed.
1. Load the depth-to-surface-area reference table
2. Load the depth gauge data
3. Add the surface area to the depth gauge data

In [25]:
# Read in the reference table that maps depth to surface area for every gauge.
dsa = pd.read_csv('Depth_to_Surface_all_gauges.csv')
dsa = dsa[['Gauge_Name', 'Gauge_ID', 'Depth', 'Surface_Area', 'State']]

### START LOOP ###
csv = 'Library/KEEPIT_419041.csv'

# Get the gauge name and ID from the top of the gauge file. One read is enough:
# the second column's header (file line 1) is kept as `name`, and the second
# column's value in the first data row (file line 2) is the gauge ID.
gauge_number_df = pd.read_csv(csv, nrows=1, escapechar='#')
name = gauge_number_df.columns[1]
ID = str(gauge_number_df.iloc[0, 1])

# Read in the gauge data (the first 9 lines of the file are skipped).
# The timestamps are parsed after loading instead of through read_csv's
# `date_parser` argument, which is deprecated in pandas 2.x. Everything after
# the last '+' (the UTC offset) is dropped, so the resulting index is
# timezone-naive, exactly as before.
gauge_raw = pd.read_csv(csv, skiprows=9, escapechar='#')
gauge_raw['Timestamp'] = pd.to_datetime(
    gauge_raw['Timestamp'].str.rsplit('+', n=1).str[0]
)
gauge_raw = gauge_raw.set_index('Timestamp')

# Organise into monthly averages. 'MS' is the month-start frequency, so every
# monthly mean is labelled with the first day of its month.
#  * Only 'Value' is averaged, so non-numeric columns cannot break .mean().
#  * Months without any reading have a NaN mean and are dropped, because NaN
#    cannot be converted to an integer.
#  * The mean is rounded BEFORE the integer conversion; converting first would
#    truncate (e.g. 328.7 -> 328) and make the rounding a no-op.
monthly = (
    gauge_raw[['Value']]
    .resample('MS')
    .mean()
    .dropna(subset=['Value'])
    .round(0)
    .astype(np.int64)
)

# Get the right depth-to-surface-area reference rows for this gauge.
# Gauge_ID is compared as text so the match also works when pandas inferred
# the reference column as integers.
dsa_df = dsa.loc[dsa['Gauge_ID'].astype(str) == ID]
dsa_df = dsa_df.set_index('Depth', drop=False)

# Use the reference table to look up the surface area for each monthly depth.
# Depths that are missing from the reference table get a surface area of 0
# (numeric, so the column is not a mix of strings and floats).
surface_area = (
    dsa_df['Surface_Area']
    .reindex(monthly['Value'])
    .fillna(0)
    .to_numpy()
)

# Assemble the result in the same layout as before: indexed by depth, with the
# month kept in a 'Timestamp' column.
df = monthly.assign(
    Gauge_ID=ID,
    Surface_Area=surface_area,
    Timestamp=monthly.index,
)
df = df.set_index(df['Value'])
### END LOOP ###

df

Unnamed: 0_level_0,Value,Gauge_ID,Surface_Area,Timestamp
Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
328,328,419041,41085000.0,1976-01-01
329,329,419041,0,1976-02-01
328,328,419041,41085000.0,1976-03-01
328,328,419041,41085000.0,1976-04-01
328,328,419041,41085000.0,1976-05-01
...,...,...,...,...
319,319,419041,18748437.5,2020-12-01
322,322,419041,25016250.0,2021-01-01
322,322,419041,25016250.0,2021-02-01
323,323,419041,27990000.0,2021-03-01


In [23]:
# Write the matched table to '<gauge ID>_Depth_to_Surface_Area.csv' in the
# current working directory (the index is written as the first column).
output_csv = f'{ID}_Depth_to_Surface_Area.csv'
df.to_csv(output_csv)