# Columbia River Total Dissolved Gas (TDG) Overview Table

In [1]:
# Ignore the performance warning so it does not clutter the rendered notebook.
# Note: this filter has no category or message restriction, so it silences
# every warning for the rest of the session, not only the performance one.
import warnings
warnings.filterwarnings('ignore')

In [2]:
from cwms_read.cwms_read import get_cwms
import pandas as pd
import pandas
from collections import OrderedDict
from datetime import datetime, timedelta
from get_avgs import oregon_method, washington_method, combine
import numpy as np
from pytz import timezone
# Reporting window: a fixed start date through yesterday (US/Pacific).
# Dates are kept as (year, month, day) tuples, the form passed to get_cwms.
# An earlier window used for testing was (2017, 6, 1) through (2017, 8, 2).
now = datetime.now(timezone('US/Pacific'))
start_date = (2018, 4, 1)
end = now - timedelta(days=1)
end_date = end.timetuple()[:3]
# Start date rendered as 'YYYY-M-D'.
start_index = '-'.join(map(str, start_date))

In [3]:
%%capture

# Column layout of the overview table: one 4-tuple per column, in display
# order. The positions correspond to the MultiIndex levels named below
# ('Project', '', ' ', 'units').
# The order must stay in step with `paths`, because the loading loop pairs the
# two lists positionally with zip().
tuples = [
    
    ('McNary', 'Spill Cap', 'Starting → Change', 'kcfs'),
    ('McNary', 'Actual Spill', 'Daily Average', 'kcfs'),
    ('McNary', 'TW', 'MCPW', '% sat' ),
    ('McNary', 'd/s FB', 'JDY', '% sat'),
    
    ('John Day', 'Spill Cap', 'Starting → Change', 'kcfs'),
    ('John Day', 'Actual Spill', 'Daily Average', 'kcfs'),
    ('John Day', 'TW', 'JHAW', '% sat' ),
    ('John Day', 'd/s FB', 'TDA', '% sat'),
    
    ('The Dalles', 'Spill Cap', 'Starting → Change', 'kcfs'),
    ('The Dalles', 'Actual Spill', 'Daily Average', 'kcfs'),
    ('The Dalles', 'TW', 'TDDO', '% sat' ),
    # NOTE(review): this label is padded with spaces (' BON '), unlike the
    # other gauge labels -- confirm the padding is intentional.
    ('The Dalles', 'd/s FB', ' BON ', '% sat'),
    
    # Bonneville has no d/s FB column.
    ('Bonneville', 'Spill Cap', 'Starting → Change', 'kcfs'),
    ('Bonneville', 'Actual Spill', 'Daily Average', 'kcfs'),
    ('Bonneville', 'TW', 'CCIW', '% sat' ),
    
]

# The two middle level names are '' and ' ' (distinct strings), presumably so
# that those header cells render blank -- TODO confirm.
columns = pd.MultiIndex.from_tuples(tuples, names = ['Project','',' ','units'])

# Time-series identifiers passed to get_cwms, one per entry of `tuples` and in
# the same order (the loading loop zips the two lists together).
# Each project contributes its spill cap, hourly spill, tailwater TDG and,
# except for Bonneville, the TDG at the next gauge listed as 'd/s FB'.
paths = [
    
        'MCN.Flow-Spill-Cap-Fish.Inst.~1Day.0.CENWDP-COMPUTED-PUB', 
        'MCN.Flow-Spill.Ave.1Hour.1Hour.CBT-REV',
        'MCPW.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-Combined-REV',
        'JDY.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-WAmethod-REV',
        
        'JDA.Flow-Spill-Cap-Fish.Inst.~1Day.0.CENWDP-COMPUTED-PUB', 
        'JDA.Flow-Spill.Ave.1Hour.1Hour.CBT-REV',
        'JHAW.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-Combined-REV',
        'TDA.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-WAmethod-REV',
         
        'TDA.Flow-Spill-Cap-Fish.Inst.~1Day.0.CENWDP-COMPUTED-PUB', 
        'TDA.Flow-Spill.Ave.1Hour.1Hour.CBT-REV',
        'TDDO.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-Combined-REV',
        'BON.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-WAmethod-REV',
    
        'BON.Flow-Spill-Cap-Fish.Inst.~1Day.0.CENWDP-COMPUTED-PUB', 
        'BON.Flow-Spill.Ave.1Hour.1Hour.CBT-REV',
        'CCIW.%-Saturation-TDG.Ave.~1Day.12Hours.CENWDP-COMPUTED-Combined-REV',
        
        
        ]

# Assemble the daily overview table: an all-NaN frame indexed by calendar day
# (start_date through end_date) with one column per entry of `tuples`, then
# fill each column from its matching path.
index = pd.date_range(
    start='-'.join([str(x) for x in start_date]),
    end='-'.join([str(x) for x in end_date]),
    freq='D',
    name='date',
)
df = pd.DataFrame(columns = columns, data = np.nan, index = index)

# Metadata attached to each returned frame, accumulated across all paths.
meta = {}
for path, column in zip(paths, tuples):
    data = get_cwms(path, start_date = start_date, end_date = end_date, public = True, fill = True)
    try:
        meta.update(data.__dict__['metadata'])
        # Collapse the series to one mean value per calendar day.
        data = data.groupby(pd.Grouper(freq = 'D')).mean()
        df[column] = data.iloc[:,0]
    except Exception:
        # Best effort: if this path returned nothing usable, leave the column
        # as NaN and carry on. `Exception` (not a bare except) so that
        # KeyboardInterrupt/SystemExit still stop the run.
        continue

# Whole numbers only; the display cell strips the trailing '.0'.
df = df.round(0)


In [4]:
# Questionable data is defined as values that are missing 1/3 or more of the
# data needed for their calculation.
# This cell builds `questionable`, a boolean frame shaped like `df`, from the
# 'quality' column of the frame returned by combine().

# Gauge code -> (project, measurement) partial column key in `df`.
column_dict = {
    'MCPW': ('McNary', 'TW'),
    'JDY':  ('McNary', 'd/s FB'),
    'JHAW': ('John Day','TW'),
    'TDA':  ('John Day','d/s FB'),
    'TDDO': ('The Dalles','TW'),
    'BON':  ('The Dalles','d/s FB'),
    'CCIW': ('Bonneville', 'TW'),
}

site_list = list(column_dict.keys())
# NOTE(review): the suffix ends with a trailing space -- confirm get_cwms
# accepts (or needs) it.
path = '.%-Saturation-TDG.Inst.1Hour.0.GOES-COMPUTED-REV '
# site_list is not read again in this cell; the loop below rebuilds each path.
site_list = [x+path for x in site_list]

# Default: nothing is questionable.
questionable = pd.DataFrame(index = df.index, columns = df.columns, data = False)

# The hourly data is requested from 10 days before the table window through today.
qual_start = datetime(*start_date) - timedelta(days = 10)
qual_start = (qual_start.year, qual_start.month, qual_start.day)
qual_end = (now.year, now.month, now.day)

for key, value in column_dict.items():
    p = key+path
    data = get_cwms(p, start_date=qual_start, end_date=qual_end, public = True)
    try:
        series = data.iloc[:,0]
        wash = series.copy().pipe(washington_method)
        oregon = series.copy().pipe(oregon_method)
        # NOTE(review): combine() receives end_date (yesterday), not qual_end
        # (today) -- confirm that is intended.
        combined = combine(oregon, wash, qual_start, end_date)
        # Zero the time of day so the labels can be looked up with df's daily index.
        combined.index = [x.replace(hour = 0, minute = 0, second = 0) for x in combined.index]
        combined.index.name = 'date'
        combined = combined.loc[df.index]
        # Anything whose quality flag is not True is questionable.
        quality = combined['quality'] != True
        questionable[value] = quality
    except Exception:
        # Best effort: a gauge that cannot be evaluated keeps its all-False
        # column. `Exception` (not a bare except) so that
        # KeyboardInterrupt/SystemExit still stop the run.
        continue
        



In [5]:
# Re-key the metadata so it can be looked up by the table's column tuples.

# NOTE(review): after this insert `paths` has one more entry than `df` has
# columns. zip() below therefore pairs the PAQW name with the last column and
# leaves the final path unpaired -- confirm this is intended.
paths.insert(-1,'PAQW.%-Saturation-TDG.Inst.1Hour.0.GOES-COMPUTED-RAW')

# 'SITE.Param-Name.…' -> 'SITE_Param_Name': keep the first two dotted fields,
# joined with '_', with every '-' turned into '_'.
p = ['_'.join(path.split('.')[:2]).replace('-', '_') for path in paths]

# Derived path name -> column tuple, paired by position.
meta_col_dict = dict(zip(p, df.columns))

# Copy each metadata entry whose key has a matching column under that column's
# tuple; entries without a match are skipped.
temp_meta = {
    meta_col_dict[key]: value
    for key, value in meta.items()
    if key in meta_col_dict
}

meta.update(temp_meta)

In [6]:
%%capture

# Lack of load or involuntary spill: 6 hours of 24 hours that the project is
# spilling above the voluntary spill cap with a 2 kcfs buffer.
# This will be the asterisk on the actual spill columns.

# Project display name -> list of project codes, in table order.
site_dict = OrderedDict(
    (project, [code])
    for project, code in (
        ('McNary', 'MCN'),
        ('John Day', 'JDA'),
        ('The Dalles', 'TDA'),
        ('Bonneville', 'BON'),
    )
)

# Project code -> display name, used to rename the fetched columns.
column_dict = {codes[0]: project for project, codes in site_dict.items()}

# Involuntary-spill flags: compare each project's hourly spill with its daily
# spill cap plus a 2 kcfs buffer, and flag the days on which 6 or more hours
# were above it. The result, `inv_spill_bool`, is a boolean frame shaped like
# `df` with flags in the 'Actual Spill' columns.

# Collecting the daily spill caps to compare against the hourly spill
site_list = list(sum(site_dict.values(), []))
spill_cap_path = '.Flow-Spill-Cap-Fish.Inst.~1Day.0.CENWDP-COMPUTED-PUB'
spill_cap_list = [x+spill_cap_path for x in site_list]
spill_cap = get_cwms(spill_cap_list, public = True, fill = True, start_date=start_date, end_date=end_date, timezone = 'PST')

try:
    # Keep only the text before the first '_' of each column name, then map
    # those project codes to display names.
    spill_cap.columns = [x.split('_')[0] for x in spill_cap.columns]
    spill_cap.rename(columns = column_dict, inplace = True)

    # Collecting the hourly spill to compare against the daily spill caps
    spill_path = '.Flow-Spill.Ave.1Hour.1Hour.CBT-REV'
    spill_list = [x+spill_path for x in site_list]

    spill = get_cwms(spill_list, public = True, start_date=start_date, end_date=end_date, timezone = 'PST')
    spill.columns = [x.split('_')[0] for x in spill.columns]
    spill.rename(columns = column_dict, inplace = True)

    # The spill change occurs at 16:00, so shift the hourly index back 16 hours;
    # grouping by calendar day then groups by "spill day".
    spill.index = spill.index - timedelta(hours = 16)

    # Group the data by day to check if the project was in involuntary spill
    sg = spill.groupby(pd.Grouper(level='date', freq='D'))
    scg = spill_cap.groupby(pd.Grouper(level='date', freq='D')).mean().groupby(pd.Grouper(level='date', freq='D'))
    date = list(scg.groups.keys())
    inv_spill = pd.DataFrame(index = date, columns = spill.columns)
    for group, value in scg:
        g = sg.get_group(group)
        # The day's spill cap per project (single row after the daily mean).
        s = value.iloc[0]
        # Hours above cap + 2 kcfs, counted per project; more than 5 means 6+.
        inv_spill.loc[group] = g.gt(s + 2, axis = 'columns').sum() > 5

    # Create the boolean frame that drives the asterisk.
    projects = list(set(df.columns.get_level_values(0).tolist()))
    inv_spill_bool = pd.DataFrame(data = False,index = df.index, columns = df.columns)
    for project in projects:
        inv_spill_bool.loc[:,(project, 'Actual Spill')] = inv_spill[project]

    # Dates with no computed flag align to NaN; treat them as "not flagged".
    inv_spill_bool = inv_spill_bool.fillna(value = False)

except Exception:
    # Best effort: if the spill data cannot be fetched or compared, fall back
    # to the '' sentinel so the table is still produced, just without
    # asterisks. `Exception` (not a bare except) so that
    # KeyboardInterrupt/SystemExit still stop the run.
    inv_spill_bool = ''


In [7]:
# Minimum Generation: at least 6 hours of 24 that the project is at minimum
# generation. This will be the green on the actual spill columns.

# Minimum generation flow per project, in kcfs, in table order.
min_gen_dict_kcfs = OrderedDict(zip(
    ('McNary', 'John Day', 'The Dalles', 'Bonneville'),
    (61.2, 61.2, 61.2, 40.8),
))

# Same thresholds as a Series indexed by project name.
min_gen_kcfs = pd.Series(min_gen_dict_kcfs)

# Collecting the hourly generation flow to compare against the minimum
# generation flow, grouped by day.
site_list = list(sum(site_dict.values(), []))
gen_flow_path = '.Flow-Gen.Ave.1Hour.1Hour.CBT-REV'
gen_flow_list = [x+gen_flow_path for x in site_list]
gen_flow = get_cwms(gen_flow_list,col_names = site_list, public = True, start_date=start_date, end_date=end_date, timezone = 'PST')
# Keep only the text before the first '_' of each column name, then map those
# project codes to display names.
gen_flow.columns = [x.split('_')[0] for x in gen_flow.columns]
gen_flow.rename(columns = column_dict, inplace = True)
gen_flow_grouped = gen_flow.groupby(pd.Grouper(level='date', freq='D'))

# One row per day and one column per project, initialised to False.
# (Previously this frame was pre-filled with the kcfs thresholds as dummy data;
# those values are truthy, so any row the loop did not overwrite would have
# read as "at minimum generation".)
date = list(gen_flow_grouped.groups.keys())
min_gen = pd.DataFrame(data = False, index = date, columns = list(min_gen_kcfs.index))
min_gen.index.rename('date', inplace = True)

# Check if each project was at minimum generation for the day and store the result.
for group, value in gen_flow_grouped:
    # Hourly flow at or below the project's threshold, compared column by column.
    bool_df = value.le(min_gen_kcfs[value.columns], axis = 'columns')
    # More than 5 such hours means 6 or more.
    min_gen.loc[group] = bool_df.sum()>5

# Create the boolean frame for the css: shaped like `df`, with the daily
# minimum-generation flag placed in each project's 'Actual Spill' column.
projects = list(set(df.columns.get_level_values(0).tolist()))
min_gen_bool = pd.DataFrame(data = False,index = df.index, columns = df.columns)
for project in projects:
    min_gen_bool.loc[:,(project, 'Actual Spill')] = min_gen[project]

# Dates that have no row in min_gen align to NaN. NaN is truthy, so without
# this step such cells would be treated as flagged by a truthiness check.
# This mirrors the fillna applied to inv_spill_bool.
min_gen_bool = min_gen_bool.fillna(value = False)


In [8]:
# Not Meeting TDG Gas Cap Tailwater: defined as combined OR WA value > 120.5
# Not Meeting TDG Gas Cap Forebay: defined as combined OR WA value > 115.5
# This will be the blue on the TW and downstream forebay columns.

# Boolean masks over df's columns, built from the first two column levels.
project_level = df.columns.get_level_values(0)
gauge_level = df.columns.get_level_values(1)
is_tw = gauge_level == 'TW'
is_fb = gauge_level == 'd/s FB'

# Per-gauge exceedance: False everywhere except TW/FB cells above their limit.
# NaN compares as False, so missing values are never flagged.
gas_cap_exceeds = pd.DataFrame(data = False, index = df.index, columns = df.columns)
for column in df.columns[is_tw]:
    gas_cap_exceeds[column] = df[column] > 120.5
for column in df.columns[is_fb]:
    gas_cap_exceeds[column] = df[column] > 115.5

# Combine gas cap by project: if true in one gauge, true for all of the
# project's gauges.
gas_cap_by_gauge =  pd.DataFrame(data = False,index = gas_cap_exceeds.index, columns = gas_cap_exceeds.columns)
projects = list(set(project_level.tolist()))
for project in projects:
    # Select the gauge columns this project actually has. Bonneville has no
    # d/s FB column; a mask handles that without a failing label lookup.
    project_gauges = (project_level == project) & (is_tw | is_fb)
    data = gas_cap_exceeds.loc[:, project_gauges]
    project_exceeds = data.any(axis = 1)
    for column in df.columns[project_gauges]:
        gas_cap_by_gauge[column] = project_exceeds



In [9]:
# Most restrictive gauge (shown in bold), for projects with both gauges:
#   tailrace is bold when (tailrace TDG - 120) >= (d/s forebay TDG - 115)
#   d/s forebay is bold when (tailrace TDG - 120) <= (d/s forebay TDG - 115)
# so on a tie both are bold, and when either value is missing neither is.
# Apart from that, bolding does not depend on whether the value is above or
# below its target.
# Bonneville has only a tailwater gauge; it is bold when its TDG is above 120.

project_names = df.columns.get_level_values(0)
gauge_kinds = df.columns.get_level_values(1)

# Start from "nothing bold" and switch on the TW / d/s FB cells per project.
most_restrictive_gauge = pd.DataFrame(data = False, index = df.index, columns = df.columns)
for project in ('John Day', 'McNary', 'The Dalles'):
    in_project = project_names == project
    tw_column = df.columns[in_project & (gauge_kinds == 'TW')][0]
    fb_column = df.columns[in_project & (gauge_kinds == 'd/s FB')][0]
    tw_margin = df[tw_column] - 120
    fb_margin = df[fb_column] - 115
    most_restrictive_gauge[tw_column] = tw_margin >= fb_margin
    most_restrictive_gauge[fb_column] = tw_margin <= fb_margin

bonneville_tw = ('Bonneville', 'TW', 'CCIW', '% sat' )
most_restrictive_gauge[bonneville_tw] = df[bonneville_tw] > 120


In [10]:

#"""
#Gas cap meets TW: defined as combined OR WA value > 119
#Gas cap meets FB: defined as combined OR WA value > 114

#This will be the bold on the TW and downstream forebay columns
#"""
#idx = pd.IndexSlice
#gas_cap_meets = df.copy()
#tw = gas_cap_meets.sort_index(axis = 1).loc[:,pd.IndexSlice[:,'TW']]
#fb = gas_cap_meets.sort_index(axis = 1).loc[:,pd.IndexSlice[:,'d/s FB']]
#gas_cap_meets[tw.columns] = tw.applymap(lambda x: x >= 119)
#gas_cap_meets[fb.columns] = fb.applymap(lambda x: x >= 114)
#gas_cap_meets = gas_cap_meets.sort_index(axis=1)[df.columns]
#gas_cap_meets = gas_cap_meets.applymap(lambda x: x if type(x) == bool else False)


In [11]:
# Convert the dataframe to strings because it displays better: remove the
# decimals, add the asterisk, mark spill cap changes, and format the index as
# date only (no time) for all dataframes.
# If the index is not formatted the same way on every frame, the styling will
# not work.

# '104.0' -> '104'. Series.map is applied per column so this does not depend
# on DataFrame.applymap, which newer pandas deprecates.
df_string = (
    df.copy()
    .round(0)
    .astype(str)
    .apply(lambda column: column.map(lambda text: text.replace('.0', '')))
)

try:
    # Append '*' wherever the involuntary-spill mask is True.
    df_string[inv_spill_bool] = df_string[inv_spill_bool] + '*'
except Exception:
    # Best effort: inv_spill_bool is the '' sentinel when the spill comparison
    # failed, in which case the table is shown without asterisks.
    pass
df_string = df_string.replace('nan', '--')

# Mark spill cap changes as 'old→new' on the day the value changes. The values
# are read from this copy, so the comparison always sees the plain values and
# never an arrow string written earlier in the loop.
spill_cap = df_string.sort_index(axis = 1).loc[:,pd.IndexSlice[:,'Spill Cap']].copy()
for column in spill_cap.columns:
    series = spill_cap[column]
    column_position = df_string.columns.get_loc(column)
    for position in range(1, len(series)):
        previous_value = series.iloc[position - 1]
        value = series.iloc[position]
        if value != previous_value:
            # Positional .iloc write: chained `df[col][i] = …` assignment may
            # write to a temporary copy and leave df_string unchanged.
            df_string.iloc[position, column_position] = '{}{}{}'.format(str(previous_value), '→', str(value))

# Give the data frame and every mask frame the same 'YYYY-MM-DD' string index.
# pd.to_datetime makes this safe to re-run: on a second run the mask frames
# already carry string labels, which have no .strftime of their own.
for dataframe in [df_string ,gas_cap_by_gauge,min_gen_bool, most_restrictive_gauge, questionable]:
    dataframe.index = pd.to_datetime(dataframe.index).strftime('%Y-%m-%d')



In [12]:
"""
Custom css
"""


def hover(hover_color="#ffff99"):
    """Return a table-style rule that sets the background of a hovered body row.

    Parameters
    ----------
    hover_color : background colour, formatted into the rule with ``%s``.

    Returns
    -------
    dict with a ``selector`` ("tbody tr:hover") and a one-item ``props`` list.
    """
    background = ("background-color", "%s" % hover_color)
    return {"selector": "tbody tr:hover", "props": [background]}


# Shared base table styles; currently empty.
styles = []



In [13]:
# Table key: a small legend table whose 'value' cells are styled with the same
# css used in the data table.

blue = '#7194da'
green = '#cfff95'

def highlight(value):
    """Return the css for every cell of the key table.

    Used with ``Styler.apply(..., axis=None)``: ``value`` (the frame being
    styled) is ignored and a copy of ``table_css`` is returned. ``table_css``
    has the same index and column as ``table_key``.
    """
    return table_css.copy()

key_list =[
            'Most restrictive gauge used to determine spill cap',
            'Project spilling above the voluntary spill cap for 6 or more hours',
            'TDG measurements exceeded the daily criteria (i.e. gas cap)',
            'Project operating at minimum generation for 6 or more hours',
            '1/3 of data or more missing for value calculation',
            'No data'
        ]

# Sample cell shown next to each description.
table_key = pd.DataFrame({'Table Key':key_list }).set_index('Table Key')
table_key['value']=['value','*','','','value', '--']

# css for each sample cell, row for row with key_list.
table_css = pd.DataFrame({'Table Key':key_list,'value':['font-weight: 900',
                                                        '',
                                                        'background-color: ' + blue,
                                                        'background-color: '+ green,
                                                        'color: red',
                                                        '']}).set_index('Table Key')

# hide_index is defined but not applied (see the commented-out part below).
hide_index = [{'selector': '.row_heading, .blank', 'props': [('display', 'none;')]}]
key_styles = styles # + hide_index
key_html = (table_key.style)
# Last expression of the cell: renders the styled key table.
key_html.apply(highlight,axis = None).set_uuid('key')



Unnamed: 0_level_0,value
Table Key,Unnamed: 1_level_1
Most restrictive gauge used to determine spill cap,value
Project spilling above the voluntary spill cap for 6 or more hours,*
TDG measurements exceeded the daily criteria (i.e. gas cap),
Project operating at minimum generation for 6 or more hours,
1/3 of data or more missing for value calculation,value
No data,--


In [14]:
"""
Adding table styles
"""

def tbl_css(value,css,df):
    """Turn a mask frame into a frame of css strings for ``Styler.apply``.

    Parameters
    ----------
    value : the frame being styled; passed in by ``Styler.apply`` and unused.
    css : css declaration to use for flagged cells, e.g. ``'color: red'``.
    df : mask frame; each truthy cell gets ``css`` and each falsy cell ``''``.
        NaN is truthy, so NaN cells get ``css`` as well.

    Returns
    -------
    DataFrame with the same index and columns as ``df``.
    """
    def to_css(flag):
        return css if flag else ''

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever one this pandas version provides.
    elementwise = df.map if hasattr(type(df), 'map') else df.applymap
    return elementwise(to_css)

# Shared styles plus the row-hover rule.
table_styles = styles + [hover()]

s = df_string.style
html = s.set_table_styles(table_styles).set_caption("Spill Cap changes occur at 16:00 PST.")

# One styling layer per mask frame, applied in this order.
css_layers = [
    ('font-weight: 900', most_restrictive_gauge),
    ('background-color: #7194da', gas_cap_by_gauge),
    ('background-color: #cfff95', min_gen_bool),
    ('color: red', questionable),
]
for css_rule, mask in css_layers:
    html = html.apply(tbl_css, css = css_rule, df = mask, axis = None)

# Last expression of the cell: renders the styled data table.
html.set_uuid('data_table')



Project,McNary,McNary,McNary,McNary,John Day,John Day,John Day,John Day,The Dalles,The Dalles,The Dalles,The Dalles,Bonneville,Bonneville,Bonneville
Unnamed: 0_level_1,Spill Cap,Actual Spill,TW,d/s FB,Spill Cap,Actual Spill,TW,d/s FB,Spill Cap,Actual Spill,TW,d/s FB,Spill Cap,Actual Spill,TW
Unnamed: 0_level_2,Starting → Change,Daily Average,MCPW,JDY,Starting → Change,Daily Average,JHAW,TDA,Starting → Change,Daily Average,TDDO,BON,Starting → Change,Daily Average,CCIW
units,kcfs,kcfs,% sat,% sat,kcfs,kcfs,% sat,% sat,kcfs,kcfs,% sat,% sat,kcfs,kcfs,% sat
2018-04-01,190,0,104,104,140,0,103,104,125,15,104,105,128,1,112
2018-04-02,190,20,105,104,140,0,103,103,125,15,104,104,128,1,112
2018-04-03,190,32,107,103,140,0,103,103,125,40,107,104,128,1,111
2018-04-04,190,58,108,103,140,0,103,103,125,31,106,106,128,1,110
2018-04-05,190,61,112,104,140,0,104,104,125,43,107,107,128,1,111
2018-04-06,190,60,112,105,140,0,105,105,125,51,108,110,128,1,113
2018-04-07,190,60,113,106,140,0,106,105,125,58,108,110,128,20,113
2018-04-08,190,73,113,106,140,12,110,105,125,66,108,109,128,32,113
2018-04-09,190,90,114,106,140,14,111,106,125,72,110,108,128,40,114
2018-04-10,190→195,184,118,108,140→135,138,120,114,125,123,118,111,128→126,127,120


In [15]:
# Render an empty table that carries the shared table styles under the
# 'header-fixed' uuid.
# NOTE(review): presumably a placeholder used by the published page for a
# fixed header -- confirm.
pd.DataFrame().style.set_table_styles(table_styles).set_uuid('header-fixed')

In [16]:
# Stamp the output with the time this notebook was executed.
tz = 'US/Pacific'
now = datetime.now(timezone(tz))
print('Executed: {} {}'.format(now, tz))

Executed: 2018-09-28 10:04:55.173731-07:00 US/Pacific
