In [2]:
# Import standard libraries
import os
import sys
import importlib
import datetime as dt
import time
from pathlib import Path
from contextlib import redirect_stdout

# Import data manipulation libraries
import numpy as np
import pandas as pd

# Import visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Import custom modules - NEED WINDOWS OS
from CoEQWAL.imports import AuxFunctions as af, cs3, csPlots, cs_util as util, dss3_functions_reference as dss

### Read Data

In [3]:
# Load the combined study output. The seven header rows become the levels of the
# column MultiIndex and the first CSV column becomes a parsed date index.
df = pd.read_csv(
    "../data/EDA_data_04_09_24.csv",
    header=[0, 1, 2, 3, 4, 5, 6],
    index_col=0,
    parse_dates=True,
)

# Drop every column whose level-6 (units) label contains the literal text 'CFS.1'.
# regex=False is deliberate: as a regular expression the '.' would match any character.
# NOTE(review): presumably 'CFS.1' is the suffix pandas adds when it de-duplicates
# repeated column labels - confirm against the CSV.
units_labels = df.columns.get_level_values(6)
df = df.loc[:, ~units_labels.str.contains('CFS.1', regex=False)]

# Column "0" of dss_names.csv, as a plain Python list.
dss_names = pd.read_csv("../data/dss_names.csv")["0"].tolist()
df.head(5)

A,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE
B,C_SJRVER_expl0000,C_WILKNS_expl0000,DEL_CVP_PAG_N_expl0000,DEL_CVP_PAG_S_expl0000,DEL_CVP_PEX_S_expl0000,DEL_CVP_PMI_N_expl0000,DEL_CVP_PMI_S_expl0000,DEL_CVP_PRF_N_expl0000,DEL_CVP_PRF_S_expl0000,DEL_CVP_PSC_N_expl0000,...,WYT_SJR__expl0599,WYT_TRIN__expl0599,X2_PRV_expl0599,D_TOTAL_expl0599,S_RESTOT_expl0599,S_RESTOT_NOD_expl0599,DEL_CVP_TOTAL_expl0599,DEL_CVP_PAG_TOTAL_expl0599,DEL_CVP_PSCEX_TOTAL_expl0599,DEL_CVP_PRF_TOTAL_expl0599
C,FLOW-CHANNEL,FLOW-CHANNEL,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,...,WATER-YEAR-TYPE,WATER-YEAR-TYPE,X2-POSITION-PREV,FLOW-DELIVERY,STORAGE,STORAGE,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP
D,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,...,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON
E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,...,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E
F,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,...,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER
Units,CFS,CFS,CFS,CFS,CFS,CFS,CFS,CFS,CFS,CFS,...,NONE.1,NONE,KM,CFS,TAF,TAF,CFS,CFS,CFS,CFS
1921-10-31,2505.279,5427.5645,79.62269,922.12427,978.93085,175.15205,177.9746,330.14786,1070.5406,776.8217,...,2.0,3.0,68.20287,9404.355,9270.104,7347.322,3488.7666,602.3068,1006.93964,1387.5029
1921-11-30,1977.8469,4904.4517,0.0,703.3396,336.38205,137.90486,239.3647,268.8889,689.73926,990.7904,...,2.0,3.0,67.4141,7729.3105,9145.287,7191.747,2524.2285,421.99603,728.172,948.3119
1921-12-31,2740.8728,9056.177,0.0,973.1761,147.93547,122.706566,237.1183,157.75537,314.70547,0.0,...,2.0,3.0,73.77786,11280.0,9187.259,7189.641,1591.1171,583.89496,88.76128,472.46307
1922-01-31,2577.6514,6841.2305,0.0,1708.2953,153.59721,119.16147,156.20477,74.81183,141.36443,24.395163,...,2.0,3.0,73.25478,8157.7305,9453.143,7418.521,1731.6113,1024.9584,105.010345,216.17726
1922-02-28,5592.398,16573.014,0.0,2154.231,442.68655,131.60466,80.2711,68.422615,106.018906,0.0,...,1.0,3.0,70.16119,11280.0,9932.215,7835.668,2126.8743,1292.5148,265.61194,173.28299


In [None]:
var_df = pd.read_csv("../data/EDA_vars_04_09_24.csv")
var_list = var_df["DSS Part B"].tolist()

## Subset by variable across studies

In [26]:
def create_subset(df, varname):
    """
    Keep only the columns of df whose level-1 column label contains varname.

    Matching uses pandas' ``str.contains`` with its default settings, so
    varname may appear anywhere inside the label.

    :param df: Dataframe to filter (MultiIndex columns)
    :param varname: variable of interest, e.g. S_SHSTA
    :return: Dataframe restricted to the matching columns
    """
    level_one_labels = df.columns.get_level_values(1)
    keep = level_one_labels.str.contains(varname)
    return df.loc[:, keep]

In [27]:
def create_subset_list(df, var_names):
    """
    Filters df to return columns whose level-1 label contains any of the strings in var_names.

    The names are joined with '|' and matched with pandas' ``str.contains``,
    so each name is interpreted as a regular-expression alternative.

    :param df: Dataframe to filter (MultiIndex columns).
    :param var_names: List of variables of interest, e.g. ['S_SHSTA', 'S_OROVL'].
        A single string is treated as a one-element list.
    :return: Dataframe restricted to the matching columns; no columns at all
        when var_names is empty.
    """
    # A bare string would otherwise be joined character by character
    # ('NDO' -> 'N|D|O') and match almost every column.
    if isinstance(var_names, str):
        var_names = [var_names]
    var_names = list(var_names)

    # An empty list would produce the empty pattern '', which matches every
    # label; nothing was asked for, so return a frame with no columns.
    if not var_names:
        return df.iloc[:, :0]

    filtered_columns = df.columns.get_level_values(1).str.contains('|'.join(var_names))
    return df.loc[:, filtered_columns]

In [35]:
def convert_cfs_to_taf(df):
    """
    Add thousand-acre-feet (TAF) versions of the CFS delivery/outflow columns.

    A column is converted when its level-1 name contains 'DEL', 'NDO' or
    'D_TOTAL' and its level-6 (units) label contains 'CFS'. For each such
    column a new column is added to df whose level-1 name has '_TAF' appended
    and whose units label is 'TAF'; the source columns are left as they are.
    Each value is multiplied by the number of days in the month of its index
    date, so the index must be date-like.

    NOTE(review): '_TAF' is appended after the whole level-1 name. The plotting
    functions in this notebook read the text after the last '_' as the study
    label, so converted columns would be labelled 'TAF' - confirm that is intended.

    :param df: DataFrame with a DatetimeIndex and 7-level MultiIndex columns;
        modified in place.
    :return: the same DataFrame object, with the TAF columns added.
    """
    # cfs -> acre-feet per second -> per day -> per month -> thousand acre-feet
    ACRE_FEET_PER_CUBIC_FOOT = 2.29568e-5
    SECONDS_PER_DAY = 86400

    # pandas already knows the month lengths, including leap-year Februaries.
    days_in_month = df.index.days_in_month.to_numpy(dtype=float)

    # Materialise the list first: columns are added to df inside the loop below.
    columns_to_convert = [
        col for col in df.columns
        if ('DEL' in col[1] or 'NDO' in col[1] or 'D_TOTAL' in col[1]) and 'CFS' in col[6]
    ]

    for column in columns_to_convert:
        new_values = df[column].values * ACRE_FEET_PER_CUBIC_FOOT * SECONDS_PER_DAY * days_in_month / 1000
        name_parts = list(column)
        name_parts[1] = name_parts[1] + '_TAF'
        name_parts[6] = 'TAF'
        df[tuple(name_parts)] = new_values

    return df

In [34]:
fPath = "../output/plots"

In [37]:
df = convert_cfs_to_taf(df)
df

Unnamed: 0_level_0,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE,CALLITE
Unnamed: 0_level_1,C_SJRVER_expl0000,C_WILKNS_expl0000,DEL_CVP_PAG_N_expl0000,DEL_CVP_PAG_S_expl0000,DEL_CVP_PRF_S_expl0000,DEL_CVP_PSC_N_expl0000,DEL_CVP_TOTAL_N_expl0000,DEL_CVP_TOTAL_S_expl0000,DEL_SWP_MWD_expl0000,DEL_SWP_PMI_expl0000,...,UNIMP_SJ_CFS_DV_expl0383,UNIMP_SRBB_CFS_DV_expl0383,UNIMP_TU_CFS_DV_expl0383,UNIMP_YUBA_CFS_DV_expl0383,UNIMP_YUBFEA_CFS_DV_expl0383,X2_PRV_expl0383,D_JONES_TAF_expl0383,D_BANKS_TAF_expl0383,D_TOT_expl0383,S_RESTOT_expl0383
Unnamed: 0_level_2,FLOW-CHANNEL,FLOW-CHANNEL,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-CVP,DELIVERY-SWP,DELIVERY-SWP,...,FLOW-UNIMPAIRED,FLOW-UNIMPAIRED,FLOW-UNIMPAIRED,FLOW-UNIMPAIRED,FLOW-UNIMPAIRED,X2-POSITION-PREV,FLOW-DELIVERY,FLOW-DELIVERY,FLOW-DELIVERY,STORAGE
Unnamed: 0_level_3,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,...,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON
Unnamed: 0_level_4,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,...,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E,2020D09E
Unnamed: 0_level_5,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,...,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER
Unnamed: 0_level_6,CFS,CFS,TAF,TAF,TAF,TAF,TAF,TAF,TAF,TAF,...,CFS,CFS,CFS,CFS,CFS,KM,TAF,TAF,TAF,TAF
1921-10-31,2505.2790,5427.5645,2.092484e+05,2.423343e+06,2.813381e+06,2.041487e+06,3.710914e+06,8.575424e+06,6.493492e+06,1.116402e+07,...,174.01881,3935.7527,94.32796,408.21237,1728.8037,68.202870,170.78238,403.27628,574.05865,8793.0510
1921-11-30,1977.8469,4904.4517,0.000000e+00,1.848376e+06,1.812635e+06,2.603797e+06,3.688032e+06,5.393127e+06,6.396417e+06,1.186342e+07,...,166.37500,4856.8057,102.17778,524.33340,2204.8890,67.435990,141.08742,331.10460,472.19202,8698.9300
1921-12-31,2740.8728,9056.1770,0.000000e+00,2.557507e+06,8.270460e+05,0.000000e+00,7.394474e+05,4.620543e+06,5.373313e+06,1.044180e+07,...,959.54300,8814.7850,887.98380,1836.14250,4991.2500,73.796830,254.83131,426.15015,680.98145,8713.6930
1922-01-31,2577.6514,6841.2305,0.000000e+00,4.489400e+06,3.715057e+05,6.411049e+04,5.762657e+05,5.964501e+06,1.066858e+06,1.463146e+06,...,1076.63980,7025.8066,1166.41400,1976.00800,5098.5890,73.272900,255.08055,237.40735,492.48790,8969.5830
1922-02-28,5592.3980,16573.0140,0.000000e+00,5.661319e+06,2.786177e+05,0.000000e+00,5.337932e+05,7.789075e+06,1.716434e+06,2.194158e+06,...,1775.38680,17411.7560,3411.40800,5724.09230,13322.6045,70.166000,256.13720,424.84427,680.98145,9383.8740
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2003-05-31,3318.8572,18618.2580,1.874032e+06,5.704934e+06,1.099716e+06,1.183967e+07,1.493467e+07,1.223848e+07,4.522120e+06,6.261571e+06,...,7087.94900,20311.0370,8463.59300,9055.09400,22705.2600,55.135270,179.19414,207.87874,387.07288,5741.7935
2003-06-30,2222.6667,4923.5800,3.628755e+06,9.520898e+06,1.231732e+06,1.608900e+07,2.162981e+07,1.817971e+07,4.591179e+06,7.037492e+06,...,6301.49500,8225.8150,6258.27150,3678.68580,9518.3970,53.185272,275.99796,314.75156,590.74950,5636.2314
2003-07-31,1147.9255,9365.8700,4.052868e+06,1.145013e+07,3.030184e+05,1.667051e+07,2.299562e+07,2.015033e+07,3.406958e+06,6.321682e+06,...,1452.27650,5212.8720,901.28735,394.12820,2597.1577,64.092170,270.60452,228.97102,499.57556,4991.3300
2003-08-31,1359.1146,5336.4430,2.851372e+06,8.250568e+06,5.057250e+05,1.162066e+07,1.652348e+07,1.683312e+07,3.466934e+06,6.764329e+06,...,559.36480,4595.1377,484.66680,815.66034,2717.5884,85.212200,258.20230,139.03201,397.23430,4677.8726


## Plotting Functions

See the example file "general_plots_v20231115.py", which is posted to the Google Drive with the example files/scripts in the Python_DSS directory (here: https://drive.google.com/drive/folders/1JbN0eYKNM0772P0XMj0S7Fyl7I9GZxfW?usp=drive_link). Also see the plotting.yml file, which lists and defines the CalSim/CalLite studies to plot and analyze. If you want to test it out yourself, you'll need to update the paths to your own CalLite/CalSim studies.


In [None]:
def plot_ts(df, pTitle = 'Time Series', xLab = 'Date', lTitle = 'Studies', fTitle = 'mon_tot', pSave = True, fPath = fPath):
    """
    Plots a time-series graph for a given MultiIndex dataframe (follows calsim conventions)

    One line is drawn per column of df, in column order. The function assumes
    the DataFrame columns follow a specific naming convention where the last
    '_'-separated part of the level-1 name indicates the study; that part is
    used as the legend label. The variable name shown in the title, y-label and
    file name is the first column's level-1 name without its last part, and the
    units come from the first column's level-6 label.

    :param df: DataFrame to plot; its index supplies the x values
    :param pTitle: text appended to the variable name in the plot title
    :param xLab: x-axis label
    :param lTitle: legend title
    :param fTitle: suffix of the output file name ({var}_{fTitle}.png)
    :param pSave: when True, save the figure as a PNG under fPath
    :param fPath: output directory (default is the module-level fPath captured
        when this function was defined); created if missing when saving
    """
    var = '_'.join(df.columns[0][1].split('_')[:-1])
    colormap = plt.cm.tab20
    colors = [colormap(i) for i in range(df.shape[1])]
    colors[-1] = [0,0,0,1]  # the last column is always drawn in black

    fig = plt.figure(figsize=(14, 8))

    scaled_font_size = 1.5 * plt.rcParams['font.size'] # Change it to font size you want
    scaled_line_width = 1.5 * plt.rcParams['lines.linewidth']

    # Walk the columns directly so each one is drawn exactly once, even when
    # several columns share a study suffix (matching by suffix would redraw
    # them and run past the end of `colors`).
    for count, col in enumerate(df.columns):
        study = col[1].split('_')[-1]
        sns.lineplot(data=df, x=df.index, y=col, label=f'{study}', color = colors[count], linewidth=scaled_line_width)

    plt.title(var + ' ' + pTitle, fontsize=scaled_font_size*2)
    plt.xlabel(xLab, fontsize=scaled_font_size*1.5)
    plt.ylabel(var+"\nUnits: " + df.columns[0][6], fontsize=scaled_font_size*1.5)

    plt.legend(title=lTitle, title_fontsize = scaled_font_size*1.5, fontsize=scaled_font_size*1.25, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    plt.xticks(rotation=45, fontsize=scaled_font_size)
    plt.yticks(fontsize=scaled_font_size)
    plt.tight_layout()

    if pSave:
        # savefig does not create missing directories.
        os.makedirs(fPath, exist_ok=True)
        plt.savefig(f'{fPath}/{var}_{fTitle}.png', format = 'png', bbox_inches='tight', dpi=600, transparent=False)

    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)
   

In [None]:
def plot_annual_totals(df, xLab = 'Date', pTitle = 'Annual Totals', lTitle = 'Studies', fTitle = 'ann_tot', pSave = True, fPath = fPath):
    """
    Plots a time-series graph of annual totals for a given MultiIndex Dataframe that
    follows calsim conventions

    Each column of df is passed on its own to ``csPlots.annualize`` and the first
    column of the result is drawn as one line. The function assumes the
    DataFrame columns follow a specific naming convention where the last
    '_'-separated part of the level-1 name indicates the study (used as the
    legend label).

    :param df: DataFrame to annualize and plot
    :param xLab: x-axis label
    :param pTitle: text appended to the variable name in the plot title
    :param lTitle: legend title
    :param fTitle: suffix of the output file name ({var}_{fTitle}.png)
    :param pSave: when True, save the figure as a PNG under fPath
    :param fPath: output directory (default is the module-level fPath captured
        when this function was defined); created if missing when saving
    :return: DataFrame with the annualized results of all columns side by side
    """
    var = '_'.join(df.columns[0][1].split('_')[:-1])

    colormap = plt.cm.tab20
    colors = [colormap(i) for i in range(df.shape[1])]
    colors[-1] = [0,0,0,1]  # the last column is always drawn in black

    fig = plt.figure(figsize=(14, 8))

    scaled_font_size = 1.5 * plt.rcParams['font.size'] # Change it to font size you want
    scaled_line_width = 1.5 * plt.rcParams['lines.linewidth']

    annual_frames = []
    # Anything printed while annualizing/plotting is discarded. The devnull
    # handle is opened once and closed by the with-statement.
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
        # Walk the columns directly so each one is processed exactly once and
        # the colour index always matches the column being drawn.
        for i, col in enumerate(df.columns):
            study = col[1].split('_')[-1]
            df_ann = csPlots.annualize(df.loc[:, [col]])
            annual_frames.append(df_ann)
            sns.lineplot(data = df_ann, x=df_ann.index, y=df_ann.columns[0], label=f'{study}', color = colors[i],
                        linewidth = scaled_line_width)

    # Concatenate once at the end instead of growing a frame inside the loop.
    annualized_df = pd.concat(annual_frames, axis=1) if annual_frames else pd.DataFrame()

    plt.title(var + ' ' + pTitle, fontsize=scaled_font_size*2)
    plt.xlabel(xLab, fontsize=scaled_font_size*1.5)
    plt.ylabel(var+"\nUnits: " + df.columns[0][6], fontsize=scaled_font_size*1.5)

    plt.legend(title=lTitle, title_fontsize = scaled_font_size*1.5, fontsize=scaled_font_size*1.25, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    plt.xticks(rotation=45, fontsize=scaled_font_size)
    plt.yticks(fontsize=scaled_font_size)
    plt.tight_layout()

    if pSave:
        # savefig does not create missing directories.
        os.makedirs(fPath, exist_ok=True)
        plt.savefig(f'{fPath}/{var}_{fTitle}.png', format = 'png', bbox_inches='tight', dpi=600, transparent=False)

    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)
    return annualized_df

In [None]:
def plot_exceedance(df, month = "All Months", xLab = 'Probability', pTitle = 'Exceedance Probability', lTitle = 'Studies', fTitle = 'exceed', pSave = True, fPath = fPath):
    """
    Plots an exceedance graph for a given MultiIndex Dataframe that follows calsim conventions

    For each column of df, ``csPlots.single_exceed(df, column)`` is called and
    the first column of its result is drawn as one line. The function assumes
    the DataFrame columns follow a specific naming convention where the last
    '_'-separated part of the level-1 name indicates the study (used as the
    legend label).

    :param df: DataFrame to plot
    :param month: label appended (after a space) to both the title and the file name
    :param xLab: x-axis label
    :param pTitle: text appended to the variable name in the plot title
    :param lTitle: legend title
    :param fTitle: suffix of the output file name ({var}_{fTitle} {month}.png)
    :param pSave: when True, save the figure as a PNG under fPath
    :param fPath: output directory (default is the module-level fPath captured
        when this function was defined); created if missing when saving
    """
    pTitle = pTitle + " " + month
    fTitle = fTitle + " " + month

    var = '_'.join(df.columns[0][1].split('_')[:-1])

    colormap = plt.cm.tab20
    colors = [colormap(i) for i in range(df.shape[1])]
    colors[-1] = [0,0,0,1]  # the last column is always drawn in black

    fig = plt.figure(figsize=(14, 8))

    scaled_font_size = 1.5 * plt.rcParams['font.size'] # Change it to font size you want
    scaled_line_width = 1.5 * plt.rcParams['lines.linewidth']

    # Walk the columns directly so each one is processed exactly once and the
    # colour index always matches the column being drawn.
    for i, col in enumerate(df.columns):
        study = col[1].split('_')[-1]
        df_ex = csPlots.single_exceed(df, col)
        sns.lineplot(data = df_ex, x=df_ex.index, y=df_ex.columns[0], label=f'{study}', color = colors[i], linewidth = scaled_line_width)

    plt.title(var + ' ' + pTitle, fontsize=scaled_font_size*2)
    plt.xlabel(xLab, fontsize=scaled_font_size*1.5)
    plt.ylabel(var+"\nUnits: " + df.columns[0][6], fontsize=scaled_font_size*1.5)
    plt.legend(title=lTitle, title_fontsize = scaled_font_size*1.5, fontsize=scaled_font_size*1.25, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    plt.xticks(rotation=45, fontsize=scaled_font_size)
    plt.yticks(fontsize=scaled_font_size)
    plt.tight_layout()

    if pSave:
        # savefig does not create missing directories.
        os.makedirs(fPath, exist_ok=True)
        plt.savefig(f'{fPath}/{var}_{fTitle}.png', format = 'png', bbox_inches='tight', dpi=600, transparent=False)

    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

In [None]:
def plot_moy_averages(df, xLab = 'Month of Year', pTitle = 'Month of Year Average Totals', lTitle = 'Studies', fTitle = 'moy_avg', fPath = fPath, pSave = True):
    """
    Plots month-of-year averages for a given MultiIndex Dataframe that follows calsim conventions.

    Rows are grouped by the calendar month of their index date and averaged,
    giving one row per month across all years; the result is drawn with plot_ts.

    The function assumes the DataFrame columns follow a specific naming
    convention where the last part of the name indicates the study.

    :param df: DataFrame with a DatetimeIndex
    :param xLab: x-axis label
    :param pTitle: text appended to the variable name in the plot title
    :param lTitle: legend title
    :param fTitle: suffix of the output file name
    :param fPath: output directory (default is the module-level fPath captured
        when this function was defined)
    :param pSave: forwarded to plot_ts; when True the figure is saved. Placed
        last so existing positional calls keep working.
    """
    # Group on the index's month directly; no need to copy the frame just to
    # add a temporary 'Month' column.
    month_key = df.index.month.rename("Month")
    df_moy = df.groupby(month_key).mean()
    plot_ts(df_moy, pTitle = pTitle, xLab = xLab, lTitle = lTitle, fTitle = fTitle, pSave = pSave, fPath = fPath)

### Difference From Baseline

In [None]:
def get_difference_from_baseline(df):
    """
    Subtract the first column of df from every other column.

    Assumptions: the baseline is the first column and df holds a single variable.
    The input frame is not modified.

    :param df: DataFrame whose first column is the baseline
    :return: DataFrame of (column - baseline) for every column except the baseline
    """
    baseline = df.iloc[:, 0]
    others = df.iloc[:, 1:]
    # Row-aligned subtraction of the baseline from all remaining columns at once.
    return others.sub(baseline, axis=0)

In [None]:
def difference_from_baseline(df, plot_type, pTitle = 'Difference from Baseline ', xLab = 'Date', lTitle = 'Studies', fTitle = "___", pSave = True, fPath = fPath):
    """
    Plot the difference from baseline of a single variable using the given plot function.

    The name of plot_type is appended to pTitle to form the plot title. Only
    pTitle, fTitle and fPath are passed on to plot_type; xLab, lTitle and pSave
    are accepted but not forwarded, so the plot function's own defaults apply.

    :param df: DataFrame whose first column is the baseline
    :param plot_type: one of plot_ts, plot_exceedance, plot_moy_averages, plot_annual_totals
    """
    full_title = pTitle + plot_type.__name__
    differences = get_difference_from_baseline(df)
    plot_type(differences, pTitle = full_title, fTitle = fTitle, fPath = fPath)

### Looping Through All Variables to Create Plots

In [None]:
def slice_with_baseline(df, var, study_lst):
    """
    Build a frame holding the baseline column plus selected study columns for one variable.

    df is first narrowed with create_subset(df, var). The first column of that
    subset is taken as the baseline, followed by the columns at the positions
    given in study_lst (positions are relative to the subset).

    :param df: full DataFrame
    :param var: variable name passed to create_subset
    :param study_lst: positional column indices to keep alongside the baseline
    :return: DataFrame with the baseline column first, then the selected columns
    """
    matching = create_subset(df, var)
    pieces = [matching.iloc[:, [0]], matching.iloc[:, study_lst]]
    return pd.concat(pieces, axis = 1)

In [None]:
def plot_all(df, vars, studies, storyline):
    """
    For each variable, plot the monthly time series, the exceedance curve and
    the month-of-year averages, saving under ../visualizations/{storyline}/{var}.

    Note: later cells in this notebook define plot_all again; whichever
    definition ran last is the one in effect.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    for var in vars:
        out_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        subset = slice_with_baseline(df, var, studies)

        # Monthly time series
        plot_ts(subset, pTitle='Monthly Total', fTitle='Month_Tot', fPath=out_dir)

        # Exceedance over all months
        plot_exceedance(subset, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=out_dir)

        # Month-of-year averages
        plot_moy_averages(subset, pTitle='Month of Year Average', fTitle='MoY_Avg', fPath=out_dir)

        # Annual totals, September/April series, summer totals, yearly sums and
        # difference-from-baseline plots are not produced here; plot_all_storage
        # runs the fuller set.

In [None]:
def plot_all_storage(df, vars, studies, storyline):
    """
    For each variable, produce the full set of plots and save them under
    ../visualizations/{storyline}/{var}.

    Plots, in order: monthly time series, annual totals, September-only and
    April-only time series, annual totals of June-August rows, exceedance,
    month-of-year averages, and a time series of per-calendar-year sums.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    for var in vars:
        out_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        subset = slice_with_baseline(df, var, studies)
        row_month = subset.index.month

        # Monthly time series
        plot_ts(subset, pTitle='Monthly Total', fTitle='Month_Tot', fPath=out_dir)

        # Annual totals over all rows
        plot_annual_totals(subset, pTitle='Water Year Total', fTitle='WY_Tot', fPath=out_dir)

        # September rows only
        plot_ts(subset[row_month == 9], pTitle='September Total', fTitle='Sep_Tot', fPath=out_dir)

        # April rows only
        plot_ts(subset[row_month == 4], pTitle='April Total', fTitle='Apr_Tot', fPath=out_dir)

        # Annual totals of the June-August rows
        plot_annual_totals(subset[row_month.isin([6, 7, 8])], pTitle='Annual Summer Totals', fTitle='Ann_Summ_Tot', fPath=out_dir)

        # Exceedance over all months
        plot_exceedance(subset, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=out_dir)

        # Month-of-year averages
        plot_moy_averages(subset, pTitle='Month of Year Average', fTitle='MoY_Avg', fPath=out_dir)

        # Sums per calendar year; plotted with plot_ts's default title and file suffix
        yearly_totals = subset.groupby(subset.index.year.rename("Year")).sum()
        plot_ts(yearly_totals, xLab='Date', fPath=out_dir)

        # Difference-from-baseline plots are not produced here.

In [None]:
var_storage = var_list[0:2]
var_storage

In [None]:
var_del = var_list[2:4]
var_del

### Plot Vars Across 384 Studies

In [None]:
plot_all_storage(df, var_storage, [], 'Baseline')

In [None]:
def plot_all_ndo(df, vars, studies, storyline):
    """
    For each variable, produce the full set of plots and save them under
    ../visualizations/{storyline}/{var}. Runs the same sequence of plots as
    plot_all_storage.

    Plots, in order: monthly time series, annual totals, September-only and
    April-only time series, annual totals of June-August rows, exceedance,
    month-of-year averages, and a time series of per-calendar-year sums.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    for var in vars:
        out_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        subset = slice_with_baseline(df, var, studies)
        row_month = subset.index.month

        # Monthly series and annual totals over all rows
        plot_ts(subset, pTitle='Monthly Total', fTitle='Month_Tot', fPath=out_dir)
        plot_annual_totals(subset, pTitle='Water Year Total', fTitle='WY_Tot', fPath=out_dir)

        # Single-month series: September, then April
        september_rows = subset[row_month == 9]
        plot_ts(september_rows, pTitle='September Total', fTitle='Sep_Tot', fPath=out_dir)
        april_rows = subset[row_month == 4]
        plot_ts(april_rows, pTitle='April Total', fTitle='Apr_Tot', fPath=out_dir)

        # Annual totals of the June-August rows
        summer_rows = subset[row_month.isin([6, 7, 8])]
        plot_annual_totals(summer_rows, pTitle='Annual Summer Totals', fTitle='Ann_Summ_Tot', fPath=out_dir)

        # Exceedance and month-of-year averages over all rows
        plot_exceedance(subset, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=out_dir)
        plot_moy_averages(subset, pTitle='Month of Year Average', fTitle='MoY_Avg', fPath=out_dir)

        # Sums per calendar year; plotted with plot_ts's default title and file suffix
        yearly_totals = subset.groupby(subset.index.year.rename("Year")).sum()
        plot_ts(yearly_totals, xLab='Date', fPath=out_dir)

        # Difference-from-baseline plots are not produced here.

In [None]:
var_list_ndo = ["NDO"]
var_list_ndo

In [None]:
plot_all_ndo(df, var_list_ndo, [], "NDO EDA")

### Storyline 1 - Natural Flows

##### Single Strategy

In [None]:
var_list

In [None]:
var_list_storage = var_list[:3]
var_list_storage

In [None]:
var_list_delivery = var_list[3:6]
var_list_delivery

In [None]:
var_list_c_vars = var_list[7:9]
var_list_c_vars

##### C_VARS

In [None]:
def plot_all(df, vars, studies, storyline):
    """
    Plot set used in the C_VARS section: monthly time series, month-of-year
    averages, and exceedance curves for January, May and October rows.
    Output goes to ../visualizations/{storyline}/{var}.

    Redefines the plot_all from earlier cells.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    exceedance_months = [(1, "January"), (5, "May"), (10, "October")]

    for var in vars:
        out_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        subset = slice_with_baseline(df, var, studies)

        # Monthly time series
        plot_ts(subset, pTitle='Monthly Total', fTitle='Month_Tot', fPath=out_dir)

        # Month-of-year averages
        plot_moy_averages(subset, pTitle='Month of Year Average', fTitle='MoY_Avg', fPath=out_dir)

        # One exceedance plot per selected month, using only that month's rows
        for month_number, month_name in exceedance_months:
            month_rows = subset[subset.index.month == month_number]
            plot_exceedance(month_rows, month=month_name, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=out_dir)

In [None]:
study_lst = [1, 2, 3]

In [None]:
plot_all(df, var_list_c_vars, study_lst, "Storyline #1 Single Strategy")

##### Combination Strategy

In [None]:
study_lst = [2, 14, 302]

In [None]:
plot_all(df, var_list_c_vars, study_lst, "Storyline #1 Combination Strategy")

##### Storage

In [None]:
def plot_all(df, vars, studies, storyline):
    """
    Plot set used in the Storage section: monthly time series plus exceedance
    curves for April and September rows.
    Output goes to ../visualizations/{storyline}/{var}.

    Redefines the plot_all from earlier cells.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    exceedance_months = [(4, "April"), (9, "September")]

    for var in vars:
        out_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        subset = slice_with_baseline(df, var, studies)

        # Monthly time series
        plot_ts(subset, pTitle='Monthly Total', fTitle='Month_Tot', fPath=out_dir)

        # One exceedance plot per selected month, using only that month's rows
        for month_number, month_name in exceedance_months:
            month_rows = subset[subset.index.month == month_number]
            plot_exceedance(month_rows, month=month_name, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=out_dir)

In [None]:
study_lst = [1, 2, 3]

In [None]:
plot_all(df, var_list_storage, study_lst, "Storyline #1 Single Strategy")

##### Combination Strategy

In [None]:
study_lst = [2, 14, 302]

In [None]:
plot_all(df, var_list_storage, study_lst, "Storyline #1 Combination Strategy")

##### Storyline #2 Single Strategy

In [None]:
study_lst = [32, 64]

In [None]:
plot_all(df, var_list_storage, study_lst, "Storyline #2 Single Strategy")

##### Storyline #2 Combination Strategy

In [None]:
study_lst = [64, 80, 368]

In [None]:
plot_all(df, var_list_storage, study_lst, "Storyline #2 Combination Strategy")

##### Storyline #3 Single Strategy

In [None]:
study_lst = [16]

In [None]:
plot_all(df, var_list_storage, study_lst, "Storyline #3 Single Strategy")

##### Storyline #3 Combination Strategy

In [None]:
study_lst = [16, 80, 210]

In [None]:
plot_all(df, var_list_storage, study_lst, "Storyline #3 Combination Strategy")

##### Deliveries

In [None]:
def plot_all(df, vars, studies, storyline):
    """
    Plot set used in the Deliveries section: monthly time series and the
    all-months exceedance curve.
    Output goes to ../visualizations/{storyline}/{var}.

    Redefines the plot_all from earlier cells.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    for var in vars:
        out_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        subset = slice_with_baseline(df, var, studies)

        # Monthly time series
        plot_ts(subset, pTitle='Monthly Total', fTitle='Month_Tot', fPath=out_dir)

        # Exceedance over all months
        plot_exceedance(subset, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=out_dir)

In [None]:
study_lst = [1, 2, 3]

In [None]:
plot_all(df, var_list_delivery, study_lst, "Storyline #1 Single Strategy")

##### Combination Strategy

In [None]:
study_lst = [2, 14, 302]

In [None]:
plot_all(df, var_list_delivery, study_lst, "Storyline #1 Combination Strategy")

##### Storyline #2 Single Strategy

In [None]:
study_lst = [32, 64]

In [None]:
plot_all(df, var_list_delivery, study_lst, "Storyline #2 Single Strategy")

##### Storyline #2 Combination Strategy

In [None]:
study_lst = [64, 80, 368]

In [None]:
plot_all(df, var_list_delivery, study_lst, "Storyline #2 Combination Strategy")

##### Storyline #3 Single Strategy

In [None]:
study_lst = [16]

In [None]:
var_list_delivery = var_list[3:7]
var_list_delivery

In [None]:
plot_all(df, var_list_delivery, study_lst, "Storyline #3 Single Strategy")

##### Storyline #3 Combination Strategy

In [None]:
study_lst = [16, 80, 210]

In [None]:
plot_all(df, var_list_delivery, study_lst, "Storyline #3 Combination Strategy")

##### NDO

In [None]:
study_lst = [32, 64]

In [None]:
var_list_ndo = [var_list[9]]
var_list_ndo

In [None]:
def plot_all(df, vars, studies, storyline):
    """
    Plot set used in the NDO section: monthly time series and the all-months
    exceedance curve. Output goes to ../visualizations/{storyline}/{var}.

    Redefines the plot_all from earlier cells; the plots produced are the same
    as in the Deliveries-section definition.

    :param df: full DataFrame
    :param vars: variable names to plot
    :param studies: positional study indices passed to slice_with_baseline
    :param storyline: name of the output sub-folder
    """
    for var in vars:
        target_dir = f"../visualizations/{storyline}/{var}"
        if not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)

        var_frame = slice_with_baseline(df, var, studies)

        # Monthly time series, then exceedance over all months
        plot_ts(var_frame, pTitle='Monthly Total', fTitle='Month_Tot', fPath=target_dir)
        plot_exceedance(var_frame, xLab='Probability', pTitle='Monthly Exceedance Probability', fTitle='Mon_exceed', fPath=target_dir)

In [None]:
plot_all(df, var_list_ndo, study_lst, "Storyline #2 Single Strategy")

##### Storyline #2 Combination Strategy

In [None]:
study_lst = [64, 80, 368]

In [None]:
plot_all(df, var_list_ndo, study_lst, "Storyline #2 Combination Strategy")

##### Storyline #3 Single Strategy 

In [None]:
study_lst = [16]

In [None]:
plot_all(df, var_list_ndo, study_lst, "Storyline #3 Single Strategy")

##### Storyline #3 Combination Strategy

In [None]:
study_lst = [16, 80, 210]

In [None]:
plot_all(df, var_list_ndo, study_lst, "Storyline #3 Combination Strategy")

### Unused Code

In [None]:
# def preprocess_data(df, addsl=False):
#     dvar_list = []
#     combined_df = pnd.DataFrame()
#     
#     for i, r in df.iterrows():
#         dvar_list.append(f'/{r["DSS Part B"]}/{r["DSS Part C"]}/')
# 
#     for study_name, launch_name, alias_name in zip(study_names, launch_names, alias):
# 
#         launchFP_study = os.path.join(launch_base_directory, study_name)
#         launchFP = os.path.join(launchFP_study, launch_name)
#         print(launchFP)
#         
#         # Create a python "calsim" object
#         thiscs3 = cs3.calsim(launchFP=launchFP, csvers=3, reorg=True)
# 
#         # Retrieve the DSS data variables from the DSS file
#         thiscs3.DVdata.getDVts(filter=dvar_list)
# 
#         df = thiscs3.DVdata.DVtsDF.copy(deep=True)
#         
#         # if storage add the 2 variables to create a new one
#         # Add S_SLSCVP and S_SLSWP into S_SLTOT
# 
#         if addsl:
#             df[('CALLITE', 'S_SLTOT', 'STORAGE', '1MON', '2020D09E', 'PER-AVER', 'TAF')] = df.loc[:,[('CALLITE', 'S_SLCVP', 'STORAGE', '1MON', '2020D09E', 'PER-AVER', 'TAF'),('CALLITE', 'S_SLSWP', 'STORAGE', '1MON', '2020D09E', 'PER-AVER', 'TAF')]].sum(axis=1)
#       
#         new_columns = [(col[0], f'{col[1]}_{alias_name}', *col[2:]) if len(col) > 1 else (col[0], '') for col in df.columns]
#         df.columns = pnd.MultiIndex.from_tuples(new_columns)
#         df.columns.names = ['A', 'B', 'C', 'D', 'E', 'F', 'Units']
#         combined_df = pnd.concat([combined_df, df], axis=1)
#     
#     return combined_df