## Reading of ASCII files created for cam diagnostics tool

In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import os
from helper_funcs import read_file_custom, load_varinfo
import ipywidgets as ipw
from IPython import display
from copy import deepcopy
from traceback import format_exc
from collections import OrderedDict as od

VERBOSE = True

### 1. Paths and file definitions (PATHS)  

Please change accordingly if you execute this notebook on your local machine.

In [2]:
data_dir = "./data/michael_ascii_read/"
case_ok = "./data/from_ada/table_GLBL_ANN_obs_FIXED.asc"

michaels_excel = data_dir + "obs-comp-noresmversions.xlsx"
varinfo_csv = "./data/var_info.csv"

output_dir = "./output/"

### 2. Importing and editing supplementary information from Excel table (EXCEL)

Let's begin with reading the variable information from the excel table. Note that this is not strictly required but helps us below to display the results in a more intuitive manner, when analysing the data. The custom method that we use is:

In [3]:
help(load_varinfo)

Help on function load_varinfo in module helper_funcs:

load_varinfo(try_path, catch_excel_michael)
    Read short description strings for variables
    
    Load long names of variables. Tries to load information from csv file
    specified by input parameter ``try_path`` and if this fails, the information
    is imported from Michaels Excel table, in which case the csv file will be 
    created at location ``try_path``.
    
    Parameters
    ----------
    try_path : str
        location of csv file
    catch_excel_michael : str
        path to Michaels Excel
    
    Returns
    -------
    dict 
        dictionary containing all variable names (keys) and corresponding
        description strings (if applicable, else empty string)



Load the information.

In [4]:
var_info_dict = load_varinfo(varinfo_csv, michaels_excel)

In [5]:
from helper_funcs import save_varinfo_dict_csv, load_varinfo_dict_csv
from traceback import format_exc

class AddInfo(object):
    output = ipw.Output()
    def __init__(self, var_dict=None, csv_loc=None):
        self.csv_loc = csv_loc
        if not isinstance(var_dict, dict):
            self.load_csv()
        else:
            self.var_dict = var_dict
            
        self.init_widgets()
        self.init_actions()
        self.init_layout()
    
    def init_widgets(self):
        
        self.btn_update = ipw.Button(description='Update',
                                     tooltip=('Updates the current dictionary based on values in text fields'
                                              '(for further analysis, use Save csv button to write to CSV)'))
        self.btn_reload = ipw.Button(description='Reload',
                                     tooltip='Reloads information from file var_info.csv')
        self.btn_save = ipw.Button(description='Update and save',
                                     tooltip='Updates current selection and writes to CSV')
        
        self.btn_save.style.button_color = "lime"
        
        self.input_rows = []
        self.input_fields = {}
        
        for name,  val in self.var_dict.items():
            ipt = ipw.Text(value=val, placeholder='Insert new name',
                            disabled=False, min_width="200px")
            row = ipw.HBox([ipw.Label(name, minwidth="200px"), ipt])
            self.input_fields[name] = ipt
            self.input_rows.append(row)  
            
    def init_actions(self):
        self.btn_update.on_click(self.on_click_update)
        self.btn_reload.on_click(self.on_click_load_csv)
        self.btn_save.on_click(self.on_click_save)
        
    def init_layout(self):
        
        vbox_buttons = ipw.VBox([self.btn_reload,
                                 self.btn_update,
                                 self.btn_save])
        self.layout = ipw.HBox([ipw.VBox(self.input_rows), vbox_buttons, self.output])
        
    def on_click_update(self, b):
        self.apply_changes()
    
    def on_click_load_csv(self, b):
        self.load_csv()
        
    def on_click_save(self, b):
        self.save_csv()
    
    def save_csv(self):
        self.apply_changes()
        save_varinfo_dict_csv(self.var_dict, self.csv_loc)
        
    def load_csv(self):
        if self.csv_loc is None or not os.path.exists(self.csv_loc):
            raise IOError("Please provide path to csv file")
        try:
            self.var_dict = load_varinfo_dict_csv(self.csv_loc)
        except Exception as e:
            self.write_to_output(format_exc())
        for key, val in self.var_dict.items():
            self.input_fields[key].value = val
    
    def write_to_output(self, msg):
        self.output.append_display_data(msg)
        self.output
        
    def apply_changes(self):
        
        new = od()
        for key, edit in self.input_fields.items():
            new[key] = edit.value
        
        self.var_dict = new
        
    def __call__(self):
        return self.layout
    
adder = AddInfo(var_info_dict, varinfo_csv)
#show
adder()

HBox(children=(VBox(children=(HBox(children=(Label(value='RESTOM'), Text(value='TOmodel net flux', placeholder…

In [6]:
var_info_dict = adder.var_dict
var_info_dict

OrderedDict([('RESTOM', 'TOmodel net flux'),
             ('RESSURF', 'SRF net flux'),
             ('RESTOA_CERES-EBAF', 'TOA  net flux'),
             ('RESTOA_ERBE', ''),
             ('SOLIN_CERES-EBAF', ''),
             ('SOLIN_CERES', ''),
             ('CLDTOT_ISCCP', 'Total cloud cover'),
             ('CLDTOT_CLOUDSAT', ''),
             ('FLDS_ISCCP', 'LW down SRF'),
             ('FLNS_ISCCP', 'LW net SRF'),
             ('FLUT_CERES-EBAF', 'LW up Top'),
             ('FLUT_CERES', ''),
             ('FLUT_ERBE', ''),
             ('FLUTC_CERES-EBAF', 'LW up Top Clearsky'),
             ('FLUTC_CERES', ''),
             ('FLUTC_ERBE', ''),
             ('FLNT_CAM', ''),
             ('FSDS_ISCCP', 'SW down SRF'),
             ('FSNS_ISCCP', 'SW net SRF'),
             ('FSNS_LARYEA', ''),
             ('FSNTOA_CERES', 'SW net TOA'),
             ('FSNTOA_ERBE', ''),
             ('FSNTOAC_CERES', 'SW net TOA clearsky'),
             ('FSNTOAC_ERBE', ''),
             ('FSNT

### 3. Search and load ASCII files, either using .asc or .webarchive file type (LOAD_FILE)

In [7]:
files = sorted(glob(data_dir + "*.webarchive"))

for file in files:
    print(file)
    
test_file = files[0]

print("TEST FILE: {}".format(os.path.basename(test_file)))

./data/michael_ascii_read/N1850C53CLM45L32_f09_tn11_191017 (yrs 71-100).webarchive
./data/michael_ascii_read/N1850_f09_tn14_230218 (yrs 1-20).webarchive
./data/michael_ascii_read/N1850_f19_tn14_r227_ctrl (yrs 185-215).webarchive
./data/michael_ascii_read/N1850_f19_tn14_r227_ctrl (yrs 310-340).webarchive
./data/michael_ascii_read/N1850_f19_tn14_r227_ctrl (yrs 80-110).webarchive
./data/michael_ascii_read/N1850_f19_tn14_r265_ctrl_20180411 (yrs 90-120).webarchive
TEST FILE: N1850C53CLM45L32_f09_tn11_191017 (yrs 71-100).webarchive


Try read first file as is with pandas

In [8]:
try:
    frame = pd.read_csv(test_file, encoding="latin-1")
except Exception as e:
    print(repr(e))
frame.head()

Unnamed: 0,bplist00Ñ_WebMainResourceÕ
0,_WebResourceTextEncodingName^WebResourceUR...
1,TEST CASE: N1850C53CLM45L32_f09_tn11_191017 (y...
2,CONTROL CASE: OBS data
3,Variable N1850C53CLM45L32_f09_tn11_191017 ...
4,...


This did not work (it basically did not separate the individual columns). The same is the case for the file that includes a whitespace at the problematic variables.

In [9]:
frame = pd.read_csv(case_ok)
frame.head()
frame.shape

(67, 1)

This did not really work since the data is not splitted by columns but includes one column containing the content of each row. The reading has to be done from scratch, especially also because there is some variables with too long names (e.g. L.25 and L. 28) that stick together the first two columns. 

This folder contains a file ``helper_funcs.py`` in which I defined a custom read function ``read_file_custom`` that can convert these files into pandas dataframes.

In [10]:
help(read_file_custom)

Help on function read_file_custom in module helper_funcs:

read_file_custom(fpath, var_info_dict=None, run_id=None, verbose=False)
    Custom ASCII conversion method 
    
    Parameters
    ----------
    fpath : str
        path to file location
    var_info_dict : dict
        optinal dictionary that contains description strings for each of the 
        variables (e.g. retrieved using :func:`read_var_info_michaels_excel`)
    run_id : str, int or dict
        string or integer that may be used as index specifying the model run 
        and that should be used in the Dataframe for the index specifying the 
        model run (only relevant if multiple files are loaded and concatenated 
        into one dataframe). If None, the "TEST CASE" ID, specified in the 
        file header is used for the index.
    verbose : bool
        if True, print output (defaults to False)
        
    Returns
    -------
    Dataframe 
        pandas data frame ready for further analysis. NOTE: the retu

Now load the first file using this function (without providing the optional parameter *var_info_dict*, i.e. the info from Michael's Excel sheet.

In [11]:
df = read_file_custom(test_file, verbose=VERBOSE)
df

Ignoring line: bplist00Ñ_WebMainResourceÕ	
Ignoring line: 
Ignoring line: 
Ignoring line: _WebResourceTextEncodingName^WebResourceURL_WebResourceFrameName_WebResourceData_WebResourceMIMETypeUUTF-8_http://ns2345k.web.sigma2.no/noresm_diagnostics/N1850C53CLM45L32_f09_tn11_191017/CAM_DIAG/yrs71to100-obs/set1/table_GLBL_ANN_obs.ascPO}<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">DIAG SET 1: ANN MEANS GLOBAL
Ignoring line:  
Ignoring line:  
Ignoring line:  
Problem case FSNTOA_CERES-EBAF
Problem case FSNTOAC_CERES-EBAF
Test case: N1850C53CLM45L32_f09_tn11_191017
Control case: OBS data


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N1850C53CLM45L32_f09_tn11_191017,71-100,RESTOM,,False,-0.489,0.000,-0.489,
N1850C53CLM45L32_f09_tn11_191017,71-100,RESSURF,,False,-0.489,0.000,-0.489,
N1850C53CLM45L32_f09_tn11_191017,71-100,RESTOA_CERES-EBAF,,False,1.529,0.992,0.537,8.842
N1850C53CLM45L32_f09_tn11_191017,71-100,RESTOA_ERBE,,False,1.529,0.059,1.470,8.992
N1850C53CLM45L32_f09_tn11_191017,71-100,SOLIN_CERES-EBAF,,False,340.206,340.054,0.152,0.167
N1850C53CLM45L32_f09_tn11_191017,71-100,SOLIN_CERES,,False,340.206,341.479,-1.273,1.226
N1850C53CLM45L32_f09_tn11_191017,71-100,CLDTOT_ISCCP,,False,63.621,66.800,-3.179,11.323
N1850C53CLM45L32_f09_tn11_191017,71-100,CLDTOT_CLOUDSAT,,False,63.621,66.824,-3.203,9.731
N1850C53CLM45L32_f09_tn11_191017,71-100,FLDS_ISCCP,,False,338.280,343.347,-5.066,14.450
N1850C53CLM45L32_f09_tn11_191017,71-100,FLNS_ISCCP,,False,55.819,49.425,6.394,11.967


That worked, you can see that the column *Description* is empty and that all flags are set `False`. Here in the default reading function the flag is set True, if the ``var_info_dict`` is provided and information for a given variable is available. We illustrate that in the following, our second example, where we load the corrected ascii file and provide the info dictionary that we imported from the Excel sheet. Furthermore, we use the optional input parameter `run_id` to shorten to shorten the width of the HTML table display of the dataframe.

In [12]:
df1 = read_file_custom(case_ok, var_info_dict, run_id="Run1", verbose=False)
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Run1,150-180,RESTOM,TOmodel net flux,True,0.020,0.000,0.020,
Run1,150-180,RESSURF,SRF net flux,True,0.027,0.000,0.027,
Run1,150-180,RESTOA_CERES-EBAF,TOA net flux,True,2.109,0.992,1.117,9.824
Run1,150-180,RESTOA_ERBE,,False,2.109,0.059,2.050,9.194
Run1,150-180,SOLIN_CERES-EBAF,,False,340.200,340.054,0.146,0.417
Run1,150-180,SOLIN_CERES,,False,340.200,341.479,-1.279,1.296
Run1,150-180,CLDTOT_ISCCP,Total cloud cover,True,70.746,66.800,3.946,12.472
Run1,150-180,CLDTOT_CLOUDSAT,,False,70.746,66.824,3.923,10.572
Run1,150-180,FLDS_ISCCP,LW down SRF,True,347.663,343.347,4.316,15.146
Run1,150-180,FLNS_ISCCP,LW net SRF,True,56.374,49.425,6.949,13.926


As specified in the docstring of the reading method, the actual test case ID is stored (as attribute `test_case` in the Dataframe) in form of a dictionary that maps test_case with specified run_id:

In [13]:
print(df1.test_case)

N1850_f19_tn14_r265_ctrl_20180411


Reading worked, check, if both dataframes have the same dimension.

In [14]:
print(df.shape)
print(df1.shape)

(63, 5)
(63, 5)


You can change the appearance of the way a DataFrame is displayed. For instance ...

In [15]:
df1.style.background_gradient(cmap="GnBu", low=0.5, high=0.5, axis=0).highlight_null("red")

  np.copyto(xa, -1, where=xa < 0.0)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Run1,150-180,RESTOM,TOmodel net flux,True,0.02,0.0,0.02,
Run1,150-180,RESSURF,SRF net flux,True,0.027,0.0,0.027,
Run1,150-180,RESTOA_CERES-EBAF,TOA net flux,True,2.109,0.992,1.117,9.824
Run1,150-180,RESTOA_ERBE,,False,2.109,0.059,2.05,9.194
Run1,150-180,SOLIN_CERES-EBAF,,False,340.2,340.054,0.146,0.417
Run1,150-180,SOLIN_CERES,,False,340.2,341.479,-1.279,1.296
Run1,150-180,CLDTOT_ISCCP,Total cloud cover,True,70.746,66.8,3.946,12.472
Run1,150-180,CLDTOT_CLOUDSAT,,False,70.746,66.824,3.923,10.572
Run1,150-180,FLDS_ISCCP,LW down SRF,True,347.663,343.347,4.316,15.146
Run1,150-180,FLNS_ISCCP,LW net SRF,True,56.374,49.425,6.949,13.926


... applies a column based colour gradient based on the cell values in each column (similar to Excel feature *Conditional formatting*). Here, we provided a colormap of our choice (see [here](https://matplotlib.org/examples/color/colormaps_reference.html) for options). The input parameters `low` and `high` are optional, and can be interpreted as an percentage specification of the colour range used from the colourbar for the display (i.e. here, we use 0.5, i.e. 50% both for the lower and upper end).

[Follow this link](https://pandas.pydata.org/pandas-docs/stable/style.html) for more details related to pandas styling. 

### 4. Importing multiple result files and concatenating them into one Dataframe (LOAD_FILES)

Now we have a method that can import the results from a single run into a datframe that can be used for further analysis. In the following, we basically do this for all available files and put the results into one big `Dataframe`. 

To do this, a custom method `read_and_merge_all` was defined in [helper_funcs.py](https://github.com/jgliss/my_py3_scripts/blob/master/notebooks/helper_funcs.py). 

The following cells show, how this method may be used to either keep the original test case IDs as index or to replace them with a shorter version. 

In [16]:
from helper_funcs import read_and_merge_all
merged = read_and_merge_all(file_list=files, var_info_dict=var_info_dict)
merged

  df.test_case = pd.Series()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N1850C53CLM45L32_f09_tn11_191017,71-100,AODDUST,,False,,,,
N1850C53CLM45L32_f09_tn11_191017,71-100,AODVIS,,False,,,,
N1850C53CLM45L32_f09_tn11_191017,71-100,CLDTOT_CLOUDSAT,,False,63.621,66.824,-3.203,9.731
N1850C53CLM45L32_f09_tn11_191017,71-100,CLDTOT_ISCCP,Total cloud cover,True,63.621,66.800,-3.179,11.323
N1850C53CLM45L32_f09_tn11_191017,71-100,FLDS_ISCCP,LW down SRF,True,338.280,343.347,-5.066,14.450
N1850C53CLM45L32_f09_tn11_191017,71-100,FLNS_ISCCP,LW net SRF,True,55.819,49.425,6.394,11.967
N1850C53CLM45L32_f09_tn11_191017,71-100,FLNT_CAM,,False,236.838,,,
N1850C53CLM45L32_f09_tn11_191017,71-100,FLUTC_CERES,,False,261.783,266.878,-5.096,8.384
N1850C53CLM45L32_f09_tn11_191017,71-100,FLUTC_CERES-EBAF,LW up Top Clearsky,True,261.783,266.051,-4.268,6.042
N1850C53CLM45L32_f09_tn11_191017,71-100,FLUTC_ERBE,,False,261.783,264.429,-2.646,5.725


This is rather unhandy, since the names of the run IDs are rather long. This can be changed directly when loading the Dataframe:

In [17]:
merged = read_and_merge_all(file_list=files, var_info_dict=var_info_dict, replace_runid_prefix="Run")
merged

  df.test_case = pd.Series(mapping)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Run1,185-215,AODDUST,,False,,,,
Run1,185-215,AODVIS,,False,,,,
Run1,185-215,CLDTOT_CLOUDSAT,,False,68.543,66.824,1.720,10.886
Run1,185-215,CLDTOT_ISCCP,Total cloud cover,True,68.543,66.800,1.744,12.992
Run1,185-215,FLDS_ISCCP,LW down SRF,True,353.861,343.347,10.514,16.891
Run1,185-215,FLNS_ISCCP,LW net SRF,True,56.249,49.425,6.824,14.098
Run1,185-215,FLNT_CAM,,False,240.640,,,
Run1,185-215,FLUTC_CERES,,False,267.090,266.878,0.212,5.873
Run1,185-215,FLUTC_CERES-EBAF,LW up Top Clearsky,True,267.090,266.051,1.039,4.662
Run1,185-215,FLUTC_ERBE,,False,267.090,264.429,2.662,5.765


Call:

In [18]:
merged.test_case

Run1             N1850_f19_tn14_r227_ctrl
Run2    N1850_f19_tn14_r265_ctrl_20180411
Run3                N1850_f09_tn14_230218
Run4     N1850C53CLM45L32_f09_tn11_191017
dtype: object

to get the corresponding test_case IDs.

### 5. Rearranging and restructuring of the imported data (REARRANGE)

In the following cell, you can interacively select which Variables you wish to keep for further analysis. Preselected are the variables that are flagged.

#### 5.1 Interactive selection of variables (IA_VAR)

In [19]:
# TODO: 
# 1. Move into helper_funcs.py when finalised
# 2. Inlcude reordering
# 3. Write documentation
class SelectVariable():
    output = ipw.Output()
    def __init__(self, df):
        df.sort_index(inplace=True)
        self.df = df
        self.vals = tuple(self.df.index.levels[2].values)
        
        self._df_edit = self.df
        
        self.init_layout()
        self.init_widgets()
        self.init_actions()
        self.init_display()
        
        self.print_current(1)
        self.crop_selection()
        self.disp_current()
    
    @property
    def df_edit(self):
        return deepcopy(self._df_edit)
    
    @property
    def flagged_vars(self):
        return list(self.df[self.df.Flag].index.get_level_values("Variable").unique().values)
    
    def init_widgets(self):
        
        self.btn_unselect_all = ipw.Button(description='Unselect all')
        self.btn_select_all = ipw.Button(description='Select all')
        self.btn_flagged = ipw.Button(description="Flagged")
        self.btn_apply = ipw.Button(description='Apply')
        self.btn_apply.style.button_color = 'lime'

        self.var_selector = ipw.SelectMultiple(description="Variables", 
                                               options=self.vals, 
                                               value=self.flagged_vars, 
                                               min_width='150px',
                                               layout=self.box_layout)
        
        self.current_disp = ipw.Textarea(value='', 
                                         description='Current:', 
                                         disabled=True, 
                                         layout=self.box_layout)
        #self.output = ipw.Output()
        
    def init_actions(self):
        #what happens when the state of the selection is changed (display current selection)
        self.var_selector.observe(self.print_current)
        #what happens when buttons are clicked
        self.btn_select_all.on_click(self.on_select_all_vars_clicked)
        self.btn_unselect_all.on_click(self.on_unselect_all_vars_clicked)
        self.btn_flagged.on_click(self.on_flagged_clicked)
        self.btn_apply.on_click(self.on_click_apply)
        
    def init_layout(self):
        self.box_layout = ipw.Layout(flex='0 1 auto', height='250px', min_height='150px', width='auto')
        self.disp_layout = ipw.Layout(flex='0 1 auto', height='250px', min_height='150px', width='auto')
    
    def init_display(self):
        self.btns = ipw.VBox([self.btn_select_all, 
                              self.btn_unselect_all,
                              self.btn_flagged,
                              ipw.Label(),
                              self.btn_apply])
    
        self.edit_area = ipw.HBox([self.var_selector, 
                                   self.current_disp, 
                                   self.btns])
        
        self.layout = ipw.VBox([self.edit_area, self.output])
    
    def on_unselect_all_vars_clicked(self, b):
        self.unselect_all()
    
    def on_select_all_vars_clicked(self, b):
        self.select_all()
    
    def on_flagged_clicked(self, b):
        self.select_flagged()
        
    def unselect_all(self):
        self.var_selector.value = ()
    
    def select_all(self):
        self.var_selector.value = self.var_selector.options
    
    def select_flagged(self):
        self.var_selector.value = self.flagged_vars
        
    def disp_current(self):
        self.output.clear_output()
        #self.output.append_display_data(ipw.Label("PREVIEW current selection", fontsize=22))
        self.output.append_display_data(self._df_edit.head().style.set_caption("PREVIEW HEAD"))
        self.output
        
    def crop_selection(self):
        idx = pd.IndexSlice
        try:
            self._df_edit = self.df.loc[idx[:, :, self.var_selector.value, :], :]
        except Exception as e:
            print("WARNING: failed to extract selection.\nTraceback {}".format(format_exc()))
    
    def on_click_apply(self, b):
        self.crop_selection()
        self.disp_current()
        
    def print_current(self, b):
        s=""
        for item in self.var_selector.value:
            s += "{}\n".format(item)
        self.current_disp.value = s
    
    def __repr__(self):
        return repr(self.layout)
    
    def __call__(self):
        return self.layout
    
selector = SelectVariable(df=merged)
#show
selector()

VBox(children=(HBox(children=(SelectMultiple(description='Variables', index=(3, 4, 5, 8, 11, 13, 14, 16, 19, 2…

In [20]:
selector.df_edit

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Run1,185-215,CLDTOT_ISCCP,Total cloud cover,True,68.543,66.800,1.744,12.992
Run1,185-215,FLDS_ISCCP,LW down SRF,True,353.861,343.347,10.514,16.891
Run1,185-215,FLNS_ISCCP,LW net SRF,True,56.249,49.425,6.824,14.098
Run1,185-215,FLUTC_CERES-EBAF,LW up Top Clearsky,True,267.090,266.051,1.039,4.662
Run1,185-215,FLUT_CERES-EBAF,LW up Top,True,241.972,239.574,2.398,7.188
Run1,185-215,FSDS_ISCCP,SW down SRF,True,190.458,189.390,1.068,15.915
Run1,185-215,FSNS_ISCCP,SW net SRF,True,166.962,165.893,1.070,13.587
Run1,185-215,FSNTOAC_CERES,SW net TOA clearsky,True,290.330,294.702,-4.373,15.977
Run1,185-215,FSNTOA_CERES,SW net TOA,True,244.525,244.691,-0.167,12.096
Run1,185-215,LHFLX_JRA25,Lat Heat Flux,True,87.926,87.935,-0.009,14.947


Now access the current selection and continue.

#### 5.2 Interactive index renaming tool (IA_RENAME)

In the following, an interactive widget is defined, that allows for renaming of the runs.

In [21]:
class IndexRenamer(object):
    output = ipw.Output()
    def __init__(self, df, level=0, suggestions=[]):
        self.df = df
        self._df_edit = df
        self.level = level
        
        self.suggestions = suggestions
      
        self.init_widgets()
        self.init_actions()
        self.init_layout()
        
        self.renamed_info = od()
        
    @property
    def names(self):
        return sorted(self.df.index.get_level_values(self.level).unique().values)
        
    @property
    def df_edit(self):
        return deepcopy(self._df_edit)
    
    def init_widgets(self):
        
        self.btn_apply = ipw.Button(description='Apply')
        self.btn_apply.style.button_color = "lime"
        
        self.input_rows = []
        self.input_fields = []
        
        for i, name in enumerate(self.names):
            try:
                val = self.suggestions[i]
            except:
                val = name
            ipt = ipw.Text(value=val, placeholder='Insert new name',
                            disabled=False)
            row = ipw.HBox([ipw.Label(name, width="200px"), ipt])
            self.input_fields.append(ipt)
            self.input_rows.append(row)
                                      
    def init_actions(self):
        #what happens when the state of the selection is changed (display current selection)
        self.btn_apply.on_click(self.on_click_apply)
        
    def init_layout(self):
        
        edit_area = ipw.HBox([ipw.VBox(self.input_rows), self.btn_apply])
        self.layout = ipw.VBox([edit_area, self.output])
        
    def on_click_apply(self, b):
        self.apply_changes()
        
    def disp_current(self):
        self.output.clear_output()
        #self.output.append_display_data(ipw.Label("PREVIEW current selection", fontsize=22))
        self.output.append_display_data(self._df_edit.style.set_caption("PREVIEW"))
        self.output
        
    def apply_changes(self):
        
        df = self.df 
        mapping = od()
        
        for i, name in enumerate(self.names):
            repl = str(self.input_fields[i].value)
            mapping[name] = repl
        self._df_edit = df.rename(index=mapping, level=self.level)
        
        self.disp_current()
        
    def __call__(self):
        return self.layout

selection  = selector.df_edit
selection

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Run1,185-215,CLDTOT_ISCCP,Total cloud cover,True,68.543,66.800,1.744,12.992
Run1,185-215,FLDS_ISCCP,LW down SRF,True,353.861,343.347,10.514,16.891
Run1,185-215,FLNS_ISCCP,LW net SRF,True,56.249,49.425,6.824,14.098
Run1,185-215,FLUTC_CERES-EBAF,LW up Top Clearsky,True,267.090,266.051,1.039,4.662
Run1,185-215,FLUT_CERES-EBAF,LW up Top,True,241.972,239.574,2.398,7.188
Run1,185-215,FSDS_ISCCP,SW down SRF,True,190.458,189.390,1.068,15.915
Run1,185-215,FSNS_ISCCP,SW net SRF,True,166.962,165.893,1.070,13.587
Run1,185-215,FSNTOAC_CERES,SW net TOA clearsky,True,290.330,294.702,-4.373,15.977
Run1,185-215,FSNTOA_CERES,SW net TOA,True,244.525,244.691,-0.167,12.096
Run1,185-215,LHFLX_JRA25,Lat Heat Flux,True,87.926,87.935,-0.009,14.947


In [39]:
class IndexRenamer(object):
    output = ipw.Output()
    def __init__(self, df, level=0, suggestions=[]):
        self.df = df
        self._df_edit = df
        self.level = level
        
        self.suggestions = suggestions
      
        self.init_widgets()
        self.init_actions()
        self.init_layout()
        
        self.renamed_info = od()
        
    @property
    def names(self):
        #return sorted(self.df.index.get_level_values(self.level).unique().values)
        return self.df.index.get_level_values(self.level).unique().values
    @property
    def df_edit(self):
        return deepcopy(self._df_edit)
    
    def init_widgets(self):
        
        self.btn_apply = ipw.Button(description='Apply')
        self.btn_apply.style.button_color = "lime"
        
        self.input_rows = []
        self.input_fields = []
        
        for i, name in enumerate(self.names):
            try:
                val = self.suggestions[i]
            except:
                val = name
            ipt = ipw.Text(value=val, placeholder='Insert new name',
                            disabled=False)
            row = ipw.HBox([ipw.Label(name, width="200px"), ipt])
            self.input_fields.append(ipt)
            self.input_rows.append(row)
                                      
    def init_actions(self):
        #what happens when the state of the selection is changed (display current selection)
        self.btn_apply.on_click(self.on_click_apply)
        
    def init_layout(self):
        
        edit_area = ipw.HBox([ipw.VBox(self.input_rows), self.btn_apply])
        self.layout = ipw.VBox([edit_area, self.output])
        
    def on_click_apply(self, b):
        self.apply_changes()
        
    def disp_current(self):
        self.output.clear_output()
        #self.output.append_display_data(ipw.Label("PREVIEW current selection", fontsize=22))
        self.output.append_display_data(self._df_edit.style.set_caption("PREVIEW"))
        self.output
        
    def apply_changes(self):
        
        df = self.df 
        mapping = od()
        
        for i, name in enumerate(self.names):
            repl = str(self.input_fields[i].value)
            mapping[name] = repl
        self._df_edit = df.rename(index=mapping, level=self.level)
        
        self.disp_current()
        
    def __call__(self):
        return self.layout

renamer = IndexRenamer(selection)
renamer()

VBox(children=(HBox(children=(VBox(children=(HBox(children=(Label(value='Run1'), Text(value='Run1', placeholde…

Now, update the current dataframe for further usage.

In [23]:
selection = renamer.df_edit
selection

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Run1,185-215,CLDTOT_ISCCP,Total cloud cover,True,68.543,66.800,1.744,12.992
Run1,185-215,FLDS_ISCCP,LW down SRF,True,353.861,343.347,10.514,16.891
Run1,185-215,FLNS_ISCCP,LW net SRF,True,56.249,49.425,6.824,14.098
Run1,185-215,FLUTC_CERES-EBAF,LW up Top Clearsky,True,267.090,266.051,1.039,4.662
Run1,185-215,FLUT_CERES-EBAF,LW up Top,True,241.972,239.574,2.398,7.188
Run1,185-215,FSDS_ISCCP,SW down SRF,True,190.458,189.390,1.068,15.915
Run1,185-215,FSNS_ISCCP,SW net SRF,True,166.962,165.893,1.070,13.587
Run1,185-215,FSNTOAC_CERES,SW net TOA clearsky,True,290.330,294.702,-4.373,15.977
Run1,185-215,FSNTOA_CERES,SW net TOA,True,244.525,244.691,-0.167,12.096
Run1,185-215,LHFLX_JRA25,Lat Heat Flux,True,87.926,87.935,-0.009,14.947


#### 5.3 Reshaping of table (make it wider for readibility) (RESHAPE)
 
For visualisation this display requires a lot of scrolling. We can make the table `wider` by unstacking certain indices, e.g. the two outermost indices `Run` and `Years`.

In [24]:
class ReshapeAndSelect(object):
    """Widget that can be used to reshape a Dataframe and select individual data columns"""
    output = ipw.Output()
    def __init__(self, df):
        
        self.df = df
        self._df_edit = df
        
        self.index_names = df.index.names
        self.col_names = df.columns
    
    @property
    def df_edit(self):
        return deepcopy(self._df_edit)
    
    @property
    def flagged_vars(self):
        return list(self.df[self.df.Flag].index.get_level_values("Variable").unique().values)
    
    def init_widgets(self):
    
        self.btn_unselect_all = ipw.Button(description='Unselect all')
        self.btn_select_all = ipw.Button(description='Select all')
        self.btn_flagged = ipw.Button(description="Flagged")
        self.btn_apply = ipw.Button(description='Apply')
        self.btn_apply.style.button_color = 'lime'

        self.var_selector = ipw.SelectMultiple(description="Variables", 
                                               options=self.vals, 
                                               value=self.flagged_vars, 
                                               min_width='150px',
                                               layout=self.box_layout)
        
        self.current_disp = ipw.Textarea(value='', 
                                         description='Current:', 
                                         disabled=True, 
                                         layout=self.box_layout)
        #self.output = ipw.Output()
        
    def init_actions(self):
        #what happens when the state of the selection is changed (display current selection)
        self.var_selector.observe(self.print_current)
        #what happens when buttons are clicked
        self.btn_select_all.on_click(self.on_select_all_vars_clicked)
        self.btn_unselect_all.on_click(self.on_unselect_all_vars_clicked)
        self.btn_flagged.on_click(self.on_flagged_clicked)
        self.btn_apply.on_click(self.on_click_apply)
    
    def init_display(self):
        self.btns = ipw.VBox([self.btn_select_all, 
                              self.btn_unselect_all,
                              self.btn_flagged,
                              ipw.Label(),
                              self.btn_apply])
    
        self.edit_area = ipw.HBox([self.var_selector, 
                                   self.current_disp, 
                                   self.btns])
        
        self.layout = ipw.VBox([self.edit_area, self.output])
    
    def on_unselect_all_vars_clicked(self, b):
        self.unselect_all()
    
    def on_select_all_vars_clicked(self, b):
        self.select_all()
    
    def on_flagged_clicked(self, b):
        self.select_flagged()
        
    def unselect_all(self):
        self.var_selector.value = ()
    
    def select_all(self):
        self.var_selector.value = self.var_selector.options
    
    def select_flagged(self):
        self.var_selector.value = self.flagged_vars
        
    def disp_current(self):
        self.output.clear_output()
        #self.output.append_display_data(ipw.Label("PREVIEW current selection", fontsize=22))
        self.output.append_display_data(self._df_edit.head().style.set_caption("PREVIEW HEAD"))
        self.output
        
    def crop_selection(self):
        idx = pd.IndexSlice
        try:
            self._df_edit = self.df.loc[idx[:, :, self.var_selector.value, :], :]
        except Exception as e:
            print("WARNING: failed to extract selection.\nTraceback {}".format(format_exc()))
    
    def on_click_apply(self, b):
        self.crop_selection()
        self.disp_current()
        
    def print_current(self, b):
        s=""
        for item in self.var_selector.value:
            s += "{}\n".format(item)
        self.current_disp.value = s
    
    def __repr__(self):
        return repr(self.layout)
    
    def __call__(self):
        return self.layout
    

In [57]:
selection_unstacked = selection.unstack(["Run", "Years"])
selection_unstacked.stack(["Run", "Years"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Bias,Flag,Model,Obs,RMSE,RMSE_ERR
Variable,Description,Run,Years,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
CLDTOT_ISCCP,Total cloud cover,Run1,185-215,1.744,True,68.543,66.800,12.992,0.044541
CLDTOT_ISCCP,Total cloud cover,Run1,310-340,1.435,True,68.234,66.800,13.078,0.051455
CLDTOT_ISCCP,Total cloud cover,Run1,80-110,2.157,True,68.956,66.800,12.869,0.034652
CLDTOT_ISCCP,Total cloud cover,Run2,90-120,3.947,True,70.747,66.800,12.485,0.003779
CLDTOT_ISCCP,Total cloud cover,Run3,1-20,1.786,True,68.586,66.800,11.881,-0.044782
CLDTOT_ISCCP,Total cloud cover,Run4,71-100,-3.179,True,63.621,66.800,11.323,-0.089645
FLDS_ISCCP,LW down SRF,Run1,185-215,10.514,True,353.861,343.347,16.891,0.051809
FLDS_ISCCP,LW down SRF,Run1,310-340,11.499,True,354.846,343.347,17.664,0.099944
FLDS_ISCCP,LW down SRF,Run1,80-110,5.162,True,348.508,343.347,16.720,0.041161
FLDS_ISCCP,LW down SRF,Run2,90-120,4.507,True,347.854,343.347,15.278,-0.048633


Well, this is better but also not extremely illustrative / intuitive. It becomes more intuitive if we just look at one parameter that we are interested in (e.g. RMSE). 

#### 5.4 Extracting the Bias of each model run relative to the observations (GET_BIAS)

Retrieving a table that illustrates the Bias of each run for each flagged variable is straight forward. We just extract the `Bias` column from our flagged frame:

In [26]:
bias = selection_unstacked["Bias"]
bias.head()

Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,1.744,1.435,2.157,3.947,1.786,-3.179
FLDS_ISCCP,LW down SRF,10.514,11.499,5.162,4.507,-1.799,-5.066
FLNS_ISCCP,LW net SRF,6.824,6.847,7.167,6.925,9.699,6.394
FLUTC_CERES-EBAF,LW up Top Clearsky,1.039,1.426,-0.786,-1.182,-0.986,-4.268
FLUT_CERES-EBAF,LW up Top,2.398,2.928,0.66,-0.832,1.704,-1.426


#### 5.5 Computing RMSE relative error (GET_RMSE_REL)

In the following we extract the subset containing the *RSME* information of the flagged variables for all runs in order to compute the relative error for each run based on the average *RMSE* of all runs:

$$\frac{RMSE_{Run}\,-\,\overline{RMSE_{All\,Runs}}}{\overline{RMSE_{All\,Runs}}}$$


In [27]:
rmse = selection_unstacked["RMSE"]
rmse

Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,12.992,13.078,12.869,12.485,11.881,11.323
FLDS_ISCCP,LW down SRF,16.891,17.664,16.72,15.278,15.351,14.45
FLNS_ISCCP,LW net SRF,14.098,14.174,14.516,13.988,14.953,11.967
FLUTC_CERES-EBAF,LW up Top Clearsky,4.662,4.738,5.67,4.778,4.778,6.042
FLUT_CERES-EBAF,LW up Top,7.188,7.467,7.499,6.598,6.169,6.855
FSDS_ISCCP,SW down SRF,15.915,16.082,16.048,15.421,15.089,13.38
FSNS_ISCCP,SW net SRF,13.587,13.727,13.705,13.068,12.632,12.711
FSNTOAC_CERES,SW net TOA clearsky,15.977,15.71,18.506,17.609,17.432,18.458
FSNTOA_CERES,SW net TOA,12.096,12.125,12.314,12.711,10.795,12.307
LHFLX_JRA25,Lat Heat Flux,14.947,15.116,15.587,15.153,14.578,17.176


##### Side comment: Series vs. unstacked Multiindex Dataframes

As you can see in the previous output, we have extracted ***ONE*** variable from the ***UNSTACKED*** dataframe. Now, this is still a pandas ``Dataframe`` since it is *tabular* data. 

In [28]:
print("Extracted table is Dataframe since it is a wide table: {}".format(isinstance(rmse, pd.DataFrame)))

Extracted table is Dataframe since it is a wide table: True


In [29]:
rmse_mean = rmse.mean(axis=1, skipna=True)
#Note that the created object is a Series and not a Dataframe
rmse_mean.head()

Variable          Description       
CLDTOT_ISCCP      Total cloud cover     12.438000
FLDS_ISCCP        LW down SRF           16.059000
FLNS_ISCCP        LW net SRF            13.949333
FLUTC_CERES-EBAF  LW up Top Clearsky     5.111333
FLUT_CERES-EBAF   LW up Top              6.962667
dtype: float64

The next step is (semi) straight forward (we have to use the `div` and `subtract` methods of the Dataframe rather than `/` and `-` operators in order to specify that we want to apply them in the horizontal and not in the vertical direction.

In [30]:
rmse_err_rel = rmse.subtract(rmse_mean, axis=0).div(rmse_mean, axis=0)
rmse_err_rel

Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,0.044541,0.051455,0.034652,0.003779,-0.044782,-0.089645
FLDS_ISCCP,LW down SRF,0.051809,0.099944,0.041161,-0.048633,-0.044087,-0.100193
FLNS_ISCCP,LW net SRF,0.010658,0.016106,0.040623,0.002772,0.071951,-0.14211
FLUTC_CERES-EBAF,LW up Top Clearsky,-0.087909,-0.07304,0.1093,-0.065215,-0.065215,0.182079
FLUT_CERES-EBAF,LW up Top,0.032363,0.072434,0.07703,-0.052375,-0.113989,-0.015463
FSDS_ISCCP,SW down SRF,0.038669,0.049568,0.047349,0.006428,-0.015239,-0.126774
FSNS_ISCCP,SW net SRF,0.026338,0.036913,0.035251,-0.012867,-0.045801,-0.039834
FSNTOAC_CERES,SW net TOA clearsky,-0.075512,-0.090962,0.070825,0.018921,0.00868,0.068048
FSNTOA_CERES,SW net TOA,0.003151,0.005556,0.021231,0.054155,-0.104744,0.02065
LHFLX_JRA25,Lat Heat Flux,-0.031062,-0.020107,0.010426,-0.017708,-0.054982,0.113433


### 5.6 Inserting column of RMSE relative error into original table (INSERT_RMSE_REL_ORIG)

If we want, we can now add the typical RMSE to our original dataframe (containing the only flagged data, since it was computed from this). 

**Note: this is just illustrative and not used in the following section**

First we have to stack it:

In [31]:
stacked = rmse_err_rel.stack(level=(0,1)).reorder_levels(order=(2,3,0,1))
stacked.head()

Run   Years    Variable      Description      
Run1  185-215  CLDTOT_ISCCP  Total cloud cover    0.044541
      310-340  CLDTOT_ISCCP  Total cloud cover    0.051455
      80-110   CLDTOT_ISCCP  Total cloud cover    0.034652
Run2  90-120   CLDTOT_ISCCP  Total cloud cover    0.003779
Run3  1-20     CLDTOT_ISCCP  Total cloud cover   -0.044782
dtype: float64

In [32]:
selection["RMSE_ERR"] = stacked
selection

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Flag,Model,Obs,Bias,RMSE,RMSE_ERR
Run,Years,Variable,Description,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Run1,185-215,CLDTOT_ISCCP,Total cloud cover,True,68.543,66.800,1.744,12.992,0.044541
Run1,185-215,FLDS_ISCCP,LW down SRF,True,353.861,343.347,10.514,16.891,0.051809
Run1,185-215,FLNS_ISCCP,LW net SRF,True,56.249,49.425,6.824,14.098,0.010658
Run1,185-215,FLUTC_CERES-EBAF,LW up Top Clearsky,True,267.090,266.051,1.039,4.662,-0.087909
Run1,185-215,FLUT_CERES-EBAF,LW up Top,True,241.972,239.574,2.398,7.188,0.032363
Run1,185-215,FSDS_ISCCP,SW down SRF,True,190.458,189.390,1.068,15.915,0.038669
Run1,185-215,FSNS_ISCCP,SW net SRF,True,166.962,165.893,1.070,13.587,0.026338
Run1,185-215,FSNTOAC_CERES,SW net TOA clearsky,True,290.330,294.702,-4.373,15.977,-0.075512
Run1,185-215,FSNTOA_CERES,SW net TOA,True,244.525,244.691,-0.167,12.096,0.003151
Run1,185-215,LHFLX_JRA25,Lat Heat Flux,True,87.926,87.935,-0.009,14.947,-0.031062


### 6. Conditional formatting of tables (Dataframes) (VISUALISE)

This section illustrates, how we can perform conditional formatting of the color tables. As discussed above, we can apply background colour gradients to the data. In the example above we had a multiindex data type specifying model run, year-range and variable in stacked format (long table) and the four data columns specifying results from model and observation as well as bias and RMSE. 

Now, in the following we illustrate how we can apply this colour highlighting for the two unstacked tables that we just created and that contain Bias and relative error. 

Starting with the Bias data, we show an example that does not work for our purposes (since it only allows for conditional formatting of either rows or columns.

#### 6.1 NOT how we want it (using the style method `background_gradient`) (VIS_WRONG)

The most straight forward example for conditional formatting of a Dataframe is shown in the following. In the example we use the `Bias` table and, similar to the example above, apply a value based colormap. Here, we use a *diverging colormap (bwr)* which has white as center color. Like in the example above, we use the style method `background_gradient` which can perform the formatting either in a **rowwise** or **columnwise** manner (using input argument `axis=1` or `axis=0`, respectively). 

Note, however, that this is not what we are aiming for in this example, rather, we want the colour formatting to be applied based on the values available the **whole table** and not individually for **columns** or **rows** (which is done in the next section). Nonetheless, in the cell below we show what we get if we use the method `backgroun_gradient`.  

Again, we use the `low` and `high` parameters to specify the colorrange that we use to map the values (see above).

In [41]:
bias.style.background_gradient(cmap="bwr", low=0.5, high=0.5, axis=1).highlight_null("white")

Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,1.744,1.435,2.157,3.947,1.786,-3.179
FLDS_ISCCP,LW down SRF,10.514,11.499,5.162,4.507,-1.799,-5.066
FLNS_ISCCP,LW net SRF,6.824,6.847,7.167,6.925,9.699,6.394
FLUTC_CERES-EBAF,LW up Top Clearsky,1.039,1.426,-0.786,-1.182,-0.986,-4.268
FLUT_CERES-EBAF,LW up Top,2.398,2.928,0.66,-0.832,1.704,-1.426
FSDS_ISCCP,SW down SRF,1.068,1.228,1.216,-1.721,3.2,-1.589
FSNS_ISCCP,SW net SRF,1.07,1.365,-0.017,-2.145,2.63,-2.214
FSNTOAC_CERES,SW net TOA clearsky,-4.373,-4.183,-5.747,-4.988,-5.051,-6.703
FSNTOA_CERES,SW net TOA,-0.167,0.223,-1.724,-3.937,-0.338,-5.015
LHFLX_JRA25,Lat Heat Flux,-0.009,0.434,-1.695,-2.692,-2.503,-0.031


In [34]:
bias

Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,1.744,1.435,2.157,3.947,1.786,-3.179
FLDS_ISCCP,LW down SRF,10.514,11.499,5.162,4.507,-1.799,-5.066
FLNS_ISCCP,LW net SRF,6.824,6.847,7.167,6.925,9.699,6.394
FLUTC_CERES-EBAF,LW up Top Clearsky,1.039,1.426,-0.786,-1.182,-0.986,-4.268
FLUT_CERES-EBAF,LW up Top,2.398,2.928,0.66,-0.832,1.704,-1.426
FSDS_ISCCP,SW down SRF,1.068,1.228,1.216,-1.721,3.2,-1.589
FSNS_ISCCP,SW net SRF,1.07,1.365,-0.017,-2.145,2.63,-2.214
FSNTOAC_CERES,SW net TOA clearsky,-4.373,-4.183,-5.747,-4.988,-5.051,-6.703
FSNTOA_CERES,SW net TOA,-0.167,0.223,-1.724,-3.937,-0.338,-5.015
LHFLX_JRA25,Lat Heat Flux,-0.009,0.434,-1.695,-2.692,-2.503,-0.031


Now, this worked nicely but there are mainly two problems with this representation:

1. As mentioned above, one problem here is that the colour coding can only be performed row or column wise using the input parameter `axis` (and not based on the values of the whole table, see [here](https://pandas.pydata.org/pandas-docs/stable/style.html#Building-Styles-Summary) for details)
2. If we use the symmetric colormap as is (i.e. center colour is white), then, the color white will be mapped to the midpoint value of the considered value range (e.g. min=-2, max=4 => (4 - -2)/2 = 3 => 1 == white). However, what we want is a *shifter diverging colormap* that ensures that the value 0 is mapped white, even if min != -max.
3. Further, we might wish to have control over the number of significant digits that are displayed in the table

All these problems will be solved in the following.

#### 6.2 How we want it (VIS_RIGHT)

In the following, we use a custom display method `my_table_display` (that is defined in [helper_funcs.py](https://github.com/jgliss/my_py3_scripts/blob/master/notebooks/helper_funcs.py)) in order to perform colour formatting considering all rows and columns at the same time and furthermore, using a diverging colour map that is dynamically shifted such that value 0 corresponds to the colour white (method `shifted_color_map`) also if `-vmin != vmax` (like usually).

In [35]:
from helper_funcs import my_table_display
my_table_display(bias)

Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,1.74,1.44,2.16,3.95,1.79,-3.18
FLDS_ISCCP,LW down SRF,10.51,11.5,5.16,4.51,-1.8,-5.07
FLNS_ISCCP,LW net SRF,6.82,6.85,7.17,6.92,9.7,6.39
FLUTC_CERES-EBAF,LW up Top Clearsky,1.04,1.43,-0.79,-1.18,-0.99,-4.27
FLUT_CERES-EBAF,LW up Top,2.4,2.93,0.66,-0.83,1.7,-1.43
FSDS_ISCCP,SW down SRF,1.07,1.23,1.22,-1.72,3.2,-1.59
FSNS_ISCCP,SW net SRF,1.07,1.36,-0.02,-2.15,2.63,-2.21
FSNTOAC_CERES,SW net TOA clearsky,-4.37,-4.18,-5.75,-4.99,-5.05,-6.7
FSNTOA_CERES,SW net TOA,-0.17,0.22,-1.72,-3.94,-0.34,-5.01
LHFLX_JRA25,Lat Heat Flux,-0.01,0.43,-1.7,-2.69,-2.5,-0.03


Now for the typical RMSE error

In [36]:
my_table_display(rmse_err_rel)

  np.copyto(xa, -1, where=xa < 0.0)


Unnamed: 0_level_0,Run,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_1,Years,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CLDTOT_ISCCP,Total cloud cover,0.04,0.05,0.03,0.0,-0.04,-0.09
FLDS_ISCCP,LW down SRF,0.05,0.1,0.04,-0.05,-0.04,-0.1
FLNS_ISCCP,LW net SRF,0.01,0.02,0.04,0.0,0.07,-0.14
FLUTC_CERES-EBAF,LW up Top Clearsky,-0.09,-0.07,0.11,-0.07,-0.07,0.18
FLUT_CERES-EBAF,LW up Top,0.03,0.07,0.08,-0.05,-0.11,-0.02
FSDS_ISCCP,SW down SRF,0.04,0.05,0.05,0.01,-0.02,-0.13
FSNS_ISCCP,SW net SRF,0.03,0.04,0.04,-0.01,-0.05,-0.04
FSNTOAC_CERES,SW net TOA clearsky,-0.08,-0.09,0.07,0.02,0.01,0.07
FSNTOA_CERES,SW net TOA,0.0,0.01,0.02,0.05,-0.1,0.02
LHFLX_JRA25,Lat Heat Flux,-0.03,-0.02,0.01,-0.02,-0.05,0.11


### 7. Concatenate and save results (Bias and typical RMSE) as table (EXPORT)

In the following, the two result tables ``bias_table`` and ``typical_rmse`` are merged into one result table and then saved both as excel table and as csv file.

In [60]:
result = pd.concat([bias, rmse_err_rel],axis=1, keys=["Bias", "RMSE relative Error"])
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Bias,Bias,Bias,Bias,Bias,Bias,RMSE relative Error,RMSE relative Error,RMSE relative Error,RMSE relative Error,RMSE relative Error,RMSE relative Error
Unnamed: 0_level_1,Run,Run1,Run1,Run1,Run2,Run3,Run4,Run1,Run1,Run1,Run2,Run3,Run4
Unnamed: 0_level_2,Years,185-215,310-340,80-110,90-120,1-20,71-100,185-215,310-340,80-110,90-120,1-20,71-100
Variable,Description,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
CLDTOT_ISCCP,Total cloud cover,1.744,1.435,2.157,3.947,1.786,-3.179,0.044541,0.051455,0.034652,0.003779,-0.044782,-0.089645
FLDS_ISCCP,LW down SRF,10.514,11.499,5.162,4.507,-1.799,-5.066,0.051809,0.099944,0.041161,-0.048633,-0.044087,-0.100193
FLNS_ISCCP,LW net SRF,6.824,6.847,7.167,6.925,9.699,6.394,0.010658,0.016106,0.040623,0.002772,0.071951,-0.14211
FLUTC_CERES-EBAF,LW up Top Clearsky,1.039,1.426,-0.786,-1.182,-0.986,-4.268,-0.087909,-0.07304,0.1093,-0.065215,-0.065215,0.182079
FLUT_CERES-EBAF,LW up Top,2.398,2.928,0.66,-0.832,1.704,-1.426,0.032363,0.072434,0.07703,-0.052375,-0.113989,-0.015463
FSDS_ISCCP,SW down SRF,1.068,1.228,1.216,-1.721,3.2,-1.589,0.038669,0.049568,0.047349,0.006428,-0.015239,-0.126774
FSNS_ISCCP,SW net SRF,1.07,1.365,-0.017,-2.145,2.63,-2.214,0.026338,0.036913,0.035251,-0.012867,-0.045801,-0.039834
FSNTOAC_CERES,SW net TOA clearsky,-4.373,-4.183,-5.747,-4.988,-5.051,-6.703,-0.075512,-0.090962,0.070825,0.018921,0.00868,0.068048
FSNTOA_CERES,SW net TOA,-0.167,0.223,-1.724,-3.937,-0.338,-5.015,0.003151,0.005556,0.021231,0.054155,-0.104744,0.02065
LHFLX_JRA25,Lat Heat Flux,-0.009,0.434,-1.695,-2.692,-2.503,-0.031,-0.031062,-0.020107,0.010426,-0.017708,-0.054982,0.113433


Now save both tables as excel file.

In [38]:
writer = pd.ExcelWriter('{}/result.xlsx'.format(output_dir))
df.to_excel(writer)
writer.save()