<img style='float: left' width="150px" src="http://bostonlightswim.org/wp/wp-content/uploads/2011/08/BLS-front_4-color.jpg">
<br><br>

## [The Boston Light Swim](http://bostonlightswim.org/)

### Sea Surface Temperature time-series model skill

### Load configuration

In [1]:
import os
import sys
import warnings

# Directory two levels above the working directory; it is added to the import
# path so that the `pytools` package imported by the following cells resolves
# (presumably that is where the package lives -- confirm against the repo layout).
pytools_path = os.path.abspath(os.path.join(os.path.pardir, os.path.pardir))
# Guard the append so that re-running this cell does not stack duplicate
# entries on `sys.path`.
if pytools_path not in sys.path:
    sys.path.append(pytools_path)

# Suppressing warnings for a "pretty output."
# Remove this line to debug any possible issues.
warnings.simplefilter("ignore")

In [2]:
from pytools.ioos import parse_config

# Run settings are read from a YAML file sitting next to this notebook.
config_file = 'config.yaml'
config = parse_config(config_file)

# Every artifact of this run is read from / written to a sub-directory named
# after the run, located beside the configuration file.
config_dir = os.path.abspath(os.path.dirname(config_file))
save_dir = os.path.join(config_dir, config['run_name'])

In [3]:
import numpy as np
import pandas as pd
from pytools.ioos import to_html, save_html
from pytools.skill_score import apply_skill


# Observation-station table for this run; rows are keyed by the `name` column.
fname = '{}-all_obs.csv'.format(config['run_name'])
all_obs_path = os.path.join(save_dir, fname)
all_obs = pd.read_csv(all_obs_path, index_col='name')


def rename_cols(df):
    """
    Rename the columns of `df` from station codes to station names.

    The mapping comes from the module-level `all_obs` table: its `station`
    column (compared as strings) holds the codes and its index holds the
    names.  When a code appears more than once the first entry wins.
    Columns without a matching station keep their original label instead
    of raising an `IndexError`.

    Returns a new DataFrame; `df` itself is not modified.

    """
    # Build the code -> name lookup once rather than masking `all_obs` for
    # every column.  `setdefault` keeps the first occurrence of a code.
    lookup = dict()
    for name, station in zip(all_obs.index, all_obs['station'].astype(str)):
        lookup.setdefault(station, name)

    columns = dict()
    for station in df.columns:
        # Column labels may not be strings (e.g. integer station ids), so
        # normalise them the same way the `station` column is normalised.
        name = lookup.get(str(station))
        if name is not None:
            columns[station] = name
    return df.rename(columns=columns)

In [4]:
def nc2df(fname):
    """
    Read a netCDF timeSeries file and return it as a pandas DataFrame.

    Before the conversion every coordinate attached to dimension 0 other
    than `time`, and every coordinate attached to dimension 1 other than
    `station name`, is removed from the cube.  For a one-dimensional cube
    the resulting frame gets a single column labelled with the first
    `station name` value.

    """
    import iris
    from iris.pandas import as_data_frame

    cube = iris.load_cube(fname)
    # (dimension, name of the only coordinate to keep on that dimension).
    keep_on_dim = ((0, 'time'), (1, 'station name'))
    for dim, wanted in keep_on_dim:
        for coord in cube.coords(dimensions=[dim]):
            coord_name = coord.name()
            if coord_name == wanted:
                continue
            cube.remove_coord(coord_name)
    frame = as_data_frame(cube)
    if cube.ndim == 1:  # Horrible work around iris.
        # Single-station file: label the lone column with the station name.
        frame.columns = [cube.coord('station name').points[0]]
    return frame

In [5]:
from glob import glob
from pandas import Panel


def load_ncs(run_name):
    """
    Load the observation file and every model file of a run into a Panel.

    The observations are read from `<save_dir>/<run_name>-OBS_DATA.nc`.
    Every other `*.nc` file found in `save_dir` is treated as a model
    series whose name is the text after the last `-` in the file name
    (extension excluded).  Each model frame is re-indexed onto the
    observation time index and gaps are filled by time interpolation
    (at most 2 consecutive values).

    Returns `pd.Panel.fromDict(...)` of the `{name: DataFrame}` mapping with
    axes 0 and 2 swapped, so the series names (`OBS_DATA` and the models)
    end up on the minor axis.

    """
    obs_path = os.path.join(save_dir,
                            '{}-{}.nc'.format(run_name, 'OBS_DATA'))
    ALL_OBS_DATA = nc2df(obs_path)
    index = ALL_OBS_DATA.index
    dfs = dict(OBS_DATA=ALL_OBS_DATA)
    # Search the same directory the observations were read from (the original
    # globbed the relative `run_name` directory, which only matches `save_dir`
    # while the working directory is the notebook directory).  Sorting makes
    # the processing order deterministic.
    for path in sorted(glob(os.path.join(save_dir, "*.nc"))):
        # Work on the bare file name: dots or the text OBS_DATA appearing in
        # a parent directory must not influence the parsing.
        stem = os.path.splitext(os.path.basename(path))[0]
        if 'OBS_DATA' in stem:
            continue
        model = stem.split('-')[-1]
        df = nc2df(path)
        # FIXME: Horrible work around duplicate times.
        # NOTE(review): `take_last` and `.ix` below are deprecated in later
        # pandas releases -- confirm the pinned pandas version before
        # upgrading.
        if len(df.index.values) != len(np.unique(df.index.values)):
            kw = dict(subset='index', take_last=True)
            df = df.reset_index().drop_duplicates(**kw).set_index('index')
        kw = dict(method='time', limit=2)
        df = df.reindex(index).interpolate(**kw).ix[index]
        dfs.update({model: df})

    return pd.Panel.fromDict(dfs).swapaxes(0, 2)

In [6]:
%matplotlib inline
dfs = load_ncs(config['run_name'])

# A panel whose only minor-axis entry is the observations has nothing to
# score, so stop here with an explicit error.
if list(dfs.minor_axis) == ['OBS_DATA']:
    raise ValueError('No model found. The notebook cannot proceed.\nPlease check the logs.')

### Skill 1: Model Bias (or Mean Bias)

The bias skill compares the model mean temperature against the observations.
It is possible to introduce a Mean Bias in the model due to a mismatch of the
boundary forcing and the model interior.

$$ \text{MB} = \mathbf{\overline{m}} - \mathbf{\overline{o}}$$

In [7]:
from pytools.skill_score import mean_bias

dfs = load_ncs(config['run_name'])

# Score the series with `mean_bias`, leaving the mean in place and the tides
# unfiltered, then label the columns with station names.
df = rename_cols(
    apply_skill(dfs, mean_bias, remove_mean=False, filter_tides=False)
)
# Keep the unformatted numbers for the JSON summary written later on.
skill_score = dict(mean_bias=df.to_dict())

# Filter out stations with no valid comparison, then format for display:
# two decimals, with `--` standing in for missing scores.
df = df.dropna(how='all', axis=1)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

fname = os.path.join(save_dir, 'mean_bias.html')
html = to_html(df.T)
save_html(fname, html)
html

Unnamed: 0,HYCOM,NECOFS_FVCOM,NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston,1.56,8.42,0.71
Buoy A01,0.96,10.26,0.97
"Boston, MA",--,2.89,1.87


### Skill 2: Centered Root Mean Squared Error

Root Mean Squared Error of the deviations from the mean.

$$ \text{CRMS} = \sqrt{\overline{\left(\mathbf{m'} - \mathbf{o'}\right)^2}}$$

where: $\mathbf{m'} = \mathbf{m} - \mathbf{\overline{m}}$ and $\mathbf{o'} = \mathbf{o} - \mathbf{\overline{o}}$

In [8]:
from pytools.skill_score import rmse

dfs = load_ncs(config['run_name'])

# Score the series with `rmse` after removing the mean (tides unfiltered),
# then label the columns with station names.
df = rename_cols(
    apply_skill(dfs, rmse, remove_mean=True, filter_tides=False)
)
# Keep the unformatted numbers for the JSON summary written later on.
skill_score['rmse'] = df.to_dict()

# Filter out stations with no valid comparison, then format for display:
# two decimals, with `--` standing in for missing scores.
df = df.dropna(how='all', axis=1)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

fname = os.path.join(save_dir, 'rmse.html')
html = to_html(df.T)
save_html(fname, html)
html

Unnamed: 0,HYCOM,NECOFS_FVCOM,NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston,1.14,0.74,0.84
Buoy A01,0.76,1.43,1.18
"Boston, MA",--,0.75,0.9


### Skill 3: R$^2$
See the [coefficient of determination](https://en.wikipedia.org/wiki/Coefficient_of_determination).

In [9]:
from pytools.skill_score import r2

dfs = load_ncs(config['run_name'])

# Score the series with `r2` after removing the mean (tides unfiltered),
# then label the columns with station names.
df = rename_cols(
    apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
)
# Keep the unformatted numbers for the JSON summary written later on.
skill_score['r2'] = df.to_dict()

# Filter out stations with no valid comparison, then format for display:
# two decimals, with `--` standing in for missing scores.
df = df.dropna(how='all', axis=1)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

fname = os.path.join(save_dir, 'r2.html')
html = to_html(df.T)
save_html(fname, html)
html

Unnamed: 0,HYCOM,NECOFS_FVCOM,NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston,-0.32,0.17,-0.06
Buoy A01,0.78,0.14,0.42
"Boston, MA",--,-0.0,-0.45


In [10]:
import json

fname = os.path.join(save_dir, 'skill_score.json')

# Persist every skill table collected above as a single JSON document.
with open(fname, 'w') as fout:
    json.dump(skill_score, fout)

### Normalized Taylor diagrams

The radius is the model standard deviation divided by the standard deviation of
the observations, the azimuth is the arc-cosine of the cross-correlation (R), and
the distance to the point (1, 0) on the abscissa is the Centered RMS.

In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
from pytools.taylor_diagram import TaylorDiagram


def make_taylor(samples):
    """
    Draw a Taylor diagram for one set of series and return the figure.

    `samples` is a DataFrame indexed by series name with `std` and `corr`
    columns.  It must contain an `OBS_DATA` row: that row's `std` is passed
    to `TaylorDiagram` as the reference value, and every other row is added
    to the diagram as one sample.  The caller's frame is left untouched.

    """
    fig = plt.figure(figsize=(9, 9))
    dia = TaylorDiagram(samples['std']['OBS_DATA'],
                        fig=fig,
                        label="Observation")
    # Add samples to Taylor diagram.  The observation row is dropped on a
    # copy (not in place) so the caller's DataFrame is not modified.
    models = samples.drop('OBS_DATA')
    for model, row in models.iterrows():
        dia.add_sample(row['std'], row['corr'], marker='s', ls='',
                       label=model)
    # Add RMS contours, and label them.
    contours = dia.add_contours(colors='0.5')
    plt.clabel(contours, inline=1, fontsize=10)
    # Add a figure legend.
    kw = dict(prop=dict(size='small'), loc='upper right')
    fig.legend(dia.samplePoints,
               [p.get_label() for p in dia.samplePoints],
               numpoints=1, **kw)
    return fig

In [12]:
dfs = load_ncs(config['run_name'])

# Bin and interpolate all series to a common 30-minute grid.
freq = '30min'
for station, df in list(dfs.iteritems()):
    # Series that still contain gaps after interpolation are discarded.
    df = df.resample(freq).interpolate().dropna(axis=1)
    if 'OBS_DATA' not in df:
        # Without observations there is no reference to compare against.
        continue
    samples = pd.DataFrame.from_dict(dict(std=df.std(),
                                          corr=df.corr()['OBS_DATA']))
    # Rows holding a negative value are masked and then dropped.
    # `np.nan` is used because the `np.NaN` alias is gone in NumPy 2.0.
    samples[samples < 0] = np.nan
    samples.dropna(inplace=True)
    if len(samples) <= 2:  # 1 obs 1 model.
        continue
    fig = make_taylor(samples)
    fig.savefig(os.path.join(save_dir, '{}.png'.format(station)))
    # Close each figure once saved so they do not pile up in memory.
    plt.close(fig)