# Validation

Station points configuration and analysis. 


- Requires:

    - The data folder. See [README](README.md) in this folder.

In [None]:
# Use the full width of the browser window: inject a CSS rule that forces
# the `.container` element to 100% width.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Not necessarily needed; just a check of the installed version. Must be >= 0.6
import pyposeidon
pyposeidon.__version__

In [None]:
import pyposeidon.model as pm

### Graphics

In [None]:
import pyposeidon.utils.pplot # initialize matplotlib accessor

In [None]:
## for interactive matplotlib graphics
%matplotlib widget 

In [None]:
import matplotlib.pyplot as plt

In [None]:
#optional
#pyposeidon.utils.pplot.__init__(dark_background=True) # set plt style for pplot graphics below

In [None]:
import pyposeidon.utils.hplot # initialize pyviz accessor

- To make the executable binaries installed in the conda env of the Kernel available within the Notebook, we add the corresponding path to the `PATH` environment variable. This is not needed when running directly from python.

In [None]:
# Create a folder to save the output.
import os

# exist_ok=True makes the call a no-op when the folder is already present,
# so no separate (race-prone) existence check is needed.
os.makedirs('test', exist_ok=True)

In [None]:
import numpy

# Path of the current kernel's environment: the part of numpy's install
# location that precedes the first '/lib/'.
cpath = numpy.__path__[0].partition('/lib/')[0]

# Append the environment's bin folder to PATH so its executables are found.
os.environ['PATH'] = os.pathsep.join([os.environ['PATH'], cpath + '/bin'])

In [None]:
# Use cartopy to get the coastlines.
import cartopy.feature as cf

# Resolution key: 'l', 'i' or 'h', mapped below to the Natural Earth
# scale strings '110m', '50m' and '10m' respectively.
cr = 'i'

coast = cf.NaturalEarthFeature(
    category='physical',
    name='land',
    scale=str({'l': 110, 'i': 50, 'h': 10}[cr]) + 'm',
)

In [None]:
import geopandas as gp

# Gather the geometries of the `coast` feature into a GeoDataFrame;
# it is passed to the model as its coastlines further down.
ne_i = gp.GeoDataFrame(geometry=list(coast.geometries()))

## Setup

Station info can be incorporated by using the `obs` tag, with the minimum required info being the lon/lat location. One also needs to set the `monitor` flag to `True`.

Such a file could look like

In [None]:
import pandas as pd

In [None]:
# Two example stations, one row each, given column by column.
st = pd.DataFrame(
    {
        'lat': [70.484, 64.15],
        'lon': [-21.962, -21.933],
    }
)
st

In [None]:
# Save the station table as CSV inside the `test` output folder.
st.to_csv('./test/stations.csv')

### A better way

However these station locations should match datasets where observation data are available. One such dataset is the one provided by the `JRC` at https://webcritech.jrc.ec.europa.eu/SeaLevelsDb/

This DataFrame is integrated in `pyposeidon`. See `critech.csv` in `pyposeidon/misc/`.

In [None]:
# Load the station table shipped in pyposeidon/misc and preview its first rows.
critech = pd.read_csv('../pyposeidon/misc/critech.csv')
critech.head()

The above dataset is currently the default. It is used if you don't specify an *obs* in the model setup (see above). Now, running the same case with *obs* commented out, we get:

**Note** : It is foreseen that a new python package will handle observation data from multiple sources including data cleanup utilizing ML. pyPoseidon will then use that package as an upstream dependency.

In [None]:
# Define the properties of the model in a dictionary.
model = {
    'solver_name': 'schism',
    'geometry': {
        'lon_min': -25,
        'lon_max': -12.,
        'lat_min': 56.,
        'lat_max': 74.,
    },
    'coastlines': ne_i,  # coastlines
    'mesh_generator': 'jigsaw',  # set grid generator
    'start_date': '2017-10-1 0:0:0',
    'time_frame': '12H',
    'meteo_source': './data/erai.grib',  # path to meteo files
    'dem_source': './data/dem.nc',  # path to dem file
    'rpath': './test/validation/',  # location of calc folder
    'monitor': True,  # get time series for observation points
    # 'obs': './test/stations.csv',  # Define station locations. If not set, pyposeidon/misc/critech.csv is used.
    # 'parameters': {'dt': 400, 'rnday': .5, 'hotout': 1, 'ihot': 0, 'nspool': 9, 'ihfskip': 36, 'hotout_write': 108},  # set param.nml components
}

## Execute


In [None]:
# Initialize a model from the `model` settings dictionary,
# then call its `execute` method.
b = pm.set(**model)
b.execute()

## Output

The output of the simulations could be in separate files (due to MPI) or specific folders. pyposeidon can incorporate the resulting Datasets with:

In [None]:
# Gather the simulation output; it is accessed through `b.data` in the cells below.
b.get_output_data()

In [None]:
# Keep a handle on the output Dataset for later use.
out = b.data.Dataset

In [None]:
# Display the output Dataset.
out

## Station data

In [None]:
b.data.time_series # the station time series as an xarray Dataset

In [None]:
# The same station time series, converted to a dataframe.
b.data.time_series.to_dataframe()

In [None]:
# Station table of the model; its `name` entries are used to label the time series.
b.stations

In [None]:
# One can assign the names of the locations like this:
# rename `node` to `location`, then use the station names as its values.
sim = b.data.time_series.rename({'node':'location'})
sim['location'] = b.stations.name.values
sim

In [None]:
sim.to_dataframe() # convert to a pandas DataFrame

### plot

In [None]:
# Open a new figure and plot the `elev` series of the first location.
plt.figure()
sim.elev.isel(location=0).plot()

### Observation data

Because we have access to an API for the tide-gauge data, we can retrieve them on demand.

In [None]:
# Show the observation locations attached to the model.
b.obs.locations

In [None]:
# Get the observation data of a station by its index number (here 1)
# and preview the first rows.
p = b.obs.iloc(1)
p.head()

In [None]:
# Get the observation data of a station by its name
# and preview the first rows.
p = b.obs.loc('Iceland-Reykjavik')
p.head()

In [None]:
# Plot the observation data retrieved for the station.
p.plot(title='Iceland-Reykjavik')

### Plot together

In [None]:
# Station to compare. NOTE: `p` was fetched above for 'Iceland-Reykjavik';
# if `loc` is changed, `p` must be re-fetched for the same station.
loc='Iceland-Reykjavik' # change accordingly 
plt.figure()
# simulated elevation at the chosen station, drawn in red
sim.sel(location=loc).elev.plot(color='r')
# draw the observations on the same axes
ap = plt.gca()
p.plot(ax=ap)
# legend entries: 'Schism' first, then the observation columns; anchored at the right edge of the axes
ap.legend(['Schism'] + list(p.columns.values) , loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.tight_layout()

... or with hvplot

In [None]:
from hvplot import xarray, pandas

In [None]:
## Join the graphs: the `*` operator combines the observation plot
## with the simulated `elev` series (red, labelled 'Schism').
p.hvplot(figsize=(14,10)) * sim.elev.sel(location=loc).hvplot(color='red',label='Schism')

### Statistics

In [None]:
# Simulated elevation at the chosen station as a dataframe,
# without the `location` column.
sdata = sim.elev.sel(location=loc).to_dataframe().drop(columns='location')
sdata

In [None]:
# Observation data for the same station, keeping only the 'Surge' column.
odata=b.obs.loc(loc).loc[:,['Surge']]
odata

We use the corresponding utility for getting verification stats

In [None]:
from pyposeidon.utils.statistics import get_stats

In [None]:
# Verification statistics, computed from the simulated (sdata)
# and observed (odata) series.
stats = get_stats(sdata,odata)
stats

## Additional features

### sample nearest point from simulation 

In [None]:
from pyposeidon.utils.get_ts import spot

In [None]:
# Latitude and longitude of the point to sample.
plat, plon = 66., -13.

In [None]:
# Sample the `elev` variable of the output Dataset at the point (plon, plat)
# and preview the resulting time series.
tp = spot(solver = 'schism', lon=plon,lat=plat,dataset=out, var='elev')
tp.time_series.head()

**Note**: Since a `pandas` dataframe is available, all sampling options within pandas can be used.