# Discharge 1 Forecast all steps


First we are going to open a CSV File containing EFAS Stations.

We will select station 308.

In [None]:
import pandas as pd
from random import sample
# Read the EFAS calibration-station metadata table.
stations = pd.read_csv(
    '../static/EFAS_calib_stations_metadata.csv',
    encoding='iso-8859-1',
    index_col=None,
)
# station = stations.sample(n=1)  # alternative: pick a station at random
# Keep only the row(s) with ObsID 308 so every run uses the same station.
station = stations.loc[stations['ObsID'] == 308]
station


We will open a dataset containing the ECMWF Ensemble Forecast (50 members) of River Discharge over 24 hours, from Step 0 to 360, for the forecast of 15th November 2018 00Z.

In [None]:
import xarray as xr
# Open the ensemble forecast NetCDF file with xarray.
ds = xr.open_dataset('../data/eue_15111800.nc')

We can quickly see the variables of the file using ds.data_vars

In [None]:
# Display the data variables held in the forecast dataset.
ds.data_vars

We can check the dimensions of the data variables using ds.dims

The `number` dimension is the ensemble member number.

In [None]:
# Display the dimensions of the forecast dataset.
ds.dims

Now we know the variable and the dimensions we can look at the shape of the variable.

In [None]:
# Display the shape of the dis24 variable.
ds.dis24.shape

Now we will open the Historical Simulations

In [None]:
# Open the historical simulations NetCDF file with xarray and display it.
clim = xr.open_dataset('../data/clim_151118.nc')
clim


Next we will locate the grid point nearest to the chosen station, using its latitude and longitude, so that we can plot a time series for it.

In [None]:
import pyproj
# Coordinate reference systems of the station table and of the gridded data.
inputEPSG = pyproj.CRS('EPSG:4326')   # ref system EFAS-IS
outputEPSG = pyproj.CRS('EPSG:3035')  # ref system LISFLOOD

# `pyproj.Proj(init=...)` and `pyproj.transform` are deprecated in pyproj 2+.
# A Transformer built with always_xy=True keeps the (lon, lat) -> (x, y)
# argument order that the old init-style definitions used.
transformer = pyproj.Transformer.from_crs(inputEPSG, outputEPSG, always_xy=True)

# `station` was selected by ObsID, so take the first matching row explicitly
# rather than calling float() on a one-element array (deprecated in NumPy).
station_lon = float(station.StationLon.iloc[0])
station_lat = float(station.StationLat.iloc[0])
x_lsf, y_lsf = transformer.transform(station_lon, station_lat)

# extract data for selected point in netcdf file by LISFLOOD coordinates
dsloc = ds.sel(x=x_lsf, y=y_lsf, method='nearest')
climloc = clim.sel(x=x_lsf, y=y_lsf, method='nearest')

Now that we have a location in the dsloc and climloc variables we can plot the data.
Below we will first align the data so that the time fields are in the same format and reference

Historical Simulations are based on 06 UTC of the date in the time field.
Forecast fields are based on the time step added to the base time.

In [None]:
import numpy as np

# Base time of the forecast at the selected grid point.
base = dsloc.time.values
# One timestamp per forecast step: the base time plus that step's offset.
time = np.array(
    [base + np.timedelta64(offset) for offset in dsloc.step.values]
)

We will now construct a dataframe consisting of the Historical Dataset and Ensemble Members

In [None]:
# Combine the historical simulation and the forecast times in one table.
df = pd.DataFrame({
    # Historical times from the climatology file (time + step)
    'Historical_times': np.array(climloc.time.values + np.timedelta64(climloc.step.values)),
    # Historical data from the climatology file
    'Historical': climloc.dis24.values,
    # Forecast times from the ensemble file (base time + step)
    'TimeStep': time,
})

# Add one column per ensemble member. Members are selected by their `number`
# label instead of by position (`number - 1`), so this does not rely on the
# member numbers starting at 1, being contiguous, or being the first axis.
for number in dsloc.number.values:
    df['Ensemble_Member_' + str(number)] = dsloc.dis24.sel(number=number).values

df.head()  # Show the first rows of the dataframe

Now we will plot the members of the Dataframe

In [None]:
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plot

# Notebook-wide default figure size; the figure below sets its own size.
matplotlib.rcParams['figure.figsize'] = [12, 12]

plot.figure(
    num='EFAS ECMWF 50 Perturbed Members vs Historical Simulations',
    figsize=(10, 10),
)

# Title built from the selected station's metadata, one item per line.
plot.title(
    "Station : " + station.StationName.to_string(index=False) +
    "\n River : " + station.River.to_string(index=False) +
    "\n Catchment : " + station.Catchment.to_string(index=False) +
    "\n Latitude : " + station.StationLat.to_string(index=False) +
    "\n Longitude : " + station.StationLon.to_string(index=False)
)
plot.xlabel('TimeStep in 24h')
plot.ylabel(dsloc.dis24.GRIB_name + ' ' + dsloc.dis24.GRIB_units)

# Historical simulation: solid blue line with large round markers.
plot.plot(
    'Historical_times', 'Historical',
    data=df,
    marker='o', markerfacecolor='blue', markersize=12,
    color='blue', linewidth=2,
)

# Every ensemble member: thin dashed light-blue line without markers.
for ens_member in df.filter(regex='Ensemble').columns:
    plot.plot(
        'TimeStep', ens_member,
        data=df,
        marker='', color='lightblue', linewidth=1, linestyle='dashed',
    )
plot.show()

We can also look at the member distribution using a box plot.
This allows us to look at the spread of the ensemble values.
We also add the Historical Simulations of Discharge on top as a line plot to see how they compare.

In [None]:
%matplotlib notebook
# Notebook-wide default figure size; the figure below sets its own size.
matplotlib.rcParams['figure.figsize'] = [12, 12]

plot.figure(figsize=(10,10),num='EFAS ECMWF 50 Perturbed Members vs Historical Simulations Box Plot')

# Only the ensemble member columns: rows are forecast steps, columns are members.
df2=df.filter(like='Ensemble')

plot.title("Station : " + station.StationName.to_string(index=False) + 
           "\n River : " + station.River.to_string(index=False) +
           "\n Catchment : " + station.Catchment.to_string(index=False) +
          "\n Latitude : " + station.StationLat.to_string(index=False) +
          "\n Longitude : " + station.StationLon.to_string(index=False))
plot.ylabel(dsloc.dis24.GRIB_name+' ' + dsloc.dis24.GRIB_units)
plot.xlabel('Days from Forecast Basetime ' + np.datetime_as_string(ds.time.values,unit='s'))

# Here we add the 6 Hour Base time and 24h to the values just to align it for the box plot.
# The Series and an explicit label are passed (rather than the string
# 'Historical' with data=df) so the second positional argument cannot be
# mistaken for a format string; the label keeps the legend entry.
plot.plot(df2.index+1.25, df['Historical'], label='Historical',
          marker='o', markerfacecolor='blue', markersize=12, color='blue', linewidth=2)

# boxplot draws one box per column of a 2D array. Passing a plain array with
# one column per forecast step (members along the rows) gives one box per
# step, without depending on how the installed matplotlib orients a DataFrame.
plot.boxplot(df2.values.T)
plot.legend()
plot.show()