# Discharge 1 Forecast all steps


First we are going to open a CSV file containing the EFAS stations.
We will then select a single station to work with.

In [27]:
import pandas as pd
from random import sample
# ObsID of the station used throughout this notebook. It is fixed (rather than
# sampled) so that every run works on the same station.
STATION_OBS_ID = 1041

stations = pd.read_csv(
    '../static/EFAS_calib_stations_metadata.csv',
    encoding='iso-8859-1',
    index_col=None,
)
# station = stations.sample(n=1)  # alternative: pick a random station instead
station = stations[stations['ObsID'] == STATION_OBS_ID]

# Later cells read a single latitude/longitude pair from `station`, so fail
# here with a clear message if the ObsID matched no row or more than one.
if len(station) != 1:
    raise ValueError(
        'Expected exactly one station with ObsID %s, found %d'
        % (STATION_OBS_ID, len(station))
    )
station


Unnamed: 0,ObsID,StationName,Provider ID,Country code,StationLat,StationLon,Height,Height Units,DrainingArea.km2.Provider,Catchment Area Units,...,DiffDays_hist_6,DiffDays_nrt_6,cal_hist_24h,cal_nrt_24h,cal_hist_6h,cal_nrt_6h,CAL_TYPE,Notes,EC_calib,Dam/Lake
481,1041,Blaker,1002,NO,60.004052,11.299323,90,m.a.s.l.,38588.11,km2,...,-,1493,True,True,False,True,NRT_6h,Negative discharge and zeros,3,-


We will open a dataset containing the ECMWF Ensemble Forecast (50 members) of river discharge over 24 hours, for steps 0 to 360, from the forecast of 15th November 2018 00Z.

In [2]:
import xarray as xr
# Open the ensemble forecast NetCDF file; later cells read its `dis24` variable.
ds = xr.open_dataset('../data/eue_15111800.nc')

We can quickly see the variables of the file using ds.data_vars

In [3]:
# List the data variables stored in the forecast dataset.
ds.data_vars

Data variables:
    dis24                         (number, step, y, x) float32 ...
    lambert_azimuthal_equal_area  int32 ...
    land_binary_mask              (y, x) int8 ...
    upArea                        (y, x) float32 ...

We can check the dataset's dimensions using ds.dims

The `number` dimension is the index of the ensemble member (50 members in this forecast).

In [4]:
# Show the dataset's dimensions and their lengths.
# NOTE(review): recent xarray releases steer users to `ds.sizes` for the
# name -> length mapping — confirm against the installed version.
ds.dims

Frozen(SortedKeysDict({'y': 950, 'x': 1000, 'number': 50, 'step': 15}))

Now we know the variable and the dimensions we can look at the shape of the variable.

In [5]:
# Shape of the dis24 variable, one entry per dimension.
ds.dis24.shape

(50, 15, 950, 1000)

Now we will open the Historical Simulations

In [6]:
# Open the historical-simulation NetCDF file and display its summary.
clim = xr.open_dataset('../data/clim_151118.nc')
clim


<xarray.Dataset>
Dimensions:                       (time: 15, x: 1000, y: 950)
Coordinates:
  * y                             (y) float64 5.498e+06 5.492e+06 ... 7.525e+05
  * x                             (x) float64 2.502e+06 2.508e+06 ... 7.498e+06
  * time                          (time) datetime64[ns] 2018-11-15T06:00:00 ... 2018-11-29T06:00:00
    step                          timedelta64[ns] ...
    surface                       int64 ...
    latitude                      (y, x) float32 ...
    longitude                     (y, x) float32 ...
    valid_time                    (time) datetime64[ns] ...
Data variables:
    dis24                         (time, y, x) float32 ...
    lambert_azimuthal_equal_area  int32 ...
    land_binary_mask              (y, x) int8 ...
    upArea                        (y, x) float32 ...
Attributes:
    GRIB_edition:            2
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    G

Next we locate the grid point nearest to the selected station, using its latitude and longitude, so that we can plot a time series for that location.

In [7]:
import pyproj

# Source and target coordinate reference systems.
inputEPSG = pyproj.CRS.from_epsg(4326)   # ref system EFAS-IS
outputEPSG = pyproj.CRS.from_epsg(3035)  # ref system LISFLOOD

# `Proj(init=...)` and `pyproj.transform` are deprecated; a Transformer is the
# supported replacement. always_xy=True pins the axis order to
# (longitude, latitude) -> (x, y), matching the order the values are passed in.
to_lisflood = pyproj.Transformer.from_crs(inputEPSG, outputEPSG, always_xy=True)

# `station` holds a single row; take its scalar values explicitly instead of
# converting a one-element array with float().
station_lon = float(station['StationLon'].iloc[0])
station_lat = float(station['StationLat'].iloc[0])
x_lsf, y_lsf = to_lisflood.transform(station_lon, station_lat)

# extract data for selected point in netcdf file by LISFLOOD coordinates
dsloc = ds.sel(x=x_lsf, y=y_lsf, method='nearest')
climloc = clim.sel(x=x_lsf, y=y_lsf, method='nearest')

Now that we have a location in the dsloc and climloc variables we can plot the data.
Below we will first align the data so that the time fields are in the same format and reference

Historical Simulations are based on the 06UTC of the Date in the Time field
Forecast fields are based on the timestep from the basetime.

In [8]:
import numpy as np

# Valid time of every forecast step: the forecast base time plus the lead
# time of each step, computed in one vectorised addition.
base = dsloc.time.values
time = base + dsloc.step.values

We will now construct a dataframe holding the historical simulation together with the ensemble members.

Here we have placed all Ensemble members into a dataframe

In [9]:
# Collect every column first and build the dataframe in a single step.
columns = {
    # Forecast valid times from the ensemble file (base time + step).
    'TimeStep': time,
    # Valid times of the historical simulation (time + step) from the climatology file.
    'Historical_times': climloc.time.values + climloc.step.values,
    # Historical discharge from the climatology file.
    'Historical': climloc.dis24.values,
}

# One column per ensemble member. Select each member by its `number` label
# rather than by position, so the result does not depend on the labels being
# 1..N or on `number` being the first axis, and store plain arrays.
for number in dsloc.number.values:
    columns['Ensemble_Member_' + str(number)] = dsloc.dis24.sel(number=number).values

df = pd.DataFrame(columns)

df.head()  # Show the first few rows of the dataframe

Unnamed: 0,TimeStep,Historical_times,Historical,Ensemble_Member_1,Ensemble_Member_2,Ensemble_Member_3,Ensemble_Member_4,Ensemble_Member_5,Ensemble_Member_6,Ensemble_Member_7,...,Ensemble_Member_41,Ensemble_Member_42,Ensemble_Member_43,Ensemble_Member_44,Ensemble_Member_45,Ensemble_Member_46,Ensemble_Member_47,Ensemble_Member_48,Ensemble_Member_49,Ensemble_Member_50
0,2018-11-16,2018-11-16 06:00:00,531.290039,531.759033,531.713623,532.092773,531.148682,532.010986,531.560791,531.872314,...,531.108887,531.606201,531.295654,532.25415,532.408936,531.291504,531.01709,532.227783,531.477783,532.131104
1,2018-11-17,2018-11-17 06:00:00,514.240723,520.054932,519.157959,520.054199,517.356201,519.814697,518.817139,519.572998,...,517.38208,519.28833,518.349365,520.481445,521.304199,518.422607,517.449219,520.364014,518.480713,521.119385
2,2018-11-18,2018-11-18 06:00:00,485.908936,499.068848,495.771973,498.254395,494.414795,497.291992,496.568359,497.165527,...,494.564209,497.425293,495.543213,498.221191,498.949219,496.565918,494.675293,498.007568,495.355225,499.368408
3,2018-11-19,2018-11-19 06:00:00,451.466797,466.343262,461.689209,465.563965,460.84375,463.716309,463.254883,463.352539,...,461.326172,464.057861,461.976562,464.276611,464.901123,462.705078,461.341309,463.63916,461.204834,465.299072
4,2018-11-20,2018-11-20 06:00:00,421.651855,430.835449,426.35498,430.731934,424.939453,428.470459,427.501709,427.348633,...,426.210693,428.241699,426.505371,428.075439,428.802246,426.35376,426.070801,427.170654,424.987549,428.919678


Now we will plot the members of the Dataframe

In [37]:
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plot

matplotlib.rcParams['figure.figsize'] = [12, 12]

plot.figure(figsize=(10,10),num='EFAS ECMWF 50 Perturbed Members vs Historical Simulations')
plot.title("Station : " + station.StationName.to_string(index=False) + 
           "\n River : " + station.River.to_string(index=False) +
           "\n Catchment : " + station.Catchment.to_string(index=False) +
          "\n Latitude : " + station.StationLat.to_string(index=False) +
          "\n Longitude : " + station.StationLon.to_string(index=False) +
          "\n Draining Area : " + station.DrainingArea.km2.Provider.to_string(index=False))
plot.xlabel('TimeStep in 24h')
plot.ylabel(dsloc.dis24.GRIB_name+' ' + dsloc.dis24.GRIB_units)
plot.plot( 'Historical_times', 'Historical', data=df, marker='o', markerfacecolor='blue', markersize=12, color='blue', linewidth=2)
for ens_member in df.filter(regex='Ensemble'):
    plot.plot( 'TimeStep', ens_member, data=df, marker='', color='lightblue', linewidth=1, linestyle='dashed')
plot.show()

<IPython.core.display.Javascript object>

We can also look at the member distribution using a box plot.
This allows us to look at the spread of the ensemble values at each forecast step.
We then add the Historical Simulations of discharge on top as a line plot to see how they compare.

In [11]:
%matplotlib notebook
matplotlib.rcParams['figure.figsize'] = [12, 12]

plot.figure(figsize=(10,10),num='EFAS ECMWF 50 Perturbed Members vs Historical Simulations Box Plot')
df2=df.filter(like='Ensemble')
plot.title("Station : " + station.StationName.to_string(index=False) + 
           "\n River : " + station.River.to_string(index=False) +
           "\n Catchment : " + station.Catchment.to_string(index=False) +
          "\n Latitude : " + station.StationLat.to_string(index=False) +
          "\n Longitude : " + station.StationLon.to_string(index=False) +
          "\n Draining Area : " + station.DrainingArea.km2.Provider.to_string(index=False))
plot.ylabel(dsloc.dis24.GRIB_name+' ' + dsloc.dis24.GRIB_units)
plot.xlabel('Days from Forecast Basetime ' + np.datetime_as_string(ds.time.values,unit='s'))
# Here we add the 6 Hour Base time and 24h to the values just to align it for the box plot.
plot.plot(df2.index+1.25,'Historical', data=df, marker='o', markerfacecolor='blue', markersize=12, color='blue', linewidth=2) 
plot.boxplot(df2)
plot.legend()
plot.show()

<IPython.core.display.Javascript object>