# Slicing and dicing GRIB data

## Terminology

A GRIB file consists of a sequence of self-contained GRIB *messages*. A GRIB file is represented as a *Fieldset* object in Metview. Each message contains the data for a single *field*, e.g. a single parameter generated at a single time for a single forecast step. A field contains a set of *gridpoints* geographically distributed in some way, plus metadata such as the parameter, the generation time, the forecast step and the centre that generated the data. A field may be plotted on a map, and a Fieldset may be plotted as an animation on a map.

## Setting up

In [None]:
import numpy as np
import metview as mv

In [None]:
# not strictly necessary to tell Metview that we're running in a Jupyter notebook,
# but we will call this function so that we can specify a larger font size
mv.setoutput('jupyter', output_font_scale=1.5)

## Reading and inspecting the data

In [None]:
data = mv.read('grib_to_be_sliced.grib')
print(data)

In [None]:
data.describe()

In [None]:
data.describe('r')

In [None]:
data.describe('z')

In [None]:
data.describe('t')

In [None]:
data.ls()

# Field selection

## Field selection through indexing

In [None]:
# select the first field (0-based indexing)
print(data[0])
data[0].ls()

In [None]:
# select the fourth field (0-based indexing)
data[3].ls()

In [None]:
# select the last field
data[-1].ls()

In [None]:
# index with numpy array
indices = np.array([1, 2, 0, 15])
data[indices].ls()

## Field selection through slicing

In [None]:
# select fields 4 to 7
data[4:8].ls()

In [None]:
# select fields 4 to 7, step 2
data[4:8:2].ls()

In [None]:
# select the last 5 fields
data[-5:].ls()

In [None]:
# reverse the fields' order
data[::-1].ls()

In [None]:
# assign this to a variable and write to disk
rev = data[::-1]
rev.write('reversed.grib')
print(rev)

## Field selection through metadata

In [None]:
# select() method, various ways
data.select(shortName='r').ls()

In [None]:
data.select(shortName='r', level=850).ls()

In [None]:
# keep the selection criteria in a dict so that they can be adjusted
# (here the level is changed from 850 to 500) before being passed to select()
criteria = {"shortName": "r", "level": 850}
criteria["level"] = 500
data.select(criteria).ls()

In [None]:
# shorthand way of expressing parameters and levels
data['r500'].ls()

In [None]:
# specify units - useful if different level types in the same fieldset
data['r300hPa'].ls()

## Combining fields

In [None]:
# generate 4 fieldsets - one will be from another GRIB file to show that we can
# combine fields from any number of different files

a = data[5]
b = data[78:80]
c = data['z']
d = mv.read('reversed.grib')[0]
print(a, b, c, d)

In [None]:
# create a new Fieldset out of existing ones
combined = mv.merge(a, b, c, d)
combined.ls()

In [None]:
# use the Fieldset constructor to do the same thing from a
# list of Fieldsets
combined = mv.Fieldset(fields=[a, b, c, d])
combined.ls()

In [None]:
# append to an existing Fieldset
print(combined)
combined.append(b)
print(combined)

# Point selection

## Area cropping

In [None]:
# first plot the data (5 fields, indices 9 to 13) to see what we've got
few_fields = data[9:14]
mv.plot(few_fields)

In [None]:
# select an area [N,W,S,E]
data_area = [70, -25, 28, 45]
data_on_subarea = mv.read(data=few_fields, area=data_area)

In [None]:
# plot the fields to see the result of the area cropping
mv.plot(data_on_subarea)

In [None]:
# zoom the map view into the selected area and add automatic contour styling
view = mv.geoview(
    map_area_definition="corners",
    area=data_area,
)
cont_auto = mv.mcont(
    legend=True,
    contour_automatic_setting="ecmwf",
    grib_scaling_of_derived_fields=True,
)
mv.plot(view, data_on_subarea, cont_auto)

## Point reduction with regridding

In [None]:
# draw a marker at each data point (contour lines switched off) so that
# the layout of the grid becomes visible
gridpoint_markers = mv.mcont(
    contour="off",
    contour_grid_value_plot="on",
    contour_grid_value_plot_type="marker",
)
mv.plot(view, data_on_subarea[0], gridpoint_markers)

In [None]:
# regrid to a lower-resolution octahedral reduced Gaussian grid
lowres_data = mv.read(data=data_on_subarea, grid="O80")
mv.plot(view, lowres_data[0], gridpoint_markers)

In [None]:
# regrid to a regular lat/lon grid
lowres_data = mv.read(data=data_on_subarea, grid=[3, 3]) # 3 degrees
mv.plot(view, lowres_data[0], gridpoint_markers)

## Masking

In [None]:
# masking in Metview means defining an area and either:
#   creating a field with 1s inside the area and 0s outside (missing=False)
#   or
#   turning the values outside the area into missing values (missing=True)

In [None]:
# we will use temperature data at step 0 to be masked
t0 = data.select(shortName='t', step=0)

### Direct masking
This is where we define regions of a field to be preserved, while the points outside those regions are filled with missing values.

In [None]:
print('Mean val for first field:', t0[0].average())

In [None]:
# define a rectangular mask; with missing=True the values outside the area
# are turned into missing values
# NOTE(review): the trailing comment gives the order as [N,W,S,E], but the first
# value (48) is smaller than the third (63) - confirm the intended corner order
rect_masked_data = mv.mask(t0, [48, -12, 63, 5], missing=True) # [N,W,S,E]
print('Mean val for first field:', rect_masked_data[0].average())
mv.plot(view, rect_masked_data, cont_auto)

In [None]:
# define a circular mask - centre in lat/lon, radius in m
circ_masked_data = mv.rmask(t0, [55, -4, 800*1000], missing=True) # [lat, lon, radius in m]
print('Mean val for first field:', circ_masked_data[0].average())
mv.plot(view, circ_masked_data, cont_auto)

In [None]:
# polygon area - we will use a shapefile from Magics
import shapefile # pip install pyshp
# the shapefile path is built relative to the Metview installation directory
# NOTE(review): assumes the Magics 'share' directory sits two levels above
# metview_dir - confirm for the local installation
metview_dir = mv.version_info()["metview_dir"]
sf = shapefile.Reader(metview_dir + "/../../share/magics/50m/ne_50m_land.shp")

In [None]:
# extract the list of points for the Great Britain polygon
shapes = sf.shapes()
# NOTE(review): 135 is a hard-coded shape index - presumably only valid for
# this particular version of ne_50m_land.shp; verify before reusing
points = shapes[135].points  # GB
# element 1 of each point is used as the latitude, element 0 as the longitude
lats = np.array([p[1] for p in points])
lons = np.array([p[0] for p in points])

In [None]:
poly_masked_data = mv.poly_mask(t0, lats, lons, missing=True)
print('Mean val for first field:', poly_masked_data[0].average())
mv.plot(view, poly_masked_data, cont_auto)

### Indirect masking
This is where we generate masks consisting of 1s where the points are inside a given region (or satisfy some other criteria) and 0s otherwise. We can then combine these and use them to provide a missing value mask to any field.

In [None]:
# contouring for 0 and 1 values
mask_1_and_0_contouring = mv.mcont(
    legend="on",
    contour="off",
    contour_level_selection_type="level_list",
    contour_level_list=[0, 1, 2],
    contour_shade="on",
    contour_shade_technique="grid_shading",
    contour_shade_max_level_colour="red",
    contour_shade_min_level_colour="yellow",
)

In [None]:
# define a rectangular mask; with missing=False the result is a field of
# 1s inside the area and 0s outside
# NOTE(review): the trailing comment gives the order as [N,W,S,E], but the first
# value (48) is smaller than the third (63) - confirm the intended corner order
rect_masked_data = mv.mask(t0, [48, -12, 63, 5], missing=False) # [N,W,S,E]
mv.plot(view, rect_masked_data[0], mask_1_and_0_contouring)

In [None]:
# define a circular mask - centre in lat/lon, radius in m
circ_masked_data = mv.rmask(t0, [55, -4, 800*1000], missing=False) # [lat, lon, radius in m]
mv.plot(view, circ_masked_data[0], mask_1_and_0_contouring)

In [None]:
# build a mask from a criterion on the values rather than from an area:
# compare the relative humidity at step 0 against a threshold of 75
r0 = data.select(shortName='r', step=0)
high_relhum = r0 > 75
mv.plot(view, high_relhum, mask_1_and_0_contouring)

In [None]:
# combine the masks with the 'or' operator (only useful for 1/0 masks)
combined_mask_data = rect_masked_data | circ_masked_data | high_relhum
mv.plot(view, combined_mask_data, mask_1_and_0_contouring)

In [None]:
# use this mask to replace 0s with missing values in the original data
combined_mask_data = mv.bitmap(combined_mask_data, 0) # replace 0 with missing vals
masked_data = mv.bitmap(t0, combined_mask_data) # copy missing vals over
print('Mean val for first field:', masked_data[0].average())
mv.plot(view, masked_data, cont_auto)

## Frames
Frames are useful to supply boundary conditions to a local area model.


In [None]:
# the frame parameter is the width of the frame in degrees
data_frame = mv.read(data=data, area=data_area, frame=5, grid=[1,1])
mv.plot(data_frame['t1000'], cont_auto)

## Vertical profiles

In [None]:
# let's plot a profile for each forecast step of temperature

# we will extract one Fieldset for each time step - each of these Fieldsets
# will contain all the vertical levels of temperature data for that time step
# we will end up with a list of these Fieldsets and plot a profile for each

steps = mv.unique(mv.grib_get_long(data, 'step'))
data_for_all_steps = [data.select(shortName='t', step=s) for s in steps]
for f in data_for_all_steps:
    print(f.grib_get(['step', 'level']))

In [None]:
# each step's profile gets its own colour: spread the hue evenly over the
# 0-360 range, one HSL colour string per step, then wrap each colour in an
# 'mgraph' definition
nsteps = len(steps)
colour_inc = 1/nsteps
hsl_colours = ['HSL(' + str(360*s*colour_inc) + ',1,0.5)' for s in range(nsteps)]
graph_colours = [mv.mgraph(legend=True, graph_line_colour=hsl) for hsl in hsl_colours]


# define a nice legend
legend = mv.mlegend(
    legend_display_type="disjoint",
    legend_entry_plot_direction="column",
    legend_text_composition="user_text_only",
    legend_entry_plot_orientation="top_bottom",
    legend_border_colour="black",
    legend_box_mode="positional",
    legend_box_x_position=2.5,
    legend_box_y_position=4,
    legend_box_x_length=5,
    legend_box_y_length=8,
    legend_text_font_size=0.5,
    legend_user_lines=[str(int(s)) for s in steps],
)

# finally, the magic happens here - the vertical profile view extracts the data
# at the given point at each level

vertical_axis = mv.maxis(
    axis_type="position_list",
    axis_tick_position_list=data_for_all_steps[0].grib_get_long('level')
)

vpview = mv.mvertprofview(
    input_mode="point",
    point=[50, -60], # lat,lon
    bottom_level=1000,
    top_level=300,
    vertical_scaling="log",
    level_axis=vertical_axis
)

mv.plot(vpview, list(zip(data_for_all_steps, graph_colours)), legend)

## Vertical cross sections

## Zonal means?

## Time-height sections

## Other Hovmoeller?

## Ensemble means?
Using xarray for now, but will use Metview's own code in the future

## Gridpoint selection

In [None]:
# nearest_gridpoint, interpolate(), single points, arrays of points, geopoints

## Time series

In [None]:
# uses nearest_gridpoint etc