# Compression and serialization of seismic wavefields 

This brief notebook teaches, through hands-on experimentation, the essentials of compression and serialization for a realistic wavefield modeled with finite differences.

## Outline
1. Set up a small 2D seismic modeling example
1. generate wavefields to compress
1. exercise compression algorithms and benchmark
  * compression ratio
  * execution time

## Use of Memoryview
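A `memoryview` exposes the memory buffer of a Devito wavefield array as raw bytes without making a copy. The compression cell below wraps a block of saved time steps from `u._data` in a `memoryview` and passes it directly to each file writer.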

## Devito related imports grouped here

In [1]:
import numpy as np
from examples.seismic import RickerSource, Receiver, TimeAxis, Model, AcquisitionGeometry
from devito import (Grid, Function, TimeFunction, SpaceDimension, Constant, 
                    Eq, Operator, solve, configuration, norm)
from examples.seismic.self_adjoint import (acoustic_sa_setup, setup_w_over_q,
                                           SaIsoAcousticWaveSolver)
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
from timeit import default_timer as timer

# Display figures inline in the notebook, and scale up fonts
%matplotlib inline
mpl.rc('font', size=14)

# Make white background for plots, not transparent
plt.rcParams['figure.facecolor'] = 'white'

# Devito logging level: 'INFO' is active here. The commented-out 'DEBUG'
# alternative captures statistics on the performance of operators.
# configuration['log-level'] = 'DEBUG'
configuration['log-level'] = 'INFO'

## Setup the model and acoustic wave solver

We will use the self-adjoint system from `examples/seismic/self_adjoint`.

In [2]:
# NBVAL_IGNORE_OUTPUT

# Time axis and source frequency
nt, dt = 101, 1
tmin, tmax = 0.0, dt * (nt - 1)
fpeak = 0.010
omega = 2.0 * np.pi * fpeak
time_axis = TimeAxis(start=tmin, stop=tmax, step=dt)
time = np.linspace(tmin, tmax, nt)

# Spatial discretization
space_order = 8
npad = 10
dx = dz = 10
nx = nz = 251
shape, spacing, origin = (nx, nz), (dx, dz), (0., 0.)

# Constant material properties and the Q bounds passed to setup_w_over_q
dtype = np.float64
qmin, qmax = 0.1, 100000
v0 = np.full(shape, 1.5)  # velocity
b0 = np.ones(shape)       # buoyancy


def init_damp(func, nbl):
    """Callback passed to Model as `bcs`.

    Delegates to setup_w_over_q with this notebook's omega, qmin, qmax and
    npad; the `nbl` argument is accepted but not used.
    """
    return setup_w_over_q(func, omega, qmin, qmax, npad, sigma=0)


# Model
model = Model(origin=origin, shape=shape, vp=v0, b=b0, spacing=spacing,
              nbl=npad, space_order=space_order, bcs=init_damp,
              dtype=dtype, dt=dt)

# Source and receiver coordinates: a single source at the center of the
# model, and one receiver per z grid point along the x midline
src_coords = np.array([[dx * (nx-1) / 2, dz * (nz-1) / 2]], dtype=dtype)

rec_coords = np.column_stack((
    np.full(nz, dx * (nx-1) / 2, dtype=dtype),
    np.linspace(0.0, dz*(nz-1), nz),
))

geometry = AcquisitionGeometry(model, rec_coords, src_coords, t0=0.0,
                               tn=tmax, src_type='Ricker', f0=fpeak)

# Solver setup
solver = SaIsoAcousticWaveSolver(model, geometry, space_order=space_order)

# Solution -- every time step is saved for use in the compression tests
d, u, _ = solver.forward(dt=dt, save=nt)

# Value ranges of the receiver data and of the full wavefield
dmin = np.min(d.data[:])
dmax = np.max(d.data[:])
umin = np.min(u.data[:])
umax = np.max(u.data[:])

print("Receiver wavefield min/max; %+12.6e %+12.6e" % (dmin, dmax))
print("Total    wavefield min/max; %+12.6e %+12.6e" % (umin, umax))
print("d.data.shape; ", d.data.shape)
print("u.data.shape; ", u.data.shape)

Operator `WOverQ_Operator` run in 0.01 s
Operator `padfunc` run in 0.01 s
Operator `padfunc` run in 0.01 s
Operator `IsoFwdOperator` run in 0.06 s


Receiver wavefield min/max; -2.209856e+01 +3.264765e+01
Total    wavefield min/max; -2.209856e+01 +3.450400e+01
d.data.shape;  (101, 251)
u.data.shape;  (101, 271, 271)


In [3]:
# NBVAL_IGNORE_OUTPUT

# Continuous integration hooks
# Each computed wavefield is paired with the reference norm it must
# reproduce, so that the results are checked for repeatability
reference_norms = ((d, 211.122), (u, 466.577))

# Report both norms first ...
for wavefield, reference in reference_norms:
    print(norm(wavefield))

# ... then verify each against its reference (relative tolerance only)
for wavefield, reference in reference_norms:
    assert np.isclose(norm(wavefield), reference, atol=0, rtol=1e-3)

211.12176390411162
466.57703970006713


In [4]:
import gzip, lzma, bz2
import pprint  # for printing dictionaries below
import time    # for timing execution
from pathlib import Path # to get file size

# Compression modules keyed by file extension; "raw" has no module
cmps = dict(raw=None, gzip=gzip, lzma=lzma, bz2=bz2)

# Extensions, in the same order as the registry above
exts = list(cmps)

# Per-extension results: file objects, file sizes, and timings
files, sizes, times = {}, {}, {}

In [5]:
# Use memoryview to provide a copy-free conversion to raw bytes. The view is
# handed straight to each writer; no intermediate bytearray is built, since
# that would copy the whole block.
M = 20 # number time steps blocked for compression/serialization
mv = memoryview(u._data[nt-1-M:nt-1,:,:])

print("u._data.shape; ", u._data.shape)
print("mv.shape;      ", mv.shape)

print("")
for ext in exts:
    path = "file." + ext
    # "raw" has no compression module and is written with the built-in open()
    opener = open if cmps[ext] is None else cmps[ext].open
    # The with-block closes the file even if the write raises
    with opener(path, "wb") as handle:
        files[ext] = handle
        # `timer` is timeit.default_timer, imported with the other imports at
        # the top of the notebook. It is used instead of time.perf_counter()
        # because the name `time` is also bound to an array in the setup cell,
        # so this cell would fail if that cell were re-run after `import time`.
        t1 = timer()
        handle.write(mv)
        t2 = timer()
    # Only the write call is timed; closing the file is not included
    times[ext] = t2 - t1
    # File size on disk, in units of 1024**2 bytes
    sizes[ext] = Path(path).stat().st_size / 1024**2
    print("time; %8.4f sec -- size; %8.4f mb -- type; %s" % (times[ext], sizes[ext], ext))

u._data.shape;  (101, 287, 287)
mv.shape;       (20, 287, 287)

time;   0.0058 sec -- size;  12.5685 mb -- type; raw
time;   0.3018 sec -- size;   7.3336 mb -- type; gzip
time;   2.7395 sec -- size;   2.9805 mb -- type; lzma
time;   1.1551 sec -- size;   6.0650 mb -- type; bz2
