In [None]:
# This notebook runs preparation for SeismicGeometry tests and SeismicGeometry tests for different cube formats
import os
import sys
from datetime import date
import warnings
warnings.filterwarnings("ignore")

import glob
import json
import pprint
from shutil import rmtree
import nbformat
from matplotlib import pyplot as plt
from py.path import local
from tqdm.notebook import tqdm

sys.path.append('../..') # for running tests py-script
sys.path.append('../../..') # for running this notebook directly

from seismiqb.batchflow.utils_notebook import run_notebook
from seismiqb.tests.utils import extract_traceback

In [None]:
""" Constants and flags that parametrize this test notebook.

Workspace constants:

DATESTAMP : str
    Execution date in "YYYY-MM-DD" format.
    It is embedded into the names of saved notebook executions and temporary files.
NOTEBOOKS_DIR : str
    Path to the directory with test .ipynb files.
LOGS_DIR : str
    Path to the directory with test logs (timings, message).
OUTPUT_DIR : str
    Path to the directory for saving results and temporary files
    (executed notebooks, logs, data files like cubes, etc.).
    By default it is the same directory as LOGS_DIR.

Execution parameters:

USE_TMP_OUTPUT_DIR : bool
    Whether to use pytest tmpdir as a workspace.
    If True, then all files are saved in temporary directories.
    If False, then all files are saved in local directories.
REMOVE_OUTDATED_FILES : bool
    Whether to remove outdated files which relate to previous executions.
REMOVE_EXTRA_FILES : bool
    Whether to remove extra files after execution.
    Extra files are temporary files and execution savings that relate to successful tests.
SHOW_TEST_ERROR_INFO : bool
    Whether to add the error traceback of a failed notebook to the output message.
    NOTE(review): SHOW_MESSAGE, which this flag was documented to depend on,
    is not defined in this notebook -- confirm whether the dependency still exists.
"""

# Workspace constants
DATESTAMP = date.today().isoformat()  # ISO date, i.e. "YYYY-MM-DD"
NOTEBOOKS_DIR = './'
LOGS_DIR = './geometry_test_files/'
OUTPUT_DIR = LOGS_DIR  # results are stored next to the logs

# Execution parameters
USE_TMP_OUTPUT_DIR = False
REMOVE_OUTDATED_FILES = True
REMOVE_EXTRA_FILES = True
SHOW_TEST_ERROR_INFO = True

# Preparation

Create the working directories, then create a fake cube and save it.

**Storage structure:**
___



**geometry_test_files** (tests root directory; `{DATESTAMP}` is the execution date in YYYY-MM-DD format)

&emsp;├── **notebooks** (directory with notebooks executions)

&emsp;│&emsp;&emsp;├── **geometry_test_data_format_{DATAFORMAT}_out_{DATESTAMP}.ipynb**

&emsp;│&emsp;&emsp;└── **geometry_test_preparation_out_{DATESTAMP}.ipynb**

&emsp;├── **tmp** (directory with temporary files)

&emsp;│&emsp;&emsp;├── **test_array.npy**

&emsp;│&emsp;&emsp;├── **test_cube_{DATESTAMP}.{DATAFORMAT}**

&emsp;│&emsp;&emsp;└── **timings_{DATAFORMAT}_{DATESTAMP}.json**

&emsp;├── **message_{DATESTAMP}.txt** (file with output message)

&emsp;└── **timings_{DATESTAMP}.json** (file with slide and crop loading timings; named **timings_fail_{DATESTAMP}.json** if a test failed)

---
**{DATAFORMAT}** stands for each data format in the **FORMATS** list.

In [None]:
# Constants and parameters:
CUBE_NAME = f'test_cube_{DATESTAMP}.sgy'
FORMATS = ['sgy', 'hdf5', 'qhdf5', 'blosc', 'qblosc']

# Storage preparation
if not USE_TMP_OUTPUT_DIR:
    # Clear and (re)create local workspace
    for folder_dir in ['tmp/', 'notebooks/']:
        dir_path = os.path.join(OUTPUT_DIR, folder_dir)

        # Delete only what exists: after a successful run with REMOVE_EXTRA_FILES these
        # directories are already gone, so there is nothing to remove and nothing to report
        if REMOVE_OUTDATED_FILES and os.path.exists(dir_path):
            try:
                rmtree(dir_path)
            except OSError as e:
                print(f"Can't delete the directory {dir_path} : {e.strerror}")

        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    if REMOVE_OUTDATED_FILES:
        # If the previous run failed, then we need to delete the corresponding timings.
        # The pattern is built with `os.path.join`, so it works whether or not
        # OUTPUT_DIR ends with a path separator
        failed_timings_files = glob.glob(os.path.join(str(OUTPUT_DIR), 'timings*fail*.json'))

        for file in failed_timings_files:
            try:
                os.remove(file)
            except OSError as e:
                print(f"Can't delete the file: {file} : {e.strerror}")

# Output message accumulated by the following cells; it starts with the execution date
msg = DATESTAMP + '\n\n'

In [None]:
# Execute the preparation notebook and save its executed copy.
# The preparation covers: data creation, data loading checking and cube conversion into different formats

# State shared with the following cells
all_OK = True   # becomes False as soon as any notebook fails
timings = {}    # filled by the data format tests

preparation_out_name = f'notebooks/geometry_test_preparation_out_{DATESTAMP}.ipynb'
out_path_ipynb = os.path.join(OUTPUT_DIR, preparation_out_name)

# Values passed into the executed notebook via `nb_kwargs`
preparation_kwargs = {
    # Workspace constants
    'DATESTAMP': DATESTAMP,
    'OUTPUT_DIR': OUTPUT_DIR,

    # Data creation parameters
    'CUBE_NAME': CUBE_NAME,
    'CUBE_SHAPE': (1000, 200, 400),
    'SEED': 42
}

exec_info = run_notebook(
    path=os.path.join(NOTEBOOKS_DIR, 'geometry_test_preparation.ipynb'),
    nb_kwargs=preparation_kwargs,
    insert_pos=2,
    out_path_ipynb=out_path_ipynb,
    display_links=False
)

# `exec_info` is compared with True for success; otherwise it is used as the failed cell number
if exec_info is not True:
    all_OK = False
    msg += f'An ERROR occured in cell number {exec_info}:\n{out_path_ipynb}\n'

    if SHOW_TEST_ERROR_INFO:
        # Add error traceback into the message;
        # plus one because we inserted a markdown cell
        msg += extract_traceback(path_ipynb=out_path_ipynb, cell_num=exec_info + 1)
else:
    msg += 'Data was successfully prepared.\n'

print(msg)

# Data format tests

In [None]:
# Run the test notebook for the cube in each data format.
# It contains: checking data; attributes, slides, crops loading test, data loading timings and visualization tests.
# For every format the cell prints a short report and appends it to the global message `msg`.
# A failure for one format does not stop the loop: the remaining formats are still tested.
if all_OK:
    # Swap only the file extension; `str.replace` would touch every 'sgy' substring in the name
    cube_basename = os.path.splitext(CUBE_NAME)[0]

    for data_format in tqdm(FORMATS):
        current_message = ''

        # Run and save the test notebook
        out_path_ipynb = os.path.join(
            OUTPUT_DIR,
            f'notebooks/geometry_test_data_format_{data_format.upper()}_out_{DATESTAMP}.ipynb'
        )

        exec_info = run_notebook(
            path=os.path.join(NOTEBOOKS_DIR, 'geometry_test_data_format.ipynb'),
            nb_kwargs={
                # Workspace constants
                'DATESTAMP': DATESTAMP,
                'LOGS_DIR': LOGS_DIR,
                'OUTPUT_DIR': OUTPUT_DIR,

                # Tests parameters
                'CUBE_NAME': f'{cube_basename}.{data_format}',
                'N_SLIDE': 1000,
                'N_CROP': 300,
                'SEED': 42,

                # Visualization parameters
                'FIGSIZE': (12, 6),

                # Tests running parameters
                'REMOVE_EXTRA_FILES': REMOVE_EXTRA_FILES
            },
            insert_pos=2,
            out_path_ipynb=out_path_ipynb,
            display_links=False
        )

        # Saving logs
        if exec_info is True:
            timings_path = os.path.join(OUTPUT_DIR, f'tmp/timings_{data_format}_{DATESTAMP}.json')
            with open(timings_path, "r") as infile:
                timings.update(json.load(infile))

            # If everything is OK we can delete the test notebook
            notebook_deleted = False
            if REMOVE_EXTRA_FILES:
                try:
                    os.remove(out_path_ipynb)
                    notebook_deleted = True
                except OSError as e:
                    print(f"Can't delete the file: {out_path_ipynb} : {e.strerror}")

            # Report the deletion only if it actually happened
            current_message += f'Notebook for {data_format.upper()} executed correctly'
            current_message += ' and was deleted.\n' if notebook_deleted else '.\n'
        else:
            all_OK = False
            current_message += f'An ERROR occured in cell number {exec_info}:\n'

            if SHOW_TEST_ERROR_INFO:
                # Add error traceback into the report of this format, right after the error header,
                # so that it is printed by this cell and keeps its place in the final message
                cell_num = exec_info + 1 # plus one because we inserted a markdown cell
                current_message += extract_traceback(path_ipynb=out_path_ipynb, cell_num=cell_num)

            current_message += f'{out_path_ipynb}\n\n'

        print(current_message)
        msg += current_message

In [None]:
# Show the accumulated message and the collected timings
print(msg)
pprint.pprint(timings)

In [None]:
# Visualize timings
def plot_ax(dct, unit, title, ax):
    """ Draw a bar chart for one benchmark unit on the given axis.

    Parameters
    ----------
    dct : dict
        Bar labels (keys) and bar heights (values).
    unit : str
        Label of the y-axis.
    title : str
        Title of the axis.
    ax : object
        Axis to draw on. It must provide `bar`, `set_title`, `set_xlabel` and `set_ylabel` methods.

    Returns
    -------
    The same `ax` object that was passed in.
    """
    labels, heights = dct.keys(), dct.values()
    ax.bar(labels, heights)

    ax.set_title(title, fontsize=20)
    ax.set_xlabel('Storage format', fontsize=16)
    ax.set_ylabel(unit, fontsize=16)
    return ax

# One panel per benchmark: the key in the timings of each format and the title of the panel
panels = [
    ('slide', "Slide loading timings"),
    ('crop', "Crop loading timings"),
]

fig, axs = plt.subplots(1, 2, figsize=(15, 6))

for panel_idx, (benchmark, panel_title) in enumerate(panels):
    # 'wall' entry of the benchmark for every key of `timings`
    wall_timings = {key: value[benchmark]['wall'] for key, value in timings.items()}
    axs[panel_idx] = plot_ax(dct=wall_timings, unit="Time, ms", title=panel_title, ax=axs[panel_idx])

plt.show()

# Exit

In [None]:
# Dump timings and message and remove extra files
def _remove_files(pattern):
    """ Delete every file matching the glob `pattern`; report files that can't be deleted and go on. """
    for file_name in glob.glob(pattern):
        try:
            os.remove(file_name)
        except OSError as e:
            print(f"Can't delete the file: {file_name} : {e.strerror}")


if all_OK:
    timings['state'] = 'OK'

    if REMOVE_OUTDATED_FILES:
        # Remove old timings
        _remove_files(os.path.join(str(OUTPUT_DIR), 'timings*.json'))

    if REMOVE_EXTRA_FILES:
        # Remove dirs with temporary files
        for folder_dir in ['tmp/', 'notebooks/']:
            dir_path = os.path.join(OUTPUT_DIR, folder_dir)

            try:
                rmtree(dir_path)
            except OSError as e:
                print(f"Can't delete the directory {dir_path} : {e.strerror}")

    timings_file_name = f'timings_{DATESTAMP}.json'

else:
    timings['state'] = 'FAIL'

    if REMOVE_EXTRA_FILES:
        # Remove timings for each data format.
        # The pattern is built with `os.path.join`, so it works whether or not
        # OUTPUT_DIR ends with a path separator
        _remove_files(os.path.join(str(OUTPUT_DIR), 'tmp', 'timings*'))

    timings_file_name = f'timings_fail_{DATESTAMP}.json'

# Dump timings
with open(os.path.join(OUTPUT_DIR, timings_file_name), "w") as outfile:
    json.dump(timings, outfile)

# Message: remove old and save new
if REMOVE_OUTDATED_FILES:
    _remove_files(os.path.join(str(OUTPUT_DIR), 'message*.txt'))

with open(os.path.join(OUTPUT_DIR, f'message_{DATESTAMP}.txt'), "w") as outfile:
    outfile.write(msg)