In [None]:
# This notebook runs preparation for SeismicGeometry tests and SeismicGeometry tests for different cube formats
import os
import sys
from datetime import date
import glob
import json
import nbformat
import numpy as np
from matplotlib import pyplot as plt
import shutil
from py.path import local
from tqdm.notebook import tqdm
import warnings

sys.path.append('../..') # for running py-script
sys.path.append('../../..') # for running this notebook directly
from seismiqb.batchflow.utils_notebook import run_notebook

warnings.filterwarnings("ignore")

In [None]:
# Defaults for running directly from this notebook
# DATESTAMP = date.today().strftime("%Y-%m-%d")
# NOTEBOOKS_DIR = './'
# DROP_EXTRA_FILES = True # drop files referring to successful tests
# SHOW_TEST_ERROR_INFO = True # whether to show info about a test case with an error (if an error exists)
# SAVING_DIR = './geometry_test_files/'
# GITHUB_MODE = False

In [None]:
# Constants and parameters:
# `DATESTAMP`, `SAVING_DIR` and `GITHUB_MODE` must already be defined when this cell runs
# (either by uncommenting the defaults cell or by supplying them externally).
CUBE_NAME = f'test_cube_{DATESTAMP}.sgy'
FORMATS = ['sgy', 'hdf5', 'qhdf5', 'blosc', 'qblosc']

# Storage preparation:
# The `tmp` dir contains cube files: cube in different formats and meta
# The `notebooks` dir contains notebooks results (notebooks copies with outputs)

if GITHUB_MODE:
    # Here SAVING_DIR must expose a `mkdir(name)` method
    # (presumably a `py.path.local` object — TODO confirm against the caller).
    _ = SAVING_DIR.mkdir("notebooks")
    _ = SAVING_DIR.mkdir("tmp")

else:
    # Clear and recreate workspace
    for folder_dir in ['tmp/', 'notebooks/']:
        dir_path = os.path.join(SAVING_DIR, folder_dir)

        # On a first run there is nothing to clear; only genuine deletion failures are reported
        if os.path.exists(dir_path):
            try:
                shutil.rmtree(dir_path)
            except OSError as e:
                print(f"Can't delete the directory {dir_path} : {e.strerror}")

        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    # If the previous run failed then we need to delete the corresponding timings.
    # The pattern is built with os.path.join so it stays valid whether or not
    # SAVING_DIR ends with a path separator.
    # NOTE(review): results are dumped into TEST_DIR when GITHUB_MODE is off (see the last cell);
    # confirm that SAVING_DIR is the directory that actually holds stale fail timings.
    failed_timings_file = glob.glob(os.path.join(str(SAVING_DIR), 'timings*fail*.json'))

    for file in failed_timings_file:
        try:
            os.remove(file)
        except OSError as e:
            print(f"Can't delete the file: {file} : {e.strerror}")

# Accumulated human-readable report; later cells append to it and finally write it to disk
msg = DATESTAMP + '\n\n'

In [None]:
# Run the notebook with preparation for tests.
# It contains: data creation, data loading checking and cube conversion into different formats
out_path_ipynb = os.path.join(SAVING_DIR, f'notebooks/geometry_test_preparation_out_{DATESTAMP}.ipynb')

exec_info = run_notebook(
    path=os.path.join(NOTEBOOKS_DIR, 'geometry_test_preparation.ipynb'),
    nb_kwargs={
        'CUBE_NAME': CUBE_NAME,
        'CUBE_SHAPE': (1000, 200, 400),
        'SEED': 42,
        'DATESTAMP': DATESTAMP,
        'SAVING_DIR': SAVING_DIR
    },
    insert_pos=1,
    out_path_ipynb=out_path_ipynb,
    display_links=False
)

# `exec_info` is treated as `True` on success; any other value is used as the number of the failed cell
if exec_info is True:
    msg += 'Data was successfully prepared.\n'
else:
    msg += f'An ERROR occured in cell number {exec_info}:\n{out_path_ipynb}\n'

    if SHOW_TEST_ERROR_INFO:
        # Add error traceback into the message
        out_notebook = nbformat.read(out_path_ipynb, as_version=4)
        cell_info = out_notebook['cells'][exec_info + 1] # plus one because we inserted an additional cell

        for output in cell_info.get('outputs', []):
            if output.get('output_type', False) == 'error':
                # The report of this cell lives in `msg`; `current_message` only exists in the
                # per-format loop further below and must not be referenced here.
                msg += "TRACEBACK: \n"

                # A missing traceback is treated as empty instead of crashing the report
                for line in output.get('traceback', None) or []:
                    msg += line
                break

print(msg)

In [None]:
# Run the test notebook for the cube in each data format.
# It contains: checking data; attributes, slides, crops loading test, data loading timings and visualization tests.
timings = {}    # timings loaded from the per-format json files written by successful runs
all_OK = True   # becomes False as soon as a single format fails

for f in tqdm(FORMATS):
    current_message = ''

    # Run and save the test notebook
    out_path_ipynb = os.path.join(SAVING_DIR, f'notebooks/geometry_test_data_format_{f.upper()}_out_{DATESTAMP}.ipynb')

    exec_info = run_notebook(
        path=os.path.join(NOTEBOOKS_DIR, 'geometry_test_data_format.ipynb'),
        nb_kwargs={
            'CUBE_NAME': CUBE_NAME.replace('sgy', f),
            'TEST_DIR': TEST_DIR,
            'N_SLIDE': 1000,
            'N_CROP': 300,
            'FIGSIZE': (12, 6),
            'SEED': 42,
            'DATESTAMP': DATESTAMP,
            'DROP_EXTRA_FILES': DROP_EXTRA_FILES,
            'SAVING_DIR': SAVING_DIR
        },
        insert_pos=1,
        out_path_ipynb=out_path_ipynb,
        display_links=False
    )

    # Saving logs
    # `exec_info` is treated as `True` on success; any other value is used as the number of the failed cell
    if exec_info is True:
        with open(os.path.join(SAVING_DIR, f'tmp/timings_{f}_{DATESTAMP}.json'), "r") as infile:
            timings.update(json.load(infile))

        # If everything is OK we can delete the test notebook
        notebook_deleted = False

        if DROP_EXTRA_FILES:
            try:
                os.remove(out_path_ipynb)
                notebook_deleted = True
            except OSError as e:
                print(f"Can't delete the file: {out_path_ipynb} : {e.strerror}")

        # Only claim the deletion when it really happened
        if notebook_deleted:
            current_message += f'Notebook for {f.upper()} executed correctly and was deleted.\n'
        else:
            current_message += f'Notebook for {f.upper()} executed correctly.\n'
    else:
        all_OK = False
        current_message += f'An ERROR occured in cell number {exec_info}:\n'

        if SHOW_TEST_ERROR_INFO:
            # Get first line in the error cell. It contains comment with a test case.
            out_notebook = nbformat.read(out_path_ipynb, as_version=4)
            cell_info = out_notebook['cells'][exec_info + 1] # plus one because we inserted an additional cell
            first_line = cell_info['source'].split('\n')[0]

            # `startswith` (instead of indexing) keeps an empty first line from raising IndexError
            if first_line.startswith('#'):
                # Prettifyings: drop the leading '#' and an optional trailing ':'
                if first_line.endswith(':'):
                    first_line = first_line[1:-1]
                else:
                    first_line = first_line[1:]

                current_message += f'Test \"{first_line.strip()}\" failure in:\n\n'

            # And add error traceback into the message
            for output in cell_info.get('outputs', []):
                if output.get('output_type', False) == 'error':
                    current_message += "TRACEBACK: \n"

                    # A missing traceback is treated as empty instead of crashing the report
                    for line in output.get('traceback', None) or []:
                        current_message += line
                    break


        current_message += f'{out_path_ipynb}\n\n'

    print(current_message)
    msg += current_message

In [None]:
# Show the accumulated report followed by the collected timings, each starting on its own line
print(msg, timings, sep='\n')

In [None]:
# Visualize timings
def plot_ax(dct, unit, title, ax):
    """ Draw a bar chart of the `dct` items on the given axes and label it.

    Parameters
    ----------
    dct : dict
        Keys are used as bar positions, values as bar heights.
    unit : str
        Label of the y-axis.
    title : str
        Title of the axes.
    ax : axes-like object
        Must provide `bar`, `set_title`, `set_xlabel` and `set_ylabel`
        (presumably a `matplotlib.axes.Axes`).

    Returns
    -------
    The very same `ax` object that was passed in.
    """
    positions, heights = dct.keys(), dct.values()
    ax.bar(positions, heights)

    ax.set_title(title, fontsize=20)
    ax.set_xlabel('Storage format', fontsize=16)
    ax.set_ylabel(unit, fontsize=16)
    return ax

# One panel per loading mode: (key inside each format's timings, panel title)
panels = (
    ('slide', "Slide loading timings"),
    ('crop', "Crop loading timings"),
)

fig, axs = plt.subplots(1, 2, figsize=(15, 6))

for idx, (mode, panel_title) in enumerate(panels):
    # Wall time of the current loading mode for every storage format
    wall_times = {fmt: stats[mode]['wall'] for fmt, stats in timings.items()}
    axs[idx] = plot_ax(dct=wall_times, unit="Time, ms", title=panel_title, ax=axs[idx])

plt.show()

In [None]:
# Dump timings and message and remove extra files
if all_OK:
    timings['state'] = 'OK'

    if DROP_EXTRA_FILES:
        # Remove old timings
        timings_files = glob.glob(os.path.join(str(SAVING_DIR), 'timings*.json'))
        for file in timings_files:
            try:
                os.remove(file)
            except OSError as e:
                print(f"Can't delete the file: {file} : {e.strerror}")

        # Remove dirs with temporary files
        for folder_dir in ['tmp/', 'notebooks/']:
            dir_path = os.path.join(SAVING_DIR, folder_dir)

            try:
                shutil.rmtree(dir_path)
            except OSError as e:
                print(f"Can't delete the directory {dir_path} : {e.strerror}")

    timings_file_name = f'timings_{DATESTAMP}.json'
else:
    timings['state'] = 'FAIL'

    if DROP_EXTRA_FILES:
        # Remove timings for each data format.
        # The pattern is built with os.path.join so that it also works when SAVING_DIR has no
        # trailing separator (plain concatenation would then produce '<dir>tmp/timings*').
        timings_files = glob.glob(os.path.join(str(SAVING_DIR), 'tmp', 'timings*'))

        for file_name in timings_files:
            try:
                os.remove(file_name)
            except OSError as e:
                print(f"Can't delete the file: {file_name} : {e.strerror}")

    timings_file_name = f'timings_fail_{DATESTAMP}.json'


# Results are written into TEST_DIR, except in GITHUB_MODE where they stay in SAVING_DIR
if not GITHUB_MODE:
    SAVING_RES_DIR = TEST_DIR
else:
    SAVING_RES_DIR = SAVING_DIR

# Dump timings
with open(os.path.join(SAVING_RES_DIR, timings_file_name), "w") as outfile:
    json.dump(timings, outfile)

# Message: drop old and save new
msg_files = glob.glob(os.path.join(str(SAVING_RES_DIR), 'message*.txt'))

for file in msg_files:
    try:
        os.remove(file)
    except OSError as e:
        print(f"Can't delete the file: {file} : {e.strerror}")

with open(os.path.join(SAVING_RES_DIR, f'message_{DATESTAMP}.txt'), "w") as outfile:
    outfile.write(msg)