# Cube conversion to HDF5

This notebook creates an `HDF5` version of each `SEG-Y` cube.

Pseudocode of this notebook looks like:

```python
for each cube:
    mkdir
    infer geometry
    if SHOW, log to std.out
    
    convert segy to hdf5
```

* The parameter `paths` lists the candidate cubes; the checkboxes displayed below it select which of them are converted
* `RECREATE` determines whether already converted volumes are re-converted
* `SHOW` controls whether results are shown in the notebook itself
* `DRY_RUN` can be used to check which operations will happen, without actually executing them

In [1]:
import os
import sys
import warnings
from tqdm.auto import tqdm
from glob import glob
import matplotlib.pyplot as plt

sys.path.append('..')
from seismiqb import SeismicGeometry, plot_image

In [2]:
from IPython.display import display
from ipywidgets import Checkbox, Output

In [4]:
# Root directory that is searched for cubes
DEFAULT_PATH = "/data/seismic_data/seismic_interpretation"

# Every `.sgy` file that sits one level deep, inside a directory with an underscore in its name
paths = sorted(glob(f'{DEFAULT_PATH}/*_*/*.sgy'))

# Cube name: the file name without its directory part and without the extension
names = [path.rsplit('/', 1)[-1].rsplit('.', 1)[0] for path in paths]

# One checkbox per cube, preceded by a master `All` checkbox
cube_boxes = {'All': Checkbox(False, description='All')}
cube_boxes.update({name: Checkbox(False, description=name) for name in names})

# The master checkbox keeps references to the per-cube checkboxes it drives
cube_boxes['All'].friends = [cube_boxes[name] for name in names]


def _propagate_all(change):
    """Copy the current value of the `All` checkbox to each of its `friends`."""
    owner = change['owner']
    for friend in owner.friends:
        friend.value = owner.value


cube_boxes['All'].observe(_propagate_all, 'value')
display(*cube_boxes.values())

Checkbox(value=False, description='All')

Checkbox(value=False, description='001_ETP')

Checkbox(value=False, description='006_TAYLAKOVSKY')

Checkbox(value=False, description='028_SAMBURG')

Checkbox(value=False, description='031_CHIST')

Checkbox(value=False, description='032_MYUG')

Checkbox(value=False, description='033_SAMBURG')

Checkbox(value=False, description='034_UKM')

Checkbox(value=False, description='038_ACHIM')

Checkbox(value=False, description='039_YY')

Checkbox(value=False, description='040_KET')

Checkbox(value=False, description='041_UJNOE')

Checkbox(value=False, description='042_ZAPZIM')

Checkbox(value=False, description='043_KARAM')

Checkbox(value=False, description='046_YETYPUR')

Checkbox(value=False, description='047_VYNGYPUR')

Checkbox(value=False, description='048_VENGAYKHA')

Checkbox(value=False, description='049_ZAPZIM')

Checkbox(value=False, description='058_VYNGYPUR')

Checkbox(value=False, description='059_NVT')

Checkbox(value=False, description='062_VYNGYPUR')

Checkbox(value=False, description='063_VYNGYPUR')

Checkbox(value=False, description='064_SALYM')

Checkbox(value=False, description='065_SALYM')

In [5]:
# When checked, cubes that already have an HDF5 file are converted again
RECREATE = Checkbox(value=False, description='RECREATE')
# When checked, the geometry summary and the quality map are shown for each converted cube
SHOW = Checkbox(value=True, description='SHOW')
# When checked, the conversion cell only prints which cubes it would convert
DRY_RUN = Checkbox(value=True, description='DRY RUN')

display(RECREATE, SHOW, DRY_RUN)

Checkbox(value=False, description='RECREATE')

Checkbox(value=True, description='SHOW')

Checkbox(value=True, description='DRY RUN')

In [7]:
%%time
checked_paths = [paths[names.index(name)] for name in names if cube_boxes[name].value]
for path_cube in tqdm(checked_paths, ncols=1100):
    if not os.path.exists(path_cube):
        continue
    
    hdf5_path_cube = '.'.join((os.path.splitext(path_cube)[0], 'hdf5'))
    if os.path.exists(hdf5_path_cube) and not RECREATE.value:
        continue
    
    if DRY_RUN.value:
        print('Will convert :::', path_cube)
        continue
    
    if SHOW.value:
        print('▆'*55); print('▆'*55);
        print('Working with', path_cube)

    geometry = SeismicGeometry(
        path_cube,
        headers=SeismicGeometry.HEADERS_POST_FULL,
        index_headers = SeismicGeometry.INDEX_POST,
        collect_stats=True, spatial=True, recollect=True
    )
    qmap = geometry.quality_map

    if SHOW.value:
        print(geometry)
        plot_image(qmap, cmap='Reds')
        plt.show()
    
    geometry.make_hdf5()

  0%|                                                                                                         …

▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
Working with /data/seismic_data/seismic_interpretation/038_ACHIM/038_ACHIM.sgy


Finding min/max:   0%|                                                                                        …

Collecting stats for 038_ACHIM:   0%|                                                                         …

KeyboardInterrupt: 