This notebook shows an example of how to create the datasets used for FFN training.

In [1]:
%matplotlib notebook

import os
from typing import Callable, Optional

import matplotlib.pyplot as plt
import numpy as np
import yaml
from rockml.data.adapter import Datum
from rockml.data.adapter.seismic.segy import PostStackDataDumper
from rockml.data.adapter.seismic.segy.poststack import PostStackAdapter2D, PostStackDatum
from rockml.data.array_ops import crop_2d
from rockml.data.pipeline import Pipeline
from rockml.data.sampling import split_dataset
from rockml.data.transformations import Composer, Transformation
from rockml.data.transformations.seismic import image

def makedir(path: str):
    """ Create the directory *path*, including any missing parent directories.

    The directory is requested with mode 0o770 (read, write and execute for user and
    group, nothing for others); the operating system may restrict this further through
    the process umask. Nothing happens when the directory already exists.

    Args:
        path: (str) path to the directory to be created.
    """
    requested_mode = 0o770
    os.makedirs(path, mode=requested_mode, exist_ok=True)

In [2]:
params = dict()
# NOTE: the paths below are absolute and machine-specific; adjust them before running
# this notebook on another machine.
params['output_path'] = '/Users/sallesd/Projects/f3_ffn_db'
# Each YAML file is read inside a ``with`` block so its handle is closed as soon as the
# content has been parsed.
with open('/Users/sallesd/Projects/f3_info/info.yml') as info_file:
    params['segy_info'] = yaml.safe_load(info_file)
with open('/Users/sallesd/Projects/train_80.yml') as train_file:
    params['train_slices'] = yaml.safe_load(train_file)
with open('/Users/sallesd/Projects/test.yml') as test_file:
    params['test_slices'] = yaml.safe_load(test_file)
# Tile and stride shapes handed to image.ViewAsWindows.
params['tile_shape'] = (64, 64)
params['stride_shape'] = (8, 8)
params['gray_levels'] = 256
# Crop margins in the order (left, right, top, bottom), as consumed by image.Crop2D.
params['crop'] = (0, 0, 75, 0)
params['percentile'] = 5.0
# Ratio passed to split_dataset when separating the validation tiles.
params['valid_ratio'] = 0.1
# Used both as the number of blocks and as the number of cores when building datasets.
params['cores'] = 8

params['horizons_path_list'] = [
    line['path'] for line in params['segy_info']['horizon_pixel_ranges']
]
makedir(params['output_path'])

adapter = PostStackAdapter2D(
    segy_path=params['segy_info']['segy_path'],
    horizons_path_list=params['horizons_path_list'],
    data_dict=params['train_slices']
)
# Kept as the last expression so the scan result is displayed as the cell output.
adapter.initial_scan()

{'range_inlines': [100, 750],
 'range_crosslines': [300, 1250],
 'num_inlines': 651,
 'num_crosslines': 951,
 'range_time_depth': [0, 1848.0],
 'num_time_depth': 463,
 'res_inline': 1,
 'res_crossline': 1,
 'res_time_depth': 4.0,
 'range_x': [6054167, 6295763],
 'range_y': [60735564, 60904632]}

In [3]:
class Lambda(Transformation):
    """ Transformation that delegates the work to a user-supplied function.

    The wrapped function receives the datum as its first positional argument, followed
    by the keyword arguments given when the transformation was created.
    """

    def __init__(self, function: Callable[[Datum], Datum], **kwargs):
        """ Store the function and the keyword arguments to forward to it.

        Args:
            function: callable applied to every datum passed to this transformation.
            **kwargs: extra keyword arguments forwarded to *function* on every call.
        """
        self.kwargs = kwargs
        self.function = function

    def __call__(self, dataset: Datum) -> Datum:
        """ Apply the wrapped function to *dataset* and return whatever it returns. """
        wrapped, extra_args = self.function, self.kwargs
        return wrapped(dataset, **extra_args)

def filter_centered_tiles(datum: PostStackDatum) -> Optional[PostStackDatum]:
    """ Keep only the tiles whose label is non-zero around the tile center.

    The inspected window is what remains after removing ``shape // 2 - allow`` pixels
    from each border of ``datum.label`` (with ``allow = 2``), i.e. roughly the
    ``2 * allow`` central rows and columns.

    Args:
        datum: tile whose ``label`` attribute is indexed as a 2D array.

    Returns:
        The same *datum* when the central window holds at least one non-zero label
        value; None otherwise.
    """
    allow = 2
    height, width = datum.label.shape[0], datum.label.shape[1]
    # Clamp the margins at zero: for tiles of side 2 * allow + 1 or less the original
    # margin is zero or negative and the window must then cover the whole axis.
    vcrop = max(height // 2 - allow, 0)
    hcrop = max(width // 2 - allow, 0)
    # Explicit end indices are used because a negative-index slice such as [0:-0] is
    # empty, which would silently reject every small tile.
    center = datum.label[vcrop:height - vcrop, hcrop:width - hcrop]
    # np.any instead of a sum so that values of opposite sign cannot cancel out.
    if np.any(center):
        return datum
    return None


def merge_feat_centered_label(datum: PostStackDatum,
                              l_crop: int = 0,
                              r_crop: int = 0,
                              t_crop: int = 0,
                              b_crop: int = 0) -> PostStackDatum:
    """ Append a partially masked copy of the label to the features as an extra channel.

    The new channel is zero everywhere except inside the window that remains after
    removing *t_crop*/*b_crop* rows and *l_crop*/*r_crop* columns from the borders of the
    label; inside that window it holds the values returned by ``crop_2d``.

    Args:
        datum: tile whose ``features`` have the spatial shape of ``label`` plus a
            trailing channel axis.
        l_crop: number of columns blanked on the left side.
        r_crop: number of columns blanked on the right side.
        t_crop: number of rows blanked at the top.
        b_crop: number of rows blanked at the bottom.

    Returns:
        The same *datum* instance, with ``features`` replaced by the original channels
        followed by the new float32 channel (the datum is modified in place).

    Raises:
        AssertionError: when the spatial shape of the features differs from the label
            shape.
    """
    assert datum.features.shape[:-1] == datum.label.shape

    b_idx = datum.label.shape[0] - b_crop
    r_idx = datum.label.shape[1] - r_crop
    new_feat = np.zeros(datum.label.shape, dtype=np.float32)
    # NOTE(review): crop_2d is presumably called as (array, top, bottom, left, right) and
    # returns the label without those margins, so its result fits the window below;
    # confirm against rockml.data.array_ops.
    new_feat[t_crop:b_idx, l_crop:r_idx] += crop_2d(datum.label, t_crop, b_crop, l_crop, r_crop)
    # Concatenate along the channel axis instead of stacking and squeezing: the result is
    # the same for single-channel features, while multi-channel features are supported and
    # a spatial axis of length 1 is no longer dropped by an axis-less squeeze.
    datum.features = np.concatenate((datum.features, new_feat[..., np.newaxis]), axis=-1)
    return datum

Here we define the transformation composition for the first dataset. The features of this
dataset contain only the central part of the label. We use this dataset to train
our model for the first few epochs. We call this dataset DB01.

In [4]:
# Pre-processing steps shared by both datasets (DB01 and DB02).
crop_left, crop_right, crop_top, crop_bottom = params['crop']
tile_height, tile_width = params['tile_shape']

pre_proc = [
    image.Crop2D(crop_left=crop_left, crop_right=crop_right,
                 crop_top=crop_top, crop_bottom=crop_bottom),
    image.ScaleIntensity(gray_levels=params['gray_levels'],
                         percentile=params['percentile']),
    image.BinarizeMask(),
    image.ThickenLinesMask(n_points=1),
    image.ViewAsWindows(tile_shape=params['tile_shape'],
                        stride_shape=params['stride_shape']),
    Lambda(function=filter_centered_tiles),
]

# DB01 keeps the label only inside a window reaching 5 pixels from the tile center in
# every direction; everything outside these margins is blanked.
v_margin = tile_height // 2 - 5
h_margin = tile_width // 2 - 5
db01_transform = Lambda(
    function=merge_feat_centered_label,
    t_crop=v_margin,
    b_crop=v_margin,
    l_crop=h_margin,
    r_crop=h_margin,
)

composer = Composer(transformations=[*pre_proc, db01_transform])
pipeline = Pipeline(composer=composer)

In [5]:
# Build the DB01 tiles, using as many blocks as cores.
n_cores = params['cores']
db01 = pipeline.build_dataset(data_adapter=adapter, num_blocks=n_cores, cores=n_cores)

# Number of distinct label values found over all the tiles.
num_classes = len(np.unique([tile.label for tile in db01]))

# Getting validation set from the training tiles
train_db01, valid_db01 = split_dataset(dataset=db01, valid_ratio=params['valid_ratio'])

In [6]:
# Show the label and both feature channels of one training tile side by side. Calling
# plt.imshow repeatedly draws on the same axes, which leaves only the last image visible,
# so every image gets its own subplot here.
sample = train_db01[123]
panels = [
    ('Label', sample.label * 250),
    ('Features: channel 0', sample.features[:, :, 0]),
    # Channel 1 is the one appended by merge_feat_centered_label.
    ('Features: channel 1 (masked label)', sample.features[:, :, 1] * 250),
]

fig, axes = plt.subplots(1, len(panels), figsize=(12, 4))
for ax, (title, img) in zip(axes, panels):
    ax.imshow(img.astype(np.uint8))
    ax.set_title(title)
    ax.axis('off')

plt.show()

<IPython.core.display.Javascript object>

Here we define the transformation composition for the second dataset. The features of this
dataset contain partial information of the label. We use this dataset to train
our model for the remaining epochs. We call this dataset DB02.

In [7]:
# DB02 keeps the label inside a horizontal band around the tile center and blanks only the
# rightmost columns (one horizontal stride wide); the left side is left untouched.
db02_v_margin = params['tile_shape'][0] // 2 - 5
db02_transform = Lambda(
    function=merge_feat_centered_label,
    t_crop=db02_v_margin,
    b_crop=db02_v_margin,
    r_crop=params['stride_shape'][1],
)

# NOTE: `composer` and `pipeline` are rebound here, so any dataset that needs the previous
# pipeline has to be built before this cell runs.
composer = Composer(transformations=[*pre_proc, db02_transform])
pipeline = Pipeline(composer=composer)

In [8]:
# Build the DB02 tiles, using as many blocks as cores.
n_cores = params['cores']
db02 = pipeline.build_dataset(data_adapter=adapter, num_blocks=n_cores, cores=n_cores)

# Number of distinct label values found over all the tiles.
num_classes = len(np.unique([tile.label for tile in db02]))

# Getting validation set from the training tiles
train_db02, valid_db02 = split_dataset(dataset=db02, valid_ratio=params['valid_ratio'])

In [9]:
# Write every split to its own HDF file inside the output directory.
hdf_outputs = (
    ('train_db01.hdf', train_db01),
    ('valid_db01.hdf', valid_db01),
    ('train_db02.hdf', train_db02),
    ('valid_db02.hdf', valid_db02),
)
for file_name, tiles in hdf_outputs:
    PostStackDataDumper.to_hdf(tiles, os.path.join(params['output_path'], file_name))
