In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from PIL import Image, ImageOps

import random
import os
import json
import math

# Default figure size (width, height in inches) for every plot in this notebook.
matplotlib.rc('figure', figsize=(11.75, 8.5))

In [2]:
def pad_data(data, divisor=16):
    """Edge-pad a 2-D array so that both of its sides are multiples of `divisor`.

    The padding needed along each axis is split between the two sides: the
    leading side (top / left) gets ``round(total / 2)`` and the trailing side
    (bottom / right) gets the remainder. Note that Python's ``round`` rounds
    halves to the nearest even integer, so an odd total may put the extra
    element on either side.

    Args:
        data: 2-D array (its ``shape`` must unpack into exactly two values).
        divisor: Each output dimension is rounded up to a multiple of this.

    Returns:
        A tuple ``(padded, [top, bottom, left, right])`` where ``padded`` is
        the array extended by replicating its edge values and the list holds
        the number of rows / columns added on each side.
    """
    def _split_padding(size):
        # Distance from `size` up to the next multiple of `divisor`.
        total = math.ceil(size / divisor) * divisor - size
        leading = round(total / 2)
        return leading, total - leading

    n_rows, n_cols = data.shape
    top, bottom = _split_padding(n_rows)
    left, right = _split_padding(n_cols)

    padded = np.pad(data, pad_width=((top, bottom), (left, right)), mode='edge')
    return padded, [top, bottom, left, right]

In [3]:
# Each exported image side is edge-padded up to a multiple of DIVISOR (see pad_data).
DIVISOR = 32
RAW_PATH = os.path.join('data', 'raw')
SAVE_PATH = os.path.join('data', 'complete')

# Output stage -> list of [seismic volume file, fault label file] pairs.
# NOTE(review): the 'eval' stage reads the *val* files and the 'val' stage reads
# the *test* files. Kept as-is; confirm this matches the downstream naming.
files_for_stages = {
    'train': [['seistrain1.npz', 'faulttrain1.npz'],
              ['seistrain2.npz', 'faulttrain2.npz'],
              ['seistrain3.npz', 'faulttrain3.npz'],
              ['seistrain4.npz', 'faulttrain4.npz'],
              ['seistrain5.npz', 'faulttrain5.npz'],
              ['seistrain6.npz', 'faulttrain6.npz'],
              ['seistrain7.npz', 'faulttrain7.npz'],
              ['seistrain8.npz', 'faulttrain8.npz'],
              ['seistrain9.npz', 'faulttrain9.npz']],
    'eval': [['seisval1.npz', 'faultval1.npz']],
    'val': [['seistest1.npz', 'faulttest1.npz']]
}

# For every stage: slice each volume along its last (post-transpose) axis,
# pad every slice, and write the seismic / label slices as numbered JPEGs
# together with a metadata.json describing each image pair.
for stage in ('train', 'eval', 'val'):
    fault_path = os.path.join(SAVE_PATH, stage, 'fault')
    seis_path = os.path.join(SAVE_PATH, stage, 'seis')
    # Create the output folders up front so the cell also runs on a fresh
    # checkout; existing folders are left untouched.
    os.makedirs(fault_path, exist_ok=True)
    os.makedirs(seis_path, exist_ok=True)

    img_cnt = 0  # running image index, shared by all volumes of this stage
    metadata = []
    for data_names in files_for_stages[stage]:
        print(data_names)

        # Load inside a context manager so the .npz archive handle is closed
        # as soon as the array has been read into memory.
        with np.load(os.path.join(RAW_PATH, data_names[0])) as seis_archive:
            data = seis_archive['arr_0'].T

        # Min-max normalise the whole volume to the 0..255 range.
        min_data_value = np.min(data)
        max_data_value = np.max(data)
        value_range = max_data_value - min_data_value
        if value_range == 0:
            # A constant volume would give 0/0 -> NaN pixels; export it as all zeros.
            data = np.zeros(data.shape, dtype=float)
        else:
            data = (data - min_data_value) / value_range * 255

        with np.load(os.path.join(RAW_PATH, data_names[1])) as fault_archive:
            labels = fault_archive['arr_0'].T
        assert data.shape == labels.shape, (
            f'shape mismatch between {data_names[0]} {data.shape} '
            f'and {data_names[1]} {labels.shape}'
        )

        for horizon_num in range(data.shape[2]):
            data_slice = data[:, :, horizon_num]
            labels_slice = labels[:, :, horizon_num]

            # Unpadded slice size. Not used below; the names are retained in
            # case later cells of the notebook read them.
            timelines, xlines = data_slice.shape
            data_slice, data_padding = pad_data(data_slice, DIVISOR)
            labels_slice, labels_padding = pad_data(labels_slice, DIVISOR)
            # Presumably labels are 0/1, so this maps them to 0/255 -- TODO confirm.
            labels_slice = labels_slice * 255

            data_img = ImageOps.grayscale(Image.fromarray(data_slice))
            label_img = ImageOps.grayscale(Image.fromarray(labels_slice))
            # Data and label share the same file name but live in the separate
            # 'seis' and 'fault' folders; both slices have the same shape, so
            # one padding record describes both.
            metadata.append({
                'data': f'{img_cnt}.jpeg',
                'label': f'{img_cnt}.jpeg',
                'padding': data_padding
            })
            # NOTE(review): JPEG is lossy, so label pixels are not stored
            # exactly; consider a lossless format if consumers need exact
            # mask values.
            data_img.save(os.path.join(seis_path, f'{img_cnt}.jpeg'))
            label_img.save(os.path.join(fault_path, f'{img_cnt}.jpeg'))
            img_cnt += 1
    with open(os.path.join(SAVE_PATH, stage, 'metadata.json'), 'w') as file:
        json.dump(metadata, file)

['seistrain1.npz', 'faulttrain1.npz']
['seistrain2.npz', 'faulttrain2.npz']
['seistrain3.npz', 'faulttrain3.npz']
['seistrain4.npz', 'faulttrain4.npz']
['seistrain5.npz', 'faulttrain5.npz']
['seistrain6.npz', 'faulttrain6.npz']
['seistrain7.npz', 'faulttrain7.npz']
['seistrain8.npz', 'faulttrain8.npz']
['seistrain9.npz', 'faulttrain9.npz']
['seisval1.npz', 'faultval1.npz']
['seistest1.npz', 'faulttest1.npz']
