In [1]:
import numpy as np
from PIL import Image, ImageOps

import random
import os
import json

In [2]:
# --- Sampling configuration -------------------------------------------------
# Side length (in pixels) of each square window cut from a slice.
WINDOW_SIZE = 512
# A window is kept only when the sum of its label values exceeds this number.
WINDOW_THRESHOLD = 2000
# Number of windows drawn from every slice of a volume.
WINDOWS_PER_SLICE = 4

# --- Locations --------------------------------------------------------------
# Directory holding the raw .npz volumes.
RAW_PATH = os.path.join('..', '2d unet full data', 'data', 'raw')
# Root directory the PNG windows and metadata are written to.
SAVE_PATH = os.path.join('data_png', '512window')

# --- Input files ------------------------------------------------------------
# Each entry is a [seismic volume, fault-label volume] pair of file names.
# The training set is made of nine numbered volume pairs (1..9); evaluation
# uses a single pair.
files_for_stages = {
    'train': [
        [f'seistrain{volume_idx}.npz', f'faulttrain{volume_idx}.npz']
        for volume_idx in range(1, 10)
    ],
    'eval': [['seisval1.npz', 'faultval1.npz']],
}

# Seed for the window sampler so that "Restart Kernel -> Run All" regenerates
# exactly the same dataset.
RANDOM_SEED = 42
# Upper bound on rejection-sampling attempts for a single window. Without a
# bound, a slice in which no window exceeds WINDOW_THRESHOLD would loop forever.
MAX_SAMPLING_ATTEMPTS = 1000

random.seed(RANDOM_SEED)

# For every stage: cut random WINDOW_SIZE x WINDOW_SIZE windows out of each
# slice of each volume, keep only windows whose label sum exceeds
# WINDOW_THRESHOLD, and write the seismic / label windows as PNG files with
# matching names, plus a metadata.json index listing the pairs.
for stage in ('train', 'eval'):
    fault_path = os.path.join(SAVE_PATH, stage, 'fault')
    seis_path = os.path.join(SAVE_PATH, stage, 'seis')
    # Image.save() and open() do not create missing parent directories, so make
    # sure the output tree exists (this also creates SAVE_PATH/<stage>).
    os.makedirs(fault_path, exist_ok=True)
    os.makedirs(seis_path, exist_ok=True)

    img_cnt = 0
    skipped_cnt = 0
    metadata = []
    for data_names in files_for_stages[stage]:
        print(data_names)
        # Seismic volume, transposed after loading and min-max scaled to [0, 255].
        data = np.load(os.path.join(RAW_PATH, data_names[0]))['arr_0'].T
        min_data_value = np.min(data)
        max_data_value = np.max(data)
        data = (data - min_data_value) / (max_data_value - min_data_value) * 255

        # Label volume, transposed the same way; it must align with the data.
        labels = np.load(os.path.join(RAW_PATH, data_names[1]))['arr_0'].T
        assert data.shape == labels.shape

        # Walk over the third axis and treat every 2-D slice independently.
        for horizon_num in range(data.shape[2]):
            data_slice = data[:, :, horizon_num]
            labels_slice = labels[:, :, horizon_num]
            timelines, xlines = data_slice.shape
            for window_num in range(WINDOWS_PER_SLICE):
                # Rejection sampling: draw window origins until one contains
                # enough label mass, giving up after MAX_SAMPLING_ATTEMPTS.
                for _attempt in range(MAX_SAMPLING_ATTEMPTS):
                    random_xline = random.randint(0, xlines - WINDOW_SIZE)
                    random_timeline = random.randint(0, timelines - WINDOW_SIZE)
                    random_labels_window = labels_slice[
                        random_timeline:random_timeline + WINDOW_SIZE,
                        random_xline:random_xline + WINDOW_SIZE,
                    ]
                    if np.sum(random_labels_window) > WINDOW_THRESHOLD:
                        break
                else:
                    # No acceptable window found for this draw; skip it
                    # instead of hanging the notebook.
                    skipped_cnt += 1
                    continue

                random_data_window = data_slice[
                    random_timeline:random_timeline + WINDOW_SIZE,
                    random_xline:random_xline + WINDOW_SIZE,
                ]
                # Data and label images share the same file name in their
                # respective directories.
                metadata.append({
                    'data': f'{img_cnt}.png',
                    'label': f'{img_cnt}.png'
                })
                data_img = ImageOps.grayscale(Image.fromarray(random_data_window))
                # Labels are scaled by 255 before saving
                # (presumably a 0/1 mask -- TODO confirm against the raw files).
                label_img = ImageOps.grayscale(Image.fromarray(random_labels_window * 255))
                data_img.save(os.path.join(seis_path, f'{img_cnt}.png'))
                label_img.save(os.path.join(fault_path, f'{img_cnt}.png'))
                img_cnt += 1

    if skipped_cnt:
        print(f'{stage}: skipped {skipped_cnt} windows with no draw above the threshold '
              f'after {MAX_SAMPLING_ATTEMPTS} attempts')
    with open(os.path.join(SAVE_PATH, stage, 'metadata.json'), 'w') as file:
        json.dump(metadata, file)

['seistrain1.npz', 'faulttrain1.npz']
['seistrain2.npz', 'faulttrain2.npz']
['seistrain3.npz', 'faulttrain3.npz']
['seistrain4.npz', 'faulttrain4.npz']
['seistrain5.npz', 'faulttrain5.npz']
['seistrain6.npz', 'faulttrain6.npz']
['seistrain7.npz', 'faulttrain7.npz']
['seistrain8.npz', 'faulttrain8.npz']
['seistrain9.npz', 'faulttrain9.npz']
['seisval1.npz', 'faultval1.npz']


In [17]:
import cv2

In [18]:
def load_grayscale_with_channel(image_path):
    """Read an image as grayscale and prepend a singleton axis.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        The grayscale image with one extra leading axis (``axis=0``).

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file. ``cv2.imread`` signals failure by
        returning ``None`` rather than raising, and ``np.expand_dims(None, 0)``
        would silently produce a meaningless array of shape ``(1,)``.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    return np.expand_dims(image, axis=0)


# Sanity check: load sample window 38 from the PNG export and from the JPEG
# tree and print the resulting array shapes for the fault mask and the seismic
# image. The 'data/512window' JPEG tree is not written by this notebook --
# presumably it comes from an earlier export; verify it exists before running.
for export_root, extension in (('data_png/512window', 'png'), ('data/512window', 'jpeg')):
    faults_image = load_grayscale_with_channel(f'{export_root}/train/fault/38.{extension}')
    seis_image = load_grayscale_with_channel(f'{export_root}/train/seis/38.{extension}')
    print(np.shape(faults_image))
    print(np.shape(seis_image))

(1, 512, 512)
(1, 512, 512)
(1, 512, 512)
(1, 512, 512)
