## SEGY Denoising Inference

This notebook runs denoising inference on a SEG-Y volume:
1. Generate a seisnc file corresponding to the SEG-Y (skipped if it already exists)
2. Perform inference on the seisnc, one inline at a time
3. Write a new, denoised seisnc
4. Write a new SEG-Y from the denoised seisnc

In [29]:
import os
import sys 
sys.path.append('../models/')
sys.path.append('../utils/')

import numpy as np
import xarray as xr 

from dask.distributed import Client
import matplotlib.pyplot as plt

import torch

import warnings 
warnings.filterwarnings('ignore')

from segy_input import load_seisnc, get_config , derive_byte_locations, generate_xarray_from_segy, load_seisnc
from denoising_autoencoder_xarray import LitAutoEncoder

In [38]:
# Inputs: the trained autoencoder checkpoint and the noisy SEG-Y volume to denoise.
# NOTE(review): the checkpoint path contains a literal backslash ('..\lightning_logs').
# It is escaped here so the string value is unchanged and Python no longer warns about
# the invalid '\l' escape; on POSIX this was presumably meant to be '../lightning_logs' — confirm.
model_checkpoint_path = '/local1/workspace/random_denoising/scripts/..\\lightning_logs/autoencoder/dae_noise_0.5_23_01_2023_16_09_36/checkpoints/epoch=0-step=10648.ckpt'
noisy_segy_path = '/local1/workspace/random_denoising/data/mp41b_pstm_stk_rnd/MP41B_PSTM_STK_RNDnoisy_0.1.sgy'

CONFIG_PATH = '../config/config_random_denoising.yaml'

# Derive every output path from the input SEG-Y path with its extension removed.
# splitext only strips the final extension, so a '.sgy' substring elsewhere in the
# path (or a '.segy' input) cannot truncate or corrupt the derived names.
_segy_stem = os.path.splitext(noisy_segy_path)[0]
noisy_seisnc_path = _segy_stem + '.seisnc'
denoised_seisnc_path = _segy_stem + 'denoised.seisnc'
denoised_segy_path = _segy_stem + 'denoised.segy'

In [31]:
# Read the run configuration from CONFIG_PATH; a later cell reads
# config['byte_locations'] as defaults for the SEG-Y header lookup.
config = get_config(CONFIG_PATH)

# Start a Dask distributed client with default settings (the captured output shows
# a LocalCluster). Leaving `client` as the last expression renders its summary.
client  = Client()
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:38235/status,

0,1
Dashboard: http://127.0.0.1:38235/status,Workers: 11
Total threads: 88,Total memory: 251.66 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35106,Workers: 11
Dashboard: http://127.0.0.1:38235/status,Total threads: 88
Started: Just now,Total memory: 251.66 GiB

0,1
Comm: tcp://127.0.0.1:43414,Total threads: 8
Dashboard: http://127.0.0.1:32958/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:44367,
Local directory: /tmp/dask-worker-space/worker-c9lti4k0,Local directory: /tmp/dask-worker-space/worker-c9lti4k0

0,1
Comm: tcp://127.0.0.1:45364,Total threads: 8
Dashboard: http://127.0.0.1:36804/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:44589,
Local directory: /tmp/dask-worker-space/worker-w5tts4ch,Local directory: /tmp/dask-worker-space/worker-w5tts4ch

0,1
Comm: tcp://127.0.0.1:42539,Total threads: 8
Dashboard: http://127.0.0.1:33907/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:39299,
Local directory: /tmp/dask-worker-space/worker-4bfwxzrm,Local directory: /tmp/dask-worker-space/worker-4bfwxzrm

0,1
Comm: tcp://127.0.0.1:42917,Total threads: 8
Dashboard: http://127.0.0.1:43784/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:34906,
Local directory: /tmp/dask-worker-space/worker-5cm2pjz3,Local directory: /tmp/dask-worker-space/worker-5cm2pjz3

0,1
Comm: tcp://127.0.0.1:35667,Total threads: 8
Dashboard: http://127.0.0.1:37448/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:39111,
Local directory: /tmp/dask-worker-space/worker-6ujm4mxd,Local directory: /tmp/dask-worker-space/worker-6ujm4mxd

0,1
Comm: tcp://127.0.0.1:35091,Total threads: 8
Dashboard: http://127.0.0.1:43383/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:39024,
Local directory: /tmp/dask-worker-space/worker-smkgc6r8,Local directory: /tmp/dask-worker-space/worker-smkgc6r8

0,1
Comm: tcp://127.0.0.1:41920,Total threads: 8
Dashboard: http://127.0.0.1:34304/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:33918,
Local directory: /tmp/dask-worker-space/worker-_hq15jdh,Local directory: /tmp/dask-worker-space/worker-_hq15jdh

0,1
Comm: tcp://127.0.0.1:36899,Total threads: 8
Dashboard: http://127.0.0.1:42247/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:41295,
Local directory: /tmp/dask-worker-space/worker-y1wmanqu,Local directory: /tmp/dask-worker-space/worker-y1wmanqu

0,1
Comm: tcp://127.0.0.1:40066,Total threads: 8
Dashboard: http://127.0.0.1:33937/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:33562,
Local directory: /tmp/dask-worker-space/worker-i1utld42,Local directory: /tmp/dask-worker-space/worker-i1utld42

0,1
Comm: tcp://127.0.0.1:38153,Total threads: 8
Dashboard: http://127.0.0.1:40201/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:46544,
Local directory: /tmp/dask-worker-space/worker-g6bq1i44,Local directory: /tmp/dask-worker-space/worker-g6bq1i44

0,1
Comm: tcp://127.0.0.1:40056,Total threads: 8
Dashboard: http://127.0.0.1:37700/status,Memory: 22.88 GiB
Nanny: tcp://127.0.0.1:35014,
Local directory: /tmp/dask-worker-space/worker-ht_le486,Local directory: /tmp/dask-worker-space/worker-ht_le486


In [32]:
# Convert the noisy SEG-Y to seisnc only when no seisnc file exists yet at the
# target path; an existing file is reused as-is.
seisnc_already_built = os.path.exists(noisy_seisnc_path)

if not seisnc_already_built:
    # Header byte positions are derived from the SEG-Y, with the configured values as defaults.
    byte_locations = derive_byte_locations(
        noisy_segy_path,
        defaults=config['byte_locations'],
    )
    generate_xarray_from_segy(noisy_segy_path, noisy_seisnc_path, byte_locations)


In [33]:
# `load_from_checkpoint` is a classmethod: call it on the class rather than on a
# throw-away instance (recent Lightning releases raise when it is called on an instance).
# Fall back to the CPU when no CUDA device is available so the cell still runs.
inference_device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = LitAutoEncoder.load_from_checkpoint(
    model_checkpoint_path,
    map_location=inference_device,  # lets a GPU-saved checkpoint load on a CPU-only host
).to(inference_device)
# Switch to evaluation mode; as the last expression this also displays the model summary.
model.eval()

LitAutoEncoder(
  (encoder): Encoder(
    (l1): Sequential(
      (0): Conv2d(1, 48, kernel_size=(4, 4), stride=(1, 1), padding=same, bias=False)
      (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): LeakyReLU(negative_slope=0.2)
      (4): Dropout(p=0.25, inplace=False)
      (5): Conv2d(48, 32, kernel_size=(4, 4), stride=(1, 1), padding=same, bias=False)
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): LeakyReLU(negative_slope=0.2)
      (9): Dropout(p=0.25, inplace=False)
      (10): Conv2d(32, 16, kernel_size=(4, 4), stride=(1, 1), padding=same, bias=False)
      (11): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (12): LeakyReLU(negative_slope=0.2)
      (13): Dropout(p=0.2

In [34]:
# inference function 



# Open the noisy volume from its seisnc file; it is grouped by 'iline' for inference below.
noisy_seisnc = load_seisnc(noisy_seisnc_path)


def inference(iline_ds) : # dataset
    """Denoise a single inline section with the module-level ``model``.

    Intended to be used with ``Dataset.groupby('iline').map(inference)``.

    Parameters
    ----------
    iline_ds : xarray.Dataset
        One inline of the seismic volume. Must contain a ``data`` variable with
        ``xline`` and ``twt`` dimensions and a scalar ``iline`` coordinate.

    Returns
    -------
    xarray.Dataset
        A copy of ``iline_ds`` whose ``data`` variable holds the denoised section
        with dimensions ``('twt', 'xline')``. Samples the network output does not
        cover are left at zero.
    """
    print('Denoising iline #', iline_ds['iline'].values)

    # Shallow copy: replacing 'data' below must not mutate the group handed in by groupby.
    denoised_ds = iline_ds.copy()

    # Read the amplitude section explicitly as (xline, twt); this is the orientation
    # that is fed to the network.
    # NOTE(review): confirm this matches the orientation the model was trained on.
    section = iline_ds['data'].transpose('xline', 'twt').to_numpy()

    # Output buffer in (twt, xline) order, same dtype as the input, pre-filled with zeros.
    result = np.zeros_like(section.T)

    # Robust amplitude scale: clip at the 99th percentile of |amplitude|, then scale to [-1, 1].
    abs_max = np.quantile(np.abs(section), 0.99)
    # A dead section has abs_max == 0; divide by 1 instead to avoid producing NaNs.
    scale = abs_max if abs_max != 0 else 1.0
    section_norm = np.clip(section, -abs_max, abs_max) / scale

    # Run on whichever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    with torch.no_grad():
        # Add batch and channel axes: (xline, twt) -> (1, 1, xline, twt).
        noisy_input = torch.from_numpy(section_norm)[None, None].float().to(device)
        denoised = model(noisy_input).to('cpu')
        # Drop singleton axes and flip to (twt, xline) to match the output buffer.
        denoised = torch.squeeze(denoised).numpy().T

    # Undo the normalisation.
    denoised_denorm = denoised * abs_max

    # The network output is written into the top-left corner of the buffer; it is
    # presumably allowed to be smaller than the input (e.g. after pooling) — TODO confirm.
    n_twt, n_xline = denoised_denorm.shape
    result[:n_twt, :n_xline] = denoised_denorm

    denoised_ds['data'] = xr.DataArray(
        result,
        coords={
            'twt': iline_ds['twt'].values,
            'xline': iline_ds['xline'].values,
        },
        dims=['twt', 'xline'],
    )

    return denoised_ds

# Denoise the volume one inline at a time: each 'iline' group is passed to
# `inference` and the per-inline results are combined back into one dataset.
inline_groups = noisy_seisnc.groupby('iline')
denoised_seinc = inline_groups.map(inference)

Denoising iline # 1150
Denoising iline # 1151
Denoising iline # 1152
Denoising iline # 1153
Denoising iline # 1154
Denoising iline # 1155
Denoising iline # 1156
Denoising iline # 1157
Denoising iline # 1158
Denoising iline # 1159
Denoising iline # 1160
Denoising iline # 1161
Denoising iline # 1162
Denoising iline # 1163
Denoising iline # 1164
Denoising iline # 1165
Denoising iline # 1166
Denoising iline # 1167
Denoising iline # 1168
Denoising iline # 1169
Denoising iline # 1170
Denoising iline # 1171
Denoising iline # 1172
Denoising iline # 1173
Denoising iline # 1174
Denoising iline # 1175
Denoising iline # 1176
Denoising iline # 1177
Denoising iline # 1178
Denoising iline # 1179
Denoising iline # 1180
Denoising iline # 1181
Denoising iline # 1182
Denoising iline # 1183
Denoising iline # 1184
Denoising iline # 1185
Denoising iline # 1186
Denoising iline # 1187
Denoising iline # 1188
Denoising iline # 1189
Denoising iline # 1190
Denoising iline # 1191
Denoising iline # 1192
Denoising i

In [37]:
# Persist the denoised volume as a seisnc (NetCDF4) file.

# Carry over only the source attributes that are actually set (not None).
source_attrs = noisy_seisnc.attrs
denoised_seinc.attrs = {
    name: source_attrs[name]
    for name in source_attrs
    if source_attrs[name] is not None
}

denoised_seinc.to_netcdf(
    denoised_seisnc_path,
    engine='netcdf4',
    format='NETCDF4',
    compute=True,
)

HDF5-DIAG: Error detected in HDF5 (1.12.2) thread 1:
  #000: H5A.c line 528 in H5Aopen_by_name(): can't open attribute
    major: Attribute
    minor: Can't open object
  #001: H5VLcallback.c line 1091 in H5VL_attr_open(): attribute open failed
    major: Virtual Object Layer
    minor: Can't open object
  #002: H5VLcallback.c line 1058 in H5VL__attr_open(): attribute open failed
    major: Virtual Object Layer
    minor: Can't open object
  #003: H5VLnative_attr.c line 130 in H5VL__native_attr_open(): can't open attribute
    major: Attribute
    minor: Can't open object
  #004: H5Aint.c line 545 in H5A__open_by_name(): unable to load attribute info from object header
    major: Attribute
    minor: Unable to initialize object
  #005: H5Oattribute.c line 494 in H5O__attr_open_by_name(): can't locate attribute: '_QuantizeBitGroomNumberOfSignificantDigits'
    major: Attribute
    minor: Object not found
HDF5-DIAG: Error detected in HDF5 (1.12.2) thread 1:
  #000: H5A.c line 528 in H5Ao

In [39]:
from segysak.segy import segy_writer

# Trace-header byte positions passed to the SEG-Y writer for each named field.
trace_header_map = {
    'cdp_x': 181,
    'cdp_y': 185,
    'iline': 189,
    'xline': 193,
}

# Export the denoised dataset to SEG-Y with 'twt' as the vertical dimension.
segy_writer(
    denoised_seinc,
    denoised_segy_path,
    dimension='twt',
    trace_header_map=trace_header_map,
    silent=False,
    il_chunks=100,
)

         189   193         181          185
0       1150  1005  23376992.0  254785024.0
1       1150  1006  23376992.0  254787024.0
2       1150  1007  23376992.0  254789024.0
3       1150  1008  23376992.0  254791024.0
4       1150  1009  23376992.0  254793024.0
...      ...   ...         ...          ...
108872  1250  2077  23776992.0  256929024.0
108873  1250  2078  23776992.0  256931024.0
108874  1250  2079  23776992.0  256933024.0
108875  1250  2080  23776992.0  256935024.0
108876  1250  2081  23776992.0  256937024.0

[108440 rows x 4 columns]


Writing to SEG-Y:   0%|          | 0/108878 [00:00<?, ? traces/s]

In [None]:
# Display the denoised dataset's summary as a quick sanity check.
denoised_seinc

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,4.21 kiB
Shape,"(101, 1078)","(1, 1078)"
Dask graph,101 chunks in 203 graph layers,101 chunks in 203 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 425.30 kiB 4.21 kiB Shape (101, 1078) (1, 1078) Dask graph 101 chunks in 203 graph layers Data type float32 numpy.ndarray",1078  101,

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,4.21 kiB
Shape,"(101, 1078)","(1, 1078)"
Dask graph,101 chunks in 203 graph layers,101 chunks in 203 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,4.21 kiB
Shape,"(101, 1078)","(1, 1078)"
Dask graph,101 chunks in 201 graph layers,101 chunks in 201 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 425.30 kiB 4.21 kiB Shape (101, 1078) (1, 1078) Dask graph 101 chunks in 201 graph layers Data type float32 numpy.ndarray",1078  101,

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,4.21 kiB
Shape,"(101, 1078)","(1, 1078)"
Dask graph,101 chunks in 201 graph layers,101 chunks in 201 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
# Inspect the attributes of the noisy source dataset (all are None in the captured output).
noisy_seisnc.attrs


{'ns': None,
 'sample_rate': None,
 'text': None,
 'measurement_system': None,
 'd3_domain': None,
 'epsg': None,
 'corner_points': None,
 'corner_points_xy': None,
 'source_file': None,
 'srd': None,
 'datatype': None,
 'percentiles': None,
 'coord_scalar': None}

In [None]:
# Open another seisnc volume from the same data directory and display its summary.
sample_seisnc_path = '/local1/workspace/random_denoising/data/mp41b_pstm_stk_rnd/MP41B_PSTM_STK_RND0.4.seisnc'
sample = load_seisnc(sample_seisnc_path)
sample

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,425.30 kiB
Shape,"(101, 1078)","(101, 1078)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 425.30 kiB 425.30 kiB Shape (101, 1078) (101, 1078) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",1078  101,

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,425.30 kiB
Shape,"(101, 1078)","(101, 1078)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,425.30 kiB
Shape,"(101, 1078)","(101, 1078)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 425.30 kiB 425.30 kiB Shape (101, 1078) (101, 1078) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",1078  101,

Unnamed: 0,Array,Chunk
Bytes,425.30 kiB,425.30 kiB
Shape,"(101, 1078)","(101, 1078)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.62 GiB,1.62 GiB
Shape,"(101, 1078, 4000)","(101, 1078, 4000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.62 GiB 1.62 GiB Shape (101, 1078, 4000) (101, 1078, 4000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",4000  1078  101,

Unnamed: 0,Array,Chunk
Bytes,1.62 GiB,1.62 GiB
Shape,"(101, 1078, 4000)","(101, 1078, 4000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
