In [1]:
import numpy as np 
import pandas as pd 
from tqdm import tqdm

import sys 
sys.path.append('../utils')


import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 1000
import matplotlib.pyplot as plt

from preprocess import gain

In [2]:
## cleaned dataset paths 

# interpretation
INTERPRETATION_VOL_PATH_CLEAN = '../data/clean/f3_interpretation/inline_vol.npy'
INTERPRETATION_LABEL_PATH_CLEAN = '../data/clean/f3_interpretation/inline_label.npy'

# faciesmark dataset 
FACIESMARK_VOL_PATH_CLEAN = '../data/clean/faciesmark/raw/seismic_entire_volume.npy'
FACIESMARK_LABEL_PATH_CLEAN = '../data/clean/faciesmark/raw/labels_entire_volume.npy'


# stdata12 dataset 
STDATA_VOL_PATH_CLEAN = '../data/clean/stdata12/stdata_12_amplitude.npy'
STDATA_LABEL_PATH_CLEAN = '../data/clean/stdata12/stdata_12_labels.npy'

# PREPROCESSED FILE PATHS 

# interpretation
INTERPRETATION_VOL_PATH_PREPROCESSED = '../data/preprocessed/f3_interpretation/inline_vol.npy'
INTERPRETATION_LABEL_PATH_PREPROCESSED = '../data/preprocessed/f3_interpretation/inline_label.npy'

# faciesmark dataset 
FACIESMARK_VOL_PATH_PREPROCESSED = '../data/preprocessed/faciesmark/raw/seismic_entire_volume.npy'
FACIESMARK_LABEL_PATH_PREPROCESSED = '../data/preprocessed/faciesmark/raw/labels_entire_volume.npy'


# stdata12 dataset 
STDATA_VOL_PATH_PREPROCESSED = '../data/preprocessed/stdata12/stdata_12_amplitude.npy'
STDATA_LABEL_PATH_PREPROCESSED = '../data/preprocessed/stdata12/stdata_12_labels.npy'


## Interpretation

In [5]:
vol_int = np.load(INTERPRETATION_VOL_PATH_CLEAN)
label_int = np.load(INTERPRETATION_LABEL_PATH_CLEAN)

vol_int.shape, label_int.shape

((601, 951, 361), (601, 951, 361))

In [6]:
def summary_stats(vol) : 
    print(f'''
    Shape : {vol.shape}
    Mean : {np.mean(vol)}
    Stdev : {np.std(vol)}
    Min : {np.min(vol)}
    Median : {np.median(vol)}
    99th Percentile of Abs : {np.quantile(np.abs(vol), 0.99)}
    Max : {np.max(vol)}
    Null values : {np.sum(np.isnan(vol))}
    
    ''')

summary_stats(vol_int)


    Shape : (601, 951, 361)
    Mean : 2.8666460514068604
    Stdev : 2676.749755859375
    Min : -32767.0
    Median : 28.0
    99th Percentile of Abs : 8611.0
    Max : 32767.0
    Null values : 0
    
    


In [7]:
# Clipping data to remove outliers 

abs_99 = np.quantile(np.abs(vol_int), 0.99)
vol_int_c = np.clip(vol_int, -abs_99, abs_99)

summary_stats(vol_int_c)


    Shape : (601, 951, 361)
    Mean : 15.75139331817627
    Stdev : 2544.2041015625
    Min : -8611.0
    Median : 28.0
    99th Percentile of Abs : 8611.0
    Max : 8611.0
    Null values : 0
    
    


In [8]:
# applying AGC
agc_window = 64 
f_s = 4e-3 

vol_int_c_g = np.zeros_like(vol_int)

for i in tqdm(range(vol_int.shape[0])) : 
    vol_int_c_g[i,:,:] = gain(vol_int_c[i,:,:], f_s , 'agc',agc_window * f_s, 2)

summary_stats(vol_int_c_g)

100%|██████████| 601/601 [05:17<00:00,  1.89it/s]



    Shape : (601, 951, 361)
    Mean : 0.011457464657723904
    Stdev : 0.9999324083328247
    Min : -5.015366077423096
    Median : 0.01830044947564602
    99th Percentile of Abs : 2.2190851926803603
    Max : 5.151999473571777
    Null values : 0
    
    


In [9]:
# scale to -1,1

max_vol_int_c_g = np.max(vol_int_c_g)

vol_int_c_g_sc = vol_int_c_g / max_vol_int_c_g

summary_stats(vol_int_c_g_sc)


    Shape : (601, 951, 361)
    Mean : 0.002223885850980878
    Stdev : 0.1940867155790329
    Min : -0.9734795689582825
    Median : 0.0035521062090992928
    99th Percentile of Abs : 0.43072309792041796
    Max : 1.0
    Null values : 0
    
    


In [10]:
# write to file
np.save(INTERPRETATION_VOL_PATH_PREPROCESSED, vol_int_c_g_sc)

import subprocess
subprocess.run(f'cp {INTERPRETATION_LABEL_PATH_CLEAN} {INTERPRETATION_LABEL_PATH_PREPROCESSED}',shell=True)

CompletedProcess(args='cp ../data/clean/f3_interpretation/inline_label.npy ../data/preprocessed/f3_interpretation/inline_label.npy', returncode=0)

## FACIESMARK

AGC seems applied to Faciesmark. Hence, not performing any of the preprocessing operations.

In [3]:
# vol_f = np.load(FACIESMARK_VOL_PATH_CLEAN)
# label_f = np.load(FACIESMARK_LABEL_PATH_CLEAN)

# vol_f.shape, label_f.shape

In [4]:
# summary_stats(vol_f)

In [5]:
# # applying agc window
# agc_window = 64 
# f_s = 4e-3 

# vol_f_g = np.zeros_like(vol_f)

# for i in tqdm(range(vol_f.shape[0])) : 
#     vol_f_g[i,:,:] = gain(vol_f[i,:,:], f_s , 'agc',agc_window * f_s, 2)

# summary_stats(vol_f_g)


In [6]:
# not clipping for outliers in faciesmark since it already looks treated.

In [7]:
# scale to -1,1

# max_vol_f_c_g = np.max(vol_f_c_g)

# vol_f_c_g_sc = vol_f_c_g / max_vol_f_c_g

# summary_stats(vol_f_c_g_sc)

In [8]:

import subprocess
subprocess.run(f'cp {FACIESMARK_LABEL_PATH_CLEAN} {FACIESMARK_LABEL_PATH_PREPROCESSED}',shell=True)
subprocess.run(f'cp {FACIESMARK_VOL_PATH_CLEAN} {FACIESMARK_VOL_PATH_PREPROCESSED}',shell=True)

CompletedProcess(args='cp ../data/clean/faciesmark/raw/seismic_entire_volume.npy ../data/preprocessed/faciesmark/raw/seismic_entire_volume.npy', returncode=0)

## STDATA-12

In [9]:
vol_st= np.load(STDATA_VOL_PATH_CLEAN)
label_st = np.load(STDATA_LABEL_PATH_CLEAN)

In [10]:
summary_stats(vol_st)

NameError: name 'summary_stats' is not defined

In [None]:
# Clipping data to remove outliers 

abs_99 = np.quantile(np.abs(vol_st), 0.99)
vol_st_c = np.clip(vol_st, -abs_99, abs_99)

summary_stats(vol_st_c)


    Shape : (4, 951, 362)
    Mean : 19.99727439880371
    Stdev : 2582.07568359375
    Min : -8838.7060546875
    Median : 0.0
    99th Percentile of Abs : 8838.7060546875
    Max : 8838.7060546875
    Null values : 0
    
    


In [None]:
# applying AGC
agc_window = 64 
f_s = 4e-3 

vol_st_c_g = np.zeros_like(vol_st)

for i in tqdm(range(vol_st.shape[0])) : 
    vol_st_c_g[i,:,:] = gain(vol_st_c[i,:,:], f_s , 'agc',agc_window * f_s, 2)

summary_stats(vol_st_c_g)

100%|██████████| 4/4 [00:02<00:00,  1.89it/s]


    Shape : (4, 951, 362)
    Mean : 0.010058555752038956
    Stdev : 0.9999493956565857
    Min : -4.113037109375
    Median : 0.0
    99th Percentile of Abs : 2.219185905456543
    Max : 4.202054023742676
    Null values : 0
    
    





In [None]:
# scale to -1,1

max_vol_st_c_g = np.max(vol_st_c_g)

vol_st_c_g_sc = vol_st_c_g / max_vol_st_c_g

summary_stats(vol_st_c_g_sc)


    Shape : (4, 951, 362)
    Mean : 0.0023937253281474113
    Stdev : 0.23796683549880981
    Min : -0.9788158535957336
    Median : 0.0
    99th Percentile of Abs : 0.5281193411350251
    Max : 1.0
    Null values : 0
    
    


In [None]:
# write to file
np.save(STDATA_VOL_PATH_PREPROCESSED, vol_st_c_g_sc)

import subprocess
subprocess.run(f'cp {STDATA_LABEL_PATH_CLEAN} {STDATA_LABEL_PATH_PREPROCESSED}',shell=True)

CompletedProcess(args='cp ../data/clean/stdata12/stdata_12_labels.npy ../data/preprocessed/stdata12/stdata_12_labels.npy', returncode=0)