In [None]:
!pip install -qq segysak h5py dask

: 

In [None]:
import numpy as np

import sys 
# Add the relative directory ./content to the module search path.
# NOTE(review): the data files below are read from the absolute /content
# directory; confirm the relative path here is intended.
sys.path.append('./content')

from segysak.segy import (
    segy_loader,
    get_segy_texthead,
    segy_header_scan,
    segy_header_scrape,
    well_known_byte_locs,
)

: 

In [None]:
# Locations of the two SEG-Y files read by this notebook: the amplitude volume
# and its matching label volume. Both are hard-coded absolute paths.
volume_filepath = '/content/F3Seis_IL190_490_Amplitude.segy'
label_filepath = '/content/F3Seis_IL190_490_Label.segy'

: 

In [None]:
# Both files are read with the same "petrel_3d" byte-location preset, so look
# it up once and reuse it for the amplitude and the label file.
petrel_3d_byte_locs = well_known_byte_locs("petrel_3d")
volume = segy_loader(volume_filepath, **petrel_3d_byte_locs)
label = segy_loader(label_filepath, **petrel_3d_byte_locs)

: 

In [None]:
# Display the loaded amplitude volume (the notebook renders the object's repr).
volume

: 

In [None]:
# Display the loaded label volume (the notebook renders the object's repr).
label

: 

In [None]:
# Normalise the amplitudes by dividing by the maximum absolute value.
# NOTE(review): this cell was described as a 99th-percentile normalisation, but
# no percentile is computed: abs_max is the plain maximum of abs(volume).
# Assuming .max() reduces over the whole volume, clipping to +/-abs_max cannot
# change any value. If percentile clipping was intended, abs_max has to come
# from a quantile instead -- confirm before relying on the value range.
abs_max = abs(volume).max()
volume_norm = volume.clip(-abs_max, abs_max) / abs_max
volume_norm

: 

In [None]:
# First inline: 190    Last inline: 490
# First xline:  300    Last xline:  1250

# Extract the `data` member of the normalised amplitudes and of the labels as
# plain NumPy arrays; every later cell works on these two arrays.
volume_npy = volume_norm.data.to_numpy()
labels_npy = label.data.to_numpy()

: 

In [None]:
# Write both arrays to the current working directory as .npy files.
# NOTE(review): the two file names use different prefixes ('stdata_' vs
# 'stadata_'); one of them looks like a typo. Check what loads these files
# before renaming either one.
np.save('stdata_12_amplitude.npy', volume_npy)
np.save('stadata_12_labels.npy', labels_npy)

: 

In [None]:
import matplotlib.pyplot as plt 
# Show the first slice along axis 0 of the normalised amplitudes, transposed
# for display and drawn on a fixed [-1, 1] colour scale.
fig, ax = plt.subplots(figsize=(40, 10))
ax.imshow(volume_npy[0].T, cmap='seismic', vmin=-1, vmax=1, aspect='auto')
fig.tight_layout()
ax.axis('off')
plt.show()

: 

In [None]:
# Show the label values of the first slice along axis 0, with a colour bar
# mapping colours to label values.
fig, ax = plt.subplots(figsize=(40, 10))
label_image = ax.imshow(labels_npy[0].T, aspect='auto')
fig.colorbar(label_image, ax=ax)
fig.tight_layout()
ax.axis('off')
plt.show()

: 

In [None]:
# Distribution of class labels
#
# np.unique returns the sorted distinct label values and, with
# return_counts=True, how many samples carry each one. This replaces building
# one full boolean mask per class.
unique_labels, unique_counts = np.unique(labels_npy, return_counts=True)
labels = unique_labels.tolist()
counts = unique_counts.tolist()

# The loop variable is deliberately not named `label`: that name holds the
# loaded label volume, and rebinding it would break re-running earlier cells.
for class_label, count in zip(labels, counts):
    print(f'Class label :{class_label}', 'Count :', count)

# Scale the counts so the most frequent class has height 1.
most_common_count = max(counts)
normalised_counts = [class_count / most_common_count for class_count in counts]

plt.bar(labels, normalised_counts)
plt.xlabel('label #')
plt.ylabel('Normalised Counts')
plt.title("Distribution of Labels");

: 

In [None]:
# Mean class coverage: each class's total count is divided by the sizes of the
# first two axes of the volume, then rescaled so the largest class equals 1.
# NOTE(review): the divisor is the same for every class, so it cancels in the
# rescaling and these bars match the plain normalised counts (up to rounding).
dim0, dim1 = volume_npy.shape[0], volume_npy.shape[1]
mean_counts_inline = [total / dim0 / dim1 for total in counts]
peak_mean_count = max(mean_counts_inline)
mean_counts_inline_normalised = [
    value / peak_mean_count for value in mean_counts_inline
]

plt.bar(labels, mean_counts_inline_normalised)
plt.xlabel('label #')
plt.ylabel('Normalised Counts per inline')
plt.title("Distribution of Labels");

: 

In [None]:
# Distribution of all values in the normalised amplitude array, drawn as a
# density histogram with 250 bins.
hist_fig = plt.figure('Data Probability Distribution')
hist_fig.gca().hist(volume_npy.ravel(), bins=250, density=True)
plt.show()

: 

In [None]:
# selecting data of each class 
from copy import deepcopy 

def class_mask(label_npy, label):
    """Return a 0/1 mask marking where ``label_npy`` equals ``label``.

    The result is a new array with the same shape and dtype as ``label_npy``:
    1 where the element equals ``label``, 0 everywhere else. The input array
    is not modified.
    """
    is_target = label_npy == label
    # Casting the boolean comparison to the input dtype yields the 1/0 values.
    return is_target.astype(label_npy.dtype)

# sample plot
#
# The class id gets its own name: `label` already refers to the loaded label
# volume, and rebinding it to an int would break re-running the earlier cells
# that read `label.data`.
sample_label = 4

label_mask = class_mask(labels_npy, sample_label)
print(label_mask.shape, np.unique(label_mask))
plt.imshow(label_mask[0].T, aspect='auto');

: 

In [None]:
# distribution of data in each class

# data filtered by class
def data_by_class(volume_npy, label_npy, label):
    """Return the values of ``volume_npy`` located where ``label_npy == label``.

    Parameters
    ----------
    volume_npy : numpy.ndarray
        Array of data values.
    label_npy : numpy.ndarray
        Array of class labels used to select from ``volume_npy``; it is
        expected to line up with ``volume_npy`` element by element.
    label : scalar
        Class label to select.

    Returns
    -------
    numpy.ndarray
        The selected values, in the order boolean indexing produces them.
        Empty when no element carries ``label``.
    """
    # Select with the label array that was passed in. The previous body read
    # the notebook-level `labels_npy` and silently ignored this argument.
    return volume_npy[label_npy == label]

# data distribution by class
display(labels)

# squeeze=False always returns a 2-D grid of axes, so the loop also works when
# there is only one class.
fig, axes = plt.subplots(ncols=len(labels), figsize=(30, 8), squeeze=False)
# Loop names are chosen so that `volume` and `label`, which hold the loaded
# volumes, are not overwritten by this cell.
for class_ax, class_label in zip(axes[0], labels):
    class_values = data_by_class(volume_npy, labels_npy, class_label)
    # hist accepts the NumPy array directly; no conversion to a Python list.
    class_ax.hist(class_values, bins=250, density=True)
    class_ax.set_title(f'Class : {class_label}')

fig.suptitle('Data Probability distribution by class')

plt.show()

: 

: 