### Machine learning methods for effective thickness prediction

In [2]:
import segyio
import numpy as np

In [3]:
# Upload cube and cut off horison

# Seismic cube data
cubic = segyio.tools.cube('data/seis.sgy')

# Top of layer coordinates
top_layer = np.loadtxt('data/top.char', usecols=(2,5,6,7,8))
# 2 - Inline number, 5 - Cross-line number, 6 - Coordinate X , 7 - Coordinate Y, 8 - Coordinate t0

In [4]:
# Reflecting horison in the cube (known a priori)
finl = 479 # Number of first inline 
fcrl = 5000 # Number of first xline
tmin = 1750 # Time of first horison sample
tinc = 2 # Time sampling step

ninl = np.rint(top_layer[:,0]-finl).astype(np.int64) # Array with inline numbers
ncrl = np.rint(top_layer[:,1]-fcrl).astype(np.int64) # Array with xline numbers

coord_x = top_layer[:,2]
coord_y = top_layer[:,3]

t_hor_top = (top_layer[:,4]-tmin)/tinc

horizon_top = np.zeros((cubic.shape[0],cubic.shape[1])) 
mask = np.zeros((cubic.shape[0],cubic.shape[1])) 

horizon_top[ninl[:],ncrl[:]] = t_hor_top[:].copy()
mask[ninl[:],ncrl[:]] = 1

In [47]:
# Upload wells for learning

train_wells = np.loadtxt('data/train_points.txt')
train_wells_x = l_wells[:, 0]
train_wells_y = l_wells[:, 1]
train_wells_depth = l_wells[:, 2]

test_wells = np.loadtxt('data/test_points.txt')
test_wells_x = r_wells[:, 1]
test_wells_y = r_wells[:, 2]

In [48]:
# Fuctions for attributes calculation (Firstly we will use two parameters: kinamic and dynamic - first dip after layer 
# and multiplyied amplitude module, those parameters are independent)


def first_dip(cube, horison, inline_ar, crossline_ar):
    first_dp = np.zeros((cube.shape[0], cube.shape[1]))

    
    trace_last_index = int(len(cubic[inline_ar[0], crossline_ar[0], :]) - 1)
    
    for i in range(0, 511914):
                x = True
                time_sample_step = 1
                while x:
                    time_sample_step += 1
                    
                    sample_num = int(horison[inline_ar[i], crossline_ar[i]].astype(np.int64) + time_sample_step)
                
                    if sample_num + 1 > trace_last_index:
                        break
                    else:
                        a = cubic[inline_ar[i], crossline_ar[i], sample_num]
                        b = cubic[inline_ar[i], crossline_ar[i], sample_num + 1]
                        c = cubic[inline_ar[i], crossline_ar[i], sample_num - 1]

                        if b > a and c > a:
                            
                            first_dp[inline_ar[i], crossline_ar[i]] = time_sample_step
                            break
                                                
    return first_dp


def amplitudes_map(cube, horison, inline_ar, crossline_ar, step_down_from_top):
    amplitudes = np.zeros((cube.shape[0], cube.shape[1]))
    amplitudes[inline_ar[:], crossline_ar[:]] = cubic[inline_ar[:], crossline_ar[:], 
                                                      horison[inline_ar[:], crossline_ar[:]].astype(np.int64) 
                                                      + step_down_from_top ]
    return amplitudes


def summ_module_amp(cube, horison, inline_ar, crossline_ar, numbers_of_step_down):
    m_amplitudes = np.zeros((cube.shape[0], cube.shape[1]))
    for i in range(numbers_of_step_down):
        m_amplitudes += np.abs(amplitudes_map(cubic, horizon_top, ninl, ncrl, i))
    return m_amplitudes




In [49]:
first_dip_map = first_dip(cubic, horizon_top, ninl, ncrl)
sm_amp_map = summ_module_amp(cubic, horizon_top, ninl, ncrl, 14)

In [50]:
# Getting attribute values in coordinates, where wells are located
def get_attr(x, y, att_map):
    dif_x = []
    for i in coord_x:
        dif = abs(x - i)
        dif_x.append(dif)
    dif_y = []
    for j in coord_y:
        dif = abs(y - j)
        dif_y.append(dif)
    summ_xy = list(map(lambda q, w: q + w, dif_x, dif_y))
    #print(min(summ_xy))
    ind = summ_xy.index(min(summ_xy))
    #print(x, y)
    #print(top_layer[ind, 2], top_layer[ind, 3])
    inline = int(top_layer[ind, 0]-finl) 
    xline = int(top_layer[ind, 1]-fcrl)
    return att_map[inline, xline]


attrs = np.zeros((len(train_wells_x), 2))

for i in range(len(train_wells_x)):
    attrs[i, 0] = get_attr(train_wells_x[i], test_wells_y[i], first_dip_map)
    attrs[i, 1] = get_attr(train_wells_x[i], test_wells_y[i], sm_amp_map)
    
    


In [51]:
# Data normalisation 
from sklearn import preprocessing

min_max_scaler_1 = preprocessing.MinMaxScaler()
min_max_scaler_2 = preprocessing.MinMaxScaler()

Xn_1 = min_max_scaler_1.fit_transform(attrs[:, 0].reshape(-1, 1))
Xn_2 = min_max_scaler_2.fit_transform(attrs[:, 1].reshape(-1, 1))


X = np.hstack((Xn_1, Xn_2))
y = train_wells_depth

In [59]:
y

array([ 4.9 , 14.56,  9.67, 10.37, 10.09,  3.77, 17.96,  3.44, 20.47,
       17.98, 18.4 ,  2.08,  2.75, 13.68, 18.59,  7.63, 13.11,  8.78,
       11.8 , 14.78, 10.47,  4.1 , 10.65, 19.6 , 12.89, 15.77,  6.6 ,
       13.76,  0.  ,  4.69, 15.92, 12.65, 18.8 , 12.53, 17.98,  6.76,
       12.26, 10.78, 15.35, 10.53,  9.82,  3.06, 17.36, 13.91, 11.66,
       18.8 ,  2.5 ,  4.  , 18.44, 16.3 , 13.29,  3.81, 12.59, 16.  ,
        2.33,  3.8 , 21.29, 18.11, 15.8 , 11.59, 13.35, 19.1 , 29.46,
       31.24, 23.01, 21.18, 17.24, 46.02,  6.45, 36.27, 31.68, 24.23,
       32.57, 26.21,  0.  ,  0.91,  0.76,  0.23, 11.89, 35.36])

In [52]:
# For thickness prediction we apply GaussianNB, KNeighbors, RandomForest

from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [60]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=11)

# Applying different classificators

gnb = GaussianNB() 
y_pred = gnb.fit(X_train, y_train).predict(X_test)

print('GaussianNB')
print(metrics.classification_report(y_test, y_pred))
print()

kn = KNeighborsClassifier(20)  
y_pred = kn.fit(X_train, y_train).predict(X_test)

print('KNeighbors')
print(metrics.classification_report(y_test, y_pred)) 
print()

rfc = RandomForestClassifier()  
y_pred = rfc.fit(X_train, y_train).predict(X_test)

print('RandomForest')
print(metrics.classification_report(y_test, y_pred))
print()


ValueError: Unknown label type: (array([ 0.  ,  0.23,  2.75,  3.44,  3.8 ,  4.  ,  4.9 ,  7.63,  8.78,
        9.82, 10.09, 10.37, 10.78, 11.59, 11.66, 11.89, 12.53, 12.59,
       12.89, 13.29, 13.35, 13.68, 13.76, 13.91, 14.56, 15.77, 15.8 ,
       17.96, 17.98, 18.4 , 18.44, 18.59, 18.8 , 19.6 , 21.29, 24.23,
       26.21, 31.24, 35.36]),)