#### PI-NN test for TOC data
#### alpha = 0.15

In [7]:
import numpy as np

from models.tensorflow_pi import TF_PI
from utils.data_preprocess import pre_process
from utils.plot_func import plot_pi_toc
from utils.tools import Loss_func

# Input files for well 3: the TOC samples and the well-log curves.
TOC_file = './data/well_3/TOC_data_liushagang.csv'
# TOC_file = './data/well_3/TOC_data.csv'
welllog_file = './data/well_3/welllog_data.csv'

# Four boundary values delimit the three named strata.
stratum_depth = [2402.4, 2543.3, 2790.3, 2995]
stratum_name = ['Liushagang_1', 'Liushagang_2', 'Liushagang_3']

logging_data, toc_data, unit, merge_toc, _ = pre_process(
    TOC_file, welllog_file, stratum_depth)

# Training inputs: every merged column except 'DEPT' and the 'TOC' target.
feature_columns = merge_toc.columns.difference(['DEPT', 'TOC'])
X = merge_toc[feature_columns].to_numpy()
y = merge_toc['TOC'].to_numpy()

# Prediction inputs: every well-log column except 'DEPT'.
log_columns = logging_data.columns.difference(['DEPT'])
test_data = logging_data[log_columns]

# Fit TF_PI on the labelled samples, then predict over the full well log.
alpha = 0.15
tf_pi = TF_PI(alpha, weight=[1, 0.8, 0.5])
history = tf_pi.fit(X, y, epochs=800)
result = tf_pi.predict(test_data)

# Score the prediction against the targets at nominal level 1 - alpha.
sample_depth = merge_toc['DEPT'].to_numpy()
log_depth = logging_data['DEPT'].to_numpy()
model_loss = Loss_func(result, y, (1 - alpha), sample_depth, log_depth)

metrics_template = "PICP = {}, PIMW = {}, PIAD = {}, PIEI = {}"
print(metrics_template.format(
    model_loss.picp, model_loss.pimw, model_loss.piad, model_loss.loss))

plot_pi_toc(result, merge_toc, logging_data, stratum_depth,
            stratum_name, model_loss.outlier, model_std=None)

100%|██████████| 800/800 [00:12<00:00, 62.23it/s]


PICP = 0.87, PIMW = 0.13, PIAD = 0.11, PIEI = 0.31


#### Different alphas test

In [8]:
from models.ensemble_pi import diff_alphas_PI
from utils.data_preprocess import pre_process
from utils.plot_func import subplot_fit_process, subplot_multi_toc

# Input files for well 3: the TOC samples and the well-log curves.
TOC_file = './data/well_3/TOC_data_liushagang.csv'
welllog_file = './data/well_3/welllog_data.csv'

# Four boundary values delimit the three named strata.
stratum_depth = [2402.4, 2543.3, 2790.3, 2995]
stratum_name = ['Liushagang_1', 'Liushagang_2', 'Liushagang_3']

logging_data, toc_data, unit, merge_toc, _ = pre_process(
    TOC_file, welllog_file, stratum_depth)

# Training inputs exclude 'DEPT' and the 'TOC' target; prediction inputs
# exclude only 'DEPT'.
feature_columns = merge_toc.columns.difference(['DEPT', 'TOC'])
X = merge_toc[feature_columns].to_numpy()
y = merge_toc['TOC'].to_numpy()
test_data = logging_data[logging_data.columns.difference(['DEPT'])]

# Depth arrays shared by the training call and the plotting call below.
sample_depth = merge_toc['DEPT'].to_numpy()
log_depth = logging_data['DEPT'].to_numpy()

# One run per alpha value.
alphas = [0.2, 0.15, 0.1, 0.05]
result_all, index_all, outlier_list, hist = diff_alphas_PI(
    X, y, test_data, sample_depth, log_depth,
    alphas, weight=[2, 0.7, 1])

print(index_all)

subplot_multi_toc(
    result_all, alphas, outlier_list,
    sample_depth, log_depth, y, stratum_depth, stratum_name)

subplot_fit_process(hist, alphas)

100%|██████████| 1000/1000 [00:21<00:00, 47.58it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.40it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.23it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.46it/s]
100%|██████████| 4/4 [01:25<00:00, 21.27s/it]


[array([0.87, 0.21, 0.1 , 1.01]), array([0.83, 0.35, 0.16, 0.58]), array([0.91, 0.36, 0.18, 0.57]), array([1.  , 0.59, 0.16, 1.96])]


##### Ensemble test

In [1]:
import numpy as np

from models.ensemble_pi import Bootstrap_PI
from utils.data_preprocess import pre_process
from utils.plot_func import plot_pi_toc, plot_simple_boundary
from utils.tools import Loss_func

# Input files for well 3: the TOC samples and the well-log curves.
TOC_file = './data/well_3/TOC_data_liushagang.csv'
welllog_file = './data/well_3/welllog_data.csv'

# Four boundary values delimit the three named strata.
stratum_depth = [2402.4, 2543.3, 2790.3, 2995]
stratum_name = ['Liushagang_1', 'Liushagang_2', 'Liushagang_3']
logging_data, toc_data, unit, merge_toc, _ = pre_process(
    TOC_file, welllog_file, stratum_depth)

# Training inputs exclude 'DEPT' and the 'TOC' target; prediction inputs
# exclude only 'DEPT'.
X = merge_toc[merge_toc.columns.difference(['DEPT', 'TOC'])].to_numpy()
y = merge_toc['TOC'].to_numpy()
test_data = logging_data[logging_data.columns.difference(['DEPT'])]


# Bootstrap ensemble: positional arguments are passed through unchanged
# (5, alpha, [2, 0.5, 0.5]); NOTE(review): presumably member count, alpha and
# loss weights - confirm against Bootstrap_PI's signature.
alpha = 0.1
model = Bootstrap_PI(5, alpha, [2, 0.5, 0.5], bootstrap_method='prop_of_data')
result_all, hist, y_pred_gauss_mid, y_pred_gauss_dev, up_low = model.fit_predict(
    X, y, test_data)

# The sample depths must come from merge_toc, the same frame that supplies y,
# so that every target value is paired with its own depth. toc_data is the
# pre-merge table and is not guaranteed to have the same rows.
model_loss = Loss_func(up_low, y, (1 - alpha),
                       merge_toc['DEPT'].to_numpy(), logging_data['DEPT'].to_numpy())
print("PICP = {}, PIMW = {}, PIAD = {}, Loss = {}".format(model_loss.picp, model_loss.pimw,
                                                          model_loss.piad, model_loss.loss))
plot_pi_toc(up_low, merge_toc, logging_data, stratum_depth,
            stratum_name, model_loss.outlier, y_pred_gauss_dev)

100%|██████████| 1000/1000 [00:22<00:00, 45.13it/s]
100%|██████████| 1000/1000 [00:20<00:00, 47.66it/s]
100%|██████████| 1000/1000 [00:20<00:00, 47.73it/s]
100%|██████████| 1000/1000 [00:20<00:00, 49.10it/s]
100%|██████████| 1000/1000 [00:20<00:00, 49.66it/s]
100%|██████████| 5/5 [01:46<00:00, 21.26s/it]


PICP = 0.96, PIMW = 0.8, PIAD = 0.26, Loss = 1.98


##### GPR TOC Test

In [1]:
import numpy as np
from scipy import stats
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import ConstantKernel as C
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

from utils.data_preprocess import pre_process
from utils.plot_func import plot_pi_toc
from utils.tools import Loss_func

# Input files for well 3: the TOC samples and the well-log curves.
TOC_file = './data/well_3/TOC_data_liushagang.csv'
# TOC_file = './data/well_3/TOC_data.csv'
welllog_file = './data/well_3/welllog_data.csv'

# Four boundary values delimit the three named strata.
stratum_depth = [2402.4, 2543.3, 2790.3, 2995]
stratum_name = ['Liushagang_1', 'Liushagang_2', 'Liushagang_3']
logging_data, toc_data, unit, merge_toc, _ = pre_process(
    TOC_file, welllog_file, stratum_depth)

# Training inputs exclude 'DEPT' and the 'TOC' target; prediction inputs
# exclude only 'DEPT'.
X = merge_toc[merge_toc.columns.difference(['DEPT', 'TOC'])].to_numpy()
y = merge_toc['TOC'].to_numpy()
test_data = logging_data[logging_data.columns.difference(['DEPT'])]

alpha = 0.1

# Alternative kernel, kept for reference:
# kernel = C(1e-1, (1e-5, 1e5)) * RBF(1e-1, (1e-5, 1e5))
# gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100, alpha=0.1).fit(X,y)
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(
    kernel=kernel, n_restarts_optimizer=100, random_state=10).fit(X, y)

# The regressor was fitted on a plain ndarray, so predict on one as well;
# passing the DataFrame makes scikit-learn warn about feature names that were
# not seen during fit.
result, std = gpr.predict(test_data.to_numpy(), return_std=True)

# Central (1 - alpha) normal interval around each predicted mean.
# norm.interval returns a pair of end-point arrays; transposing yields one
# row per prediction with the two end-points as columns.
up_low = stats.norm.interval(1 - alpha, loc=result, scale=std)
up_low = np.array(up_low).T

# The sample depths must come from merge_toc, the same frame that supplies y,
# so that every target value is paired with its own depth. toc_data is the
# pre-merge table and is not guaranteed to have the same rows.
model_loss = Loss_func(up_low, y, (1 - alpha),
                       merge_toc['DEPT'].to_numpy(), logging_data['DEPT'].to_numpy())
print("PICP = {}, PIMW = {}, PIAD = {}, Loss = {}".format(model_loss.picp, model_loss.pimw,
                                                          model_loss.piad, model_loss.loss))

plot_pi_toc(up_low, merge_toc, logging_data, stratum_depth,
            stratum_name, model_loss.outlier)



PICP = 0.96, PIMW = 0.8, PIAD = 0.2, Loss = 1.92
