<div style="background-color:#00000">
    <img src="https://fundacionsadosky.org.ar/wp-content/uploads/2022/08/logo.png" />
</div>
<div style="background-color:#03030a; margin:20px 40%">
    <img src="https://www.rfindustrial.com/wp-content/uploads/2023/04/cropped-1080x1080_Mesa-de-trabajo-1.png" />
</div>
<div style="background-color:#00000;">
    <img src="https://sinc.unl.edu.ar/wp-content/themes/sinci/img/sinc-logo.png" />
</div>

<p style="font-size: 30px">
    <strong>COPE - “Sistema inteligente de medición de nivel y control de velocidad de bombeo para pozos petrolíferos"</strong>
</p>

<p style="font-size: 20px">
    Objetivo del análisis: Optimización de parámetros de los métodos de estimación de fondo utilizando búsqueda en grilla.
</p>

04/09/2023

In [4]:
import itertools
from tqdm import tqdm
import random
import numpy as np
from scipy import signal
from scipy.fft import fft, ifft
import matplotlib.pyplot as plt
from glob import glob

import sys
sys.path.append("../") # go to parent dir

import pandas as pd

from src.methods.base import InitialEstimation, InitialEstimationHilbert, CepstrumEstimation
from src.methods import base_depth
from src.data.utils import load_sample_file
from src.data.utils import remove_saturation as remove_saturation_method
from src.methods.utils import get_input_signal

from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [5]:
well_values = {
    1: 380.43,
    2: 898.54,
    3: 715.4,
    4: 805.83,
    5: 683.26,
    6: 915.83,
    8: 770.93
}

# Optimización para método inicial

In [6]:
params_grid_dict = {
    'f1': [1, 5, 7],
    'f2': [50, 55, 60],
    'fir_filter_order': [250, 400, 500],
    'low_pass_filter_order': [5, 10, 15],
    'low_pass_cutoff': [25, 50, 100],
    'peak_min_height': [0.1, 0.2, 0.5],
    'peak_min_distance': [250, 500, 750]
}

params_grid = list(itertools.product(*[v for k, v in params_grid_dict.items()]))

In [7]:
results = []

for params in tqdm(params_grid):
    params_config_dict = dict(zip(params_grid_dict.keys(), params))
    initial = base_depth.InitialEstimation(**params_config_dict)
    
    for well_id, well_depth in well_values.items():
        ecometries_dir = glob(f'../data/raw/20_10_2021 ( pruebas tiro cuplas largo)/{well_id}/*')
        ecometries_dir = [e for e in ecometries_dir if 'Frec1(7)_IncF(1)' in e]
        for ecometry_dir in ecometries_dir:
            trials = glob(ecometry_dir + '/*.json')
            
            for ecometry_name in trials:
                estimated_speed = InitialEstimation(remove_saturation=True).predict(ecometry_name)

                if (estimated_speed is None) or (estimated_speed < 100):
                    estimated_speed = 365

                df, output_signal = load_sample_file(ecometry_name)
                signal_len = len(output_signal)
                output_signal = remove_saturation_method(output_signal)
                removed_samples = signal_len - len(output_signal)

                depth = initial.predict(output_signal_values=output_signal,
                                        df_ecometry_params=df,
                                        estimated_speed=estimated_speed,
                                        removed_samples=removed_samples,
                                        use_correlation=True)
                
                error = abs(well_depth - depth) if (depth) else None

                params_str = '__'.join([f'{k}-{v}' for k, v in params_config_dict.items()])
                results.append([well_id, ecometry_name, params_str, depth, error])

100%|████████████████████████████████████████████████████████████████████| 2187/2187 [22:05<00:00,  1.65it/s]


In [8]:
df_initial = pd.DataFrame(results, columns=['pozo', 'ecometria', 'params', 'profundidad_estimada', 'error'])

In [9]:
df_initial['f1'] = df_initial.params.str.split('__').apply(lambda x: x[0]).str.split('-').apply(lambda x: x[-1])
df_initial['f2'] = df_initial.params.str.split('__').apply(lambda x: x[1]).str.split('-').apply(lambda x: x[-1])
df_initial['fir_filter_order'] = df_initial.params.str.split('__').apply(lambda x: x[2]).str.split('-').apply(lambda x: x[-1])
df_initial['low_pass_filter_order'] = df_initial.params.str.split('__').apply(lambda x: x[3]).str.split('-').apply(lambda x: x[-1])
df_initial['low_pass_cutoff'] = df_initial.params.str.split('__').apply(lambda x: x[4]).str.split('-').apply(lambda x: x[-1])
df_initial['peak_min_height'] = df_initial.params.str.split('__').apply(lambda x: x[5]).str.split('-').apply(lambda x: x[-1])
df_initial['peak_min_distance'] = df_initial.params.str.split('__').apply(lambda x: x[6]).str.split('-').apply(lambda x: x[-1])

In [10]:
method_params_cols = ['f1', 'f2',
                      'fir_filter_order', 'low_pass_filter_order', 
                      'low_pass_cutoff', 'peak_min_height',
                      'peak_min_distance']
method_cols = []
df_summary_initial = df_initial.groupby(method_params_cols).agg(
    {'pozo': 'nunique',
     'ecometria': 'nunique',
     'error': ['mean', 'std']}).reset_index()
df_summary_initial.rename(columns={'ecometria': 'cantidad_ecometrias'}, inplace=True)

In [11]:
df_summary_initial.columns = [f'{c}_{s}' if s != '' else c for (c, s) in zip(df_summary_initial.columns.droplevel(1), df_summary_initial.columns.droplevel(0))]

In [12]:
df_summary_initial.sort_values(by='error_mean')

Unnamed: 0,f1,f2,fir_filter_order,low_pass_filter_order,low_pass_cutoff,peak_min_height,peak_min_distance,pozo_nunique,cantidad_ecometrias_nunique,error_mean,error_std
709,1,60,500,5,100,0.5,500,7,25,43.373420,49.117434
466,1,55,500,5,100,0.5,500,7,25,43.373420,49.117434
224,1,50,500,5,100,0.5,750,7,25,43.373420,49.117434
223,1,50,500,5,100,0.5,500,7,25,43.373420,49.117434
710,1,60,500,5,100,0.5,750,7,25,43.373420,49.117434
...,...,...,...,...,...,...,...,...,...,...,...
928,5,50,500,15,25,0.1,500,7,25,108.653541,145.947477
1414,5,60,500,15,25,0.1,500,7,25,108.690076,145.803765
684,1,60,500,15,25,0.1,250,7,25,109.018897,145.409066
687,1,60,500,15,25,0.2,250,7,25,109.018897,145.409066


# Optimización para método de Hilbert

In [13]:
params_grid_dict = {
    'f1': [1, 5, 7],
    'f2': [50, 55, 60],
    'order': [50, 100, 150],
    'peak_min_height': [0.1, 0.2, 0.5],
    'peak_min_distance': [250, 500, 750]
}

params_grid = list(itertools.product(*[v for k, v in params_grid_dict.items()]))

In [14]:
len(params_grid)

243

In [15]:
results = []

for params in tqdm(params_grid):
    params_config_dict = dict(zip(params_grid_dict.keys(), params))
    initial = base_depth.InitialEstimationHilbert(**params_config_dict)
    
    for well_id, well_depth in well_values.items():
        ecometries_dir = glob(f'../data/raw/20_10_2021 ( pruebas tiro cuplas largo)/{well_id}/*')
        ecometries_dir = [e for e in ecometries_dir if 'Frec1(7)_IncF(1)' in e]
        for ecometry_dir in ecometries_dir:
            trials = glob(ecometry_dir + '/*.json')
            
            for ecometry_name in trials:
                estimated_speed = InitialEstimation(remove_saturation=True).predict(ecometry_name)

                if (estimated_speed is None) or (estimated_speed < 100):
                    estimated_speed = 365

                df, output_signal = load_sample_file(ecometry_name)
                signal_len = len(output_signal)
                output_signal = remove_saturation_method(output_signal)
                removed_samples = signal_len - len(output_signal)

                depth = initial.predict(output_signal_values=output_signal,
                                        df_ecometry_params=df,
                                        estimated_speed=estimated_speed,
                                        removed_samples=removed_samples,
                                        use_correlation=True)
                
                error = abs(well_depth - depth) if (depth) else None

                params_str = '__'.join([f'{k}-{v}' for k, v in params_config_dict.items()])
                results.append([well_id, ecometry_name, params_str, depth, error])

100%|██████████████████████████████████████████████████████████████████████| 243/243 [02:30<00:00,  1.61it/s]


In [16]:
df_hilbert = pd.DataFrame(results, columns=['pozo', 'ecometria', 'params', 'profundidad_estimada', 'error'])

In [17]:
df_hilbert['f1'] = df_hilbert.params.str.split('__').apply(lambda x: x[0]).str.split('-').apply(lambda x: x[-1])
df_hilbert['f2'] = df_hilbert.params.str.split('__').apply(lambda x: x[1]).str.split('-').apply(lambda x: x[-1])
df_hilbert['order'] = df_hilbert.params.str.split('__').apply(lambda x: x[2]).str.split('-').apply(lambda x: x[-1])
df_hilbert['peak_min_height'] = df_hilbert.params.str.split('__').apply(lambda x: x[3]).str.split('-').apply(lambda x: x[-1])
df_hilbert['peak_min_distance'] = df_hilbert.params.str.split('__').apply(lambda x: x[4]).str.split('-').apply(lambda x: x[-1])

In [18]:
method_params_cols = ['f1', 'f2',
                      'order', 'peak_min_height',
                      'peak_min_distance']
method_cols = []
df_summary_hilbert = df_hilbert.groupby(method_params_cols).agg(
    {'pozo': 'nunique',
     'ecometria': 'nunique',
     'error': ['mean', 'std']}).reset_index()
df_summary_hilbert.rename(columns={'ecometria': 'cantidad_ecometrias'}, inplace=True)

In [19]:
df_summary_hilbert.columns = [f'{c}_{s}' if s != '' else c for (c, s) in zip(df_summary_hilbert.columns.droplevel(1), df_summary_hilbert.columns.droplevel(0))]

In [20]:
df_summary_hilbert.sort_values(by='error_mean')

Unnamed: 0,f1,f2,order,peak_min_height,peak_min_distance,pozo_nunique,cantidad_ecometrias_nunique,error_mean,error_std
71,1,60,150,0.5,750,7,25,67.843098,38.431100
70,1,60,150,0.5,500,7,25,67.843098,38.431100
40,1,55,150,0.2,500,7,25,67.843098,38.431100
41,1,55,150,0.2,750,7,25,67.843098,38.431100
43,1,55,150,0.5,500,7,25,67.843098,38.431100
...,...,...,...,...,...,...,...,...,...
180,7,50,50,0.1,250,7,25,97.333208,92.220248
153,5,60,50,0.1,250,7,25,97.342777,92.213880
18,1,50,50,0.1,250,7,25,97.361915,92.208473
45,1,55,50,0.1,250,7,25,97.361915,92.208473


# Resultados para el método de RF Industrial

In [21]:
baseline_method = base_depth.RFIndustrialEstimation()
results = []
    
for well_id, well_depth in well_values.items():
    ecometries_dir = glob(f'../data/raw/20_10_2021 ( pruebas tiro cuplas largo)/{well_id}/*')
    ecometries_dir = [e for e in ecometries_dir if 'Frec1(7)_IncF(1)' in e]
    for ecometry_dir in ecometries_dir:
        trials = glob(ecometry_dir + '/*.json')
        estimated_speed = InitialEstimation(remove_saturation=True).predict(trials[0])
        
        if (estimated_speed is None) or (estimated_speed < 100):
            estimated_speed = 365
        
        depth = baseline_method.predict(ecometry_path=ecometry_dir,
                                        estimated_speed=estimated_speed)

        error = abs(well_depth - depth) if (depth) else None

        results.append([well_id, ecometry_dir, depth, error])

In [22]:
df_baseline = pd.DataFrame(results, columns=['pozo', 'ecometria', 'profundidad_estimada', 'error'])

In [23]:
df_baseline.error.describe()

count     11.000000
mean      70.305914
std       42.373942
min       12.340667
25%       47.255860
50%       67.015333
75%       83.026097
max      175.662602
Name: error, dtype: float64