# Machine Learning

## Módulos a emplear

In [1]:
# Importamos las librerias a utilizar
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import FunctionTransformer, PolynomialFeatures, scale, StandardScaler
from sklearn.decomposition import PCA
import sys
import os

# Make the project's helper modules importable: they are looked up in ../src
# relative to the current working directory.
notebook_dir = os.getcwd()
scripts_dir = os.path.join(notebook_dir, "..", "src")
# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)
from data_transform.print_unique_values import print_unique_values

# Show the value of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Carga y selección de la Información

### Seleccionamos la información del pozo clave y la editamos

In [2]:
# Location of the processed CSV: ../data/processed/arroyo/<file_processed>
file_processed = "arroyo_procesed.csv"
path_data_processed = os.path.join(
    notebook_dir,
    "..",
    "data",
    "processed",
    "arroyo",
    file_processed,
)

# Read the processed data set into a DataFrame
arroyo_df = pd.read_csv(path_data_processed)

# Report the wells present in the data via the "wellname" column
print_unique_values(arroyo_df, "wellname")

Valores únicos en la columna 'wellname': Arroyo Prieto-11, Arroyo Prieto-8


### Seleccionamos el pozo/pozos de interés para entrenar el modelo

In [3]:
# Well(s) whose rows are used to train the models
key_wells_lst = ["Arroyo Prieto-8"]
# Independent copy of the rows belonging to the training well(s)
key_well = arroyo_df.loc[arroyo_df["wellname"].isin(key_wells_lst)].copy()

# Well for which the synthetic logs will be predicted
well_target = "Arroyo Prieto-11"

# Column groups used as model inputs
key_well_tops = [
    "fs_e1",
    "fs_e2",
    "fs_e3",
    "fs_e4",
    "fs_e5",
    "fs_e6",
    "fs_e7",
    "fs_e8",
]
features = [
    "por",
    "pore",
    "sw",
    "rhob",
    "nphi",
    "vcl",
]

## Machine Learning

### Rhobmod

In [4]:
# Column to be modelled in this section
target = ["rhobmod"]

# Training table: inputs and target together, keeping only complete rows
key_well_df_ml = key_well[features + target + key_well_tops].dropna()

# Inputs are every column except the target; y stays a one-column DataFrame
X = key_well_df_ml.drop(columns=target)
y = key_well_df_ml[target]

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

# Flatten the training target to a 1D array before fitting
y_train = y_train.to_numpy().ravel()

# Random forest with bootstrap sampling, all available cores (n_jobs=-1)
# and the Poisson split criterion
rf_regressor = RandomForestRegressor(
    criterion="poisson",
    bootstrap=True,
    n_jobs=-1,
    random_state=50,
)

# Fit on the training rows
rf_regressor.fit(X_train, y_train)

# Score the model: RMSE on the training rows and on the held-out rows
y_train_pred = rf_regressor.predict(X_train)
y_test_pred = rf_regressor.predict(X_test)

rf_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
rf_test = np.sqrt(mean_squared_error(y_test, y_test_pred))

print(f'Random forest train/test RMSE: {rf_train: .3f}/{rf_test:.3f}')


Random forest train/test RMSE:  0.002/0.005


In [5]:
# Prediction table for the target well: identifier columns plus model inputs,
# keeping only complete rows
predict_well = arroyo_df.loc[
    arroyo_df["wellname"] == well_target,
    ["wellname", "md"] + features + key_well_tops,
].dropna()

# Model inputs only, in the same column order used for training
predict_well_ml = predict_well[features + key_well_tops].copy()

# Run the fitted regressor to generate the synthetic log
y_pred_well_log = rf_regressor.predict(predict_well_ml)

# Store the prediction under "<target>_syn"
predict_well[target[0] + "_syn"] = y_pred_well_log

# Start the results table for the target well. It is a copy of predict_well,
# so it already holds the synthetic column; no separate assignment is needed.
qe_df = predict_well.copy()

### Vpmod

In [7]:
# Column to be modelled in this section
target = ["vpmod"]

# Training table: inputs and target together, keeping only complete rows
key_well_df_ml = key_well[features + target + key_well_tops].dropna()

# Inputs are every column except the target; y stays a one-column DataFrame
X = key_well_df_ml.drop(columns=target)
y = key_well_df_ml[target]

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

# Flatten the training target to a 1D array before fitting
y_train = y_train.to_numpy().ravel()

# Random forest with bootstrap sampling, all available cores (n_jobs=-1)
# and the Poisson split criterion
rf_regressor = RandomForestRegressor(
    criterion="poisson",
    bootstrap=True,
    n_jobs=-1,
    random_state=50,
)

# Fit on the training rows
rf_regressor.fit(X_train, y_train)

# Score the model: RMSE on the training rows and on the held-out rows
y_train_pred = rf_regressor.predict(X_train)
y_test_pred = rf_regressor.predict(X_test)

rf_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
rf_test = np.sqrt(mean_squared_error(y_test, y_test_pred))

print(f'Random forest train/test RMSE: {rf_train: .3f}/{rf_test:.3f}')

Random forest train/test RMSE:  21.375/65.213


In [8]:
# Rebuild the prediction table for the target well: identifier columns plus
# model inputs, keeping only complete rows
predict_well = arroyo_df.loc[
    arroyo_df["wellname"] == well_target,
    ["wellname", "md"] + features + key_well_tops,
].dropna()

# Model inputs only (identifier columns removed)
predict_well_ml = predict_well.drop(columns=["wellname", "md"])

# Run the most recently fitted regressor to generate the synthetic log
y_pred_well_log = rf_regressor.predict(predict_well_ml)

# Store the prediction under "<target>_syn"
predict_well[target[0] + "_syn"] = y_pred_well_log

# Add the new curve to the results table of the target well
qe_df[target[0] + "_syn"] = predict_well[target[0] + "_syn"]

### Vsmod

In [9]:
# Column to be modelled in this section
target = ["vsmod"]

# Training table: inputs and target together, keeping only complete rows
key_well_df_ml = key_well[features + target + key_well_tops].dropna()

# Inputs are every column except the target; y stays a one-column DataFrame
X = key_well_df_ml.drop(columns=target)
y = key_well_df_ml[target]

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

# Flatten the training target to a 1D array before fitting
y_train = y_train.to_numpy().ravel()

# Random forest with bootstrap sampling, all available cores (n_jobs=-1)
# and the Poisson split criterion
rf_regressor = RandomForestRegressor(
    criterion="poisson",
    bootstrap=True,
    n_jobs=-1,
    random_state=50,
)

# Fit on the training rows
rf_regressor.fit(X_train, y_train)

# Score the model: RMSE on the training rows and on the held-out rows
y_train_pred = rf_regressor.predict(X_train)
y_test_pred = rf_regressor.predict(X_test)

rf_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
rf_test = np.sqrt(mean_squared_error(y_test, y_test_pred))

print(f'Random forest train/test RMSE: {rf_train: .3f}/{rf_test:.3f}')

Random forest train/test RMSE:  19.740/62.064


In [10]:
# Rebuild the prediction table for the target well: identifier columns plus
# model inputs, keeping only complete rows
predict_well = arroyo_df.loc[
    arroyo_df["wellname"] == well_target,
    ["wellname", "md"] + features + key_well_tops,
].dropna()

# Model inputs only (identifier columns removed)
predict_well_ml = predict_well.drop(columns=["wellname", "md"])

# Run the most recently fitted regressor to generate the synthetic log
y_pred_well_log = rf_regressor.predict(predict_well_ml)

# Store the prediction under "<target>_syn"
predict_well[target[0] + "_syn"] = y_pred_well_log

# Add the new curve to the results table of the target well
qe_df[target[0] + "_syn"] = predict_well[target[0] + "_syn"]

## Guardamos los registros sintéticos generados por el modelo de Machine Learning

In [12]:
# Display the results table of the target well with the synthetic "_syn" columns
qe_df

Unnamed: 0,wellname,md,por,pore,sw,rhob,nphi,vcl,fs_e1,fs_e2,fs_e3,fs_e4,fs_e5,fs_e6,fs_e7,fs_e8,rhobmod_syn,vpmod_syn,vsmod_syn
0,Arroyo Prieto-11,2304.5,0.000100,0.000100,1.000000,2.2648,0.3441,0.000000,False,False,False,False,False,False,False,False,2.251551,3535.197322,1900.668247
1,Arroyo Prieto-11,2305.0,0.000100,0.000100,1.000000,2.3013,0.3736,0.000000,False,False,False,False,False,False,False,False,2.254772,3391.255657,1813.471430
2,Arroyo Prieto-11,2305.5,0.000100,0.000100,1.000000,2.3317,0.4054,0.000000,False,False,False,False,False,False,False,False,2.267376,3387.064866,1814.153659
3,Arroyo Prieto-11,2306.0,0.000100,0.000100,1.000000,2.4099,0.3680,0.000000,False,False,False,False,False,False,False,False,2.345070,3397.940903,1825.627363
4,Arroyo Prieto-11,2306.5,0.000100,0.000100,1.000000,2.4747,0.3130,0.000000,False,False,False,False,False,False,False,False,2.404613,3647.640251,2004.254487
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3754,Arroyo Prieto-11,4181.5,0.173766,0.127790,0.884350,2.4954,0.2631,0.459192,False,False,False,False,False,False,False,True,2.430013,3938.345709,2164.872218
3755,Arroyo Prieto-11,4182.0,0.166382,0.121542,0.783276,2.5246,0.2530,0.447864,False,False,False,False,False,False,False,True,2.464401,3990.530846,2182.021270
3756,Arroyo Prieto-11,4182.5,0.166962,0.122245,0.731104,2.4703,0.2518,0.446637,False,False,False,False,False,False,False,True,2.402987,4004.030726,2173.521294
3757,Arroyo Prieto-11,4183.0,0.173131,0.127211,0.735566,2.4679,0.2613,0.458658,False,False,False,False,False,False,False,True,2.401671,3950.286476,2137.543351


In [14]:
# Export the identifier columns and the three synthetic curves as a CSV file
# in ../data/processed/arroyo, named after the target well
synthetic_file_name = f"{well_target}_model_syn.csv"
path_to_save = os.path.join(
    notebook_dir,
    "..",
    "data",
    "processed",
    "arroyo",
    synthetic_file_name,
)
qe_df.to_csv(
    path_to_save,
    columns=["wellname", "md", "rhobmod_syn", "vpmod_syn", "vsmod_syn"],
    index=False,
)