# Vsh

### Goal: create new features grounded in petrophysical knowledge

### Jupyter Notebook display

In [1]:
from IPython.display import display, HTML

# Inject CSS that sets the width of the notebook container, menu bar and toolbar.
display(
    HTML(
        """
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 60%; }
</style>
"""
    )
)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

### Import libraries

In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import re
import hvplot.pandas
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; DATA_PATH is
# read from os.environ in the data-loading cell below.
load_dotenv()

True

In [3]:
# Resolve the data directory from the DATA_PATH environment variable.
data_path = os.environ.get("DATA_PATH")
if not data_path:
    # Without this guard a missing variable surfaces as an opaque
    # "unsupported operand type(s) for +: 'NoneType' and 'str'" error.
    raise EnvironmentError(
        "DATA_PATH is not set; define it in the environment or in a .env file"
    )

# train.csv is semicolon-separated, clusters_gmm.csv is comma-separated.
df_train = pd.read_csv(os.path.join(data_path, "train.csv"), sep=';')
df_clusters = pd.read_csv(os.path.join(data_path, "clusters_gmm.csv"), sep=',')

# Distinct well identifiers, in order of first appearance in the training set.
wells = df_train['WELL'].unique()

### Integrating cluster feature in training set

In [4]:
# Keep only the cluster rows flagged as belonging to the training dataset.
df_clusters = df_clusters.loc[df_clusters['Dataset'] == 'Train']

In [5]:
# Reduce the cluster table to the merge key (WELL) and the cluster label.
clusters = df_clusters.loc[:, ['WELL', 'cluster']]

In [6]:
# Attach the cluster label to every training row; validate='many_to_one'
# makes pandas raise if a WELL appears more than once in `clusters`.
df_train_cluster = pd.merge(
    df_train,
    clusters,
    how='left',
    on='WELL',
    validate='many_to_one',
)

### GR distribution across the clusters

In [9]:
# Box plot of GR, one box per cluster.
df_train_cluster.hvplot.box(
    y='GR',
    by='cluster',
    height=800,
    width=800,
    legend=True,
)

In [10]:
# Box plot of GR over the whole training set (no grouping).
df_train_cluster.hvplot.box(
    y='GR',
    height=800,
    width=800,
    legend=True,
)

### Vsh per well - GR-based

In [11]:
# Per-well gamma-ray end points: the 10th percentile of GR is used as GRmin
# and the 90th percentile as GRmax. A single groupby replaces two describe()
# calls per well.
gr_by_well = df_train_cluster.groupby('WELL')['GR']
gr_p10_by_well = gr_by_well.quantile(0.1)
gr_p90_by_well = gr_by_well.quantile(0.9)

# Lookup table with one row per well, in the order of `wells`.
wells_list = list(wells)
GRmin = gr_p10_by_well.reindex(wells_list).tolist()
GRmax = gr_p90_by_well.reindex(wells_list).tolist()
d = {'WELL': wells_list, 'GRmin': GRmin, 'GRmax': GRmax}
df_GRmin_GRmax = pd.DataFrame.from_dict(d)

# Assign by mapping on WELL rather than merging: re-running the cell then
# overwrites GRmin/GRmax instead of producing GRmin_x/GRmin_y duplicates.
df_train_cluster['GRmin'] = df_train_cluster['WELL'].map(gr_p10_by_well)
df_train_cluster['GRmax'] = df_train_cluster['WELL'].map(gr_p90_by_well)

# A zero (or negative) span would divide by zero; mark those rows as NaN.
gr_span_well = df_train_cluster['GRmax'] - df_train_cluster['GRmin']
gr_span_well = gr_span_well.where(gr_span_well > 0)

# Linear gamma-ray index. With P10/P90 end points roughly 20% of the samples
# fall outside [0, 1]; clip so the non-linear transforms stay defined
# (unclipped, Clavier is NaN above ~1.138 and Stieber has a pole at 1.5).
igr_well = (
    (df_train_cluster['GR'] - df_train_cluster['GRmin']) / gr_span_well
).clip(lower=0.0, upper=1.0)

df_train_cluster['Vsh_linear'] = igr_well
# Stieber: Vsh = IGR / (3 - 2 * IGR)
df_train_cluster['Vsh_Stieber'] = igr_well / (3.0 - (2.0 * igr_well))
# Clavier: Vsh = 1.7 - sqrt(3.38 - (IGR + 0.7)^2)
df_train_cluster['Vsh_Clavier'] = 1.7 - (3.38 - (igr_well + 0.7) ** 2) ** 0.5

### Vsh - RHOB-NPHI-based

In [12]:
# Shale end points used to scale the neutron/density separation.
NPHIshale = 0.3
PORshale = 0.15

# Porosity derived from RHOB: (2.65 - RHOB) / 1.65.
# NOTE(review): 2.65 is presumably a quartz matrix density in g/cc and 1.65
# the matrix-minus-fluid density difference - confirm.
df_train_cluster['RHOBpor'] = df_train_cluster['RHOB'].rsub(2.65).div(1.65)

# Neutron minus density porosity, normalised by the shale end-point separation.
df_train_cluster['Vsh_DN'] = (
    df_train_cluster['NPHI']
    .sub(df_train_cluster['RHOBpor'])
    .div(NPHIshale - PORshale)
)

### Vsh per group - GR-based

In [13]:
# Per-GROUP gamma-ray end points: the 10th percentile of GR is used as
# GRmin_GROUP and the 90th percentile as GRmax_GROUP. A single groupby
# replaces two describe() calls per group.
groups = df_train_cluster['GROUP'].unique()
gr_by_group = df_train_cluster.groupby('GROUP')['GR']
gr_p10_by_group = gr_by_group.quantile(0.1)
gr_p90_by_group = gr_by_group.quantile(0.9)

# Lookup table with one row per group, in the order of `groups`; a missing
# GROUP value (NaN) gets NaN end points.
group_list = list(groups)
GRmin_group = gr_p10_by_group.reindex(group_list).tolist()
GRmax_group = gr_p90_by_group.reindex(group_list).tolist()
d = {'GROUP': group_list, 'GRmin_GROUP': GRmin_group, 'GRmax_GROUP': GRmax_group}
df_GRmin_GRmax_group = pd.DataFrame.from_dict(d)

# Assign by mapping on GROUP rather than merging: re-running the cell then
# overwrites the columns instead of producing _x/_y duplicates.
df_train_cluster['GRmin_GROUP'] = df_train_cluster['GROUP'].map(gr_p10_by_group)
df_train_cluster['GRmax_GROUP'] = df_train_cluster['GROUP'].map(gr_p90_by_group)

# A zero (or negative) span would divide by zero; mark those rows as NaN.
gr_span_group = df_train_cluster['GRmax_GROUP'] - df_train_cluster['GRmin_GROUP']
gr_span_group = gr_span_group.where(gr_span_group > 0)

# Linear gamma-ray index. With P10/P90 end points roughly 20% of the samples
# fall outside [0, 1]; clip so the non-linear transforms stay defined
# (unclipped, Clavier is NaN above ~1.138 and Stieber has a pole at 1.5).
igr_group = (
    (df_train_cluster['GR'] - df_train_cluster['GRmin_GROUP']) / gr_span_group
).clip(lower=0.0, upper=1.0)

df_train_cluster['Vsh_linear_GROUP'] = igr_group
# Stieber: Vsh = IGR / (3 - 2 * IGR)
df_train_cluster['Vsh_Stieber_GROUP'] = igr_group / (3.0 - (2.0 * igr_group))
# Clavier: Vsh = 1.7 - sqrt(3.38 - (IGR + 0.7)^2)
df_train_cluster['Vsh_Clavier_GROUP'] = 1.7 - (3.38 - (igr_group + 0.7) ** 2) ** 0.5

In [14]:
# Scatter of the three per-well GR-based Vsh estimates against GR, with a
# selector to switch between wells.
df_train_cluster.hvplot.scatter(
    x='GR',
    y=['Vsh_linear', 'Vsh_Stieber', 'Vsh_Clavier'],
    groupby='WELL',
    value_label='Vsh',
    xlim=(0, 200),
    ylim=(0, 1),
    legend='top',
    height=800,
    width=800,
)