# 1. Data loading

In [None]:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np

filename = 'NorthSeaData/FORCE_2020_train.csv' # path of the input CSV file
df = pd.read_csv(filename) # read the input data into a DataFrame

In [None]:
### number feature (well log profiling) visualization
df.columns

In [None]:
# Working copy used for modelling: well name, depth, seven log curves and the
# lithology label. .copy() detaches it from `df`, which later cells overwrite.
training_data = df[['WELL', 
                    'DEPTH_MD', 
                    'RMED', 
                    'RDEP', 
                    'RHOB', 
                    'GR', 
                    'NPHI',
                    'DTC', 
                    'PEF', 
                    'FORCE_2020_LITHOFACIES_LITHOLOGY']].copy()

In [None]:
# Restrict `df` to the same columns as `training_data`. This overwrites the
# full frame read from disk; `df` is the source of the missing-data view below.
df = df[['WELL', 
         'DEPTH_MD', 
         'RMED', 
         'RDEP', 
         'RHOB', 
         'GR', 
         'NPHI',
         'DTC', 
         'PEF', 
         'FORCE_2020_LITHOFACIES_LITHOLOGY']].copy()

In [None]:
df.columns

In [None]:
# Print the name of every distinct well in the data (one per line)
for well in df['WELL'].unique():
    
    print(well)

In [None]:
# List of headers
plot_cols = ['WELL', 'DEPTH_MD','RMED', 'RDEP', 'RHOB', 'GR', 'NPHI',
             'DTC', 'PEF','FORCE_2020_LITHOFACIES_LITHOLOGY']

In [None]:
df = df[plot_cols]

In [None]:
df.head()

In [None]:
data_nan = df.copy()

In [None]:
# Encode presence/absence of each column (from the third column on) as a
# numeric band for the missing-data plot: column number `num` becomes
# num + 1 where a value exists and num where it is NaN, so a fill between
# x = num and the encoded value has width 1 only where data is present.
#
# The result is assigned back to the column instead of calling
# `data_nan[col].replace(0, num, inplace=True)`: that chained in-place call
# works on a column selection and does not update `data_nan` when pandas
# copy-on-write is active.
for num, col in enumerate(data_nan.columns[2:]):
    data_nan[col] = data_nan[col].notnull().astype(int) + num
    print(col, num) #Print out the col name and number to verify it works

In [None]:
data_nan.describe()

# 2. Plotting the Data with and without NaN

In [None]:
grouped = data_nan.groupby('WELL')

In [None]:
#Curves shown along the x-axis: one unit-wide band per curve, in this order
labels = ['RMED', 'RDEP', 'RHOB', 'GR', 'NPHI','DTC', 'PEF']
band_colors = ['lightgrey', 'mediumseagreen', 'lightblue', 'lightcoral',
               'violet', 'darksalmon', 'red']
n_bands = len(labels)

#Setup the figure and the subplots (3 x 4 grid: zip() below draws at most 12 wells)
fig, axs = plt.subplots(3, 4, figsize=(20,10))

#Loop through each well in the grouped dataframe. The group is bound to
#`well_df` (not `df`) so the notebook-level `df` is not overwritten.
for (name, well_df), ax in zip(grouped, axs.flat):
    #One x unit per curve, so the last band (PEF, drawn at 6..7) is visible
    ax.set_xlim(0, n_bands)

    #Setup the depth range (axis inverted: depth increases downwards)
    ax.set_ylim(4000, 0)

    #Band k is filled between x = k and the encoded column value:
    #k + 1 where the curve has data, k (zero width) where it is NaN
    for band, (curve, color) in enumerate(zip(labels, band_colors)):
        ax.fill_betweenx(well_df['DEPTH_MD'], band, well_df[curve], facecolor=color)

    #Setup the grid, axis labels and ticks
    ax.grid(axis='x', alpha=0.5, color='black')
    ax.set_ylabel('DEPTH (m)', fontsize=14, fontweight='bold')

    #Position vertical lines at the boundaries between the bars
    ax.set_xticks([band + 1 for band in range(n_bands)], minor=False)

    #Position the curve names in the centre of each column
    ax.set_xticks([band + 0.5 for band in range(n_bands)], minor=True)

    #Setup the x-axis tick labels (names on the minor ticks, none on the major ticks)
    ax.set_xticklabels(labels,  rotation='vertical', minor=True, verticalalignment='bottom')
    ax.set_xticklabels([], minor=False)
    ax.tick_params(axis='x', which='minor', pad=-7)

    #Assign the well name as the title to each subplot
    ax.set_title(name, fontsize=16, fontweight='bold')

#Apply the layout first so the saved PDF matches what is displayed
plt.tight_layout()
plt.subplots_adjust(hspace=0.15, wspace=0.25)
fig.savefig('missingdata_northsea.pdf')
plt.show()


# 3. Select the headers to use in the in-data

In [None]:
# Shorten the label column name to 'FACIES'; `training_data` is modified in place
training_data.rename(columns={'FORCE_2020_LITHOFACIES_LITHOLOGY':'FACIES'}, inplace=True)

In [None]:
training_data

# 4. Column Remapping / Renaming

In [None]:
# Lithology code (values of the FACIES column) -> lithology name.
# The trailing colour names look like intended plot colours; they are not used
# anywhere in the code shown here — TODO confirm or remove.
lithology_numbers = {30000: 'Sandstone', # sandybrown
                     65030: 'Sandstone/Shale', #darkgoldenrod
                     65000: 'Shale', # olive
                     80000: 'Marl', #gainsboro
                     74000: 'Dolomite',
                     70000: 'Limestone',
                     70032: 'Chalk',
                     88000: 'Halite',
                     86000: 'Anhydrite',
                     99000: 'Tuff',
                     90000: 'Coal',
                     93000: 'Basement'}

Second dictionary: maps each lithology code to a compact integer class label.

In [None]:
# Lithology code -> compact integer class label: the codes are numbered
# 1..12 in the order listed here.
simple_lithology_numbers = {
    code: label
    for label, code in enumerate(
        (30000, 65030, 65000, 80000, 74000, 70000,
         70032, 88000, 86000, 99000, 90000, 93000),
        start=1)
}

In [None]:
training_data['LITH'] = training_data['FACIES'].map(lithology_numbers)

In [None]:
training_data['LITH_SI'] = training_data['FACIES'].map(simple_lithology_numbers)

# 5. View the number of samples of the whole data

In [None]:
# Bar chart of the number of samples per lithology class (whole data set).
# Tick labels are derived from the classes actually present rather than from a
# fixed positional list, so a bar cannot be mislabelled when a class is absent
# (the previous list also had no entry for class 12, 'Basement').
lith_counts = training_data['LITH_SI'].value_counts().sort_index()
print(lith_counts)

# integer class label -> lithology name, built from the two mapping dicts
# (lookup also works when the index is float, e.g. 3.0, since 3.0 == 3)
label_to_name = {simple_lithology_numbers[code]: lith_name
                 for code, lith_name in lithology_numbers.items()}

ax = lith_counts.plot(kind='bar')
ax.set_xticklabels([label_to_name[label] for label in lith_counts.index])
ax.set_title('Number of samples')
plt.show()

# 6. Crossplot RHOB and NPHI (whole data)

In [None]:
import seaborn as sns

# RHOB vs NPHI crossplot, one panel per lithology, four panels per row
# (drawn before rows containing NaN are dropped)
g = sns.FacetGrid(training_data, col='LITH', col_wrap=4)
g.map(sns.scatterplot, 'NPHI', 'RHOB', alpha=0.5)
# Fixed axis ranges for every panel; the RHOB axis is inverted (3 at the bottom, 1 at the top)
g.set(xlim=(-0.15, 1), ylim=(3, 1))
plt.show()

In [None]:
# Remove every row that has a NaN in any column (log curves or labels);
# `training_data` is modified in place
training_data.dropna(inplace=True)

In [None]:
for well in training_data['WELL'].unique():
    
    print(well)

# 7. sorting out the blind test well

In [None]:
# NOTE: this cell is not re-runnable — after the first run `training_data` no
# longer contains well '16/2-16', so a second run would leave `blind` empty.
blind = training_data[training_data['WELL'] == '16/2-16'] # keep only this one well as the blind test set
training_data = training_data[training_data['WELL'] != '16/2-16'] # remove that well from the training data
blind

In [None]:
training_data['WELL'].unique()

In [None]:
import seaborn as sns

# RHOB vs NPHI crossplot per lithology for the training wells only
# (NaN rows dropped, blind well removed)
g = sns.FacetGrid(training_data, col='LITH', col_wrap=4)
g.map(sns.scatterplot, 'NPHI', 'RHOB', alpha=0.5)
# Same fixed axis ranges as the whole-data crossplot; RHOB axis inverted
g.set(xlim=(-0.15, 1), ylim=(3, 1))
plt.show()

Two lithofacies are excluded from the data after dropping NaN.

In [None]:
# Bar chart of the number of samples per lithology class in the blind well.
# Tick labels are derived from the classes actually present in `blind`
# instead of a hand-picked positional list, so they always match the bars.
blind_counts = blind['LITH_SI'].value_counts().sort_index()
print(blind_counts)

# integer class label -> lithology name, built from the two mapping dicts
# (lookup also works when the index is float, e.g. 3.0, since 3.0 == 3)
label_to_name = {simple_lithology_numbers[code]: lith_name
                 for code, lith_name in lithology_numbers.items()}

ax = blind_counts.plot(kind='bar')
ax.set_xticklabels([label_to_name[label] for label in blind_counts.index])
ax.set_title('Samples - Blind well')
plt.show()

In [None]:
#['WELL', 'DEPTH_MD', 'RDEP', 'RHOB','GR', 'NPHI', 'PEF', 'DTC','SP']
#col_list = ['LITH_SI','RDEP', 'RHOB','GR', 'NPHI', 'PEF', 'DTC','SP']

# Histogram of the class label and of each log curve for the training wells,
# laid out on a 3 x 4 grid (eight panels used)
col_list = ['LITH_SI','RMED', 'RDEP', 'RHOB', 'GR', 'NPHI','DTC', 'PEF']

plt.figure(figsize=(15,10))
for panel, col in enumerate(col_list, start=1):
    plt.subplot(3, 4, panel)
    plt.hist(training_data[col])
    plt.title(col)
plt.show()

In [None]:
# Bar chart of the number of samples per lithology class in the training wells.
# Tick labels are derived from the classes actually present in
# `training_data` instead of a hand-picked positional list, so they always
# match the bars.
train_counts = training_data['LITH_SI'].value_counts().sort_index()
print(train_counts)

# integer class label -> lithology name, built from the two mapping dicts
# (lookup also works when the index is float, e.g. 3.0, since 3.0 == 3)
label_to_name = {simple_lithology_numbers[code]: lith_name
                 for code, lith_name in lithology_numbers.items()}

ax = train_counts.plot(kind='bar')
ax.set_xticklabels([label_to_name[label] for label in train_counts.index])
ax.set_title('Samples - Training wells')
plt.show()

# 8. Prepare data for modeling and blind test well


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import cross_val_score

from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.metrics import classification_report

In [None]:
# Log curves used as model inputs
features = ['RMED', 'RDEP', 'RHOB', 'GR', 'NPHI','DTC', 'PEF']



# Target: integer lithology class; inputs: the selected log curves
y = training_data['LITH_SI']
X = training_data[features]

In [None]:
### Data for modelling

#scaler = StandardScaler().fit(X)
#X_stnd = scaler.transform(X)

# Random split of the training-well samples: 60% train, 40% test, fixed seed.
# Standardization for the SVM is applied afterwards, in the next cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [None]:
# Fit the scaler on the training split only, then apply the same transform to
# the test split. X_train / X_test are overwritten with the scaled values, so
# re-running this cell would scale already-scaled data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
X.corr()

In [None]:
### Blind test well: labels, raw features, and features scaled with the
### scaler that was fitted on the training split

y_blind = blind['LITH_SI']
X_blind = blind[features]
X_blind_stnd = sc.transform(X_blind)

In [None]:
#Plot loss and accuracy

import matplotlib.pyplot as plt
def plot_history(history):
    """Plot the train and validation mean-absolute-error curves per epoch.

    `history` must expose `.epoch` and a `.history` mapping holding the
    'mae' and 'val_mae' series. The y-axis runs from 0 to the largest
    validation value.

    NOTE(review): the models compiled in this notebook track 'accuracy', not
    'mae', and this helper is not called in the visible cells — it looks like
    a carry-over from a regression example; confirm before using.
    """
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [1000$]')
    for key, legend_name in (('mae', 'Train'), ('val_mae', 'Val')):
        plt.plot(history.epoch, np.array(history.history[key]), label=legend_name)
    plt.legend()
    upper = max(history.history['val_mae'])
    plt.ylim([0, upper])

def plot_prediction(test_labels, test_predictions):
    """Draw two figures comparing predictions with true values.

    Figure 1: scatter of `test_predictions` against `test_labels` with equal
    axis scaling and a reference line from (-100, -100) to (100, 100).
    Figure 2: 50-bin histogram of the error `test_predictions - test_labels`.

    NOTE(review): the '[1000$]' axis units and the continuous-error histogram
    suggest a regression helper; it is not called in the visible cells.
    """
    plt.figure()
    plt.scatter(test_labels, test_predictions)
    plt.xlabel('True Values [1000$]')
    plt.ylabel('Predictions [1000$]')
    plt.axis('equal')
    # Re-apply the current limits so the reference line below does not rescale the axes
    plt.xlim(plt.xlim())
    plt.ylim(plt.ylim())
    _ = plt.plot([-100, 100],[-100,100])

    plt.figure()
    # Element-wise prediction error
    error = test_predictions - test_labels
    plt.hist(error, bins = 50)
    plt.xlabel("Prediction Error [1000$]")
    _ = plt.ylabel("Count")

In [None]:
def plot_confusion_matrix(cm,
                          classes,
                          normalize,
                          title='Confusion matrix',
                          cmap=plt.cm.Greys):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    cm : 2-D array of counts, rows = true class, columns = predicted class.
    classes : sequence of tick labels, one per row/column of `cm`, in the
        same order as the rows/columns.
    normalize : bool. If True every row is divided by its sum, so each cell
        is the fraction of that true class. Rows whose sum is zero (a class
        that was predicted but has no true samples) stay all-zero instead of
        becoming NaN.
    title : figure title.
    cmap : matplotlib colormap used for the image.

    A warning is issued when the number of labels differs from the matrix
    size, because the tick labels would then not line up with the cells.
    """
    import itertools
    import warnings

    cm = np.asarray(cm)
    if len(classes) != cm.shape[0]:
        warnings.warn('plot_confusion_matrix: got {} class labels for a {}x{} '
                      'matrix; tick labels will not line up with the cells'
                      .format(len(classes), cm.shape[0], cm.shape[1]))

    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True)
        # Divide only where the row has samples; empty rows remain 0.0
        cm = np.divide(cm, row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)
    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text for contrast
    thresh = cm.max() / 2. if cm.size else 0.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center", verticalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

# 9. Parameter optimization and classifier training

Modeling algorithms:
1. SVM
2. Gradient boosting
3. Random forest
4. KNN

In [None]:
from sklearn.svm import SVC # To use Support Vector Machine
from sklearn import ensemble # To use Gradient Boosting and Random forest
from sklearn.neighbors import KNeighborsClassifier # To use KNN
from sklearn.metrics import f1_score


### 9.1 SVM: Parameter optimization

In [None]:

# c_list = [0.01, 1, 5, 10, 20, 50, 100, 1000, 5000, 10000]
# gamma_list = [0.0001, 0.001, 0.01, 0.1, 1, 10]
# #gamma_list = [0.0001]
# i = 0
# plt.figure(figsize=(15,10))

# for gamma_value in gamma_list:
#     i = i + 1
#     scores = list()
#     score_stds = list()
#     score_tests = list()
#     #print('interations gamma_list =',i)
#     j = 0
#     for c_value in c_list:
        
#         j = j + 1
#         print('interations c_list =',j)
        
#         clf_cv = SVC(C=c_value, gamma=gamma_value)
        
#         cv_score = cross_val_score(clf_cv, X_train, y_train)
        
#         scores.append(np.mean(cv_score))
#         score_stds.append(np.std(cv_score))
#         clf_cv.fit(X_train, y_train)
        
#         score_test = clf_cv.score(X_test, y_test)
#         score_tests.append(score_test)
    
#     plt.subplot(2,3,i)
#     plt.semilogx(c_list, scores, label='Train error')
#     plt.semilogx(c_list, score_tests, label='Cross-validation error')
#     #plt.semilogx(c_list, np.array(scores)+np.array(score_stds), 'b--')
#     #plt.semilogx(c_list, np.array(scores)+-np.array(score_stds), 'b--')
#     plt.title('Gamma = {}'.format(gamma_value))
#     plt.xlabel('C values')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)

# plt.show()

SVM classifier training

In [None]:
# SVM classifier with fixed C and gamma, trained on the standardized training
# split (the grid search over C and gamma is commented out in the cell above)
clf = SVC(C=10, gamma=1)
clf.fit(X_train, y_train)

In [None]:
# Abbreviated class names used as tick labels / report names for the
# test-split evaluations. Despite the name, these are class labels, not
# input features.
# NOTE(review): the list is hard-coded and positional — it must match, in
# sorted order, the classes actually present in the test split; verify.
training_features = ['Ss',
                  'Ss/Sh',
                  'Sh',
                  'M',
                  'D',
                  'L',
                  'Ch',
                  'H',
                  'T',
                  'C']

In [None]:
# Abbreviated class names for the blind-well evaluation.
# NOTE(review): hard-coded and positional, and redefined with different
# contents in later cells — it must match the sorted classes occurring in
# y_blind and the predictions; verify.
list_blind_full = ['Ss',
                  'Ss/Sh',
                  'Sh',
                  'M',
                  'L',
                  'Ch',
                  'A',
                  'T',
                  'C']

In [None]:

# SVM evaluation on the (standardized) test split: per-class report and
# row-normalized confusion matrix
pred_test = clf.predict(X_test)
print(classification_report(y_test, pred_test))
cm_test_SVM = confusion_matrix(y_test, pred_test)
plot_confusion_matrix(cm_test_SVM, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the SVM on the test split.
# The printed label now says "Micro" to match average='micro' (it said "Macro").
microF1_test_SVM = f1_score(y_test, pred_test, average='micro')
print('Test Micro f1 score:', microF1_test_SVM)

In [None]:
# SVM evaluation on the blind well (standardized features)
pred_blind = clf.predict(X_blind_stnd)

# Classes occurring in the truth or in the prediction, sorted. Passing them
# as `labels` fixes the row/column order, and the names are looked up per
# class instead of coming from a hand-maintained positional list.
blind_classes = np.union1d(y_blind, pred_blind)
lith_abbrev = dict(zip(range(1, 13),
                       ['Ss', 'Ss/Sh', 'Sh', 'M', 'D', 'L',
                        'Ch', 'H', 'A', 'T', 'C', 'B']))
blind_names = [lith_abbrev.get(int(c), str(c)) for c in blind_classes]

print(classification_report(y_blind, pred_blind, labels=blind_classes,
                            target_names=blind_names, zero_division=0))
cm_SVM = confusion_matrix(y_blind, pred_blind, labels=blind_classes)
plot_confusion_matrix(cm_SVM, blind_names, normalize=True)

In [None]:
# for i_well in range(0,10):
    
#     aux_pred_svm = clf.predict(X_blind_stnd[i_well])
#     microF1_blind_SVM = f1_score(y_blind[i_well], aux_pred_svm, average='micro')
#     print('Blind micro f1 score:', microF1_blind_SVM)
# Micro-averaged F1 of the SVM on the whole blind well (predictions are recomputed here)
aux_pred_svm = clf.predict(X_blind_stnd)
microF1_blind_SVM = f1_score(y_blind, aux_pred_svm, average='micro')
print('Blind micro f1 score:', microF1_blind_SVM)

### 9.2 Gradient boosting (GB): Parameter optimization

In [None]:
# Unscaled split for the models below (no fit/transform with the scaler is
# applied to it). Same X, y, test_size and random_state as the split used for
# X_train / X_test, so the rows and labels correspond.
X1_train, X1_test, y1_train, y1_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [None]:
# loss_list = ['deviance']
# max_depths = [2,3,4,5,6]

# i = 0
# plt.figure(figsize=(15,5))


# for los in loss_list:
    
#     i = i + 1
#     scores = list()
#     score_tests = list()

#     for depth in max_depths:
        
#         params = {'loss': los, ##  loss{‘deviance’, ‘exponential’}, default=’deviance’
#                   'learning_rate': 0.1, ##  learning_ratefloat, default=0.1
#                   'n_estimators': 500, ##  number of iterations, int, default=100
#                   'max_depth': depth, ##  int, default=3
#                   'subsample': 1, ## float, default=1.0
#                   'min_samples_split': 2 ## int or float, default=2
#                  }
#         clf_cv = ensemble.GradientBoostingClassifier(**params)
        
#         # Train data
#         clf_cv.fit(X1_train, y1_train)
#         cv_score = clf_cv.score(X1_train, y1_train)
#         scores.append(np.mean(cv_score))
        
#         # Test data
#         score_test = clf_cv.score(X1_test, y1_test)
#         score_tests.append(score_test)
    
#     plt.subplot(1,2,i)
#     plt.plot(max_depths, scores, 'o-', color='b', label='Train')
#     plt.plot(max_depths, score_tests, 'o-', color='r', label='Test')
#     plt.legend()
#     plt.title('Loss = {}'.format(los))
#     plt.xlabel('Max depth')
#     plt.ylabel('Accuracy')
    
    
#     #ax.semilogx(C_range, cv_errors, label='CV error')
#     #ax.semilogx(C_range, train_errors, label='Train error')
    
#     plt.ylim(0,1.1)

# plt.show()

As the plot shows, the accuracy converges at a maximum depth of 4.

In [None]:
# learning_rates = [0.001, 0.01, 0.1, 0.2, 0.4]
# n_estimators_list = [100, 500, 1000]

# i = 0
# plt.figure(figsize=(15,5))

# for est in n_estimators_list:
#     i = i + 1
#     scores = list()
#     score_tests = list()
    
#     for rate in learning_rates:
#         params = {'loss': 'deviance', ##  loss{‘deviance’, ‘exponential’}, default=’deviance’
#                   'learning_rate': rate, ##  learning_ratefloat, default=0.1
#                   'n_estimators': est, ##  number of iterations, int, default=100
#                   'max_depth': 4, ##  int, default=3
#                   'subsample': 1, ## float, default=1.0
#                   'min_samples_split': 2 ## int or float, default=2
#                   }
#         clf_cv = ensemble.GradientBoostingClassifier(**params)
#         clf_cv.fit(X1_train, y1_train)
#         cv_score = clf_cv.score(X1_train, y1_train)
#         scores.append(np.mean(cv_score))
#         score_test = clf_cv.score(X1_test, y1_test)
#         score_tests.append(score_test)
        
#     plt.subplot(1,3,i)
#     plt.semilogx(learning_rates, scores, 'o-', color='b', label='Train')
#     plt.semilogx(learning_rates, score_tests, 'o-', color='r', label='Test')
#     plt.legend()
#     plt.title('N estimators = {}'.format(est))
#     plt.xlabel('learning rate')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)

# plt.show()

Selected values: n_estimators = 500 and learning_rate = 0.1.

In [None]:
# subsamples = [0.2, 0.6, 1]
# n_estimators_list = [500, 1000, 2000]

# i = 0
# plt.figure(figsize=(15,5))

# for est in n_estimators_list:
    
#     i = i + 1
#     scores = list()
#     score_tests = list()
    
#     for sub in subsamples:
        
#         params = {'loss': 'deviance', ##  loss{‘deviance’, ‘exponential’}, default=’deviance’
#                   'learning_rate': 0.1, ##  learning_ratefloat, default=0.1
#                   'n_estimators': est, ##  number of iterations, int, default=100
#                   'max_depth': 5, ##  int, default=3
#                   'subsample': sub, ## float, default=1.0
#                   'min_samples_split': 2 ## int or float, default=2
#                   }
        
#         clf_cv = ensemble.GradientBoostingClassifier(**params)
#         clf_cv.fit(X_train, y_train)
#         cv_score = clf_cv.score(X_train, y_train)
        
#         scores.append(np.mean(cv_score))
#         score_test = clf_cv.score(X_test, y_test)
#         score_tests.append(score_test)
        
        
#     plt.subplot(1,3,i)
#     plt.plot(subsamples, scores, 'o-', color='b', label='Train')
#     plt.plot(subsamples, score_tests, 'o-', color='r', label='Test')
#     plt.legend()
#     plt.title('n_estimators = {}'.format(est))
#     plt.xlabel('sub samples')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)

# plt.show()

Based on the accuracy plots, the selected parameters are max_depth=4, learning_rate=0.1, n_estimators=500 and subsample=0.6.

Gradient Boosting classifier training

In [None]:
# Final gradient-boosting model, trained on the unscaled split.
# `loss` is left at the library default. `random_state` is fixed so that
# repeated runs produce the same model.
# NOTE(review): the tuning summary above names max_depth=4 and subsample=0.6,
# but this cell trains with max_depth=3 and subsample=1 — confirm which
# setting is intended.
params = {'learning_rate': 0.1,    # shrinkage applied to each tree
          'n_estimators': 500,     # number of boosting iterations
          'max_depth': 3,          # depth of the individual trees
          'subsample': 1.0,        # fraction of samples used per tree
          'min_samples_split': 2,  # minimum samples needed to split a node
          'random_state': 42
          }
clf_GB = ensemble.GradientBoostingClassifier(**params)
clf_GB.fit(X1_train, y1_train)
preds_GB = clf_GB.predict(X1_test)

# Per-class report and row-normalized confusion matrix on the test split
print(classification_report(y1_test, preds_GB))
cm_test_GB = confusion_matrix(y1_test, preds_GB)
plot_confusion_matrix(cm_test_GB, training_features, normalize=True)

In [None]:
microF1_test_gb = f1_score(y1_test, preds_GB, average='micro')
print('Test Micro f1 score:', microF1_test_gb)

In [None]:
# Kept because later cells still read `list_blind_full`; the report and plot
# in this cell no longer depend on this hand-maintained list.
list_blind_full = ['Ss',
                  'Ss/Sh',
                  'Sh',
                  'M',
                   'D',
                  'L',
                  'Ch',
                  'A',
                  'T',
                  'C']

# Gradient-boosting evaluation on the blind well (unscaled features)
pred_GB_blind = clf_GB.predict(X_blind)

# Classes occurring in the truth or in the prediction, sorted. Passing them
# as `labels` fixes the row/column order, and the names are looked up per
# class so they cannot drift out of step with the matrix.
blind_classes = np.union1d(y_blind, pred_GB_blind)
lith_abbrev = dict(zip(range(1, 13),
                       ['Ss', 'Ss/Sh', 'Sh', 'M', 'D', 'L',
                        'Ch', 'H', 'A', 'T', 'C', 'B']))
blind_names = [lith_abbrev.get(int(c), str(c)) for c in blind_classes]

print(classification_report(y_blind, pred_GB_blind, labels=blind_classes,
                            target_names=blind_names, zero_division=0))
cm_GB = confusion_matrix(y_blind, pred_GB_blind, labels=blind_classes)
plot_confusion_matrix(cm_GB, blind_names, normalize=True)

In [None]:
# for i_well in range(0,10):
    

#     aux_pred_GB_blinda = clf_GB.predict(X_blind[i_well])

#     microF1_blind_GB = f1_score(y_blind[i_well], aux_pred_GB_blinda, average='micro')
    
#     print('Blind micro f1 score:', microF1_blind_GB)
microF1_blind_GB = f1_score(y_blind, pred_GB_blind, average='micro')    
print('Blind micro f1 score:', microF1_blind_GB)

### 9.3. Random forest (RF) parameter optimization

In [None]:
# max_depths = [2, 3, 4]
# n_estimators_list = [100, 500, 1000, 2000, 5000]

# i = 0
# plt.figure(figsize=(15,5))

# for depth in max_depths:
    
#     i = i + 1
#     scores = list()
#     score_tests = list()
    
#     for est in n_estimators_list:
#         params = {'n_estimators': est, ##  number of iterations, int, default=100
#                   'max_depth': depth, ##  int, default=None
#                   'n_jobs': -1 #to speed up computations by taking advantage of parallel processing.
                  
#                   }
#         clf_cv = ensemble.RandomForestClassifier(**params)
#         clf_cv.fit(X1_train, y1_train)
#         cv_score = clf_cv.score(X1_train, y1_train)
#         scores.append(np.mean(cv_score))
#         score_test = clf_cv.score(X1_test, y1_test)
#         score_tests.append(score_test)
        
#     plt.subplot(1,4,i)
#     plt.plot(n_estimators_list, scores, color='b', label='Train')
#     plt.plot(n_estimators_list, score_tests, color='r', label='Test')
#     plt.legend()
#     plt.title('max depth = {}'.format(depth))
#     plt.xlabel('n_estimators')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)

# scores = list()
# score_tests = list()

# for est in n_estimators_list:
    
#     clf_cv = ensemble.RandomForestClassifier(n_estimators=est)
#     clf_cv.fit(X1_train, y1_train)
#     cv_score = clf_cv.score(X1_train, y1_train)
#     scores.append(np.mean(cv_score))
#     score_test = clf_cv.score(X1_test, y1_test)
#     score_tests.append(score_test)
    
# plt.subplot(1,4,4)
# plt.plot(n_estimators_list, scores, color='b', label='Train')
# plt.plot(n_estimators_list, score_tests, color='r', label='Test')
# plt.legend()
# plt.title('max depth = {}'.format('None'))
# plt.xlabel('n_estimators')
# plt.ylabel('Accuracy')
# plt.ylim(0,1.1)
# plt.show()

max_depth can stay at its default (None), and n_estimators = 2000 gives the best accuracy.

Random forest classifier training

In [None]:
# Final random-forest model, trained on the unscaled split.
# random_state makes the forest reproducible between runs; n_jobs=-1 builds
# the 2000 trees in parallel (as in the tuning loop above).
clf_RF = ensemble.RandomForestClassifier(n_estimators=2000, criterion='gini',
                                         random_state=42, n_jobs=-1)
#print(cross_val_score(clf_RF, X1_train, y1_train, cv=5))
clf_RF.fit(X1_train, y1_train)
preds_RF = clf_RF.predict(X1_test)

# Per-class report and row-normalized confusion matrix on the test split
print(classification_report(y1_test, preds_RF))
cm_test_RF = confusion_matrix(y1_test, preds_RF)
plot_confusion_matrix(cm_test_RF, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the random forest on the test split.
# The printed label now says "Micro" to match average='micro' (it said "Macro").
microF1_test_rf = f1_score(y1_test, preds_RF, average='micro')
print('Test Micro f1 score:', microF1_test_rf)

Random forest blind prediction

In [None]:
# Kept because later cells still read `list_blind_full`; the report and plot
# in this cell no longer depend on this hand-maintained list.
list_blind_full = ['Ss',
                  'Ss/Sh',
                  'Sh',
                  'M',
                   'D',
                  'L',
                  'Ch',
                  'A',
                  'T']

# Random-forest evaluation on the blind well (unscaled features)
preds_RF_blind = clf_RF.predict(X_blind)

# Classes occurring in the truth or in the prediction, sorted. Passing them
# as `labels` fixes the row/column order, and the names are looked up per
# class so they cannot drift out of step with the matrix.
blind_classes = np.union1d(y_blind, preds_RF_blind)
lith_abbrev = dict(zip(range(1, 13),
                       ['Ss', 'Ss/Sh', 'Sh', 'M', 'D', 'L',
                        'Ch', 'H', 'A', 'T', 'C', 'B']))
blind_names = [lith_abbrev.get(int(c), str(c)) for c in blind_classes]

print(classification_report(y_blind, preds_RF_blind, labels=blind_classes,
                            target_names=blind_names, zero_division=0))
cm_RF = confusion_matrix(y_blind, preds_RF_blind, labels=blind_classes)
plot_confusion_matrix(cm_RF, blind_names, normalize=True)

In [None]:
# Micro-averaged F1 of the random forest on the blind well.
# The printed label now says "Blind" (it said "Test" for a blind-well score).
microF1_blind_rf = f1_score(y_blind, preds_RF_blind, average='micro')
print('Blind micro f1 score:', microF1_blind_rf)

### 9.4. KNN Parameter optimization

In [None]:
# neighbor_list = [5, 7, 10]
# weight_list = ['uniform', 'distance']
# i=0

# for weight in weight_list:
    
    
#     scores = list()
#     score_tests = list()
#     i = i + 1
    
#     for neighbor in neighbor_list:
#         clf_cv = KNeighborsClassifier(n_neighbors=neighbor, weights=weight)
#         clf_cv.fit(X1_train, y1_train)
#         scores.append(clf_cv.score(X1_train, y1_train))
#         score_tests.append(clf_cv.score(X1_test, y1_test))
#         print(scores)
        
#     plt.subplot(1,3,i)
#     plt.plot(neighbor_list, scores, 'b')
#     plt.plot(neighbor_list, score_tests, 'r')
#     plt.title('Weight = {}'.format(weight))
#     plt.xlabel('Number of neighbors')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)
# plt.show()

Distance weighting (`weights='distance'`) gives a better KNN score than uniform weighting.

KNN classifier training

In [None]:
# 5-fold cross-validation of the distance-weighted KNN on the training split.
# KNN is distance-based, so the log curves (which have very different numeric
# ranges) are standardized first. Wrapping scaler and classifier in a pipeline
# refits the scaler inside every fold, so no validation-fold statistics leak
# into training.
from sklearn.pipeline import make_pipeline

clf_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(weights='distance'))
print(cross_val_score(clf_knn, X1_train, y1_train, cv=5))

In [None]:
# Final distance-weighted KNN model.
# KNN is distance-based, so the log curves (which have very different numeric
# ranges) are standardized first. The scaler lives inside the pipeline, so
# `clf_knn.predict` accepts unscaled feature frames (X1_test, X_blind) and
# scales them with the training-split statistics.
from sklearn.pipeline import make_pipeline

clf_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(weights='distance'))
clf_knn.fit(X1_train, y1_train)
preds_knn = clf_knn.predict(X1_test)

# Per-class report and row-normalized confusion matrix on the test split
print(classification_report(y1_test, preds_knn))
cm_test_knn = confusion_matrix(y1_test, preds_knn)
plot_confusion_matrix(cm_test_knn, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the KNN on the test split.
# The printed label now says "Micro" to match average='micro' (it said "Macro").
microF1_test_knn = f1_score(y1_test, preds_knn, average='micro')
print('Test Micro f1 score:', microF1_test_knn)

KNN blind well prediction

In [None]:
# Kept in case other cells read `list_blind_rf`; the report and plot in this
# cell no longer depend on this hand-maintained list.
list_blind_rf = ['Ss',
              'Ss/Sh',
              'Sh',
              'M',
                 'D',
                 'L',
                 'Ch',
                 'A','T','C']


# KNN evaluation on the blind well
preds_knn_blind = clf_knn.predict(X_blind)

# Classes occurring in the truth or in the prediction, sorted. Passing them
# as `labels` fixes the row/column order, and the names are looked up per
# class so they cannot drift out of step with the matrix.
blind_classes = np.union1d(y_blind, preds_knn_blind)
lith_abbrev = dict(zip(range(1, 13),
                       ['Ss', 'Ss/Sh', 'Sh', 'M', 'D', 'L',
                        'Ch', 'H', 'A', 'T', 'C', 'B']))
blind_names = [lith_abbrev.get(int(c), str(c)) for c in blind_classes]

print(classification_report(y_blind, preds_knn_blind, labels=blind_classes,
                            target_names=blind_names, zero_division=0))
cm_knn = confusion_matrix(y_blind, preds_knn_blind, labels=blind_classes)
plot_confusion_matrix(cm_knn, blind_names, normalize=True)

In [None]:
# Micro-averaged F1 of the KNN on the blind well.
# The printed label now says "Blind micro" (it said "Test Macro").
microF1_blind_knn = f1_score(y_blind, preds_knn_blind, average='micro')
print('Blind micro f1 score:', microF1_blind_knn)

### 9.5 CNN

In [None]:
import random
import numpy as np
import tensorflow as tf
random.seed(10)
np.random.seed(10)
tf.random.set_seed(10)
from tensorflow import keras
import pandas as pd


print(tf.__version__)

In [None]:
print(X_train.shape)
print(X_train[1].shape)
print(X_train[0])

In [None]:
# Reshape the 2-D training matrix (samples, features) to 3-D
# (samples, features, 1): the feature vector of each sample is treated as a
# sequence of `time_steps` values with a single channel each.
sample_size = X_train.shape[0] # number of samples in train set
time_steps  = X_train.shape[1] # number of features in train set
input_dimension = 1               # each feature is represented by 1 number

train_data_reshaped = X_train.reshape(sample_size,time_steps,input_dimension)
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])

In [None]:
test_data_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [None]:
test_data_reshaped.shape

In [None]:
def build_conv1D_model():
    """Build and compile the 1-D convolutional lithology classifier.

    The input shape is read from the notebook-level `train_data_reshaped`
    (steps, channels). Architecture: four Conv1D layers (200 filters,
    kernel size 2, 'valid' padding, ReLU), each followed by a MaxPooling1D
    with pool_size=1 (which leaves the sequence length unchanged), then
    Flatten, Dropout(0.2), four Dense(50, ReLU) layers and a 12-unit softmax.

    Compiled with Adam, sparse categorical cross-entropy and accuracy.

    NOTE(review): the targets used with this model are the 1-based LITH_SI
    labels, so output unit 0 never corresponds to a class and a label of 12
    would not fit 12 output units — confirm the label range.

    Returns the compiled `keras.Sequential` model.
    """
    n_timesteps, n_features = train_data_reshaped.shape[1:3]
    kernel = 2

    model = keras.Sequential(name="model_conv1D")
    model.add(keras.layers.Input(shape=(n_timesteps, n_features)))

    # Convolutional stack: Conv1D_1 .. Conv1D_4, each with its pooling layer
    for idx in range(1, 5):
        model.add(keras.layers.Conv1D(filters=200, kernel_size=kernel, strides=1,
                                      padding='valid', activation='relu',
                                      name=f"Conv1D_{idx}"))
        model.add(keras.layers.MaxPooling1D(pool_size=1))

    # Classification head
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dropout(0.2))
    for _ in range(4):
        model.add(keras.layers.Dense(50, activation='relu'))
    model.add(keras.layers.Dense(12, activation='softmax'))

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])

    return model

model_conv1D = build_conv1D_model()
model_conv1D.summary()


In [None]:
# Stop training once validation accuracy has not improved for 5 epochs and
# restore the weights of the best epoch.
earlystoping = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy',
                                                patience=5,
                                                verbose=1,
                                                mode='auto',
                                                restore_best_weights=True)
# Save weights only, and only when validation accuracy improves; the file name
# embeds the epoch number and validation loss.
# NOTE(review): newer Keras releases require weight-only checkpoints to end in
# '.weights.h5' — confirm against the installed version.
checkpoint_filepath = 'weights.{epoch:02d}-{val_loss:.2f}.h5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,
                                                      save_weights_only=True,
                                                      monitor='val_accuracy',
                                                      mode='max',
                                                      verbose=1,
                                                      save_best_only=True)

In [None]:
# Train the CNN for up to 1000 epochs; early stopping ends the run sooner.
# NOTE(review): the test split is used as validation data, so early stopping
# and checkpoint selection are driven by the same samples that are later
# reported as test results — those scores are not independent of training.
history_cnn = model_conv1D.fit(train_data_reshaped, y_train, validation_data = (test_data_reshaped,y_test),
                           batch_size = 512, 
                           callbacks = [model_checkpoint,earlystoping],
                           epochs = 1000,
                           verbose=1)

In [None]:
plt.plot(history_cnn.history['loss'])
plt.plot(history_cnn.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])


In [None]:
plt.plot(history_cnn.history['accuracy'])
plt.plot(history_cnn.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])

plt.show()

In [None]:
pred_test_cnn = model_conv1D.predict(test_data_reshaped)

In [None]:
# Convert the per-class probabilities into class labels (index of the largest
# probability). The cell is safe to re-run: if `pred_test_cnn` already holds
# 1-D labels it is left unchanged instead of failing on axis=1. The result is
# a NumPy array, which the scikit-learn metrics below accept directly.
pred_test_cnn = np.asarray(pred_test_cnn)
if pred_test_cnn.ndim == 2:
    pred_test_cnn = pred_test_cnn.argmax(axis=1)

In [None]:
test_loss, test_acc = model_conv1D.evaluate(test_data_reshaped,  y_test, verbose=2)

In [None]:
# CNN evaluation on the test split: per-class report and row-normalized
# confusion matrix. `training_features` must have one name per class present.
print(classification_report(y_test, pred_test_cnn, target_names=training_features))
cm_test_cnn = confusion_matrix(y_test, pred_test_cnn)
plot_confusion_matrix(cm_test_cnn, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the CNN on the test split.
# The printed label now says "Micro" to match average='micro' (it said "Macro").
microF1_test_cnn = f1_score(y_test, pred_test_cnn, average='micro')
print('Test Micro f1 score:', microF1_test_cnn)

In [None]:
X_blind_reshaped = X_blind_stnd.reshape(X_blind_stnd.shape[0],X_blind_stnd.shape[1],1)
X_blind_reshaped.shape

In [None]:
aux = model_conv1D.predict(X_blind_reshaped)

In [None]:
pred_blind_cnn = tf.argmax(aux, axis=1)

In [None]:
# CNN evaluation on the blind well.
# Class names are derived from the classes that actually occur instead of
# relying on whichever `list_blind_full` an earlier cell defined last.
blind_pred_labels = np.asarray(pred_blind_cnn)
blind_classes = np.union1d(y_blind, blind_pred_labels)
lith_abbrev = dict(zip(range(1, 13),
                       ['Ss', 'Ss/Sh', 'Sh', 'M', 'D', 'L',
                        'Ch', 'H', 'A', 'T', 'C', 'B']))
# Unknown indices (e.g. output unit 0) fall back to the number itself
blind_names = [lith_abbrev.get(int(c), str(c)) for c in blind_classes]

print(classification_report(y_blind, blind_pred_labels, labels=blind_classes,
                            target_names=blind_names, zero_division=0))
cm_cnn = confusion_matrix(y_blind, blind_pred_labels, labels=blind_classes)
plot_confusion_matrix(cm_cnn, blind_names, normalize=True)

In [None]:
# Micro-averaged F1 of the CNN on the blind well.
# The printed label now says "Blind micro" (it said "Test Macro").
microF1_blind_cnn = f1_score(y_blind, pred_blind_cnn, average='micro')
print('Blind micro f1 score:', microF1_blind_cnn)

### 9.6 CNN (RBF)

In [None]:
# import keras
# from keras.layers import Layer
# from keras import backend as K

# class RBFLayer(Layer):
#     def __init__(self, units, gamma, ** kwargs):
#         super(RBFLayer, self).__init__( ** kwargs)
#         self.units = units
#         self.gamma = K.cast_to_floatx(gamma)

#     def build(self, input_shape):
#         self.mu = self.add_weight(name = 'mu',
#                                   shape = (int(input_shape[1]), self.units),
#                                   initializer = 'uniform',
#                                   trainable = True)
#         super(RBFLayer, self).build(input_shape)

#     def call(self, inputs):
#         diff = K.expand_dims(inputs) - self.mu
#         l2 = K.sum(K.pow(diff, 2), axis = 1)
#         res = K.exp(-1 * self.gamma * l2)
#         return res
    
#     def compute_output_shape(self, input_shape):
#         return (input_shape[0], self.units)
from rbflayer import RBFLayer, InitCentersRandom

In [None]:
def build_conv1D_rbf_model():
    """Build and compile the 1-D CNN used in the "CNN (RBF)" experiment.

    The input shape is taken from the global ``train_data_reshaped`` array
    (axis 1 = timesteps, axis 2 = features). The network is three identical
    convolutional stages (Conv1D x2 -> MaxPooling1D -> Dropout ->
    BatchNormalization) followed by Flatten, Dense(512, relu), Dropout and a
    12-way softmax output.

    NOTE(review): despite the name, no ``RBFLayer`` is added to the stack;
    confirm whether an RBF layer was meant to be part of this model.

    Returns
    -------
    keras.Sequential
        Model named ``"model_conv1D_rbf"``, compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and the ``accuracy`` metric.
    """
    n_timesteps = train_data_reshaped.shape[1]
    n_features = train_data_reshaped.shape[2]

    kernel_size = 2
    pool_size = 1
    n_filters = 128
    n_conv_stages = 3
    n_classes = 12

    model_rbf = keras.Sequential(name="model_conv1D_rbf")
    # A Sequential model accepts a single Input, as its first layer. The former
    # extra Input layers in front of stages 2 and 3 were redundant at best and
    # make recent Keras versions raise, so the input is declared only once.
    model_rbf.add(keras.layers.Input(shape=(n_timesteps, n_features)))

    for stage in range(n_conv_stages):
        # Layer names stay Conv1D_1 ... Conv1D_6, as in the original definition.
        for conv_number in (2 * stage + 1, 2 * stage + 2):
            model_rbf.add(keras.layers.Conv1D(filters=n_filters,
                                              kernel_size=kernel_size,
                                              activation='relu',
                                              name=f"Conv1D_{conv_number}"))
        model_rbf.add(keras.layers.MaxPooling1D(pool_size=pool_size))
        model_rbf.add(keras.layers.Dropout(0.2))
        model_rbf.add(keras.layers.BatchNormalization())

    # Dense classification head
    model_rbf.add(keras.layers.Flatten())
    model_rbf.add(keras.layers.Dense(512, activation='relu'))
    model_rbf.add(keras.layers.Dropout(0.2))
    model_rbf.add(keras.layers.Dense(n_classes, activation='softmax'))

    optimizer_aux = tf.keras.optimizers.Adam()
    model_rbf.compile(loss="sparse_categorical_crossentropy",
                      optimizer=optimizer_aux,
                      metrics=['accuracy'])

    return model_rbf

model_conv1D_rbf = build_conv1D_rbf_model()
model_conv1D_rbf.summary()


In [None]:
# Train the CNN (RBF) model, validating on the test split after every epoch.
# steps_per_epoch / validation_steps are intentionally not passed: for in-memory
# arrays Keras derives the number of batches from batch_size. The former
# hand-computed values (len(train)/10, a float, and len(test)) corresponded to
# batch sizes of 10 and 1, contradicting batch_size=512, so Keras ran out of
# data ("Your input ran out of data; interrupting training").
# NOTE(review): model_checkpoint and earlystoping are defined earlier in the
# notebook; if the checkpoint callback is shared with another model its saved
# file is overwritten by this run — verify.
history_rbf = model_conv1D_rbf.fit(
    train_data_reshaped,
    y_train,
    epochs=1000,
    batch_size=512,
    validation_data=(test_data_reshaped, y_test),
    callbacks=[model_checkpoint, earlystoping],
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch for the CNN (RBF) model.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history_rbf.history[curve_key])
plt.title('model loss CNN (RBF)')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'])


In [None]:
# Training vs. validation accuracy per epoch for the CNN (RBF) model.
for curve_key in ('accuracy', 'val_accuracy'):
    plt.plot(history_rbf.history[curve_key])
plt.title('model accuracy CNN (RBF)')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])

plt.show()

In [None]:
# Softmax output of the CNN (RBF) model on the test split
# (one row per sample, one column per class of the final Dense layer).
pred_test_rbf = model_conv1D_rbf.predict(test_data_reshaped)

In [None]:
# Most probable class index per test sample.
# NOTE(review): rebinds pred_test_rbf from probabilities to labels; re-run the
# predict cell before running this cell a second time.
pred_test_rbf = tf.argmax(pred_test_rbf, axis=1)

In [None]:
# CNN (RBF) on the test split: confusion matrix (kept for the summary table),
# text report, then the row-normalized matrix plot.
cm_test_rbf = confusion_matrix(y_test, pred_test_rbf)
print(classification_report(y_test, pred_test_rbf))
plot_confusion_matrix(cm_test_rbf, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the CNN (RBF) model on the test split. The label matches
# the averaging that is computed (it used to say "Macro").
microF1_test_rbf = f1_score(y_test, pred_test_rbf, average='micro')
print('Test micro f1 score:', microF1_test_rbf)

In [None]:
# Softmax output of the CNN (RBF) model on the blind well.
pred_blind_rbf = model_conv1D_rbf.predict(X_blind_reshaped)

In [None]:
# Most probable class index per blind-well sample.
# NOTE(review): rebinds pred_blind_rbf from probabilities to labels; re-run the
# predict cell before running this cell a second time.
pred_blind_rbf = tf.argmax(pred_blind_rbf, axis=1)

In [None]:
# Abbreviated lithology names. NOTE(review): not used in this cell — the plot
# below labels its axes with list_blind_full.
list_blind = ['Ss', 'Ss/Sh', 'Sh', 'M', 'L', 'T']

# CNN (RBF) on the blind well: confusion matrix, text report, normalized plot.
cm_rbf = confusion_matrix(y_blind, pred_blind_rbf)
print(classification_report(y_blind, pred_blind_rbf))
plot_confusion_matrix(cm_rbf, list_blind_full, normalize=True)

### 9.7. MLP

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Multi-layer perceptron with two hidden layers of 50 units each, fitted on the
# training split; random_state fixes the random number generation used by the fit.
mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(50,50)).fit(X_train, y_train)

In [None]:
# Class labels predicted by the MLP for the test split.
pred_mlp_test = mlp.predict(X_test)

In [None]:
# MLP on the test split: confusion matrix (kept for the summary table),
# per-class text report, then the row-normalized matrix plot.
cm_test_MLP = confusion_matrix(y_test, pred_mlp_test)
print(classification_report(y_test, pred_mlp_test, target_names=training_features))
plot_confusion_matrix(cm_test_MLP, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the MLP on the test split. The label matches the
# averaging that is computed (it used to say "Macro").
microF1_test_mlp = f1_score(y_test, pred_mlp_test, average='micro')
print('Test micro f1 score:', microF1_test_mlp)

In [None]:
# MLP on the blind well: predict labels, build the confusion matrix (reused by
# the blind-well summary table), print the report and plot the normalized matrix.
pred_mlp_blind = mlp.predict(X_blind_stnd)
cm_mlp = confusion_matrix(y_blind, pred_mlp_blind)
print(classification_report(y_blind, pred_mlp_blind))
plot_confusion_matrix(cm_mlp, list_blind_full, normalize=True)

# 10. Model performance evaluation

I will use the diagonal of the confusion matrix computed on the test data set to evaluate model performance. Each diagonal entry, divided by the sum of its row, gives the fraction of samples of that lithology that was predicted correctly.

In [None]:
### To create a data frame recording the correct prediction (normalized) of
### facies for each machine learning algorithm: for every class, the diagonal
### entry of the test confusion matrix divided by the sum of its row.

mod_test_list = ['SVM', 'GB', 'RF','KNN','CNN','CNN-RBF','MLP']
cm_test_list = [cm_test_SVM, cm_test_GB, cm_test_RF, cm_test_knn,cm_test_cnn, cm_test_rbf, cm_test_MLP]
face_test_list = training_features
n_test_classes = len(face_test_list)

# Compute each model's column in one vectorized step so the frame is numeric
# (the former cell-by-cell .iloc writes left an object-dtype frame).
per_class_test = {}
for mod, cm in zip(mod_test_list, cm_test_list):
    cm_counts = np.asarray(cm, dtype=float)
    # Rows are matched to training_features by position, as before.
    hits = np.diag(cm_counts)[:n_test_classes]
    row_totals = cm_counts.sum(axis=1)[:n_test_classes]
    # A class without true samples has an all-zero row: store NaN for it
    # instead of triggering a 0/0 runtime warning.
    per_class_test[mod] = np.divide(hits, row_totals,
                                    out=np.full(hits.shape, np.nan),
                                    where=row_totals > 0)

pred_test_df = pd.DataFrame(per_class_test, index=training_features, columns=mod_test_list)


### add the accuracy factor
# The values are micro-averaged F1 scores, which coincide with overall accuracy
# for single-label multiclass predictions.
df_1 = pd.DataFrame([[microF1_test_SVM,
                      microF1_test_gb,
                      microF1_test_rf,
                      microF1_test_knn,
                      microF1_test_cnn,
                      microF1_test_rbf,
                      microF1_test_mlp]], index=['Accuracy'], columns=mod_test_list)


pred_test_conc = pd.concat([pred_test_df,df_1])
pred_test_conc

### To create a data frame recording the correct prediction (normalized) of 
### facies for each machine learning algorithm

#mod_test_list = ['SVM', 'GB', 'RF','KNN']
#cm_test_list = [cm_test_SVM, cm_test_GB, cm_test_RF, cm_test_knn]
#face_test_list = ['Sandstone','Sandstone/Shale','Shale','Marl','Dolomite','Limestone','Chalk']
#pred_test_df = pd.DataFrame(index=target_list, columns=mod_test_list)

#for mod in mod_test_list:
#    
#    col_index = int(mod_test_list.index(mod))
#    cm = cm_test_list[col_index]
#    
#    for face in face_test_list:
#        row_index = target_list.index(face)
#        #print(face, row_index, col_index)
#        pred_test_df.iloc[row_index, col_index] = cm[row_index][row_index]/sum(cm[row_index])
        

### add the accuracy factor
#df_1 = pd.DataFrame([[0.94, 0.93, 0.94, 0.93]], index=['Accuracy'], columns=mod_test_list)    
#pred_test_df = pred_test_df.append(df_1)

#print(pred_test_df.head(10))

In [None]:
# Grouped bar chart of the per-facies correct-prediction fraction on the test
# set: one group per facies, one bar per classifier.
X_ind = np.arange(pred_test_df.shape[0])
pred_df_index_list = training_features
aux = 0.1  # bar width; the offsets below place neighbouring bars side by side

# (column in pred_test_df, horizontal offset inside the group, bar colour)
test_bar_specs = [
    ('SVM', 0, 'k'),
    ('GB', 0.1, 'yellow'),
    ('RF', 0.2, 'darkgreen'),
    ('KNN', 0.3, 'orange'),
    ('CNN', 0.4, 'blue'),
    ('CNN-RBF', 0.5, 'red'),
    ('MLP', 0.6, 'lime'),
]

plt.figure(figsize=(10,5))
for model_name, bar_shift, bar_color in test_bar_specs:
    plt.bar(X_ind + bar_shift, pred_test_df[model_name], color=bar_color, width=aux)
plt.xticks(X_ind, pred_df_index_list)
plt.xlabel('Facies')
plt.ylabel('Correct predictions')
plt.legend(labels=mod_test_list)
plt.savefig('canada_performance_evaluation_test_data.pdf',bbox_inches='tight')
plt.show()

# 11. Classifier evaluation using the blind test well

I will use the same method as in the previous section (Section 10) for evaluation.

In [None]:
### To create a data frame recording the correct prediction (normalized) of facies of blind test well for each machine learning algorithm
### (diagonal entry of each confusion matrix divided by the sum of its row)

blind_class  = ['Sandstone',
                  'Sandstone/Shale',
                  'Shale',
                  'Marl',
                  'Limestone',
                  'Chalk',
                  'Anhydrite',
                  'Tuff']

mod_list = ['SVM', 'GB', 'RF','KNN','CNN','CNN-RBF','MLP']
cm_list = [cm_SVM, cm_GB, cm_RF, cm_knn, cm_cnn, cm_rbf, cm_mlp]
n_blind_classes = len(blind_class)

# NOTE(review): rows of each confusion matrix are matched to blind_class purely
# by position. The confusion-matrix plots label the same matrices with
# list_blind_full, so confirm that both lists describe the same class order.
# Each column is computed in one vectorized step so the frame is numeric
# (the former cell-by-cell .iloc writes left an object-dtype frame).
per_class_blind = {}
for mod, cm in zip(mod_list, cm_list):
    cm_counts = np.asarray(cm, dtype=float)
    hits = np.diag(cm_counts)[:n_blind_classes]
    row_totals = cm_counts.sum(axis=1)[:n_blind_classes]
    # A class without true samples has an all-zero row: store NaN for it
    # instead of triggering a 0/0 runtime warning.
    per_class_blind[mod] = np.divide(hits, row_totals,
                                     out=np.full(hits.shape, np.nan),
                                     where=row_totals > 0)

pred_df = pd.DataFrame(per_class_blind, index=blind_class, columns=mod_list)



In [None]:
# Display the blind-well frame for a visual check (inspection only, no computation).
blind

In [None]:
# Display the KNN predictions for the blind well (inspection only, no computation).
preds_knn_blind

In [None]:
# Grouped bar chart of the per-facies correct-prediction fraction on the blind
# well: one group per facies, one bar per classifier.
X_ind = np.arange(pred_df.shape[0])

aux = 0.1  # bar width; the offsets below place neighbouring bars side by side

# (column in pred_df, horizontal offset inside the group, bar colour)
blind_bar_specs = [
    ('SVM', 0, 'k'),
    ('GB', 0.1, 'yellow'),
    ('RF', 0.2, 'darkgreen'),
    ('KNN', 0.3, 'orange'),
    ('CNN', 0.4, 'blue'),
    ('CNN-RBF', 0.5, 'red'),
    ('MLP', 0.6, 'lime'),
]

plt.figure(figsize=(10,5))
for model_name, bar_shift, bar_color in blind_bar_specs:
    plt.bar(X_ind + bar_shift, pred_df[model_name], color=bar_color, width=aux)
plt.xticks(X_ind, blind_class)
plt.xlabel('Facies')
plt.ylabel('Correct predictions')
plt.legend(labels=mod_list)
plt.savefig('canada_performance_evaluation_blind_data.pdf',bbox_inches='tight')
plt.show()

# 12. Plot the predicted facies for comparison

In [None]:
# Attach every model's blind-well predictions as new columns. assign() works on
# a copy, so the frame bound to `blind` before this cell is not modified in place.
blind = blind.assign(
    SVM=pred_blind,
    GB=pred_GB_blind,
    RF=preds_RF_blind,
    KNN=preds_knn_blind,
    CNN=pred_blind_cnn,
    RBF=pred_blind_rbf,
    MLP=pred_mlp_blind,
)

blind.head()

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Colours of the facies colormap built in compare_facies_plot (eight entries).
# Presumably entry k colours the class abbreviated by entry k of blind_class
# below — confirm against the label encoding.
facies_colors = ['bisque',
                 'darkorange',
                 'darkgoldenrod',
                 'peachpuff',
                 'beige',
                 'honeydew',
                 'white','red']

# Abbreviated class names, joined into the colorbar label by compare_facies_plot.
# NOTE(review): this rebinds blind_class, which an earlier cell defined with the
# full lithology names.
blind_class  = ['Ss',
                  'Ss/Sh',
                  'Sh',
                  'M',
                  'L',
                  'Ch',
                  'A',
                  'T']

def compare_facies_plot(logs, compare1, compare2, compare3, compare4, compare5, compare6, compare7, facies_colors):
    """Plot the logs of one well next to the reference and predicted facies.

    Draws 15 side-by-side panels: seven log curves (RMED, RDEP, RHOB, GR, NPHI,
    DTC, PEF) against ``DEPTH_MD``, the reference facies column ``LITH_SI`` and
    one colour strip per ``compare*`` column.

    Parameters
    ----------
    logs : pandas.DataFrame
        Must contain ``WELL``, ``DEPTH_MD``, the seven log columns, ``LITH_SI``
        and every column named by ``compare1`` ... ``compare7``. It is sorted
        by depth on a copy; the caller's frame is left untouched.
    compare1, ..., compare7 : str
        Names of the columns holding the facies codes to compare.
    facies_colors : sequence
        Colours of the facies colormap. Codes are colour-mapped over the range
        ``1 .. len(facies_colors)`` (previously hard-coded to 8).

    Notes
    -----
    The colorbar label is built from the module-level ``blind_class`` list.
    """
    # make sure logs are sorted by depth
    logs = logs.sort_values(by='DEPTH_MD')
    cmap_facies = colors.ListedColormap(list(facies_colors), 'indexed')
    num_colors = len(facies_colors)
    ztop = logs.DEPTH_MD.min()
    zbot = logs.DEPTH_MD.max()

    # (column, matplotlib format string, extra line style) for each log track
    log_tracks = [
        ('RMED', '-g', {}),
        ('RDEP', '-', {}),
        ('RHOB', '-', {'color': '0.5'}),
        ('GR', '-', {'color': 'r'}),
        ('NPHI', '-', {'color': 'black'}),
        ('DTC', '-', {'color': 'black'}),
        ('PEF', '-', {'color': 'black'}),
    ]
    # (column, x-axis label) for each facies strip; the reference comes first
    facies_tracks = [('LITH_SI', 'Facies')] + [
        (column, column)
        for column in (compare1, compare2, compare3, compare4,
                       compare5, compare6, compare7)
    ]
    n_logs = len(log_tracks)

    f, ax = plt.subplots(nrows=1, ncols=n_logs + len(facies_tracks), figsize=(18, 15))

    for axis, (curve, fmt, line_style) in zip(ax, log_tracks):
        axis.plot(logs[curve], logs.DEPTH_MD, fmt, **line_style)
        # Depth increases downwards
        axis.set_ylim(ztop, zbot)
        axis.invert_yaxis()
        axis.grid()
        axis.locator_params(axis='x', nbins=3)
        axis.set_xlabel(curve)
        axis.set_xlim(logs[curve].min(), logs[curve].max())

    last_image = None
    for axis, (column, label) in zip(ax[n_logs:], facies_tracks):
        # Repeat the 1-D code column 100 times sideways so imshow draws a strip
        strip = np.repeat(np.expand_dims(logs[column].values, 1), 100, 1)
        last_image = axis.imshow(strip, interpolation='none', aspect='auto',
                                 cmap=cmap_facies, vmin=1, vmax=num_colors)
        axis.set_xlabel(label)

    # One shared colorbar, attached to the right of the last facies strip
    divider = make_axes_locatable(ax[-1])
    cax = divider.append_axes("right", size="20%", pad=0.05)
    cbar = plt.colorbar(last_image, cax=cax)
    cbar.set_label((30*' ').join(blind_class))
    cbar.set_ticks([])  # class names are carried by the label, not by ticks

    # Only the first panel keeps its depth tick labels
    for axis in ax[1:]:
        axis.set_yticklabels([])

    # NOTE(review): x tick labels are hidden from panel 5 onwards, which also
    # hides them on the DTC and PEF log tracks — kept as in the original layout.
    for axis in ax[5:]:
        axis.set_xticklabels([])

    f.suptitle('Well: %s'%logs.iloc[0]['WELL'], fontsize=14,y=0.94)

In [None]:
# Blind-well logs with the reference facies and the seven model predictions.
# The column names are the ones added to `blind` in the cell above the plot helper.
compare_facies_plot(blind, 'SVM', 'GB', 'RF', 'KNN','CNN','RBF','MLP', facies_colors)

In [None]:
# Repeated hold-out check of a random forest: 100 random 95/5 splits of (X, y),
# a fresh 2000-tree forest fitted on each, test accuracy collected in list0.
# NOTE(review): no random_state is set on the split or the forest, so the scores
# change from run to run; fitting 100 forests of 2000 trees is also slow.
list0 = []
for i in range(0,100,1):
    X_train0, X_test0, y_train0, y_test0 = train_test_split(X, y, test_size=0.05)
    clf0 = ensemble.RandomForestClassifier(n_estimators=2000)
    clf0.fit(X_train0, y_train0)
    list0.append(clf0.score(X_test0, y_test0))
    # score() is evaluated a second time here just for the progress print
    print(i + 1, clf0.score(X_test0, y_test0))