# 1. Data loading

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import numpy as np
from DataView import DataView

filename = 'CanadaData/wells_canada.csv' # name of the input data file
df = pd.read_csv(filename) # read the input data

filenameLoc = 'CanadaData//MannvilleWells_LatLong.csv' # name of the well lat/long input file
dfLoc = pd.read_csv(filenameLoc) # read the well-location data

In [None]:
# Wrap both tables in the project's DataView helper.
# NOTE(review): rename_features is assumed to standardise the well-identifier
# column named by its argument ('SitID' for locations, 'WELL' for the logs) --
# confirm against the DataView implementation.
dataView = DataView(dfLoc)
dataView.rename_features('SitID')
dataFrame = DataView(df)
dataFrame.rename_features('WELL')

In [None]:
# NOTE(review): presumably selects 71 wells and returns their location rows
# (the result is later indexed by 'SitID') -- confirm the argument's meaning in DataView.
wells_location = dataView.mark_well_postion_on_map(71)

In [None]:
# Display the selected wells through DataView.well_loc.
# NOTE(review): zs looks like a zoom setting for the map -- confirm in DataView.
dataSelectedView = DataView(wells_location)
dataSelectedView.well_loc(zs=9)

## 1.2 Blind and training well location

In [None]:
# Identifiers of the wells used as blind wells, and their location rows.
blind_well_name = [
    'well-125562',
    'well-124862',
    'well-123528',
    'well-122293',
    'well-121444',
    'well-118629',
    'well-154815',
    'well-116458',
    'well-114847',
    'well-113074',
]
blind_location = dataView.select_data(blind_well_name, sitID='SitID')

In [None]:
blind_location

In [None]:
TBLocation = pd.concat([wells_location,blind_location])

In [None]:
dataTBLocationView = DataView(TBLocation)
dataTBLocationView.well_loc(zs=7)

## 1.3 Selecting the training data

In [None]:
wells_location

In [None]:
# Earlier manual selection, kept for reference; the DataView.select_data call
# at the end of this cell is what actually builds training_data.
# training_data = []

# for string in wells_location['SitID']:

#     #print(string)
#     training_data.append(df[df['WELL'] == string])

# training_data = pd.concat(training_data)
# display(training_data)
# Select the log rows whose 'WELL' value is one of the selected 'SitID' values.
# NOTE(review): exact matching semantics live in DataView.select_data -- confirm.
training_data = dataFrame.select_data(wells_location['SitID'], sitID='WELL')

In [None]:
training_data

In [None]:
# Count the distinct wells in the training data.
# dropna=False counts a missing WELL value as its own entry, exactly as
# iterating over .unique() did.
soma = training_data['WELL'].nunique(dropna=False)

print(soma, 'wells selected.')

## 1.4 Selecting the blind data

In [None]:
# Collect the log rows of every blind well, in the order given by
# blind_well_name, and stack them into a single DataFrame.
rows_per_blind_well = [df[df['WELL'] == well_id] for well_id in blind_well_name]
blind_data = pd.concat(rows_per_blind_well)
display(blind_data)

In [None]:
# Count the distinct wells in the blind data.
# dropna=False counts a missing WELL value as its own entry, exactly as
# iterating over .unique() did.
soma = blind_data['WELL'].nunique(dropna=False)

print(soma, 'blind wells selected.')

In [None]:
# Working copy of the training data restricted to the columns used by the
# missing-data overview. Note that this rebinds the notebook-level `df`.
df = training_data[['WELL', 'DEPTH', 'FACIES', 'SW', 'VSH', 'PHI', 'RW', 'W_TAR']].copy()

In [None]:
# Column headers kept for the missing-data overview (identifier, depth,
# facies label and the five curves).
plot_cols = ['WELL', 'DEPTH', 'FACIES', 'SW', 'VSH', 'PHI', 'RW', 'W_TAR']

In [None]:
df = df[plot_cols].copy()

In [None]:
data_nan = df.copy()

In [None]:
# Encode each curve as a presence/absence track for the missing-data plot.
# Curve number `num` owns the band [num, num + 1] on the x-axis: a sample
# becomes num + 1 where the curve has a value and num where it is null, so
# filling from `num` to the encoded value paints the band only where data exists.
# WELL, DEPTH and FACIES are not curves and are skipped: enumerating
# `columns[2:]` started at FACIES and pushed SW..W_TAR one band to the right
# of where the plotting cell draws them (SW from 0, VSH from 1, ...).
curve_cols = [c for c in data_nan.columns if c not in ('WELL', 'DEPTH', 'FACIES')]
for num, col in enumerate(curve_cols):
    # np.where replaces the chained `replace(..., inplace=True)`, which does
    # not reliably write back to the frame in recent pandas versions.
    data_nan[col] = np.where(data_nan[col].notnull(), num + 1, num)
    print(col, num) #Print out the col name and number to verify it works

In [None]:
data_nan.describe()

# 2. Plotting the Data with and without NaN

In [None]:
grouped = data_nan.groupby('WELL')

In [None]:
#Setup the labels we want to display on the x-axis
# (raw string for \phi so the backslash is not parsed as a Python escape)
labels = ['$S_{Water}$', '$V_{Shale}$', r'$\phi$', '$R_{Water}$', '$W_{tar}$']

#Setup the figure and the subplots
fig, axs = plt.subplots(3, 3, figsize=(20,10))

#Loop through the wells of the grouped dataframe, one subplot per well.
#zip() stops at the shorter input, so at most 9 wells (3 x 3 axes) are drawn.
#The per-well frame is called well_df so the loop no longer overwrites the
#notebook-level `df`.
for (name, well_df), ax in zip(grouped, axs.flat):
    ax.set_xlim(0,5) # one unit-wide band per curve (5 curves)

    #Create one fill_betweenx per curve, between the number representing
    #null values (left edge of the band) and the encoded value of the curve
    ax.fill_betweenx(well_df.DEPTH, 0, well_df.SW, facecolor='lightgrey')
    ax.fill_betweenx(well_df.DEPTH, 1, well_df.VSH, facecolor='mediumseagreen')
    ax.fill_betweenx(well_df.DEPTH, 2, well_df.PHI, facecolor='lightblue')
    ax.fill_betweenx(well_df.DEPTH, 3, well_df.RW, facecolor='lightcoral')
    ax.fill_betweenx(well_df.DEPTH, 4, well_df.W_TAR, facecolor='red')

    #Setup the grid, axis labels and ticks
    ax.grid(axis='x', alpha=0.5, color='black')
    ax.set_ylabel('Depth (m)', fontsize=12, fontweight='bold')

    #Position vertical lines at the boundaries between the bars
    ax.set_xticks([1,2,3,4,5], minor=False)

    #Position the curve names in the centre of each column
    ax.set_xticks([0.5, 1.5 ,2.5 ,3.5, 4.5], minor=True)

    #Setup the x-axis tick labels (names on the minor ticks, none on the major ticks)
    ax.set_xticklabels(labels, fontsize=12, minor=True, verticalalignment='bottom')
    ax.set_xticklabels('', minor=False)
    ax.tick_params(axis='x', which='minor', pad=-5)

    #Assign the well name as the title to each subplot
    ax.set_title(name, fontsize=16, fontweight='bold')

#Apply the layout adjustments BEFORE saving, so the PDF matches what is shown
plt.tight_layout()
plt.subplots_adjust(hspace=0.15, wspace=0.25)
plt.savefig('canada_missingdata.pdf',bbox_inches='tight')
plt.show()


# 4. Column Remapping / Renaming

In [None]:
# Lookup from the integer FACIES code to a lithology name; it is applied to
# the FACIES column with Series.map to create the 'LITH' column.
lithology_numbers = {0: 'Undefined',
                     1: 'Sand', 
                     2: 'ShalySand',
                     3: 'SandyShale', 
                     4: 'Shale',
                     5: 'Coal', 
                     6: 'CementedSand'}

In [None]:
# Add a text lithology column next to the numeric facies code.
# Codes missing from lithology_numbers become NaN (Series.map with a dict).
training_data['LITH'] = training_data['FACIES'].map(lithology_numbers)
blind_data['LITH'] = blind_data['FACIES'].map(lithology_numbers)

In [None]:
# From here on the numeric facies code is called 'LITH_SI' (renamed in place
# on both frames); 'LITH' holds the matching names.
training_data.rename(columns={'FACIES':'LITH_SI'}, inplace=True)
blind_data.rename(columns={'FACIES':'LITH_SI'}, inplace=True)

# 5. View the number of samples of the whole data

In [None]:
# Bar chart of the number of training samples per lithology code.
training_features = ['Sand','ShSa','SaSh','Sh','Co','CemSa']

# Count once, then reuse the same Series for the chart and the printout.
training_counts = training_data['LITH_SI'].value_counts().sort_index()
training_counts.plot(kind='bar')
print(training_counts)

# Relabel the six bars with the short class names.
X_ind = np.arange(0,6,1)
plt.title('Number of training data samples')
plt.xticks(X_ind, training_features, fontsize=14)
plt.yticks(fontsize=14)
plt.savefig('canada_number_training_samples.pdf',bbox_inches='tight')
plt.show()

In [None]:
# Bar chart of the number of blind-well samples per lithology code.
blind_features = ['Undef','Sand','ShSand','SaSh','Sh','Co','CemSand']

# Count once, then reuse the same Series for the chart and the printout.
blind_counts = blind_data['LITH_SI'].value_counts().sort_index()
blind_counts.plot(kind='bar')
print(blind_counts)

# Relabel the seven bars with the short class names.
X_ind = np.arange(0,7,1)
plt.title('Number of blind data samples')
plt.xticks(X_ind, blind_features, fontsize=14)
plt.yticks(fontsize=14)
plt.savefig('canada_number_blind_samples.pdf',bbox_inches='tight')
plt.show()

# 6. Crossplot PHI and VSH (whole training data)

In [None]:
import seaborn as sns

# One PHI-vs-VSH scatter panel per lithology name, wrapped at three panels per row.
g = sns.FacetGrid(training_data, col='LITH', col_wrap=3)
g.map(sns.scatterplot, 'PHI', 'VSH', alpha=0.5)
#g.set(xlim=(-0.15, 1))
#g.set(ylim=(3, 1))
plt.savefig('canada_cross_plot.pdf',bbox_inches='tight')
plt.show()

# 7. sorting out the blind test well

In [None]:
import seaborn as sns

# Same PHI-vs-VSH crossplot as for the training data, here for the blind wells
# (shown only, not saved to a file).
g = sns.FacetGrid(blind_data, col='LITH', col_wrap=3)
g.map(sns.scatterplot, 'PHI', 'VSH', alpha=0.5)
#g.set(xlim=(-0.15, 1))
#g.set(ylim=(3, 1))
plt.show()

In [None]:
# Histogram of the class code and of each curve in the training data,
# laid out on a 3 x 2 grid (subplot positions are 1-based).
col_list = ['LITH_SI', 'SW', 'VSH', 'PHI', 'RW', 'W_TAR']
plt.figure(figsize=(15,10))
for i, col in enumerate(col_list, start=1):
    plt.subplot(3,2,i)
    plt.hist(training_data[col])
    plt.title(col)

plt.savefig('canada_features_chart_values.pdf',bbox_inches='tight')
plt.show()

# 8. Prepare data for modeling and blind test well


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import cross_val_score

from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.metrics import classification_report

In [None]:
# Input curves used as model features.
features = ['SW', 'VSH', 'PHI', 'RW', 'W_TAR']

# Target: the numeric lithology code; inputs: the five curves above.
y = training_data['LITH_SI']
X = training_data[features]

In [None]:
X.corr()

In [None]:
# Hold out half of the training wells' samples as a test set (fixed seed so
# the split is reproducible). Standardisation happens in the next cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [None]:
# Fit the scaler on the training split only, then apply the same
# transformation to the test split. Both become NumPy arrays from here on.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [None]:
# Split the blind data into one DataFrame per well, in blind_well_name order.
blind_data_sep = [blind_data[blind_data['WELL'] == well_id] for well_id in blind_well_name]

In [None]:
# Per-well targets and inputs for the blind wells; entry i of each list
# belongs to well i of blind_data_sep.
y_blind = [well_df['LITH_SI'] for well_df in blind_data_sep]  # lithology codes of one well
X_blind = [well_df[features] for well_df in blind_data_sep]   # unscaled features
# Features standardised with the scaler that was fitted on the training split.
X_blind_stnd = [sc.transform(well_df[features]) for well_df in blind_data_sep]

# 9. Parameter optimization and classifier training

Modeling algorithms:
1. SVM
2. Gradient boosting
3. Random forest
4. KNN
5. CNN
6. CNN(Kernel RBF)
7. MLP

In [None]:
from sklearn.svm import SVC # To use Support Vector Machine
from sklearn import ensemble # To use Gradient Boosting and Random forest
from sklearn.neighbors import KNeighborsClassifier # To use KNN


### 9.1 SVM: Parameter optimization

In [None]:
# c_list = [0.01, 1, 5, 10, 20, 50, 100, 1000, 5000, 10000]
# gamma_list = [0.0001, 0.001, 0.01, 0.1, 1, 10]
# #gamma_list = [0.0001]
# i = 0
# plt.figure(figsize=(15,10))

# for gamma_value in gamma_list:
#     i = i + 1
#     scores = list()
#     score_stds = list()
#     score_tests = list()
#     print('interations gamma_list =',i)
#     j = 0
#     for c_value in c_list:
        
#         j = j + 1
#         print('interations c_list =',j)
        
#         clf_cv = SVC(C=c_value, gamma=gamma_value)
        
#         cv_score = cross_val_score(clf_cv, X_train, y_train)
        
#         scores.append(np.mean(cv_score))
#         score_stds.append(np.std(cv_score))
#         clf_cv.fit(X_train, y_train)
        
#         score_test = clf_cv.score(X_test, y_test)
#         score_tests.append(score_test)
#     #plt.plot(x, y1, "-b", label="sine")
#     plt.subplot(2,3,i)
#     plt.semilogx(c_list, scores, '-b', label='CV-Train')
#     plt.semilogx(c_list, score_tests, '-r', label='CV-Test')
#     #plt.semilogx(c_list, np.array(scores)+np.array(score_stds), 'b--')
#     #plt.semilogx(c_list, np.array(scores)+-np.array(score_stds), 'b--')
#     plt.legend()
#     plt.title('$\gamma$ = {}'.format(gamma_value))
#     plt.xlabel('C values')
#     plt.ylabel('Accuracy')
#     #plt.ylim(0,1.1)
# plt.savefig('canada_optimus_values_svm.pdf',bbox_inches='tight')
# plt.show()

SVM classifier training

In [None]:
# SVM classifier with C=100 and gamma=1, trained on the standardised training split.
clf = SVC(C=100, gamma=1)
clf.fit(X_train, y_train)

In [None]:
def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Greys):
    """
    Print the kind of matrix being shown and plot the confusion matrix.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix, true labels on rows and predictions on columns.
    classes : sequence of str
        Tick labels, one per row/column of `cm`, in the same order.
    normalize : bool, default False
        When True, every row is divided by its sum, so each cell shows the
        fraction of that true class. Rows that sum to zero (a class with no
        true samples) are left as zeros instead of becoming NaN.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the cells.

    Returns
    -------
    None. The figure is displayed with plt.show().
    """
    import itertools

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row total is non-zero; 0/0 would give NaN,
        # which also turns the colour threshold below into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)

    # Fractions get two decimals, raw counts are printed as integers.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center", verticalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [None]:
# Evaluate the SVM on the held-out test split: per-class report and
# row-normalised confusion matrix.
# training_features supplies the class names, so its length must equal the
# number of classes present in y_test/pred_test.
pred_test = clf.predict(X_test)
print(classification_report(y_test, pred_test, target_names=training_features))
cm_test_SVM = confusion_matrix(y_test, pred_test)
plot_confusion_matrix(cm_test_SVM, training_features, normalize=True)

In [None]:
# The score is computed with average='micro', so it is labelled as micro
# (the previous printout called it "Macro").
microF1_test_SVM = f1_score(y_test, pred_test, average='micro')
print('Test Micro f1 score:', microF1_test_SVM)

In [None]:
# Lookup from lithology code to short class name for the blind wells.
# This rebinds blind_features, which was a plain list of the same names in
# the bar-chart cell earlier in the notebook.
blind_features = {0: 'Undef',
                  1: 'Sand',
                  2: 'ShSand',
                  3: 'SaSh',
                  4: 'Sh',
                  5: 'Co',
                  6: 'CemSand'}

In [None]:
# For every blind well, list the short names of the lithology classes that
# actually occur in that well, ordered by class code.
# Iterating y_blind directly (instead of a fixed range(1, 11)) keeps this in
# step with however many blind wells were selected.
blind_class = [
    [blind_features[code] for code in well_labels.value_counts().sort_index().index]
    for well_labels in y_blind
]


In [None]:
blind_class

In [None]:
# Index of the blind well inspected in detail (also used by the later model sections).
k = 9

pred_blind = clf.predict(X_blind_stnd[k])
# Report over every class that occurs in the truth OR in the predictions.
# Taking the names only from the true labels (blind_class[k]) breaks as soon
# as the model predicts a class that is absent from this well:
# classification_report raises on the length mismatch and the confusion
# matrix gets more rows/columns than tick labels.
labels_k = np.union1d(y_blind[k], pred_blind)
names_k = [blind_features[code] for code in labels_k]
print(classification_report(y_blind[k], pred_blind, labels=labels_k, target_names=names_k))
cm_SVM = confusion_matrix(y_blind[k], pred_blind, labels=labels_k)
plot_confusion_matrix(cm_SVM, names_k, normalize=True)

In [None]:
# Micro-averaged F1 of the SVM on every blind well.
# Iterating the two lists together removes the hard-coded count of 10 wells.
for X_well, y_well in zip(X_blind_stnd, y_blind):
    aux_pred_svm = clf.predict(X_well)
    microF1_blind_SVM = f1_score(y_well, aux_pred_svm, average='micro')
    print('Blind micro f1 score:', microF1_blind_SVM)

### 9.2 Gradient boosting (GB): Parameter optimization

In [None]:
# Second split (70/30, same seed) on the UNSCALED features; it feeds the
# gradient boosting, random forest and KNN cells that follow.
# NOTE(review): KNN is distance-based, unlike the tree ensembles -- confirm
# that training it on unscaled features is intended.
X1_train, X1_test, y1_train, y1_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# loss_list = ['deviance']
# max_depths = [1,2,3]

# i = 0
# plt.figure(figsize=(15,5))


# for los in loss_list:
    
#     i = i + 1
#     scores = list()
#     score_tests = list()

#     for depth in max_depths:
        
#         params = {'loss': los, ##  loss{‘deviance’, ‘exponential’}, default=’deviance’
#                   'learning_rate': 0.1, ##  learning_ratefloat, default=0.1
#                   'n_estimators': 500, ##  number of iterations, int, default=100
#                   'max_depth': depth, ##  int, default=3
#                   'subsample': 1, ## float, default=1.0
#                   'min_samples_split': 2 ## int or float, default=2
#                  }
#         clf_cv = ensemble.GradientBoostingClassifier(**params)
        
#         # Train data
#         clf_cv.fit(X1_train, y1_train)
#         cv_score = clf_cv.score(X1_train, y1_train)
#         scores.append(np.mean(cv_score))
        
#         # Test data
#         score_test = clf_cv.score(X1_test, y1_test)
#         score_tests.append(score_test)
    
#     plt.subplot(1,2,i)
#     plt.plot(max_depths, scores, 'o-', color='b', label='Train')
#     plt.plot(max_depths, score_tests, 'o-', color='r', label='Test')
#     plt.legend()
#     plt.title('Loss = {}'.format(los))
#     plt.xlabel('Max depth')
#     plt.ylabel('Accuracy')
    
    
#     #ax.semilogx(C_range, cv_errors, label='CV error')
#     #ax.semilogx(C_range, train_errors, label='Train error')
    
#     plt.ylim(0,1.1)
# plt.savefig('canada_parameter_gb_max_depth.pdf',bbox_inches='tight')
# plt.show()

As you can see, the accuracy converges at a max depth of 4.

In [None]:
# learning_rates = [0.001, 0.01, 0.1, 0.2, 0.4]
# n_estimators_list = [100, 500, 1000]

# i = 0
# plt.figure(figsize=(15,5))

# for est in n_estimators_list:
#     i = i + 1
#     scores = list()
#     score_tests = list()
    
#     for rate in learning_rates:
#         params = {'loss': 'deviance', ##  loss{‘deviance’, ‘exponential’}, default=’deviance’
#                   'learning_rate': rate, ##  learning_ratefloat, default=0.1
#                   'n_estimators': est, ##  number of iterations, int, default=100
#                   'max_depth': 1, ##  int, default=3
#                   'subsample': 1, ## float, default=1.0
#                   'min_samples_split': 2 ## int or float, default=2
#                   }
#         clf_cv = ensemble.GradientBoostingClassifier(**params)
#         clf_cv.fit(X1_train, y1_train)
#         cv_score = clf_cv.score(X1_train, y1_train)
#         scores.append(np.mean(cv_score))
#         score_test = clf_cv.score(X1_test, y1_test)
#         score_tests.append(score_test)
        
#     plt.subplot(1,3,i)
#     plt.semilogx(learning_rates, scores, 'o-', color='b', label='Train')
#     plt.semilogx(learning_rates, score_tests, 'o-', color='r', label='Test')
#     plt.legend()
#     plt.title('N estimators = {}'.format(est))
#     plt.xlabel('learning rate')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)
# plt.savefig('canada_parameter_gb_n_estimators.pdf',bbox_inches='tight')
# plt.show()

N_estimators =100 and learning rate = 0.01

In [None]:
# subsamples = [0.2, 0.6, 1]
# n_estimators_list = [100, 500, 1000]

# i = 0
# plt.figure(figsize=(15,5))

# for est in n_estimators_list:
    
#     i = i + 1
#     scores = list()
#     score_tests = list()
    
#     for sub in subsamples:
        
#         params = {'loss': 'deviance', ##  loss{‘deviance’, ‘exponential’}, default=’deviance’
#                   'learning_rate': 0.01, ##  learning_ratefloat, default=0.1
#                   'n_estimators': est, ##  number of iterations, int, default=100
#                   'max_depth': 1, ##  int, default=3
#                   'subsample': sub, ## float, default=1.0
#                   'min_samples_split': 2 ## int or float, default=2
#                   }
        
#         clf_cv = ensemble.GradientBoostingClassifier(**params)
#         clf_cv.fit(X_train, y_train)
#         cv_score = clf_cv.score(X_train, y_train)
        
#         scores.append(np.mean(cv_score))
#         score_test = clf_cv.score(X_test, y_test)
#         score_tests.append(score_test)
        
        
#     plt.subplot(1,3,i)
#     plt.plot(subsamples, scores, 'o-', color='b', label='Train')
#     plt.plot(subsamples, score_tests, 'o-', color='r', label='Test')
#     plt.legend()
#     plt.title('n_estimators = {}'.format(est))
#     plt.xlabel('sub samples')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)
# plt.savefig('canada_parameter_gb_sub_samples.pdf',bbox_inches='tight')
# plt.show()

Based on the accuracy plots, the classifier below is trained with max_depth=1, learning_rate=0.01, n_estimators=100, subsample=0.2

Gradient Boosting classifier training

In [None]:
# Hyper-parameters of the gradient boosting classifier.
# NOTE(review): newer scikit-learn releases renamed the 'deviance' loss to
# 'log_loss' -- confirm against the installed version before re-running.
params = {'loss': 'deviance', ##  loss {'deviance', 'exponential'}, default='deviance'
          'learning_rate': 0.01, ##  float, default=0.1
          'n_estimators': 100, ##  number of iterations, int, default=100
          'max_depth': 1, ##  int, default=3
          'subsample': 0.2, ## float, default=1.0
          'min_samples_split': 2 ## int or float, default=2
          }
# 5-fold cross-validation scores on the unscaled training split.
clf_GB = ensemble.GradientBoostingClassifier(**params)
print(cross_val_score(clf_GB, X1_train, y1_train, cv=5))

In [None]:
# Re-create the classifier with the same parameters, fit it on the unscaled
# training split and evaluate on the matching test split.
clf_GB = ensemble.GradientBoostingClassifier(**params)
clf_GB.fit(X1_train, y1_train)
preds_GB = clf_GB.predict(X1_test)

# No target_names here, so the report rows show the numeric class codes;
# the confusion-matrix ticks use training_features.
print(classification_report(y1_test, preds_GB))
cm_test_GB = confusion_matrix(y1_test, preds_GB)
plot_confusion_matrix(cm_test_GB, training_features, normalize=True)

In [None]:
microF1_test_gb = f1_score(y1_test, preds_GB, average='micro')
print('Test Micro f1 score:', microF1_test_gb)

In [None]:
pred_GB_blind = clf_GB.predict(X_blind[k])
# Report over every class that occurs in the truth OR in the predictions;
# names taken only from the true labels (blind_class[k]) mismatch as soon as
# the model predicts a class that is absent from this well.
labels_k = np.union1d(y_blind[k], pred_GB_blind)
names_k = [blind_features[code] for code in labels_k]
print(classification_report(y_blind[k], pred_GB_blind, labels=labels_k, target_names=names_k))
cm_GB = confusion_matrix(y_blind[k], pred_GB_blind, labels=labels_k)
plot_confusion_matrix(cm_GB, names_k, normalize=True)

In [None]:
# Micro-averaged F1 of the gradient boosting model on every blind well
# (unscaled features, matching how the model was trained).
# Iterating the two lists together removes the hard-coded count of 10 wells.
for X_well, y_well in zip(X_blind, y_blind):
    aux_pred_GB_blinda = clf_GB.predict(X_well)
    microF1_blind_GB = f1_score(y_well, aux_pred_GB_blinda, average='micro')
    print('Blind micro f1 score:', microF1_blind_GB)

### 9.3. Random forest (RF) parameter optimization

In [None]:
# max_depths = [2, 3, 4]
# n_estimators_list = [100, 500, 1000, 2000, 5000]

# i = 0
# plt.figure(figsize=(15,5))

# for depth in max_depths:
    
#     i = i + 1
#     scores = list()
#     score_tests = list()
    
#     for est in n_estimators_list:
#         params = {'n_estimators': est, ##  number of iterations, int, default=100
#                   'max_depth': depth, ##  int, default=None
#                   'n_jobs': -1 #to speed up computations by taking advantage of parallel processing.
                  
#                   }
#         clf_cv = ensemble.RandomForestClassifier(**params)
#         clf_cv.fit(X1_train, y1_train)
#         cv_score = clf_cv.score(X1_train, y1_train)
#         scores.append(np.mean(cv_score))
#         score_test = clf_cv.score(X1_test, y1_test)
#         score_tests.append(score_test)
        
#     plt.subplot(1,4,i)
#     plt.plot(n_estimators_list, scores, color='b', label='Train')
#     plt.plot(n_estimators_list, score_tests, color='r', label='Test')
#     plt.legend()
#     plt.title('max depth = {}'.format(depth))
#     plt.xlabel('n_estimators')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)

# scores = list()
# score_tests = list()

# for est in n_estimators_list:
    
#     clf_cv = ensemble.RandomForestClassifier(n_estimators=est)
#     clf_cv.fit(X1_train, y1_train)
#     cv_score = clf_cv.score(X1_train, y1_train)
#     scores.append(np.mean(cv_score))
#     score_test = clf_cv.score(X1_test, y1_test)
#     score_tests.append(score_test)
    
# plt.subplot(1,4,4)
# plt.plot(n_estimators_list, scores, color='b', label='Train')
# plt.plot(n_estimators_list, score_tests, color='r', label='Test')
# plt.legend()
# plt.title('max depth = {}'.format('None'))
# plt.xlabel('n_estimators')
# plt.ylabel('Accuracy')
# plt.ylim(0,1.1)

# plt.savefig('canada_parameter_rf_max_depth.pdf',bbox_inches='tight')
# plt.show()

Max_depth = 3, and n_estimator = 100 gives best accuracy.

Random forest classifier training

In [None]:
# Random forest (depth 3, 100 trees, entropy criterion) on the unscaled split:
# print 5-fold cross-validation scores, then fit and evaluate on the test split.
clf_RF = ensemble.RandomForestClassifier(max_depth=3, n_estimators=100, criterion='entropy')
print(cross_val_score(clf_RF, X1_train, y1_train, cv=5))
clf_RF.fit(X1_train, y1_train)
preds_RF = clf_RF.predict(X1_test)
# No target_names here, so the report rows show the numeric class codes.
print(classification_report(y1_test, preds_RF))
cm_test_RF = confusion_matrix(y1_test, preds_RF)
plot_confusion_matrix(cm_test_RF, training_features, normalize=True)

In [None]:
# The score is computed with average='micro', so it is labelled as micro
# (the previous printout called it "Macro").
microF1_test_rf = f1_score(y1_test, preds_RF, average='micro')
print('Test Micro f1 score:', microF1_test_rf)

Random forest blind prediction

In [None]:
pred_RF_blind = clf_RF.predict(X_blind[k])
# Report over every class that occurs in the truth OR in the predictions;
# names taken only from the true labels (blind_class[k]) mismatch as soon as
# the model predicts a class that is absent from this well.
labels_k = np.union1d(y_blind[k], pred_RF_blind)
names_k = [blind_features[code] for code in labels_k]
print(classification_report(y_blind[k], pred_RF_blind, labels=labels_k, target_names=names_k))
cm_RF = confusion_matrix(y_blind[k], pred_RF_blind, labels=labels_k)
plot_confusion_matrix(cm_RF, names_k, normalize=True)

In [None]:
# Micro-averaged F1 of the random forest on every blind well.
# Iterating the two lists together removes the hard-coded count of 10 wells;
# the printout now says "Blind" because these are blind-well scores, not
# test-split scores.
for X_well, y_well in zip(X_blind, y_blind):
    aux_pred_RF_blind = clf_RF.predict(X_well)
    microF1_blind_rf = f1_score(y_well, aux_pred_RF_blind, average='micro')
    print('Blind micro f1 score:', microF1_blind_rf)

### 9.4. KNN Parameter optimization

In [None]:
# neighbor_list = [1, 3, 5, 7, 10]
# weight_list = ['uniform', 'distance']
# i=0

# for weight in weight_list:
    
    
#     scores = list()
#     score_tests = list()
#     i = i + 1
    
#     for neighbor in neighbor_list:
#         clf_cv = KNeighborsClassifier(n_neighbors=neighbor, weights=weight)
#         clf_cv.fit(X1_train, y1_train)
#         scores.append(clf_cv.score(X1_train, y1_train))
#         score_tests.append(clf_cv.score(X1_test, y1_test))
#         print(scores)
        
#     plt.subplot(1,2,i)
#     plt.plot(neighbor_list, scores, '-b', label = 'Train')
#     plt.plot(neighbor_list, score_tests, '-r', label = 'Test')
#     plt.title('Weight = {}'.format(weight))
#     plt.xlabel('K')
#     plt.ylabel('Accuracy')
#     plt.ylim(0,1.1)
# plt.savefig('canada_parameter_knn.pdf',bbox_inches='tight')    
# plt.show()

Using weight has a better KNN modeling score.

KNN classifier training

In [None]:
clf_knn = KNeighborsClassifier(n_neighbors=8, weights='uniform')
print(cross_val_score(clf_knn, X1_train, y1_train, cv=5))

In [None]:
# KNN (8 neighbours, uniform weights) fitted on the unscaled training split
# and evaluated on the matching test split.
clf_knn = KNeighborsClassifier(n_neighbors=8, weights='uniform')
clf_knn.fit(X1_train, y1_train)
preds_knn = clf_knn.predict(X1_test)
# No target_names here, so the report rows show the numeric class codes.
print(classification_report(y1_test, preds_knn))
cm_test_knn = confusion_matrix(y1_test, preds_knn)
plot_confusion_matrix(cm_test_knn, training_features, normalize=True)

In [None]:
# The score is computed with average='micro', so it is labelled as micro
# (the previous printout called it "Macro").
microF1_test_knn = f1_score(y1_test, preds_knn, average='micro')
print('Test Micro f1 score:', microF1_test_knn)

KNN blind well prediction

In [None]:
preds_knn_blind = clf_knn.predict(X_blind[k])
# Report over every class that occurs in the truth OR in the predictions, and
# name the rows like the other models' blind reports do. Tick labels taken
# only from the true labels (blind_class[k]) mismatch the matrix as soon as
# the model predicts a class that is absent from this well.
labels_k = np.union1d(y_blind[k], preds_knn_blind)
names_k = [blind_features[code] for code in labels_k]
print(classification_report(y_blind[k], preds_knn_blind, labels=labels_k, target_names=names_k))
cm_knn = confusion_matrix(y_blind[k], preds_knn_blind, labels=labels_k)
plot_confusion_matrix(cm_knn, names_k, normalize=True)

In [None]:
# Micro-averaged F1 of the KNN model on every blind well.
# Iterating the two lists together removes the hard-coded count of 10 wells;
# the printout now says "Blind micro" because these are blind-well scores
# computed with average='micro'.
for X_well, y_well in zip(X_blind, y_blind):
    aux_preds_knn_blind = clf_knn.predict(X_well)
    microF1_blind_knn = f1_score(y_well, aux_preds_knn_blind, average='micro')
    print('Blind micro f1 score:', microF1_blind_knn)

### 9.5 CNN

In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd


import folium
import numpy as np

print(tf.__version__)

In [None]:
import matplotlib.pyplot as plt
def plot_history(history, metric='mae', ylabel='Mean Abs Error [1000$]'):
    """Plot the training and validation curves of one metric of a Keras fit.

    Parameters
    ----------
    history : object with `.epoch` and `.history`
        What `model.fit` returns; `history.history` must contain the keys
        `metric` and `'val_' + metric`.
    metric : str, default 'mae'
        Name of the logged metric to plot. The default keeps the original
        behaviour; pass e.g. 'accuracy' or 'loss' for a model compiled with
        other metrics (the hard-coded 'mae' key raised KeyError for those).
    ylabel : str, default 'Mean Abs Error [1000$]'
        Label of the y-axis.
    """
    train_curve = np.array(history.history[metric])
    val_curve = np.array(history.history['val_' + metric])

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(history.epoch, train_curve, label='Train')
    plt.plot(history.epoch, val_curve, label = 'Val')
    plt.legend()
    # The y-axis runs from 0 to the largest validation value.
    plt.ylim([0,max(val_curve)])

def plot_prediction(test_labels, test_predictions):
    """Draw a true-vs-predicted scatter plot and a histogram of the errors.

    Two figures are created: the scatter of `test_predictions` against
    `test_labels` with a reference line through (-100, -100) and (100, 100),
    and a 50-bin histogram of `test_predictions - test_labels`.
    """
    # Figure 1: predictions against true values.
    plt.figure()
    plt.scatter(test_labels, test_predictions)
    plt.xlabel('True Values [1000$]')
    plt.ylabel('Predictions [1000$]')
    plt.axis('equal')
    # Re-apply the current limits before the reference line is drawn.
    x_limits = plt.xlim()
    plt.xlim(x_limits)
    y_limits = plt.ylim()
    plt.ylim(y_limits)
    plt.plot([-100, 100],[-100,100])

    # Figure 2: distribution of the prediction errors.
    plt.figure()
    residuals = test_predictions - test_labels
    plt.hist(residuals, bins = 50)
    plt.xlabel("Prediction Error [1000$]")
    plt.ylabel("Count")

#### Create the Conv1D model

Let's build a Conv1D model. Here, we'll use a `Sequential` model with four Conv1D layers (each followed by a MaxPooling1D layer with `pool_size=1`), a dropout layer, four dense layers, and a 7-unit softmax output layer that returns one probability per lithology class. The model building steps are wrapped in a function, `build_conv1D_model`.

#### Reshape Data sets
As you might remember, Conv1D layer expects input shape in 3D as

  `[batch_size, time_steps, input_dimension]`

However, current data is in the shape of

`[batch_size, features]`

See below:

In [None]:
print(X_train.shape)
print(X_train[1].shape)
print(X_train[0])

In [None]:
# Conv1D expects 3-D input, so add a trailing axis of length 1:
# (samples, features) -> (samples, features, 1). The feature axis plays the
# role of the "time steps" axis.
sample_size = X_train.shape[0] # number of samples in train set
time_steps  = X_train.shape[1] # number of features in train set
input_dimension = 1               # each feature is represented by 1 number

train_data_reshaped = X_train.reshape(sample_size,time_steps,input_dimension)
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])

In [None]:
test_data_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [None]:
test_data_reshaped.shape

In [None]:
def build_conv1D_model():
    """Build and compile the Conv1D lithology classifier.

    The input geometry is read from the notebook-level `train_data_reshaped`
    array. The network stacks four Conv1D stages (200 filters, kernel size 2,
    'valid' padding, ReLU), each followed by MaxPooling1D with pool_size=1,
    then Flatten, Dropout(0.2), four Dense(50, ReLU) layers and a 7-unit
    softmax output. It is compiled with sparse categorical cross-entropy,
    the Adam optimizer and the accuracy metric.

    Returns
    -------
    The compiled `keras.Sequential` model named "model_conv1D".
    """
    # (samples, time steps, features per step); only the last two matter here.
    _, n_timesteps, n_features = train_data_reshaped.shape

    layers = keras.layers
    model = keras.Sequential(name="model_conv1D")
    model.add(layers.Input(shape=(n_timesteps, n_features)))

    # Convolutional stages Conv1D_1 .. Conv1D_4, each followed by pooling.
    for stage in range(1, 5):
        model.add(layers.Conv1D(filters=200, kernel_size=2, strides=1,
                                padding='valid', activation='relu',
                                name="Conv1D_{}".format(stage)))
        model.add(layers.MaxPooling1D(pool_size=1))

    # Dense classification head.
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    for _ in range(4):
        model.add(layers.Dense(50, activation='relu'))
    model.add(layers.Dense(7, activation='softmax'))

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])

    return model

model_conv1D = build_conv1D_model()
model_conv1D.summary()


In [None]:
# Stop training once val_accuracy has not improved for 5 epochs and restore
# the weights of the best epoch.
earlystoping = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy',
                                                patience=5,
                                                verbose=1,
                                                mode='auto',
                                                restore_best_weights=True)
# Save the weights (not the whole model) whenever val_accuracy reaches a new
# maximum; the file name embeds the epoch number and the validation loss.
checkpoint_filepath = 'weights.{epoch:02d}-{val_loss:.2f}.h5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,
                                                      save_weights_only=True,
                                                      monitor='val_accuracy',
                                                      mode='max',
                                                      verbose=1,
                                                      save_best_only=True)

In [None]:
# Train for up to 1000 epochs in batches of 512; early stopping normally ends
# the run much sooner.
# NOTE(review): the test split doubles as validation data, so early stopping
# and checkpoint selection see the same samples the model is later scored on
# -- the reported test metrics are therefore optimistic.
history_cnn = model_conv1D.fit(train_data_reshaped, y_train, validation_data = (test_data_reshaped,y_test),
                           batch_size = 512, 
                           callbacks = [model_checkpoint,earlystoping],
                           epochs = 1000,
                           verbose=1)

In [None]:
plt.plot(history_cnn.history['loss'])
plt.plot(history_cnn.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])


In [None]:
plt.plot(history_cnn.history['accuracy'])
plt.plot(history_cnn.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
pred_test_cnn = model_conv1D.predict(test_data_reshaped)

In [None]:
pred_test_cnn = tf.argmax(pred_test_cnn, axis=1)

In [None]:
test_loss, test_acc = model_conv1D.evaluate(test_data_reshaped,  y_test, verbose=2)

In [None]:
print(classification_report(y_test, pred_test_cnn, target_names=training_features))
cm_test_cnn = confusion_matrix(y_test, pred_test_cnn)
plot_confusion_matrix(cm_test_cnn, training_features, normalize=True)

In [None]:
# The score is computed with average='micro', so it is labelled as micro
# (the previous printout called it "Macro").
microF1_test_cnn = f1_score(y_test, pred_test_cnn, average='micro')
print('Test Micro f1 score:', microF1_test_cnn)

Applying the CNN to a blind well

In [None]:
X_blind_reshaped = X_blind_stnd[k].reshape(X_blind_stnd[k].shape[0],X_blind_stnd[k].shape[1],1)
X_blind_reshaped.shape

In [None]:
aux_cnn = model_conv1D.predict(X_blind_reshaped)
pred_blind_cnn_index = tf.argmax(aux_cnn, axis=1)

In [None]:
# Work on a plain NumPy array of the predicted class indices.
pred_k = np.asarray(pred_blind_cnn_index)
# Report over every class that occurs in the truth OR in the predictions, and
# name the rows like the other models' blind reports do. Tick labels taken
# only from the true labels (blind_class[k]) mismatch the matrix as soon as
# the network predicts a class that is absent from this well.
labels_k = np.union1d(y_blind[k], pred_k)
names_k = [blind_features[code] for code in labels_k]
print(classification_report(y_blind[k], pred_k, labels=labels_k, target_names=names_k))
cm_blind_cnn = confusion_matrix(y_blind[k], pred_k, labels=labels_k)
plot_confusion_matrix(cm_blind_cnn, names_k, normalize=True)

In [None]:
# Micro-averaged F1 of the CNN on every blind well.
# Iterating the two lists together removes the hard-coded count of 10 wells;
# the printout now says "Blind micro" because these are blind-well scores
# computed with average='micro'.
for X_well, y_well in zip(X_blind_stnd, y_blind):
    # Same (samples, features, 1) layout the network was trained on.
    aux_X_blind_reshaped = X_well.reshape(X_well.shape[0], X_well.shape[1], 1)

    aux_cnn = model_conv1D.predict(aux_X_blind_reshaped)
    aux_pred_blind_cnn_index = tf.argmax(aux_cnn, axis=1)

    microF1_blind_cnn = f1_score(y_well, aux_pred_blind_cnn_index, average='micro')
    print('Blind micro f1 score:', microF1_blind_cnn)

### 9.6 CNN (RBF)

In [None]:
import keras
from keras.layers import Layer
from keras import backend as K

class RBFLayer(Layer):
    """Gaussian radial-basis-function layer with trainable centres.

    For every centre ``mu[:, j]`` the layer returns
    ``exp(-gamma * sum((x - mu[:, j]) ** 2))``, the sum being taken over
    axis 1 of the input.

    Args:
        units: Number of RBF centres (size of the last output axis).
        gamma: Factor multiplying the squared distance inside the exponential.
            Stored as a constant cast to the Keras float type; it is not a
            trainable weight.
        **kwargs: Forwarded unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, units, gamma, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        """Create the trainable centre matrix ``mu`` of shape ``(input_shape[1], units)``."""
        self.mu = self.add_weight(name='mu',
                                  shape=(int(input_shape[1]), self.units),
                                  initializer='uniform',
                                  trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        """Return the RBF activation of ``inputs`` with respect to every centre."""
        # expand_dims appends a trailing axis so the inputs broadcast against
        # the `units` axis of mu.
        diff = K.expand_dims(inputs) - self.mu
        # Squared Euclidean distance, summed over axis 1.
        l2 = K.sum(K.pow(diff, 2), axis=1)
        res = K.exp(-1 * self.gamma * l2)
        return res

    def compute_output_shape(self, input_shape):
        """Return ``(batch, units)``.

        NOTE(review): this matches the output of ``call`` only for 2-D
        ``(batch, features)`` inputs -- confirm for higher-rank inputs.
        """
        return (input_shape[0], self.units)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and re-created."""
        config = super().get_config()
        config.update({'units': self.units, 'gamma': float(self.gamma)})
        return config

In [None]:
def build_conv1D_rbf_model():
    """Build and compile the Conv1D classifier whose last convolution uses an RBF activation.

    The input shape is read from axes 1 and 2 of the module-level
    ``train_data_reshaped`` array. The network is four Conv1D layers
    (200 filters, kernel size 2, stride 1, 'valid' padding), each followed by
    ``MaxPooling1D(pool_size=1)``; the first three use ReLU and the fourth is
    given an ``RBFLayer(10, 10)`` instance as its activation. A Flatten,
    Dropout(0.2), three Dense(50, relu) layers and a Dense(7, softmax) output
    follow.

    Returns:
        The ``keras.Sequential`` model named ``model_conv1D_rbf``, compiled
        with sparse categorical cross-entropy, Adam and the accuracy metric.
    """
    n_timesteps, n_features = train_data_reshaped.shape[1:3]

    model_cnn_rbf = keras.Sequential(name="model_conv1D_rbf")
    model_cnn_rbf.add(keras.layers.Input(shape=(n_timesteps, n_features)))

    # Convolutional blocks 1-3: identical apart from the layer name.
    for conv_name in ("Conv1D_1", "Conv1D_2", "Conv1D_3"):
        model_cnn_rbf.add(keras.layers.Conv1D(filters=200,
                                              kernel_size=2,
                                              strides=1,
                                              padding='valid',
                                              activation='relu',
                                              name=conv_name))
        model_cnn_rbf.add(keras.layers.MaxPooling1D(pool_size=1))

    # Convolutional block 4.
    # NOTE(review): an RBFLayer *instance* is passed as the activation callable
    # instead of being added as a layer of its own -- confirm this is intended
    # and that its `mu` weights are tracked and trained as expected.
    model_cnn_rbf.add(keras.layers.Conv1D(filters=200,
                                          kernel_size=2,
                                          strides=1,
                                          padding='valid',
                                          activation=RBFLayer(10, 10),
                                          name="Conv1D_4"))
    model_cnn_rbf.add(keras.layers.MaxPooling1D(pool_size=1))

    # Dense classification head.
    model_cnn_rbf.add(keras.layers.Flatten())
    model_cnn_rbf.add(keras.layers.Dropout(0.2))
    for _ in range(3):
        model_cnn_rbf.add(keras.layers.Dense(50, activation='relu'))
    model_cnn_rbf.add(keras.layers.Dense(7, activation='softmax'))

    model_cnn_rbf.compile(loss="sparse_categorical_crossentropy",
                          optimizer=tf.keras.optimizers.Adam(),
                          metrics=['accuracy'])

    return model_cnn_rbf


model_conv1D_rbf = build_conv1D_rbf_model()
model_conv1D_rbf.summary()


In [None]:
# Stop training once validation accuracy has not improved for 5 epochs and
# restore the best weights seen so far.
earlystoping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='auto',
    patience=5,
    restore_best_weights=True,
    verbose=1)

# Weights-only checkpoint, written only when validation accuracy reaches a new
# maximum; the file name embeds the epoch number and the validation loss.
checkpoint_filepath = 'weights.{epoch:02d}-{val_loss:.2f}.h5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1)

In [None]:
# Train the CNN-RBF model for at most 1000 epochs in batches of 512, with the
# checkpoint and early-stopping callbacks attached.
# NOTE(review): validation_data is the test split, so early stopping and
# checkpoint selection are driven by the same data that is later reported as
# test performance -- confirm this is intended or hold out a separate
# validation set.
history_cnn_rbf = model_conv1D_rbf.fit(train_data_reshaped, y_train, validation_data = (test_data_reshaped,y_test),
                                       batch_size = 512, 
                                       callbacks = [model_checkpoint,earlystoping],
                                       epochs = 1000,
                                       verbose=1)

In [None]:
# Training vs. validation loss per epoch for the CNN-RBF model.
for _curve in ('loss', 'val_loss'):
    plt.plot(history_cnn_rbf.history[_curve])
plt.legend(['train', 'validation'])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Model loss (CNN-RBF)')

In [None]:
# Training vs. validation accuracy per epoch for the CNN-RBF model.
for _curve in ('accuracy', 'val_accuracy'):
    plt.plot(history_cnn_rbf.history[_curve])
plt.legend(['train', 'validation'])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('model accuracy')
plt.show()

In [None]:
# Raw CNN-RBF model outputs (softmax over 7 classes) for the reshaped test set.
pred_test_cnn_rbf = model_conv1D_rbf.predict(test_data_reshaped)

In [None]:
# Replace the per-sample output vectors with the index of their largest entry
# (axis=1), i.e. the predicted class index. Note this overwrites pred_test_cnn_rbf.
pred_test_cnn_rbf = tf.argmax(pred_test_cnn_rbf, axis=1)

In [None]:
# Loss and accuracy of the compiled CNN-RBF model on the test split.
test_loss_rbf, test_acc_rbf = model_conv1D_rbf.evaluate(test_data_reshaped,  y_test, verbose=2)

In [None]:
# Confusion matrix and per-class report for the CNN-RBF model on the test
# split, followed by the normalised confusion-matrix plot.
cm_test_rbf = confusion_matrix(y_true=y_test, y_pred=pred_test_cnn_rbf)
print(classification_report(y_true=y_test,
                            y_pred=pred_test_cnn_rbf,
                            target_names=training_features))
plot_confusion_matrix(cm_test_rbf, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the CNN-RBF model on the test split.
microF1_test_rbf = f1_score(y_test, pred_test_cnn_rbf, average='micro')
# The score is micro-averaged, so label it as such (the label used to say "Macro").
print('Test micro F1 score:', microF1_test_rbf)

In [None]:
# CNN-RBF outputs for blind well `k` (uses X_blind_reshaped built in the plain
# CNN section), then the index of the largest output per sample (axis=1) as
# the predicted class index.
aux_cnn_rbf = model_conv1D_rbf.predict(X_blind_reshaped)
pred_blind_cnn_rbf_index = tf.argmax(aux_cnn_rbf, axis=1)

In [None]:
# Confusion matrix and per-class report for the CNN-RBF model on blind well
# `k`, followed by the normalised confusion-matrix plot.
cm_blind_rbf = confusion_matrix(y_true=y_blind[k], y_pred=pred_blind_cnn_rbf_index)
print(classification_report(y_true=y_blind[k], y_pred=pred_blind_cnn_rbf_index))
plot_confusion_matrix(cm_blind_rbf, blind_class[k], normalize=True)

### 9.7. MLP

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Multi-layer perceptron with two hidden layers of 50 units each, seeded for
# reproducibility and fitted on the training split.
mlp = MLPClassifier(hidden_layer_sizes=(50, 50), random_state=42)
mlp.fit(X_train, y_train)

In [None]:
# Class predictions of the fitted MLP on the test features.
pred_mlp_test = mlp.predict(X_test)

In [None]:
# Confusion matrix and per-class report for the MLP on the test split,
# followed by the normalised confusion-matrix plot.
cm_test_MLP = confusion_matrix(y_true=y_test, y_pred=pred_mlp_test)
print(classification_report(y_true=y_test,
                            y_pred=pred_mlp_test,
                            target_names=training_features))
plot_confusion_matrix(cm_test_MLP, training_features, normalize=True)

In [None]:
# Micro-averaged F1 of the MLP on the test split.
microF1_test_mlp = f1_score(y_test, pred_mlp_test, average='micro')
# The score is micro-averaged, so label it as such (the label used to say "Macro").
print('Test micro F1 score:', microF1_test_mlp)

In [None]:
# MLP predictions for blind well `k`, with the confusion matrix, the per-class
# report and the normalised confusion-matrix plot.
pred_mlp_blind = mlp.predict(X_blind_stnd[k])
cm_blind_mlp = confusion_matrix(y_true=y_blind[k], y_pred=pred_mlp_blind)
print(classification_report(y_true=y_blind[k], y_pred=pred_mlp_blind))
plot_confusion_matrix(cm_blind_mlp, blind_class[k], normalize=True)

# 10. Model performance evaluation

I will use the diagonal of each confusion matrix computed on the test data set to evaluate model performance. Normalized by its row total, each diagonal entry shows what percentage of a facies is correctly predicted.

In [None]:
### Build a table holding, for every classifier, the fraction of test samples
### of each facies that was predicted correctly (confusion-matrix diagonal
### divided by the row total)

mod_test_list = ['SVM', 'GB', 'RF', 'KNN', 'CNN', 'CNN-RBF', 'MLP']
cm_test_list = [cm_test_SVM, cm_test_GB, cm_test_RF, cm_test_knn,
                cm_test_cnn, cm_test_rbf, cm_test_MLP]
face_test_list = training_features
pred_test_df = pd.DataFrame(index=training_features, columns=mod_test_list)

# One column per model, one row per facies (labels are assumed to be unique,
# so the position in the list is the row index).
for col_index, (mod, cm) in enumerate(zip(mod_test_list, cm_test_list)):
    for row_index, face in enumerate(face_test_list):
        pred_test_df.iloc[row_index, col_index] = cm[row_index][row_index]/sum(cm[row_index])


### Extra 'Accuracy' row holding the micro-averaged F1 score of each model
df_1 = pd.DataFrame(
    [[microF1_test_SVM, microF1_test_gb, microF1_test_rf, microF1_test_knn,
      microF1_test_cnn, microF1_test_rbf, microF1_test_mlp]],
    index=['Accuracy'],
    columns=mod_test_list,
)

pred_test_conc = pd.concat([pred_test_df, df_1])
pred_test_conc

In [None]:
# Grouped bar chart of the per-facies correct-prediction fraction on the test
# data: one group per facies, one bar per model.
X_ind = np.arange(pred_test_df.shape[0])
(pred_df_index_list) = training_features
aux=0.1
plt.figure(figsize=(10,5))
# (horizontal shift, model column, bar colour)
for _shift, _column, _colour in [(0, 'SVM', 'k'),
                                 (0.1, 'GB', 'yellow'),
                                 (0.2, 'RF', 'darkgreen'),
                                 (0.3, 'KNN', 'orange'),
                                 (0.4, 'CNN', 'blue'),
                                 (0.5, 'CNN-RBF', 'red'),
                                 (0.6, 'MLP', 'lime')]:
    plt.bar(X_ind + _shift, pred_test_df[_column], color=_colour, width=aux)
plt.xticks(X_ind, pred_df_index_list)
plt.ylabel('Correct predictions')
plt.xlabel('Facies')
plt.legend(labels=mod_test_list)
plt.savefig('canada_performance_evaluation_test_data.pdf',bbox_inches='tight')
plt.show()

# 11. Classifier evaluation using the blind test well

I will use the same method as in Section 10 for evaluation.

In [None]:
### Build a table holding, for every classifier, the fraction of samples of
### each facies of blind well `k` that was predicted correctly
### (confusion-matrix diagonal divided by the row total)

mod_list = ['SVM', 'GB', 'RF', 'KNN', 'CNN', 'CNN-RBF', 'MLP']
cm_list = [cm_SVM, cm_GB, cm_RF, cm_knn, cm_blind_cnn, cm_blind_rbf, cm_blind_mlp]
pred_df = pd.DataFrame(index=blind_class[k], columns=mod_list)

# One column per model, one row per facies (labels are assumed to be unique,
# so the position in the list is the row index).
for col_index, (mod, cm) in enumerate(zip(mod_list, cm_list)):
    for row_index, face in enumerate(blind_class[k]):
        pred_df.iloc[row_index, col_index] = cm[row_index][row_index]/sum(cm[row_index])



In [None]:
### add the accuracy factor and percentage of most correct prediction
#df_2 = pd.DataFrame([[microF1_blind_SVM, 
#                      microF1_blind_GB, 
#                      microF1_blind_rf, 
#                      microF1_blind_knn, 
#                      microF1_blind_cnn]], index=['Accuracy'], columns=mod_list)

#pred_blind_conc = pd.concat([pred_df,df_2])
#pred_blind_conc

In [None]:
# Grouped bar chart of the per-facies correct-prediction fraction on blind
# well `k`: one group per facies, one bar per model.
X_ind = np.arange(pred_df.shape[0])

aux=0.1
plt.figure(figsize=(10,5))
# (horizontal shift, model column, bar colour)
for _shift, _column, _colour in [(0, 'SVM', 'k'),
                                 (0.1, 'GB', 'yellow'),
                                 (0.2, 'RF', 'darkgreen'),
                                 (0.3, 'KNN', 'orange'),
                                 (0.4, 'CNN', 'blue'),
                                 (0.5, 'CNN-RBF', 'red'),
                                 (0.6, 'MLP', 'lime')]:
    plt.bar(X_ind + _shift, pred_df[_column], color=_colour, width=aux)
plt.xticks(X_ind, blind_class[k])
plt.ylabel('Correct predictions')
plt.xlabel('Facies')
plt.legend(labels=mod_list)
plt.savefig('canada_performance_evaluation_blind_data.pdf',bbox_inches='tight')
plt.show()

# 12. Plot the predicted facies for comparison

In [None]:
# Copy the data of blind well `k` and attach one prediction column per model.
blind = blind_data_sep[k].copy()
for _column, _predicted in (('SVM', pred_blind),
                            ('GB', pred_GB_blind),
                            ('RF', pred_RF_blind),
                            ('KNN', preds_knn_blind),
                            ('CNN', pred_blind_cnn_index),
                            ('CNN-RBF', pred_blind_cnn_rbf_index),
                            ('MLP', pred_mlp_blind)):
    blind[_column] = _predicted

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable


facies_colors = ['b','g','r','c','m']


def compare_facies_plot(logs, compare1, compare2, compare3, compare4, compare5, compare6, compare7, facies_colors):
    """Plot five log curves next to the reference and seven predicted facies tracks of one well.

    The figure has 13 side-by-side axes: the SW, VSH, PHI, RW and W_TAR curves
    against depth, the reference facies (``LITH_SI``) and one track per
    comparison column.

    Args:
        logs: DataFrame with the columns DEPTH, SW, VSH, PHI, RW, W_TAR,
            LITH_SI and WELL plus the seven comparison columns.
        compare1, compare2, compare3, compare4, compare5, compare6, compare7:
            Names of the columns holding the predicted facies to display.
        facies_colors: Sequence of matplotlib colours. The colour scale runs
            from 1 to ``len(facies_colors)``; codes outside that range are
            drawn with the nearest end colour.

    Notes:
        The colour-bar label is built from the module-level ``blind_class[k]``.
    """
    # make sure logs are sorted by depth
    logs = logs.sort_values(by='DEPTH')
    cmap_facies = colors.ListedColormap(facies_colors, 'indexed')
    # Derived from the palette instead of a hard-coded 5, so the colour scale
    # follows whatever palette the caller passes.
    num_colors = len(facies_colors)
    ztop = logs.DEPTH.min()
    zbot = logs.DEPTH.max()

    # (column, extra line style) for the curve tracks, in display order.
    curve_tracks = [
        ('SW', {'color': 'g'}),
        ('VSH', {}),
        ('PHI', {'color': '0.5'}),
        ('RW', {'color': 'r'}),
        ('W_TAR', {'color': 'black'}),
    ]
    # (column, x-label) for the facies tracks: reference first, then predictions.
    compare_columns = [compare1, compare2, compare3, compare4, compare5, compare6, compare7]
    facies_tracks = [('LITH_SI', 'Facies')] + [(name, name) for name in compare_columns]

    n_curves = len(curve_tracks)
    f, ax = plt.subplots(nrows=1, ncols=n_curves + len(facies_tracks), figsize=(15, 12))

    for axis, (column, line_style) in zip(ax[:n_curves], curve_tracks):
        axis.plot(logs[column], logs.DEPTH, '-', **line_style)

    # Each facies column is repeated 100 times sideways so it renders as a strip.
    last_image = None
    for axis, (column, _) in zip(ax[n_curves:], facies_tracks):
        strip = np.repeat(np.expand_dims(logs[column].values, 1), 100, 1)
        last_image = axis.imshow(strip, interpolation='none', aspect='auto',
                                 cmap=cmap_facies, vmin=1, vmax=num_colors)

    # Colour bar attached to the right of the last facies track.
    divider = make_axes_locatable(ax[-1])
    cax = divider.append_axes("right", size="20%", pad=0.05)
    cbar = plt.colorbar(last_image, cax=cax)
    cbar.set_label((30*' ').join(blind_class[k]))
    cbar.set_ticks(range(0,1)); cbar.set_ticklabels('')

    # Curve tracks: depth increasing downwards, grid, few x ticks, data-wide x range.
    for axis, (column, _) in zip(ax[:n_curves], curve_tracks):
        axis.set_ylim(ztop, zbot)
        axis.invert_yaxis()
        axis.grid()
        axis.locator_params(axis='x', nbins=3)
        axis.set_xlabel(column)
        axis.set_xlim(logs[column].min(), logs[column].max())

    # Facies tracks: label only, no x tick labels.
    for axis, (_, label) in zip(ax[n_curves:], facies_tracks):
        axis.set_xlabel(label)
        axis.set_xticklabels([])

    # Only the left-most track keeps its depth tick labels.
    for axis in ax[1:]:
        axis.set_yticklabels([])

    f.suptitle('Well: %s'%logs.iloc[0]['WELL'], fontsize=14,y=0.94)

In [None]:
# Draw the log curves, the reference facies and the seven model predictions
# for blind well `k` side by side.
compare_facies_plot(blind, 
                    'SVM', 
                    'GB', 
                    'RF', 
                    'KNN',
                    'CNN', 
                    'CNN-RBF', 
                    'MLP',facies_colors)

In [None]:
# Display the reference facies column of the blind well.
blind['LITH_SI']