In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable

from pandas import set_option
set_option("display.max_rows", 10)

import seaborn as sns
import numpy as np
from tqdm import tqdm

from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv

from scipy import stats

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn import preprocessing

from mpl_toolkits.axes_grid1 import make_axes_locatable

from pandas import set_option
set_option("display.max_rows", 10)

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import TimeDistributed
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Conv1D, MaxPool2D, Flatten, Dropout, CuDNNGRU, CuDNNLSTM, Conv2D, MaxPooling1D
from keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam, SGD, Nadam
from time import time
from livelossplot import PlotLossesKeras
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.utils import to_categorical
import tensorflow as tf





%matplotlib inline

In [None]:
# Location of the SMOTE-resampled training set, relative to the working directory.
filename = 'training_data_SMOTE.csv'
# Parse the CSV into a DataFrame; every later cell derives its inputs from this frame.
training_data = pd.read_csv(filepath_or_buffer=filename)
# Bare last expression so the notebook renders a preview of the frame.
training_data

In [None]:
# Count the samples of each facies class, ordered by the numeric facies code.
facies_counts = training_data['Facies_Type'].value_counts().sort_index()
# Use the facies labels to index each count.
# NOTE(review): facies_labels and facies_colors are not defined in any cell visible in
# this notebook; they must already exist in the kernel (one entry per facies class,
# in sorted code order) or this cell raises NameError — confirm where they come from.
facies_counts.index = facies_labels

ax = facies_counts.plot(kind='bar', color=facies_colors,
                        title='Distribution of Training Data Processed by SMOTE Algorithm by Facies')
# Save through the Axes' own figure instead of relying on pyplot's "current figure".
ax.figure.savefig('facies distribution_SMOTE.pdf', dpi=300, bbox_inches='tight')
# Keep the counts as the LAST expression: only a cell's final expression is rendered,
# so placing it before the savefig call meant the table was never displayed.
facies_counts

In [None]:
# Target vector: the facies code of every sample, as a NumPy array.
correct_facies_labels = training_data['Facies_Type'].values

# Everything except the well identifier and the two label columns is a model input.
non_feature_columns = ['Well_Name', 'Facies_Type', 'FaciesLabels']
feature_vectors = training_data.drop(non_feature_columns, axis=1)

col = list(feature_vectors.columns)

# Force every feature column to a numeric dtype. Values that cannot be parsed become
# NaN, and all NaN (pre-existing or produced by the coercion) are replaced with 0.0.
coerced_features = feature_vectors[col].apply(pd.to_numeric, errors='coerce')
feature_vectors[col] = coerced_features.fillna(0.0)

# Summary statistics as a quick sanity check of the cleaned features.
feature_vectors.describe()

In [None]:
from sklearn import preprocessing

# Standardise each feature column using statistics learned from feature_vectors.
# NOTE(review): the scaler is fitted on every row, including the rows that a later cell
# holds out with train_test_split, so the test split contributes to the scaling
# statistics — confirm this is acceptable for the reported test accuracy.
scaler = preprocessing.StandardScaler()
scaler.fit(feature_vectors)
X_train = scaler.transform(feature_vectors)
X_train

In [None]:
# Insert a length-1 axis at position 2 so the 2-D scaled matrix becomes a 3-D array,
# matching the (n_timesteps, n_features) input_shape the Conv1D model is built with.
# Note: the cell rebinds X_train from itself, so running it twice adds a second axis.
X_train = X_train[:, :, np.newaxis]
X_train

In [None]:
from keras.utils import to_categorical

# One-hot encode the facies codes for the softmax / categorical cross-entropy model.
# num_classes is left at None, so Keras sizes the encoding from the labels themselves.
# NOTE(review): if the facies codes start at 1 rather than 0, the encoding will carry
# an extra, always-zero column for class 0 — confirm the label range.
y_train = to_categorical(correct_facies_labels, num_classes=None)
y_train

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the samples as a test set; the fixed seed makes the split repeatable.
# Note: X_train / y_train are rebound to the training portion, so re-running this cell
# splits the already-reduced arrays again.
split_parts = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
## create model
# Input/output geometry: each sample is an (n_timesteps, n_features) array, and the
# number of classes is the width of the one-hot target matrix.
n_timesteps = X_train.shape[1]
n_features = X_train.shape[2]
n_outputs = y_train.shape[1]

# Layer stack, in order: two width-2 convolutions, pooling, dropout, two width-1
# convolutions, pooling, dropout, then a dense head ending in a softmax over the classes.
model = Sequential([
    Conv1D(64, 2, activation='relu', input_shape=(n_timesteps, n_features)),
    Conv1D(64, 2, activation='relu'),
    MaxPooling1D(1),
    Dropout(0.25),
    Conv1D(64, 1, activation='relu'),
    Conv1D(64, 1, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.5),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(n_outputs, activation='softmax'),
])

# Compile with the Nadam optimizer; the loss is categorical cross-entropy (not MSE)
# and accuracy is the only additional metric that gets logged.
model.compile(optimizer='Nadam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary of the compiled network.
model.summary()

In [None]:
import os

# Training budget and early-stopping patience (tunable). The patience must stay below
# EPOCHS, otherwise the early-stopping callback can never fire.
EPOCHS = 500
EARLY_STOPPING_PATIENCE = 50

# Stop training once the validation loss has not improved for EARLY_STOPPING_PATIENCE
# consecutive epochs.
early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=EARLY_STOPPING_PATIENCE)

# Build the checkpoint path portably and make sure the target directory exists, so the
# first save does not fail on a missing folder or a platform-specific separator.
os.makedirs('models', exist_ok=True)
filepath = os.path.join('models', 'CNN_NadamSMOTE.best.hdf5')

# Keep only the best weights, judged by validation loss. The model is compiled with
# metrics=['accuracy'] only, so 'val_loss' is a quantity that is actually logged;
# monitoring a metric that is never logged would silently skip every save.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# Train the model, holding out 30 % of the training rows for validation.
model.fit(X_train, y_train, epochs=EPOCHS, batch_size=64, validation_split=0.3,
          verbose=1, callbacks=[PlotLossesKeras(), early_stopping_monitor, checkpoint])


In [None]:
# Score the trained model on the held-out test split. With a single compiled metric
# (accuracy), evaluate() returns [loss, accuracy]. The result is unpacked into
# descriptive names instead of `eval`, which would shadow Python's builtin eval().
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Evaluation on test data: loss = %0.6f accuracy = %0.2f%% \n"
      % (test_loss, test_accuracy * 100))

In [None]:
# Persist the trained model (architecture, weights and optimizer state) to one HDF5 file.
print("Saving model to disk \n")
model.save(filepath='model_Nadam_SMOTE.h5')

In [None]:
# Predicted class index for every test sample: the argmax over the softmax outputs.
# This replaces Sequential.predict_classes(), which is deprecated and has been removed
# from recent TensorFlow/Keras releases; for a softmax output the result is identical.
y_pred = np.argmax(model.predict(X_test), axis=-1)
y_pred

In [None]:
# Recover the integer class index of each test sample from its one-hot row.
truelabel = np.argmax(y_test, axis=-1)
truelabel

In [None]:
from sklearn.metrics import confusion_matrix

# Raw confusion counts: rows are true classes, columns are predicted classes.
con_mat = confusion_matrix(truelabel, y_pred)

# Normalise each row by its number of true samples so every cell is a per-class rate.
# A class that appears only among the predictions has an all-zero row; dividing by its
# zero total would give NaN and a RuntimeWarning, so such rows are left at 0.0.
row_totals = con_mat.sum(axis=1, keepdims=True)
con_mat_norm = np.divide(con_mat, row_totals,
                         out=np.zeros(con_mat.shape, dtype=float),
                         where=row_totals > 0)
con_mat_norm = np.around(con_mat_norm, decimals=2)

In [None]:
# Draw the row-normalised confusion matrix as an annotated heatmap on an explicit Axes.
figure, ax = plt.subplots(figsize=(9, 8))
sns.heatmap(con_mat_norm, annot=True, cmap='Blues', ax=ax)

ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
# Write the figure to disk before showing it.
figure.savefig('heatmap_Nadam_SMOTE.pdf', dpi=300, bbox_inches='tight')
plt.show()