In [2]:
!ls

 __init__.py
'LSTM Test.ipynb'
'LTSM-CCL Data with test, train, valid1, valid2 data.ipynb'
 test.npy
 train.npy
 Untitled1.ipynb
 validation1.npy
 validation2.npy
'Working LTSM Model with Actual CCL Data.ipynb'


In [None]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, Dropout
from keras.optimizers import Adam, RMSprop
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import pandas as pd
import lasio as las


class WirelineLog(object):
    """
    Thin wrapper around a LAS well-log file loaded through lasio.

    Attributes:
        las_file: object returned by ``las.read`` for the most recently read
            file; ``None`` until :meth:`read` has been called.
        df: result of ``las_file.df()`` for that file; ``None`` until
            :meth:`read` has been called.
    """
    las_file = None
    df = None

    def read(self, path):
        """
        Read a LAS file and return its contents as a DataFrame.

        The parsed file and the DataFrame are also kept on the instance
        (``self.las_file`` / ``self.df``) so existing callers that ignore the
        return value keep working.

        :param path: location of the LAS file, handed straight to ``las.read``
        :return: DataFrame of the entire las file (same object as ``self.df``)
        """
        self.las_file = las.read(path)
        self.df = self.las_file.df()
        # The docstring always promised a return value; actually provide it.
        return self.df


class MultiStepLTSM(Sequential):
    """
    Stateful stacked-LSTM regressor built on top of keras ``Sequential``.

    Typical use: construct with a 1-D series, call :meth:`build_model`,
    :meth:`preprocess_data`, then :meth:`train_model`; afterwards
    ``predictions`` holds the model output for the training windows.

    Attributes:
        d_train: ``(x, y)`` tuple produced by :meth:`preprocess_data`.
        d_valid1, d_valid2, d_test: placeholders; no method in this class
            populates them.
        done: set to ``1`` once :meth:`train_model` has finished.
        predictions: output of ``predict`` on the training inputs, set by
            :meth:`train_model`.
    """
    d_train = None
    d_valid1 = None
    d_valid2 = None
    d_test = None
    done = None
    predictions = None

    def __init__(self, data, batch_size=500, epochs=25, look_back=100, look_ahead=1, dropout=0.1, hidden_n=120):
        """
        Store the raw series and the hyper-parameters; no layers are added here.

        :param data: sequence that :meth:`preprocess_data` slices into windows
        :param batch_size: fixed batch size used for the stateful layers,
            for ``fit`` and for ``predict``
        :param epochs: number of single-epoch ``fit`` passes in :meth:`train_model`
        :param look_back: number of timesteps in each input window
        :param look_ahead: number of timesteps in each target window
        :param dropout: rate passed to every ``Dropout`` layer
        :param hidden_n: number of units in every ``LSTM`` layer
        """
        super(MultiStepLTSM, self).__init__()
        self.data = data
        self.batch_size = batch_size
        self.look_back = look_back
        self.look_ahead = look_ahead
        self.dropout = dropout
        self.hidden_n = hidden_n
        self.epochs = epochs

    def build_model(self, iterations=0):
        """
        Add the layers, compile the model and print its summary.

        Layout: LSTM -> Dropout -> ``iterations`` x (LSTM -> Dropout) ->
        LSTM (last output only) -> Dense(1).  Compiled with MSE loss, the Adam
        optimizer and MAE as an extra metric.

        :param iterations: number of additional LSTM + Dropout pairs inserted
            between the first and the last LSTM layer
        :return: N/A
        """
        # Stateful layers need the full batch shape up front.  The windows
        # built by _prepare_seq2seq_data are (n, look_back, 1), i.e. a single
        # feature per timestep, so the last dimension is 1 and must not depend
        # on look_ahead.  Only batch_input_shape is given: the former extra
        # input_shape/batch_size arguments described a different shape.
        self.add(LSTM(self.hidden_n,
                      batch_input_shape=(self.batch_size, self.look_back, 1),
                      return_sequences=True,
                      stateful=True
                      ))
        self.add(Dropout(self.dropout))

        for _ in range(iterations):
            self.add(LSTM(self.hidden_n,
                          return_sequences=True,
                          stateful=True
                          ))
            self.add(Dropout(self.dropout))

        # Final recurrent layer emits only its last output, feeding Dense(1).
        self.add(LSTM(
            self.hidden_n,
            return_sequences=False,
            stateful=True
        ))
        self.add(Dense(1))

        optimizer = Adam(lr=0.002)
        self.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
        self.summary()

    def train_model(self, predict_on=None):
        """
        Fit on ``d_train`` one epoch at a time, then predict on the training inputs.

        The layer states are reset after every epoch.  Only the first target
        step (``y_train[:, 0]``) is used as the regression target.  On
        completion ``done`` is set to 1 and ``predictions`` is filled.

        :param predict_on: currently unused
        :return: N/A
        :raises RuntimeError: if :meth:`preprocess_data` has not produced ``d_train``
        """
        if self.d_train is None:
            raise RuntimeError("preprocess_data() must be called before train_model()")
        x_train, y_train = self.d_train

        for epoch in range(1, self.epochs + 1):
            print("Epoch", epoch, "/", self.epochs)

            # shuffle=False keeps the sample order, which the stateful layers
            # carry state across within an epoch.
            self.fit(x_train, y_train[:, 0],
                     batch_size=self.batch_size,
                     epochs=1,
                     verbose=1,
                     shuffle=False)
            self.reset_states()

        self.done = 1
        self.predictions = self.predict(x_train, batch_size=self.batch_size)

    def plot_error(self, true, predictions, i1=None, i2=None):
        """
        Plot true vs. predicted values and, below, their absolute error.

        :param true: target array; column 0 along the second axis is plotted
        :param predictions: predicted values aligned with ``true``
        :param i1: first index of the plotted slice (defaults to 0)
        :param i2: end index of the plotted slice (defaults to ``len(true)``)
        :return: N/A
        """
        b = i1 if i1 is not None else 0
        e = i2 if i2 is not None else len(true)
        if e < b:
            # Accept the bounds in either order.
            b, e = e, b

        plt.figure(figsize=(30, 10))
        plt.subplot(2, 1, 1)
        plt.plot(true[b:e, 0], label="True Values")
        plt.plot(predictions[b:e], label="Predicted", linestyle="--")
        plt.legend()
        plt.subplot(2, 1, 2)
        error = abs(predictions - true[:, 0])
        plt.plot(error[b:e], label="Error", linewidth=0.5, c='g', alpha=0.8)
        plt.legend()
        plt.show()

    def preprocess_data(self):
        """
        Turn ``self.data`` into training windows and store them in ``d_train``.

        The whole series is treated as training data; the windows are then
        trimmed to a multiple of ``batch_size`` (see :meth:`clip_data`).
        ``d_valid1``, ``d_valid2`` and ``d_test`` are not populated here.

        :return: N/A
        """
        windows = self._prepare_seq2seq_data(self.data, look_ahead=self.look_ahead, look_back=self.look_back)
        self.d_train = self.clip_data(windows)

    def _prepare_seq2seq_data(self, dataset, look_back, look_ahead):
        """
        Slice a sequence into (input window, target window) pairs.

        Window ``i`` uses ``dataset[i:i + look_back]`` as input and the
        following ``look_ahead`` values as target.

        :param dataset: sliceable sequence of values
        :param look_back: length of each input window
        :param look_ahead: length of each target window
        :return: tuple ``(dataX, dataY)`` reshaped to ``(-1, look_back, 1)``
            and ``(-1, look_ahead, 1)`` respectively
        """
        n_windows = len(dataset) - look_back - look_ahead
        inputs = [dataset[i:i + look_back] for i in range(n_windows)]
        targets = [dataset[i + look_back:i + look_back + look_ahead] for i in range(n_windows)]
        dataX = np.reshape(np.array(inputs), [-1, look_back, 1])
        dataY = np.reshape(np.array(targets), [-1, look_ahead, 1])
        return dataX, dataY

    def clip_data(self, data_tuple):
        """
        Drop trailing samples so the sample count is a multiple of ``batch_size``.

        For a stateful LSTM the batch size is fixed beforehand and every batch
        must hold the same number of samples, so the incomplete last batch is
        discarded.

        :param data_tuple: ``(x, y)`` pair of equally long arrays
        :return: ``(x, y)`` trimmed to whole batches; returned unchanged when
            ``batch_size`` is 1 or less
        """
        x, y = data_tuple
        if self.batch_size <= 1:
            # Nothing to trim; still return the data instead of None.
            return x, y
        # Floor division keeps the length an int so it is usable as a slice bound.
        usable = (len(x) // self.batch_size) * self.batch_size
        return x[:usable], y[:usable]

    def plot_results(self):
        """
        Report when the model has not been trained yet.

        No plotting is implemented: once ``done`` is set the method does nothing.

        :return: N/A
        """
        if self.done is None:
            print("Model hasn't been run yet")
            return







In [None]:
# Load the LAS file and pull out the 'CCL' column as an array with missing values dropped.
data = WirelineLog()
data.read("/home/duys/Downloads/ccl_data/stage2b-pass4.las")
d = data.df['CCL'].dropna().values

# Build the network (one extra LSTM + Dropout pair), window the series, then train.
model2 = MultiStepLTSM(data=d, batch_size=250, look_back=100, epochs=25, hidden_n=50)
model2.build_model(iterations=1)
model2.preprocess_data()
# Shape of the training inputs after windowing and trimming to whole batches.
print(model2.d_train[0].shape)
model2.train_model()
x, y = model2.d_train
# NOTE(review): the disabled call below refers to `model`, which this cell never defines.
#model.plot_error(x, model.predictions)
# Compare the training-input shape with the shape of the stored predictions.
print(model2.d_train[0].shape, model2.predictions.shape)

In [None]:
# Plot training targets against the stored predictions, and their absolute error, for indices 4500 to 5000.
model2.plot_error(model2.d_train[1], model2.predictions, i1=4500, i2=5000)