In [38]:
import os
import sys
import numpy as np
os.environ['KERAS_BACKEND'] = "tensorflow"
import keras as K
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Embedding, GRU, CuDNNGRU, LSTM
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import glob
import pandas as pd
from sklearn.metrics import explained_variance_score

%matplotlib inline

In [18]:
# Force one-gpu
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Keras: ", K.__version__)
print("Numpy: ", np.__version__)
print("Tensorflow: ", tf.__version__)
print("KERAS backend:", K.backend.backend())


OS:  linux
Python:  3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) 
[GCC 7.2.0]
Keras:  2.2.4
Numpy:  1.14.5
Tensorflow:  1.12.0
KERAS backend: tensorflow


In [19]:
file_name_list = glob.glob("Data/clean/*.csv")

feature_set = ['wellName','DEPT', 'BS', 'CALI', 'DENS', 'DTC', 'GR', 'NEUT', 'PEF', 'RESD', 'RESM', 'RESS', 'TVD']

file_list = []

for file in file_name_list:
    df = pd.read_csv(file, index_col=None, skiprows=[1])
    file_list.append(df[feature_set])

In [39]:
def cross_val(clf, feature_set=['DEPT', 'BS', 'CALI', 'DENS', 'GR', 'NEUT', 'PEF', 'RESD', 'RESM', 'RESS', 'TVD']):
    
    wells = []
    EVS_list = []

    for i in range(len(file_list)):
        test_df = file_list[i]
        wells.append(test_df.iloc[0,0])
        print('%s : %s' %(i, wells[i]))

        train_list = file_list.copy()
        train_list.pop(i)
        train_df = pd.concat(train_list)

        test_x = test_df[feature_set].values
        test_y = test_df[['DTC']].values
        test_y = test_y
        
        train_X = train_df[feature_set].values
        train_y = train_df[['DTC']].values
        train_y = train_y
        
        # feature scaling
        scaler = StandardScaler()
        scaler.fit_transform(train_X)
        scaler.transform(test_x)
        
        yscaler = StandardScaler()
        yscaler.fit_transform(train_y)
        yscaler.transform(test_y)
        
        test_y = test_y.ravel()
        train_y = train_y.ravel()

        # training
        mdl = clf
        history = mdl.fit(train_X, train_y)

        # testing
        pred_y = mdl.predict(test_x)
    
        # error
        abs_error = np.abs(np.subtract(test_y, pred_y))
        
#         plt_this(abs_error)
#         plt_this(pred_y)
#         plt_this(test_y)
            
        #EVS = history.history['mean_absolute_error']#(test_y, pred_y)
        EVS = explained_variance_score(test_y, pred_y)

        EVS_list.append(EVS)

    print()

    avg_EVS = np.mean(EVS_list)

    for i in range(len(wells)):
        print('Test score on %s : %s' %(wells[i], EVS_list[i]))

    print()
    print('Average algorithm mean_absolute_error score: %s' %avg_EVS)

In [40]:
# define Deep NN Arch 
model = Sequential()
model.add(Dense(12, input_dim =11, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear')) # Last layer is linear here 
model.compile(loss='mae', optimizer='adam', metrics=['mse','mae'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 12)                144       
_________________________________________________________________
dense_26 (Dense)             (None, 4)                 52        
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 5         
Total params: 201
Trainable params: 201
Non-trainable params: 0
_________________________________________________________________


In [41]:
cross_val(model)

0 : Cheal-G2
Epoch 1/1
1 : Cheal-B8
Epoch 1/1
2 : Cheal-C3
Epoch 1/1
3 : Cheal-A11
Epoch 1/1
4 : Cheal-C4
Epoch 1/1
5 : Cheal-G3
Epoch 1/1
6 : Cheal-A12
Epoch 1/1
7 : Cheal-A10
Epoch 1/1
8 : Cheal-G1
Epoch 1/1

Test score on Cheal-G2 : 0.020371076629457252
Test score on Cheal-B8 : 0.01690836029473397
Test score on Cheal-C3 : 0.02363349711838114
Test score on Cheal-A11 : 0.021992692424654403
Test score on Cheal-C4 : 0.02322602147334707
Test score on Cheal-G3 : 0.021636539512186914
Test score on Cheal-A12 : 0.024738662196372108
Test score on Cheal-A10 : 0.025181007838333214
Test score on Cheal-G1 : 0.02551169997436986

Average algorithm mean_absolute_error score: 0.02257772860687066


In [5]:
# define and Arch 
model = Sequential()
model.add(LSTM(32, input_shape=(1000, 5))) #50 is timesteps, 2 is the number of features we have
model.add(Dense(12, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear')) # Last layer is linear here 
model.compile(loss='mae', optimizer='adam', metrics=['mse','mae'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                4864      
_________________________________________________________________
dense_1 (Dense)              (None, 12)                396       
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 52        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 5         
Total params: 5,317
Trainable params: 5,317
Non-trainable params: 0
_________________________________________________________________
