In [1]:
import os
import sys
import numpy as np
import pandas as pd
os.environ['KERAS_BACKEND'] = "tensorflow"
import keras as K
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Embedding, GRU, CuDNNGRU, LSTM
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import glob
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import explained_variance_score
from sklearn.linear_model import BayesianRidge, LinearRegression
from sklearn.svm import SVR

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Reading the files and modeling the data

In [2]:

# Collect the cleaned well-log CSVs. glob returns paths in arbitrary order, so
# sort them to keep the well ordering (and therefore the fold numbering printed
# during cross-validation) reproducible between runs and machines.
file_name_list = sorted(glob.glob("Data/clean/*.csv"))
if not file_name_list:
    raise FileNotFoundError("No CSV files found matching 'Data/clean/*.csv'")

# Columns kept from every file: the well identifier first, then the log curves
# (including 'DTC', which cross_val uses as the regression target).
feature_set = ['wellName','DEPT', 'BS', 'CALI', 'DENS', 'DTC', 'GR', 'NEUT', 'PEF', 'RESD', 'RESM', 'RESS', 'TVD']

# One DataFrame per CSV file, restricted to the columns above.
file_list = []

for file in file_name_list:
    # skiprows=[1] drops the file line right after the header
    # (presumably a units row -- TODO confirm against the data).
    df = pd.read_csv(file, index_col=None, skiprows=[1])
    file_list.append(df[feature_set])

# Deep learning network

In [3]:
# Define the network architecture: 5 inputs -> Dense(4) -> Dense(4) -> Dense(1).
model = Sequential([
    Dense(4, input_dim=5, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1, activation='linear'),  # the last layer uses a linear activation
])
# Optimise mean absolute error; also report MSE and MAE as metrics.
model.compile(optimizer='adam', loss='mae', metrics=['mse', 'mae'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 4)                 24        
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 20        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 5         
Total params: 49
Trainable params: 49
Non-trainable params: 0
_________________________________________________________________


In [4]:

def cross_val(clf, feature_set=['DEPT', 'BS', 'CALI', 'DENS', 'GR', 'NEUT', 'PEF', 'RESD', 'RESM', 'RESS', 'TVD']):
    """Leave-one-well-out cross-validation of a regressor predicting 'DTC'.

    Every DataFrame in the module-level ``file_list`` is held out once as the
    test set while the remaining frames are concatenated into the training
    set. The features are standardised with statistics fitted on the training
    frames only, the model is fitted, and the explained variance score on the
    held-out frame is recorded. Per-well scores and their mean are printed.

    Parameters
    ----------
    clf : estimator instance or estimator class
        Object exposing ``fit(X, y)`` and ``predict(X)``. If a class is
        passed it is instantiated with its default arguments.
    feature_set : list of str
        Names of the columns used as model inputs.

    Returns
    -------
    None
        Results are reported through ``print`` only.

    Raises
    ------
    ValueError
        If ``file_list`` holds fewer than two frames, since no training set
        can be built once one frame is held out.
    """
    if len(file_list) < 2:
        raise ValueError('cross_val needs at least two wells in file_list, got %d'
                         % len(file_list))

    # Work on a private copy so the (shared) default list can never be mutated.
    columns = list(feature_set)

    # Accept an estimator class as well as an instance.
    mdl = clf() if isinstance(clf, type) else clf

    wells = []
    EVS_list = []

    for i, test_df in enumerate(file_list):
        # The frames in file_list carry the well identifier in 'wellName'.
        wells.append(test_df['wellName'].iloc[0])
        print('%s : %s' %(i, wells[i]))

        # Train on every well except the one currently held out.
        train_df = pd.concat([frame for j, frame in enumerate(file_list) if j != i])

        train_X = train_df[columns].values
        train_y = train_df[['DTC']].values.ravel()
        test_x = test_df[columns].values
        test_y = test_df[['DTC']].values.ravel()

        # Feature scaling: fit on the training wells only and keep the
        # transformed arrays (the transforms return new arrays rather than
        # modifying their input).
        scaler = StandardScaler()
        train_X = scaler.fit_transform(train_X)
        test_x = scaler.transform(test_x)

        # Training
        mdl.fit(train_X, train_y)

        # Testing
        pred_y = mdl.predict(test_x)

        EVS_list.append(explained_variance_score(test_y, pred_y))

    print()

    avg_EVS = np.mean(EVS_list)

    for well, score in zip(wells, EVS_list):
        print('Test score on %s : %s' %(well, score))

    print()
    print('Average algorithm score: %s' %avg_EVS)

In [5]:
# Display names for the models, index-aligned with `classifiers` below.
names = ["Support Vector Regressor", "Bayesian Ridge Regressor", "Linear Regressor"]

# Estimator instances to evaluate. The list is called `classifiers`, but every
# entry is a regressor.
classifiers = [
    SVR(kernel='linear', C=1e3),
    BayesianRidge(compute_score=True),
    LinearRegression()]


In [None]:
# Feature subset for this experiment. It is kept under its own name so that the
# module-level `feature_set` (the column list used when loading the CSV files,
# which also contains 'wellName' and 'DTC') is not overwritten; re-running the
# loading cell after such an overwrite would drop those two columns.
selected_features = ['DEPT', 'BS', 'CALI', 'DENS', 'RESD', 'RESM']
# Alternative (all available inputs):
# ['DEPT', 'BS', 'CALI', 'DENS', 'GR', 'NEUT', 'PEF', 'RESD', 'RESM', 'RESS', 'TVD']

# Cross-validate every model on the same feature subset.
for clf, name in zip(classifiers, names):
    print(name,'\n')
    cross_val(clf, selected_features)

Support Victor Regressor 

0 : Cheal-G2


In [None]:
# Linear regression on the reduced feature subset. LinearRegression is already
# imported in the notebook's import cell, so it is not imported again here.
# The trailing numbers are the values noted by the author next to each feature
# list (presumably the average cross-validation score -- TODO confirm).
# All inputs: ['DEPT', 'BS', 'CALI', 'DENS', 'GR', 'NEUT', 'PEF', 'RESD', 'RESM', 'RESS', 'TVD'] #-8.0057
selected_features = ['DEPT', 'BS', 'CALI', 'DENS', 'RESD', 'RESM'] #0.372

# cross_val calls .fit/.predict on the object it is given, so pass an estimator
# instance rather than the LinearRegression class itself.
cross_val(LinearRegression(), selected_features)