In [None]:
# Import toolboxes needed for workflow

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
import numpy as np
import pandas as pd
from pandas import set_option
from sklearn.metrics import mean_absolute_error
from tpot import TPOTRegressor

# Notebook-wide configuration: show wide frames in full, render Bokeh inline.
set_option('display.max_columns',400)
output_notebook()

In [None]:
# Read one Excel workbook per vertical well (A through E).
# map() consumes the file names in the order listed, so each variable on the
# left is bound to the workbook in the matching position on the right.
VertA, VertB, VertC, VertD, VertE = map(
    pd.read_excel,
    (
        'VertA_merge.xlsx',
        'VertB_merge.xlsx',
        'VertC_merge.xlsx',
        'VertD_merge.xlsx',
        'VertE_merge.xlsx',
    ),
)

In [None]:
# Porosity unit fix-up. The wells name the porosity curve differently
# (NPRL:1, NPOR, NPOR:1); the original note treats these as the same quantity
# whose values must be brought onto a common scale.
# Three columns are scaled down by 100 -- presumably percent -> fraction;
# confirm against the raw logs.
# NOTE: each column is overwritten in place, so running this cell a second
# time without reloading the wells divides by 100 again.
VertA['NPOR:1'] = VertA['NPOR:1'].div(100)
VertB['NPRL:1'] = VertB['NPRL:1'].div(100)
VertD['NPRL:1'] = VertD['NPRL:1'].div(100)

In [None]:
# Bring every well onto one column vocabulary, then stack wells A-D into a
# single training frame. VertE is standardized too but kept apart, since DT is
# to be predicted for it.
# The original author notes that VertA's RPM column is all zeros, so
# 'TOP_DRIVE_RPM' is used for VertA instead.

# Target column names, in the order every per-well column list below follows.
STANDARD_COLUMNS = ['DEPT', 'DT', 'GR', 'ROP', 'WOB', 'RPM', 'RHOB', 'NPHI']


def _standardize_well(well, source_columns):
    """Select `source_columns` from `well` and rename them to STANDARD_COLUMNS.

    `source_columns` lists the well's own column names in the same order as
    STANDARD_COLUMNS. Row index labels are converted to strings (index=str).
    Returns a new DataFrame; `well` itself is not modified.
    """
    column_map = dict(zip(source_columns, STANDARD_COLUMNS))
    return well[source_columns].rename(index=str, columns=column_map)


VertA_subset = _standardize_well(
    VertA, ['DEPT', 'DT35', 'GMSG', 'ROP_-_FAST:1', 'BIT_WEIGHT', 'TOP_DRIVE_RPM', 'DEN', 'NPOR:1'])
VertB_subset = _standardize_well(
    VertB, ['DEPT', 'DT35', 'GMGC', 'ROP_-_FAST:1', 'BIT_WEIGHT', 'ROTARY_RPM', 'DEN', 'NPRL:1'])
VertC_subset = _standardize_well(
    VertC, ['DEPT', 'DTCO', 'GR', 'ROP_-_FAST:1', 'BIT_WEIGHT', 'ROTARY_RPM', 'RHOB', 'NPOR'])
VertD_subset = _standardize_well(
    VertD, ['DEPT', 'MCDT', 'GMGC', 'ROP_-_FAST:1', 'BIT_WEIGHT', 'ROTARY_RPM', 'DEN', 'NPRL:1'])
VertE_subset = _standardize_well(
    VertE, ['DEPT', 'DT', 'GRTO', 'ROP', 'WOB', 'RPM', 'RHOB', 'NPHI'])

# Training set: wells A-D stacked row-wise with a fresh 0..n-1 index.
frames = [VertA_subset, VertB_subset, VertC_subset, VertD_subset]
mergeABCD = pd.concat(frames, ignore_index=True)

In [None]:
# Build the model inputs (X) and target (y).
# Training data: wells A-D combined. Hold-out data: well E.
# The feature list is defined once so train and hold-out cannot drift apart.
FEATURE_COLUMNS = ['ROP','WOB','RPM','GR','RHOB','NPHI']
TARGET_COLUMN = 'DT'

X_VertABCD, y_VertABCD = mergeABCD[FEATURE_COLUMNS], mergeABCD[TARGET_COLUMN]
X_VertE, y_VertE = VertE_subset[FEATURE_COLUMNS], VertE_subset[TARGET_COLUMN]

In [None]:
# TPOT documentation: https://epistasislab.github.io/tpot/

In [None]:
# Configure TPOT's genetic search for a regression pipeline:
#   generations / population_size -- 30 generations of 30 candidate pipelines
#   verbosity=2      -- progress reporting during the search
#   n_jobs=-1        -- use all available CPU cores
#   warm_start=True  -- a later fit() call continues from the previous population
#   random_state=42  -- fixed seed so the stochastic search can be reproduced
tpot = TPOTRegressor(
    generations=30,
    population_size=30,
    verbosity=2,
    n_jobs=-1,
    warm_start=True,
    random_state=42,
)

In [None]:
# Run the TPOT search on the combined A-D training wells (features -> DT).
tpot.fit(X_VertABCD, y_VertABCD)

In [None]:
# Score the fitted TPOT model on the held-out well, VertE, and print the result.
print(tpot.score(X_VertE, y_VertE))

In [None]:
# Predicted DT values for VertE. Despite the name, `tpot_model` holds the
# predictions returned by predict(), not a model object.
tpot_model = tpot.predict(X_VertE)

In [None]:
# Optional: uncomment to export the best pipeline TPOT found as a standalone Python script.
# tpot.export('tpot_exported_pipeline.py')

In [None]:
# Overlay measured and TPOT-predicted DT for VertE against depth.
x = VertE['DEPT']   # depth axis
y = VertE['DT']     # measured DT
z = tpot_model      # DT predicted by the fitted TPOT pipeline

# NOTE(review): `plot_width`/`plot_height` and `legend=` are the older Bokeh
# spellings; newer Bokeh releases use `width`/`height` and `legend_label=`.
# Kept as-is to match the Bokeh version this notebook was written against.
p = figure(title='VertE Predicted DT using TPOT', plot_width=600, plot_height=300)
r = p.line(x, y, color='green', line_width=0.5,legend='DT')
# The legend names the model actually used (TPOT), matching the figure title.
r1 = p.line(x, z, color='blue', line_width=0.5,legend='Pred. DT TPOT')

p.xaxis.axis_label = 'Depth (ft)'
# NOTE(review): sonic DT is usually quoted in us/ft -- confirm 'ms/ft' is intended.
p.yaxis.axis_label = 'DT (ms/ft)'
p.legend.location = 'top_left'
show(p)

In [None]:
# Mean absolute error between measured and predicted DT on the held-out well.
# Uses the named hold-out target and the prediction array directly, so this
# cell does not depend on the plotting cell's temporary x/y/z variables.
# `mean_absolute_error` is imported with the other dependencies at the top.
error = mean_absolute_error(y_VertE, tpot_model)

In [None]:
# Display the mean absolute error of the VertE prediction.
error