In [1]:
Plot = require 'itorch.Plot'

# Load the data
We first load the data from the Torch-serialized `.t7` files. The conversion of the original data into this format is done in the <a href="data_loading.ipynb">data loading notebook</a>.

In [2]:
-- Read one serialized Torch object from `path`.
-- The file is opened exactly as before (torch.DiskFile in read mode with its
-- default encoding). The handle is local to this helper and is closed even
-- when deserialization raises, so a bad file does not leave a descriptor open
-- in the kernel or a stale handle in the global namespace.
local function read_t7(path)
    local handle = torch.DiskFile(path, 'r')
    local ok, object = pcall(handle.readObject, handle)
    handle:close()
    if not ok then
        error('failed to read ' .. path .. ': ' .. tostring(object), 0)
    end
    return object
end

-- Table that still contains the facies column (used for training below).
facies = read_t7('dat/facies_vectors.t7')
-- Validation table stored without the facies column.
validate = read_t7('dat/validation_data_nofacies.t7')

# Clean the data
### Extract the useful feature vectors
Let's pick out the well logs and the "geologic constraining variables" by simply dropping the facies and the depth data.

In [3]:
-- Report rows x columns of both raw tables before any column is dropped.
local facies_rows, facies_cols = facies:size(1), facies:size(2)
local validate_rows, validate_cols = validate:size(1), validate:size(2)
print("facies size: ", facies_rows, "x", facies_cols)
print("validate size: ", validate_rows, "x", validate_cols)

facies size: 	4149	x	9	
validate size: 	830	x	8	


In [4]:
-- Keep only the feature columns: columns 3-9 of `facies` and 2-8 of `validate`.
-- Indexing with a table of ranges returns a view that shares storage with the
-- source tensor. The normalization step modifies training_data in place, so
-- without clone() it would silently overwrite `facies` as well, and re-running
-- this cell could no longer recover the raw values.
training_data = facies[{{},{3,9}}]:clone();
print("training data size: ", training_data:size()[1], "x", training_data:size()[2])
testing_data = validate[{{}, {2,8}}]:clone();
print("testing data size: ", testing_data:size()[1], "x", testing_data:size()[2])

-- Column 2 of `facies` flattened to a 1-D vector (used as the depth axis when
-- plotting). The length is taken from the tensor instead of a hard-coded 4149.
depth = facies[{{},{2}}]:reshape(facies:size(1))

training data size: 	4149	x	7	
testing data size: 	830	x	7	


### Normalize the data
As per the literature and Brandon's suggestion, we now normalize each feature column of the training data to have zero mean and unit variance.

In [5]:
-- Standardize every column of training_data in place: subtract the column
-- mean, then divide by the column standard deviation. The statistics are kept
-- in the global tables `mean` and `stdv`, indexed by column.
-- NOTE(review): testing_data is not normalized in this cell; it presumably
-- needs the same mean/stdv applied before it is fed to a model -- confirm
-- whether a later cell does this.
mean = {}
stdv  = {}
local n_logs = training_data:size(2)  -- number of feature columns (7 for the current data)
for i = 1, n_logs do -- over each well log
    -- 1-D view of column i; in-place operations on it write through to training_data.
    local column = training_data:select(2, i)

    mean[i] = column:mean()
    print('Log ' .. i .. ', Mean: ' .. mean[i])
    column:add(-mean[i])

    stdv[i] = column:std()
    print('Log ' .. i .. ', Standard Deviation: ' .. stdv[i])
    -- A constant column would otherwise be turned into NaN/Inf by the division.
    assert(stdv[i] > 0, 'Log ' .. i .. ' has zero standard deviation and cannot be normalized')
    column:div(stdv[i])
end

Log 1, Mean: 64.933984574596	
Log 1, Standard Deviation: 30.302530491675	
Log 2, Mean: 0.65956573946493	
Log 2, Standard Deviation: 0.25270344780324	
Log 3, Mean: 4.4024837310195	
Log 3, Standard Deviation: 5.2749471828273	
Log 4, Mean: 13.201065678477	
Log 4, Standard Deviation: 7.1328455992613	
Log 5, Mean: 22104.391960713	
Log 5, Standard Deviation: 41496.238560055	
Log 6, Mean: 1.5184381778742	
Log 6, Standard Deviation: 0.49972014332745	
Log 7, Mean: 0.52185153048927	


Log 7, Standard Deviation: 0.28664420595204	


In [6]:
-- Plot the seven normalized logs of the first well against depth, shifting
-- each successive curve by a fixed offset so the curves do not overlap.
-- NOTE(review): this cell originally sliced rows 1..472, while the other
-- "Shrimplin Logs" plot in this notebook uses rows 1..471. The extra row
-- appears to belong to the next well, so 471 is used here -- confirm.
local N_ROWS = 471       -- rows plotted (1..N_ROWS)
local CURVE_OFFSET = 2   -- horizontal shift between consecutive curves
local curves = {         -- {color, legend label}, in training_data column order
    {'red',    'gamma ray'},
    {'blue',   'ILD'},
    {'green',  'dPhi'},
    {'brown',  'NeuPor'},
    {'grey',   'PE'},
    {'black',  'NM_M'},
    {'yellow', 'RelPos'},
}
local well_depth = depth[{{1, N_ROWS}}]

plot = Plot()
for col, curve in ipairs(curves) do
    local values = training_data[{{1, N_ROWS}, {col}}]:reshape(N_ROWS) + CURVE_OFFSET * (col - 1)
    plot:line(values, well_depth, curve[1], curve[2])
end

plot:legend(true)
plot:title("Shrimplin Logs")
plot:xaxis('normalized log value (each curve shifted by ' .. CURVE_OFFSET .. ')'):yaxis('depth')
plot:draw()

In [7]:
-- Plot the seven normalized logs for rows 1..471 (titled "Shrimplin Logs")
-- against depth. Each successive curve is shifted by a fixed offset so the
-- curves sit side by side instead of overlapping.
local N_SHRIMPLIN = 471   -- rows plotted (1..N_SHRIMPLIN)
local LOG_OFFSET = 2      -- horizontal shift between consecutive curves
local log_styles = {      -- {color, legend label}, in training_data column order
    {'red',    'gamma ray'},
    {'blue',   'ILD'},
    {'green',  'dPhi'},
    {'brown',  'NeuPor'},
    {'grey',   'PE'},
    {'black',  'NM_M'},
    {'yellow', 'RelPos'},
}
local shrimplin_depth = depth[{{1, N_SHRIMPLIN}}]

plot = Plot()
for i, style in ipairs(log_styles) do
    local shifted = training_data[{{1, N_SHRIMPLIN}, {i}}]:reshape(N_SHRIMPLIN) + LOG_OFFSET * (i - 1)
    plot:line(shifted, shrimplin_depth, style[1], style[2])
end

plot:legend(true)
plot:title("Shrimplin Logs")
plot:xaxis('normalized log value (each curve shifted by ' .. LOG_OFFSET .. ')'):yaxis('depth')
plot:draw()

### Extract blind well
Next we separate the Newby well for blind testing.

In [18]:
-- Hold out the Newby well (rows NEWBY_FIRST..NEWBY_LAST) for blind testing and
-- rebuild training_data from the rows before and after it.
local NEWBY_FIRST, NEWBY_LAST = 3283, 3745
local n_rows = training_data:size(1)
-- This cell overwrites training_data, so running it twice would slice a table
-- that no longer contains the Newby rows. Fail with a clear message instead
-- of an out-of-range index error.
assert(n_rows > NEWBY_LAST,
    'training_data has only ' .. n_rows .. ' rows; the Newby well was probably already removed -- re-run the notebook from the top')
-- clone() so newby owns its storage instead of aliasing the full table.
newby = training_data[{{NEWBY_FIRST, NEWBY_LAST}, {}}]:clone();
training_data = torch.cat(training_data[{{1, NEWBY_FIRST - 1}, {}}], training_data[{{NEWBY_LAST + 1, n_rows}, {}}], 1);

In [21]:
-- Show rows x columns of both tables after the blind well has been split off.
local train_dims, newby_dims = training_data:size(), newby:size()
print("new training data size: ", train_dims[1], "x", train_dims[2])
print("newby data size: ", newby_dims[1], "x", newby_dims[2])

new training data size: 	3686	x	7	
newby data size: 	463	x	7	
