In [1]:
import numpy as np # linear algebra
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle
from sklearn import linear_model,svm, metrics
import winsound
%matplotlib inline

Load Data from pickle files

In [2]:
# Directory holding the pickled, pre-split dataset.
folder = 'D:/Libraries/Documents/Tensorflow/tensorflow/Statoil/'
with open(folder + 'labeled_data.pkl', 'rb') as f:
    data = pickle.load(f)

# Unpack every array family into its train / validation / test split,
# reading the dictionary keys in the same order as before.
b1train, b1valid, b1test = (data[key] for key in ('b1train', 'b1valid', 'b1test'))
b2train, b2valid, b2test = (data[key] for key in ('b2train', 'b2valid', 'b2test'))
anglestrain, anglesvalid, anglestest = (
    data[key] for key in ('anglestrain', 'anglesvalid', 'anglestest')
)
labelstrain, labelsvalid, labelstest = (
    data[key] for key in ('labelstrain', 'labelsvalid', 'labelstest')
)
idstrain, idsvalid, idstest = (data[key] for key in ('idstrain', 'idsvalid', 'idstest'))

## Let's try some 'off the shelf' sklearn classifiers. Logistic Regression, SVC

In [4]:
# Stack band 1 and band 2 side by side (along axis 1) for every split.
X, Xvalid, Xtest = (
    np.concatenate(band_pair, axis=1)
    for band_pair in ((b1train, b2train), (b1valid, b2valid), (b1test, b2test))
)

In [4]:
# Baseline: logistic regression with default parameters on the stacked band features.
LR = linear_model.LogisticRegression()
# Kept as the final expression so the cell still displays the fitted estimator.
LR.fit(X, labelstrain)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [5]:
# Validation accuracy of the fitted logistic regression.
valid_accuracy = LR.score(Xvalid, labelsvalid)
print("Raw Score: " + str(valid_accuracy))

# Log loss is computed from the predicted class probabilities.
pred_proba = LR.predict_proba(Xvalid)
valid_log_loss = metrics.log_loss(labelsvalid, pred_proba)
print("Log Loss: " + str(valid_log_loss))

Raw Score: 0.74
Log Loss: 0.763588456889


Logistic Regression Gives:
Raw Score: 0.74
Log Loss: 0.763588456889

In [None]:
# Compare SVC kernels by validation accuracy; every other parameter stays at its default.
kernels = ['rbf', 'linear', 'sigmoid']
for k in kernels:
    SVC = svm.SVC(kernel=k).fit(X, labelstrain)
    print("kernel = " + str(k))
    kernel_accuracy = SVC.score(Xvalid, labelsvalid)
    print("Raw Score: " + str(kernel_accuracy))

kernel = rbf
Raw Score: 0.52
kernel = linear
Raw Score: 0.73
kernel = sigmoid
Raw Score: 0.52
    
The linear kernel (0.73) clearly beats the RBF and sigmoid kernels (both 0.52), so it seems like the data is fairly linearly separable.

In [5]:
# Element-wise squared magnitude (b1^2 + b2^2, no square root) for every split.
Xnorm, Xnormvalid, Xnormtest = (
    np.square(band_1) + np.square(band_2)
    for band_1, band_2 in ((b1train, b2train), (b1valid, b2valid), (b1test, b2test))
)

In [None]:
# Default logistic regression, this time fit on the squared-magnitude features.
LR = linear_model.LogisticRegression().fit(Xnorm, labelstrain)

norm_accuracy = LR.score(Xnormvalid, labelsvalid)
print("Raw Score: " + str(norm_accuracy))
pred_proba = LR.predict_proba(Xnormvalid)
norm_log_loss = metrics.log_loss(labelsvalid, pred_proba)
print("Log Loss: " + str(norm_log_loss))

In [None]:
# Repeat the SVC kernel comparison on the squared-magnitude features.
kernels = ['rbf', 'linear', 'sigmoid']
for k in kernels:
    SVC = svm.SVC(kernel=k).fit(Xnorm, labelstrain)
    print("kernel = " + str(k))
    kernel_accuracy = SVC.score(Xnormvalid, labelsvalid)
    print("Raw Score: " + str(kernel_accuracy))

In [6]:
# Element-wise polar angle of each (band 1, band 2) pair: band 2 is the first
# ("y") argument of arctan2 and band 1 the second ("x") argument.
# The argument order must be identical for every split. Previously the
# validation and test angles were computed as arctan2(b1, b2), so models fit on
# the training angles were evaluated on a differently defined feature.
Xangles = np.arctan2(b2train, b1train)
Xanglesvalid = np.arctan2(b2valid, b1valid)
Xanglestest = np.arctan2(b2test, b1test)

In [None]:
# Default logistic regression fit on the angle features only.
LR = linear_model.LogisticRegression().fit(Xangles, labelstrain)

angle_accuracy = LR.score(Xanglesvalid, labelsvalid)
print("Raw Score: " + str(angle_accuracy))
pred_proba = LR.predict_proba(Xanglesvalid)
angle_log_loss = metrics.log_loss(labelsvalid, pred_proba)
print("Log Loss: " + str(angle_log_loss))

In [7]:
# Polar representation: squared-magnitude columns followed by angle columns.
Xpolar, Xpolarvalid, Xpolartest = (
    np.concatenate(polar_parts, axis=1)
    for polar_parts in (
        (Xnorm, Xangles),
        (Xnormvalid, Xanglesvalid),
        (Xnormtest, Xanglestest),
    )
)

In [None]:
# Default logistic regression fit on the combined polar representation.
LR = linear_model.LogisticRegression().fit(Xpolar, labelstrain)

polar_accuracy = LR.score(Xpolarvalid, labelsvalid)
print("Raw Score: " + str(polar_accuracy))
pred_proba = LR.predict_proba(Xpolarvalid)
polar_log_loss = metrics.log_loss(labelsvalid, pred_proba)
print("Log Loss: " + str(polar_log_loss))

## Now let's try a Neural Network. Start with 3 hidden layers and relu

First we convert the labels to one-hot encodings for the tensorflow cross-entropy function

In [8]:
# Show the first few integer labels, then the matching one-hot rows.
print(labelstest[:5])
num_test_rows = labelstest.shape[0]
# Allocated uninitialised; both columns are filled immediately below.
onehot_test = np.ndarray(shape=(num_test_rows, 2))
for label_value in (0, 1):
    # Column k holds 1.0 where the label equals k and 0.0 elsewhere.
    onehot_test[:, label_value] = (labelstest == label_value)
print(onehot_test[:5, :])


[0 1 1 0 1]
[[ 1.  0.]
 [ 0.  1.]
 [ 0.  1.]
 [ 1.  0.]
 [ 0.  1.]]


In [9]:
# Show the first few integer labels, then the matching one-hot rows.
print(labelsvalid[:5])
num_valid_rows = labelsvalid.shape[0]
# Allocated uninitialised; both columns are filled immediately below.
onehot_valid = np.ndarray(shape=(num_valid_rows, 2))
for label_value in (0, 1):
    # Column k holds 1.0 where the label equals k and 0.0 elsewhere.
    onehot_valid[:, label_value] = (labelsvalid == label_value)
print(onehot_valid[:5, :])

[1 1 0 1 0]
[[ 0.  1.]
 [ 0.  1.]
 [ 1.  0.]
 [ 0.  1.]
 [ 1.  0.]]


In [10]:
# Show the first few integer labels, then the matching one-hot rows.
print(labelstrain[:5])
num_train_rows = labelstrain.shape[0]
# Allocated uninitialised; both columns are filled immediately below.
onehot_train = np.ndarray(shape=(num_train_rows, 2))
for label_value in (0, 1):
    # Column k holds 1.0 where the label equals k and 0.0 elsewhere.
    onehot_train[:, label_value] = (labelstrain == label_value)
print(onehot_train[:5, :])

[1 1 0 0 0]
[[ 0.  1.]
 [ 0.  1.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]]


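Create lists of the feature matrices (`Xs`, `Xvalids`, `Xtests`) so that we can iterate over our various representations of the data. Only the raw stacked-band representation is currently enabled; the polar one is commented out.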

In [50]:
# Hyper-parameters for the fully connected network:
#   num_hidden    - units in each of the three hidden dense layers
#   learning_rate - step size handed to GradientDescentOptimizer
#   dropout_rate  - `rate` argument handed to tf.layers.dropout
#   num_labels    - width of the one-hot label vectors and of the logits
num_hidden = 1024
learning_rate=0.0002
dropout_rate=0.3
num_labels = 2

# Feature representations to build a graph for. Only the raw stacked-band
# representation is active; the polar one is commented out on each line.
Xs = [X]#, Xpolar]
Xvalids = [Xvalid]#, Xpolarvalid]
Xtests = [Xtest]#, Xpolartest]

# NOTE(review): the loop target `X` rebinds the notebook-level `X`, `Xv` and
# `Xt` are never used inside the loop, and `graph` plus every tf_* handle is
# overwritten on each iteration, so only the graph for the last representation
# survives. With a single entry in `Xs` this is harmless, but enabling a second
# representation needs a restructure together with the training cell.
for X,Xv,Xt in zip(Xs,Xvalids,Xtests):
    # The input width is taken from the feature matrix's second dimension.
    num_features = X.shape[1]
    graph = tf.Graph()
    with graph.as_default():

        # Placeholders: features, one-hot labels, and a flag that switches
        # dropout on (training) or off (evaluation).
        tf_X = tf.placeholder(dtype=tf.float32, shape=(None, num_features))
        tf_y = tf.placeholder(dtype=tf.float32, shape=(None, num_labels))
        tf_is_training = tf.placeholder(dtype=tf.bool)

        # Hidden layers: dense -> ReLU, with dropout after the first two only.
        tf_hidden_1 = tf.layers.dense(tf_X,num_hidden)
        tf_hidden_1 = tf.nn.relu(tf_hidden_1)
        tf_hidden_1 = tf.layers.dropout(tf_hidden_1, rate = dropout_rate, training=tf_is_training)

        tf_hidden_2 = tf.layers.dense(tf_hidden_1,num_hidden)
        tf_hidden_2 = tf.nn.relu(tf_hidden_2)
        tf_hidden_2 = tf.layers.dropout(tf_hidden_2, rate = dropout_rate, training=tf_is_training)

        tf_hidden_3 = tf.layers.dense(tf_hidden_2,num_hidden)
        tf_hidden_3 = tf.nn.relu(tf_hidden_3)
        # Dropout on the third hidden layer is intentionally left disabled here.
        #tf_hidden_3 = tf.layers.dropout(tf_hidden_3, rate=dropout_rate, training=tf_is_training)

        # Output: unscaled logits, one per label. The loss is the batch mean of
        # the softmax cross-entropy between the one-hot labels and the logits,
        # minimised with plain gradient descent.
        # NOTE(review): the original TODO says this needs changing to log loss;
        # mean softmax cross-entropy on one-hot labels looks equivalent to it
        # already - confirm before changing.
        tf_output = tf.layers.dense(tf_hidden_3,num_labels)
        tf_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_y,logits=tf_output))#Need to change to log loss
        tf_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(tf_loss)
        
   

In [51]:
# Mini-batch size and total number of gradient-descent steps.
batch_size = 64
num_steps=10001

with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    print("Initialized")

    # The validation feed never changes, so build it once; dropout is off here.
    valid_feed = {tf_X: Xvalid, tf_y: onehot_valid, tf_is_training: False}

    for step in range(num_steps):
        # Slide a batch-sized window over the training rows, wrapping around
        # so that the slice never runs past the last row.
        offset = (step * batch_size) % (X.shape[0] - batch_size)
        batch_end = offset + batch_size
        batch_X = X[offset:batch_end, :]
        batch_labels = onehot_train[offset:batch_end, :]
        feed_dict = {tf_X: batch_X, tf_y: batch_labels, tf_is_training: True}

        # One optimisation step; also fetch the loss on this mini-batch.
        _, loss = session.run([tf_optimizer, tf_loss], feed_dict=feed_dict)

        # Only report every 500th step.
        if step % 500 != 0:
            continue

        print("Step: ", step)
        print("Loss: ", loss)
        valid_preds, valid_loss = session.run([tf_output, tf_loss], feed_dict=valid_feed)
        # Accuracy: fraction of rows whose largest logit matches the integer label.
        valid_hits = np.argmax(valid_preds, axis=1) == labelsvalid
        print("Validation Accuracy: ", np.mean(valid_hits))
        print("Validation Loss: ", valid_loss)

# Audible signal once training has finished.
winsound.Beep(500,1000)
          
            
            


Initialized
Step:  0
Loss:  9.24991
Validation Accuracy:  0.52
Validation Loss:  39.7753
Step:  500
Loss:  0.624504
Validation Accuracy:  0.57
Validation Loss:  0.60793
Step:  1000
Loss:  0.650186
Validation Accuracy:  0.7
Validation Loss:  0.588915
Step:  1500
Loss:  0.618067
Validation Accuracy:  0.66
Validation Loss:  0.592986
Step:  2000
Loss:  0.508514
Validation Accuracy:  0.64
Validation Loss:  0.625163
Step:  2500
Loss:  0.465766
Validation Accuracy:  0.73
Validation Loss:  0.529888
Step:  3000
Loss:  0.567861
Validation Accuracy:  0.74
Validation Loss:  0.513416
Step:  3500
Loss:  0.488856
Validation Accuracy:  0.72
Validation Loss:  0.528653
Step:  4000
Loss:  0.571767
Validation Accuracy:  0.7
Validation Loss:  0.531931
Step:  4500
Loss:  0.420224
Validation Accuracy:  0.74
Validation Loss:  0.496103
Step:  5000
Loss:  0.522784
Validation Accuracy:  0.72
Validation Loss:  0.509659
Step:  5500
Loss:  0.407494
Validation Accuracy:  0.75
Validation Loss:  0.494307
Step:  6000
L

Not bad accuracy, but still not competitive. The network could probably benefit from a decaying learning rate, but it is difficult to train well with so few training examples. A convolutional network will probably do much better, since more specialized models are more practical when we have only a small amount of training data.