In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
def normalise_and_pad(sequence):
    ret = np.pad(np.array(sequence) / 255.0, ((0, 10-len(sequence)),(0,0)), 'wrap')
    return ret

df = pd.read_json('/Users/diogo/Kaggle/turkeyCompetition/dataset/train.json')
x_train = np.asarray([normalise_and_pad(x) for x in df['audio_embedding']], dtype='float32')
y_train = df['is_turkey'].values

# Split the data into 80% training, 10% cross-validation, 10% test
x_train, x_crossvalidation, y_train, y_crossvalidation = train_test_split(x_train,
                                                                          y_train,
                                                                          test_size=0.2)

x_crossvalidation, x_test, y_crossvalidation, y_test = train_test_split(x_crossvalidation, 
                                                                        y_crossvalidation,
                                                                        test_size=0.5)

print(x_train.shape)

(956, 10, 128)


In [3]:
def get_batches(x_train, y_train, batch_size):
    current_index=0
    while current_index+batch_size < len(x_train):
        batch_x = np.asarray(x_train[current_index:current_index+batch_size], dtype='float32')
        batch_y = np.asarray(y_train[current_index:current_index+batch_size], dtype='float32')
        batch_y = batch_y.reshape(len(batch_y), 1)
        yield (batch_x, batch_y)
        current_index += batch_size

In [4]:
import tensorflow as tf

# Parameters
learning_rate = 0.00001
training_epochs = 60
batch_size = 100  # Decrease batch size if you don't have enough memory
display_step = 1

n_input = 10*128  # MNIST data input (img shape: 28*28)
n_classes = 1  # MNIST total classes (0-9 digits)

In [5]:
n_hidden_layer = 256 # layer number of features

In [6]:
# Store layers weight & bias
weights = {
    'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes]))
}
biases = {
    'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

In [7]:
# tf Graph input
x = tf.placeholder("float32", [None, 10, 128])
y = tf.placeholder("float32", [None, n_classes])

x_flat = tf.reshape(x, [-1, n_input])

x_flat = tf.Print(x_flat, [x_flat, tf.shape(x_flat)], "Xflat is: ")
y_flat = y
y_flat = tf.Print(y_flat, [y_flat, tf.shape(y_flat)], "Y is: ")

Instructions for updating:
Use tf.print instead of tf.Print. Note that tf.print returns a no-output operator that directly prints the output. Outside of defuns or eager mode, this operator will not be executed unless it is directly specified in session.run or used as a control dependency for other operators. This is only a concern in graph mode. Below is an example of how to ensure tf.print executes in graph mode:
```python
    sess = tf.Session()
    with sess.as_default():
        tensor = tf.range(10)
        print_op = tf.print(tensor)
        with tf.control_dependencies([print_op]):
          out = tf.add(tensor, tensor)
        sess.run(out)
    ```
Additionally, to use tf.print in python 2.7, users must make sure to import
the following:

  `from __future__ import print_function`



In [8]:
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']),biases['hidden_layer'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
logits = tf.add(tf.matmul(layer_1, weights['out']), biases['out'])
logits = tf.Print(logits, [logits, tf.shape(logits), tf.nn.softmax(logits)], "Logits: ")

In [15]:
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_flat))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)


# Calculate accuracy
pred = tf.nn.softmax(logits, axis=0)
correct_prediction = tf.equal(tf.round(pred), y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [16]:
# Initializing the variables
init = tf.global_variables_initializer()


# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        step = 0
        for batch_x, batch_y in get_batches(x_train, y_train, batch_size):
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        
        # Print status for every 10 epochs        
        if epoch % 10 == 0:
            y_test = y_test.reshape(len(y_test),1)
            print(y_test[:4])
            valid_accuracy = sess.run(
                accuracy,
                feed_dict={
                    x: x_test,
                    y: y_test})
            print('Epoch {:<3} - Validation Accuracy: {}'.format(
                epoch,
                valid_accuracy))

[[1]
 [1]
 [0]
 [0]]
Epoch 0   - Validation Accuracy: 0.6166666746139526
[[1]
 [1]
 [0]
 [0]]
Epoch 10  - Validation Accuracy: 0.6166666746139526
[[1]
 [1]
 [0]
 [0]]
Epoch 20  - Validation Accuracy: 0.6166666746139526
[[1]
 [1]
 [0]
 [0]]
Epoch 30  - Validation Accuracy: 0.6333333253860474
[[1]
 [1]
 [0]
 [0]]
Epoch 40  - Validation Accuracy: 0.6333333253860474
[[1]
 [1]
 [0]
 [0]]
Epoch 50  - Validation Accuracy: 0.6333333253860474
