In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the training set from JSON into a DataFrame (path is relative to the notebook)
df = pd.read_json('../dataset/train.json')

In [3]:
df.head()

Unnamed: 0,audio_embedding,end_time_seconds_youtube_clip,is_turkey,start_time_seconds_youtube_clip,vid_id
0,"[[172, 34, 216, 110, 208, 46, 95, 66, 161, 125...",70,0,60,kDCk3hLIVXo
1,"[[169, 20, 165, 102, 205, 62, 110, 103, 211, 1...",40,1,30,DPcGzqHoo7Y
2,"[[148, 8, 138, 60, 237, 48, 121, 108, 145, 177...",240,1,230,7yM63MTHh5k
3,"[[151, 0, 162, 88, 171, 71, 47, 90, 179, 190, ...",520,1,510,luG3RmUAxxM
4,"[[162, 17, 187, 111, 211, 105, 92, 67, 203, 15...",10,0,0,PIm3cjxTpOk


In [4]:
# Examine the audio embedding data: tally how many embedding vectors each clip
# carries and how often every individual embedding value occurs.
dist_encodings = {}
dist_samples = {}
for clip_embedding in df['audio_embedding']:
    n_vectors = len(clip_embedding)
    dist_samples[n_vectors] = dist_samples.get(n_vectors, 0) + 1
    for vector in clip_embedding:
        for value in vector:
            dist_encodings[value] = dist_encodings.get(value, 0) + 1

lowest_value, highest_value = min(dist_encodings), max(dist_encodings)
print("Encodings range: {} to {}".format(lowest_value, highest_value))
print("Encodings: {}".format(dist_encodings))

shortest_clip, longest_clip = min(dist_samples), max(dist_samples)
print("Encoding samples range: {} to {}".format(shortest_clip, longest_clip))
print("Encoding samples: {}".format(dist_samples))

Encodings range: 0 to 255
Encodings: {0: 126642, 1: 2133, 2: 2257, 3: 2306, 4: 2306, 5: 2386, 6: 2415, 7: 2393, 8: 2656, 9: 2591, 10: 2622, 11: 2724, 12: 2712, 13: 2789, 14: 2832, 15: 2931, 16: 2923, 17: 3044, 18: 3047, 19: 3116, 20: 3097, 21: 3161, 22: 3119, 23: 3209, 24: 3163, 25: 3364, 26: 3246, 27: 3213, 28: 3387, 29: 3271, 30: 3310, 31: 3354, 32: 3395, 33: 3523, 34: 3581, 35: 3632, 36: 3596, 37: 3636, 38: 3798, 39: 3723, 40: 3849, 41: 3831, 42: 3835, 43: 3962, 44: 3998, 45: 3921, 46: 4154, 47: 4151, 48: 4349, 49: 4220, 50: 4428, 51: 4447, 52: 4407, 53: 4487, 54: 4573, 55: 4749, 56: 4569, 57: 4627, 58: 4776, 59: 4874, 60: 4826, 61: 4997, 62: 4855, 63: 5074, 64: 5036, 65: 5095, 66: 5111, 67: 5358, 68: 5267, 69: 5526, 70: 5433, 71: 5492, 72: 5438, 73: 5436, 74: 5538, 75: 5501, 76: 5620, 77: 5782, 78: 5860, 79: 5738, 80: 5932, 81: 5783, 82: 6007, 83: 6180, 84: 6080, 85: 6126, 86: 6145, 87: 6085, 88: 6010, 89: 6150, 90: 6272, 91: 6253, 92: 6402, 93: 6449, 94: 6216, 95: 6320, 96: 6463, 

In [5]:
# Hold out 10% of the rows for validation; the fixed seed keeps the split reproducible
x_train_data, x_val_data = train_test_split(
    df,
    test_size=0.1,
    train_size=None,
    random_state=34,
    shuffle=True,
)

def normalise_and_pad(sequence, max_val=255.0, max_seq_len=10):
    """Scale a sequence of embedding vectors and force it to a fixed length.

    Args:
        sequence: 2-D array-like of shape (n_vectors, n_features).
        max_val: Divisor applied to every value (255.0 maps 0..255 onto 0..1).
        max_seq_len: Number of rows in the returned array.

    Returns:
        A float ndarray of shape (max_seq_len, n_features). Sequences shorter
        than ``max_seq_len`` are extended by repeating their own rows from the
        start ('wrap' padding); longer sequences are truncated to their first
        ``max_seq_len`` rows.
    """
    # Truncate first: a negative pad width would make np.pad raise on
    # sequences longer than max_seq_len.
    scaled = np.array(sequence)[:max_seq_len] / max_val
    missing_rows = max_seq_len - len(scaled)
    return np.pad(scaled, ((0, missing_rows), (0, 0)), 'wrap')


def create_binary_classifier(binary_array):
    """One-hot encode a sequence of binary labels.

    Returns a float32 array of shape (len(binary_array), 2): column 1 is set
    for entries equal to 1, column 0 is set for every other entry.
    """
    is_positive = np.asarray(binary_array) == 1
    one_hot = np.zeros(shape=(len(binary_array), 2), dtype='float32')
    one_hot[is_positive, 1] = 1
    one_hot[~is_positive, 0] = 1
    return one_hot
    
    

# Training split: fixed-length, scaled embeddings plus one-hot labels
train_sequences = [normalise_and_pad(seq) for seq in x_train_data['audio_embedding']]
xtrain = np.asarray(train_sequences, dtype='float32')
ytrain = create_binary_classifier(x_train_data['is_turkey'].values)

# Validation split, prepared exactly the same way
val_sequences = [normalise_and_pad(seq) for seq in x_val_data['audio_embedding']]
xval = np.asarray(val_sequences, dtype='float32')
yval = create_binary_classifier(x_val_data['is_turkey'].values)

# Examine shapes, then compare the raw labels with their one-hot encoding
array_shapes = (xtrain.shape, ytrain.shape, xval.shape, yval.shape)
print("xtrain: {}; ytrain:{}, xval: {}; yval: {}".format(*array_shapes))

print(x_train_data['is_turkey'].values[:10])
print(ytrain[:10])

xtrain: (1075, 10, 128); ytrain:(1075, 2), xval: (120, 10, 128); yval: (120, 2)
[0 1 0 0 1 0 1 1 0 0]
[[1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]]


In [6]:
def get_batches(x_train, y_train, batch_size):
    """Yield consecutive (batch_x, batch_y) slices that cover all of the data.

    Args:
        x_train: Sliceable sequence of inputs.
        y_train: Sliceable sequence of targets, aligned with ``x_train``.
        batch_size: Maximum number of samples per batch; must be positive.

    Yields:
        Tuples ``(batch_x, batch_y)``. Every batch holds ``batch_size``
        samples except possibly the last, which holds whatever remains, so no
        sample is dropped.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    # Stepping over the full length keeps the trailing partial batch and the
    # final full batch when len(x_train) is an exact multiple of batch_size.
    for start in range(0, len(x_train), batch_size):
        stop = start + batch_size
        yield (x_train[start:stop], y_train[start:stop])

In [7]:
import tensorflow as tf
import time

# Only show TensorFlow log messages at ERROR level and start from an empty default graph
tf.logging.set_verbosity(tf.logging.ERROR)
tf.reset_default_graph()

# Checkpoint path the trained model is saved to and later restored from
save_file = '../model/model.ckpt'

# Parameters
learning_rate = 0.000005
training_epochs = 800
batch_size = 128  # Decrease batch size if you don't have enough memory
display_step = 5  # validation accuracy is printed every `display_step` epochs
keep_prob_val = 0.5  # dropout keep probability fed during training steps

n_input = 10*128  # flattened input size: 10 embedding vectors of 128 values each
n_classes = 2  # two output classes: not turkey / turkey

# Directory the TensorBoard FileWriter writes its event files to
logdir = '../logs'

#Size of the network:
n_hidden_layer_1 = 512 # number of units in the first hidden layer
n_hidden_layer_2 = 256 # number of units in the second hidden layer

In [8]:
# Build the feed-forward graph: placeholders -> two ReLU/dropout hidden layers -> logits.
with tf.name_scope("variables_scope"):
    
    with tf.name_scope("input_variables"):
        # tf Graph input
        x = tf.placeholder("float32", [None, 10, 128], name="input_x")
        y = tf.placeholder("float32", [None, n_classes], name="targets")
        keep_prob = tf.placeholder(tf.float32) # probability to keep units

        # Flatten each (10, 128) example into a single n_input-long vector
        x_flat = tf.reshape(x, [-1, n_input], name="input_x_flat")
    
    
    with tf.name_scope("weights_scope"):
        # Store layers weight & bias
        # NOTE(review): tf.random_normal is called without a stddev argument here;
        # its default (1.0 per the TF docs) is large for layers this wide - consider
        # a scaled initializer. TODO confirm before changing, it alters training.
        weights = {
            'hidden_layer_1': tf.Variable(tf.random_normal([n_input, n_hidden_layer_1]), name="w_hidden_1"),
            'hidden_layer_2': tf.Variable(tf.random_normal([n_hidden_layer_1, n_hidden_layer_2]), name="w_hidden_2"),
            # Disabled layers 3/4: the n_hidden_layer_3 / n_hidden_layer_4 sizes they
            # need are not defined in the visible configuration cell.
            #'hidden_layer_3': tf.Variable(tf.random_normal([n_hidden_layer_2, n_hidden_layer_3]), name="w_hidden_3"),
            #'hidden_layer_4': tf.Variable(tf.random_normal([n_hidden_layer_3, n_hidden_layer_4]), name="w_hidden_4"),
            'out': tf.Variable(tf.random_normal([n_hidden_layer_2, n_classes]), name="w_out")
        }
        # Histogram summaries of the weight matrices for TensorBoard
        tf.summary.histogram("weight_histogram_hidden_1", weights['hidden_layer_1'])
        tf.summary.histogram("weight_histogram_hidden_2", weights['hidden_layer_2'])
        #tf.summary.histogram("weight_histogram_hidden_3", weights['hidden_layer_3'])
        #tf.summary.histogram("weight_histogram_hidden_4", weights['hidden_layer_4'])
        tf.summary.histogram("weight_histogram_out", weights['out'])
        
        # Bias variables are created without explicit names
        biases = {
            'hidden_layer_1': tf.Variable(tf.random_normal([n_hidden_layer_1])),
            'hidden_layer_2': tf.Variable(tf.random_normal([n_hidden_layer_2])),
            #'hidden_layer_3': tf.Variable(tf.random_normal([n_hidden_layer_3])),
            #'hidden_layer_4': tf.Variable(tf.random_normal([n_hidden_layer_4])),
            'out': tf.Variable(tf.random_normal([n_classes]))
        }
    
    
    with tf.name_scope("network_scope"):
        # Hidden layer 1: affine transform, RELU activation, then dropout
        layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer_1']),biases['hidden_layer_1'])
        layer_1 = tf.nn.relu(layer_1)
        layer_1 = tf.nn.dropout(layer_1, keep_prob)

        # Hidden layer 2: same structure, fed by layer 1
        layer_2 = tf.add(tf.matmul(layer_1, weights['hidden_layer_2']),biases['hidden_layer_2'])
        layer_2 = tf.nn.relu(layer_2)
        layer_2 = tf.nn.dropout(layer_2, keep_prob)

#         layer_3 = tf.add(tf.matmul(layer_2, weights['hidden_layer_3']),biases['hidden_layer_3'])
#         layer_3 = tf.nn.relu(layer_3)
#         layer_3 = tf.nn.dropout(layer_3, keep_prob)
        
#         layer_4 = tf.add(tf.matmul(layer_3, weights['hidden_layer_4']),biases['hidden_layer_4'])
#         layer_4 = tf.nn.relu(layer_4)
#         layer_4 = tf.nn.dropout(layer_4, keep_prob)

        # Output layer with linear activation (raw logits; softmax is applied inside the loss)
        logits = tf.add(tf.matmul(layer_2, weights['out']), biases['out'])
        tf.summary.histogram("logits_histogram", logits)

In [9]:
with tf.name_scope("training_scope"):
    # Define loss and optimizer: mean softmax cross-entropy between logits and one-hot targets
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y), name='cost')
    # `optimizer` holds the training op returned by minimize(); running it applies one update step
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost, name='gradDescent')
    # The loss is a single scalar, so only a scalar summary is recorded for it
    tf.summary.scalar("loss_scalar", cost)


# Calculate accuracy
with tf.name_scope("accuracy_scope"):
    # Predicted class = index of the largest logit; true class = index of the 1 in the one-hot target
    argmax_logits = tf.argmax(logits, 1)
    argmax_y = tf.argmax(y, 1)
    correct_prediction = tf.equal(argmax_logits, argmax_y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # One scalar summary with a correctly spelt, unique tag
    tf.summary.scalar("accuracy_scalar", accuracy)

In [10]:
# Initializing the variables
init = tf.global_variables_initializer()

# TensorBoard - merge every registered summary op and open the event-file writer
merged_summary_op = tf.summary.merge_all()
tbWriter = tf.summary.FileWriter(logdir)

saver = tf.train.Saver()

# Launch the graph. The writer is closed in `finally` so buffered summaries are
# flushed to disk even when training is interrupted or fails.
try:
    with tf.Session() as sess:
        sess.run(init)
        # Write the graph out so its structure can be viewed in TensorBoard
        tbWriter.add_graph(sess.graph)
        # Training cycle
        step = 0
        for epoch in range(training_epochs):
            # Loop over all batches
            for batch_x, batch_y in get_batches(xtrain, ytrain, batch_size):
                # Run optimization op (backprop) with dropout enabled
                sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: keep_prob_val})

                # Every 5th step, record summaries for the current batch
                # (still evaluated with the training keep probability)
                if step % 5 == 0:
                    summary = sess.run(merged_summary_op, feed_dict={x: batch_x, y: batch_y, keep_prob: keep_prob_val})
                    tbWriter.add_summary(summary, step)

                step += 1

            # Print validation accuracy every `display_step` epochs, with dropout disabled
            if epoch % display_step == 0:
                valid_accuracy = sess.run(
                    accuracy,
                    feed_dict={
                        x: xval,
                        y: yval,
                        keep_prob: 1.0})
                print('Epoch {:<3} - Validation Accuracy: {}'.format(
                    epoch,
                    valid_accuracy))

        # Persist the trained variables so a later session can restore them
        saver.save(sess, save_file)
finally:
    tbWriter.close()

Epoch 0   - Validation Accuracy: 0.5916666388511658
Epoch 5   - Validation Accuracy: 0.5916666388511658
Epoch 10  - Validation Accuracy: 0.6083333492279053
Epoch 15  - Validation Accuracy: 0.6083333492279053
Epoch 20  - Validation Accuracy: 0.6000000238418579
Epoch 25  - Validation Accuracy: 0.6166666746139526
Epoch 30  - Validation Accuracy: 0.6416666507720947
Epoch 35  - Validation Accuracy: 0.6583333611488342
Epoch 40  - Validation Accuracy: 0.6583333611488342
Epoch 45  - Validation Accuracy: 0.6666666865348816
Epoch 50  - Validation Accuracy: 0.699999988079071
Epoch 55  - Validation Accuracy: 0.7250000238418579
Epoch 60  - Validation Accuracy: 0.7250000238418579
Epoch 65  - Validation Accuracy: 0.7333333492279053
Epoch 70  - Validation Accuracy: 0.7333333492279053
Epoch 75  - Validation Accuracy: 0.7416666746139526
Epoch 80  - Validation Accuracy: 0.7416666746139526
Epoch 85  - Validation Accuracy: 0.7416666746139526
Epoch 90  - Validation Accuracy: 0.7416666746139526
Epoch 95  - V

In [30]:
# Load the test set from JSON, report how many clips it holds, and preview the first rows
df_test = pd.read_json('../dataset/test.json')
print(len(df_test['vid_id']))
df_test.head()

1196


Unnamed: 0,audio_embedding,end_time_seconds_youtube_clip,start_time_seconds_youtube_clip,vid_id
0,"[[177, 20, 226, 132, 198, 81, 111, 59, 132, 18...",10,0,pyKh38FXD3E
1,"[[169, 21, 204, 161, 195, 72, 60, 39, 152, 184...",40,30,THhP1idrWXA
2,"[[165, 13, 198, 141, 199, 81, 173, 54, 119, 11...",40,30,jsw3T6GY2Nw
3,"[[167, 18, 188, 159, 198, 63, 156, 36, 179, 22...",24,14,nFkXTMHcjMU
4,"[[178, 32, 181, 100, 198, 46, 82, 83, 136, 227...",40,30,Au8g9kAlrLQ


In [12]:
# Process the test embeddings the same way as the training inputs
# (scaled and padded by normalise_and_pad, stacked into one float32 array)
xsubmission = np.asarray([normalise_and_pad(x) for x in df_test['audio_embedding']], dtype='float32')

# Sanity-check the resulting array shape
print("xsubmission: {}".format(xsubmission.shape))


xsubmission: (1196, 10, 128)


In [33]:
# Restore the trained variables and predict a class index for every test clip
with tf.Session() as sess:
    saver.restore(sess, save_file)
    # argmax_logits depends only on the inputs and the dropout keep probability,
    # so no targets are fed; dropout is disabled (keep_prob = 1.0) for inference.
    argmax_output = sess.run(
        argmax_logits,
        feed_dict={
            x: xsubmission,
            keep_prob: 1.0})


# Pair each video id with its predicted label (1 = turkey, 0 = not turkey)
submit_df = pd.DataFrame(
    {'vid_id': df_test['vid_id'], 'is_turkey': argmax_output},
    columns=['vid_id', 'is_turkey'])
print("Dataframe size: {}".format(len(submit_df['vid_id'])))
submit_df.head()

Dataframe size: 1196


Unnamed: 0,vid_id,is_turkey
0,pyKh38FXD3E,0
1,THhP1idrWXA,0
2,jsw3T6GY2Nw,0
3,nFkXTMHcjMU,0
4,Au8g9kAlrLQ,1


In [36]:
# Write the submission file without the DataFrame index column
submit_df.to_csv('../results/submission_turkey.csv', index=False, columns=['vid_id', 'is_turkey'])