In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.python import debug as tf_debug
import os
import random

In [3]:
#pict_path = "./data/subvocalization/train_data/combined/combined_spectrograms/"
pict_path  = "./data/subvocalization/train_data/combined_spectrograms/"
submit_pict_path = "./data/subvocalization/test_data/spectrograms_combined_test/"
PNG_CHANNELS = 3

In [4]:
labels = ["one", "two"]

Create train list by walking through folders and checking if folder name is one of my labels:

In [5]:
train_list = []

for x in os.listdir(pict_path):#audio_path):
    if os.path.isdir(pict_path + x):
        train_list.extend([x+"/"+y for y in os.listdir(pict_path + x) if '.png' in y])

print(train_list[:10])
len(train_list)

['two/11_1400864.wav3.png', 'two/11_2200252.wav3.png', 'two/11_1400878.wav3.png', 'two/11_1400621.wav3.png', 'two/11_1400043.wav3.png', 'two/11_1400133.wav3.png', 'two/11_2200848.wav3.png', 'two/11_1400242.wav3.png', 'two/11_1400534.wav3.png', 'two/11_1400402.wav3.png']


5118

Create submit list by walking through folders and checking if folder name is one of my labels:

In [6]:
submit_list = []
for x in os.listdir(submit_pict_path):
    if '.png' in x:
        submit_list.append(x)
        
print(submit_list[:10])
len(submit_list)

['0NZTS94B.wav.png', 'CM6WFMO5.wav.png', 'MPAVNNFR.wav.png', 'N07YPLGF.wav.png', 'UT7X0FXK.wav.png', 'HXR72FT6.wav.png', 'PQR8SQFQ.wav.png', '9A3USGGX.wav.png', 'DN60GNW1.wav.png', '2IUK7R3J.wav.png']


1002

In [7]:
submit_df = pd.DataFrame(submit_list, columns = ["filename"])
submit_df.head()

Unnamed: 0,filename
0,0NZTS94B.wav.png
1,CM6WFMO5.wav.png
2,MPAVNNFR.wav.png
3,N07YPLGF.wav.png
4,UT7X0FXK.wav.png


In [8]:
SPLIT= int(len(train_list)/100.0*80)
VALID_SPLT = int((len(train_list) - SPLIT)/2)
train_list = random.sample(train_list, len(train_list))

In [9]:
train_df = pd.DataFrame(train_list[:SPLIT], columns = ["Filepath"])
valid_df = pd.DataFrame(train_list[SPLIT:SPLIT+VALID_SPLT], columns = ["Filepath"])
test_df  = pd.DataFrame(train_list[SPLIT+VALID_SPLT:],  columns = ["Filepath"])

In [10]:
train_df.head(5)

Unnamed: 0,Filepath
0,two/11_2200483.wav3.png
1,one/11_2200657.wav3.png
2,two/11_1400419.wav3.png
3,one/11_2200830.wav3.png
4,two/11_2200351.wav3.png


Turn word labels into number labels:

In [11]:
label_dict = {}
for i, label in enumerate(labels):
    label_dict[label] = i
label_dict

{'one': 0, 'two': 1}

In [12]:
def make_label(x):
    label = x.split("/")[0]
    if label not in label_dict:
        return label_dict["unknown"]
    else:
        return label_dict[label]

train_df["Label"] = train_df["Filepath"].apply(make_label)
valid_df["Label"] = valid_df["Filepath"].apply(make_label)
test_df["Label"]  = test_df["Filepath"].apply(make_label)
train_df.head(5)

Unnamed: 0,Filepath,Label
0,two/11_2200483.wav3.png,1
1,one/11_2200657.wav3.png,0
2,two/11_1400419.wav3.png,1
3,one/11_2200830.wav3.png,0
4,two/11_2200351.wav3.png,1


In [13]:
test_df.head(5)

Unnamed: 0,Filepath,Label
0,two/11_2500924.wav3.png,1
1,two/11_2200035.wav3.png,1
2,one/11_1400554.wav3.png,0
3,one/11_1400828.wav3.png,0
4,two/11_2200369.wav3.png,1


In [14]:
#important for monitoring metrics in tensorboard
tf.logging.set_verbosity(tf.logging.INFO)

Parse ff reads png to tensor, converts to grayscale, slices to 4 pics and glues it as parallel channels

In [15]:
def _parse_function(filename, label):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_png(image_string, channels=PNG_CHANNELS)
    image = tf.image.convert_image_dtype(image_decoded, tf.float32)
    img = tf.image.rgb_to_grayscale(image,name="grayscale_op")
    ch1 = tf.image.crop_to_bounding_box(img, 2, 0, 64, 224)
    ch2 = tf.image.crop_to_bounding_box(img, 68, 0, 64, 224)
    ch3 = tf.image.crop_to_bounding_box(img, 134, 0, 64, 224)
    ch4 = tf.image.crop_to_bounding_box(img, 199, 0, 64, 224)
    combined_img = tf.stack((ch1,ch2,ch3,ch4), axis = 3, name="stack")
    combined_img = tf.reshape(combined_img, [-1,64,224,4])
    combined_img = tf.image.resize_images(combined_img, IMG_RESIZE, align_corners=True, preserve_aspect_ratio=False)
    return combined_img, label

Create input functions:

In [16]:
def create_train_input_fn(files,labels, batch_size, num_epochs=1):
    
    def _input_fn():
        filenames = tf.constant(list(files))
        _labels = tf.constant(list(labels))

        dataset = tf.data.Dataset.from_tensor_slices((filenames, _labels))

        dataset = dataset.apply(
                    tf.data.experimental.shuffle_and_repeat(2048,num_epochs))
        dataset = dataset.apply(
                    tf.data.experimental.map_and_batch(_parse_function,batch_size,num_parallel_calls=4))
        
        image_batch, label_batch = dataset.make_one_shot_iterator().get_next()
        
        return image_batch, label_batch
    return _input_fn

In [17]:
def create_predict_input_fn(files, labels, batch_size):
    
    def _input_fn():
        filenames = tf.constant(list(files))
        _labels = tf.constant(list(labels))

        dataset = tf.data.Dataset.from_tensor_slices((filenames, _labels))

        dataset = dataset.apply(
                    tf.data.experimental.map_and_batch(_parse_function,batch_size,num_parallel_calls=4))

        image_batch, label_batch = dataset.make_one_shot_iterator().get_next()
        
        return image_batch, label_batch
    return _input_fn

In [18]:
def create_submit_input_fn(files, batch_size):
    
    def _input_fn():
        filenames = tf.constant(list(files))

        dataset = tf.data.Dataset.from_tensor_slices(filenames)

        def _parse_function(filename):
            image_string = tf.read_file(filename)
            image_decoded = tf.image.decode_png(image_string, channels=PNG_CHANNELS)
            image = tf.image.convert_image_dtype(image_decoded, tf.float32)
            img = tf.image.rgb_to_grayscale(image,name="grayscale_op")
            ch1 = tf.image.crop_to_bounding_box(img, 2, 0, 64, 224)
            ch2 = tf.image.crop_to_bounding_box(img, 68, 0, 64, 224)
            ch3 = tf.image.crop_to_bounding_box(img, 134, 0, 64, 224)
            ch4 = tf.image.crop_to_bounding_box(img, 199, 0, 64, 224)
            combined_img = tf.stack((ch1,ch2,ch3,ch4), axis = 3, name="stack")
            combined_img = tf.reshape(combined_img, [-1,64,224,4])
            combined_img = tf.image.resize_images(combined_img, IMG_RESIZE, align_corners=True, preserve_aspect_ratio=False)
            return combined_img

        
        dataset = dataset.apply(
                    tf.data.experimental.map_and_batch(_parse_function,batch_size,num_parallel_calls=4))

        image_batch = dataset.make_one_shot_iterator().get_next()
        
        return image_batch
    return _input_fn

In [19]:
BATCH_SIZE = 128
NUM_EPOCHS = 33
IMG_SIZE = (263,224)
IMG_RESIZE = [64,128]
NUM_CLASSES = 2
DROPOUT_RATE = 0.25

In [20]:
train_input_fn = create_train_input_fn(train_df["Filepath"].apply(lambda x: pict_path + x),train_df["Label"],
                                       batch_size=BATCH_SIZE)
valid_input_fn = create_predict_input_fn(valid_df["Filepath"].apply(lambda x: pict_path + x),valid_df["Label"],
                                       batch_size=BATCH_SIZE)


test_input_fn = create_predict_input_fn(test_df["Filepath"].apply(lambda x: pict_path + x),test_df["Label"],
                                       batch_size=BATCH_SIZE)

submit_input_fn = create_submit_input_fn(submit_df["filename"].apply(lambda x: submit_pict_path + x),batch_size=BATCH_SIZE)

In [21]:
#hooks = [tf_debug.LocalCLIDebugHook()]

Create model:

In [22]:
def conv2d(input_tensor, depth, kernel, name, strides=(1, 1), padding="VALID"):
    return tf.layers.conv2d(input_tensor, filters=depth, kernel_size=kernel, strides=strides, padding=padding, activation=tf.nn.leaky_relu, name=name)

In [23]:
#tf.image.crop_to_bounding_box(image,offset_height,offset_width,target_height,target_width)

def conv_model(features, labels, mode):
    img = tf.reshape(features, [-1,64,128,4])
    net = conv2d(img,64,[3,3],"conv1",(1,1)) #62,126
    net = tf.layers.dropout(net, rate = DROPOUT_RATE, training=(mode == tf.estimator.ModeKeys.TRAIN))
    net = tf.layers.max_pooling2d(net, 2,2) #31,63
    net = conv2d(net,128,[3,3],"conv2",(2,2)) #15,31
    net = tf.layers.dropout(net, rate = DROPOUT_RATE, training=(mode == tf.estimator.ModeKeys.TRAIN))
    net = tf.layers.max_pooling2d(net, 2,1)# 14,30   
    net = conv2d(net,256,[2,2],"conv3",(2,2)) #7,15
    net = tf.layers.dropout(net, rate = DROPOUT_RATE, training=(mode == tf.estimator.ModeKeys.TRAIN))
    net = tf.layers.max_pooling2d(net, 2,1) #6,14
    net = conv2d(net,512,[2,2],"conv4",(2,2))#3,7
    net = tf.layers.dropout(net, rate = DROPOUT_RATE, training=(mode == tf.estimator.ModeKeys.TRAIN))
    net = tf.layers.flatten(net)
    net = tf.layers.dense(net, units = 256, activation=tf.nn.leaky_relu)
    net = tf.layers.dropout(net, rate = DROPOUT_RATE, training=(mode == tf.estimator.ModeKeys.TRAIN))
    net = tf.layers.dense(net, units = 128, activation=tf.nn.leaky_relu)
    logits = tf.layers.dense(net,units = 2)

    predictions = {
                    "classes": tf.argmax(input=logits, axis=1),
                    "probabilities": tf.nn.softmax(logits)
    }
    
    #define predict method logic
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode = mode, predictions=predictions)
    
    #sparse_softmax does one-hot automatically
    loss = tf.losses.sparse_softmax_cross_entropy(labels = labels, logits = logits)
    accuracy = tf.metrics.accuracy(labels = labels, predictions = predictions["classes"], name = "acc_op")
    
    
    #define train method logic
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        """
        decayed_lr = tf.train.cosine_decay_restarts(0.1, global_step=tf.train.get_global_step(),
                                                   first_decay_steps=50,t_mul=2.0,m_mul=0.1,alpha=0.0)
        
        
        decayed_lr = tf.train.cosine_decay_restarts(0.01, global_step=tf.train.get_global_step(),
                                                   first_decay_steps=100,t_mul=2.0,m_mul=1.0,alpha=0.0)
        """
        
        optimizer = tf.train.AdamOptimizer(0.0003)#GradientDescentOptimizer(decayed_lr)
        optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, 5.0)
        train_op  = optimizer.minimize(
            loss = loss,
            global_step=tf.train.get_global_step())
        tf.identity(accuracy[1], name='train_accuracy')
        tf.summary.scalar('train_accuracy', accuracy[1])
        eval_metric_ops = {"train_accuracy":accuracy}
        return tf.estimator.EstimatorSpec(mode = mode, loss = loss, train_op = train_op, eval_metric_ops=eval_metric_ops)
    
    
    
    #define evaluate method logic
    tf.identity(accuracy[1], name='val_accuracy')
    tf.summary.scalar('val_accuracy', accuracy[1])
    eval_metric_ops = {"val_accuracy":accuracy}
    return tf.estimator.EstimatorSpec(mode = mode, loss = loss, eval_metric_ops=eval_metric_ops)

Train model:

In [24]:
!rm -rf tf_files
OUT_DIR = "./tf_files"

In [25]:
my_cnn_classifier = tf.estimator.Estimator(model_fn=conv_model, 
                                           config=tf.estimator.RunConfig(keep_checkpoint_max=1).replace(save_summary_steps=2),
                                           model_dir=OUT_DIR)
file_writer = tf.summary.FileWriter(OUT_DIR)

def train_and_eval(estimator, num_epochs = NUM_EPOCHS, steps=10):
    for n in range(num_epochs):
        estimator.train(input_fn = train_input_fn,steps = steps)
        estimator.evaluate(input_fn = valid_input_fn)
        
train_and_eval(my_cnn_classifier)

INFO:tensorflow:Using config: {'_model_dir': './tf_files', '_tf_random_seed': None, '_save_summary_steps': 2, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdc0a316f28>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_ini

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-30-08:58:32
INFO:tensorflow:Saving dict for global step 70: global_step = 70, loss = 0.43906188, val_accuracy = 0.80859375
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 70: ./tf_files/model.ckpt-70
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-70
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 70 into ./tf_files/model.ckpt.
INFO:tensorflow:loss = 0.40514505, step = 70
INFO:tensorflow:Saving checkpoints for 80 into ./tf_files/model.ckpt.
INFO:tensorflow:Loss for final step: 0.41483653.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 201

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-140
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 140 into ./tf_files/model.ckpt.
INFO:tensorflow:loss = 0.2969851, step = 140
INFO:tensorflow:Saving checkpoints for 150 into ./tf_files/model.ckpt.
INFO:tensorflow:Loss for final step: 0.4155324.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-11-30-08:59:24
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-150
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-30-08:59:24
INFO:tensorflow:Saving dict for global step 150: global_step = 150, loss = 0.3471

INFO:tensorflow:Saving checkpoints for 210 into ./tf_files/model.ckpt.
INFO:tensorflow:loss = 0.3097222, step = 210
INFO:tensorflow:Saving checkpoints for 220 into ./tf_files/model.ckpt.
INFO:tensorflow:Loss for final step: 0.25574112.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-11-30-09:00:10
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-220
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-30-09:00:11
INFO:tensorflow:Saving dict for global step 220: global_step = 220, loss = 0.32293233, val_accuracy = 0.8964844
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 220: ./tf_files/model.ckpt-220
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restori

INFO:tensorflow:Starting evaluation at 2018-11-30-09:00:56
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-290
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-30-09:00:57
INFO:tensorflow:Saving dict for global step 290: global_step = 290, loss = 0.29804146, val_accuracy = 0.8769531
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 290: ./tf_files/model.ckpt-290
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-290
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 290 into ./tf_files/model.ckpt.
INFO:tensorflow:loss = 0.24454151, step = 290
INFO:tensorflow:Saving checkpoints for 300 into ./tf_files/model

In [26]:
my_cnn_classifier.evaluate(input_fn = test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-11-30-09:01:24
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-330
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-30-09:01:24
INFO:tensorflow:Saving dict for global step 330: global_step = 330, loss = 0.31549484, val_accuracy = 0.8808594
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 330: ./tf_files/model.ckpt-330


{'loss': 0.31549484, 'val_accuracy': 0.8808594, 'global_step': 330}

Submission file generation:

In [27]:
generator = my_cnn_classifier.predict(input_fn=submit_input_fn)
predictions = [next(generator) for i in range(len(submit_list))]
classes = [predictions[i]["classes"] for i in range(len(predictions))]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./tf_files/model.ckpt-330
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [28]:
def make_submission_file(classes, filename):
    submission = pd.DataFrame()
    submission["filename"] = submit_list
    submission["filename"] = submission["filename"].apply(lambda x: x.split(".")[0])
    submission["label"] = classes
    submission["label"] = submission["label"].apply(lambda x: int(x)+1)
    submission.set_index("filename", inplace=True)
    submission.to_csv(filename)
    
make_submission_file(classes, "mtolstok_subvocalization.csv")