# Sparkling Emoticana

In [None]:
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import scipy.io
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
import tensorflowonspark as TFOS

%matplotlib inline

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import nested_scopes
from __future__ import print_function

from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.sql import SparkSession

import argparse
import os
import subprocess
import sys
import logging
from datetime import datetime
import math
import numpy
import time

import tensorflow as tf
from tensorflowonspark import dfutil
from tensorflowonspark import TFCluster
from tensorflowonspark.pipeline import TFEstimator, TFModel

In [None]:
# Echo the SparkContext so the cell output shows it before anything is built on it.
# NOTE(review): `sc` is not created anywhere in the visible notebook - presumably
# injected by the PySpark kernel; confirm, or create it explicitly.
sc

<br></br> <br></br> <br></br> <br></br>

## 0. Helper functions

In [None]:
def onehot_encoding(data, num=8) :
    """Turn integer class indices into one-hot rows.

    ``data`` is used to index the rows of a ``num`` x ``num`` identity matrix,
    so every index ``i`` becomes a length-``num`` float row with 1.0 at ``i``.
    """
    identity = np.eye(num)
    return identity[data]

In [None]:
def cutting(train, valid, test, size=1025, num=276) :
    """Center-crop (or zero-pad) every spectrogram to a ``size`` x ``num`` window.

    Args:
        train, valid, test: lists of ``(spectrogram, emotion)`` pairs, where
            ``spectrogram`` is a 2-D array with ``size`` rows and ``emotion``
            is a 1-based integer label.
        size: number of rows of every spectrogram.
        num: number of columns kept per spectrogram.

    Returns:
        A list with one ``(data, labels)`` tuple per NON-EMPTY input dataset,
        in train/valid/test order. ``data`` has shape
        ``(len(dataset), size, num)`` and ``labels`` is a list of 0-based
        emotion indices. Empty datasets are skipped, so the result may hold
        fewer than three entries.
    """
    result = []
    half = int(num/2)

    for dataset in [train, valid, test] :
        if not dataset :
            continue

        zero = np.zeros([len(dataset), size, num])
        emotion_lst = []

        for idx, (spectrogram, emotion) in enumerate(dataset):
            mid = int(spectrogram.shape[1]/2)
            # Clamp the left edge: for a clip narrower than `num`, mid-half is
            # negative and would otherwise wrap around to the END of the clip.
            start = max(mid - half, 0)
            window = spectrogram[:, start:mid + half]
            # Write exactly as many columns as were cropped; any remaining
            # columns on the right keep their zero padding.
            zero[idx, :, :window.shape[1]] = window
            emotion_lst.append(emotion-1)

        result.append((zero, emotion_lst))

    return result

In [None]:
def load_wav_data(path) :
    """Load every audio file in ``path`` and split it into train/valid/test by actor.

    File names are expected to hold at least seven dash-separated fields; the
    third field is the integer emotion label and the seventh (up to the first
    ".") is the integer actor id. Actors 1-2 go to ``valid``, actors 3-4 to
    ``test`` and everyone else to ``train``.

    Files that do not match the naming scheme or cannot be decoded are
    skipped, and the reason is printed instead of being silently swallowed.

    Args:
        path: directory containing the audio files (trailing separator optional).

    Returns:
        ``(file_lst, train, valid, test)`` where ``file_lst`` is the shuffled
        directory listing and the other three are lists of
        ``(melspectrogram, emotion)`` pairs.
    """
    file_lst = os.listdir(path)
    random.shuffle(file_lst)

    train = []
    valid = []
    test = []

    for file in file_lst :
        try :
            # Parse the name first so non-matching files are rejected before
            # the comparatively expensive audio decode.
            fields = file.split("-")
            emotion = int(fields[2])
            actor = int(fields[6].split(".")[0])

            y, sr = librosa.load(os.path.join(path, file))
            # `y` must be passed by keyword: recent librosa releases reject it
            # positionally, which the old bare `except` hid completely.
            melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        except Exception as err :
            print("load_wav_data: skipping {0!r}: {1!r}".format(file, err))
            continue

        if actor in [1,2] :
            valid.append((melspectrogram, emotion))
        elif actor in [3,4] :
            test.append((melspectrogram, emotion))
        else :
            train.append((melspectrogram, emotion))

    return file_lst, train, valid, test

In [None]:
def load_wav_test_data(path) :
    """Load every audio file in ``path`` as test data (no train/valid split).

    File names are expected to hold at least seven dash-separated fields; the
    third field is the integer emotion label. Files that do not match the
    naming scheme or cannot be decoded are skipped, and the reason is printed
    instead of being silently swallowed.

    Args:
        path: directory containing the audio files (trailing separator optional).

    Returns:
        A list of ``(melspectrogram, emotion)`` pairs in shuffled file order.
    """
    file_lst = os.listdir(path)
    random.shuffle(file_lst)

    test = []

    for file in file_lst :
        try :
            fields = file.split("-")
            emotion = int(fields[2])
            # The actor id is not used here, but parsing it keeps the original
            # filter: names without a numeric seventh field are skipped.
            int(fields[6].split(".")[0])

            y, sr = librosa.load(os.path.join(path, file))
            # `y` must be passed by keyword: recent librosa releases reject it
            # positionally, which the old bare `except` hid completely.
            melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        except Exception as err :
            print("load_wav_test_data: skipping {0!r}: {1!r}".format(file, err))
            continue

        test.append((melspectrogram, emotion))

    return test

In [None]:
def get_batch_data(df):
    """Convert one batch of ``image`` / ``label`` records into two numpy arrays.

    Args:
        df: either a Spark DataFrame with ``image`` and ``label`` columns, or
            an iterable of ``(image, label)`` rows (tuples or Spark ``Row``
            objects in that column order). The second form is what the only
            caller in this notebook passes: the result of
            ``tf_feed.next_batch(...)``, which has no ``.select``.

    Returns:
        ``(train_data, train_label)`` with shapes ``(-1, 128, 126, 1)`` and
        ``(-1, 8)``. An empty batch yields arrays with zero rows.
    """
    if hasattr(df, 'select'):
        # DataFrame input: fetch both columns with a single toPandas() call.
        pdf = df.select('image', 'label').toPandas()
        images = list(pdf['image'])
        labels = list(pdf['label'])
    else:
        rows = list(df)
        images = [row[0] for row in rows]
        labels = [row[1] for row in rows]

    train_data = np.array(images).reshape([-1, 128, 126, 1])
    train_label = np.array(labels).reshape([-1, 8])

    return (train_data, train_label)

In [None]:
def print_log(worker_num, arg):
    """Print ``arg`` prefixed with the worker number, as ``"<worker_num>: <arg>"``."""
    message = "{0}: {1}".format(worker_num, arg)
    print(message)

In [None]:
def np_to_df(data, label) :
    """Build a Spark DataFrame with ``image`` and ``label`` columns from numpy arrays.

    ``data`` is reshaped to rows of 128x126 and ``label`` to rows of 8; both
    are converted to nested Python lists, parallelized separately on the
    notebook-level ``sc`` and zipped row-by-row before being handed to the
    notebook-level ``spark`` session.
    """
    images = data.reshape([-1, 128, 126]).tolist()
    onehots = label.reshape([-1, 8]).tolist()

    paired_rdd = sc.parallelize(images).zip(sc.parallelize(onehots))

    return spark.createDataFrame(paired_rdd, ['image', 'label'])

<br></br> <br></br> <br></br> <br></br>

## 1. Model

In [None]:
class CNN() :
    """Convolutional classifier over 128x126x1 inputs with 8 output classes (TF 1.x graph API).

    Typical use: construct, call ``build()`` once to create the graph, attach
    a session with ``set_sess()``, then call ``train`` / ``evaluate`` /
    ``predict`` / ``get_accuracy`` / ``save``.
    """

    def __init__(self, name):
        self.name = name
        # Attached later through set_sess(); every method that runs the graph needs it.
        self.sess = None

    def convolution(self, X_input, filters, kernel_size, strides, name, padding="SAME") :
        """Batch-norm -> conv2d (Xavier init) -> leaky ReLU, inside variable scope ``name``."""
        with tf.variable_scope(name) :
            # Honour the train/inference switch. Falls back to inference mode
            # if this helper is used before build() created `self.training`.
            training = getattr(self, "training", False)
            bn = tf.layers.batch_normalization(X_input, training=training)
            conv = tf.layers.conv2d(bn, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, kernel_initializer=tf.contrib.layers.xavier_initializer())
            relu = tf.nn.leaky_relu(conv)

            return relu

    def build(self) :
        """Create placeholders, the layer stack, the loss, the train op and the metrics.

        Sets ``X``, ``Y``, ``training``, ``learning_rate``, ``flat``,
        ``logits``, ``cost``, ``optimizer``, ``label`` and ``accuracy`` on the
        instance and prints the shape of each intermediate tensor.
        """
        with tf.variable_scope(self.name) :
            ### Input
            #input : 128x126x1
            #output : 8
            self.X = tf.placeholder(tf.float32, [None, 128, 126, 1])
            self.Y = tf.placeholder(tf.float32, [None, 8])
            # Defaults to False (inference) so graph runs that do not feed it
            # keep working now that batch normalization actually reads it.
            self.training = tf.placeholder_with_default(False, shape=[])
            self.learning_rate = tf.placeholder(tf.float32)
            print(self.X.shape)

        ### Input Layer
        #input : 128x126x1
        #output : 32x31x8
        conv1 = self.convolution(self.X, 8, [3,3], 2, "conv1")
        pool1 = tf.layers.max_pooling2d(conv1, pool_size=[2,2], strides=2, name="pool1")
        print(conv1.shape)
        print(pool1.shape)

        ### Hidden Layer1
        #input : 32x31x8
        #output : 32x31x16
        # Consumes pool1 (not conv1): pool1 used to be computed and then
        # dropped, which made every shape documented below off by 2x.
        conv2 = self.convolution(pool1, 16, [3,3], 1, "conv2")
        print(conv2.shape)

        ### Hidden Layer2
        #input : 32x31x16
        #output : 32x31x32
        conv3 = self.convolution(conv2, 32, [3,3], 1, "conv3")
        print(conv3.shape)

        ### Pooling Layer2
        #input : 32x31x32
        #output : 16x15x32
        pool2 = tf.layers.max_pooling2d(conv3, pool_size=[2,2], strides=2, name="pool2")
        print(pool2.shape)

        ### Hidden Layer3
        #input : 16x15x32
        #output : 16x15x64
        conv4 = self.convolution(pool2, 64, [3,3], 1, "conv4")
        print(conv4.shape)

        ### Hidden Layer4
        #input : 16x15x64
        #output : 16x15x128
        conv5 = self.convolution(conv4, 128, [3,3], 1, "conv5")
        print(conv5.shape)

        ### Pooling Layer3
        #input : 16x15x128
        #output : 8x7x128
        pool3 = tf.layers.max_pooling2d(conv5, pool_size=[2,2], strides=2, name="pool3")
        print(pool3.shape)

        ### Hidden Layer5
        #input : 8x7x128
        #output : 8x7x32
        conv6 = self.convolution(pool3, 32, [1,1], 1, "conv6")
        print(conv6.shape)

        with tf.variable_scope("global_avg_pooling") :
            ### global avg pooling
            #input : 8x7x32
            #output : 1x1x32
            global_avg_pooling = tf.reduce_mean(conv6, [1, 2], keep_dims=True)
            print(global_avg_pooling.shape)

        with tf.variable_scope("fully_connected") :
            ### Output Layer
            #input : 1x1x32
            #output : 8
            shape = global_avg_pooling.get_shape().as_list()
            dimension = shape[1] * shape[2] * shape[3]
            self.flat = tf.reshape(global_avg_pooling, shape=[-1, dimension])

            fc = tf.layers.dense(inputs=self.flat, units=8, kernel_initializer=tf.contrib.layers.xavier_initializer())
            self.logits = fc

        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))

        # Batch-norm moving averages are only refreshed by the ops collected
        # in UPDATE_OPS, so they must run together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

        self.label = tf.argmax(self.logits, 1)
        correct_prediction = tf.equal(self.label, tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def set_sess(self, sess) :
        """Attach the session used by all methods that run the graph."""
        self.sess = sess

    def predict(self, x_test, training=False):
        """Return the logits for ``x_test``."""
        feed_dict={self.X: x_test, self.training: training}

        return self.sess.run(self.logits, feed_dict=feed_dict)

    def get_accuracy(self, x_test, y_test, training=False):
        """Return the accuracy of the model on ``(x_test, y_test)``."""
        feed_dict={self.X: x_test,self.Y: y_test, self.training: training}

        return self.sess.run(self.accuracy, feed_dict=feed_dict)

    def train(self, x_data, y_data, learning_rate, training=True):
        """Run one optimizer step; returns the ``[cost, optimizer]`` run results."""
        feed_dict={self.X: x_data, self.Y: y_data, self.learning_rate: learning_rate, self.training: training}

        return self.sess.run([self.cost, self.optimizer], feed_dict=feed_dict)

    def evaluate(self, X_input, Y_input, batch_size=None, training=False):
        """Return the example-weighted mean ``(loss, accuracy)`` over the inputs.

        Args:
            X_input, Y_input: arrays sliced along their first axis into batches.
            batch_size: examples per graph run; ``None`` evaluates everything
                in a single batch (it used to crash inside ``range``).
            training: value fed to the ``training`` switch.

        ``X_input`` must hold at least one example.
        """
        N = X_input.shape[0]
        if batch_size is None:
            batch_size = max(N, 1)

        total_loss = 0
        total_acc = 0

        for i in range(0, N, batch_size):
            X_batch = X_input[i:i + batch_size]
            Y_batch = Y_input[i:i + batch_size]

            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: training}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            # Weight by the real batch length so a short last batch is not over-counted.
            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= N
        total_acc /= N

        return total_loss, total_acc

    def save(self, ver) :
        """Checkpoint the session to ``CNN_<ver>.ckpt`` and print the saved path."""
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, "CNN_" + str(ver) + ".ckpt")

        print("Model saved in path: %s" % save_path)
                 

<br></br> <br></br> <br></br> <br></br>

## 2. Setting

In [None]:
# Executor count passed to TFCluster.run() in the cells below.
num_executors = 3

In [None]:
def _parse_flag(value):
    """Interpret a command-line string as a boolean.

    Explicit "false-like" spellings become False; anything else stays True,
    which keeps every value that used to enable the option working.
    """
    return str(value).strip().lower() not in ("", "0", "false", "f", "no", "n", "off")


# Driver-side options; the parsed namespace is what gets passed to the TFoS map function.
parser = argparse.ArgumentParser()
parser.add_argument("--epochs", help="number of epochs", type=int, default=50)
parser.add_argument("--data", help="path to the directory holding the input audio files")
parser.add_argument("--format", help="format", default="wav")
# A default is required: with None the map function cannot resolve a checkpoint directory.
parser.add_argument("--model_dir", help="HDFS path to save/load model during train/test", default="emoticana_model")
parser.add_argument("--readers", help="number of reader/enqueue threads", type=int, default=1)
parser.add_argument("--steps", help="maximum number of steps", type=int, default=1000)
parser.add_argument("--batch_size", help="number of examples per batch", type=int, default=40)
parser.add_argument("--mode", help="train|inference", default="train")
# Without a converter "--rdma False" was stored as the truthy string "False".
parser.add_argument("--rdma", help="use rdma connection", type=_parse_flag, default=False)

<br></br> <br></br> <br></br> <br></br>

## 3. TFoS

In [None]:
def CNN_function(args, ctx):
    """TensorFlowOnSpark map function: runs one node (ps or worker) of the TF cluster.

    Parameter-server nodes block in ``server.join()``. Worker nodes build the
    CNN graph, open a ``MonitoredTrainingSession`` that checkpoints into
    ``args.model_dir`` and then consume batches from the Spark data feed:

    * ``args.mode == "train"``: one optimizer step per batch, summaries
      written by the chief worker, accuracy printed every 20 steps.
    * any other mode: per-example "Label / Prediction" strings are pushed back
      to Spark through ``tf_feed.batch_results``.

    Args:
        args: parsed notebook arguments (``batch_size``, ``steps``, ``mode``,
            ``model_dir``, ``rdma`` are read here).
        ctx: TFoS node context (``worker_num``, ``job_name``, ``task_index``).
    """
    # Imported locally: the notebook never imports TFNode, and this function
    # executes on the Spark executors rather than on the driver.
    from tensorflowonspark import TFNode

    # Read the node identity first - it was previously used before assignment.
    worker_num = ctx.worker_num
    job_name = ctx.job_name
    task_index = ctx.task_index

    # Delay PS nodes a bit, since workers seem to reserve GPUs more quickly/reliably (w/o conflict)
    if job_name == "ps":
        time.sleep((worker_num + 1) * 5)

    # Get TF cluster and server instances (first argument is the context; 0 = number of GPUs)
    cluster, server = TFNode.start_cluster_server(ctx, 0, args.rdma)

    batch_size = args.batch_size
    learning_rate = 0.008

    if job_name == "ps":
        server.join()
    elif job_name == "worker":
        is_chief = (task_index == 0)

        # Assigns ops to the local worker by default.
        with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/task:%d" % task_index, cluster=cluster)):
            model = CNN("CNN")
            # The constructor only stores the name; the graph is created here.
            model.build()

            # The session hooks need the registered global step, and the
            # model's train op does not advance it, so advance it explicitly.
            global_step = tf.train.get_or_create_global_step()
            increment_step = tf.assign_add(global_step, 1)

            loss = model.cost
            optimizer = model.optimizer
            accuracy = model.accuracy
            predicted_label = model.label
            true_label = tf.argmax(model.Y, 1)

            tf.summary.scalar("loss", loss)
            tf.summary.scalar("accuracy", accuracy)
            summary_op = tf.summary.merge_all()

        # MonitoredTrainingSession oversees training and stores model state under logdir
        logdir = TFNode.hdfs_path(ctx, args.model_dir)
        print("tensorflow model path: {0}".format(logdir))
        hooks = [tf.train.StopAtStepHook(last_step=100000)]

        # Only the chief writes summaries, so only the chief opens a writer.
        summary_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph()) if is_chief else None

        try:
            with tf.train.MonitoredTrainingSession(master=server.target,
                                                   is_chief=is_chief,
                                                   checkpoint_dir=logdir,
                                                   hooks=hooks) as mon_sess:

                step = 0
                tf_feed = ctx.get_data_feed(args.mode == "train")
                while not mon_sess.should_stop() and not tf_feed.should_stop() and step < args.steps:
                    batch_xs, batch_ys = get_batch_data(tf_feed.next_batch(batch_size))

                    if len(batch_xs) == 0:
                        continue

                    train_feed = {model.X: batch_xs, model.Y: batch_ys, model.learning_rate: learning_rate, model.training: True}
                    eval_feed = {model.X: batch_xs, model.Y: batch_ys, model.learning_rate: learning_rate, model.training: False}

                    if args.mode == "train" :
                        _, summary, step = mon_sess.run([optimizer, summary_op, increment_step], feed_dict=train_feed)

                        if (step % 20 == 0):
                            print("{0} step: {1} accuracy: {2}".format(datetime.now().isoformat(), step, mon_sess.run(accuracy, feed_dict=eval_feed)))

                        if summary_writer is not None:
                            summary_writer.add_summary(summary, step)

                    else :
                        # "Label" is the ground truth and "Prediction" the predicted class index.
                        labels, preds, acc = mon_sess.run([true_label, predicted_label, accuracy], feed_dict=eval_feed)

                        results = ["{0} Label: {1}, Prediction: {2}".format(datetime.now().isoformat(), l, p) for l, p in zip(labels, preds)]
                        tf_feed.batch_results(results)
                        print("results: {0}, acc: {1}".format(results, acc))

                if mon_sess.should_stop() or step >= args.steps:
                    tf_feed.terminate()
        finally:
            # Close the writer even if the session or the feed raised.
            if summary_writer is not None:
                summary_writer.close()

        # Ask for all the services to stop.
        print("{0} stopping MonitoredTrainingSession".format(datetime.now().isoformat()))

In [None]:
# Load the recordings, crop every spectrogram to 128x126 and build the model inputs.
# NOTE(review): `data_dir` is not defined anywhere in the visible notebook - it must
# be set before this cell runs.
# NOTE(review): the 3-way unpacking below needs all three datasets to be non-empty,
# because cutting() skips empty ones and then returns fewer than three entries.
file_lst, train, valid, test = load_wav_data(data_dir)
cut_train, cut_valid, cut_test = cutting(train, valid, test, size =128 , num=126)

# Add the trailing channel axis and one-hot encode the 0-based labels.
train_data = cut_train[0].reshape([-1, 128, 126, 1])
train_label = onehot_encoding(cut_train[1])
    
test_data = cut_test[0].reshape([-1, 128, 126, 1])
test_label = onehot_encoding(cut_test[1])

In [None]:
# Arguments for the training run: 3000 steps at most, 5 passes over the data.
# --model_dir is not passed here, so args.model_dir keeps the parser default.
args = parser.parse_args(['--mode', 'train', '--steps', '3000', '--epochs', '5',
                          '--data', data_dir])

In [None]:
# Start the TF cluster for training: one parameter server, no TensorBoard, data fed from Spark.
cluster = TFCluster.run(
    sc,
    CNN_function,
    args,
    num_executors,
    num_ps=1,
    tensorboard=False,
    input_mode=TFCluster.InputMode.SPARK,
)

In [None]:
# Feed the training set to the cluster. `train_data.to` was a typo (ndarrays
# have no `.to` attribute), and the feed is handed the DataFrame's underlying
# RDD of (image, label) rows.
trainDF = np_to_df(train_data, train_label)
cluster.train(trainDF.rdd, args.epochs)

In [None]:
# Stop the training cluster before a new one is started for inference.
cluster.shutdown()

In [None]:
# Arguments for the inference run; everything except --mode and --data keeps
# the parser defaults.
args = parser.parse_args(['--mode', 'inference', 
                          '--data', data_dir])

In [None]:
# Start a fresh TF cluster for inference: one parameter server, no TensorBoard, data fed from Spark.
cluster = TFCluster.run(
    sc,
    CNN_function,
    args,
    num_executors,
    num_ps=1,
    tensorboard=False,
    input_mode=TFCluster.InputMode.SPARK,
)

In [None]:
# `np_to_rdd` does not exist in this notebook; the helper defined above is
# `np_to_df`. The feed is handed the DataFrame's underlying RDD of
# (image, label) rows.
testDF = np_to_df(test_data, test_label)

prediction_results = cluster.inference(testDF.rdd)
prediction_results.take(20)

In [None]:
# Stop the inference cluster.
cluster.shutdown()