# Sparkling Emoticana

In [None]:
import pyspark.sql
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import scipy.io
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display

%matplotlib inline

## 1. CNN

In [None]:
class CNN:
    """Convolutional classifier built with the TensorFlow 1.x graph API.

    ``build`` creates placeholders for 128x126x1 inputs and 8-way one-hot
    labels, a stack of batch-norm + conv + leaky-ReLU layers interleaved with
    max pooling, a global average pooling step and a dense output layer.
    All other methods run ops of that graph in the session given at
    construction time.
    """

    def __init__(self, sess, name):
        """Remember the session and the scope name.

        Args:
            sess: session used by every method to run graph ops.
            name: variable-scope name under which the placeholders are created.
        """
        self.sess = sess
        self.name = name

    def convolution(self, X_input, filters, kernel_size, strides, name, padding="SAME"):
        """Apply batch normalisation, a 2-D convolution and leaky ReLU.

        Args:
            X_input: input tensor.
            filters: number of output channels of the convolution.
            kernel_size: convolution kernel size.
            strides: convolution stride.
            name: variable scope the layer's variables are created in.
            padding: padding mode forwarded to ``tf.layers.conv2d``.

        Returns:
            The activated output tensor.
        """
        with tf.variable_scope(name):
            # Batch norm must know whether we are training: in training mode it
            # normalises with batch statistics, otherwise with the moving
            # averages.  Fall back to inference mode when ``build`` has not
            # created the placeholder yet.
            bn = tf.layers.batch_normalization(
                X_input, training=getattr(self, "training", False)
            )
            conv = tf.layers.conv2d(
                bn,
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding=padding,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
            )
            relu = tf.nn.leaky_relu(conv)

            return relu

    def build(self):
        """Create placeholders, the network, the loss, the optimizer and the accuracy op.

        The shape of every intermediate tensor is printed while the graph is
        being built.
        """
        # NOTE: only the placeholders live under ``self.name``; the layers are
        # created at the top level of the graph.  This is kept as-is so that
        # variable names (and therefore existing checkpoints) do not change.
        with tf.variable_scope(self.name):
            ### Input
            #input : 128x126x1
            #output : 8
            self.X = tf.placeholder(tf.float32, [None, 128, 126, 1])
            self.Y = tf.placeholder(tf.float32, [None, 8])
            self.training = tf.placeholder(tf.bool)
            self.learning_rate = tf.placeholder(tf.float32)
            print(self.X.shape)

        ### Input Layer
        #input : 128x126x1
        #output : 32x31x8
        conv1 = self.convolution(self.X, 8, [3,3], 2, "conv1")
        pool1 = tf.layers.max_pooling2d(conv1, pool_size=[2,2], strides=2, name="pool1")
        print(conv1.shape)
        print(pool1.shape)

        ### Hidden Layer1
        #input : 32x31x8
        #output : 32x31x16
        # Feed the pooled tensor: the documented 32x31 input (and every shape
        # further down) is only reached when pool1 is part of the chain.
        conv2 = self.convolution(pool1, 16, [3,3], 1, "conv2")
        print(conv2.shape)

        ### Hidden Layer2
        #input : 32x31x16
        #output : 32x31x32
        conv3 = self.convolution(conv2, 32, [3,3], 1, "conv3")
        print(conv3.shape)

        ### Pooling Layer2
        #input : 32x31x32
        #output : 16x15x32
        pool2 = tf.layers.max_pooling2d(conv3, pool_size=[2,2], strides=2, name="pool2")
        print(pool2.shape)

        ### Hidden Layer3
        #input : 16x15x32
        #output : 16x15x64
        conv4 = self.convolution(pool2, 64, [3,3], 1, "conv4")
        print(conv4.shape)

        ### Hidden Layer4
        #input : 16x15x64
        #output : 16x15x128
        conv5 = self.convolution(conv4, 128, [3,3], 1, "conv5")
        print(conv5.shape)

        ### Pooling Layer3
        #input : 16x15x128
        #output : 8x7x128
        pool3 = tf.layers.max_pooling2d(conv5, pool_size=[2,2], strides=2, name="pool3")
        print(pool3.shape)

        ### Hidden Layer5
        #input : 8x7x128
        #output : 8x7x32
        conv6 = self.convolution(pool3, 32, [1,1], 1, "conv6")
        print(conv6.shape)

        with tf.variable_scope("global_avg_pooling"):
            ### global avg pooling
            #input : 8x7x32
            #output : 1x1x32
            global_avg_pooling = tf.reduce_mean(conv6, [1, 2], keep_dims=True)
            print(global_avg_pooling.shape)

        with tf.variable_scope("fully_connected"):
            ### Output Layer
            #input : 1x1x32
            #output : 8
            shape = global_avg_pooling.get_shape().as_list()
            dimension = shape[1] * shape[2] * shape[3]
            flat = tf.reshape(global_avg_pooling, shape=[-1, dimension])

            fc = tf.layers.dense(inputs=flat, units=8, kernel_initializer=tf.contrib.layers.xavier_initializer())
            self.logits = fc

        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))

        # The moving mean/variance of the batch-norm layers are refreshed by
        # ops collected in UPDATE_OPS; they only run if the train op depends
        # on them.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

        correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, x_test, training=False):
        """Return the logits computed for ``x_test``."""
        feed_dict = {self.X: x_test, self.training: training}

        return self.sess.run(self.logits, feed_dict=feed_dict)

    def get_accuracy(self, x_test, y_test, training=False):
        """Return the accuracy over ``x_test`` / ``y_test`` computed in a single run."""
        feed_dict = {self.X: x_test, self.Y: y_test, self.training: training}

        return self.sess.run(self.accuracy, feed_dict=feed_dict)

    def train(self, x_data, y_data, learning_rate, training=True):
        """Run one optimisation step on a batch.

        Returns:
            The list ``[cost, optimizer_result]`` produced by ``sess.run``.
        """
        feed_dict = {self.X: x_data, self.Y: y_data, self.learning_rate: learning_rate, self.training: training}

        return self.sess.run([self.cost, self.optimizer], feed_dict=feed_dict)

    def evaluate(self, X_input, Y_input, batch_size=None, training=False):
        """Compute the mean loss and accuracy over a dataset, batch by batch.

        Args:
            X_input: inputs; ``X_input.shape[0]`` is the number of samples.
            Y_input: labels aligned with ``X_input``.
            batch_size: samples per ``sess.run`` call; ``None`` evaluates the
                whole dataset in one batch.
            training: value fed to the ``training`` placeholder.

        Returns:
            ``(loss, accuracy)``, each averaged over all samples (per-batch
            values are weighted by the batch size, so a smaller last batch is
            handled correctly).

        Raises:
            ValueError: if the dataset is empty.
        """
        N = X_input.shape[0]
        if N == 0:
            raise ValueError("cannot evaluate an empty dataset")
        if batch_size is None:
            # range() cannot take None as a step; use one batch for everything.
            batch_size = N

        total_loss = 0
        total_acc = 0

        for i in range(0, N, batch_size):
            X_batch = X_input[i:i + batch_size]
            Y_batch = Y_input[i:i + batch_size]

            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: training}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= N
        total_acc /= N

        return total_loss, total_acc

    def save(self, ver):
        """Save all variables to ``CNN_<ver>.ckpt`` and print the resulting path."""
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, "CNN_" + str(ver) + ".ckpt")

        print("Model saved in path: %s" % save_path)
                 

 <br></br>  <br></br>  <br></br>  <br></br>

## 2. dataset

In [None]:
def load_mat_data(path) :
    """Load spectrograms from ``.mat`` files and split them by actor.

    File names are split on ``-``: field 2 is parsed as the emotion id and
    field 6 (up to the first ``.``) as the actor id.  Actors 1-2 go to the
    validation set, actors 3-4 to the test set and everyone else to the
    training set.  The spectrogram is read from the ``"S"`` variable of each
    file.

    Files whose name does not follow that scheme are skipped silently; files
    that cannot be loaded are skipped with a printed message.

    Args:
        path: directory containing the files (with or without a trailing
            separator).

    Returns:
        ``(file_lst, train, valid, test)`` where ``file_lst`` is the shuffled
        directory listing and the other three are lists of
        ``(spectrogram, emotion)`` tuples.
    """
    file_lst = os.listdir(path)
    random.shuffle(file_lst)

    train = []
    valid = []
    test = []
    held_out = {1: valid, 2: valid, 3: test, 4: test}

    for file_name in file_lst :
        fields = file_name.split("-")
        try :
            emotion = int(fields[2])
            actor = int(fields[6].split(".")[0])
        except (IndexError, ValueError) :
            # Not a data file following the naming scheme.
            continue

        try :
            spectrogram = scipy.io.loadmat(os.path.join(path, file_name))["S"]
        except Exception as err :
            # Best effort: one unreadable file must not abort the whole load,
            # but it should not disappear without a trace either.
            print("skipping %s: %s" % (file_name, err))
            continue

        held_out.get(actor, train).append((spectrogram, emotion))

    return file_lst, train, valid, test

In [None]:
def load_wav_data(path) :
    """Load audio files, compute 128-band mel spectrograms and split by actor.

    File names are split on ``-``: field 2 is parsed as the emotion id and
    field 6 (up to the first ``.``) as the actor id.  Actors 1-2 go to the
    validation set, actors 3-4 to the test set and everyone else to the
    training set; every sample is additionally appended to ``all_data``.

    Files whose name does not follow that scheme are skipped silently (before
    any audio is decoded); files that cannot be loaded or transformed are
    skipped with a printed message.

    Args:
        path: directory containing the audio files (with or without a
            trailing separator).

    Returns:
        ``(file_lst, train, valid, test, all_data)`` where ``file_lst`` is the
        shuffled directory listing and the others are lists of
        ``(melspectrogram, emotion)`` tuples.
    """
    file_lst = os.listdir(path)
    random.shuffle(file_lst)

    train = []
    valid = []
    test = []
    all_data = []
    held_out = {1: valid, 2: valid, 3: test, 4: test}

    for file_name in file_lst :
        fields = file_name.split("-")
        try :
            emotion = int(fields[2])
            actor = int(fields[6].split(".")[0])
        except (IndexError, ValueError) :
            # Not a data file following the naming scheme.
            continue

        try :
            y, sr = librosa.load(os.path.join(path, file_name))
            # Pass the signal by keyword: recent librosa releases no longer
            # accept it positionally.
            melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        except Exception as err :
            # Best effort: one unreadable file must not abort the whole load,
            # but it should not disappear without a trace either.
            print("skipping %s: %s" % (file_name, err))
            continue

        sample = (melspectrogram, emotion)
        held_out.get(actor, train).append(sample)
        all_data.append(sample)

    return file_lst, train, valid, test, all_data

In [None]:
def zero_padding(train, valid, test, num=528, size=1025) :
    """Right-pad every spectrogram with zeros to a common width.

    Args:
        train, valid, test: lists of ``(spectrogram, emotion)`` tuples; each
            spectrogram is a 2-D array with ``size`` rows and at most ``num``
            columns (a wider one makes the assignment fail).
        num: number of columns of the padded output.
        size: number of rows of every spectrogram (defaults to the previously
            hard-coded 1025).

    Returns:
        A list with one ``(data, labels)`` pair per input dataset, in the
        order train, valid, test.  ``data`` has shape
        ``(len(dataset), size, num)`` and ``labels`` is a list holding
        ``emotion - 1`` for every sample.
    """
    result = []

    for dataset in (train, valid, test) :
        padded = np.zeros([len(dataset), size, num])
        emotion_lst = []

        for idx, (spectrogram, emotion) in enumerate(dataset) :
            # Copy into the left part; the remaining columns stay zero.
            padded[idx, :, 0:len(spectrogram[0])] = spectrogram
            emotion_lst.append(emotion - 1)

        result.append((padded, emotion_lst))

    return result


In [None]:
def cutting(train, valid, test, all_data=None, size=1025, num=276) :
    """Centre-crop every spectrogram to ``num`` columns.

    A window of ``num`` columns around the middle column is copied into a
    zero-initialised array.  Spectrograms narrower than ``num`` are copied
    whole and zero-padded on the right instead of failing.

    Args:
        train, valid, test: lists of ``(spectrogram, emotion)`` tuples; each
            spectrogram is a 2-D array with ``size`` rows.
        all_data: optional fourth dataset of the same form; when omitted only
            three results are returned.
        size: number of rows of every spectrogram.
        num: number of columns of the cropped output.

    Returns:
        A list with one ``(data, labels)`` pair per dataset passed in, in
        argument order.  ``data`` has shape ``(len(dataset), size, num)`` and
        ``labels`` is a list holding ``emotion - 1`` for every sample.
    """
    datasets = [train, valid, test]
    if all_data is not None :
        datasets.append(all_data)

    half = num // 2
    result = []

    for dataset in datasets :
        cropped = np.zeros([len(dataset), size, num])
        emotion_lst = []

        for idx, (spectrogram, emotion) in enumerate(dataset) :
            mid = spectrogram.shape[1] // 2
            # Clamp at 0: a negative start would wrap around to the end of
            # the array and select the wrong (and too few) columns.
            start = max(mid - half, 0)
            window = spectrogram[:, start:start + num]
            cropped[idx, :, 0:window.shape[1]] = window
            emotion_lst.append(emotion - 1)

        result.append((cropped, emotion_lst))

    return result

In [None]:
def onehot_encoding(data, num=8) :
    """Turn integer class indices into one-hot rows.

    Args:
        data: class index or sequence of class indices.
        num: number of classes, i.e. the length of every one-hot row.

    Returns:
        The rows of the ``num`` x ``num`` identity matrix selected by ``data``.
    """
    identity = np.eye(num)
    return identity[data]

In [None]:
# spectrogram from mat
# Disabled alternative pipeline: the whole cell is wrapped in a string literal,
# so none of it is executed.
# NOTE(review): the snippet calls cutting(train, valid, test) and unpacks three
# results; confirm that this matches the current signature and return value of
# cutting() before re-enabling it.
"""
file_lst, train, valid, test = load_mat_data("data/mat/")
train, valid, test = cutting(train, valid, test)

train_data = train[0].reshape([-1, 1025, 276, 1])
train_label = onehot_encoding(train[1])

valid_data = valid[0].reshape([-1, 1025, 276, 1])
valid_label = onehot_encoding(valid[1])

test_data = test[0].reshape([-1, 1025, 276, 1])
test_label = onehot_encoding(test[1])

train = []
valid = []
test = []

print(len(train_data), train_data.shape, train_label.shape)
print(len(valid_data))
print(len(test_data))

print(file_lst[:5])
print(train_data[0])
plt.imshow(train_data[300].reshape(1025,276))
"""

In [None]:
# melspectrogram from wav
# Load the audio, split it by actor and centre-crop every mel spectrogram to
# 128 x 126.
file_lst, train, valid, test, all_data = load_wav_data("data/wav/")
cut_train, cut_valid, cut_test, cut_all = cutting(
    train, valid, test, all_data, size=128, num=126
)

# Add a trailing channel axis for the network input and one-hot encode the
# labels of every split.
train_data, train_label = cut_train[0].reshape([-1, 128, 126, 1]), onehot_encoding(cut_train[1])
valid_data, valid_label = cut_valid[0].reshape([-1, 128, 126, 1]), onehot_encoding(cut_valid[1])
test_data, test_label = cut_test[0].reshape([-1, 128, 126, 1]), onehot_encoding(cut_test[1])
all_data, all_label = cut_all[0].reshape([-1, 128, 126, 1]), onehot_encoding(cut_all[1])

# Rebind the intermediate names to empty lists so they no longer reference
# the raw and cropped data.
train, valid, test = [], [], []
cut_train, cut_valid, cut_test, cut_all = [], [], [], []

In [None]:
# Quick sanity check of the prepared arrays.
print(len(train_data), train_data.shape, train_label.shape)
print(len(valid_data))
print(len(test_data))
print(train_data[0][:5, :5])

# Show the first training sample as a dB-scaled mel spectrogram.
first_mel_db = librosa.power_to_db(train_data[0].reshape(128, 126), ref=np.max)

plt.figure(figsize=(10, 4))
librosa.display.specshow(first_mel_db, y_axis='mel', fmax=8000, x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()

 <br></br>  <br></br>  <br></br>  <br></br>

## 3. train and test

In [None]:
# Hyper-parameters of the training run.
# NOTE: the training loop further down reassigns learning_rate to 0.001 at the
# start of every epoch, so the value set here is not the one passed to the
# optimizer.
learning_rate = 0.02
training_epochs = 100
batch_size = 40

# Per-epoch history of loss / accuracy on the training set.
train_losses = []
train_accs = []

# Per-epoch history of loss / accuracy on the validation set.
valid_losses = []
valid_accs = []

In [None]:
# Create the session the model runs in, then construct the graph
# (build() prints the shape of every layer while doing so).
sess = tf.Session()

model = CNN(sess, "CNN")
model.build()

In [None]:
# Initialise every variable of the graph; re-running this cell resets the
# model to freshly initialised weights.
sess.run(tf.global_variables_initializer())

In [None]:
print('Learning Started!')
print("")

# train model
for epoch in range(training_epochs):
    avg_cost = 0
    # Only full batches are used; a trailing partial batch is not trained on.
    total_batch = len(train_data) // batch_size
    idx = 0

    # The rate actually fed to the optimizer (set again on every epoch).
    learning_rate = 0.001

    for i in range(total_batch):
        stop = idx + batch_size
        c, _ = model.train(train_data[idx:stop], train_label[idx:stop], learning_rate=learning_rate)
        avg_cost += c / total_batch
        idx = stop

        # Running average of the batch costs, logged every 10th batch.
        if i % 10 == 0:
            print("log : ", avg_cost)

    # Loss and accuracy on the training set after this epoch.
    cost, acc = model.evaluate(train_data, train_label, batch_size=batch_size)
    train_losses.append(cost)
    train_accs.append(acc)

    # Loss and accuracy on the validation set after this epoch.
    v_cost, v_acc = model.evaluate(valid_data, valid_label, batch_size=batch_size)
    valid_losses.append(v_cost)
    valid_accs.append(v_acc)

    summary = " -- train {:.5f}({:.1f}%), valid {:.5f}({:.1f}%)".format(cost, acc*100, v_cost, v_acc*100)
    print("epoch : ", epoch, summary)
    print(" ")

print("")
print('Learning Finished!')

In [None]:
# Accuracy on the held-out test split, computed in a single run over the
# whole set.
print('Accuracy:', model.get_accuracy(test_data, test_label))

In [None]:
# Version tag used in the checkpoint directory and file name.
ver = 2

# The checkpoint goes into a nested, per-version directory; create it first
# so that saving does not fail when the directory tree does not exist yet.
ckpt_dir = "./CNN/CNN_model/ver_" + str(ver)
os.makedirs(ckpt_dir, exist_ok=True)

saver = tf.train.Saver()
save_path = saver.save(sess, ckpt_dir + "/CNN_" + str(ver) + ".ckpt")
print("model saved")

In [None]:
# Clear the default graph so that the model can be built again from scratch.
tf.reset_default_graph() 

 <br></br>  <br></br>  <br></br>  <br></br>