In [None]:
import os
import librosa
import librosa.display
import IPython.display as ipd
import numpy as np
import matplotlib.pyplot as plt


import tensorflow as tf

from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import LinearSVC

import pickle

In [None]:
import sklearn
print(sklearn.__version__)

In [None]:
gpus = tf.config.experimental.list_physical_devices('GPU')

tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=8000)])

In [173]:
voice, sr = librosa.load('./audios/sad.mp3')
FRAME_SIZE = 2048
HOP_LENGTH = 512
after_stft = librosa.stft(voice, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)


In [174]:
after_stft.shape

(1025, 137)

In [168]:
after_stft_i = after_stft[:,600:800]
if len(after_stft_i[1]) == 0:
    print('yey')
after_stft_i.shape

yey


(1025, 0)

In [177]:
emotions_dict = {
    0: 'Neutral',
    1: 'Calm',
    2: 'Happy',
    3: 'Sad',
    4: 'Angry',
    5: 'Fearful',
    6: 'Disgusted',
    7: 'Surprised'
}

def create_spectrogram(audio_file, order: int, spec_type: str):
    voice, sr = librosa.load(audio_file)
    after_stft = librosa.stft(voice, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)
    
    i = 0
    while True:
        after_stft_i = after_stft[:,i*200:(i+1)*200]
        if len(after_stft_i[1]) == 0:
            break
        abs_stft_result = np.abs(after_stft_i) ** 2
        log_stft_result = librosa.power_to_db(abs_stft_result)
        if order != 0:
            log_stft_delta = librosa.feature.delta(log_stft_result, order)
            plot_spectrogram(i, log_stft_delta, sr, HOP_LENGTH, y_axis=spec_type)
            return
        plot_spectrogram(i, log_stft_result, sr, HOP_LENGTH, y_axis=spec_type)
        
        i += 1

def plot_spectrogram(index, Y, sr, hop_length, y_axis="log"):
    plt.figure(figsize=(25, 10))
    plt.axis('off')
    librosa.display.specshow(Y,
                             sr=sr,
                             hop_length=hop_length,
                             y_axis=y_axis)

    plt.savefig(f"./spectrogram{index}.png", bbox_inches='tight', pad_inches=0)
    plt.close()
    
def predict_emotion(cnn_model_path, spec_type='log', test_path="none"):

    if not os.path.exists("./spectrogram.png"):
        print("Please first run Create Spectrogram section.")
        return
    if not os.path.exists(cnn_model_path):
        print("Model path does not exists.")
        return


    cnn = load_model(cnn_model_path)

    #spec = cv2.imread("./spectrogram.png")
    # spec = cv2.imread(test_path)

    # spec = cv2.resize(spec, (128, 128))

    if test_path != "none":
        spec = tf.keras.preprocessing.image.load_img(test_path, target_size=(128, 128))

    else:
        spec = tf.keras.preprocessing.image.load_img("./spectrogram.png", target_size=(128, 128))




    spec = np.reshape(spec, [1, 128, 128, 3])


    classes = cnn.predict(spec)
    
    return emotions_dict[list(classes[0]).index(max(list(classes[0])))]


def predict_emotion_ml(cnn_model_path, spec_type='log', seed=10, ml_algorithm='SVC_Polynomial_Kernel', test_path="none"):

    ml_model_path = "./models/ml_models0/seed" + str(seed) + "/"  + ml_algorithm + ".sav"
    if not os.path.exists(ml_model_path):
        print("ML-Model type is not valid.")
        return
    if not os.path.exists("./spectrogram.png"):
        print("Please first run Create Spectrogram section.")
        return
    if not os.path.exists(cnn_model_path):
        print("Model path does not exists.")
        return

    cnn = load_model(cnn_model_path)
    
    
    if test_path != "none":
        spec = tf.keras.preprocessing.image.load_img(test_path, target_size=(128, 128))

    else:
        spec = tf.keras.preprocessing.image.load_img("./spectrogram.png", target_size=(128, 128))


    spec = np.reshape(spec, [1, 128, 128, 3])
    
    outputLayer = cnn.layers[-5]
    intermediate_layer_model = Model(inputs=cnn.input,
                                    outputs=outputLayer.output)

    intermediate_output = intermediate_layer_model.predict(spec)


    ml_model = pickle.load(open(ml_model_path, 'rb'))

    # ---------------------------------------------- problematik

    
    # ml_output = ml_model.predict_proba(lst)
    # ml_list = list(ml_output)
    
    
    for i in range(9):
        result = ml_model.score(intermediate_output, [i])
        # print(str(int(result)), end="\t")
        if result == 1:
            return emotions_dict[int(i)]
    
        
    

    # emotions_dict[ml_list.index(max(ml_list))] 

In [154]:
# You can play with these.
spec_type = 'log'
seed=10
order = 0
cnn_model_path = "./models/cnn_models_epoch50/seed" + str(seed) + "/logSpecAugment.h5"

# Not recommended to change these.
FRAME_SIZE = 2048
HOP_LENGTH = 512

In [175]:
audio_file = './audios/happy.mp3'
ipd.Audio(audio_file)

In [178]:
create_spectrogram(audio_file, order=order, spec_type=spec_type) # ignore warning

In [136]:
# CNN
predict_emotion(cnn_model_path=cnn_model_path, spec_type=spec_type)



'Disgusted'

In [137]:
# CNN + ML
print(predict_emotion_ml(cnn_model_path=cnn_model_path, spec_type=spec_type, seed=seed, ml_algorithm='SVC_Polynomial_Kernel'))

Sad


In [157]:
import warnings
warnings.filterwarnings('ignore')

audio_file = './audios/sad.mp3'
# audio_file = './data/15_03_16.wav'

cnn_predicts = []
# svc_poly_predicts = []
linearSVC_predicts = []
linear_discriminant_predicts = []
logistic_regression_predicts = []

create_spectrogram(audio_file, order=order, spec_type=spec_type) # ignore warning

for s in [10, 50, 100]:
    cnn_model_path = "./models/cnn_models_epoch50/seed" + str(s) + "/logSpecAugment.h5"
    
    cnn_predicts.append(predict_emotion(cnn_model_path=cnn_model_path, spec_type=spec_type))
    # svc_poly_predicts.append(predict_emotion_ml(cnn_model_path=cnn_model_path, spec_type=spec_type, seed=seed, ml_algorithm='SVC_Polynomial_Kernel'))
    linearSVC_predicts.append(predict_emotion_ml(cnn_model_path=cnn_model_path, spec_type=spec_type, seed=seed, ml_algorithm='LinearSVC'))
    linear_discriminant_predicts.append(predict_emotion_ml(cnn_model_path=cnn_model_path, spec_type=spec_type, seed=seed, ml_algorithm='LinearDiscriminantAnalysis'))
    logistic_regression_predicts.append(predict_emotion_ml(cnn_model_path=cnn_model_path, spec_type=spec_type, seed=seed, ml_algorithm='LogisticRegression'))


from collections import Counter
all_predicts = cnn_predicts + linearSVC_predicts + linear_discriminant_predicts + logistic_regression_predicts
print('Classified with voting:', Counter(all_predicts).most_common(1)[0][0])

Classified with voting: Sad


In [158]:
print(cnn_predicts)
# print(svc_poly_predicts)
print(linearSVC_predicts)
print(linear_discriminant_predicts)
print(logistic_regression_predicts)

['Disgusted', 'Calm', 'Calm']
['Surprised', 'Sad', 'Happy']
['Neutral', 'Sad', 'Neutral']
['Surprised', 'Sad', 'Sad']
