In [1]:

from fast_bert.prediction import BertClassificationPredictor

# ekman_label.csv renamed to labels.csv

# Where the text-classifier weights live, and the directory holding labels.csv.
MODEL_PATH = './model/model_out'
LABEL_PATH = './model/'

# Single-label text classifier: cased input, inference on the CPU.
text_predictor = BertClassificationPredictor(
    model_path=MODEL_PATH,
    label_path=LABEL_PATH,  # directory that contains the labels.csv file
    multi_label=False,
    # model_type='xlnet',
    do_lower_case=False,
    device='cpu',
)


In [2]:
import librosa
import keras
import numpy as np

class LivePredictions:
    """
    Audio emotion classifier backed by a saved Keras model.

    The model is loaded once at construction time and then reused for every
    call to ``make_predictions``.
    """

    # Number of MFCC coefficients extracted per file. The feature extraction
    # and the reshape in make_predictions must agree on this value, so it is
    # kept in a single place.
    N_MFCC = 40

    def __init__(self, model_path='./model/Emotion_Voice_Detection_Model.h5'):
        """
        Load the Keras model used for prediction.

        Args:
            model_path: Path of the saved model file. Defaults to the location
                this notebook has always used, so ``LivePredictions()`` keeps
                working unchanged.
        """
        self.path = model_path
        self.loaded_model = keras.models.load_model(self.path)

    def make_predictions(self, file_path):
        """
        Extract MFCC features from an audio file and run the model on them.

        Args:
            file_path: Path of the audio file to classify.

        Returns:
            The first row of the model output, i.e. the scores for the single
            sample that was fed in.
        """
        data, sampling_rate = librosa.load(file_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=self.N_MFCC)
        # Average each coefficient over time, then shape the vector as a batch
        # of one sample with a trailing singleton axis for the model input.
        x = np.mean(mfccs.T, axis=0).reshape(1, self.N_MFCC, 1)
        predictions = self.loaded_model.predict(x)
        return predictions[0]

# Build the audio classifier once; LivePredictions.__init__ loads the Keras model.
sound_predictor = LivePredictions()
# NOTE: the triple-quoted block below is a bare string expression, not a
# comment. Because it is the last expression of the cell, its value is echoed
# as the cell output. It lists emotion names by index in the same order that
# get_complete_info uses to label result[0] .. result[6].
'''
label_conversion = {'0': 'neutral',
                    '1': 'happy',
                    '2': 'sad',
                    '3': 'angry',
                    '4': 'fearful',
                    '5': 'disgust',
                    '6': 'surprised'}
'''

"\nlabel_conversion = {'0': 'neutral',\n                    '1': 'happy',\n                    '2': 'sad',\n                    '3': 'angry',\n                    '4': 'fearful',\n                    '5': 'disgust',\n                    '6': 'surprised'}\n"

In [3]:
# Root directory that get_complete_info() walks recursively looking for .wav files.
SOUND_FILE_PATH = '../data/emotion-classification-from-audio-files-master/features'

In [4]:
# Accumulators filled in by get_complete_info(): one record dict per processed
# file, and the numeric label derived from each file name.
final_infor_result = list()
label = list()

# Translates the label names produced by the text classifier into the names
# used as keys for the audio classifier's scores.
label_conversion = {
    'neutral': 'neutral',
    'joy': 'happy',
    'sadness': 'sad',
    'anger': 'angry',
    'fear': 'fearful',
    'disgust': 'disgust',
    'surprise': 'surprised',
}

In [5]:
from vosk import Model, KaldiRecognizer, SetLogLevel
import os
import wave
import json

class AudioToWords:
    """Speech-to-text helper built on a Vosk model.

    The model is loaded once in ``__init__``; ``get_words`` then transcribes
    individual WAV files with a fresh recognizer per call.
    """

    def __init__(self, model_path):
        """Load the Vosk model stored at ``model_path``.

        Args:
            model_path: Directory containing the unpacked Vosk model.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        self.model_path = model_path

        SetLogLevel(0)

        if not os.path.exists(model_path):
            # Raise instead of calling exit(): exit() is not a dependable way
            # to abort inside a notebook kernel, and an exception carries the
            # hint to whoever called us.
            raise FileNotFoundError(
                "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
            )
        self.model = Model(self.model_path)

    def get_words(self, audio_path):
        """Transcribe a WAV file and return the recognizer's final result.

        Args:
            audio_path: Path of a mono, 16-bit, uncompressed PCM WAV file.

        Returns:
            The string returned by ``KaldiRecognizer.FinalResult()`` after the
            whole file has been fed to the recognizer.

        Raises:
            ValueError: If the file is not mono 16-bit uncompressed PCM. This
                is an ordinary exception so that callers using
                ``except Exception`` can skip the offending file.
        """
        # The context manager guarantees the file handle is released on every
        # path, including the validation failure below.
        with wave.open(audio_path, "rb") as wf:
            # Kept as an attribute because the original code exposed it.
            self.wf = wf
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
                raise ValueError("Audio file must be WAV format mono PCM.")

            rec = KaldiRecognizer(self.model, wf.getframerate())
            rec.SetMaxAlternatives(10)
            rec.SetWords(True)

            # Feed the audio in chunks of 4000 frames until the file is exhausted.
            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                rec.AcceptWaveform(data)

        return rec.FinalResult()

# Example WAV path; no other cell shown in this notebook reads it.
audio_path = 'test/happy.wav'
# Directory passed to AudioToWords as the location of the Vosk model.
model_path = './a2w_new/model'

# Shared recognizer instance; get_complete_info() calls sample.get_words() on it.
sample = AudioToWords(model_path)


In [12]:
def find_file_in_list(file_name, file_list):
    """Tell whether any record in ``file_list`` has ``file_name`` as its 'file_path'.

    Args:
        file_name: Path to look for.
        file_list: Iterable of dicts, each carrying a 'file_path' key.

    Returns:
        True if a record with a matching 'file_path' exists, otherwise False.
    """
    return any(file_name == record['file_path'] for record in file_list)

In [13]:
import os
def _emotion_label(file_name):
    """Derive the numeric class label from the character at index 7 of ``file_name``.

    That character is parsed as a 1-based code. Codes 1 and 2 both map to
    label 0; any higher code ``n`` maps to ``n - 2``.

    Raises:
        ValueError: If the character at index 7 is not a digit.
    """
    emotion_index = int(file_name[7:8]) - 1
    if emotion_index <= 1:
        return 0
    return emotion_index - 1


def get_complete_info(path):
    """Walk ``path`` and collect text and audio predictions for every new .wav file.

    For each .wav file not already present in the module-level
    ``final_infor_result`` list, the file is transcribed with ``sample``,
    scored with ``sound_predictor`` and ``text_predictor``, and a record dict
    is appended to ``final_infor_result``. The numeric label is appended to the
    module-level ``label`` list.

    Both lists are only extended after every step for a file has succeeded, so
    they stay the same length and index-aligned even when some files fail.
    Files that raise an exception are reported with ``print`` and skipped.

    Args:
        path: Root directory to search recursively.
    """
    for subdir, _dirs, files in os.walk(path):
        for file in files:
            try:
                file_path = os.path.join(subdir, file)
                if not file_path.endswith(".wav") or find_file_in_list(file_path, final_infor_result):
                    continue

                file_label = _emotion_label(file)
                print(file_path)

                # The recognizer returns JSON; take the text of the first alternative.
                text = json.loads(sample.get_words(file_path))['alternatives'][0]['text']

                result = sound_predictor.make_predictions(file_path)
                labeled_sound_result = {'neutral': result[0], 'happy': result[1], 'sad': result[2], 'angry': result[3], 'fearful': result[4], 'disgust': result[5], 'surprised': result[6]}

                text_result = text_predictor.predict(text)
                # Re-key the text scores with the names used for the audio scores.
                labeled_text_result = {}
                for entry in text_result:
                    labeled_text_result[label_conversion[entry[0]]] = entry[1]

                file_info = {'file_path': file_path, 'label': file_label, 'text': text, 'sound_vector': labeled_sound_result,  'text_vector': labeled_text_result}

                # Commit to both accumulators together, and only now: appending
                # the label earlier would leave a dangling entry whenever a
                # later step raised.
                label.append(file_label)
                final_infor_result.append(file_info)
            # If the file is not valid, skip it
            except Exception as err:
                print(err)
                continue

In [14]:
# Process every .wav under SOUND_FILE_PATH; each processed path is printed below.
get_complete_info(SOUND_FILE_PATH)

../data/emotion-classification-from-audio-files-master/features\Actor_25\60-90-01-26-76-29-41.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-38-07-76-32-74-12.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-52-03-84-20-89-56.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-64-08-80-38-46-29.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-65-01-20-96-58-37.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-70-01-60-14-94-58.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-70-03-12-51-42-76.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-78-04-62-98-21-39.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-78-08-69-41-32-70.wav
../data/emotion-classification-from-audio-files-master/features\Actor_25\61-89-04-68-39-45-33.wav
../data/emotion-clas

In [31]:
# Persist the collected records. The data is serialized to a string *before*
# result.json is opened: opening with 'w' truncates the file, so streaming
# json.dump into it would leave an empty or partial file behind whenever a
# value turns out not to be JSON-serializable.
try:
    serialized_results = json.dumps(final_infor_result)
    with open('result.json', 'w') as outfile:
        outfile.write(serialized_results)
except Exception as e:
    print(e)
    

In [35]:
class NumpyFloatValuesEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    The standard encoder rejects NumPy types such as ``np.float32``. This
    subclass converts them to their built-in Python equivalents first:

    * any NumPy floating scalar -> ``float``
    * any NumPy integer scalar  -> ``int``
    * ``np.ndarray``            -> nested ``list``

    Anything else is delegated to the base class, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``."""
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [38]:
# Encode every collected record as its own JSON string, using the custom
# encoder so NumPy float values are accepted.
another_bullshit_result = [
    json.dumps(record, cls=NumpyFloatValuesEncoder)
    for record in final_infor_result
]
print()




In [39]:
# Store the list of per-record JSON strings; failures are printed, not raised.
try:
    with open('final_result.json', 'w') as result_file:
        result_file.write(json.dumps(another_bullshit_result))
except Exception as write_error:
    print(write_error)

In [41]:
# test load
try:
    with open('last_network_input.json') as json_file:
        dataaa = json.load(json_file)
except Exception as e:
    print(e)
else:
    # Inspect the data only when the load succeeded. Otherwise `dataaa` is
    # either undefined (NameError hiding the real error) or a stale value left
    # over from an earlier run of this cell.
    print(type(dataaa))

<class 'list'>


In [49]:
# Each element of `dataaa` is itself a JSON string, hence the second decode;
# the decoded record's 'sound_vector' entry comes back as a dict (see output).
print(type(json.loads(dataaa[0])['sound_vector']))

<class 'dict'>
