# Audio Sentiment Classification

In [6]:
import keras
import numpy as np
import librosa




In [7]:
class AudioSentimentClassifier:
    """
    Classify the emotion expressed in an audio file with a saved Keras model.

    Build the object with the path of the model and the path of the audio
    file, then call makepredictions() to print the predicted emotion or
    predict_emotion() to get it back as a string.
    """

    # Class index (argmax of the model output) -> emotion name.
    _LABEL_CONVERSION = {0: 'neutral',
                         1: 'calm',
                         2: 'happy',
                         3: 'sad',
                         4: 'angry',
                         5: 'fearful',
                         6: 'disgust',
                         7: 'surprised'}

    def __init__(self, path, file):
        """
        Store the locations of the model and of the audio file.

        :param path: path to the saved Keras model (e.g. an .h5 file).
        :param file: path to the audio file to classify.
        """
        self.path = path
        self.file = file
        # Set by load_model(). Kept as None until then so that a missing
        # load can be detected instead of failing with AttributeError.
        self.loaded_model = None

    def load_model(self):
        """
        Load the model stored at self.path into self.loaded_model.
        """
        self.loaded_model = keras.models.load_model(self.path)

    def _extract_features(self):
        """
        Turn the audio file into the array that is fed to the model.

        The file is decoded with librosa, 40 MFCCs are computed per frame and
        averaged over all frames, then two singleton axes are added. For mono
        audio (librosa.load's default) the result has shape (1, 40, 1).

        :return: numpy array holding the averaged MFCC features.
        """
        data, sampling_rate = librosa.load(self.file)
        mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
        x = np.expand_dims(mfccs, axis=0)
        return np.expand_dims(x, axis=2)  # Adjust dimensions for model input

    def predict_emotion(self):
        """
        Run the model on the audio file and return the predicted emotion.

        The model is loaded on demand when load_model() has not been called.

        :return: emotion name (str) of the highest-scoring class.
        :raises ValueError: if the winning class index has no known label.
        """
        if self.loaded_model is None:
            self.load_model()
        predictions = self.loaded_model.predict(self._extract_features())
        return self.convertclasstoemotion(np.argmax(predictions))

    def makepredictions(self):
        """
        Predict the emotion of the audio file and print it.
        """
        print("Prediction is", " ", self.predict_emotion())

    @staticmethod
    def convertclasstoemotion(pred):
        """
        Convert a predicted class index into a human readable emotion name.

        :param pred: class index, a Python or numpy integer from 0 to 7.
        :return: the matching emotion name (str).
        :raises ValueError: if pred is not a known class index.
        """
        try:
            return AudioSentimentClassifier._LABEL_CONVERSION[pred]
        except (KeyError, TypeError):
            # TypeError covers unhashable input such as a multi-element array.
            raise ValueError(
                f"Unknown class index {pred!r}; expected an integer from 0 to 7."
            ) from None


In [11]:
# Single place to edit when the project lives somewhere else.
PROJECT_DIR = 'C:\\Users\\nello\\Documents\\vscode_projects\\TravelTales\\Client\\AudioSentimentClassification'
# Both models are evaluated on the same clip.
AUDIO_FILE = PROJECT_DIR + '\\audio\\joy2.wav'

MLPpred = AudioSentimentClassifier(path=PROJECT_DIR + '\\models\\MLP_Classifier.h5',
                                   file=AUDIO_FILE)
CNNpred = AudioSentimentClassifier(path=PROJECT_DIR + '\\models\\CNN_Classifier.h5',
                                   file=AUDIO_FILE)

# Model 1 (MLP) first, then Model 2 (CNN): load each model and print its prediction.
for classifier in (MLPpred, CNNpred):
    classifier.load_model()
    classifier.makepredictions()

Prediction is   disgust
Prediction is   happy


In [14]:
# Report the versions of the libraries used above; each label names the
# library whose version is printed next to it.
print("Keras version:", keras.__version__)
print("Librosa version:", librosa.__version__)
print("NumPy version:", np.__version__)

Librosa version: 2.15.0
Librosa version: 0.10.1
NumPy version: 1.23.5


--------------------------------------------------------------------------------

## Convert the model to TensorFlow Lite to run it on a Raspberry Pi

In [15]:
pip install tflite_runtime

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement tflite_runtime (from versions: none)
ERROR: No matching distribution found for tflite_runtime


In [16]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Source (Keras .h5) and destination (.tflite) files of the conversion.
h5_model_path = 'C:\\Users\\nello\\Documents\\vscode_projects\\TravelTales\\Client\\AudioSentimentClassification\\models\\CNN_Classifier.h5'
tflite_model_path = 'C:\\Users\\nello\\Documents\\vscode_projects\\TravelTales\\Client\\AudioSentimentClassification\\models\\CNN_Classifier_Lite.tflite'

# Load the trained Keras model and run it through the TFLite converter.
keras_model = load_model(h5_model_path)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
tflite_model = converter.convert()

# The converted model is written out in binary mode.
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

print(f'TFLite model saved to {tflite_model_path}')


INFO:tensorflow:Assets written to: C:\Users\nello\AppData\Local\Temp\tmp5ahyz9fl\assets


INFO:tensorflow:Assets written to: C:\Users\nello\AppData\Local\Temp\tmp5ahyz9fl\assets


TFLite model saved to C:\Users\nello\Documents\vscode_projects\TravelTales\Client\AudioSentimentClassification\models\CNN_Classifier_Lite.tflite


In [17]:
class AudioSentimentClassifierLite:
    """
    Classify the emotion expressed in an audio file with a TensorFlow Lite model.

    Build the object with the path of the .tflite model and the path of the
    audio file, then call makepredictions() to print the predicted emotion or
    predict_emotion() to get it back as a string.
    """

    # Class index (argmax of the model output) -> emotion name.
    _LABEL_CONVERSION = {0: 'neutral',
                         1: 'calm',
                         2: 'happy',
                         3: 'sad',
                         4: 'angry',
                         5: 'fearful',
                         6: 'disgust',
                         7: 'surprised'}

    def __init__(self, path, file):
        """
        Store the locations of the model and of the audio file.

        :param path: path to the TensorFlow Lite model file.
        :param file: path to the audio file to classify.
        """
        self.path = path
        self.file = file
        # Set by load_model(). Kept as None until then so that a missing
        # load can be detected instead of failing with AttributeError.
        self.interpreter = None

    def load_model(self):
        """
        Create the TFLite interpreter for self.path and allocate its tensors.
        """
        self.interpreter = tf.lite.Interpreter(model_path=self.path)
        self.interpreter.allocate_tensors()

    def _extract_features(self):
        """
        Turn the audio file into the array that is fed to the model.

        The file is decoded with librosa, 40 MFCCs are computed per frame and
        averaged over all frames, then two singleton axes are added. For mono
        audio (librosa.load's default) the result has shape (1, 40, 1).

        :return: numpy array holding the averaged MFCC features.
        """
        data, sampling_rate = librosa.load(self.file)
        mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
        x = np.expand_dims(mfccs, axis=0)
        return np.expand_dims(x, axis=2)  # Adjust dimensions for model input

    def predict_emotion(self):
        """
        Run the interpreter on the audio file and return the predicted emotion.

        The interpreter is created on demand when load_model() has not been
        called.

        :return: emotion name (str) of the highest-scoring class.
        :raises ValueError: if the winning class index has no known label.
        """
        if self.interpreter is None:
            self.load_model()

        input_details = self.interpreter.get_input_details()[0]
        output_index = self.interpreter.get_output_details()[0]['index']

        # set_tensor() requires the exact dtype of the input tensor, so cast
        # the features to it (no copy is made when the dtype already matches).
        x = self._extract_features().astype(input_details['dtype'], copy=False)

        # Run inference
        self.interpreter.set_tensor(input_details['index'], x)
        self.interpreter.invoke()

        # get_tensor() returns a copy of the output, so no reference to the
        # interpreter's internal buffer is kept alive after this call.
        predictions = self.interpreter.get_tensor(output_index)[0]
        return self.convertclasstoemotion(np.argmax(predictions))

    def makepredictions(self):
        """
        Predict the emotion of the audio file and print it.
        """
        print("Prediction is", " ", self.predict_emotion())

    @staticmethod
    def convertclasstoemotion(pred):
        """
        Convert a predicted class index into a human readable emotion name.

        :param pred: class index, a Python or numpy integer from 0 to 7.
        :return: the matching emotion name (str).
        :raises ValueError: if pred is not a known class index.
        """
        try:
            return AudioSentimentClassifierLite._LABEL_CONVERSION[pred]
        except (KeyError, TypeError):
            # TypeError covers unhashable input such as a multi-element array.
            raise ValueError(
                f"Unknown class index {pred!r}; expected an integer from 0 to 7."
            ) from None

In [18]:
# Classify the sample clip with the converted TFLite model.
lite_model_path = 'C:\\Users\\nello\\Documents\\vscode_projects\\TravelTales\\Client\\AudioSentimentClassification\\models\\CNN_Classifier_Lite.tflite'
sample_audio_path = 'C:\\Users\\nello\\Documents\\vscode_projects\\TravelTales\\Client\\AudioSentimentClassification\\audio\\joy2.wav'

audio_classifier = AudioSentimentClassifierLite(path=lite_model_path, file=sample_audio_path)
audio_classifier.load_model()
audio_classifier.makepredictions()

Prediction is   happy
