<a href="https://colab.research.google.com/github/brianmodel/EmotionClassification/blob/main/Emotion_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setting up requirements


The Colab microphone-recording code used in this notebook is courtesy of Ricardo de Azambuja (see his article [here](https://ricardodeazambuja.com/deep_learning/2019/03/09/audio_and_video_google_colab/)).

In [None]:
!pip install ffmpeg-python
!pip install pyAudioAnalysis
!pip install eyed3
!pip install pydub
# !pip install --upgrade scikit-learn==0.21.3
!pip install --upgrade scikit-learn==0.23.2
!pip3 install pickle5

Collecting ffmpeg-python
  Downloading https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Collecting pyAudioAnalysis
[?25l  Downloading https://files.pythonhosted.org/packages/71/42/09adc0229b78dc514004ecf83508afa36a998502a36a4ebdacc14ae55fcf/pyAudioAnalysis-0.3.6.tar.gz (52.4MB)
[K     |████████████████████████████████| 52.4MB 78kB/s 
[?25hBuilding wheels for collected packages: pyAudioAnalysis
  Building wheel for pyAudioAnalysis (setup.py) ... [?25l[?25hdone
  Created wheel for pyAudioAnalysis: filename=pyAudioAnalysis-0.3.6-cp36-none-any.whl size=52589856 sha256=842043d2094963621e91637d600b8d45f2edd7e6f9bc8147268f5d26bda56594
  Stored in directory: /root/.cache/pip/wheels/fd/74/c2/361da76b03ed9d45c1b606d8fd25ac53ab965f754061fc4805
Successfully built pyAudioAnalysis
Installing collected packages: pyA

In [None]:
# HTML/JavaScript snippet rendered into the cell output to record microphone
# audio. It adds a button, records with MediaRecorder, appends an <audio>
# preview, and exposes a global promise named `data` that settles with the
# recording encoded as a base64 data URL.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");

my_btn.appendChild(t);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;

// `data` is settled from the FileReader callback, i.e. only once the
// recording has really been encoded. Waiting a fixed delay after the click
// could hand back the placeholder value when encoding took longer.
var resolveData, rejectData;
var data = new Promise(function(resolve, reject) {
  resolveData = resolve;
  rejectData = reject;
});

var handleSuccess = function(stream) {
  gumStream = stream;
  recorder = new MediaRecorder(stream);
  recorder.ondataavailable = function(e) {
    var url = URL.createObjectURL(e.data);
    var preview = document.createElement('audio');
    preview.controls = true;
    preview.src = url;
    document.body.appendChild(preview);

    reader = new FileReader();
    reader.onloadend = function() {
      // loadend fires after both success and failure, so check for an error.
      if (reader.error) {
        rejectData(reader.error);
        return;
      }
      base64data = reader.result;
      resolveData(base64data.toString());
    };
    reader.readAsDataURL(e.data);
  };
  recorder.start();
};

recordButton.innerText = "Recording audio, press to stop";

// Reject `data` if the microphone is unavailable or permission is denied, so
// the waiting caller gets an error instead of a value that never arrives.
navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess).catch(rejectData);


function toggleRecording() {
  if (recorder && recorder.state == "recording") {
      recorder.stop();
      gumStream.getAudioTracks()[0].stop();
      //recordButton.innerText = "Saving recording"
      recordButton.style.visibility = "hidden"
  }
}

// Stopping the recorder triggers ondataavailable, which settles `data`.
recordButton.onclick = toggleRecording;

</script>
"""

In [None]:
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
import librosa
import io
import ffmpeg
import pyAudioAnalysis

def get_audio():
  """Record audio from the browser microphone and return it as a waveform.

  Renders the AUDIO_HTML recorder, waits for its JavaScript `data` promise to
  deliver the recording as a base64 data URL, converts the recording to WAV
  with ffmpeg, and decodes the WAV bytes with librosa.

  Returns:
    (audio, sr): the samples and sampling rate returned by `librosa.load`.

  Raises:
    ValueError: if the browser did not deliver a base64 data URL.
    RuntimeError: if ffmpeg produced no usable WAV output.
  """
  display(HTML(AUDIO_HTML))
  data = eval_js("data")

  # A data URL has the form "<header>,<base64 payload>". Anything without a
  # payload after the comma means no recording was delivered.
  _header, separator, payload = str(data).partition(',')
  if not separator or not payload:
    raise ValueError('No audio was captured from the browser; please record again.')
  binary = b64decode(payload)

  process = (ffmpeg
    .input('pipe:0')
    .output('pipe:1', format='wav')
    .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
  )
  output, err = process.communicate(input=binary)

  # The header patch below needs at least the 8-byte "RIFF" + size prefix.
  if len(output) < 8:
    details = (err or b'').decode('utf-8', 'replace')
    raise RuntimeError('ffmpeg failed to convert the recording to WAV: {}'.format(details))

  # Replace bytes 4:8 with the actual size of the RIFF chunk (total length
  # minus the 8-byte prefix), stored as a 4-byte little-endian integer.
  # Presumably the piped ffmpeg output carries a placeholder there -- confirm.
  riff_chunk_size = len(output) - 8
  size_field = (riff_chunk_size & 0xFFFFFFFF).to_bytes(4, 'little')
  riff = output[:4] + size_field + output[8:]
  audio, sr = librosa.load(io.BytesIO(riff))

  return audio, sr

In [None]:
import pickle5 as pickle

def load_model(model_name):
    """
    Load a pickled classifier together with its feature-extraction metadata.
    ARGUMENTS:
        - model_name:     the path of the pickled model; the companion file
                          model_name + "MEANS" holds the metadata
    RETURNS:
        - (svm_model, mean, std, classNames, mid_window, mid_step,
           short_window, short_step, compute_beat), where mean and std
          have been converted to numpy arrays
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The metadata file holds eight objects pickled back to back, in the
    # order they are unpacked here.
    with open(model_name + "MEANS", "rb") as meta_file:
        (mean, std, classNames, mid_window, mid_step,
         short_window, short_step, compute_beat) = [
            pickle.load(meta_file) for _ in range(8)
        ]

    mean, std = np.array(mean), np.array(std)

    with open(model_name, 'rb') as model_file:
        svm_model = pickle.load(model_file)

    return (svm_model, mean, std, classNames,
            mid_window, mid_step, short_window, short_step, compute_beat)


In [None]:
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import MidTermFeatures as aF

# Use raw audio to classify instead of passing in a file
def recording_classification(signal, sampling_rate, model_name, model_type):
  """Classify an in-memory audio signal instead of reading it from a file.

  Args:
    signal: audio samples; its first dimension is the number of samples.
    sampling_rate: sampling rate of `signal`.
    model_name: path of the model to load (loaded on every call).
    model_type: 'knn' selects aT.load_model_knn, any other value selects
      load_model; the value is also passed on to aT.classifier_wrapper.

  Returns:
    (class_id, probability, classes), or (-1, -1, -1) when the sampling
    rate is 0.
  """
  loader = aT.load_model_knn if model_type == 'knn' else load_model
  (classifier, mean, std, classes, mid_window, mid_step,
   short_window, short_step, compute_beat) = loader(model_name)

  # A zero sampling rate signals an audio IO problem.
  if sampling_rate == 0:
    return -1, -1, -1

  # Shrink the mid-term window when the recording is shorter than it.
  duration = signal.shape[0] / float(sampling_rate)
  if duration < mid_window:
    mid_window = duration

  # Feature extraction (window and step sizes are converted to samples).
  mid_features, short_features, _ = aF.mid_feature_extraction(
      signal, sampling_rate,
      mid_window * sampling_rate,
      mid_step * sampling_rate,
      round(sampling_rate * short_window),
      round(sampling_rate * short_step))

  # Long-term averaging of the mid-term statistics.
  averaged = mid_features.mean(axis=1)
  if compute_beat:
    beat, beat_conf = aF.beat_extraction(short_features, short_step)
    averaged = np.append(np.append(averaged, beat), beat_conf)

  # Normalise with the statistics stored alongside the model.
  feature_vector = (averaged - mean) / std

  class_id, probability = aT.classifier_wrapper(classifier, model_type,
                                                feature_vector)
  return class_id, probability, classes


In [None]:
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import MidTermFeatures as aF
from pyAudioAnalysis import audioBasicIO


def file_classification(input_file, model_params, model_type=None):
    """
    Classify a single audio file with an already-loaded model.
    ARGUMENTS:
        - input_file:     path of the audio file to classify
        - model_params:   the 9-tuple (classifier, mean, std, classes,
                          mid_window, mid_step, short_window, short_step,
                          compute_beat), e.g. as returned by load_model
        - model_type:     classifier type passed to aT.classifier_wrapper.
                          Optional: when omitted, a notebook-level
                          `model_type` variable is used if one exists,
                          otherwise 'svm'.
    RETURNS:
        - (class_id, probability, classes), or (-1, -1, -1) when the file
          could not be read (sampling rate 0)
    """
    classifier, mean, std, classes, mid_window, mid_step, short_window, short_step, compute_beat = model_params

    if model_type is None:
        # This function used to read `model_type` straight from the notebook
        # namespace and raised NameError when it had never been defined.
        # Honour such a variable when present, for backward compatibility.
        # NOTE(review): aT.classifier_wrapper appears to treat every
        # scikit-learn model type identically, so 'svm' should be a valid
        # fallback for any non-kNN model -- confirm against the installed
        # pyAudioAnalysis version.
        model_type = globals().get('model_type', 'svm')

    # read audio file and convert to mono
    sampling_rate, signal = audioBasicIO.read_audio_file(input_file)
    signal = audioBasicIO.stereo_to_mono(signal)

    if sampling_rate == 0:
        # audio file IO problem
        return -1, -1, -1
    # shrink the mid-term window when the file is shorter than it
    if signal.shape[0] / float(sampling_rate) < mid_window:
        mid_window = signal.shape[0] / float(sampling_rate)

    # feature extraction (window and step sizes are converted to samples):
    mid_features, s, _ = \
        aF.mid_feature_extraction(signal, sampling_rate,
                                  mid_window * sampling_rate,
                                  mid_step * sampling_rate,
                                  round(sampling_rate * short_window),
                                  round(sampling_rate * short_step))
    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beat_extraction(s, short_step)
        mid_features = np.append(mid_features, beat)
        mid_features = np.append(mid_features, beat_conf)
    feature_vector = (mid_features - mean) / std    # normalization

    # classification
    class_id, probability = aT.classifier_wrapper(classifier, model_type,
                                                  feature_vector)
    return class_id, probability, classes

In [None]:
  # Registry of the models this notebook can run. Each key is a model name
  # (the same names the form cells offer); each value is a
  # (model_path, model_type) pair, read as model[0] and model[1] by
  # record_and_classify. load_model opens model_path and model_path + "MEANS".
  # NOTE(review): the paths live under /content/drive/MyDrive, so Google Drive
  # presumably has to be mounted first -- no drive.mount call is visible here.
  MODELS = {
    'individual_gradientboosting': ('/content/drive/MyDrive/AudioAnalysisModels/gradientboosting_individual_3singer/gradientboosting_individual_3singer', 'gradientboosting'),
    'individual_randomforest': ('/content/drive/MyDrive/AudioAnalysisModels/randomforest_individual_3singer/randomforest_individual_3singer', 'randomforest'),
    'individual_extratrees': ('/content/drive/MyDrive/AudioAnalysisModels/extratrees_individual_3singer/extratrees_individual_3singer', 'extratrees'),
    'big4_gradientboosting': ('/content/drive/MyDrive/AudioAnalysisModels/gradientboosting_big4_3singer/gradientboosting_big4_3singer', 'gradientboosting'),
    'love_disgust': ('/content/drive/MyDrive/AudioAnalysisModels/svm_LoveDisgust_3singer/svm_LoveDisgust_3singer', 'svm'),
  }

In [None]:
from pyAudioAnalysis import audioTrainTest as aT
from google.colab import drive
import soundfile
import os

# Getting audio recording, and running various models
def record_and_classify(models_to_run):
  """Record one clip from the microphone and print each selected model's prediction.

  Args:
    models_to_run: dict mapping a MODELS key to a flag; only entries with a
      truthy flag are run.
  """
  audio, sr = get_audio()
  selected = [name for name, enabled in models_to_run.items() if enabled]
  for model_name in selected:
    entry = MODELS[model_name]
    model_path = entry[0]
    model_type = entry[1]
    class_id, prob, classes = recording_classification(audio, sr, model_path, model_type)
    # Model name first, then the predicted class label followed by a blank line.
    print('{}:'.format(model_name))
    print('{}\n'.format(classes[int(class_id)]))

# Sample Data


In [None]:
from scipy.io import wavfile
# import IPython.display as ipd, Audio
from IPython.display import Audio, display

# Example recordings used by the demo; later cells index into this list.
audio_files = [
  '/content/drive/MyDrive/AudioAnalysisModels/sample_data/Amusement.wav',
  '/content/drive/MyDrive/AudioAnalysisModels/sample_data/Anger.wav',
  '/content/drive/MyDrive/AudioAnalysisModels/sample_data/Love.wav',
  '/content/drive/MyDrive/AudioAnalysisModels/sample_data/Hate.wav',
]

# Show a numbered audio player for every sample (numbering starts at 1).
for sample_number, sample_path in enumerate(audio_files, start=1):
  print("Sample {}:".format(sample_number))
  display(Audio(filename=sample_path))


Sample 1:


Sample 2:


Sample 3:


Sample 4:


# Classify Sample Data


Select which model to run, and choose all the samples for which you want to predict the emotion. You can listen to each sample in the above cell.

In [None]:
#@title Classify Data

model = "individual_gradientboosting" #@param ["individual_gradientboosting", "individual_randomforest", "individual_extratrees", "big4_gradientboosting", "love_disgust"]
sample_1 = True #@param {type:"boolean"}
sample_2 = True #@param {type:"boolean"}
sample_3 = True #@param {type:"boolean"}
sample_4 = True #@param {type:"boolean"}

# MODELS maps each model name to a (path, type) pair. Both are bound at
# notebook level: file_classification resolves `model_type` from the notebook
# namespace, and leaving it undefined raises NameError on a fresh kernel.
model_path, model_type = MODELS[model][0], MODELS[model][1]

# Zero-based indices into audio_files for every ticked sample.
sample_flags = [sample_1, sample_2, sample_3, sample_4]
samples_idx = [idx for idx, selected in enumerate(sample_flags) if selected]

# Load the model once and reuse it for all selected samples.
model_params = load_model(model_path)
for i in samples_idx:
  sample = audio_files[i]
  class_id, prob, classes = file_classification(sample, model_params)
  print("Sample {} prediction: {}".format(i+1, classes[int(class_id)]))

Sample 1 prediction: Amusement
Sample 2 prediction: Anger
Sample 3 prediction: Love
Sample 4 prediction: Hate


# Record and classify microphone audio emotion

Run the cell to start recording your microphone audio. Once you are done, press the button to stop recording and see the selected models' predictions.

In [None]:
#@title Run Models
individual_gradientboosting = False #@param {type:"boolean"}
individual_randomforest = False #@param {type:"boolean"}
individual_extratrees = False #@param {type:"boolean"}
big4_gradientboosting = False #@param {type:"boolean"}
love_disgust = True #@param {type:"boolean"}


# Map each model name to its checkbox value. The keys must match the keys of
# MODELS, because record_and_classify looks every enabled name up there.
models_to_run = {
    'individual_gradientboosting': individual_gradientboosting,
    'individual_randomforest': individual_randomforest,
    'individual_extratrees': individual_extratrees,
    'big4_gradientboosting': big4_gradientboosting,
    'love_disgust': love_disgust,
}

# Records from the microphone, then prints the prediction of every enabled model.
record_and_classify(models_to_run)

love_disgust:
Disgust

