In [None]:
# Code tutorial adapted from https://data-flair.training/
import os

from google.colab import drive

# Mount Google Drive so files stored there are reachable under /content/gdrive.
drive.mount('/content/gdrive')

# List the current working directory to confirm the 'gdrive' mount point exists.
print(os.listdir())

Mounted at /content/gdrive
['.config', 'gdrive', 'sample_data']


In [None]:
!pip install tensorflow keras sklearn matplotlib pandas


In [None]:
from PIL import Image

In [None]:
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg

In [None]:
!pip install PyAudio

In [None]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
#extract features function

def extract_feature(file_name, mfcc, chroma, mel):
  """Build a 1-D feature vector for a single audio file.

  Each enabled feature is computed with librosa, averaged over time frames,
  and the per-feature means are concatenated in the fixed order
  MFCC -> chroma -> mel.

  Args:
    file_name: Path to an audio file readable by ``soundfile``.
    mfcc: If truthy, include the mean of 40 MFCC coefficients.
    chroma: If truthy, include the mean chromagram computed from the STFT
      magnitude.
    mel: If truthy, include the mean mel spectrogram.

  Returns:
    A 1-D ``numpy`` array holding the concatenated feature means. It is
    empty when all three flags are falsy.
  """
  # Read the samples and sample rate; the file is closed when the block exits.
  with soundfile.SoundFile(file_name) as sound_file:
    X = sound_file.read(dtype='float32')
    sample_rate = sound_file.samplerate

  # soundfile returns multi-channel audio as (frames, channels); average the
  # channels so librosa always receives a mono 1-D signal.
  if X.ndim > 1:
    X = X.mean(axis=1)

  # Start from an empty array so the result is well-defined even when no
  # feature is requested.
  pieces = [np.array([])]

  if mfcc:
    mfcc_mean = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    pieces.append(mfcc_mean)

  if chroma:
    # The chromagram is derived from the magnitude of the short-time Fourier transform.
    stft = np.abs(librosa.stft(y=X))
    chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    pieces.append(chroma_mean)

  if mel:
    # Fixed: the function is `melspectrogram` (the original `melspectogram`
    # does not exist), and the audio is passed by keyword as `y`.
    mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    pieces.append(mel_mean)

  return np.hstack(pieces)

In [None]:
# Maps the two-digit emotion code used in RAVDESS file names (as a string)
# to the emotion's name.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# Subset of emotions we are observing; files labelled with any other
# emotion are skipped when the dataset is loaded.
observed_emotions = ['calm', 'happy', 'fearful', 'disgust']

In [None]:
def load_data(test_size=0.2, data_pattern="D:\\DataFlair\\ravdess data\\Actor_*\\*.wav"):
  """Load the dataset, extract features, and split into train/test sets.

  Args:
    test_size: Size of the test split, forwarded to ``train_test_split``.
    data_pattern: Glob pattern matching the sound files to load. The default
      is the original tutorial's Windows location; pass the dataset's actual
      location (for example a path under the mounted drive) when running
      elsewhere.

  Returns:
    The result of ``train_test_split``: ``x_train, x_test, y_train, y_test``,
    where the x values are arrays of feature vectors and the y values are
    lists of emotion names.

  Raises:
    ValueError: If no file matching ``data_pattern`` carries an observed
      emotion, so there is nothing to split.
  """
  x, y = [], []  # feature vectors and their emotion labels

  # Sort the matches: glob order depends on the file system, and the seeded
  # split below is only reproducible if the sample order is stable.
  for file in sorted(glob.glob(data_pattern)):
    file_name = os.path.basename(file)

    # The third '-'-separated field of the file name is the emotion code;
    # translate it to an emotion name via the `emotions` dict.
    emotion = emotions[file_name.split('-')[2]]

    # Only keep samples whose emotion is one of the observed ones.
    if emotion not in observed_emotions:
      continue

    x.append(extract_feature(file, mfcc=True, chroma=True, mel=True))
    y.append(emotion)

  # Fail with an actionable message instead of an opaque split error.
  if not x:
    raise ValueError(
        f"No usable sound files matched {data_pattern!r}; "
        "check the dataset location passed as data_pattern."
    )

  return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
  
  

In [None]:
# Split the dataset into training and testing sets; 25% of it becomes the test set.
x_train, x_test, y_train, y_test = load_data(test_size=0.25)

In [None]:
# Show (number of training samples, number of testing samples).
print(tuple(split.shape[0] for split in (x_train, x_test)))

In [None]:
# x_train.shape[1] is the column count, i.e. the length of each sample's feature vector.
print(f'Features extracted: {x_train.shape[1]}') # number of features (num cols)