In [1]:
import pandas as pd
import pickle
import numpy as np
import librosa
import matplotlib.pyplot as plt
import random
import uuid
import cv2
import os
from tqdm import tqdm
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
data_dir = '/content/drive/MyDrive/Dataset/pre-processed/'
labels_df_path = '{}df_iemocap.csv'.format(data_dir)
audio_vectors_path = '{}audio_vectors_'.format(data_dir)

In [4]:
labels_df = pd.read_csv(labels_df_path)
labels_df = labels_df.drop(labels_df[labels_df.emotion  == 'fea'].index)
labels_df = labels_df.drop(labels_df[labels_df.emotion  == 'sur'].index)
labels_df = labels_df.drop(labels_df[labels_df.emotion  == 'xxx'].index)
labels_df = labels_df.drop(labels_df[labels_df.emotion  == 'dis'].index)
labels_df = labels_df.drop(labels_df[labels_df.emotion  == 'oth'].index)
labels_df.head()

Unnamed: 0,start_time,end_time,wav_file,emotion,val,act,dom
0,15.14,17.21,Ses01F_script02_1_F000,neu,2.5,2.0,2.0
1,25.91,27.4125,Ses01F_script02_1_F001,fru,2.5,2.0,2.5
4,49.22,51.4,Ses01F_script02_1_F004,neu,2.0,3.0,3.0
6,54.72,57.69,Ses01F_script02_1_F006,ang,2.0,3.5,3.0
7,57.75,59.57,Ses01F_script02_1_F007,ang,2.0,3.5,2.5


In [5]:
labels_df.groupby('emotion').count()

Unnamed: 0_level_0,start_time,end_time,wav_file,val,act,dom
emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ang,1103,1103,1103,1103,1103,1103
exc,1041,1041,1041,1041,1041,1041
fru,1849,1849,1849,1849,1849,1849
hap,595,595,595,595,595,595
neu,1708,1708,1708,1708,1708,1708
sad,1084,1084,1084,1084,1084,1084


In [6]:
emotion_dict = {'ang': 0,
                'exc': 1,
                'hap': 2,
                'sad': 3,
                'fru': 4,
                'neu': 5}

In [7]:
list(emotion_dict.values())

[0, 1, 2, 3, 4, 5]

In [8]:
save_dir = '/content/drive/MyDrive/Dataset/Images/'

In [9]:
for index in range(6):
  newpath = '{}/{}'.format(save_dir, index)
  if not os.path.exists(newpath):
    os.makedirs(newpath)

In [10]:
# Parameters
sr = 44100
n_mels = 128  # Number of Mel bands to generate
n_fft = 1024  # Length of the FFT window
hop_length = 512  # Number of samples between successive frames


In [None]:
for sess in (range(1, 2)):
        audio_vectors = pickle.load(open('{}{}.pkl'.format(audio_vectors_path, sess), 'rb'))
        for index, row in tqdm(labels_df[labels_df['wav_file'].str.contains('Ses0{}'.format(sess))].iterrows()):
            try:
                wav_file_name = row['wav_file']
                label = emotion_dict[row['emotion']]
                if label != -1:
                  y = audio_vectors[wav_file_name]
                  # Generate Mel spectrogram
                  mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmax=6000)
                  # Convert to decibels (more perceptually meaningful)
                  mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
                  mel_spectrogram_db = cv2.resize(mel_spectrogram_db, (128, 128), interpolation=cv2.INTER_LINEAR)

                  plt.figure(figsize=(128, 128), dpi=300)
                  librosa.display.specshow(mel_spectrogram_db, sr=sr)
                  plt.savefig('{}{}/mel_spectrogram_{}.png'.format(save_dir, label, uuid.uuid4().hex), bbox_inches='tight', pad_inches=0)
                  plt.close()
                  # np.save('{}{}/mel_spectrogram_{}.npy'.format(save_dir, label, uuid.uuid4().hex), mel_spectrogram_db)
            except Exception as e:
                print('Some exception occurred: {}'.format(e))

2it [01:44, 51.08s/it]