In [1]:
!pip install awkward

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import numpy as np
import pathlib

from tqdm.notebook import tqdm

import librosa
import awkward as ak
import matplotlib.pyplot as plt

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# IMPORT

## LOCAL IMPORT

In [4]:
!ls drive Othercomputers

ls: cannot access 'Othercomputers': No such file or directory
drive:
MyDrive  Othercomputers


In [5]:
dataset_folder = pathlib.Path("drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly")  # change it with your file location

In [6]:
file_list = list(dataset_folder.iterdir())
for i, path in enumerate(file_list):
    if path.suffix != ".wav":
        file_list.pop(i)
file_list = sorted(file_list)
file_list

[PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-01.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-02.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-03.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-04.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-05.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-06.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/RavdessAudioOnly/03-01-01-01-01-01-07.wav'),
 PosixPath('drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Time Series/DATASET/Ravde

In [7]:
array = list()
for path in tqdm(file_list):
    librosa_audio_segment, sr = librosa.load(path, sr=None)
    array.append([librosa_audio_segment])

  0%|          | 0/2452 [00:00<?, ?it/s]

In [8]:
b = ak.ArrayBuilder()
for path in tqdm(file_list):
    librosa_audio_segment, sr = librosa.load(path, sr=None)
    b.begin_list()
    b.begin_list()
    for value in librosa_audio_segment:
        b.real(value)
    b.end_list() 
    b.end_list() 
array = b.snapshot()

  0%|          | 0/2452 [00:00<?, ?it/s]

Filename identifiers 

Modality (01 = full-AV, 02 = video-only, 03 = audio-only).
Vocal channel (01 = speech, 02 = song).
Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).
Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
Repetition (01 = 1st repetition, 02 = 2nd repetition).
Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

In [9]:
CATEGORICAL_FEATURES_NAMES = {
    "modality": {"01": "full-AV", "02": "video-only", "03": "audio-only"}, 
    "vocal_channel": {"01": "speech", "02": "song"},
    "emotion": {"01" : "neutral", "02" : "calm", "03" : "happy", "04" : "sad", "05" : "angry", "06" : "fearful", "07" : "disgust", "08" : "surprised"},
    "emotional_intensity": {"01" : "normal", "02" : "strong"},
    "statement": {"01" : "Kids are talking by the door", "02" : "Dogs are sitting by the door"},
    "repetition": {"01" : "1st", "02" : "2nd"},
    "actor": {str(i).zfill(2): str(i).zfill(2) for i in range(1, 25)}
}

In [10]:
df = list()
file_names = list()
for path in file_list:
    stem = path.stem.split("-")
    file_names.append(path.name)
    df.append(stem)
df = pd.DataFrame(df)
df.columns = list(CATEGORICAL_FEATURES_NAMES.keys())

for column in df.columns:
    df = df.replace(CATEGORICAL_FEATURES_NAMES)

df["sex"] = ["F" if i % 2 == 0 else "M" for i in df["actor"].astype(int)]
df["filename"] = file_names

In [11]:
df.head()

Unnamed: 0,modality,vocal_channel,emotion,emotional_intensity,statement,repetition,actor,sex,filename
0,audio-only,speech,neutral,normal,Kids are talking by the door,1st,1,M,03-01-01-01-01-01-01.wav
1,audio-only,speech,neutral,normal,Kids are talking by the door,1st,2,F,03-01-01-01-01-01-02.wav
2,audio-only,speech,neutral,normal,Kids are talking by the door,1st,3,M,03-01-01-01-01-01-03.wav
3,audio-only,speech,neutral,normal,Kids are talking by the door,1st,4,F,03-01-01-01-01-01-04.wav
4,audio-only,speech,neutral,normal,Kids are talking by the door,1st,5,M,03-01-01-01-01-01-05.wav


In [12]:
df_sorted_actor = df.sort_values(by=["actor", "filename"])
idxs_sorted = list(df_sorted_actor.index)

In [13]:
idx_train = list(df_sorted_actor[df_sorted_actor.actor.astype(int) < 19].index)
idx_test = list(df_sorted_actor[df_sorted_actor.actor.astype(int) >= 19].index)

In [14]:
X_train = array[idx_train]
X_test = array[idx_test]
Y_train = df_sorted_actor[df_sorted_actor.actor.astype(int) < 19]
Y_test = df_sorted_actor[df_sorted_actor.actor.astype(int) >= 19]

# TO NUMPY

In [15]:
def pad_X(X, m_max, nan_value=0):
    return ak.fill_none(ak.pad_none(X, m_max, axis=2, clip=True), value=nan_value)

In [16]:
# find the max length of X_train
maximum = 0
for ts in X_train:
    length = len(np.asarray(np.ravel(ts)))
    if length > maximum:
        maximum = length
maximum

304304

In [17]:
#%%time
#X_train = np.squeeze(np.array(pad_X(X_train, maximum, np.nan)))
#X_test = np.squeeze(np.array(pad_X(X_test, maximum, np.nan)))

In [18]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

AttributeError: ignored

In [19]:
np.save("RavdessAudioOnlyNumpy__X_train.npy", X_train)
np.save("RavdessAudioOnlyNumpy__X_test.npy", X_test)

ValueError: ignored

In [20]:
Y_train.to_csv("RavdessAudioOnlyNumpy__Y_train.csv", index=False)
Y_test.to_csv("RavdessAudioOnlyNumpy__Y_test.csv", index=False)

# FROM NUMPY

In [None]:
X_train = np.load("RavdessAudioOnlyNumpy__X_train.npy")
X_test = np.load("RavdessAudioOnlyNumpy__X_test.npy")
print(X_train.shape, X_test.shape)

In [None]:
X_train