In [1]:
import librosa
import librosa.display as librosa_display
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd
import os
import time
from torchaudio.functional import compute_deltas
import torch
from sklearn.cluster import KMeans

In [2]:
def prepare_RAVDESS_DS(path_audios):
    """Index the ``.wav`` files found one directory level below ``path_audios``.

    File names are parsed as dash-separated numeric fields: the third field is
    read as the (1-based) emotion code and the last field as the actor id,
    e.g. ``03-01-05-01-02-01-12.wav`` -> emotion 4, actor 12.

    :param path_audios: [str | Path] Root directory; files are matched with ``*/*.wav``.
    :return: ``(wav_paths, emotions, actors)`` - three parallel lists holding the
        ``Path`` of each file, its zero-based emotion label and its actor id.
    :raises ValueError: if a field that should be numeric is not.
    :raises IndexError: if a file name has fewer than three dash-separated fields.
    """
    wav_paths, emotions, actors = [], [], []

    # Sorted so the sample order does not depend on filesystem enumeration order.
    for path in tqdm(sorted(Path(path_audios).glob("*/*.wav"))):
        # Path.name is separator-agnostic; splitting str(path) on '/' leaves the
        # directory part glued to the file name on Windows.
        fields = path.name.split('.')[0].split("-")
        label = int(fields[2]) - 1  # Start emotions in 0
        actor = int(fields[-1])

        wav_paths.append(path)
        emotions.append(label)
        actors.append(actor)

    return wav_paths, emotions, actors

In [3]:
# Index every recording under ./dataset: file paths, zero-based emotion labels and actor ids.
wav_paths, emotions, actors = prepare_RAVDESS_DS('dataset')

1440it [00:00, 48161.18it/s]


In [4]:
# Sampling rate passed as `sr` to librosa.load and to the mel-spectrogram call.
sample_rate = 16000

# Analysis window and hop, multiplied by sample_rate below to obtain sample counts
# (so these are durations in seconds, assuming sample_rate is in Hz).
frame_length = 0.025
frame_stride = 0.010

def scaled(li, min_v=-1, max_v=1):
    """Wrap every element of ``li`` into a window that starts at ``min_v``.

    Each value is reduced modulo ``max_v - min_v + 1`` and then shifted by
    ``min_v``. The results are returned as a NumPy array.
    """
    period = max_v - min_v + 1
    wrapped = []
    for value in li:
        wrapped.append(value % period + min_v)
    return np.array(wrapped)

In [5]:
def clustering(x, n_clusters=4):
    """Fit K-Means on ``x`` and report the cluster assigned to every sample.

    :param x: [array-like, (n_samples, n_features)] Points to cluster.
    :param n_clusters: [int] Number of clusters to fit.
    :return: ``(labels, cluster_center)`` - the predicted cluster index for each
        row of ``x`` and the fitted ``cluster_centers_``.
    """
    # n_init is pinned to the historical default of 10: newer scikit-learn
    # releases default to 'auto', so leaving it implicit makes the result depend
    # on the installed version even with a fixed random_state.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=2022)
    kmeans.fit(x)
    result = kmeans.predict(x)

    return result, kmeans.cluster_centers_

def cal_dist(x, predict, cluster_center):
    """Distance from every sample to the centre of the cluster it was assigned to.

    :param x: [array-like, (n_samples, n_features)] Points that were clustered.
    :param predict: [array-like of int] Cluster index for each row of ``x``.
    :param cluster_center: [array-like, (n_clusters, n_features)] Cluster centres.
    :return: [list] Euclidean distance of each row of ``x`` to its own centre,
        in row order.
    """
    if len(x) == 0:
        return []

    points = np.asarray(x)
    # Only the labels that correspond to rows of x are used.
    labels = np.asarray(predict, dtype=int)[:len(points)]
    centers = np.asarray(cluster_center)

    # One vectorised norm over all rows instead of a Python-level loop per point.
    dists = np.linalg.norm(points - centers[labels], axis=1)

    return list(dists)

In [6]:
def _fixed_length_excerpt(signal, start, length):
    """Return ``length`` samples of ``signal`` from offset ``start``, zero-padded at the end."""
    excerpt = np.zeros(length)
    tail = signal[start:start + length]
    excerpt[:len(tail)] = tail
    return excerpt


def get_boaw(paths, start=10000, length=50000, n_clusters=4):
    """Build one distance-to-cluster-centre vector per audio file.

    For every file a fixed-length excerpt of the waveform is taken, each sample
    is turned into a 2-D point ``(sample index, amplitude)``, the points are
    clustered with ``clustering`` and the distance of every point to its own
    cluster centre (``cal_dist``) becomes the feature vector.

    :param paths: [iterable] Audio files, loaded with ``librosa.load`` at ``sample_rate``.
    :param start: [int] First sample of the excerpt (default 10000).
    :param length: [int] Number of samples in the excerpt (default 50000);
        shorter recordings are zero-padded at the end.
    :param n_clusters: [int] Number of clusters fitted per file (default 4).
    :return: [np.ndarray] One row of ``length`` distances per file.
    """
    ret = []
    # The index column is identical for every file, so build it once.
    sample_index = np.arange(length, dtype=float)

    for path in tqdm(paths, desc='get features, size.....'):
        y = librosa.load(path, sr=sample_rate)[0]

        # (index, amplitude) pairs assembled directly as an array instead of
        # appending 50 000 two-element lists and wrapping them in a DataFrame.
        points = np.empty((length, 2))
        points[:, 0] = sample_index
        points[:, 1] = _fixed_length_excerpt(y, start, length)

        pred, cluster_center = clustering(points, n_clusters=n_clusters)
        ret.append(cal_dist(points, pred, cluster_center))

    return np.array(ret)

In [7]:
# Slow step: one K-Means fit per file (about 12.5 minutes for 1440 files in the recorded run).
boaw = get_boaw(wav_paths)

get features, size.....: 1440it [12:33,  1.91it/s]


In [8]:
def get_features(paths, n_mfcc=26, n_mels=128):
    """Compute an MFCC matrix for every audio file in ``paths``.

    Each file is loaded at ``sample_rate``, converted to a mel power spectrogram
    (Hamming window of ``frame_length`` and hop of ``frame_stride``, both scaled
    by ``sample_rate``), mapped to dB relative to its own maximum and finally
    reduced to ``n_mfcc`` coefficients.

    :param paths: [iterable] Audio files to process.
    :param n_mfcc: [int] Number of MFCC coefficients per frame (default 26).
    :param n_mels: [int] Number of mel bands in the spectrogram (default 128).
    :return: ``(data, sz)`` - the list of MFCC matrices and the list of their
        frame counts (``shape[1]``), in input order.
    """
    # Window and hop in samples do not depend on the file, so compute them once.
    input_nfft = int(round(sample_rate * frame_length))
    input_stride = int(round(sample_rate * frame_stride))

    data = []
    sz = []
    for path in tqdm(paths, desc='get features, size.....'):
        y = librosa.load(path, sr=sample_rate)[0]

        S = librosa.feature.melspectrogram(
            y=y,
            sr=sample_rate,
            n_mels=n_mels,
            n_fft=input_nfft,
            window='hamming',
            hop_length=input_stride,
        )
        log_S = librosa.power_to_db(S, ref=np.max)
        mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=n_mfcc)

        data.append(mfcc)
        sz.append(mfcc.shape[1])

    return data, sz

In [9]:
# MFCC matrices per file plus their frame counts (frame counts differ between recordings).
features, sz = get_features(wav_paths)

get features, size.....: 1440it [01:20, 17.81it/s]


In [10]:
def make_mfcc(features, sz):
    """Bring every MFCC matrix to exactly ``sz`` frames and stack the results.

    Matrices with more than ``sz`` frames are truncated; matrices with fewer are
    zero-padded on the right, so the output is always a regular array.

    :param features: [iterable of np.ndarray, each (n_coeffs, n_frames)] MFCC matrices.
    :param sz: [int] Number of frames to keep per matrix.
    :return: [np.ndarray, (n_files, n_coeffs, sz)] Stacked, equal-length matrices.
    """
    ret = []

    for feature in tqdm(features, desc='Make dataframe.....'):
        feature = np.asarray(feature)
        clipped = feature[:, :sz]

        # Copy into a zero buffer so short inputs are really padded; the buffer
        # follows the input's coefficient count and dtype instead of assuming 26.
        mfcc_feature = np.zeros((feature.shape[0], sz), dtype=feature.dtype)
        mfcc_feature[:, :clipped.shape[1]] = clipped

        ret.append(mfcc_feature)

    return np.array(ret)

In [11]:
# Cut every MFCC matrix to the shortest frame count so they stack into one array.
mfccs = make_mfcc(features, min(sz))

Make dataframe.....: 1440it [00:00, 240802.08it/s]


In [12]:
def make_df(mfccs, boaw, emotions, actors):
    """Assemble the per-recording tables used for the fold split.

    Prints the shapes of ``mfccs`` and ``boaw``, then returns two DataFrames with
    one row per entry of ``mfccs``:

    * MFCC table with columns ``feature``, ``emotion`` and ``actor``;
    * BoAW table with columns ``feature`` and ``actor``.
    """
    print(mfccs.shape, boaw.shape)

    mfcc_rows = []
    boaw_rows = []
    for idx, _ in tqdm(enumerate(mfccs), desc='Make dataframe.....'):
        mfcc_rows.append(dict(feature=mfccs[idx], emotion=emotions[idx], actor=actors[idx]))
        boaw_rows.append(dict(feature=boaw[idx], actor=actors[idx]))

    mfcc_table = pd.DataFrame(mfcc_rows)
    boaw_table = pd.DataFrame(boaw_rows)
    return mfcc_table, boaw_table

In [13]:
# One row per recording; the 'actor' column is what the fold split below filters on.
mfcc_df, boaw_df = make_df(mfccs, boaw, emotions, actors)

(1440, 26, 294) (1440, 50000)


Make dataframe.....: 1440it [00:00, 1444927.69it/s]


In [14]:
# Subject-wise 5-fold split: actor ids held out as the test set of each fold.
_ACTORS_PER_FOLD = {
    0: [2, 5, 14, 15, 16],
    1: [3, 6, 7, 13, 18],
    2: [10, 11, 12, 19, 20],
    3: [8, 17, 21, 23, 24],
    4: [1, 4, 9, 22],
}


def _split_by_fold(fold, df):
    """Return ``(train_df, test_df)`` for ``fold``, each with a fresh 0..n-1 index."""
    is_test = df['actor'].isin(_ACTORS_PER_FOLD[fold])
    train_df = df.loc[~is_test].reset_index(drop=True)
    test_df = df.loc[is_test].reset_index(drop=True)
    return train_df, test_df


def _column_to_array(df, column):
    """Stack the values of ``df[column]`` into a single NumPy array."""
    return np.array(list(df[column]))


def _save_arrays(save_path, arrays):
    """Write ``arrays`` one after another into the single file ``save_path + '.npy'``."""
    with open(save_path+'.npy', 'wb') as f:
        for array in arrays:
            np.save(f, array)


def generate_train_test_mfcc(fold, df, save_path=""):
    """
    Divide the MFCC data in train and test in a subject-wise 5-CV way and save it.

    The arrays are written back to back into ONE file, ``save_path + '.npy'``, in
    the order X_train, y_train, X_test, y_test; read them back with four
    consecutive ``np.load`` calls on the same open file handle. The shapes of the
    four arrays are printed.

    :param fold:[int] Fold to create the train and test sets [ranging from 0 - 4]
    :param df:[DataFrame] Rows with 'feature', 'emotion' and 'actor' columns
    :param save_path:[str] Output path without extension ('.npy' is appended)
    :raises KeyError: if ``fold`` is not one of 0 - 4
    """
    train_df, test_df = _split_by_fold(fold, df)

    X_train = _column_to_array(train_df, 'feature')
    y_train = _column_to_array(train_df, 'emotion')
    X_test = _column_to_array(test_df, 'feature')
    y_test = _column_to_array(test_df, 'emotion')

    _save_arrays(save_path, [X_train, y_train, X_test, y_test])

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


def generate_train_test_boaw(fold, df, save_path=""):
    """
    Divide the BoAW data in train and test in a subject-wise 5-CV way and save it.

    The arrays are written back to back into ONE file, ``save_path + '.npy'``, in
    the order X_train, X_test; read them back with two consecutive ``np.load``
    calls on the same open file handle. The shapes of both arrays are printed.

    :param fold:[int] Fold to create the train and test sets [ranging from 0 - 4]
    :param df:[DataFrame] Rows with 'feature' and 'actor' columns
    :param save_path:[str] Output path without extension ('.npy' is appended)
    :raises KeyError: if ``fold`` is not one of 0 - 4
    """
    train_df, test_df = _split_by_fold(fold, df)

    X_train = _column_to_array(train_df, 'feature')
    X_test = _column_to_array(test_df, 'feature')

    _save_arrays(save_path, [X_train, X_test])

    print(X_train.shape, X_test.shape)

In [15]:
# Write one MFCC file and one BoAW file per cross-validation fold.
save_root = 'HBN dataset'
os.makedirs(save_root, exist_ok=True)  # loop-invariant, so done once up front

for fold in range(5):
    mfcc_save_path = os.path.join(save_root, "mfcc_fold"+str(fold))
    boaw_save_path = os.path.join(save_root, "boaw_fold"+str(fold))

    print(mfcc_save_path, boaw_save_path)

    # Both writers close their output file before returning, so no pause is
    # needed between the calls (the former sleeps added 50 s of idle time).
    generate_train_test_mfcc(fold, mfcc_df, mfcc_save_path)
    generate_train_test_boaw(fold, boaw_df, boaw_save_path)

HBN dataset\mfcc_fold0 HBN dataset\boaw_fold0
(1140, 26, 294) (1140,) (300, 26, 294) (300,)
(1140, 50000) (300, 50000)
HBN dataset\mfcc_fold1 HBN dataset\boaw_fold1
(1140, 26, 294) (1140,) (300, 26, 294) (300,)
(1140, 50000) (300, 50000)
HBN dataset\mfcc_fold2 HBN dataset\boaw_fold2
(1140, 26, 294) (1140,) (300, 26, 294) (300,)
(1140, 50000) (300, 50000)
HBN dataset\mfcc_fold3 HBN dataset\boaw_fold3
(1140, 26, 294) (1140,) (300, 26, 294) (300,)
(1140, 50000) (300, 50000)
HBN dataset\mfcc_fold4 HBN dataset\boaw_fold4
(1200, 26, 294) (1200,) (240, 26, 294) (240,)
(1200, 50000) (240, 50000)


In [16]:
# x, pred, cluster_center = [],[],[]
# plt.scatter(x[:, 0], x[:, 1], c=pred, s=50, cmap='viridis')

# centers = cluster_center
# plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.5);