In [2]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import numpy as np
import csv
import IPython.display as ipd
import statistics as stats
from collections import Counter
from math import log
import scipy.stats as scistat
import json

In [3]:
# Shared analysis settings: frame_size is forwarded as `n_fft` and hop_length as
# `hop_length` to the librosa spectral feature calls in extract_all_features.
frame_size = 1024
hop_length = 512

In [4]:
def writer(csv_file, mode, feature_vectors):
    """Emit ``feature_vectors`` as a single CSV row into ``csv_file``.

    ``mode`` is handed straight to ``open`` (e.g. ``'a'`` to append, ``'w'`` to
    overwrite). The file is opened with ``newline=''`` so the csv module is in
    full control of the line terminator.
    """
    handle = open(csv_file, mode, newline='')
    with handle:
        csv.writer(handle).writerow(feature_vectors)

In [5]:
def central_tendency_measures(vector, length, sr):
    """Summarise a 1-D sequence of numbers with twelve descriptive statistics.

    Parameters
    ----------
    vector : array-like of numbers
        Values to summarise; must contain at least one element.
    length : number
        Sample count of the signal the values were derived from. Used only
        for the SMA term, where ``length / sr`` is the per-value weight.
    sr : number
        Sampling rate. Used only for the SMA term.

    Returns
    -------
    numpy.ndarray
        ``[mean, median, std, max, min, sma, mad, entropy, iqr, energy,
        skewness, kurtosis]``. Note that std comes *before* max and min.

    Raises
    ------
    statistics.StatisticsError
        If ``vector`` is empty (for instance a track that was all zeros
        before being passed through ``np.trim_zeros``).
    """
    values = np.asarray(vector, dtype=float)

    # Fail early with a clear message instead of the opaque
    # "no median for empty data" that used to surface from the median step.
    if values.size == 0:
        raise stats.StatisticsError(
            'central_tendency_measures() needs at least one value, got an empty vector'
        )

    # Location and spread
    mean = np.mean(values)
    median = np.median(values)
    maxi = np.max(values)
    mini = np.min(values)
    std = np.std(values)

    # SMA: the sum of absolute values, each weighted by length / sr.
    duration = length / sr
    sma = duration * np.sum(np.abs(values))

    # MAD
    mad = scistat.median_abs_deviation(values)

    # Entropy (base 2) of the empirical distribution of *exact* values.
    # NOTE: values are binned by equality, so a track with no repeated values
    # always yields log2(len(vector)).
    _, counts = np.unique(values, return_counts=True)
    probs = counts / values.size
    # "0.0 - ..." keeps a single-valued vector at +0.0 rather than -0.0.
    ent = 0.0 - np.sum(probs * np.log2(probs))

    # IQR
    q1, q3 = np.quantile(values, [0.25, 0.75])
    iqr = q3 - q1

    # Energy
    energy = np.sum(np.square(values))

    # Shape of the distribution
    skew = scistat.skew(values)
    kurt = scistat.kurtosis(values)

    return np.array([mean, median, std, maxi, mini, sma, mad, ent, iqr, energy, skew, kurt])

In [7]:
# Root folder of the dataset; adjust to wherever the data lives locally.
dataset_path = r'D:\SER-RO-MAHA\Sample Speech Data'

def extract_all_features(dataset_path, frame_size, hop_length, spectral_centroid=True, spectral_bandwidth=True):
    """Walk ``dataset_path`` and append spectral statistics for every file found.

    Each file located in a directory *below* ``dataset_path`` is loaded with
    ``librosa.load``. For every enabled feature, the first row of the librosa
    feature matrix is passed through ``np.trim_zeros`` and summarised with
    ``central_tendency_measures``; the result is appended as one row to:

    * ``spectral_centroid_mesaures.csv``  (when ``spectral_centroid`` is true)
    * ``spectral_bandwidth_mesaures.csv`` (when ``spectral_bandwidth`` is true)

    One label row per file is always appended to ``freq_dom_measures_y.txt``.
    Files sitting directly in ``dataset_path`` are ignored.

    Parameters
    ----------
    dataset_path : str or os.PathLike
        Root directory to walk.
    frame_size : int
        Forwarded to librosa as ``n_fft``.
    hop_length : int
        Forwarded to librosa as ``hop_length``.
    spectral_centroid, spectral_bandwidth : bool
        Toggle the corresponding feature.

    Returns
    -------
    None
        All results are written to the files listed above (append mode).
    """
    # os.walk yields plain strings, so normalise the root once. This lets the
    # root be recognised by *value* (the previous identity check only worked
    # when the very same str object came back, and never for pathlib.Path).
    root_dir = os.fspath(dataset_path)

    for i, (dirpath, dirnames, files) in enumerate(os.walk(root_dir)):

        # The root itself carries no label; only its sub-directories do.
        if dirpath == root_dir:
            continue

        print(f'Processing {dirpath}')

        for file in files:
            print(file)
            file_path = os.path.join(dirpath, file)
            signal, sr = librosa.load(file_path)

            # NOTE(review): the output file names are misspelt ("mesaures") but
            # are kept verbatim because other code may read them by this name.
            if spectral_centroid:
                sc_signal = librosa.feature.spectral_centroid(y=signal, sr=sr, n_fft=frame_size, hop_length=hop_length)[0]
                sc_signal = np.trim_zeros(sc_signal)
                vals = central_tendency_measures(sc_signal, len(signal), sr)
                writer('spectral_centroid_mesaures.csv', 'a', vals)

            if spectral_bandwidth:
                sb_signal = librosa.feature.spectral_bandwidth(y=signal, sr=sr, n_fft=frame_size, hop_length=hop_length)[0]
                sb_signal = np.trim_zeros(sb_signal)
                vals = central_tendency_measures(sb_signal, len(signal), sr)
                writer('spectral_bandwidth_mesaures.csv', 'a', vals)

            # Label = position of this directory in the walk, minus one for the
            # root (index 0). NOTE(review): this assumes a flat one-folder-per-
            # class layout and depends on the order in which os.walk visits the
            # folders -- confirm that order is stable on the target platform.
            writer('freq_dom_measures_y.txt', 'a', [i-1])

In [8]:
# Run the extraction over the whole dataset. Every writer() call inside
# extract_all_features uses append mode ('a'), so re-running this cell adds
# duplicate rows to the three output files unless they are deleted first.
extract_all_features(dataset_path, frame_size, hop_length, spectral_centroid=True, spectral_bandwidth=True)

Processing D:\SER-RO-MAHA\Sample Speech Data\Anger
03-01-05-01-01-01-01.wav
JE_a15.wav
Processing D:\SER-RO-MAHA\Sample Speech Data\Disgust
03-01-07-01-01-01-01.wav
Processing D:\SER-RO-MAHA\Sample Speech Data\Fear
03-01-06-01-01-01-01.wav
Processing D:\SER-RO-MAHA\Sample Speech Data\Happy
03-01-03-01-01-01-01.wav
Processing D:\SER-RO-MAHA\Sample Speech Data\Neutral
03-01-01-01-01-01-01.wav
Processing D:\SER-RO-MAHA\Sample Speech Data\Sad
03-01-04-01-01-01-01.wav
Processing D:\SER-RO-MAHA\Sample Speech Data\Surprise
03-01-08-01-01-01-01.wav
