In [1]:
import wave
import sys
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
import librosa.display
import numpy as np
from scipy import stats
from spafe.frequencies.fundamental_frequencies import FundamentalFrequenciesExtractor
from spafe.frequencies.dominant_frequencies import get_dominant_frequencies 
from scipy.fft import fft,fftfreq,fftshift
from spafe.features.spfeats import extract_feats
import librosa
import os
import pandas as pd
from tqdm import tqdm
import re

In [318]:
def spectral_properties(y: np.ndarray, fs: int) -> dict:
    """ This function takes signal and its rate,
    Outputs the feature values which we are going to explain here"""
    spec = np.abs(np.fft.rfft(y)) # Uses one dimensional Fast Furie Transformation and finds the absolute value of it 
    freq = np.fft.rfftfreq(len(y), d=1 / fs) # Discrete fast furie transformation
    amp = spec / spec.sum()
    mean = (freq * amp).sum()
    sd = np.sqrt(np.sum(amp * ((freq - mean) ** 2)))
    amp_cumsum = np.cumsum(amp)
    median = freq[len(amp_cumsum[amp_cumsum <= 0.5]) + 1]
    mode_val = freq[amp.argmax()]
    
    Q25 = freq[len(amp_cumsum[amp_cumsum <= 0.25]) + 1]
    Q75 = freq[len(amp_cumsum[amp_cumsum <= 0.75]) + 1]
    IQR = Q75 - Q25
    z = amp - amp.mean()
    w = amp.std()
    skew = ((z ** 3).sum() / (len(spec) - 1)) / w ** 3
    kurt = ((z ** 4).sum() / (len(spec) - 1)) / w ** 4
    sp_entr = -np.sum(amp*  np.log(amp))/np.log(len(amp))
    
    spectral_flatness = np.mean(librosa.feature.spectral_flatness(y,))
    
    fft_mode = np.mean(stats.mode(freq).mode)
    fft_mode_max = np.max(stats.mode(freq).mode)
    fft_mode_min = np.min(stats.mode(freq).mode)

    normalized_frequencies = np.linspace(0, 1, len(spec))
    spectral_centroid = np.sum(amp * normalized_frequencies)
    
    peakf = max(freq)
    mnf = np.mean(freq)
    
    fund_freqs_extractor = FundamentalFrequenciesExtractor(debug = False)
    pitches, harmonic_rates, argmins, times = fund_freqs_extractor.main(sig = y,fs = fs)
    meanf = np.mean(pitches)
    minf = min(pitches)
    maxf = max(pitches)
    
    
    dom_freq = get_dominant_frequencies(sig = np.array(y).reshape(-1,1),fs = int(fs),
                                        lower_cutoff = 50,
                                        upper_cutoff = 30000,
                                        nfft = 512,
                                        win_len = 0.02,
                                        win_hop = 0.015,
                                        win_type = 'hamming',
                                        debug = False)
    diff = np.abs(dom_freq[:-1] - dom_freq[1:])
    min_max = dom_freq.max() - dom_freq.min()
    if min_max == 0:
        mod_id = 0
    else:
        mod_id = diff.mean()/min_max
    
    dom_mean = dom_freq.mean()
    dom_min = dom_freq.min()
    dom_max = dom_freq.max()
    dom_max_min = dom_max - dom_min
    
    
    result_d = {
        'mean': mean,
        'sd': sd,
        'median': median,
        'mode_val': mode_val,
        'Q25': Q25,
        'Q75': Q75,
        'IQR': IQR,
        'skew': skew,
        'kurt': kurt,
        'sp.ent':sp_entr,
        'sfm' : spectral_flatness,
        'fft_mode' : fft_mode,
        'fft_max' :fft_mode_max,
        'fft_min' : fft_mode_min,
        'centroid' : spectral_centroid,
        'peakf' : peakf,
        'mnf' : mnf,
        'meanf' : meanf,
        'minf' : minf,
        'maxf' : maxf,
        'modid' : mod_id,
        'dom_mean' : dom_mean,
        'dom_max' : dom_max,
        'dom_min' : dom_min,
        'dom_max_min' : dom_max_min
    }

    return result_d

In [319]:
df_info = pd.read_csv('../CREMA-D/SentenceFilenames.csv')
df_demo = pd.read_csv('../CREMA-D/VideoDemographics.csv')

In [320]:
def find_sex(id : np.int64):
    return df_demo[df_demo["ActorID"]==id]["Sex"]

In [322]:
sample_rate = 22000

In [323]:
sample_rate * 0.03

660.0

In [457]:
import warnings
warnings.filterwarnings('ignore')

data_folder = '../CREMA-D/AudioWAV/'
list_of_data = []
names = []
labels = []
for each in tqdm(os.listdir(data_folder)):

    sample, sample_rate = librosa.load(data_folder + each)
  #  sample_rate = 22000
    smp = spectral_properties(sample,sample_rate)
    names = list(smp.keys()) + ["label"]
    lb = find_sex(int(re.findall(r'\d+', each)[0])).values[0]
    labels.append(lb)
    list_of_data.append(list(smp.values()) + [lb])

100%|██████████| 7442/7442 [15:24<00:00,  8.05it/s]


In [465]:
base = list_of_data.copy()

In [466]:
list_of_data = base.copy()

In [467]:
df = pd.DataFrame(data = list_of_data,columns=names)

In [468]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [469]:
df.head()

Unnamed: 0,mean,sd,median,mode_val,Q25,Q75,IQR,skew,kurt,sp.ent,...,mnf,meanf,minf,maxf,modid,dom_mean,dom_max,dom_min,dom_max_min,label
0,3123.714325,2776.66175,2493.191221,225.652096,285.150207,6052.941176,5767.79097,10.325403,172.26341,0.900765,...,5512.389818,177.78367,0.0,200.454545,0.111111,699.8,5363.0,1.0,5362.0,Female
1,1755.664449,2350.503794,413.008993,220.873597,185.566665,2928.012065,2742.4454,9.883651,152.125737,0.849399,...,5512.397364,133.30241,0.0,200.454545,0.125,172.111111,344.0,1.0,343.0,Female
2,1902.088347,2157.992567,811.710824,478.668785,479.090892,2435.132471,1956.041579,5.459985,42.105785,0.878289,...,5512.5,164.437844,0.0,200.454545,0.041667,807.0,3776.0,1.0,3775.0,Female
3,1365.668177,1925.862107,508.437133,261.908843,229.38996,1444.453634,1215.063675,9.831893,161.079525,0.837142,...,5512.390139,147.891693,0.0,200.454545,0.1,386.545455,2017.0,1.0,2016.0,Female
4,1335.107028,1496.630294,704.061853,557.90101,463.841666,1609.1525,1145.310834,6.952778,64.861993,0.839737,...,5512.384731,166.020656,0.0,200.454545,0.05,564.285714,2274.0,1.0,2273.0,Female


In [470]:
df.to_csv("ExtractedFeatures.csv")

In [471]:
df = pd.read_csv("ExtractedFeatures.csv")

In [423]:
df.dropna(inplace=True)

In [437]:
import warnings
warnings.filterwarnings('ignore')

data_folder = '../CREMA-D/AudioWAV/'
list_of_data = []
names = []
labels = []
for each in tqdm(os.listdir(data_folder)):

    sample, sample_rate = librosa.load(data_folder + each)
    sample_rate = 22000
    smp = extract_feats(sample,sample_rate)
    names = list(smp.keys()) + ["label"]
    lb = find_sex(int(re.findall(r'\d+', each)[0])).values[0]
    labels.append(lb)
    list_of_data.append(list(smp.values()) + [lb])

100%|██████████| 7442/7442 [19:26<00:00,  6.38it/s]


In [438]:
def transform_loss():
    for indx in range(len(list_of_data)):
        lst = []
        for each in list_of_data[indx]:
            if type(each) == list or type(each) ==np.ndarray:
                continue
            if type(each) == tuple:
                continue
            if type(each) == np.complex128:
                continue            
            lst.append(each)
        list_of_data[indx] = lst
g = 0
def transform():
    for indx in range(len(list_of_data)):
        lst = []
        for each in list_of_data[indx]:
            if type(each) == list or type(each) ==np.ndarray:
                if(len(each) == 0):
                    continue
                else:
                    lst.append(np.max(each))
                    lst.append(np.min(each))
                    lst.append(np.mean(each))
                    lst.append(np.argmax(each))
                    lst.append(np.argmin(each))
                    lst.append(int(stats.mode(each).mode))
                    continue
            if type(each) == tuple:
                rl,bl = each
                each = rl
                
            if type(each) == np.complex128:
                each = each.real
                
            lst.append(each)
        list_of_data[indx] = lst

In [439]:
base2 = list_of_data.copy()

In [441]:
list_of_data = base2.copy()

In [442]:
transform()
transform()

In [443]:
df = pd.DataFrame(data = list_of_data)

In [444]:
df.dropna(inplace=True)

In [446]:
df.to_csv("ExtractedFeatures2.csv")