In [1]:
# Required at the top of every notebook: switch the working directory to the parent
# folder, because later cells use paths relative to it (e.g. 'data/annotations.xlsx').
import os
import warnings

# A plain os.chdir("..") climbs one more level every time this cell is re-run.
# The target directory is therefore computed only on the first execution in a kernel
# session and reused afterwards, which makes re-running the cell harmless.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
# Imports genéricos
import stable_whisper
import pandas as pd
import soundfile as sf
import ast

# Key input locations, all relative to the current working directory
dir_trans = 'data/TRANSCRIPCIONES'   # directory with the transcription files
dir_audios = 'data/MSPCORPUS/Audio'  # directory with the audio files

# Annotation table, read from the Excel workbook
df_annotations = pd.read_excel(io='data/annotations.xlsx')

In [3]:
# Build "trans_dict": for every audio listed in the annotations, the list of
# (start, end) pairs of the segments found in its transcription file.
audios_name = df_annotations['Audio_Name'].unique()
trans_dict = {}

for audio_name in audios_name:
    # The transcription of an audio is stored as "<audio name>.json" inside dir_trans
    transcription_file = audio_name + '.json'
    whisper_result = stable_whisper.WhisperResult(f'{dir_trans}/{transcription_file}')

    # One (start, end) tuple per segment, in the order the result yields them
    trans_dict[audio_name] = [(seg.start, seg.end) for seg in whisper_result]

In [4]:
# Import opensmile and choose the feature set / feature level used for extraction
import opensmile

smile_feature_set = opensmile.FeatureSet.ComParE_2016
smile_feature_level = opensmile.FeatureLevel.Functionals

# Extractor shared by the feature-extraction cell further down
smile = opensmile.Smile(
    feature_set=smile_feature_set,
    feature_level=smile_feature_level,
)

In [28]:
# Extract openSMILE features for every audio in the annotations, one CSV per audio.
# Audios that already have a CSV are skipped, so an interrupted run can be resumed.
# Audios that fail are collected in `opensmile_exception` and written to a text file.
from src.futil import split_audio

# Output locations; the file names are kept exactly as they were.
FEATURES_DIR = 'data/FEATURES/OPENSMILE'
FEATURES_SUFFIX = '_features.csv'
EXCEPTIONS_LOG = 'data/feature_exctraction_opensmile_exceptions.txt'
# Sampling rate passed to openSMILE for every segment.
# NOTE(review): assumes split_audio yields 16 kHz signals -- confirm against src.futil.
SAMPLING_RATE = 16000

# Make sure the output directory exists before listing it or writing into it.
os.makedirs(FEATURES_DIR, exist_ok=True)

# Recover the audio name of each finished CSV by stripping the known suffix rather
# than slicing a fixed number of characters, so names of any length are matched.
processed_files = [
    file_name[:-len(FEATURES_SUFFIX)]
    for file_name in os.listdir(FEATURES_DIR)
    if file_name.endswith(FEATURES_SUFFIX)
]

audios_name = df_annotations['Audio_Name'].unique()

opensmile_exception = []
for audio_name in audios_name:

    if str(audio_name) in processed_files:
        print('Skipping', audio_name)
        continue

    print('Processing Audio Name:', audio_name)
    try:
        # Segment lookup and audio splitting are inside the try block so that one
        # broken audio is logged instead of aborting the whole run.
        segments = trans_dict[audio_name]
        audio_data = split_audio(f'{dir_audios}/{audio_name}', segments)
        # Only the first element of split_audio's result is used here and is treated
        # as the per-segment signals -- presumably one per entry in `segments`; verify.
        audio_segments_data = audio_data[0]

        # Collect one single-row frame per segment and concatenate once at the end
        # instead of growing a DataFrame inside the loop (which is quadratic).
        segment_features = []
        for counter, audio_segment_data in enumerate(audio_segments_data):
            audio_features_temp = smile.process_signal(
                audio_segment_data, sampling_rate=SAMPLING_RATE
            ).reset_index(drop=True)
            # Wrapped in a list so the (start, end) tuple lands in the frame's row
            audio_features_temp['segment'] = [segments[counter]]
            segment_features.append(audio_features_temp)

        if segment_features:
            # The previous implementation prepended every new segment, so rows ended
            # up in reverse segment order. That order is kept on purpose so new CSVs
            # stay consistent with the ones already generated.
            audio_features = pd.concat(segment_features[::-1], ignore_index=True)
        else:
            audio_features = pd.DataFrame()

        audio_features.to_csv(f'{FEATURES_DIR}/{audio_name}{FEATURES_SUFFIX}', index=False)
        print(audio_name, ' Procesado con éxito')

    except Exception as error:
        # `except Exception` (not a bare except) lets Ctrl-C / kernel interrupts
        # through; the error is reported instead of being silently discarded.
        print(f'{audio_name} failed: {error!r}')
        opensmile_exception.append(audio_name)
        # Rewrite the whole list each time so the file is complete even if the run
        # is stopped right after a failure.
        with open(EXCEPTIONS_LOG, 'w') as f:
            f.writelines(f'{failed_audio}\n' for failed_audio in opensmile_exception)

Skipping MSP-Conversation_0002.wav
Skipping MSP-Conversation_0021.wav
Skipping MSP-Conversation_0023.wav
Skipping MSP-Conversation_0035.wav
Skipping MSP-Conversation_0043.wav
Skipping MSP-Conversation_0046.wav
Skipping MSP-Conversation_0047.wav
Skipping MSP-Conversation_0053.wav
Skipping MSP-Conversation_0054.wav
Skipping MSP-Conversation_0055.wav
Skipping MSP-Conversation_0061.wav
Skipping MSP-Conversation_0067.wav
Skipping MSP-Conversation_0079.wav
Skipping MSP-Conversation_0081.wav
Skipping MSP-Conversation_0083.wav
Skipping MSP-Conversation_0087.wav
Skipping MSP-Conversation_0088.wav
Skipping MSP-Conversation_0094.wav
Skipping MSP-Conversation_0101.wav
Skipping MSP-Conversation_0103.wav
Skipping MSP-Conversation_0110.wav
Skipping MSP-Conversation_0114.wav
Skipping MSP-Conversation_0125.wav
Skipping MSP-Conversation_0130.wav
Skipping MSP-Conversation_0135.wav
Skipping MSP-Conversation_0140.wav
Skipping MSP-Conversation_0147.wav
Skipping MSP-Conversation_0153.wav
Skipping MSP-Convers

MSP-Conversation_1512.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1523.wav
MSP-Conversation_1523.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1536.wav
MSP-Conversation_1536.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1540.wav
MSP-Conversation_1540.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1545.wav
MSP-Conversation_1545.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1560.wav
MSP-Conversation_1560.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1568.wav
MSP-Conversation_1568.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1599.wav
MSP-Conversation_1599.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1609.wav
MSP-Conversation_1609.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1621.wav
MSP-Conversation_1621.wav  Procesado con éxito
Processing Audio Name: MSP-Conversation_1630.wav
MSP-Conversation_1630.wav  Procesado con