# Per Question

In [None]:
import pandas as pd
import opensmile
import os
import audiosegment
from pydub import AudioSegment
import soundfile as sf
import tqdm
import argparse
import audiofile
import audtorch
import json
import glob
import torch
import torchaudio

In [None]:
# Default values for every cell parameter.
src_default = '../data/cropped_data/cropped_interview_data/per_question'
src_windowed_default = '../data/cropped_data/cropped_interview_data/per_question/windowed_2000_500'
dst_default = '../data/features/features_interview/question_opensmile.csv'
dst_windowed_default = '../data/features/features_interview/windowed_opensmile.csv'
model_default = 'opensmile'
feature_set_default = 'eGeMAPSv02'
# Use CUDA when torch reports it as available, otherwise the CPU.
device_default = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


def parse_cell_parameters(
    src=src_default,
    src_windowed=src_windowed_default,
    dst=dst_default,
    dst_windowed=dst_windowed_default,
    model=model_default,
    device=device_default,
    feature_set=feature_set_default,
):
    """Bundle the cell parameters into a dictionary keyed by parameter name.

    Every argument defaults to the matching ``*_default`` value defined
    above, so calling the function without arguments yields the defaults.

    Returns:
        dict: Maps ``'src'``, ``'src_windowed'``, ``'dst'``,
        ``'dst_windowed'``, ``'model'``, ``'device'`` and ``'feature_set'``
        to the values passed in.
    """
    keys = ('src', 'src_windowed', 'dst', 'dst_windowed', 'model', 'device', 'feature_set')
    values = (src, src_windowed, dst, dst_windowed, model, device, feature_set)
    return dict(zip(keys, values))


# Parameters read by the cells that follow.
parameters = parse_cell_parameters()

In [None]:
# Output CSV for the per-question features.
dst = parameters['dst']

# Nothing to recompute when the CSV is already on disk.
# NOTE(review): exit() ends the whole run, so the windowed section further
# down is skipped as well -- confirm that this is intended.
if os.path.isfile(dst):
    exit()

# Make sure the folder that will hold the CSV exists.
output_dir = os.path.dirname(dst)
os.makedirs(output_dir, exist_ok=True)

# Collect every wav file directly inside the per-question source folder.
wav_pattern = os.path.join(parameters['src'], '*.wav')
files = glob.glob(wav_pattern)

In [None]:
def extract_audio_file_features(audio, feature_set, feature_level):
    """Extract openSMILE features from a single audio file.

    Args:
        audio: Path of the audio file, passed to ``Smile.process_file``.
        feature_set: Name of the feature set, either ``'eGeMAPSv02'`` or
            ``'ComParE_2016'``.
        feature_level: ``'Functionals'`` or ``'LLD'`` (low-level descriptors).

    Returns:
        pandas.DataFrame: The result of ``Smile.process_file`` wrapped in a
        DataFrame.

    Raises:
        ValueError: If ``feature_set`` or ``feature_level`` is not one of the
            supported names. Previously this surfaced as an
            ``UnboundLocalError`` because no extractor had been created.
    """
    supported_feature_sets = ('eGeMAPSv02', 'ComParE_2016')
    # Public level names mapped to the attribute names on opensmile.FeatureLevel.
    feature_level_attributes = {
        'Functionals': 'Functionals',
        'LLD': 'LowLevelDescriptors',
    }

    # Validate before touching opensmile so bad input fails fast and clearly.
    if feature_set not in supported_feature_sets:
        raise ValueError(
            "Unsupported feature_set {!r}; expected one of {}".format(
                feature_set, list(supported_feature_sets)
            )
        )
    if feature_level not in feature_level_attributes:
        raise ValueError(
            "Unsupported feature_level {!r}; expected one of {}".format(
                feature_level, list(feature_level_attributes)
            )
        )

    smile = opensmile.Smile(
        feature_set=getattr(opensmile.FeatureSet, feature_set),
        feature_level=getattr(
            opensmile.FeatureLevel, feature_level_attributes[feature_level]
        ),
    )

    y = smile.process_file(audio)
    return pd.DataFrame(y)

def extract_opensmile_features(audio_path, feature_set):
    """Return the functional-level features of one file, tagged with its path.

    Args:
        audio_path: Path of the audio file to process.
        feature_set: Feature-set name forwarded to
            ``extract_audio_file_features``.

    Returns:
        The DataFrame produced by ``extract_audio_file_features`` with an
        extra first column ``'file'`` holding ``audio_path``.
    """
    functionals = extract_audio_file_features(
        audio_path,
        feature_set=feature_set,
        feature_level="Functionals",
    )
    # Put the source path in the first column so rows stay traceable.
    functionals.insert(loc=0, column='file', value=audio_path)
    return functionals

In [None]:
# Extract one feature frame per per-question wav file.
# The extractor is handed the file path and reads the audio itself, so the
# file is not decoded, padded, resampled or down-mixed here: that result was
# never passed on and only cost time.
features_list = []
for file in tqdm.tqdm(
    files,
    total=len(files),
    desc=parameters['model']
):
    features = extract_opensmile_features(audio_path=file, feature_set=parameters['feature_set'])
    features_list.append(features)

In [None]:
# Merge the per-file feature frames and store them as one CSV.
if not features_list:
    # pd.concat([]) would fail with an opaque "No objects to concatenate";
    # name the folder that yielded no input instead.
    raise ValueError(
        "No features were extracted: no '*.wav' files found in {!r}".format(parameters['src'])
    )
features_df = pd.concat(features_list)
features_df.to_csv(parameters['dst'], index=False)
print(features_df)

# With windowing

In [None]:
# Output CSV for the windowed features.
dst_windowed = parameters['dst_windowed']

# Nothing to recompute when the CSV is already on disk.
if os.path.isfile(dst_windowed):
    exit()

# Make sure the folder that will hold the CSV exists.
output_dir = os.path.dirname(dst_windowed)
os.makedirs(output_dir, exist_ok=True)

# Collect every wav file directly inside the windowed source folder.
wav_pattern = os.path.join(parameters['src_windowed'], '*.wav')
files = glob.glob(wav_pattern)

In [None]:
# Extract one feature frame per windowed wav file.
# The extractor is handed the file path and reads the audio itself, so the
# file is not decoded, padded, resampled or down-mixed here: that result was
# never passed on and only cost time.
features_list = []
for file in tqdm.tqdm(
    files,
    total=len(files),
    desc=parameters['model']
):
    features = extract_opensmile_features(audio_path=file, feature_set=parameters['feature_set'])
    features_list.append(features)

In [None]:
# Merge the per-window feature frames and store them as one CSV.
if not features_list:
    # pd.concat([]) would fail with an opaque "No objects to concatenate";
    # name the folder that yielded no input instead.
    raise ValueError(
        "No features were extracted: no '*.wav' files found in {!r}".format(parameters['src_windowed'])
    )
features_df = pd.concat(features_list)
features_df.to_csv(parameters['dst_windowed'], index=False)
print(features_df)