In [2]:
import os
import json
import pandas as pd
import fnmatch
import audiofile
import opensmile
from pydub import AudioSegment
from tqdm import tqdm

In [3]:
# Dataset root and its sub-directories. The trailing slashes are kept on
# purpose: the rest of the notebook builds paths by plain string concatenation.
data_path = './ABI_data/'
diary_path = data_path + 'diarization/'
lld_path = data_path + 'LLDs/'
audio_path = data_path + 'Audios/'

# Create the output directory for the extracted features. makedirs with
# exist_ok=True also creates missing parents and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(lld_path, exist_ok=True)

# Every sub-directory of the diarization folder is treated as one series.
# Non-directory entries (e.g. OS metadata files such as .DS_Store) are
# skipped, because the extraction loop calls os.listdir() on each entry and
# would fail on a plain file.
series = sorted(
    name for name in os.listdir(diary_path)
    if os.path.isdir(diary_path + name)
)
series

['Adventures in Brain Injury by Calvin Balaster',
 'Between Two Brains by the Brain Injury Association',
 'Brain Injury Connector',
 'Brain Injury Conversations by Cynthia Burke',
 'Brain Injury Today',
 'Brainbank by Synapse',
 'Dandelion Roots Making Connections',
 'Expressions of Life With an Acquired Brain Injury',
 'Hope Survives Brain Injury Podcast by Christabell Braden',
 'Life After Brain Injury by Will and Amy Perringwood',
 'Noggins and Neurons']

In [4]:
# Feature extractor shared by the whole notebook: frame-level (low-level
# descriptor) features from the eGeMAPSv02 set. num_workers is forwarded to
# opensmile.Smile unchanged.
smile = opensmile.Smile(
    num_workers=4,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
)

In [5]:
# Segments whose (end - start) is below this threshold are skipped.
# NOTE(review): presumably seconds, matching how 'start'/'end' are passed to
# smile.process_file below — confirm against the diarization JSON.
min_length = 1.0

for s in series:
    # One output directory per series; exist_ok makes the cell safe to re-run.
    output_path = lld_path + s + '/'
    os.makedirs(output_path, exist_ok=True)

    # Only the cleaned diarization files are processed. Sorting makes the
    # processing order deterministic (os.listdir order is arbitrary).
    episodes = sorted(
        fnmatch.filter(os.listdir(diary_path + s + '/'), '*_cleaned.json')
    )
    for episode in tqdm(episodes, desc=s):
        # Use a context manager so the JSON file handle is closed right after
        # parsing instead of leaking one open handle per episode.
        with open(diary_path + s + '/' + episode) as f:
            data = json.load(f)

        # Episode id = file name up to the first '.' and then up to the first
        # '_'; the matching audio is expected at Audios/<series>/<id>.wav.
        # NOTE(review): this truncates ids that themselves contain '.' or '_'
        # (e.g. 'Ep. 8_cleaned.json' -> 'Ep'); verify against the audio names.
        inst = episode.split('.')[0].split('_')[0]
        audio_file_path = audio_path + s + '/' + inst + '.wav'

        # Each top-level key of the JSON is one segment with 'start' and 'end'
        # entries; the key doubles as the output CSV file name.
        for key, segment in data.items():
            start = segment['start']
            end = segment['end']
            if end - start < min_length:
                continue
            extracted = smile.process_file(audio_file_path, start=start, end=end)
            extracted.to_csv(output_path + key + '.csv')
            

100%|████████████████████████████████████████| 56/56 [2:11:46<00:00, 141.19s/it]
100%|███████████████████████████████████████████| 13/13 [19:07<00:00, 88.25s/it]
100%|██████████████████████████████████████████| 16/16 [29:11<00:00, 109.44s/it]
100%|████████████████████████████████████████████| 7/7 [13:13<00:00, 113.30s/it]
100%|███████████████████████████████████████████| 26/26 [32:50<00:00, 75.77s/it]
100%|█████████████████████████████████████████████| 6/6 [04:47<00:00, 47.89s/it]
100%|███████████████████████████████████████████| 21/21 [08:44<00:00, 24.97s/it]
100%|█████████████████████████████████████████████| 5/5 [01:36<00:00, 19.21s/it]
100%|███████████████████████████████████████████| 18/18 [28:34<00:00, 95.25s/it]
100%|███████████████████████████████████████████| 13/13 [05:49<00:00, 26.88s/it]
100%|████████████████████████████████████████| 62/62 [2:29:50<00:00, 145.00s/it]


In [42]:
# Leftover inspection of `episode`, the loop variable of the extraction loop
# (it holds whichever file name that loop bound last).
# NOTE(review): this cell only works after that loop has run in the same
# kernel, and its execution count is out of sequence — consider removing it.
episode

'Adventures in Brain Injury - Ep 8_cleaned.json'