# Extract labels from the evaluation files

Test the extraction on a single file first

In [1]:
import re

# Start with a single evaluation file to check the extraction before
# running it over the whole dataset.
file_path = (
    '/git/datasets/IEMOCAP_full_release/Session1/'
    'dialog/EmoEvaluation/Ses01F_impro01.txt'
)

In [2]:
# Matches, within one line, the span from the first "[" to the last "]" that
# is immediately followed by a newline. In the evaluation files these are the
# tab-separated summary rows such as
#   [6.2901 - 8.2357]\tSes01F_impro01_F000\tneu\t[2.5000, 2.5000, 2.5000]
# No flags are passed: the pattern contains no letters, so a case-insensitive
# flag would have no effect on what is matched.
useful_regex = re.compile(r'\[.+\]\n')

In [3]:
# Read the whole evaluation file into memory, then collect every piece of
# text that the compiled pattern matches, in file order.
with open(file_path) as eval_file:
    file_content = eval_file.read()

info_lines = useful_regex.findall(file_content)

In [4]:
# Preview matches 1 through 9 (match 0 is skipped), each split into its
# tab-separated fields.
for raw_line in info_lines[1:10]:
    fields = raw_line.strip().split('\t')
    print(fields)

['[6.2901 - 8.2357]', 'Ses01F_impro01_F000', 'neu', '[2.5000, 2.5000, 2.5000]']
['[10.0100 - 11.3925]', 'Ses01F_impro01_F001', 'neu', '[2.5000, 2.5000, 2.5000]']
['[14.8872 - 18.0175]', 'Ses01F_impro01_F002', 'neu', '[2.5000, 2.5000, 2.5000]']
['[19.2900 - 20.7875]', 'Ses01F_impro01_F003', 'xxx', '[2.5000, 3.0000, 3.0000]']
['[21.3257 - 24.7400]', 'Ses01F_impro01_F004', 'xxx', '[2.5000, 3.0000, 2.5000]']
['[27.4600 - 31.4900]', 'Ses01F_impro01_F005', 'neu', '[2.5000, 3.5000, 2.0000]']
['[38.9650 - 43.5900]', 'Ses01F_impro01_F006', 'fru', '[2.0000, 3.5000, 3.5000]']
['[46.5800 - 52.1900]', 'Ses01F_impro01_F007', 'fru', '[2.5000, 3.5000, 3.5000]']
['[56.1600 - 58.8225]', 'Ses01F_impro01_F008', 'fru', '[2.0000, 3.5000, 3.5000]']


## Compile all the information in a single file

In [5]:
import re
import os


# Matches the bracketed, tab-separated summary rows of an evaluation file,
# e.g. "[6.2901 - 8.2357]\tSes01F_impro01_F000\tneu\t[2.5000, 2.5000, 2.5000]\n".
# No flags: the pattern contains no letters, so case-insensitivity is a no-op.
info_line = re.compile(r'\[.+\]\n')

# Parallel lists: position i in every list describes the same utterance.
start_times, end_times, wav_file_names, emotions, vals, acts, doms = [], [], [], [], [], [], []

for sess in range(1, 6):  # Session1 .. Session5
    emo_evaluation_dir = '/git/datasets/IEMOCAP_full_release/Session{}/dialog/EmoEvaluation/'.format(sess)
    # Keep only entries named like evaluation files ("Ses*.txt"). A plain
    # substring test would also accept any entry that merely contains "Ses"
    # somewhere in its name (for example hidden side-car files).
    # NOTE(review): os.listdir returns entries in arbitrary order, so the row
    # order of the compiled lists can differ between machines.
    evaluation_files = [
        name for name in os.listdir(emo_evaluation_dir)
        if name.startswith('Ses') and name.endswith('.txt')
    ]
    for file in evaluation_files:
        # Explicit encoding keeps decoding independent of the platform locale;
        # errors="replace" substitutes undecodable bytes instead of raising.
        with open(os.path.join(emo_evaluation_dir, file), encoding="utf-8", errors="replace") as f:
            content = f.read()
        info_lines = info_line.findall(content)
        for line in info_lines[1:]:  # the first match is the header row
            # Row layout: "[start - end]", utterance id, emotion, "[val, act, dom]".
            start_end_time, wav_file_name, emotion, val_act_dom = line.strip().split('\t')
            # Drop the surrounding brackets; float() tolerates the padding spaces.
            start_time, end_time = (float(t) for t in start_end_time[1:-1].split('-'))
            val, act, dom = (float(v) for v in val_act_dom[1:-1].split(','))
            start_times.append(start_time)
            end_times.append(end_time)
            wav_file_names.append(wav_file_name)
            emotions.append(emotion)
            vals.append(val)
            acts.append(act)
            doms.append(dom)

In [6]:
import pandas as pd

# Assemble the parallel lists into one table; the key order of the dict fixes
# the column order.
df_iemocap = pd.DataFrame({
    'start_time': start_times,
    'end_time': end_times,
    'wav_file': wav_file_names,
    'emotion': emotions,
    'val': vals,
    'act': acts,
    'dom': doms,
})

# Show the last rows as a quick check of the result.
df_iemocap.tail()

Unnamed: 0,start_time,end_time,wav_file,emotion,val,act,dom
10034,258.36,260.12,Ses05F_impro03_M064,hap,4.0,3.0,3.0
10035,260.15,263.98,Ses05F_impro03_M065,hap,4.5,4.5,4.5
10036,264.0,265.55,Ses05F_impro03_M066,hap,4.0,3.5,3.5
10037,267.07,269.23,Ses05F_impro03_M067,hap,4.0,3.0,3.5
10038,269.27,271.59,Ses05F_impro03_M068,hap,4.0,3.5,4.0


In [8]:
# Write the compiled labels to disk; index=False leaves the row index out of
# the CSV file.
output_csv = 'data/pre-processed/df_iemocap.csv'
df_iemocap.to_csv(output_csv, index=False)

In [9]:
# Print the compiled table as a final check of its contents.
print(df_iemocap)

       start_time  end_time             wav_file emotion  val  act  dom
0          7.6300    8.5700  Ses01M_impro07_F000     neu  4.0  2.0  2.5
1         13.9500   21.1200  Ses01M_impro07_F001     exc  4.5  4.0  3.0
2         23.8800   25.2500  Ses01M_impro07_F002     xxx  3.5  3.0  2.0
3         27.0600   29.8100  Ses01M_impro07_F003     exc  3.5  3.0  3.0
4         30.3100   33.0800  Ses01M_impro07_F004     exc  4.0  3.5  3.0
5         33.7200   37.2700  Ses01M_impro07_F005     exc  4.5  4.0  3.0
6         37.8800   42.5825  Ses01M_impro07_F006     exc  3.5  3.5  3.0
7         45.1450   49.2825  Ses01M_impro07_F007     exc  3.5  2.5  3.0
8         49.6400   52.1000  Ses01M_impro07_F008     exc  4.0  2.5  3.0
9         52.4600   57.4562  Ses01M_impro07_F009     exc  4.0  2.5  3.5
10        58.7800   60.2100  Ses01M_impro07_F010     neu  3.0  2.0  2.0
11        60.8100   66.7700  Ses01M_impro07_F011     exc  3.0  2.5  2.5
12        68.6400   72.3500  Ses01M_impro07_F012     neu  4.0  3