In [1]:
import librosa
import os
import pandas as pd
import numpy as np

# Set the current working directory to the parent of the directory the kernel
# started in. The starting directory is remembered the first time this cell
# runs, so re-running the cell does not climb one more level up each time
# (which would break the relative "data/..." paths used below).
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.abspath(os.path.join(NOTEBOOK_DIR, os.pardir)))

In [2]:
# Read the file that describes how the audios are split into conversation parts.
# The file has no header row; fields are separated by ';'.
df_conv = pd.read_csv(
    "data/MSPCORPUS/Time_Labels/conversation_parts.txt",
    sep=";",
    header=None,
    names=['Conversation_Part', 'start_time', 'end_time'],
)

# Add two columns: the podcast number (characters 17-20 of the part id) and
# the part number within that podcast (character 22).
# NOTE(review): the part number is read as a single character, so this assumes
# part numbers never exceed 9 - confirm against the data.
df_conv['PC_Num'] = df_conv['Conversation_Part'].str[17:21].astype(int)
df_conv['Part_Num'] = df_conv['Conversation_Part'].str[22:23].astype(int)

# Start time of part 1 of each podcast, keyed by PC_Num. Filled in by
# add_sync_time_columns as it encounters part-1 rows.
mem = {}

def add_sync_time_columns(row):
    """Return a copy of ``row`` with times re-based to the start of its podcast.

    The start time of part 1 of each podcast is stored in the module-level
    ``mem`` dict (keyed by ``PC_Num``) and subtracted from the times of every
    part of that podcast. Rows must therefore be processed with part 1 of a
    podcast before its other parts.

    Parameters
    ----------
    row : pandas.Series
        Must provide ``Part_Num``, ``PC_Num``, ``start_time`` and ``end_time``.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with two extra entries: ``m_start_time`` (0 for
        part 1, otherwise ``start_time`` minus the podcast's part-1 start) and
        ``m_end_time`` (``end_time`` minus the podcast's part-1 start).

    Raises
    ------
    KeyError
        If no part-1 row has been seen yet for this row's ``PC_Num``.
    """
    is_first_part = row.Part_Num == 1
    if is_first_part:
        mem[row.PC_Num] = row.start_time

    try:
        offset = mem[row.PC_Num]
    except KeyError:
        raise KeyError(
            f"no part 1 seen yet for PC_Num={row.PC_Num!r}; "
            "rows must be processed with part 1 of each podcast first"
        ) from None

    # Work on a copy: DataFrame.apply does not support functions that mutate
    # the object they are given.
    synced = row.copy()
    synced['m_start_time'] = 0 if is_first_part else row.start_time - offset
    synced['m_end_time'] = row.end_time - offset

    return synced

# Re-base each row so its times are relative to the start of part 1 of its
# podcast (part 1 starts at 0 and ends at end - start).
df_conv = df_conv.apply(add_sync_time_columns, axis=1)

# Fix the column order, then build the audio file name from the first
# 21 characters of the part id.
df_conv = df_conv[[
    'Conversation_Part',
    'start_time',
    'end_time',
    'm_start_time',
    'm_end_time',
    'PC_Num',
    'Part_Num',
]]
df_conv['Audio_Name'] = df_conv['Conversation_Part'].str[0:21] + ".wav"

In [3]:
# Load the annotations spreadsheet (path is relative to the working directory set above).
df_annotations = pd.read_excel('data/annotations.xlsx')

In [4]:
# Attach the timing columns and audio file name of each conversation part to
# its annotations. Left join on (PC_Num, Part_Num): annotations without a
# matching part are kept, with missing values in the columns taken from df_conv.
df_annotations_2 = df_annotations[['Name', 'Emotion', 'Annotator', 'PC_Num', 'Part_Num']].merge(
    df_conv[['start_time', 'end_time', 'm_start_time', 'm_end_time', 'Audio_Name', 'PC_Num', 'Part_Num']],
    how='left',
    on=['PC_Num', 'Part_Num'],
)

In [5]:
# Replace the original start/end times with the re-based ones, then drop the
# now-redundant m_* columns.
# Note: this mutates df_annotations_2 in place, so the cell cannot be re-run
# without first re-running the merge that creates the m_* columns.
df_annotations_2['start_time'], df_annotations_2['end_time'] = (
    df_annotations_2['m_start_time'],
    df_annotations_2['m_end_time'],
)

df_annotations_2.drop(columns=['m_start_time', 'm_end_time'], inplace=True)

In [6]:
# Write the re-timed annotations to an Excel file in the current working
# directory, without the DataFrame index.
df_annotations_2.to_excel(excel_writer='annotations_2.xlsx', index=False)