# Imports

In [1]:
import librosa
import os
import pandas as pd
import subprocess

# Annotation Files
Load segment files to find timestamps

In [2]:
def load_file(parent_dir, file_name):
    """Read one CSV file and tag every row with the file it came from.

    Parameters
    ----------
    parent_dir : str
        Directory that contains the file.
    file_name : str
        Name of the CSV file inside ``parent_dir``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV with an extra ``source`` column holding ``file_name``
        with every ``'.csv'`` occurrence removed.
    """
    csv_path = os.path.join(parent_dir, file_name)
    source_id = file_name.replace('.csv', '')
    return pd.read_csv(csv_path).assign(source=source_id)

Find each song's earliest annotated start time and latest annotated end time.

In [3]:
annot_path = '../data/raw/SegLabelHard/SegLabelHard'
# Only pick up CSV files (a stray non-CSV entry in the directory would
# otherwise be fed to read_csv) and sort them so the concatenation order
# does not depend on the filesystem's listing order.
annot_names = sorted(f for f in os.listdir(annot_path) if f.endswith('.csv'))
# One frame with every segment of every song; `source` identifies the song.
annotations = pd.concat([load_file(annot_path, f) for f in annot_names])
# Per-song earliest start and latest end. String aggregation names are used
# because passing the builtins min/max to agg is deprecated in recent pandas.
start_end = annotations.groupby('source').agg({'Start_Time': 'min', 'End_Time': 'max'})

# MP3 files

In [4]:
def duration(mp3_path, mp3_name):
    """Return the length of an MP3 file in seconds, as reported by ffprobe.

    Parameters
    ----------
    mp3_path : str
        Directory that contains the file.
    mp3_name : str
        File name inside ``mp3_path``.

    Returns
    -------
    tuple of (str, float)
        ``mp3_name`` with ``'.mp3'`` removed, and the duration in seconds.

    Raises
    ------
    ValueError
        If ffprobe's output contains no ``duration=<number>`` line (for
        example because ffprobe could not read the file).
    """
    file_path = os.path.join(mp3_path, mp3_name)
    args = ("ffprobe", "-show_entries", "format=duration", "-i", file_path)
    # run() drains stdout while waiting for the process; calling wait()
    # before reading a PIPE can deadlock once the pipe buffer fills.
    result = subprocess.run(args, stdout=subprocess.PIPE)
    # splitlines() copes with both '\n' and '\r\n', so parsing no longer
    # depends on the platform's line endings; the line is located by key
    # rather than by position in the output.
    for line in result.stdout.decode().splitlines():
        key, sep, value = line.partition('=')
        if sep and key.strip() == 'duration':
            return (mp3_name.replace('.mp3', ''), float(value))
    raise ValueError('ffprobe reported no duration for {!r}'.format(file_path))

In [5]:
mp3_path = '../data/raw/CAL500_32kps/'
# Restrict to MP3 files so a stray non-audio entry in the directory does not
# abort the whole run, and sort for a listing-order-independent result.
mp3_names = sorted(f for f in os.listdir(mp3_path) if f.endswith('.mp3'))

# One (source, seconds) pair per file, probed with ffprobe via duration().
durations = [duration(mp3_path, f) for f in mp3_names]
# Index by `source` so the table can be joined to the annotation summary.
song_durations = pd.DataFrame(durations, columns=['source', 'mp3_length'])
song_durations = song_durations.set_index('source')

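# Comparison

Keep only the songs whose last annotated segment ends within the measured MP3 length, then compute each song's annotated duration.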

In [7]:
# Join the per-song annotation span to the measured MP3 length on the shared
# `source` index, drop songs whose annotations run past the end of the audio,
# and keep only the annotated span (latest end minus earliest start).
merged = (
    start_end
    .merge(song_durations, left_index=True, right_index=True)
    .loc[lambda frame: frame['End_Time'] <= frame['mp3_length']]
    .assign(full_duration=lambda frame: frame['End_Time'] - frame['Start_Time'])
    .loc[:, ['full_duration']]
)

# Segments

In [8]:
# Per-segment table for the songs that survived the comparison step:
# attach each song's full annotated duration, compute the length of every
# segment, order segments chronologically within a song, and number them
# from 0 in that order.
segments = (
    annotations
    .set_index('source')[['Start_Time', 'End_Time']]
    .merge(merged, left_index=True, right_index=True)
    .assign(segment_duration=lambda frame: frame['End_Time'] - frame['Start_Time'])
    .reset_index()
    .sort_values(['source', 'Start_Time'])
    .assign(order=lambda frame: frame.groupby('source').cumcount())
)

# Output

In [29]:
# Columns written to disk, in output order.
cols = ["source","Start_Time","End_Time","full_duration","segment_duration", "order"]
# Write the selected columns only; the positional index is not saved.
segments.to_csv('./../data/raw/song_segment_times.csv', columns=cols, index=False)