# Reconstructed post-patch code for mir_eval/input_output.py.
#
# Origin: commit 7d805ab9d26c74db2319658f1ee1d0235f092845
# Author: Colin Raffel, Fri, 25 Jul 2014 12:06:56 -0400
# Subject: Separate load_intervals and load_labeled_intervals and refactored
#          load_time series for #35 and #67. Any code which uses
#          load_intervals will need to be updated
#
# The same commit also drops the ``import os`` line from the module imports.
# The functions below rely on names the module already provides: ``np``
# (numpy), ``warnings``, ``util`` and ``load_delimited``.


def _intervals_from_columns(starts, ends):
    '''
    Stack start and end times into an interval matrix and validate it.

    :parameters:
     - starts : list of float
         Start time of each interval
     - ends : list of float
         End time of each interval

    :returns:
     - intervals : np.ndarray, shape=(n_events, 2)
         array of event start and end times
    '''
    # np.array([starts, ends]) has one *row* per column of the file, i.e.
    # shape (2, n_events); transpose so each row is one (start, end) pair.
    intervals = np.array([starts, ends]).T
    # Validate them, but throw a warning in place of an error
    try:
        util.validate_intervals(intervals)
    except ValueError as error:
        warnings.warn(error.args[0])
    return intervals


def load_intervals(filename, delimiter=r'\s+'):
    r'''
    Import intervals from an annotation file.  The file should consist of two
    columns of numeric values corresponding to start and end time of each
    interval.  This is primarily useful for processing events which span a
    duration, such as segmentation, chords, or instrument activation.

    If ``util.validate_intervals`` raises a ``ValueError`` for the loaded
    intervals, its message is issued as a warning and the intervals are
    returned anyway.

    :parameters:
     - filename : str
         Path to the annotation file
     - delimiter : str
         Separator regular expression.
         By default, lines will be split by any amount of whitespace ('\s+')

    :returns:
     - intervals : np.ndarray, shape=(n_events, 2)
         array of event start and end times
    '''
    # Use our universal function to load in the events
    starts, ends = load_delimited(filename, [float, float], delimiter)
    return _intervals_from_columns(starts, ends)


def load_labeled_intervals(filename, delimiter=r'\s+'):
    r'''
    Import labeled intervals from an annotation file.  The file should consist
    of three columns: Two consisting of numeric values corresponding to start
    and end time of each interval and a third corresponding to the label of
    each interval.  This is primarily useful for processing events which span
    a duration, such as segmentation, chords, or instrument activation.

    If ``util.validate_intervals`` raises a ``ValueError`` for the loaded
    intervals, its message is issued as a warning and the data is returned
    anyway.

    :parameters:
     - filename : str
         Path to the annotation file
     - delimiter : str
         Separator regular expression.
         By default, lines will be split by any amount of whitespace ('\s+')

    :returns:
     - intervals : np.ndarray, shape=(n_events, 2)
         array of event start and end time
     - labels : list of str
         list of labels
    '''
    # Use our universal function to load in the events
    starts, ends, labels = load_delimited(filename, [float, float, str],
                                          delimiter)
    return _intervals_from_columns(starts, ends), labels


def load_time_series(filename, delimiter=r'\s+'):
    r'''
    Import a time series from an annotation file.  The file should consist of
    two columns of numeric values corresponding to the time and value of each
    sample of the time series.

    :parameters:
     - filename : str
         Path to the annotation file
     - delimiter : str
         Separator regular expression.
         By default, lines will be split by any amount of whitespace ('\s+')

    :returns:
     - times : np.ndarray
         array of timestamps (float)
     - values : np.ndarray
         array of corresponding numeric values (float)
    '''
    # Use our universal function to load in the events
    times, values = load_delimited(filename, [float, float], delimiter)
    return np.array(times), np.array(values)