From 7d805ab9d26c74db2319658f1ee1d0235f092845 Mon Sep 17 00:00:00 2001
From: Colin Raffel <craffel@gmail.com>
Date: Fri, 25 Jul 2014 12:06:56 -0400
Subject: [PATCH] Separate load_intervals and load_labeled_intervals and
 refactor load_time_series for #35 and #67.  Any code which uses
 load_intervals will need to be updated (it no longer returns labels)

---
 mir_eval/input_output.py | 184 ++++++++++++++++-----------------------
 1 file changed, 74 insertions(+), 110 deletions(-)

diff --git a/mir_eval/input_output.py b/mir_eval/input_output.py
index 2eb908c4..9d599979 100644
--- a/mir_eval/input_output.py
+++ b/mir_eval/input_output.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 import re
-import os
 import warnings
 
 from . import util
@@ -131,7 +130,6 @@ def load_labeled_events(filename, delimiter=r'\s+'):
 
         - labels : list of str
             list of labels
-
     '''
     # Use our universal function to load in the events
     events, labels = load_delimited(filename, [float, str], delimiter)
@@ -145,129 +143,95 @@ def load_labeled_events(filename, delimiter=r'\s+'):
     return events, labels
 
 
-def load_intervals(filename, delimiter=r'\s+',
-                   converter=None, label_prefix='__'):
-    r'''Import labeled intervals from an annotation file.  This is primarily
-        useful for processing events which span a duration, such as
-        segmentation, chords, or instrument activation.
-
-        The annotation file may be either of two formats:
-         - Double-column.  Each line contains two values, separated by
-           ``delimiter``, corresponding to the start and end time annotated
-           event.
-
-         - Triple-column.  Each line contains three values, separated by
-           ``delimiter``.  The first two values specify the start and end
-           times, the last value specifies the label for the event (e.g.
-           "Verse" or "A:min").
-
-        :parameters:
-          - filename : str
-              Path to the annotation file
-
-          - delimiter : str
-              Separator regular expression.
-              By default, lines will be split by any amount of whitespace
-              ('\s+')
-
-          - converter : function
-              Function to convert time-stamp data into numerics. Defaults to
-              float().
-
-          - label_prefix : str
-              String to append to any synthetically generated labels
-
-        :returns:
-          - event_times : np.ndarray, shape=(n_events, 2)
-              array of event start and end times
-
-          - event_labels : list of str
-              list of corresponding event labels
-        '''
-
-    if converter is None:
-        converter = float
-
-    times = []
-    labels = []
-
-    splitter = re.compile(delimiter)
-
-    with open(filename, 'r') as input_file:
-        for row, line in enumerate(input_file, 1):
-            data = splitter.split(line.strip(), 2)
-
-            if len(data) == 2:
-                times.append([converter(data[0]), converter(data[1])])
-                labels.append('%s%d' % (label_prefix, row))
-
-            elif len(data) == 3:
-                times.append([converter(data[0]), converter(data[1])])
-                labels.append(data[2])
-
-            else:
-                raise ValueError('parse error %s:%d:\n%s' %
-                                 (filename, row, line))
+def load_intervals(filename, delimiter=r'\s+'):
+    r'''
+    Import intervals from an annotation file.  The file should consist of two
+    columns of numeric values corresponding to start and end time of each
+    interval.  This is primarily useful for processing events which span a
+    duration, such as segmentation, chords, or instrument activation.
 
-    times = np.asarray(times)
+    :parameters:
+        - filename : str
+            Path to the annotation file
+        - delimiter : str
+            Separator regular expression.
+            By default, lines will be split by any amount of whitespace ('\s+')
 
+    :returns:
+        - intervals : np.ndarray, shape=(n_events, 2)
+            array of event start and end times, one row per interval
+    '''
+    # Use our universal function to load in the events
+    starts, ends = load_delimited(filename, [float, float], delimiter)
+    # Stack columns into an (n_events, 2) matrix; np.array alone gives (2, n)
+    intervals = np.array([starts, ends]).T
+    # Validate them, but throw a warning in place of an error
     try:
-        util.validate_intervals(times)
+        util.validate_intervals(intervals)
     except ValueError as error:
         warnings.warn(error.args[0])
 
-    return times, labels
+    return intervals
 
 
-def load_time_series(filename, delimiter=None):
-    r'''Import a time series from an annotation file.  This is primarily useful
-        for processing dense time series with timestamps and corresponding
-        numeric values
+def load_labeled_intervals(filename, delimiter=r'\s+'):
+    r'''
+    Import labeled intervals from an annotation file.  The file should consist
+    of three columns: two consisting of numeric values corresponding to start
+    and end time of each interval and a third corresponding to the label of
+    each interval.  This is primarily useful for processing events which span a
+    duration, such as segmentation, chords, or instrument activation.
 
-        The annotation file must be of the following format:
-          - Double-column.  Each line contains two values, separated by
-            ``delimiter``: the first contains the timestamp, and the second
-            contains its corresponding numeric value.
+    :parameters:
+        - filename : str
+            Path to the annotation file
+        - delimiter : str
+            Separator regular expression.
+            By default, lines will be split by any amount of whitespace ('\s+')
 
-        :parameters:
-          - filename : str
-              Path to the annotation file
+    :returns:
+        - intervals : np.ndarray, shape=(n_events, 2)
+            array of event start and end times, one row per interval
+        - labels : list of str
+            list of labels, one per interval
+    '''
+    # Use our universal function to load in the events
+    starts, ends, labels = load_delimited(filename, [float, float, str],
+                                          delimiter)
+    # Stack columns into an (n_events, 2) matrix; np.array alone gives (2, n)
+    intervals = np.array([starts, ends]).T
+    # Validate them, but throw a warning in place of an error
+    try:
+        util.validate_intervals(intervals)
+    except ValueError as error:
+        warnings.warn(error.args[0])
 
-          - delimiter : str
-              Column separator. By default, lines will be split by any amount
-              of whitespace, unless the file ending is .csv, in which case a
-              comma ',' is used as the delimiter.
+    return intervals, labels
 
-        :returns:
-          - times : np.ndarray
-              array of timestamps (float)
-          - values : np.ndarray
-              array of corresponding numeric values (float)
-        '''
 
-    # Note: unlike load_events, here we expect float data in both columns,
-    # so we can just use numpy's text load (np.loadtxt)
+def load_time_series(filename, delimiter=r'\s+'):
+    r'''
+    Load a time series from an annotation file.  Each line of the file must
+    hold two numeric values separated by ``delimiter``: the time of a sample
+    followed by the value of the series at that time.
 
-    if os.path.splitext(filename)[1] == '.csv':
-        delimiter = ','
+    :parameters:
+        - filename : str
+            Location of the annotation file to read
+        - delimiter : str
+            Regular expression which separates the columns.
+            The default ('\s+') splits on any run of whitespace
 
-    try:
-        data = np.loadtxt(filename, 'float', '#', delimiter)
-    except ValueError:
-        raise ValueError('Error: could no load %s, please check if it is '
-                         'in the correct 2 column format'
-                         % os.path.basename(filename))
-
-    data = data.T
-
-    # we do however want to make sure the data is in the right format!
-    if data.shape[0] != 2:
-        raise ValueError('Error: %s should be of dimension (2,x), but is '
-                         'of dimension %s'
-                         % (os.path.basename(filename), data.shape))
-
-    times = data[0]
-    values = data[1]
+    :returns:
+        - times : np.ndarray
+            sample times, as floats
+        - values : np.ndarray
+            value of the series at each time, as floats
+    '''
+    # Parse the two float columns with the shared delimited-file loader
+    time_column, value_column = load_delimited(filename, [float, float],
+                                               delimiter)
+    times, values = np.array(time_column), np.array(value_column)
 
     return times, values