Skip to content

Commit

Permalink
Separate load_intervals and load_labeled_intervals and refactored load_time_series for #35 and #67. Any code which uses load_intervals will need to be updated
Browse files Browse the repository at this point in the history
  • Loading branch information
craffel committed Jul 25, 2014
1 parent 0a9ffa2 commit 7d805ab
Showing 1 changed file with 74 additions and 110 deletions.
184 changes: 74 additions & 110 deletions mir_eval/input_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import numpy as np
import re
import os
import warnings

from . import util
Expand Down Expand Up @@ -131,7 +130,6 @@ def load_labeled_events(filename, delimiter=r'\s+'):
- labels : list of str
list of labels
'''
# Use our universal function to load in the events
events, labels = load_delimited(filename, [float, str], delimiter)
Expand All @@ -145,129 +143,95 @@ def load_labeled_events(filename, delimiter=r'\s+'):
return events, labels


def load_intervals(filename, delimiter=r'\s+'):
    r'''
    Import intervals from an annotation file.  The file should consist of two
    columns of numeric values corresponding to start and end time of each
    interval.  This is primarily useful for processing events which span a
    duration, such as segmentation, chords, or instrument activation.

    :parameters:
        - filename : str
            Path to the annotation file
        - delimiter : str
            Separator regular expression.
            By default, lines will be split by any amount of whitespace ('\s+')

    :returns:
        - intervals : np.ndarray, shape=(n_events, 2)
            array of event start and end times
    '''
    # Use our universal function to load in the events
    starts, ends = load_delimited(filename, [float, float], delimiter)
    # The loader returns one sequence per column, so stacking them directly
    # yields shape (2, n_events).  Transpose so each row is one
    # [start, end] interval, as documented above.
    intervals = np.array([starts, ends]).T
    # Validate them, but throw a warning in place of an error
    try:
        util.validate_intervals(intervals)
    except ValueError as error:
        warnings.warn(error.args[0])

    return intervals


def load_labeled_intervals(filename, delimiter=r'\s+'):
    r'''
    Import labeled intervals from an annotation file.  The file should consist
    of three columns: Two consisting of numeric values corresponding to start
    and end time of each interval and a third corresponding to the label of
    each interval.  This is primarily useful for processing events which span
    a duration, such as segmentation, chords, or instrument activation.

    :parameters:
        - filename : str
            Path to the annotation file
        - delimiter : str
            Separator regular expression.
            By default, lines will be split by any amount of whitespace ('\s+')

    :returns:
        - intervals : np.ndarray, shape=(n_events, 2)
            array of event start and end times
        - labels : list of str
            list of labels
    '''
    # Use our universal function to load in the events
    starts, ends, labels = load_delimited(filename, [float, float, str],
                                          delimiter)
    # The loader returns one sequence per column, so stacking the two time
    # columns directly yields shape (2, n_events).  Transpose so each row is
    # one [start, end] interval, as documented above.
    intervals = np.array([starts, ends]).T
    # Validate them, but throw a warning in place of an error
    try:
        util.validate_intervals(intervals)
    except ValueError as error:
        warnings.warn(error.args[0])

    return intervals, labels

def load_time_series(filename, delimiter=r'\s+'):
    r'''
    Import a time series from an annotation file.  The file should consist of
    two columns of numeric values corresponding to the time and value of each
    sample of the time series.

    :parameters:
        - filename : str
            Path to the annotation file
        - delimiter : str
            Separator regular expression.
            By default, lines will be split by any amount of whitespace ('\s+')

    :returns:
        - times : np.ndarray
            array of timestamps (float)
        - values : np.ndarray
            array of corresponding numeric values (float)
    '''
    # Use our universal function to load in the events
    times, values = load_delimited(filename, [float, float], delimiter)
    # Convert each parsed column into its own array
    times = np.array(times)
    values = np.array(values)

    return times, values

Expand Down

0 comments on commit 7d805ab

Please sign in to comment.