Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow unicode filenames in Python 2 #119

Merged
merged 4 commits into from
Apr 9, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 134 additions & 77 deletions soundfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,15 @@

import numpy as _np
import os as _os
import sys as _sys
from cffi import FFI as _FFI
from os import SEEK_SET, SEEK_CUR, SEEK_END

try:
_unicode = unicode # doesn't exist in Python 3.x
except NameError:
_unicode = str

_ffi = _FFI()
_ffi.cdef("""
enum
Expand Down Expand Up @@ -515,7 +521,11 @@ def default_subtype(format):
'DOUBLE'

"""
return _default_subtypes.get(str(format).upper())
try:
format = format.upper()
except AttributeError:
pass
return _default_subtypes.get(format)


class SoundFile(object):
Expand Down Expand Up @@ -619,78 +629,19 @@ def __init__(self, file, mode='r', samplerate=None, channels=None,
>>> assert myfile.closed

"""
self._name = file
if isinstance(file, _unicode):
file = file.encode(_sys.getfilesystemencoding())
if mode is None:
mode = getattr(file, 'mode', None)
if not isinstance(mode, str):
raise TypeError("Invalid mode: {0!r}".format(mode))
modes = set(mode)
if modes.difference('xrwb+') or len(mode) > len(modes):
raise ValueError("Invalid mode: {0!r}".format(mode))
if len(modes.intersection('xrw')) != 1:
raise ValueError("mode must contain exactly one of 'xrw'")
mode_int = _check_mode(mode)
self._mode = mode

if '+' in modes:
mode_int = _snd.SFM_RDWR
elif 'r' in modes:
mode_int = _snd.SFM_READ
else:
mode_int = _snd.SFM_WRITE

old_fmt = format
self._name = file
if format is None:
extension = _os.path.splitext(str(getattr(file, 'name', file)))[-1]
format = extension.lstrip('.')
if format.upper() not in _formats and 'r' not in modes:
raise TypeError(
"No format specified and unable to get format from "
"file extension: {0!r}".format(file))

self._info = _ffi.new("SF_INFO*")
if 'r' not in modes or str(format).upper() == 'RAW':
if samplerate is None:
raise TypeError("samplerate must be specified")
self._info.samplerate = samplerate
if channels is None:
raise TypeError("channels must be specified")
self._info.channels = channels
self._info.format = _format_int(format, subtype, endian)
else:
if any(arg is not None for arg in (samplerate, channels, old_fmt,
subtype, endian)):
raise TypeError(
"Not allowed for existing files (except 'RAW'): "
"samplerate, channels, format, subtype, endian")

if isinstance(file, str):
if _os.path.isfile(file):
if 'x' in modes:
raise OSError("File exists: {0!r}".format(file))
elif modes.issuperset('w+'):
# truncate the file, because SFM_RDWR doesn't:
_os.close(_os.open(file, _os.O_WRONLY | _os.O_TRUNC))
self._file = _snd.sf_open(file.encode(), mode_int, self._info)
if mode_int == _snd.SFM_WRITE:
# Due to a bug in libsndfile version <= 1.0.25, frames != 0
# when opening a named pipe in SFM_WRITE mode.
# See http://github.com/erikd/libsndfile/issues/77.
self._info.frames = 0
# This is not necessary for "normal" files (because
# frames == 0 in this case), but it doesn't hurt, either.
elif isinstance(file, int):
self._file = _snd.sf_open_fd(file, mode_int, self._info, closefd)
elif all(hasattr(file, a) for a in ('seek', 'read', 'write', 'tell')):
self._file = _snd.sf_open_virtual(
self._init_virtual_io(file), mode_int, self._info, _ffi.NULL)
else:
raise TypeError("Invalid file: {0!r}".format(file))
self._handle_error("Error opening {0!r}: ".format(file))

if modes.issuperset('r+') and self.seekable():
self._info = _create_info_struct(file, mode, samplerate, channels,
format, subtype, endian)
self._file = self._open(file, mode_int, closefd)
if set(mode).issuperset('r+') and self.seekable():
# Move write position to 0 (like in Python file objects)
self.seek(0)

_snd.sf_command(self._file, _snd.SFC_SET_CLIPPING, _ffi.NULL,
_snd.SF_TRUE)

Expand Down Expand Up @@ -1050,6 +1001,35 @@ def close(self):
self._file = None
self._handle_error_number(err)

def _open(self, file, mode_int, closefd):
"""Call the appropriate sf_open*() function from libsndfile."""
assert not isinstance(file, _unicode)
if isinstance(file, bytes):
if _os.path.isfile(file):
if 'x' in self.mode:
raise OSError("File exists: {0!r}".format(self.name))
elif set(self.mode).issuperset('w+'):
# truncate the file, because SFM_RDWR doesn't:
_os.close(_os.open(file, _os.O_WRONLY | _os.O_TRUNC))
file_ptr = _snd.sf_open(file, mode_int, self._info)
if mode_int == _snd.SFM_WRITE:
# Due to a bug in libsndfile version <= 1.0.25, frames != 0
# when opening a named pipe in SFM_WRITE mode.
# See http://github.com/erikd/libsndfile/issues/77.
self._info.frames = 0
# This is not necessary for "normal" files (because
# frames == 0 in this case), but it doesn't hurt, either.
elif isinstance(file, int):
file_ptr = _snd.sf_open_fd(file, mode_int, self._info, closefd)
elif all(hasattr(file, a) for a in ('seek', 'read', 'write', 'tell')):
file_ptr = _snd.sf_open_virtual(
self._init_virtual_io(file), mode_int, self._info, _ffi.NULL)
else:
raise TypeError("Invalid file: {0!r}".format(self.name))
self._handle_error_number(_snd.sf_error(file_ptr),
"Error opening {0!r}: ".format(self.name))
return file_ptr

def _init_virtual_io(self, file):
"""Initialize callback functions for sf_open_virtual()."""
@_ffi.callback("sf_vio_get_filelen")
Expand Down Expand Up @@ -1108,8 +1088,7 @@ def vio_tell(user_data):
def _handle_error(self, prefix=""):
"""Check the error flag of the SNDFILE* structure."""
self._check_if_closed()
err = _snd.sf_error(self._file)
self._handle_error_number(err, prefix)
self._handle_error_number(_snd.sf_error(self._file), prefix)

def _handle_error_number(self, err, prefix=""):
"""Pretty-print a numerical error code."""
Expand Down Expand Up @@ -1213,24 +1192,31 @@ def _prepare_read(self, start, stop, frames):

def _format_int(format, subtype, endian):
"""Return numeric ID for given format|subtype|endian combo."""
if not isinstance(format, (_unicode, str)):
raise TypeError("Invalid format: {0!r}".format(format))
try:
result = _formats[str(format).upper()]
result = _formats[format.upper()]
except KeyError:
raise ValueError("Invalid format string: {0!r}".format(format))
raise ValueError("Unknown format: {0!r}".format(format))
if subtype is None:
subtype = default_subtype(format)
if subtype is None:
raise TypeError(
"No default subtype for major format {0!r}".format(format))
elif not isinstance(subtype, (_unicode, str)):
raise TypeError("Invalid subtype: {0!r}".format(subtype))
try:
result |= _subtypes[str(subtype).upper()]
result |= _subtypes[subtype.upper()]
except KeyError:
raise ValueError("Invalid subtype string: {0!r}".format(subtype))
raise ValueError("Unknown subtype: {0!r}".format(subtype))
if endian is None:
endian = 'FILE'
elif not isinstance(endian, (_unicode, str)):
raise TypeError("Invalid endian-ness: {0!r}".format(endian))
try:
result |= _endians[str(endian).upper()
if endian is not None else 'FILE']
result |= _endians[endian.upper()]
except KeyError:
raise ValueError("Invalid endian-ness: {0!r}".format(endian))
raise ValueError("Unknown endian-ness: {0!r}".format(endian))

info = _ffi.new("SF_INFO*")
info.format = result
Expand All @@ -1241,6 +1227,77 @@ def _format_int(format, subtype, endian):
return result


def _check_mode(mode):
"""Check if mode is valid and return its integer representation."""
if not isinstance(mode, (_unicode, str)):
raise TypeError("Invalid mode: {0!r}".format(mode))
mode_set = set(mode)
if mode_set.difference('xrwb+') or len(mode) > len(mode_set):
raise ValueError("Invalid mode: {0!r}".format(mode))
if len(mode_set.intersection('xrw')) != 1:
raise ValueError("mode must contain exactly one of 'xrw'")

if '+' in mode_set:
mode_int = _snd.SFM_RDWR
elif 'r' in mode_set:
mode_int = _snd.SFM_READ
else:
mode_int = _snd.SFM_WRITE
return mode_int


def _create_info_struct(file, mode, samplerate, channels,
format, subtype, endian):
"""Check arguments and create SF_INFO struct."""
original_format = format
if format is None:
format = _get_format_from_filename(file, mode)
assert isinstance(format, (_unicode, str))
else:
if not isinstance(format, (_unicode, str)):
raise TypeError("Invalid format: {0!r}".format(format))

info = _ffi.new("SF_INFO*")
if 'r' not in mode or format.upper() == 'RAW':
if samplerate is None:
raise TypeError("samplerate must be specified")
info.samplerate = samplerate
if channels is None:
raise TypeError("channels must be specified")
info.channels = channels
info.format = _format_int(format, subtype, endian)
else:
if any(arg is not None for arg in (
samplerate, channels, original_format, subtype, endian)):
raise TypeError("Not allowed for existing files (except 'RAW'): "
"samplerate, channels, format, subtype, endian")
return info


def _get_format_from_filename(file, mode):
"""Return a format string obtained from file (or file.name).

If file already exists (= read mode), an empty string is returned on
error. If not, an exception is raised.
The return type will always be str or unicode (even if
file/file.name is a bytes object).

"""
format = ''
file = getattr(file, 'name', file)
try:
# This raises an exception if file is not a (Unicode/byte) string:
format = _os.path.splitext(file)[-1][1:]
# Convert bytes to unicode (raises AttributeError on Python 3 str):
format = format.decode()
except Exception:
pass
if format.upper() not in _formats and 'r' not in mode:
raise TypeError("No format specified and unable to get format from "
"file extension: {0!r}".format(file))
return format


def _format_str(format_int):
"""Return the string representation of a given numeric format."""
for dictionary in _formats, _subtypes, _endians:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_pysoundfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,13 +363,13 @@ def test_open_with_more_invalid_arguments():
assert "integer" in str(excinfo.value)
with pytest.raises(ValueError) as excinfo:
sf.SoundFile(filename_new, 'w', 44100, 2, format='WAF')
assert "Invalid format string" in str(excinfo.value)
assert "Unknown format" in str(excinfo.value)
with pytest.raises(ValueError) as excinfo:
sf.SoundFile(filename_new, 'w', 44100, 2, 'PCM16', format='WAV')
assert "Invalid subtype string" in str(excinfo.value)
assert "Unknown subtype" in str(excinfo.value)
with pytest.raises(ValueError) as excinfo:
sf.SoundFile(filename_new, 'w', 44100, 2, endian='BOTH', format='WAV')
assert "Invalid endian-ness" in str(excinfo.value)
assert "Unknown endian-ness" in str(excinfo.value)


def test_open_r_and_rplus_with_too_many_arguments():
Expand Down