Skip to content
Browse files

Merge branch 'master' into astropy

Staging for merge into Astropy.  This includes SVN revisions 2195:2344

Conflicts:
	MANIFEST.in
	README.txt
	docs/source/conf.py
	docs/source/developers_guide/developers_guide.rst

Conflicts:

	astropy/io/fits/__init__.py
	astropy/io/fits/card.py
	astropy/io/fits/column.py
	astropy/io/fits/convenience.py
	astropy/io/fits/core.py
	astropy/io/fits/diff.py
	astropy/io/fits/file.py
	astropy/io/fits/fitsrec.py
	astropy/io/fits/hdu/base.py
	astropy/io/fits/hdu/compressed.py
	astropy/io/fits/hdu/groups.py
	astropy/io/fits/hdu/hdulist.py
	astropy/io/fits/hdu/image.py
	astropy/io/fits/hdu/nonstandard.py
	astropy/io/fits/hdu/streaming.py
	astropy/io/fits/hdu/table.py
	astropy/io/fits/header.py
	astropy/io/fits/src/compressionmodule.c
	astropy/io/fits/tests/test_core.py
	astropy/io/fits/tests/test_diff.py
	astropy/io/fits/tests/test_image.py
	astropy/io/fits/tests/test_nonstandard.py
	astropy/io/fits/tests/test_table.py
	astropy/io/fits/tests/test_uint.py
	astropy/io/fits/util.py
	docs/io/fits/appendix/faq.rst
	docs/io/fits/appendix/history.rst
	docs/io/fits/users_guide/users_tutorial.rst
  • Loading branch information...
1 parent 8a9d023 commit 346517f3b6aec7c5f8a432464f264d0cd9704b0b @embray embray committed Nov 9, 2013
Showing with 5,701 additions and 2,383 deletions.
  1. +1 −0 .gitattributes
  2. +0 −2 astropy/io/fits/card.py
  3. +755 −247 astropy/io/fits/column.py
  4. +9 −0 astropy/io/fits/connect.py
  5. +22 −30 astropy/io/fits/convenience.py
  6. +14 −15 astropy/io/fits/diff.py
  7. +190 −57 astropy/io/fits/file.py
  8. +451 −198 astropy/io/fits/fitsrec.py
  9. +224 −87 astropy/io/fits/hdu/base.py
  10. +274 −68 astropy/io/fits/hdu/compressed.py
  11. +22 −23 astropy/io/fits/hdu/groups.py
  12. +44 −57 astropy/io/fits/hdu/hdulist.py
  13. +20 −14 astropy/io/fits/hdu/image.py
  14. +10 −2 astropy/io/fits/hdu/nonstandard.py
  15. +2 −2 astropy/io/fits/hdu/streaming.py
  16. +139 −218 astropy/io/fits/hdu/table.py
  17. +6 −31 astropy/io/fits/header.py
  18. +8 −8 astropy/io/fits/py3compat.py
  19. +34 −3 astropy/io/fits/src/compressionmodule.c
  20. +4 −0 astropy/io/fits/src/compressionmodule.h
  21. +1 −0 astropy/io/fits/tests/__init__.py
  22. +5 −2 astropy/io/fits/tests/test_connect.py
  23. +116 −0 astropy/io/fits/tests/test_core.py
  24. +0 −1 astropy/io/fits/tests/test_diff.py
  25. +0 −1 astropy/io/fits/tests/test_groups.py
  26. +10 −10 astropy/io/fits/tests/test_hdulist.py
  27. +43 −4 astropy/io/fits/tests/test_image.py
  28. +23 −0 astropy/io/fits/tests/test_nonstandard.py
  29. +211 −115 astropy/io/fits/tests/test_table.py
  30. +55 −33 astropy/io/fits/tests/test_uint.py
  31. +187 −29 astropy/io/fits/util.py
  32. +454 −46 cextern/cfitsio/cfileio.c
  33. +257 −14 cextern/cfitsio/changes.txt
  34. +75 −2 cextern/cfitsio/drvrfile.c
  35. +40 −10 cextern/cfitsio/drvrmem.c
  36. +7 −4 cextern/cfitsio/editcol.c
  37. +19 −3 cextern/cfitsio/eval_f.c
  38. +221 −114 cextern/cfitsio/fitscore.c
  39. +103 −80 cextern/cfitsio/fitsio.h
  40. +27 −23 cextern/cfitsio/fitsio2.h
  41. +5 −4 cextern/cfitsio/getcolb.c
  42. +5 −4 cextern/cfitsio/getcold.c
  43. +5 −4 cextern/cfitsio/getcole.c
  44. +5 −4 cextern/cfitsio/getcoli.c
  45. +10 −8 cextern/cfitsio/getcolj.c
  46. +5 −4 cextern/cfitsio/getcolk.c
  47. +8 −1 cextern/cfitsio/getcoll.c
  48. +5 −4 cextern/cfitsio/getcolui.c
  49. +5 −4 cextern/cfitsio/getcoluj.c
  50. +5 −4 cextern/cfitsio/getcoluk.c
  51. +36 −30 cextern/cfitsio/getkey.c
  52. +950 −360 cextern/cfitsio/imcompress.c
  53. +9 −9 cextern/cfitsio/iraffits.c
  54. +4 −3 cextern/cfitsio/longnam.h
  55. +274 −225 cextern/cfitsio/modkey.c
  56. +5 −4 cextern/cfitsio/putcolb.c
  57. +5 −4 cextern/cfitsio/putcold.c
  58. +5 −4 cextern/cfitsio/putcole.c
  59. +5 −4 cextern/cfitsio/putcoli.c
  60. +10 −8 cextern/cfitsio/putcolj.c
  61. +5 −4 cextern/cfitsio/putcolk.c
  62. +1 −1 cextern/cfitsio/putcolu.c
  63. +37 −37 cextern/cfitsio/putkey.c
  64. +44 −12 cextern/cfitsio/quantize.c
  65. +5 −0 cextern/cfitsio/region.c
  66. +48 −77 cextern/cfitsio/ricecomp.c
  67. +115 −6 docs/io/fits/appendix/history.rst
  68. +2 −1 docs/io/fits/index.rst
View
1 .gitattributes
@@ -0,0 +1 @@
+*.fits -text
View
2 astropy/io/fits/card.py
@@ -2,8 +2,6 @@
import copy
import re
-import string
-import sys
import warnings
import numpy as np
View
1,002 astropy/io/fits/column.py
@@ -1,8 +1,10 @@
# Licensed under a 3-clause BSD style license - see PYFITS.rst
+import copy
import operator
import re
import sys
+import warnings
import weakref
import numpy as np
@@ -46,6 +48,28 @@
# Note that only double precision floating point/complex are supported
FORMATORDER = ['L', 'B', 'I', 'J', 'K', 'D', 'M', 'A']
+# mapping from ASCII table TFORM data type to numpy data type
+# A: Character
+# I: Integer (32-bit)
+# J: Integer (64-bit; non-standard)
+# F: Float (32-bit; fixed decimal notation)
+# E: Float (32-bit; exponential notation)
+# D: Float (64-bit; exponential notation, always 64-bit by convention)
+ASCII2NUMPY = {'A': 'a', 'I': 'i4', 'J': 'i8', 'F': 'f4', 'E': 'f4',
+ 'D': 'f8'}
+
+# Maps FITS ASCII column format codes to the appropriate Python string
+# formatting codes for that type.
+ASCII2STR = {'A': 's', 'I': 'd', 'J': 'd', 'F': 'f', 'E': 'E', 'D': 'E'}
+
+# For each ASCII table format code, provides a default width (and decimal
+# precision) for when one isn't given explicitly in the column format
+ASCII_DEFAULT_WIDTHS= {'A': (1, 0), 'I': (10, 0), 'J': (15, 0),
+ 'E': (15, 7), 'F': (16, 7), 'D': (25, 17)}
+
+
+
+
# lists of column/field definition common names and keyword names, make
# sure to preserve the one-to-one correspondence when updating the list(s).
# Use lists, instead of dictionaries so the names can be displayed in a
@@ -55,9 +79,17 @@
KEYWORD_ATTRIBUTES = ['name', 'format', 'unit', 'null', 'bscale', 'bzero',
'disp', 'start', 'dim']
-# TFORM regular expression
-TFORMAT_RE = re.compile(r'(?P<repeat>^[0-9]*)(?P<dtype>[A-Za-z])'
- r'(?P<option>[!-~]*)')
+# TFORMn regular expression
+TFORMAT_RE = re.compile(r'(?P<repeat>^[0-9]*)(?P<format>[LXBIJKAEDCMPQ])'
+ r'(?P<option>[!-~]*)', re.I)
+
+# TFORMn for ASCII tables; two different versions depending on whether
+# the format is floating-point or not; allows empty values for width
+# in which case defaults are used
+TFORMAT_ASCII_RE = re.compile(r'(?:(?P<format>[AIJ])(?P<width>[0-9]*))|'
+ r'(?:(?P<formatf>[FED])'
+ r'(?:(?P<widthf>[0-9]+)\.'
+ r'(?P<precision>[0-9]+))?)')
# table definition keyword regular expression
TDEF_RE = re.compile(r'(?P<label>^T[A-Z]*)(?P<num>[1-9][0-9 ]*$)')
@@ -68,6 +100,10 @@
ASCIITNULL = 0 # value for ASCII table cell with value = TNULL
# this can be reset by user.
+# The default placeholder to use for NULL values in ASCII tables when
+# converting from binary to ASCII tables
+DEFAULT_ASCII_TNULL = '---'
+
class Delayed(object):
"""Delayed file-reading data."""
@@ -84,42 +120,69 @@ def __getitem__(self, key):
return self.hdu.data[key][self.field]
-class _ColumnFormat(str):
+class _BaseColumnFormat(str):
"""
- Represents a FITS column format.
-
- This is an enhancement over using a normal string for the format, since the
- repeat count, format code, and option are available as separate attributes,
- and smart comparison is used. For example 1J == J.
+ Base class for binary table column formats (just called _ColumnFormat)
+ and ASCII table column formats (_AsciiColumnFormat).
"""
- def __new__(cls, format):
- self = super(_ColumnFormat, cls).__new__(cls, format)
- self.repeat, self.format, self.option = _parse_tformat(format)
- self.format = self.format.upper()
- return self
-
def __eq__(self, other):
if not other:
return False
if isinstance(other, str):
- if not isinstance(other, _ColumnFormat):
+ if not isinstance(other, self.__class__):
try:
- other = _ColumnFormat(other)
+ other = self.__class__(other)
except ValueError:
return False
else:
return False
- a = (self.repeat, self.format, self.option)
- b = (other.repeat, other.format, other.option)
- return a == b
+ return self.canonical == other.canonical
def __hash__(self):
return hash(self.canonical)
@classmethod
+ def from_column_format(cls, format):
+ """Creates a column format object from another column format object
+ regardless of their type.
+
+ That is, this can convert a _ColumnFormat to an _AsciiColumnFormat
+ or vice versa at least in cases where a direct translation is possible.
+ """
+
+ return cls.from_recformat(format.recformat)
+
+
+class _ColumnFormat(_BaseColumnFormat):
+ """
+ Represents a FITS binary table column format.
+
+ This is an enhancement over using a normal string for the format, since the
+ repeat count, format code, and option are available as separate attributes,
+ and smart comparison is used. For example 1J == J.
+ """
+
+ def __new__(cls, format):
+ self = super(_ColumnFormat, cls).__new__(cls, format)
+ self.repeat, self.format, self.option = _parse_tformat(format)
+ self.format = self.format.upper()
+ if self.format in ('P', 'Q'):
+ # TODO: There should be a generic factory that returns either
+ # _FormatP or _FormatQ as appropriate for a given TFORMn
+ if self.format == 'P':
+ recformat = _FormatP.from_tform(format)
+ else:
+ recformat = _FormatQ.from_tform(format)
+ # Format of variable length arrays
+ self.p_format = recformat.format
+ else:
+ self.p_format = None
+ return self
+
+ @classmethod
def from_recformat(cls, recformat):
"""Creates a column format from a Numpy record dtype format."""
@@ -149,10 +212,85 @@ def canonical(self):
return '%s%s%s' % (repeat, self.format, self.option)
+class _AsciiColumnFormat(_BaseColumnFormat):
+ """Similar to _ColumnFormat but specifically for columns in ASCII tables.
+
+ The formats of ASCII table columns and binary table columns are inherently
+ incompatible in FITS. They don't support the same ranges and types of
+ values, and even reuse format codes in subtly different ways. For example
+ the format code 'Iw' in ASCII columns refers to any integer whose string
+ representation is at most w characters wide, so 'I' can represent
+ effectively any integer that will fit in a FITS column. Whereas for
+ binary tables 'I' very explicitly refers to a 16-bit signed integer.
+
+ Conversions between the two column formats can be performed using the
+ ``to/from_binary`` methods on this class, or the ``to/from_ascii``
+ methods on the `_ColumnFormat` class. But again, not all conversions are
+ possible and may result in a `ValueError`.
+ """
+
+ def __new__(cls, format):
+ self = super(_AsciiColumnFormat, cls).__new__(cls, format)
+ self.format, self.width, self.precision = _parse_ascii_tformat(format)
+
+ # This is to support handling logical (boolean) data from binary tables
+ # in an ASCII table
+ self._pseudo_logical = False
+ return self
+
+ def __hash__(self):
+ return hash(self.canonical)
+
+ @classmethod
+ def from_column_format(cls, format):
+ inst = cls.from_recformat(format.recformat)
+ # Hack
+ if format.format == 'L':
+ inst._pseudo_logical = True
+ return inst
+
+ @classmethod
+ def from_recformat(cls, recformat):
+ """Creates a column format from a Numpy record dtype format."""
+
+ return cls(_convert_ascii_format(recformat, reverse=True))
+
+ @lazyproperty
+ def recformat(self):
+ """Returns the equivalent Numpy record format string."""
+
+ return _convert_ascii_format(self)
+
+ @lazyproperty
+ def canonical(self):
+ """
+ Returns a 'canonical' string representation of this format.
+
+ This is in the proper form of Tw.d where T is the single character data
+ type code, w is the width in characters for this field, and d is the
+ number of digits after the decimal place (for format codes 'E', 'F',
+ and 'D' only).
+ """
+
+ if self.format in ('E', 'F', 'D'):
+ return '%s%s.%s' % (self.format, self.width, self.precision)
+
+ return '%s%s' % (self.format, self.width)
+
+
class _FormatX(str):
"""For X format in binary tables."""
- pass
+ def __new__(cls, repeat=1):
+ nbytes = ((repeat - 1) // 8) + 1
+ # use an array, even if it is only ONE u1 (i.e. use tuple always)
+ obj = super(_FormatX, cls).__new__(cls, repr((nbytes,)) + 'u1')
+ obj.repeat = repeat
+ return obj
+
+ @property
+ def tform(self):
+ return '%sX' % self.repeat
# TODO: Table column formats need to be verified upon first reading the file;
@@ -161,19 +299,25 @@ class _FormatX(str):
class _FormatP(str):
"""For P format in variable length table."""
- _formatp_re = re.compile(r'(?P<repeat>\d+)?P(?P<dtype>[A-Z])'
- '(?:\((?P<max>\d*)\))?')
+ # As far as I can tell from my reading of the FITS standard, a type code is
+ # *required* for P and Q formats; there is no default
+ _format_re_template = (r'(?P<repeat>\d+)?%s(?P<dtype>[LXBIJKAEDCM])'
+ '(?:\((?P<max>\d*)\))?')
+ _format_code = 'P'
+ _format_re = re.compile(_format_re_template % _format_code)
+ _descriptor_format = '2i4'
def __new__(cls, dtype, repeat=None, max=None):
- obj = super(_FormatP, cls).__new__(cls, '2i4')
+ obj = super(_FormatP, cls).__new__(cls, cls._descriptor_format)
+ obj.format = NUMPY2FITS[dtype]
obj.dtype = dtype
obj.repeat = repeat
obj.max = max
return obj
@classmethod
def from_tform(cls, format):
- m = cls._formatp_re.match(format)
+ m = cls._format_re.match(format)
if not m or m.group('dtype') not in FITS2NUMPY:
raise VerifyError('Invalid column format: %s' % format)
repeat = m.group('repeat')
@@ -187,7 +331,19 @@ def from_tform(cls, format):
def tform(self):
repeat = '' if self.repeat is None else self.repeat
max = '' if self.max is None else self.max
- return '%sP%s(%s)' % (repeat, NUMPY2FITS[self.dtype], max)
+ return '%s%s%s(%s)' % (repeat, self._format_code, self.format, max)
+
+
+class _FormatQ(_FormatP):
+ """Carries type description of the Q format for variable length arrays.
+
+ The Q format is like the P format but uses 64-bit integers in the array
+ descriptors, allowing for heaps stored beyond 2GB into a file.
+ """
+
+ _format_code = 'Q'
+ _format_re = re.compile(_FormatP._format_re_template % _format_code)
+ _descriptor_format = '2l4'
class Column(object):
@@ -198,8 +354,8 @@ class Column(object):
"""
def __init__(self, name=None, format=None, unit=None, null=None,
- bscale=None, bzero=None, disp=None, start=None,
- dim=None, array=None):
+ bscale=None, bzero=None, disp=None, start=None, dim=None,
+ array=None, ascii=None):
"""
Construct a `Column` by specifying attributes. All attributes
except `format` can be optional.
@@ -233,6 +389,20 @@ def __init__(self, name=None, format=None, unit=None, null=None,
dim : str, optional
column dimension corresponding to ``TDIM`` keyword
+
+ array : iterable, optional
+ a `list`, `numpy.ndarray` (or other iterable that can be used to
+ initialize an ndarray) providing initial data for this column.
+ The array will be automatically converted, if possible, to the data
+ format of the column. In the case where non-trivial ``bscale``
+ and/or ``bzero`` arguments are given, the values in the array must
+ be the *physical* values--that is, the values of column as if the
+ scaling has already been applied (the array stored on the column
+ object will then be converted back to its storage values).
+
+ ascii : bool, optional
+ set `True` if this describes a column for an ASCII table; this
+ may be required to disambiguate the column format
"""
if format is None:
@@ -252,18 +422,32 @@ def __init__(self, name=None, format=None, unit=None, null=None,
# input arrays can be just list or tuple, not required to be ndarray
# check format
- if not isinstance(format, _ColumnFormat):
- try:
- # legit FITS format?
- format = _ColumnFormat(format)
- recformat = format.recformat
- except ValueError:
- try:
- # legit recarray format?
- recformat = format
- format = _ColumnFormat.from_recformat(format)
- except ValueError:
- raise ValueError('Illegal format `%s`.' % format)
+ if ascii is None and not isinstance(format, _BaseColumnFormat):
+ # We basically have to guess what type of table this column is for.
+ if start and dim:
+ # This is impossible; this can't be a valid FITS column
+ raise ValueError(
+ 'Columns cannot have both a start (TCOLn) and dim '
+ '(TDIMn) option, since the former only applies to '
+ 'ASCII tables, and the latter is only valid for binary '
+ 'tables.')
+ elif start:
+ # Only ASCII table columns can have a 'start' option
+ guess_format = _AsciiColumnFormat
+ elif dim:
+ # Only binary tables can have a dim option
+ guess_format = _ColumnFormat
+ else:
+ # A safe guess which reflects the existing behavior of previous
+ # PyFITS versions
+ guess_format = _ColumnFormat
+
+ format, recformat = self._convert_format(format, guess_format)
+ elif not ascii and not isinstance(format, _BaseColumnFormat):
+ format, recformat = self._convert_format(format, _ColumnFormat)
+ elif ascii and not isinstance(format, _AsciiColumnFormat):
+ format, recformat = self._convert_format(format,
+ _AsciiColumnFormat)
self.format = format
# Zero-length formats are legal in the FITS format, but since they
@@ -276,6 +460,79 @@ def __init__(self, name=None, format=None, unit=None, null=None,
else:
self._phantom = False
+ # Awful hack to use for now to keep track of whether the column holds
+ # pseudo-unsigned int data
+ self._pseudo_unsigned_ints = False
+
+ # TODO: Perhaps offload option verification/handling to a separate
+ # method
+
+ # Validate null option
+ # Note: Enough code exists that thinks empty strings are sensible
+ # inputs for these options that we need to treat '' as None
+ if null is not None and null != '':
+ if isinstance(format, _AsciiColumnFormat):
+ null = str(null)
+ if len(null) > format.width:
+ warnings.warn(
+ "ASCII table null option (TNULLn) is longer than "
+ "the column's character width and will be truncated "
+ "(got %r)." % null)
+ else:
+ if not _is_int(null):
+ # Make this an exception instead of a warning, since any
+ # non-int value is meaningless
+ # TODO: We *might* be able to issue just a warning if we
+ # get an object that can be converted to an int, such as a
+ # string
+ raise TypeError('Column null option (TNULLn) must be an '
+ 'integer for binary table columns '
+ '(got %r).' % null)
+ tnull_formats = ('B', 'I', 'J', 'K')
+ if not (format.format in tnull_formats or
+ (format.format in ('P', 'Q') and
+ format.p_format in tnull_formats)):
+ # TODO: We should also check that TNULLn's integer value
+ # is in the range allowed by the column's format
+ warnings.warn('Column null option (TNULLn) is invalid '
+ 'for binary table columns of type %r '
+ '(got %r).' % (format, null))
+
+ # Validate the disp option
+ # TODO: Add full parsing and validation of TDISPn keywords
+ if disp is not None and null != '':
+ if not isinstance(disp, basestring):
+ raise TypeError('Column disp option (TDISPn) must be a '
+ 'string (got %r).' % disp)
+ if (isinstance(format, _AsciiColumnFormat) and
+ disp[0].upper() == 'L'):
+ # disp is at least one character long and has the 'L' format
+ # which is not recognized for ASCII tables
+ warnings.warn("Column disp option (TDISPn) may not use the "
+ "'L' format with ASCII table columns.")
+
+ # Validate the start option
+ if start is not None and start != '':
+ if not isinstance(format, _AsciiColumnFormat):
+ # The 'start' option only applies to ASCII columns
+ warnings.warn('Column start option (TBCOLn) is not allowed '
+ 'for binary table columns (got %r).' % start)
+ try:
+ start = int(start)
+ except (TypeError, ValueError):
+ pass
+
+ if not _is_int(start) and start < 1:
+ raise TypeError('Column start option (TBCOLn) must be a '
+ 'positive integer (got %r).' % start)
+
+ # Process TDIMn options
+ # ASCII table columns can't have a TDIMn keyword associated with them;
+ # for now we just issue a warning and ignore it.
+ # TODO: This should be checked by the FITS verification code
+ if dim is not None and isinstance(format, _AsciiColumnFormat):
+ warnings.warn('Column dim option (TDIMn) is not allowed for ASCII '
+ 'table columns (got %r).' % dim)
if isinstance(dim, basestring):
self._dims = _parse_tdim(dim)
elif isinstance(dim, tuple):
@@ -289,8 +546,7 @@ def __init__(self, name=None, format=None, unit=None, null=None,
"or a tuple containing the C-order dimensions for the column")
if self._dims:
- repeat = _parse_tformat(format)[0]
- if reduce(operator.mul, self._dims) > repeat:
+ if reduce(operator.mul, self._dims) > self.format.repeat:
raise ValueError(
"The repeat count of the column format %r for column %r "
"is fewer than the number of elements per the TDIM "
@@ -308,22 +564,23 @@ def __init__(self, name=None, format=None, unit=None, null=None,
array = chararray.array(array, itemsize=itemsize)
except ValueError:
# then try variable length array
+ # Note: This includes _FormatQ by inheritance
if isinstance(recformat, _FormatP):
array = _VLF(array, dtype=recformat.dtype)
else:
raise ValueError('Data is inconsistent with the '
'format `%s`.' % format)
- # scale the array back to storage values if there is bscale/bzero
+ array = self._convert_to_valid_data_type(array)
+
+ # We have required (through documentation) that arrays passed in to
+ # this constructor are already in their physical values, so we make
+ # note of that here
if isinstance(array, np.ndarray):
- # make a copy if scaled, so as not to corrupt the original array
- if bzero not in ['', None, 0] or bscale not in ['', None, 1]:
- if bzero not in ['', None, 0]:
- array = array - bzero
- if bscale not in ['', None, 1]:
- array = array / bscale
+ self._physical_values = True
+ else:
+ self._physical_values = False
- array = self._convert_to_valid_data_type(array)
self.array = array
def __repr__(self):
@@ -348,11 +605,16 @@ def __eq__(self, other):
def __hash__(self):
"""
Like __eq__, the hash of a column should be based on the unique column
- name and format, and be case-insensitive with respect to the column name.
+ name and format, and be case-insensitive with respect to the column
+ name.
"""
return hash((self.name.lower(), self.format))
+ @lazyproperty
+ def dtype(self):
+ return np.dtype(_convert_format(self.format))
+
def copy(self):
"""
Return a copy of this `Column`.
@@ -361,6 +623,29 @@ def copy(self):
tmp.__dict__ = self.__dict__.copy()
return tmp
+ @staticmethod
+ def _convert_format(format, cls):
+ """The format argument to this class's initializer may come in many
+ forms. This uses the given column format class ``cls`` to convert
+ to a format of that type.
+
+ TODO: There should be an abc base class for column format classes
+ """
+
+ try:
+ # legit FITS format?
+ format = cls(format)
+ recformat = format.recformat
+ except ValueError:
+ try:
+ # legit recarray format?
+ recformat = format
+ format = cls.from_recformat(format)
+ except ValueError:
+ raise ValueError('Illegal format `%s`.' % format)
+
+ return format, recformat
+
def _convert_to_valid_data_type(self, array):
# Convert the format to a type we understand
if isinstance(array, Delayed):
@@ -370,34 +655,54 @@ def _convert_to_valid_data_type(self, array):
else:
format = self.format
dims = self._dims
- if 'A' in format and 'P' not in format:
+ if 'P' in format or 'Q' in format:
+ return array
+ elif 'A' in format:
if array.dtype.char in 'SU':
if dims:
# The 'last' dimension (first in the order given
# in the TDIMn keyword itself) is the number of
# characters in each string
fsize = dims[-1]
else:
- fsize = int(_convert_format(format)[1:])
+ fsize = np.dtype(format.recformat).itemsize
return chararray.array(array, itemsize=fsize)
else:
- numpy_format = _convert_format(format)
- return _convert_array(array, np.dtype(numpy_format))
+ return _convert_array(array, np.dtype(format.recformat))
elif 'L' in format:
# boolean needs to be scaled back to storage values ('T', 'F')
if array.dtype == np.dtype('bool'):
return np.where(array == False, ord('F'), ord('T'))
else:
return np.where(array == 0, ord('F'), ord('T'))
- elif 'X' not in format and 'P' not in format:
- (repeat, fmt, option) = _parse_tformat(format)
- # Preserve byte order of the original array for now; see #77
- numpy_format = array.dtype.byteorder + _convert_format(fmt)
- return _convert_array(array, np.dtype(numpy_format))
elif 'X' in format:
return _convert_array(array, np.dtype('uint8'))
else:
- return array
+ # Preserve byte order of the original array for now; see #77
+ # TODO: For some reason we drop the format repeat here; need
+ # to investigate why that was and if it's something we can
+ # avoid doing...
+ new_format = _convert_format(format.format)
+ numpy_format = array.dtype.byteorder + new_format
+
+ # Handle arrays passed in as unsigned ints as pseudo-unsigned
+ # int arrays; blatantly tacked in here for now--we need columns
+ # to have explicit knowledge of whether they treated as
+ # pseudo-unsigned
+ bzeros = {2: np.uint16(2**15), 4: np.uint32(2**31),
+ 8: np.uint64(2**63)}
+ if (array.dtype.kind == 'u' and
+ array.dtype.itemsize in bzeros and
+ self.bscale in (1, None, '') and
+ self.bzero == bzeros[array.dtype.itemsize]):
+ # Basically the array is uint, has scale == 1.0, and the
+ # bzero is the appropriate value for a pseudo-unsigned
+ # integer of the input dtype, then go ahead and assume that
+ # uint is assumed
+ numpy_format = numpy_format.replace('i', 'u')
+ self._pseudo_unsigned_ints = True
+
+ return _convert_array(array, np.dtype(numpy_format))
class ColDefs(object):
@@ -411,19 +716,20 @@ class ColDefs(object):
"""
_padding_byte = '\x00'
+ _col_format_cls = _ColumnFormat
def __new__(cls, input, tbtype='BinTableHDU'):
from .hdu.table import TableHDU
if tbtype == 'BinTableHDU':
klass = cls
elif tbtype == 'TableHDU':
- klass = _ASCIIColDefs
+ klass = _AsciiColDefs
else:
raise ValueError('Invalid table type: %s.' % tbtype)
if isinstance(input, TableHDU):
- klass = _ASCIIColDefs
+ klass = _AsciiColDefs
return object.__new__(klass)
@@ -448,92 +754,158 @@ def __init__(self, input, tbtype='BinTableHDU'):
self._tbtype = tbtype
if isinstance(input, ColDefs):
- self.columns = [col.copy() for col in input.columns]
- # Prevent duplicating any additional work in the __init__
- return
-
- # if the input is a list of Columns
+ self._init_from_coldefs(input)
elif isinstance(input, (list, tuple)):
- for col in input:
- if not isinstance(col, Column):
- raise TypeError(
- 'Element %d in the ColDefs input is not a Column.'
- % input.index(col))
- self.columns = [col.copy() for col in input]
-
- # Construct columns from the fields of a record array
+ # if the input is a list of Columns
+ # TODO: Expand this to accept any iterable
+ self._init_from_sequence(input)
elif isinstance(input, np.ndarray) and input.dtype.fields is not None:
- self.columns = []
- for idx in range(len(input.dtype)):
- cname = input.dtype.names[idx]
- ftype = input.dtype.fields[cname][0]
- # String formats should have 'A' first
- if ftype.type == np.string_:
- format = 'A' + str(ftype.itemsize)
- else:
- format = _convert_format(ftype, reverse=True)
- # Determine the appropriate dimensions for items in the column
- # (typically just 1D)
- dim = input.dtype[idx].shape[::-1]
- if dim and (len(dim) > 1 or 'A' in format):
- if 'A' in format:
- # n x m string arrays must include the max string
- # length in their dimensions (e.g. l x n x m)
- dim = (input.dtype[idx].base.itemsize,) + dim
- dim = repr(dim).replace(' ', '')
- else:
- dim = None
- # Check for unsigned ints.
- if 'I' in format and ftype == np.dtype('uint16'):
- c = Column(name=cname, format=format,
- array=input.view(np.ndarray)[cname],
- dim=dim, bzero=np.uint16(2**15))
- elif 'J' in format and ftype == np.dtype('uint32'):
- c = Column(name=cname, format=format,
- array=input.view(np.ndarray)[cname],
- dim=dim, bzero=np.uint32(2**31))
- elif 'K' in format and ftype == np.dtype('uint64'):
- c = Column(name=cname, format=format,
- array=input.view(np.ndarray)[cname],
- dim=dim, bzero=np.uint64(2**63))
- else:
- c = Column(name=cname, format=format,
- array=input.view(np.ndarray)[cname], dim=dim)
- self.columns.append(c)
-
+ # Construct columns from the fields of a record array
+ self._init_from_array(input)
# Construct columns from fields in an HDU header
elif isinstance(input, _TableBaseHDU):
- hdr = input._header
- nfields = hdr['TFIELDS']
- self._width = hdr['NAXIS1']
- self._shape = hdr['NAXIS2']
-
- # go through header keywords to pick out column definition keywords
- # definition dictionaries for each field
- col_attributes = [{} for i in range(nfields)]
- for keyword, value in hdr.iteritems():
- key = TDEF_RE.match(keyword)
- try:
- keyword = key.group('label')
- except:
- continue # skip if there is no match
- if (keyword in KEYWORD_NAMES):
- col = int(key.group('num'))
- if col <= nfields and col > 0:
- idx = KEYWORD_NAMES.index(keyword)
- attr = KEYWORD_ATTRIBUTES[idx]
- col_attributes[col - 1][attr] = value
-
- # data reading will be delayed
- for col in range(nfields):
- col_attributes[col]['array'] = Delayed(input, col)
-
- # now build the columns
- self.columns = [Column(**attrs) for attrs in col_attributes]
- self._listener = weakref.proxy(input)
+ self._init_from_table(input)
else:
- raise TypeError('Input to ColDefs must be a table HDU or a list '
- 'of Columns.')
+ raise TypeError('Input to ColDefs must be a table HDU, a list '
+ 'of Columns, or a record/field array.')
+
+ def _init_from_coldefs(self, coldefs):
+ """Initialize from an existing ColDefs object (just copy the
+ columns and convert their formats if necessary).
+ """
+
+ self.columns = [self._copy_column(col) for col in coldefs]
+
+ def _init_from_sequence(self, columns):
+ for col in columns:
+ if not isinstance(col, Column):
+ raise TypeError(
+ 'Element %d in the ColDefs input is not a Column.'
+ % input.index(col))
+
+ self._init_from_coldefs(columns)
+
+ def _init_from_array(self, array):
+ self.columns = []
+ for idx in range(len(array.dtype)):
+ cname = array.dtype.names[idx]
+ ftype = array.dtype.fields[cname][0]
+ format = self._col_format_cls.from_recformat(ftype)
+
+ # Determine the appropriate dimensions for items in the column
+ # (typically just 1D)
+ dim = array.dtype[idx].shape[::-1]
+ if dim and (len(dim) > 1 or 'A' in format):
+ if 'A' in format:
+ # n x m string arrays must include the max string
+ # length in their dimensions (e.g. l x n x m)
+ dim = (array.dtype[idx].base.itemsize,) + dim
+ dim = repr(dim).replace(' ', '')
+ else:
+ dim = None
+
+ # Check for unsigned ints.
+ bzero = None
+ if 'I' in format and ftype == np.dtype('uint16'):
+ bzero = np.uint16(2**15)
+ elif 'J' in format and ftype == np.dtype('uint32'):
+ bzero = np.uint32(2**31)
+ elif 'K' in format and ftype == np.dtype('uint64'):
+ bzero = np.uint64(2**63)
+
+ c = Column(name=cname, format=format,
+ array=array.view(np.ndarray)[cname], bzero=bzero,
+ dim=dim)
+ self.columns.append(c)
+
+ def _init_from_table(self, table):
+ hdr = table._header
+ nfields = hdr['TFIELDS']
+ self._width = hdr['NAXIS1']
+ self._shape = hdr['NAXIS2']
+
+ # go through header keywords to pick out column definition keywords
+ # definition dictionaries for each field
+ col_attributes = [{} for i in range(nfields)]
+ for keyword, value in hdr.iteritems():
+ key = TDEF_RE.match(keyword)
+ try:
+ keyword = key.group('label')
+ except:
+ continue # skip if there is no match
+ if keyword in KEYWORD_NAMES:
+ col = int(key.group('num'))
+ if col <= nfields and col > 0:
+ idx = KEYWORD_NAMES.index(keyword)
+ attr = KEYWORD_ATTRIBUTES[idx]
+ if attr == 'format':
+ # Go ahead and convert the format value to the
+ # appropriate ColumnFormat container now
+ value = self._col_format_cls(value)
+ col_attributes[col - 1][attr] = value
+
+ # data reading will be delayed
+ for col in range(nfields):
+ col_attributes[col]['array'] = Delayed(table, col)
+
+ # now build the columns
+ self.columns = [Column(**attrs) for attrs in col_attributes]
+ self._listener = weakref.proxy(table)
+
+ def __copy__(self):
+ return self.__class__(self, self._tbtype)
+
+ def __deepcopy__(self, memo):
+ return self.__class__([copy.deepcopy(c, memo) for c in self.columns],
+ tbtype=self._tbtype)
+
+ def _copy_column(self, column):
+ """Utility function used currently only by _init_from_coldefs
+ to help convert columns from binary format to ASCII format or vice
+ versa if necessary (otherwise performs a straight copy).
+ """
+
+ if isinstance(column.format, self._col_format_cls):
+ # This column has a FITS format compatible with this column
+ # definitions class (that is ascii or binary)
+ return column.copy()
+
+ new_column = column.copy()
+
+ # Try to use the Numpy recformat as the equivalency between the
+ # two formats; if that conversion can't be made then these
+ # columns can't be transferred
+ # TODO: Catch exceptions here and raise an explicit error about
+ # column format conversion
+ new_column.format = self._col_format_cls.from_column_format(
+ column.format)
+
+ # Handle a few special cases of column format options that are not
+ # compatible between ASCII and binary tables
+ # TODO: This is sort of hacked in right now; we really need
+ # separate classes for ASCII and Binary table Columns, and they
+ # should handle formatting issues like these
+ if not isinstance(new_column.format, _AsciiColumnFormat):
+ # the column is a binary table column...
+ new_column.start = None
+ if new_column.null is not None:
+ # We can't just "guess" a value to represent null
+ # values in the new column, so just disable this for
+ # now; users may modify it later
+ new_column.null = None
+ else:
+ # the column is an ASCII table column...
+ if new_column.null is not None:
+ new_column.null = DEFAULT_ASCII_TNULL
+ if (new_column.disp is not None and
+ new_column.disp.upper().startswith('L')):
+ # ASCII columns may not use the logical data display format;
+ # for now just drop the TDISPn option for this column as we
+ # don't have a systematic conversion of boolean data to ASCII
+ # tables yet
+ new_column.disp = None
+
+ return new_column
def __getattr__(self, name):
"""
@@ -556,12 +928,21 @@ def __getattr__(self, name):
raise AttributeError(name)
@lazyproperty
+ def dtype(self):
+ recformats = [f for idx, f in enumerate(self._recformats)
+ if not self[idx]._phantom]
+ formats = ','.join(recformats)
+ names = [n for idx, n in enumerate(self.names)
+ if not self[idx]._phantom]
+ return np.rec.format_parser(formats, names, None).dtype
+
+ @lazyproperty
def _arrays(self):
return [col.array for col in self.columns]
@lazyproperty
def _recformats(self):
- return [_convert_format(fmt) for fmt in self.formats]
+ return [fmt.recformat for fmt in self.formats]
@lazyproperty
def _dims(self):
@@ -581,7 +962,9 @@ def __len__(self):
def __repr__(self):
rep = 'ColDefs('
- if self.columns:
+ if hasattr(self, 'columns') and self.columns:
+ # The hasattr check is mostly just useful in debugging sessions
+ # where self.columns may not be defined yet
rep += '\n '
rep += '\n '.join([repr(c) for c in self.columns])
rep += '\n'
@@ -636,7 +1019,9 @@ def add_col(self, column):
self._arrays.append(column.array)
# Obliterate caches of certain things
+ del self.dtype
del self._recformats
+ del self._dims
self.columns.append(column)
@@ -661,7 +1046,9 @@ def del_col(self, col_name):
del self._arrays[indx]
# Obliterate caches of certain things
+ del self.dtype
del self._recformats
+ del self._dims
del self.columns[indx]
@@ -770,7 +1157,7 @@ def info(self, attrib='all', output=None):
if attr not in KEYWORD_ATTRIBUTES:
output.write("'%s' is not an attribute of the column "
"definitions.\n" % attr)
- continue
+ continue
output.write("%s:\n" % attr)
output.write(' %s\n' % getattr(self, attr + 's'))
else:
@@ -780,62 +1167,84 @@ def info(self, attrib='all', output=None):
return ret
class _AsciiColDefs(ColDefs):
    """ColDefs implementation for ASCII tables.

    In addition to the base ColDefs behavior this tracks the layout of the
    fixed-width text fields: the starting text column of each field
    (``starts``), the width of each field (``spans``), and the total record
    width (``_width``).
    """

    _padding_byte = ' '
    _col_format_cls = _AsciiColumnFormat

    def __init__(self, input, tbtype='TableHDU'):
        super(_AsciiColDefs, self).__init__(input, tbtype)

        # if the format of an ASCII column has no width, add one
        if not isinstance(input, _AsciiColDefs):
            self._update_field_metrics()
        else:
            # The field layout was already computed by the source ColDefs;
            # copy it rather than recomputing
            for idx, s in enumerate(input.starts):
                self.columns[idx].start = s

            self._spans = input.spans
            self._width = input._width

    @lazyproperty
    def dtype(self):
        """Numpy dtype mapping each field name to a fixed-width byte-string
        field positioned at that field's (zero-based) starting offset within
        a record.
        """

        # NOTE: An unused ``_itemsize`` computation
        # (``self.spans[-1] + self.starts[-1] - 1``) was removed here; the
        # dtype's total itemsize is determined by the field offsets/widths
        dtype = {}

        for j in range(len(self)):
            data_type = 'S' + str(self.spans[j])
            dtype[self.names[j]] = (data_type, self.starts[j] - 1)

        return np.dtype(dtype)

    @property
    def spans(self):
        """A list of the widths of each field in the table."""

        return self._spans

    @lazyproperty
    def _recformats(self):
        if len(self) == 1:
            widths = []
        else:
            widths = [y - x for x, y in pairwise(self.starts)]

        # Widths is the width of each field *including* any space between
        # fields; this is so that we can map the fields to string records in
        # a Numpy recarray
        widths.append(self._width - self.starts[-1] + 1)
        return ['a' + str(w) for w in widths]

    def add_col(self, column):
        super(_AsciiColDefs, self).add_col(column)
        # The field layout changes whenever a column is added
        self._update_field_metrics()

    def del_col(self, col_name):
        super(_AsciiColDefs, self).del_col(col_name)
        # The field layout changes whenever a column is removed
        self._update_field_metrics()

    def _update_field_metrics(self):
        """
        Updates the list of the start columns, the list of the widths of each
        field, and the total width of each record in the table.
        """

        spans = [0] * len(self.columns)
        end_col = 0  # Refers to the ASCII text column, not the table col
        for idx, col in enumerate(self.columns):
            width = col.format.width

            # Update the start columns and column span widths taking into
            # account the case that the starting column of a field may not
            # be the column immediately after the previous field
            if not col.start:
                col.start = end_col + 1
            end_col = col.start + width - 1
            spans[idx] = width

        self._spans = spans
        self._width = end_col
class _VLF(np.ndarray):
@@ -853,12 +1262,12 @@ def __new__(cls, input, dtype='a'):
try:
# this handles ['abc'] and [['a','b','c']]
# equally, beautiful!
- input = map(lambda x: chararray.array(x, itemsize=1), input)
+ input = [chararray.array(x, itemsize=1) for x in input]
except:
raise ValueError('Inconsistent input data array: %s' % input)
a = np.array(input, dtype=np.object)
- self = np.ndarray.__new__(cls, shape=(len(input)), buffer=a,
+ self = np.ndarray.__new__(cls, shape=(len(input),), buffer=a,
dtype=np.object)
self.max = 0
self.element_dtype = dtype
@@ -934,7 +1343,7 @@ def _get_index(names, key):
return indx
-def _unwrapx(input, output, nx):
+def _unwrapx(input, output, repeat):
"""
Unwrap the X format column into a Boolean array.
@@ -944,43 +1353,43 @@ def _unwrapx(input, output, nx):
input ``Uint8`` array of shape (`s`, `nbytes`)
output
- output Boolean array of shape (`s`, `nx`)
+ output Boolean array of shape (`s`, `repeat`)
- nx
+ repeat
number of bits
"""
pow2 = np.array([128, 64, 32, 16, 8, 4, 2, 1], dtype='uint8')
- nbytes = ((nx - 1) // 8) + 1
+ nbytes = ((repeat - 1) // 8) + 1
for i in range(nbytes):
_min = i * 8
- _max = min((i + 1) * 8, nx)
+ _max = min((i + 1) * 8, repeat)
for j in range(_min, _max):
output[..., j] = np.bitwise_and(input[..., i], pow2[j - i * 8])
-def _wrapx(input, output, nx):
+def _wrapx(input, output, repeat):
"""
Wrap the X format column Boolean array into an ``UInt8`` array.
Parameters
----------
input
- input Boolean array of shape (`s`, `nx`)
+ input Boolean array of shape (`s`, `repeat`)
output
output ``Uint8`` array of shape (`s`, `nbytes`)
- nx
+ repeat
number of bits
"""
output[...] = 0 # reset the output
- nbytes = ((nx - 1) // 8) + 1
- unused = nbytes * 8 - nx
+ nbytes = ((repeat - 1) // 8) + 1
+ unused = nbytes * 8 - repeat
for i in range(nbytes):
_min = i * 8
- _max = min((i + 1) * 8, nx)
+ _max = min((i + 1) * 8, repeat)
for j in range(_min, _max):
if j != _min:
np.left_shift(output[..., i], 1, output[..., i])
@@ -990,9 +1399,9 @@ def _wrapx(input, output, nx):
np.left_shift(output[..., i], unused, output[..., i])
-def _makep(input, desp_output, format, nrows=None):
+def _makep(array, descr_output, format, nrows=None):
"""
- Construct the P format column array, both the data descriptors and
+ Construct the P (or Q) format column array, both the data descriptors and
the data. It returns the output "data" array of data type `dtype`.
The descriptor location will have a zero offset for all columns
@@ -1001,26 +1410,29 @@ def _makep(input, desp_output, format, nrows=None):
Parameters
----------
- input
+ array
input object array
- desp_output
- output "descriptor" array of data type ``Int32``--must be nrows wide in
- its first dimension
+ descr_output
+ output "descriptor" array of data type int32 (for P format arrays) or
+ int64 (for Q format arrays)--must be nrows long in its first dimension
format
- the _FormatP object reperesenting the format of the variable array
+ the _FormatP object representing the format of the variable array
nrows : int, optional
number of rows to create in the column; defaults to the number of rows
in the input array
"""
+ # TODO: A great deal of this is redundant with FITS_rec._convert_p; see if
+ # we can merge the two somehow.
+
_offset = 0
if not nrows:
- nrows = len(input)
- n = min(len(input), nrows)
+ nrows = len(array)
+ n = min(len(array), nrows)
data_output = _VLF([None] * nrows, dtype=format.dtype)
@@ -1030,8 +1442,8 @@ def _makep(input, desp_output, format, nrows=None):
_nbytes = np.array([], dtype=format.dtype).itemsize
for idx in range(nrows):
- if idx < len(input):
- rowval = input[idx]
+ if idx < len(array):
+ rowval = array[idx]
else:
if format.dtype == 'a':
rowval = ' ' * data_output.max
@@ -1043,30 +1455,85 @@ def _makep(input, desp_output, format, nrows=None):
else:
data_output[idx] = np.array(rowval, dtype=format.dtype)
- desp_output[idx, 0] = len(data_output[idx])
- desp_output[idx, 1] = _offset
+ descr_output[idx, 0] = len(data_output[idx])
+ descr_output[idx, 1] = _offset
_offset += len(data_output[idx]) * _nbytes
return data_output
def _parse_tformat(tform):
    """Parse ``TFORMn`` keyword for a binary table into a
    ``(repeat, format, option)`` tuple.

    Raises `VerifyError` if the value does not match the binary-table TFORM
    grammar (or is not a string at all).
    """

    # A bare ``except:`` previously hid the real failure modes here (and
    # would even swallow KeyboardInterrupt); only the two ways the match can
    # legitimately fail are handled now: a non-string input (AttributeError
    # on .strip(), TypeError from the regex) or a non-matching string.
    try:
        match = TFORMAT_RE.match(tform.strip())
    except (AttributeError, TypeError):
        match = None

    if not match:
        # TODO: Maybe catch this error use a default type (bytes, maybe?) for
        # unrecognized column types. As long as we can determine the correct
        # byte width somehow..
        raise VerifyError('Format %r is not recognized.' % tform)

    repeat, format, option = match.groups()

    # An omitted repeat count defaults to 1
    if repeat == '':
        repeat = 1
    else:
        repeat = int(repeat)

    return (repeat, format.upper(), option)
+
+
def _parse_ascii_tformat(tform):
    """Parse the ``TFORMn`` keywords for ASCII tables into a
    ``(format, width, precision)`` tuple (the latter is zero unless
    format is one of 'E', 'F', or 'D').

    Raises `VerifyError` if the format string is unrecognized or if the
    width/precision values are invalid.
    """

    match = TFORMAT_ASCII_RE.match(tform.strip())
    if not match:
        raise VerifyError('Format %r is not recognized.' % tform)

    # Be flexible on case
    format = match.group('format')
    if format is None:
        # Floating point format
        format = match.group('formatf').upper()
        width = match.group('widthf') or 0
        precision = match.group('precision') or 1
    else:
        format = format.upper()
        width = match.group('width') or 0
        precision = 0

    def convert_int(val):
        # Validate a width/precision value parsed from the format string
        msg = ('Format %r is not valid--field width and decimal precision '
               'must be positive integers.')
        try:
            val = int(val)
        except (ValueError, TypeError):
            raise VerifyError(msg % tform)

        if val <= 0:
            raise VerifyError(msg % tform)

        return val

    if width and precision:
        # This should only be the case for floating-point formats
        width, precision = convert_int(width), convert_int(precision)
    elif width:
        # Just for integer/string formats; ignore precision
        width = convert_int(width)
    else:
        # For any format, if width was unspecified use the set defaults
        width, precision = ASCII_DEFAULT_WIDTHS[format]

    if precision >= width:
        # BUGFIX: this previously used ``&`` instead of ``%``, so the
        # intended VerifyError raised a TypeError instead
        raise VerifyError("Format %r not valid--the number of decimal digits "
                          "must be less than the format's total width %s." %
                          (tform, width))

    return format, width, precision
def _parse_tdim(tdim):
@@ -1155,13 +1622,11 @@ def _convert_fits2record(format):
output_format = repeat_str + FITS2NUMPY[dtype]
elif dtype == 'X':
- nbytes = ((repeat - 1) // 8) + 1
- # use an array, even if it is only ONE u1 (i.e. use tuple always)
- output_format = _FormatX(repr((nbytes,)) + 'u1')
- output_format._nx = repeat
-
+ output_format = _FormatX(repeat)
elif dtype == 'P':
output_format = _FormatP.from_tform(format)
+ elif dtype == 'Q':
+ output_format = _FormatQ.from_tform(format)
elif dtype == 'F':
output_format = 'f8'
else:
@@ -1175,22 +1640,22 @@ def _convert_record2fits(format):
Convert record format spec to FITS format spec.
"""
- if isinstance(format, np.dtype):
- shape = format.shape
- kind = format.base.kind
- option = str(format.base.itemsize)
- if kind in ('U', 'S'):
- kind = 'a'
- dtype = kind
+ if not isinstance(format, np.dtype):
+ format = np.dtype(format)
- ndims = len(shape)
- repeat = 1
- if ndims > 0:
- nel = np.array(shape, dtype='i8').prod()
- if nel > 1:
- repeat = nel
- else:
- repeat, dtype, option = _parse_tformat(format)
+ shape = format.shape
+ kind = format.base.kind
+ option = str(format.base.itemsize)
+ if kind in ('U', 'S'):
+ kind = 'a'
+ dtype = kind
+
+ ndims = len(shape)
+ repeat = 1
+ if ndims > 0:
+ nel = np.array(shape, dtype='i8').prod()
+ if nel > 1:
+ repeat = nel
if dtype == 'a':
# This is a kludge that will place string arrays into a
@@ -1225,23 +1690,66 @@ def _convert_format(format, reverse=False):
return _convert_fits2record(format)
def _convert_ascii_format(format, reverse=False):
    """Convert ASCII table format spec to record format spec.

    With ``reverse=True`` the conversion runs the other way: from a Numpy
    dtype (or dtype spec) to an ASCII table ``TFORMn`` code.
    """

    if reverse:
        if not isinstance(format, np.dtype):
            format = np.dtype(format)

        kind = format.base.kind
        itemsize = format.base.itemsize
        recformat = kind + str(itemsize)
        if kind in ('U', 'S'):
            kind = 'a'

        if kind == 'a':
            return 'A' + str(itemsize)

        if NUMPY2FITS.get(recformat) == 'L':
            # Special case for logical/boolean types--for ASCII tables we
            # represent these as single character columns containing 'T' or
            # 'F' (a la the storage format for Logical columns in binary
            # tables)
            return 'A1'

        if kind == 'i':
            # Use for the width the maximum required to represent integers
            # of that byte size plus 1 for signs, but use a minimum of the
            # default width (to keep with existing behavior)
            digits = 1 + len(str(2 ** (itemsize * 8)))
            return 'I' + str(max(digits, ASCII_DEFAULT_WIDTHS['I'][0]))

        if kind == 'f':
            # This is tricky, but go ahead and use D if float-64, and E if
            # float-32 with their default widths
            fits_format = 'D' if itemsize >= 8 else 'E'
            width_spec = '.'.join(str(w)
                                  for w in ASCII_DEFAULT_WIDTHS[fits_format])
            return fits_format + width_spec

        # TODO: There may be reasonable ways to represent other Numpy types
        # so let's see what other possibilities there are besides just 'a',
        # 'i', and 'f'.  If it doesn't have a reasonable ASCII representation
        # then raise an exception (currently falls through, returning None)
    else:
        format, width, precision = _parse_ascii_tformat(format)

        # This gives a sensible "default" dtype for a given ASCII format code
        recformat = ASCII2NUMPY[format]

        # The following logic is taken from CFITSIO:
        if format == 'I' and width <= 4:
            # For integers, if the width <= 4 we can safely use 16-bit ints
            # for all values [for the non-standard J format code just always
            # force 64-bit]
            recformat = 'i2'
        elif (format == 'F' and width > 7) or (format == 'E' and
                                               precision > 6):
            # 32-bit floats (the default) may not be accurate enough to
            # support all values that can fit in this field, so upgrade to
            # 64-bit
            recformat = 'f8'
        elif format == 'A':
            recformat += str(width)

        return recformat
View
9 astropy/io/fits/connect.py
@@ -216,10 +216,19 @@ def write_table_fits(input, output, overwrite=False):
if input.masked:
table_hdu = BinTableHDU(np.array(input.filled()))
for col in table_hdu.columns:
+ # Binary FITS tables support TNULL *only* for integer data columns
+ # TODO: Determine a schema for handling non-integer masked columns
+ # in FITS (if at all possible)
+ int_formats = ('B', 'I', 'J', 'K')
+ if not (col.format in int_formats or
+ col.format.p_format in int_formats):
+ continue
+
# The astype is necessary because if the string column is less
# than one character, the fill value will be N/A by default which
# is too long, and so no values will get masked.
fill_value = input[col.name].get_fill_value()
+
col.null = fill_value.astype(input[col.name].dtype)
else:
table_hdu = BinTableHDU(np.array(input))
View
52 astropy/io/fits/convenience.py
@@ -55,18 +55,18 @@
"""
-import gzip
import os
import numpy as np
-from .file import PYTHON_MODES, _File
+from .file import FILE_MODES, _File
from .hdu.base import _BaseHDU, _ValidHDU
from .hdu.hdulist import fitsopen
from .hdu.image import PrimaryHDU, ImageHDU
from .hdu.table import BinTableHDU
from .header import Header
# BUGFIX: ``fileobj_closed`` was previously listed twice in this import
from .util import (fileobj_closed, fileobj_name, fileobj_mode, _is_int)
from ...utils import deprecated
@@ -555,7 +555,7 @@ def info(filename, output=None, **kwargs):
*Note:* This function sets ``ignore_missing_end=True`` by default.
"""
- mode, closed = _get_file_mode(filename, default='copyonwrite')
+ mode, closed = _get_file_mode(filename, default='readonly')
# Set the default value for the ignore_missing_end parameter
if not 'ignore_missing_end' in kwargs:
kwargs['ignore_missing_end'] = True
@@ -613,12 +613,14 @@ def tabledump(filename, datafile=None, cdfile=None, hfile=None, ext=1,
# and leave the file in the same state (opened or closed) as when
# the function was called
- mode, closed = _get_file_mode(filename, default='copyonwrite')
+ mode, closed = _get_file_mode(filename, default='readonly')
f = fitsopen(filename, mode=mode)
# Create the default data file name if one was not provided
if not datafile:
+ # TODO: Really need to provide a better way to access the name of any
+ # files underlying an HDU
root, tail = os.path.splitext(f._HDUList__file.name)
datafile = root + '_' + repr(ext) + '.txt'
@@ -723,12 +725,12 @@ def _getext(filename, mode, *args, **kwargs):
raise TypeError('Too many positional arguments.')
if (ext is not None and
- not (_is_int(ext) or
- (isinstance(ext, tuple) and len(ext) == 2 and
- isinstance(ext[0], basestring) and _is_int(ext[1])))):
- raise ValueError(
- 'The ext keyword must be either an extension number '
- '(zero-indexed) or a (extname, extver) tuple.')
+ not (_is_int(ext) or
+ (isinstance(ext, tuple) and len(ext) == 2 and
+ isinstance(ext[0], basestring) and _is_int(ext[1])))):
+ raise ValueError(
+ 'The ext keyword must be either an extension number '
+ '(zero-indexed) or a (extname, extver) tuple.')
if extname is not None and not isinstance(extname, basestring):
raise ValueError('The extname argument must be a string.')
if extver is not None and not _is_int(extver):
@@ -785,32 +787,22 @@ def _stat_filename_or_fileobj(filename):
return name, closed, noexist_or_empty
def _get_file_mode(filename, default='readonly'):
    """
    Allow file object to already be opened in any of the valid modes and
    leave the file in the same state (opened or closed) as when the function
    was called.

    Returns a ``(mode, closed)`` tuple, where ``mode`` is the PyFITS-specific
    mode name to open the file with and ``closed`` indicates whether the
    input file object (if any) was already closed.
    """

    mode = default
    closed = fileobj_closed(filename)

    fmode = fileobj_mode(filename)
    if fmode is not None:
        # Map the raw file mode (e.g. 'rb+') to the corresponding
        # PyFITS-specific mode; not every raw mode is usable for FITS files
        mode = FILE_MODES.get(fmode)
        if mode is None:
            raise IOError(
                "File mode of the input file object (%r) cannot be used to "
                "read/write FITS files." % fmode)

    return mode, closed
View
29 astropy/io/fits/diff.py
@@ -13,23 +13,15 @@
import glob
import inspect
import io
-import os
import textwrap
from collections import defaultdict
from itertools import islice, izip
-try:
- from functools import reduce
-except ImportError:
- # Python versions (i.e. 2.5) that don't have functools.reduce will have the
- # reduce() builtin
- pass
-
import numpy as np
-from numpy import char
from ... import __version__
+from ...extern.six.moves import reduce
from ...utils import indent
from .card import Card, BLANK_CARD
from .header import Header
@@ -360,6 +352,9 @@ class HDUDiff(_BaseDiff):
- `diff_extvers`: If the two HDUS have different EXTVER values, this
contains a 2-tuple of the different extension versions.
+ - `diff_extlevels`: If the two HDUs have different EXTLEVEL values, this
+ contains a 2-tuple of the different extension levels.
+
- `diff_extension_types`: If the two HDUs have different XTENSION values,
this contains a 2-tuple of the different extension types.
@@ -390,6 +385,7 @@ def __init__(self, a, b, ignore_keywords=[], ignore_comments=[],
self.diff_extnames = ()
self.diff_extvers = ()
+ self.diff_extlevels = ()
self.diff_extension_types = ()
self.diff_headers = None
self.diff_data = None
@@ -400,12 +396,11 @@ def _diff(self):
if self.a.name != self.b.name:
self.diff_extnames = (self.a.name, self.b.name)
- # TODO: All extension headers should have a .extver attribute;
- # currently they have a hidden ._extver attribute, but there's no
- # reason it should be hidden
- if self.a.header.get('EXTVER') != self.b.header.get('EXTVER'):
- self.diff_extvers = (self.a.header.get('EXTVER'),
- self.b.header.get('EXTVER'))
+ if self.a.ver != self.b.ver:
+ self.diff_extvers = (self.a.ver, self.b.ver)
+
+ if self.a.level != self.b.level:
+ self.diff_extlevels = (self.a.level, self.b.level)
if self.a.header.get('XTENSION') != self.b.header.get('XTENSION'):
self.diff_extension_types = (self.a.header.get('XTENSION'),
@@ -444,6 +439,10 @@ def _report(self):
self._writeln(u(" Extension versions differ:\n a: %s\n b: %s") %
self.diff_extvers)
+ if self.diff_extlevels:
+ self._writeln(" Extension levels differ:\n a: %s\n b: %s" %
+ self.diff_extlevels)
+
if not self.diff_headers.identical:
self._fileobj.write(u('\n'))
self._writeln(u(" Headers contain differences:"))
View
247 astropy/io/fits/file.py
@@ -1,12 +1,10 @@
# Licensed under a 3-clause BSD style license - see PYFITS.rst
-from __future__ import division
-from __future__ import with_statement
+from __future__ import division, with_statement
import gzip
import mmap
import os
-import sys
import tempfile
import urllib
import warnings
@@ -18,12 +16,38 @@
from .util import (isreadable, iswritable, isfile, fileobj_open, fileobj_name,
fileobj_closed, fileobj_mode, _array_from_file,
_array_to_file, _write_string, b)
-from ...utils import deprecated
from ...utils.exceptions import AstropyUserWarning
-# File object open modes
-PYTHON_MODES = {'readonly': 'rb', 'copyonwrite': 'rb', 'update': 'rb+',
- 'append': 'ab+', 'ostream': 'wb', 'denywrite': 'rb'}
+
# Maps PyFITS-specific file mode names to the appropriate file modes to use
# for the underlying raw files
# TODO: This should probably be renamed IO_FITS_MODES or something, but since
# it's used primarily internally I'm going to leave PYFITS in the name for now
# for in the off chance any third-party software is trying to do anything with
# this object.
PYFITS_MODES = {
    'readonly': 'rb',
    'copyonwrite': 'rb',
    'update': 'rb+',
    'append': 'ab+',
    'ostream': 'wb',
    'denywrite': 'rb'}

# This is the old name of the PYFITS_MODES dict; it is maintained here for
# backwards compatibility and should be removed no sooner than PyFITS 3.4
PYTHON_MODES = PYFITS_MODES

# Maps OS-level file modes to the appropriate PyFITS specific mode to use
# when given file objects but no mode specified; obviously in PYFITS_MODES
# there are overlaps; for example 'readonly' and 'denywrite' both require
# the file to be opened in 'rb' mode. But 'readonly' is the default
# behavior for such files if not otherwise specified.
# Note: 'ab' is only supported for 'ostream' which is output-only.
FILE_MODES = {
    'rb': 'readonly', 'rb+': 'update',
    'wb': 'ostream', 'wb+': 'update',
    'ab': 'ostream', 'ab+': 'append'}
+
# readonly actually uses copyonwrite for mmap so that readonly without mmap and
# with mmap still have to same behavior with regard to updating the array. To
@@ -52,10 +76,11 @@ class _File(object):
# See self._test_mmap
_mmap_available = None
- def __init__(self, fileobj=None, mode='readonly', memmap=False):
+ def __init__(self, fileobj=None, mode=None, memmap=False, clobber=False):
if fileobj is None:
self.__file = None
self.closed = False
+ self.binary = True
self.mode = mode
self.memmap = memmap
self.compression = None
@@ -66,27 +91,43 @@ def __init__(self, fileobj=None, mode='readonly', memmap=False):
else:
self.simulateonly = False
- if mode not in PYTHON_MODES:
+ if mode is None:
+ if _is_random_access_file_backed(fileobj):
+ fmode = fileobj_mode(fileobj)
+ # If the mode is unsupported just leave it as None; we'll
+ # catch this case below
+ mode = FILE_MODES.get(fmode)
+ else:
+ mode = 'readonly' # The default
+
+ if mode not in PYFITS_MODES:
raise ValueError("Mode '%s' not recognized" % mode)
- if (isinstance(fileobj, basestring) and mode != 'append' and
- not os.path.exists(fileobj) and
- not os.path.splitdrive(fileobj)[0]):
- #
- # Not writing file and file does not exist on local machine and
- # name does not begin with a drive letter (Windows), try to
- # get it over the web.
- #
+ if (isinstance(fileobj, basestring) and
+ mode not in ('ostream', 'append') and
+ not os.path.exists(fileobj)):
+
+ # Not writing file and file does not exist on local machine and
+ # name does not begin with a drive letter (Windows), try to get it
+ # over the web.
try:
- self.name, _ = urllib.urlretrieve(fileobj)
- except (TypeError, ValueError):
+ if not os.path.splitdrive(fileobj)[0]:
+ # Basically if the filename (on Windows anyways) doesn't
+ # have a drive letter try to open it as a URL
+ self.name, _ = urllib.urlretrieve(fileobj)
+ else:
+ # Otherwise the file was already not found so just raise
+ # a ValueError
+ raise ValueError("File not found")
+ except (TypeError, ValueError, IOError):
# A couple different exceptions can occur here when passing a
# filename into urlretrieve in Python 3
raise IOError('File does not exist: %r' % fileobj)
else:
self.name = fileobj_name(fileobj)
self.closed = False
+ self.binary = True
self.mode = mode
self.memmap = memmap
@@ -99,12 +140,12 @@ def __init__(self, fileobj=None, mode='readonly', memmap=False):
self.writeonly = False
# Initialize the internal self.__file object
- if isfile(fileobj) or isinstance(fileobj, gzip.GzipFile):
- self._open_fileobj(fileobj, mode)
+ if _is_random_access_file_backed(fileobj):
+ self._open_fileobj(fileobj, mode, clobber)
elif isinstance(fileobj, basestring):
- self._open_filename(fileobj, mode)
+ self._open_filename(fileobj, mode, clobber)
else:
- self._open_filelike(fileobj, mode)
+ self._open_filelike(fileobj, mode, clobber)
if isinstance(fileobj, gzip.GzipFile):
self.compression = 'gzip'
@@ -159,7 +200,15 @@ def readable(self):
def read(self, size=None):
if not hasattr(self.__file, 'read'):
raise EOFError
- return self.__file.read(size)
+ try:
+ return self.__file.read(size)
+ except IOError:
+ # On some versions of Python, it appears, GzipFile will raise an
+ # IOError if you try to read past its end (as opposed to just
+ # returning '')
+ if self.compression == 'gzip':
+ return ''
+ raise
def readarray(self, size=None, offset=0, dtype=np.uint8, shape=None):
"""
@@ -278,86 +327,128 @@ def close(self):
self.closed = True
- def _open_fileobj(self, fileobj, mode):
+ def _overwrite_existing(self, clobber, fileobj, closed):
+ """Overwrite an existing file if ``clobber`` is ``True``, otherwise
+ raise an IOError. The exact behavior of this method depends on the
+ _File object state and is only meant for use within the ``_open_*``
+ internal methods.
+ """
+
+ # The file will be overwritten...
+ if ((self.file_like and
+ (hasattr(fileobj, 'len') and fileobj.len > 0)) or
+ (os.path.exists(self.name) and
+ os.path.getsize(self.name) != 0)):
+ if clobber:
+ warnings.warn("Overwriting existing file %r." % self.name,
+ AstropyUserWarning)
+ if self.file_like and hasattr(fileobj, 'truncate'):
+ fileobj.truncate(0)
+ else:
+ if not closed:
+ fileobj.close()
+ os.remove(self.name)
+ else:
+ raise IOError("File %r already exists." % self.name)
+
+ def _open_fileobj(self, fileobj, mode, clobber):
"""Open a FITS file from a file object or a GzipFile object."""
closed = fileobj_closed(fileobj)
- fmode = fileobj_mode(fileobj) or PYTHON_MODES[mode]
+ fmode = fileobj_mode(fileobj) or PYFITS_MODES[mode]
+
+ if mode == 'ostream':
+ self._overwrite_existing(clobber, fileobj, closed)
if not closed:
- # In some cases (like on Python 3) a file opened for appending
- # still shows a mode of 'r+', hence the extra check for the append
- # case
- if ((mode == 'append' and fmode not in ('ab+', 'rb+')) or
- (mode != 'append' and PYTHON_MODES[mode] != fmode)):
+ # Although we have a specific mapping in PYFITS_MODES from our
+ # custom file modes to raw file object modes, many of the latter
+ # can be used appropriately for the former. So determine whether
+ # the modes match up appropriately
+ if ((mode in ('readonly', 'denywrite', 'copyonwrite') and
+ not ('r' in fmode or '+' in fmode)) or
+ (mode == 'append' and fmode not in ('ab+', 'rb+')) or
+ (mode == 'ostream' and
+ not ('w' in fmode or 'a' in fmode or '+' in fmode)) or
+ (mode == 'update' and fmode not in ('rb+', 'wb+'))):
raise ValueError(
- "Input mode '%s' (%s) does not match mode of the "
- "input file (%s)." % (mode, PYTHON_MODES[mode], fmode))
+ "Mode argument '%s' does not match mode of the input "
+ "file (%s)." % (mode, fmode))
self.__file = fileobj
elif isfile(fileobj):
- self.__file = fileobj_open(self.name, PYTHON_MODES[mode])
+ self.__file = fileobj_open(self.name, PYFITS_MODES[mode])
+ else:
+ self.__file = gzip.open(self.name, PYFITS_MODES[mode])
+
+ if fmode == 'ab+':
# Return to the beginning of the file--in Python 3 when opening in
# append mode the file pointer is at the end of the file
self.__file.seek(0)
- else:
- self.__file = gzip.open(self.name, PYTHON_MODES[mode])
- def _open_filelike(self, fileobj, mode):
+ def _open_filelike(self, fileobj, mode, clobber):
"""Open a FITS file from a file-like object, i.e. one that has
read and/or write methods.
"""
self.file_like = True
self.__file = fileobj
+ if fileobj_closed(fileobj):
+ raise IOError("Cannot read from/write to a closed file-like "
+ "object (%r)." % fileobj)
+
+ if isinstance(fileobj, zipfile.ZipFile):
+ self._open_zipfile(fileobj, mode)
+ self.__file.seek(0)
+ # We can bypass any additional checks at this point since now
+ # self.__file points to the temp file extracted from the zip
+ return
+
# If there is not seek or tell methods then set the mode to
# output streaming.
if (not hasattr(self.__file, 'seek') or
not hasattr(self.__file, 'tell')):
self.mode = mode = 'ostream'
- if (self.mode in ('copyonwrite', 'update', 'append') and
+ if mode == 'ostream':
+ self._overwrite_existing(clobber, fileobj, False)
+
+ # Any "writeable" mode requires a write() method on the file object
+ if (self.mode in ('update', 'append', 'ostream') and
not hasattr(self.__file, 'write')):
raise IOError("File-like object does not have a 'write' "
"method, required for mode '%s'."
% self.mode)
- if (self.mode in ('readonly', 'denywrite') and
- not hasattr(self.__file, 'read')):
+ # Any mode except for 'ostream' requires readability
+ if self.mode != 'ostream' and not hasattr(self.__file, 'read'):
raise IOError("File-like object does not have a 'read' "
"method, required for mode %r."
% self.mode)
- def _open_filename(self, filename, mode):
+ def _open_filename(self, filename, mode, clobber):
"""Open a FITS file from a filename string."""
+ if mode == 'ostream':
+ self._overwrite_existing(clobber, None, True)
+
if os.path.exists(self.name):
with fileobj_open(self.name, 'rb') as f:
magic = f.read(4)
else:
magic = b('')
+
ext = os.path.splitext(self.name)[1]
+
if ext == '.gz' or magic.startswith(GZIP_MAGIC):
# Handle gzip files
- self.__file = gzip.open(self.name, PYTHON_MODES[mode])
+ self.__file = gzip.open(self.name, PYFITS_MODES[mode])
self.compression = 'gzip'