/
compressed.py
2068 lines (1748 loc) · 83.1 KB
/
compressed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Licensed under a 3-clause BSD style license - see PYFITS.rst
import ctypes
import gc
import itertools
import math
import re
import time
import warnings
from contextlib import suppress
import numpy as np
from astropy.io.fits import conf
from astropy.io.fits._tiled_compression import compress_hdu, decompress_hdu
from astropy.io.fits.card import Card
from astropy.io.fits.column import KEYWORD_NAMES as TABLE_KEYWORD_NAMES
from astropy.io.fits.column import TDEF_RE, ColDefs, Column
from astropy.io.fits.fitsrec import FITS_rec
from astropy.io.fits.header import Header
from astropy.io.fits.util import (
_get_array_mmap,
_is_int,
_is_pseudo_integer,
_pseudo_zero,
)
from astropy.utils import lazyproperty
from astropy.utils.exceptions import AstropyDeprecationWarning, AstropyUserWarning
from .base import BITPIX2DTYPE, DELAYED, DTYPE2BITPIX, ExtensionHDU
from .image import ImageHDU
from .table import BinTableHDU
# This global variable is used e.g., when calling fits.open with
# disable_image_compression which temporarily changes the global variable to
# False. This should ideally be refactored to avoid relying on global module
# variables.
COMPRESSION_ENABLED = True
# Quantization dithering method constants; these are right out of fitsio.h
NO_DITHER = -1
SUBTRACTIVE_DITHER_1 = 1
SUBTRACTIVE_DITHER_2 = 2
QUANTIZE_METHOD_NAMES = {
NO_DITHER: "NO_DITHER",
SUBTRACTIVE_DITHER_1: "SUBTRACTIVE_DITHER_1",
SUBTRACTIVE_DITHER_2: "SUBTRACTIVE_DITHER_2",
}
DITHER_SEED_CLOCK = 0
DITHER_SEED_CHECKSUM = -1
COMPRESSION_TYPES = ("RICE_1", "GZIP_1", "GZIP_2", "PLIO_1", "HCOMPRESS_1")
# Default compression parameter values
DEFAULT_COMPRESSION_TYPE = "RICE_1"
DEFAULT_QUANTIZE_LEVEL = 16.0
DEFAULT_QUANTIZE_METHOD = NO_DITHER
DEFAULT_DITHER_SEED = DITHER_SEED_CLOCK
DEFAULT_HCOMP_SCALE = 0
DEFAULT_HCOMP_SMOOTH = 0
DEFAULT_BLOCK_SIZE = 32
DEFAULT_BYTE_PIX = 4
CMTYPE_ALIASES = {"RICE_ONE": "RICE_1"}
COMPRESSION_KEYWORDS = {
"ZIMAGE",
"ZCMPTYPE",
"ZBITPIX",
"ZNAXIS",
"ZMASKCMP",
"ZSIMPLE",
"ZTENSION",
"ZEXTEND",
}
class CompImageHeader(Header):
"""
Header object for compressed image HDUs designed to keep the compression
header and the underlying image header properly synchronized.
This essentially wraps the image header, so that all values are read from
and written to the image header. However, updates to the image header will
also update the table header where appropriate.
Note that if no image header is passed in, the code will instantiate a
regular `~astropy.io.fits.Header`.
"""
# TODO: The difficulty of implementing this screams a need to rewrite this
# module
_keyword_remaps = {
"SIMPLE": "ZSIMPLE",
"XTENSION": "ZTENSION",
"BITPIX": "ZBITPIX",
"NAXIS": "ZNAXIS",
"EXTEND": "ZEXTEND",
"BLOCKED": "ZBLOCKED",
"PCOUNT": "ZPCOUNT",
"GCOUNT": "ZGCOUNT",
"CHECKSUM": "ZHECKSUM",
"DATASUM": "ZDATASUM",
}
_zdef_re = re.compile(r"(?P<label>^[Zz][a-zA-Z]*)(?P<num>[1-9][0-9 ]*$)?")
_compression_keywords = set(_keyword_remaps.values()).union(
["ZIMAGE", "ZCMPTYPE", "ZMASKCMP", "ZQUANTIZ", "ZDITHER0"]
)
_indexed_compression_keywords = {"ZNAXIS", "ZTILE", "ZNAME", "ZVAL"}
# TODO: Once it place it should be possible to manage some of this through
# the schema system, but it's not quite ready for that yet. Also it still
# makes more sense to change CompImageHDU to subclass ImageHDU :/
def __new__(cls, table_header, image_header=None):
# 2019-09-14 (MHvK): No point wrapping anything if no image_header is
# given. This happens if __getitem__ and copy are called - our super
# class will aim to initialize a new, possibly partially filled
# header, but we cannot usefully deal with that.
# TODO: the above suggests strongly we should *not* subclass from
# Header. See also comment above about the need for reorganization.
if image_header is None:
return Header(table_header)
else:
return super().__new__(cls)
def __init__(self, table_header, image_header):
self._cards = image_header._cards
self._keyword_indices = image_header._keyword_indices
self._rvkc_indices = image_header._rvkc_indices
self._modified = image_header._modified
self._table_header = table_header
# We need to override and Header methods that can modify the header, and
# ensure that they sync with the underlying _table_header
def __setitem__(self, key, value):
# This isn't pretty, but if the `key` is either an int or a tuple we
# need to figure out what keyword name that maps to before doing
# anything else; these checks will be repeated later in the
# super().__setitem__ call but I don't see another way around it
# without some major refactoring
if self._set_slice(key, value, self):
return
if isinstance(key, int):
keyword, index = self._keyword_from_index(key)
elif isinstance(key, tuple):
keyword, index = key
else:
# We don't want to specify and index otherwise, because that will
# break the behavior for new keywords and for commentary keywords
keyword, index = key, None
if self._is_reserved_keyword(keyword):
return
super().__setitem__(key, value)
if index is not None:
remapped_keyword = self._remap_keyword(keyword)
self._table_header[remapped_keyword, index] = value
# Else this will pass through to ._update
def __delitem__(self, key):
if isinstance(key, slice) or self._haswildcard(key):
# If given a slice pass that on to the superclass and bail out
# early; we only want to make updates to _table_header when given
# a key specifying a single keyword
return super().__delitem__(key)
if isinstance(key, int):
keyword, index = self._keyword_from_index(key)
elif isinstance(key, tuple):
keyword, index = key
else:
keyword, index = key, None
if key not in self:
raise KeyError(f"Keyword {key!r} not found.")
super().__delitem__(key)
remapped_keyword = self._remap_keyword(keyword)
if remapped_keyword in self._table_header:
if index is not None:
del self._table_header[(remapped_keyword, index)]
else:
del self._table_header[remapped_keyword]
def append(self, card=None, useblanks=True, bottom=False, end=False):
# This logic unfortunately needs to be duplicated from the base class
# in order to determine the keyword
if isinstance(card, str):
card = Card(card)
elif isinstance(card, tuple):
card = Card(*card)
elif card is None:
card = Card()
elif not isinstance(card, Card):
raise ValueError(
"The value appended to a Header must be either a keyword or "
"(keyword, value, [comment]) tuple; got: {!r}".format(card)
)
if self._is_reserved_keyword(card.keyword):
return
super().append(card=card, useblanks=useblanks, bottom=bottom, end=end)
remapped_keyword = self._remap_keyword(card.keyword)
# card.keyword strips the HIERARCH if present so this must be added
# back to avoid a warning.
if str(card).startswith("HIERARCH ") and not remapped_keyword.startswith(
"HIERARCH "
):
remapped_keyword = "HIERARCH " + remapped_keyword
card = Card(remapped_keyword, card.value, card.comment)
# Here we disable the use of blank cards, because the call above to
# Header.append may have already deleted a blank card in the table
# header, thanks to inheritance: Header.append calls 'del self[-1]'
# to delete a blank card, which calls CompImageHeader.__deltitem__,
# which deletes the blank card both in the image and the table headers!
self._table_header.append(card=card, useblanks=False, bottom=bottom, end=end)
def insert(self, key, card, useblanks=True, after=False):
if isinstance(key, int):
# Determine condition to pass through to append
if after:
if key == -1:
key = len(self._cards)
else:
key += 1
if key >= len(self._cards):
self.append(card, end=True)
return
if isinstance(card, str):
card = Card(card)
elif isinstance(card, tuple):
card = Card(*card)
elif not isinstance(card, Card):
raise ValueError(
"The value inserted into a Header must be either a keyword or "
"(keyword, value, [comment]) tuple; got: {!r}".format(card)
)
if self._is_reserved_keyword(card.keyword):
return
# Now the tricky part is to determine where to insert in the table
# header. If given a numerical index we need to map that to the
# corresponding index in the table header. Although rare, there may be
# cases where there is no mapping in which case we just try the same
# index
# NOTE: It is crucial that remapped_index in particular is figured out
# before the image header is modified
remapped_index = self._remap_index(key)
remapped_keyword = self._remap_keyword(card.keyword)
super().insert(key, card, useblanks=useblanks, after=after)
card = Card(remapped_keyword, card.value, card.comment)
# Here we disable the use of blank cards, because the call above to
# Header.insert may have already deleted a blank card in the table
# header, thanks to inheritance: Header.insert calls 'del self[-1]'
# to delete a blank card, which calls CompImageHeader.__delitem__,
# which deletes the blank card both in the image and the table headers!
self._table_header.insert(remapped_index, card, useblanks=False, after=after)
def _update(self, card):
keyword = card[0]
if self._is_reserved_keyword(keyword):
return
super()._update(card)
if keyword in Card._commentary_keywords:
# Otherwise this will result in a duplicate insertion
return
remapped_keyword = self._remap_keyword(keyword)
self._table_header._update((remapped_keyword,) + card[1:])
# Last piece needed (I think) for synchronizing with the real header
# This one is tricky since _relativeinsert calls insert
def _relativeinsert(self, card, before=None, after=None, replace=False):
keyword = card[0]
if self._is_reserved_keyword(keyword):
return
# Now we have to figure out how to remap 'before' and 'after'
if before is None:
if isinstance(after, int):
remapped_after = self._remap_index(after)
else:
remapped_after = self._remap_keyword(after)
remapped_before = None
else:
if isinstance(before, int):
remapped_before = self._remap_index(before)
else:
remapped_before = self._remap_keyword(before)
remapped_after = None
super()._relativeinsert(card, before=before, after=after, replace=replace)
remapped_keyword = self._remap_keyword(keyword)
card = Card(remapped_keyword, card[1], card[2])
self._table_header._relativeinsert(
card, before=remapped_before, after=remapped_after, replace=replace
)
@classmethod
def _is_reserved_keyword(cls, keyword, warn=True):
msg = (
"Keyword {!r} is reserved for use by the FITS Tiled Image "
"Convention and will not be stored in the header for the "
"image being compressed.".format(keyword)
)
if keyword == "TFIELDS":
if warn:
warnings.warn(msg)
return True
m = TDEF_RE.match(keyword)
if m and m.group("label").upper() in TABLE_KEYWORD_NAMES:
if warn:
warnings.warn(msg)
return True
m = cls._zdef_re.match(keyword)
if m:
label = m.group("label").upper()
num = m.group("num")
if num is not None and label in cls._indexed_compression_keywords:
if warn:
warnings.warn(msg)
return True
elif label in cls._compression_keywords:
if warn:
warnings.warn(msg)
return True
return False
@classmethod
def _remap_keyword(cls, keyword):
# Given a keyword that one might set on an image, remap that keyword to
# the name used for it in the COMPRESSED HDU header
# This is mostly just a lookup in _keyword_remaps, but needs handling
# for NAXISn keywords
is_naxisn = False
if keyword[:5] == "NAXIS":
with suppress(ValueError):
index = int(keyword[5:])
is_naxisn = index > 0
if is_naxisn:
return f"ZNAXIS{index}"
# If the keyword does not need to be remapped then just return the
# original keyword
return cls._keyword_remaps.get(keyword, keyword)
def _remap_index(self, idx):
# Given an integer index into this header, map that to the index in the
# table header for the same card. If the card doesn't exist in the
# table header (generally should *not* be the case) this will just
# return the same index
# This *does* also accept a keyword or (keyword, repeat) tuple and
# obtains the associated numerical index with self._cardindex
if not isinstance(idx, int):
idx = self._cardindex(idx)
keyword, repeat = self._keyword_from_index(idx)
remapped_insert_keyword = self._remap_keyword(keyword)
with suppress(IndexError, KeyError):
idx = self._table_header._cardindex((remapped_insert_keyword, repeat))
return idx
def clear(self):
"""
Remove all cards from the header.
"""
self._table_header.clear()
super().clear()
# TODO: Fix this class so that it doesn't actually inherit from BinTableHDU,
# but instead has an internal BinTableHDU reference
class CompImageHDU(BinTableHDU):
"""
Compressed Image HDU class.
"""
_manages_own_heap = True
"""
The calls to CFITSIO lay out the heap data in memory, and we write it out
the same way CFITSIO organizes it. In principle this would break if a user
manually changes the underlying compressed data by hand, but there is no
reason they would want to do that (and if they do that's their
responsibility).
"""
_default_name = "COMPRESSED_IMAGE"
def __init__(
self,
data=None,
header=None,
name=None,
compression_type=DEFAULT_COMPRESSION_TYPE,
tile_size=None,
hcomp_scale=DEFAULT_HCOMP_SCALE,
hcomp_smooth=DEFAULT_HCOMP_SMOOTH,
quantize_level=DEFAULT_QUANTIZE_LEVEL,
quantize_method=DEFAULT_QUANTIZE_METHOD,
dither_seed=DEFAULT_DITHER_SEED,
do_not_scale_image_data=False,
uint=False,
scale_back=False,
**kwargs,
):
"""
Parameters
----------
data : array, optional
Uncompressed image data
header : `~astropy.io.fits.Header`, optional
Header to be associated with the image; when reading the HDU from a
file (data=DELAYED), the header read from the file
name : str, optional
The ``EXTNAME`` value; if this value is `None`, then the name from
the input image header will be used; if there is no name in the
input image header then the default name ``COMPRESSED_IMAGE`` is
used.
compression_type : str, optional
Compression algorithm: one of
``'RICE_1'``, ``'RICE_ONE'``, ``'PLIO_1'``, ``'GZIP_1'``,
``'GZIP_2'``, ``'HCOMPRESS_1'``
tile_size : int, optional
Compression tile sizes. Default treats each row of image as a
tile.
hcomp_scale : float, optional
HCOMPRESS scale parameter
hcomp_smooth : float, optional
HCOMPRESS smooth parameter
quantize_level : float, optional
Floating point quantization level; see note below
quantize_method : int, optional
Floating point quantization dithering method; can be either
``NO_DITHER`` (-1; default), ``SUBTRACTIVE_DITHER_1`` (1), or
``SUBTRACTIVE_DITHER_2`` (2); see note below
dither_seed : int, optional
Random seed to use for dithering; can be either an integer in the
range 1 to 1000 (inclusive), ``DITHER_SEED_CLOCK`` (0; default), or
``DITHER_SEED_CHECKSUM`` (-1); see note below
Notes
-----
The astropy.io.fits package supports 2 methods of image compression:
1) The entire FITS file may be externally compressed with the gzip
or pkzip utility programs, producing a ``*.gz`` or ``*.zip``
file, respectively. When reading compressed files of this type,
Astropy first uncompresses the entire file into a temporary file
before performing the requested read operations. The
astropy.io.fits package does not support writing to these types
of compressed files. This type of compression is supported in
the ``_File`` class, not in the `CompImageHDU` class. The file
compression type is recognized by the ``.gz`` or ``.zip`` file
name extension.
2) The `CompImageHDU` class supports the FITS tiled image
compression convention in which the image is subdivided into a
grid of rectangular tiles, and each tile of pixels is
individually compressed. The details of this FITS compression
convention are described at the `FITS Support Office web site
<https://fits.gsfc.nasa.gov/registry/tilecompression.html>`_.
Basically, the compressed image tiles are stored in rows of a
variable length array column in a FITS binary table. The
astropy.io.fits recognizes that this binary table extension
contains an image and treats it as if it were an image
extension. Under this tile-compression format, FITS header
keywords remain uncompressed. At this time, Astropy does not
support the ability to extract and uncompress sections of the
image without having to uncompress the entire image.
The astropy.io.fits package supports 3 general-purpose compression
algorithms plus one other special-purpose compression technique that is
designed for data masks with positive integer pixel values. The 3
general purpose algorithms are GZIP, Rice, and HCOMPRESS, and the
special-purpose technique is the IRAF pixel list compression technique
(PLIO). The ``compression_type`` parameter defines the compression
algorithm to be used.
The FITS image can be subdivided into any desired rectangular grid of
compression tiles. With the GZIP, Rice, and PLIO algorithms, the
default is to take each row of the image as a tile. The HCOMPRESS
algorithm is inherently 2-dimensional in nature, so the default in this
case is to take 16 rows of the image per tile. In most cases, it makes
little difference what tiling pattern is used, so the default tiles are
usually adequate. In the case of very small images, it could be more
efficient to compress the whole image as a single tile. Note that the
image dimensions are not required to be an integer multiple of the tile
dimensions; if not, then the tiles at the edges of the image will be
smaller than the other tiles. The ``tile_size`` parameter may be
provided as a list of tile sizes, one for each dimension in the image.
For example a ``tile_size`` value of ``[100,100]`` would divide a 300 X
300 image into 9 100 X 100 tiles.
The 4 supported image compression algorithms are all 'lossless' when
applied to integer FITS images; the pixel values are preserved exactly
with no loss of information during the compression and uncompression
process. In addition, the HCOMPRESS algorithm supports a 'lossy'
compression mode that will produce larger amount of image compression.
This is achieved by specifying a non-zero value for the ``hcomp_scale``
parameter. Since the amount of compression that is achieved depends
directly on the RMS noise in the image, it is usually more convenient
to specify the ``hcomp_scale`` factor relative to the RMS noise.
Setting ``hcomp_scale = 2.5`` means use a scale factor that is 2.5
times the calculated RMS noise in the image tile. In some cases it may
be desirable to specify the exact scaling to be used, instead of
specifying it relative to the calculated noise value. This may be done
by specifying the negative of the desired scale value (typically in the
range -2 to -100).
Very high compression factors (of 100 or more) can be achieved by using
large ``hcomp_scale`` values, however, this can produce undesirable
'blocky' artifacts in the compressed image. A variation of the
HCOMPRESS algorithm (called HSCOMPRESS) can be used in this case to
apply a small amount of smoothing of the image when it is uncompressed
to help cover up these artifacts. This smoothing is purely cosmetic
and does not cause any significant change to the image pixel values.
Setting the ``hcomp_smooth`` parameter to 1 will engage the smoothing
algorithm.
Floating point FITS images (which have ``BITPIX`` = -32 or -64) usually
contain too much 'noise' in the least significant bits of the mantissa
of the pixel values to be effectively compressed with any lossless
algorithm. Consequently, floating point images are first quantized
into scaled integer pixel values (and thus throwing away much of the
noise) before being compressed with the specified algorithm (either
GZIP, RICE, or HCOMPRESS). This technique produces much higher
compression factors than simply using the GZIP utility to externally
compress the whole FITS file, but it also means that the original
floating point value pixel values are not exactly preserved. When done
properly, this integer scaling technique will only discard the
insignificant noise while still preserving all the real information in
the image. The amount of precision that is retained in the pixel
values is controlled by the ``quantize_level`` parameter. Larger
values will result in compressed images whose pixels more closely match
the floating point pixel values, but at the same time the amount of
compression that is achieved will be reduced. Users should experiment
with different values for this parameter to determine the optimal value
that preserves all the useful information in the image, without
needlessly preserving all the 'noise' which will hurt the compression
efficiency.
The default value for the ``quantize_level`` scale factor is 16, which
means that scaled integer pixel values will be quantized such that the
difference between adjacent integer values will be 1/16th of the noise
level in the image background. An optimized algorithm is used to
accurately estimate the noise in the image. As an example, if the RMS
noise in the background pixels of an image = 32.0, then the spacing
between adjacent scaled integer pixel values will equal 2.0 by default.
Note that the RMS noise is independently calculated for each tile of
the image, so the resulting integer scaling factor may fluctuate
slightly for each tile. In some cases, it may be desirable to specify
the exact quantization level to be used, instead of specifying it
relative to the calculated noise value. This may be done by specifying
the negative of desired quantization level for the value of
``quantize_level``. In the previous example, one could specify
``quantize_level = -2.0`` so that the quantized integer levels differ
by 2.0. Larger negative values for ``quantize_level`` means that the
levels are more coarsely-spaced, and will produce higher compression
factors.
The quantization algorithm can also apply one of two random dithering
methods in order to reduce bias in the measured intensity of background
regions. The default method, specified with the constant
``SUBTRACTIVE_DITHER_1`` adds dithering to the zero-point of the
quantization array itself rather than adding noise to the actual image.
The random noise is added on a pixel-by-pixel basis, so in order
restore each pixel from its integer value to its floating point value
it is necessary to replay the same sequence of random numbers for each
pixel (see below). The other method, ``SUBTRACTIVE_DITHER_2``, is
exactly like the first except that before dithering any pixel with a
floating point value of ``0.0`` is replaced with the special integer
value ``-2147483647``. When the image is uncompressed, pixels with
this value are restored back to ``0.0`` exactly. Finally, a value of
``NO_DITHER`` disables dithering entirely.
As mentioned above, when using the subtractive dithering algorithm it
is necessary to be able to generate a (pseudo-)random sequence of noise
for each pixel, and replay that same sequence upon decompressing. To
facilitate this, a random seed between 1 and 10000 (inclusive) is used
to seed a random number generator, and that seed is stored in the
``ZDITHER0`` keyword in the header of the compressed HDU. In order to
use that seed to generate the same sequence of random numbers the same
random number generator must be used at compression and decompression
time; for that reason the tiled image convention provides an
implementation of a very simple pseudo-random number generator. The
seed itself can be provided in one of three ways, controllable by the
``dither_seed`` argument: It may be specified manually, or it may be
generated arbitrarily based on the system's clock
(``DITHER_SEED_CLOCK``) or based on a checksum of the pixels in the
image's first tile (``DITHER_SEED_CHECKSUM``). The clock-based method
is the default, and is sufficient to ensure that the value is
reasonably "arbitrary" and that the same seed is unlikely to be
generated sequentially. The checksum method, on the other hand,
ensures that the same seed is used every time for a specific image.
This is particularly useful for software testing as it ensures that the
same image will always use the same seed.
"""
compression_type = CMTYPE_ALIASES.get(compression_type, compression_type)
if data is DELAYED:
# Reading the HDU from a file
super().__init__(data=data, header=header)
else:
# Create at least a skeleton HDU that matches the input
# header and data (if any were input)
super().__init__(data=None, header=header)
# Store the input image data
self.data = data
# Update the table header (_header) to the compressed
# image format and to match the input data (if any);
# Create the image header (_image_header) from the input
# image header (if any) and ensure it matches the input
# data; Create the initially empty table data array to
# hold the compressed data.
self._update_header_data(
header,
name,
compression_type=compression_type,
tile_size=tile_size,
hcomp_scale=hcomp_scale,
hcomp_smooth=hcomp_smooth,
quantize_level=quantize_level,
quantize_method=quantize_method,
dither_seed=dither_seed,
)
# TODO: A lot of this should be passed on to an internal image HDU o
# something like that, see ticket #88
self._do_not_scale_image_data = do_not_scale_image_data
self._uint = uint
self._scale_back = scale_back
self._axes = [
self._header.get("ZNAXIS" + str(axis + 1), 0)
for axis in range(self._header.get("ZNAXIS", 0))
]
# store any scale factors from the table header
if do_not_scale_image_data:
self._bzero = 0
self._bscale = 1
else:
self._bzero = self._header.get("BZERO", 0)
self._bscale = self._header.get("BSCALE", 1)
self._bitpix = self._header["ZBITPIX"]
self._orig_bzero = self._bzero
self._orig_bscale = self._bscale
self._orig_bitpix = self._bitpix
def _remove_unnecessary_default_extnames(self, header):
"""Remove default EXTNAME values if they are unnecessary.
Some data files (eg from CFHT) can have the default EXTNAME and
an explicit value. This method removes the default if a more
specific header exists. It also removes any duplicate default
values.
"""
if "EXTNAME" in header:
indices = header._keyword_indices["EXTNAME"]
# Only continue if there is more than one found
n_extname = len(indices)
if n_extname > 1:
extnames_to_remove = [
index for index in indices if header[index] == self._default_name
]
if len(extnames_to_remove) == n_extname:
# Keep the first (they are all the same)
extnames_to_remove.pop(0)
# Remove them all in reverse order to keep the index unchanged.
for index in reversed(sorted(extnames_to_remove)):
del header[index]
@property
def name(self):
# Convert the value to a string to be flexible in some pathological
# cases (see ticket #96)
# Similar to base class but uses .header rather than ._header
return str(self.header.get("EXTNAME", self._default_name))
@name.setter
def name(self, value):
# This is a copy of the base class but using .header instead
# of ._header to ensure that the name stays in sync.
if not isinstance(value, str):
raise TypeError("'name' attribute must be a string")
if not conf.extension_name_case_sensitive:
value = value.upper()
if "EXTNAME" in self.header:
self.header["EXTNAME"] = value
else:
self.header["EXTNAME"] = (value, "extension name")
@classmethod
def match_header(cls, header):
card = header.cards[0]
if card.keyword != "XTENSION":
return False
xtension = card.value
if isinstance(xtension, str):
xtension = xtension.rstrip()
if xtension not in ("BINTABLE", "A3DTABLE"):
return False
if "ZIMAGE" not in header or not header["ZIMAGE"]:
return False
return COMPRESSION_ENABLED
def _update_header_data(
self,
image_header,
name=None,
compression_type=None,
tile_size=None,
hcomp_scale=None,
hcomp_smooth=None,
quantize_level=None,
quantize_method=None,
dither_seed=None,
):
"""
Update the table header (`_header`) to the compressed
image format and to match the input data (if any). Create
the image header (`_image_header`) from the input image
header (if any) and ensure it matches the input
data. Create the initially-empty table data array to hold
the compressed data.
This method is mainly called internally, but a user may wish to
call this method after assigning new data to the `CompImageHDU`
object that is of a different type.
Parameters
----------
image_header : `~astropy.io.fits.Header`
header to be associated with the image
name : str, optional
the ``EXTNAME`` value; if this value is `None`, then the name from
the input image header will be used; if there is no name in the
input image header then the default name 'COMPRESSED_IMAGE' is used
compression_type : str, optional
compression algorithm 'RICE_1', 'PLIO_1', 'GZIP_1', 'GZIP_2',
'HCOMPRESS_1'; if this value is `None`, use value already in the
header; if no value already in the header, use 'RICE_1'
tile_size : sequence of int, optional
compression tile sizes as a list; if this value is `None`, use
value already in the header; if no value already in the header,
treat each row of image as a tile
hcomp_scale : float, optional
HCOMPRESS scale parameter; if this value is `None`, use the value
already in the header; if no value already in the header, use 1
hcomp_smooth : float, optional
HCOMPRESS smooth parameter; if this value is `None`, use the value
already in the header; if no value already in the header, use 0
quantize_level : float, optional
floating point quantization level; if this value is `None`, use the
value already in the header; if no value already in header, use 16
quantize_method : int, optional
floating point quantization dithering method; can be either
NO_DITHER (-1), SUBTRACTIVE_DITHER_1 (1; default), or
SUBTRACTIVE_DITHER_2 (2)
dither_seed : int, optional
random seed to use for dithering; can be either an integer in the
range 1 to 1000 (inclusive), DITHER_SEED_CLOCK (0; default), or
DITHER_SEED_CHECKSUM (-1)
"""
# Clean up EXTNAME duplicates
self._remove_unnecessary_default_extnames(self._header)
image_hdu = ImageHDU(data=self.data, header=self._header)
self._image_header = CompImageHeader(self._header, image_hdu.header)
self._axes = image_hdu._axes
del image_hdu
# Determine based on the size of the input data whether to use the Q
# column format to store compressed data or the P format.
# The Q format is used only if the uncompressed data is larger than
# 4 GB. This is not a perfect heuristic, as one can contrive an input
# array which, when compressed, the entire binary table representing
# the compressed data is larger than 4GB. That said, this is the same
# heuristic used by CFITSIO, so this should give consistent results.
# And the cases where this heuristic is insufficient are extreme and
# almost entirely contrived corner cases, so it will do for now
if self._has_data:
huge_hdu = self.data.nbytes > 2**32
else:
huge_hdu = False
# Update the extension name in the table header
if not name and "EXTNAME" not in self._header:
# Do not sync this with the image header since the default
# name is specific to the table header.
self._header.set(
"EXTNAME",
self._default_name,
"name of this binary table extension",
after="TFIELDS",
)
elif name:
# Force the name into table and image headers.
self.name = name
# Set the compression type in the table header.
if compression_type:
if compression_type not in COMPRESSION_TYPES:
warnings.warn(
"Unknown compression type provided (supported are {}). "
"Default ({}) compression will be used.".format(
", ".join(map(repr, COMPRESSION_TYPES)),
DEFAULT_COMPRESSION_TYPE,
),
AstropyUserWarning,
)
compression_type = DEFAULT_COMPRESSION_TYPE
self._header.set(
"ZCMPTYPE", compression_type, "compression algorithm", after="TFIELDS"
)
else:
compression_type = self._header.get("ZCMPTYPE", DEFAULT_COMPRESSION_TYPE)
compression_type = CMTYPE_ALIASES.get(compression_type, compression_type)
# If the input image header had BSCALE/BZERO cards, then insert
# them in the table header.
if image_header:
bzero = image_header.get("BZERO", 0.0)
bscale = image_header.get("BSCALE", 1.0)
after_keyword = "EXTNAME"
if bscale != 1.0:
self._header.set("BSCALE", bscale, after=after_keyword)
after_keyword = "BSCALE"
if bzero != 0.0:
self._header.set("BZERO", bzero, after=after_keyword)
try:
bitpix_comment = image_header.comments["BITPIX"]
except (AttributeError, KeyError):
bitpix_comment = "data type of original image"
try:
naxis_comment = image_header.comments["NAXIS"]
except (AttributeError, KeyError):
naxis_comment = "dimension of original image"
# Set the label for the first column in the table
self._header.set(
"TTYPE1", "COMPRESSED_DATA", "label for field 1", after="TFIELDS"
)
# Set the data format for the first column. It is dependent
# on the requested compression type.
if compression_type == "PLIO_1":
tform1 = "1QI" if huge_hdu else "1PI"
else:
tform1 = "1QB" if huge_hdu else "1PB"
self._header.set(
"TFORM1",
tform1,
"data format of field: variable length array",
after="TTYPE1",
)
# Create the first column for the table. This column holds the
# compressed data.
col1 = Column(name=self._header["TTYPE1"], format=tform1)
# Create the additional columns required for floating point
# data and calculate the width of the output table.
zbitpix = self._image_header["BITPIX"]
if zbitpix < 0 and quantize_level != 0.0:
# floating point image has 'COMPRESSED_DATA',
# 'UNCOMPRESSED_DATA', 'ZSCALE', and 'ZZERO' columns (unless using
# lossless compression, per CFITSIO)
ncols = 4
# CFITSIO 3.28 and up automatically use the GZIP_COMPRESSED_DATA
# store floating point data that couldn't be quantized, instead
# of the UNCOMPRESSED_DATA column. There's no way to control
# this behavior so the only way to determine which behavior will
# be employed is via the CFITSIO version
ttype2 = "GZIP_COMPRESSED_DATA"
# The required format for the GZIP_COMPRESSED_DATA is actually
# missing from the standard docs, but CFITSIO suggests it
# should be 1PB, which is logical.
tform2 = "1QB" if huge_hdu else "1PB"
# Set up the second column for the table that will hold any
# uncompressable data.
self._header.set("TTYPE2", ttype2, "label for field 2", after="TFORM1")
self._header.set(
"TFORM2",
tform2,
"data format of field: variable length array",
after="TTYPE2",
)
col2 = Column(name=ttype2, format=tform2)
# Set up the third column for the table that will hold
# the scale values for quantized data.
self._header.set("TTYPE3", "ZSCALE", "label for field 3", after="TFORM2")
self._header.set(
"TFORM3", "1D", "data format of field: 8-byte DOUBLE", after="TTYPE3"
)
col3 = Column(name=self._header["TTYPE3"], format=self._header["TFORM3"])
# Set up the fourth column for the table that will hold
# the zero values for the quantized data.
self._header.set("TTYPE4", "ZZERO", "label for field 4", after="TFORM3")
self._header.set(
"TFORM4", "1D", "data format of field: 8-byte DOUBLE", after="TTYPE4"
)
after = "TFORM4"
col4 = Column(name=self._header["TTYPE4"], format=self._header["TFORM4"])
# Create the ColDefs object for the table
cols = ColDefs([col1, col2, col3, col4])
else:
# default table has just one 'COMPRESSED_DATA' column
ncols = 1
after = "TFORM1"
# remove any header cards for the additional columns that
# may be left over from the previous data
to_remove = ["TTYPE2", "TFORM2", "TTYPE3", "TFORM3", "TTYPE4", "TFORM4"]
for k in to_remove:
try:
del self._header[k]
except KeyError:
pass
# Create the ColDefs object for the table