Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| <?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN" "JATS-archivearticle1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.1d1"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">03722</article-id><article-id pub-id-type="doi">10.7554/eLife.03722</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research article</subject></subj-group><subj-group subj-group-type="heading"><subject>Neuroscience</subject></subj-group></article-categories><title-group><article-title>Variance predicts salience in central sensory processing</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" id="author-11301"><name><surname>Hermundstad</surname><given-names>Ann M</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-15602"><name><surname>Briguglio</surname><given-names>John J</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-6194"><name><surname>Conte</surname><given-names>Mary M</given-names></name><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" 
rid="con3"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-6202" equal-contrib="yes"><name><surname>Victor</surname><given-names>Jonathan D</given-names></name><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-1"/><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-15603" equal-contrib="yes"><name><surname>Balasubramanian</surname><given-names>Vijay</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-2"/><xref ref-type="other" rid="par-3"/><xref ref-type="other" rid="par-4"/><xref ref-type="other" rid="par-5"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-21765" equal-contrib="yes"><name><surname>Tkačik</surname><given-names>Gašper</given-names></name><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-3"/><xref ref-type="fn" rid="con6"/><xref ref-type="fn" rid="conf1"/></contrib><aff id="aff1"><label>1</label><institution content-type="dept">Department of Physics and Astronomy</institution>, <institution>University of Pennsylvania</institution>, <addr-line><named-content content-type="city">Philadelphia</named-content></addr-line>, <country>United States</country></aff><aff id="aff2"><label>2</label><institution content-type="dept">Laboratoire de Physique Théorique</institution>, <institution>École Normale Supérieure</institution>, <addr-line><named-content content-type="city">Paris</named-content></addr-line>, <country>France</country></aff><aff id="aff3"><label>3</label><institution content-type="dept">Brain and Mind Research Institute</institution>, <institution>Weill Cornell Medical 
College</institution>, <addr-line><named-content content-type="city">New York</named-content></addr-line>, <country>United States</country></aff><aff id="aff4"><label>4</label><institution>Institute of Science and Technology Austria</institution>, <addr-line><named-content content-type="city">Klosterneuburg</named-content></addr-line>, <country>Austria</country></aff><aff id="aff5"><label>5</label><institution content-type="dept">Initiative for the Theoretical Sciences</institution>, <institution>City University of New York Graduate Center</institution>, <addr-line><named-content content-type="city">New York</named-content></addr-line>, <country>United States</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Behrens</surname><given-names>Timothy</given-names></name><role>Reviewing editor</role><aff><institution>Oxford University</institution>, <country>United Kingdom</country></aff></contrib></contrib-group><author-notes><corresp id="cor1"><label>*</label>For correspondence: <email>annherm@physics.upenn.edu</email></corresp><fn fn-type="con" id="equal-contrib"><label>†</label><p>These authors contributed equally to this work</p></fn></author-notes><pub-date publication-format="electronic" date-type="pub"><day>14</day><month>11</month><year>2014</year></pub-date><pub-date pub-type="collection"><year>2014</year></pub-date><volume>3</volume><elocation-id>e03722</elocation-id><history><date date-type="received"><day>19</day><month>06</month><year>2014</year></date><date date-type="accepted"><day>13</day><month>11</month><year>2014</year></date></history><permissions><copyright-statement>Copyright © 2014, Hermundstad et al</copyright-statement><copyright-year>2014</copyright-year><copyright-holder>Hermundstad et al</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/4.0/"><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" 
xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife03722.pdf"/><abstract><object-id pub-id-type="doi">10.7554/eLife.03722.001</object-id><p>Information processing in the sensory periphery is shaped by natural stimulus statistics. In the periphery, a transmission bottleneck constrains performance; thus efficient coding implies that natural signal components with a predictably wider range should be compressed. In a different regime—when sampling limitations constrain performance—efficient coding implies that <italic>more</italic> resources should be allocated to informative features that are more variable. We propose that this regime is relevant for sensory cortex when it extracts complex features from limited numbers of sensory samples. To test this prediction, we use central visual processing as a model: we show that visual sensitivity for local multi-point spatial correlations, described by dozens of independently-measured parameters, can be quantitatively predicted from the structure of natural images. This suggests that efficient coding applies centrally, where it extends to higher-order sensory features and operates in a regime in which sensitivity increases with feature variability.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.001">http://dx.doi.org/10.7554/eLife.03722.001</ext-link></p></abstract><abstract abstract-type="executive-summary"><object-id pub-id-type="doi">10.7554/eLife.03722.002</object-id><title>eLife digest</title><p>Our senses are constantly bombarded by sights and sounds, but the capacity of the brain to process all these inputs is finite. 
The stimuli that contain the most useful information must therefore be prioritized for processing by the brain to ensure that we build up as complete a picture as possible of the world around us. However, the strategy that the brain uses to select certain stimuli—or certain features of stimuli—for processing at the expense of others is unclear.</p><p>Hermundstad et al. have now provided new insights into this process by analyzing how humans respond to artificial stimuli that contain controllable mixtures of features that are found in natural stimuli. To do this, Hermundstad et al. selected photographs of the natural world, and measured the brightness of individual pixels. After adjusting images in a way that mimics the human retina, the brightest 50% of the pixels in each photograph were colored white and the remaining 50% were colored black.</p><p>Hermundstad et al. then used statistical techniques to calculate the degree to which the color of pixels could be used to predict the color of their neighbors. In this way, it was possible to calculate the amount of variation throughout the images, and then make computer-generated images in which pixel colorings were more or less predictable than in the natural images.</p><p>Volunteers then performed a task in which they had to locate a computer-generated pattern against a background of random noise. 
The volunteers were able to locate this target most easily when it contained the same kinds of patterns and features that were most informative about natural images.</p><p>While this shows that the brain is adapted to prioritize features that are more informative about the natural world, understanding exactly how the brain implements this strategy remains a challenge.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.002">http://dx.doi.org/10.7554/eLife.03722.002</ext-link></p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>natural scene statistics</kwd><kwd>neural coding</kwd><kwd>visual cortex</kwd><kwd>normative theories</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>human</kwd></kwd-group><funding-group><award-group id="par-1"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/100000053</institution-id><institution content-type="university">National Eye Institute</institution></institution-wrap></funding-source><award-id>EY07977</award-id><principal-award-recipient><name><surname>Victor</surname><given-names>Jonathan D</given-names></name></principal-award-recipient></award-group><award-group id="par-2"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/100000001</institution-id><institution>National Science Foundation</institution></institution-wrap></funding-source><award-id>PHY-1058202</award-id><principal-award-recipient><name><surname>Balasubramanian</surname><given-names>Vijay</given-names></name></principal-award-recipient></award-group><award-group id="par-3"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100002428</institution-id><institution>Austrian Science Fund</institution></institution-wrap></funding-source><award-id>FWF 
P25651</award-id><principal-award-recipient><name><surname>Balasubramanian</surname><given-names>Vijay</given-names></name><name><surname>Tkačik</surname><given-names>Gašper</given-names></name></principal-award-recipient></award-group><award-group id="par-4"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/100000053</institution-id><institution content-type="university">National Eye Institute</institution></institution-wrap></funding-source><award-id>Vision Training Grant 5-T32-EY007035-32</award-id><principal-award-recipient><name><surname>Balasubramanian</surname><given-names>Vijay</given-names></name></principal-award-recipient></award-group><award-group id="par-5"><funding-source><institution-wrap><institution>Fondation Pierre Gilles de Gennes</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Balasubramanian</surname><given-names>Vijay</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection and interpretation, or the decision to submit the work for publication.</funding-statement></funding-group><custom-meta-group><custom-meta><meta-name>elife-xml-version</meta-name><meta-value>2.0</meta-value></custom-meta><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>Psychophysical measures of human sensitivity to visual patterns reveal that the brain preferentially processes those features of stimuli that are more variable in the natural world.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec sec-type="intro" id="s1"><title>Introduction</title><p>Sensory receptor neurons encode signals from the environment, which are then transformed by successive neural layers to support diverse and computationally complex cognitive tasks. 
A normative understanding of these computations begins in the periphery, where the efficient coding principle—the notion that a sensory system is tuned to the statistics of its natural inputs—has been shown to be a powerful organizing framework (<xref ref-type="bibr" rid="bib7">Barlow, 2001</xref>; <xref ref-type="bibr" rid="bib43">Simoncelli, 2002</xref>). Perhaps the best-known example is that of redundancy removal via predictive coding and spatiotemporal decorrelation. In insects, this is carried out by neural processing (<xref ref-type="bibr" rid="bib30">Laughlin, 1981</xref>; <xref ref-type="bibr" rid="bib54">van Hateren, 1992b</xref>); in vertebrates, fixational eye movements—which precede the first step of neural processing (<xref ref-type="bibr" rid="bib45">Srinivasan et al., 1982</xref>; <xref ref-type="bibr" rid="bib1">Atick and Redlich, 1990</xref>; <xref ref-type="bibr" rid="bib2">Atick et al., 1992</xref>)—play a major role (<xref ref-type="bibr" rid="bib29">Kuang et al., 2012</xref>). This approach was later extended to describe population coding, retinal mosaic structure (<xref ref-type="bibr" rid="bib7">Barlow, 2001</xref>; <xref ref-type="bibr" rid="bib26">Karklin and Simoncelli, 2001</xref>; <xref ref-type="bibr" rid="bib9">Borghuis et al., 2008</xref>; <xref ref-type="bibr" rid="bib4">Balasubramanian and Sterling, 2009</xref>; <xref ref-type="bibr" rid="bib32">Liu et al., 2009</xref>; <xref ref-type="bibr" rid="bib22">Garrigan et al., 2010</xref>; <xref ref-type="bibr" rid="bib39">Ratliff et al., 2010</xref>; <xref ref-type="bibr" rid="bib29">Kuang et al., 2012</xref>), adaptation of neural responses (<xref ref-type="bibr" rid="bib10">Brenner et al., 2000</xref>; <xref ref-type="bibr" rid="bib19">Fairhall et al., 2001</xref>; <xref ref-type="bibr" rid="bib42">Schwartz and Simoncelli, 2001</xref>), and early auditory processing (<xref ref-type="bibr" rid="bib44">Smith and Lewicki, 2006</xref>). 
Taken together, normative theories based on efficient coding have been successful in explaining aspects of processing in the sensory periphery that are tuned to simple statistical features of the natural world.</p><p>Can we extend such theories beyond the sensory periphery to describe cortical sensitivity to complex sensory features? Normative theories have been successful in predicting the response properties of single cells, including receptive fields in V1 (<xref ref-type="bibr" rid="bib35">Olshausen and Field, 1996</xref>; <xref ref-type="bibr" rid="bib8">Bell and Sejnowski, 1997</xref>; <xref ref-type="bibr" rid="bib51">van Hateren and Ruderman, 1998</xref>; <xref ref-type="bibr" rid="bib52">van Hateren and van der Schaaf, 1998</xref>; <xref ref-type="bibr" rid="bib24">Hyvarinen and Hoyer, 2000</xref>; <xref ref-type="bibr" rid="bib61">Vinje and Gallant, 2000</xref>; <xref ref-type="bibr" rid="bib25">Karklin and Lewicki, 2009</xref>) and spectro-temporal receptive fields in primary auditory cortex (<xref ref-type="bibr" rid="bib13">Carlson and DeWeese, 2002</xref>, <xref ref-type="bibr" rid="bib14">2012</xref>), as well as distributions of tuning curves across individual cells in a population (<xref ref-type="bibr" rid="bib31">Lewicki, 2002</xref>; <xref ref-type="bibr" rid="bib21">Ganguli and Simoncelli, 2011</xref>). Some complex features, however, might not be represented by the tuning properties of individual cells in any direct way, but rather emerge from the collective behavior of many cells. Instead of trying to predict individual cell properties, we therefore focus on the sensitivity of the complete neural population. 
Is there an organizing principle that determines how resources within the population are allocated to representing such complex features?</p><p>When the presence of complex features is predictable (i.e., can be accurately guessed from simpler features along with priors about the environment), mechanisms are best devoted elsewhere (See Discussion, <xref ref-type="bibr" rid="bib53">van Hateren, 1992a</xref>). In contrast, sensory features that are highly variable and not predictable from simpler ones can serve to determine their causes (e.g., to distinguish among materials or objects), a first step in guiding decisions. We will show that these ideas predict a specific organizing principle for aggregate sensitivities arising in cortex: the perceptual salience of complex sensory signals increases with the variability, or unpredictability, of the corresponding signals over the ensemble of natural stimuli.</p><p>To test this hypothesis, we focus on early stages of central visual processing. Here, early visual cortex (V1 and V2) is charged with extracting edges, shapes, and other complex correlations of light between multiple points in space (<xref ref-type="bibr" rid="bib33">Morrone and Burr, 1988</xref>; <xref ref-type="bibr" rid="bib37">Oppenheim and Lim, 1981</xref>; <xref ref-type="bibr" rid="bib62">von der Heydt et al., 1984</xref>). We compare the spatial variation of local patterns of light across natural images with human sensitivity to manipulations of the same patterns in synthetic images. This allows us to determine how sensitivity is distributed across many different features, rather than simply determining the most salient ones. (We will say that a feature is more <italic>salient</italic> if it is more easily discriminated from white noise.) 
To this end, we parametrize the space of local multi-point correlations in images in terms of a complete set of coordinates, and we measure the probability distribution of coordinate values sampled over a large ensemble of natural scenes. We then use a psychophysical discrimination task to measure human sensitivity to the same correlations in synthetic images, where the correlations can be isolated and manipulated in a mathematically rigorous fashion by varying the corresponding coordinates (<xref ref-type="bibr" rid="bib15">Chubb et al., 2004</xref>; <xref ref-type="bibr" rid="bib58">Victor et al., 2005</xref>; <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>; <xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>). Comparing the measurements, we show that human sensitivity to these multi-point elements of visual form is tuned to their variation in the natural world. Our result supports a broad hypothesis: cortex invests preferentially in mechanisms that encode unpredictable sensory features that are more variable, and thus more informative about the world. Namely, <italic>variance is salience</italic>.</p></sec><sec sec-type="results" id="s2"><title>Results</title><p>As we recently showed, some informative local correlations of natural scenes are captured by the configurations of luminances seen through a ‘glider’, that is, a window defined by a 2 × 2 square arrangement of pixels (<xref ref-type="bibr" rid="bib48">Tkačik et al., 2010</xref>). We use this observation first as a framework for analyzing the local statistical structure of natural scenes, then to characterize psychophysical sensitivities via a set of synthetic visual texture stimuli, and finally to compare the two.</p><sec id="s2-1"><title>Analyzing local image statistics in natural scenes</title><p>The analysis of natural scenes is schematized in <xref ref-type="fig" rid="fig1">Figure 1</xref>. 
We collect an ensemble of image patches from the calibrated Penn natural image database (PIDB) (<xref ref-type="bibr" rid="bib49">Tkačik et al., 2011</xref>). We preprocess the image patches as shown in <xref ref-type="fig" rid="fig1">Figure 1A</xref>. This involves first averaging pixel luminances over a square region of <italic>N × N</italic> pixels, which converts an image of size <italic>L</italic><sub><italic>1</italic></sub> <italic>× L</italic><sub><italic>2</italic></sub> pixels into an image of reduced size <italic>L</italic><sub><italic>1</italic></sub><italic>/N × L</italic><sub><italic>2</italic></sub><italic>/N</italic> pixels. Images are then divided into <italic>R × R</italic> square patches of these downsampled pixels and whitened (see ‘Materials and methods’<italic>, Image preprocessing</italic>, for further details). Since the preprocessing depends on a choice of two parameters, the block-average factor <italic>N</italic> and patch size <italic>R</italic>, we report results for multiple image analyses performed using the identical preprocessing pipeline but for various choices of <italic>N</italic> and <italic>R</italic>. After preprocessing, we binarize each patch to have equal numbers of black and white pixels (black = −1, white = +1). We characterize each patch by the histogram of 16 binary colorings (2<sup>2×2</sup>) seen through a square 2 × 2 pixel glider (<xref ref-type="fig" rid="fig1">Figure 1B</xref>). Translation invariance imposes constraints on this histogram, reducing the number of degrees of freedom to 10 (<xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>). 
These degrees of freedom can be mapped to a set of image statistic coordinates that separates correlations based on their order: (<italic>i</italic>) one first-order coordinate, <italic>γ</italic>, describes overall luminance, (<italic>ii</italic>) four second-order coordinates, <inline-formula><mml:math id="inf1"><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf2"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf3"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf4"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, describe two-point correlations between pixels arranged vertically, horizontally, or diagonally, (<italic>iii</italic>) four third-order coordinates, <inline-formula><mml:math id="inf5"><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf6"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf7"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf8"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, describe three-point correlations between pixels arranged into ⌞-shapes of different orientations, and (<italic>iv</italic>) one fourth-order coordinate, <italic>α</italic>, describes the single four-point correlation between all four pixels in the glider 
(<xref ref-type="fig" rid="fig1">Figure 1C</xref>). The binarization step of the preprocessing pipeline forces <italic>γ</italic> to zero, leaving nine coordinates. Each image patch is thus characterized by a vector of coordinate values <inline-formula><mml:math id="inf9"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, that is, a point within the multidimensional space of image statistics. Accumulating these points across patches yields a multidimensional probability distribution that characterizes the local correlations in natural scenes (schematized in <xref ref-type="fig" rid="fig1">Figure 1D</xref>). 
A total of 724 images (up to 249780 patches, depending on the choice of <italic>N</italic> and <italic>R</italic>) was used to construct this distribution.<fig-group><fig id="fig1" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.003</object-id><label>Figure 1.</label><caption><title>Extracting image statistics from natural scenes.</title><p>(<bold>A</bold>) We first block-average each image over <italic>N</italic> × <italic>N</italic> pixel squares, then divide it into patches of size <italic>R × R</italic> pixels, then whiten the ensemble of patches by removing the average pairwise structure, and finally binarize each patch about its median intensity value (see ‘Materials and methods’<italic>, Image preprocessing</italic>). (<bold>B</bold>) From each binary patch, we measure the occurrence probability of the 16 possible colorings as seen through a two-by-two pixel glider (red). Translation invariance imposes constraints between the probabilities that reduce the number of degrees of freedom to 10. (<bold>C</bold>) A convenient coordinate basis for these 10 degrees of freedom can be described in terms of correlations between pixels as seen through the glider. 
These consist of one first-order coordinate (<italic>γ</italic>), four second-order coordinates (<inline-formula><mml:math id="inf10"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>), four third-order coordinates (<inline-formula><mml:math id="inf11"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), and one fourth-order coordinate (<italic>α</italic>). Since the images are binary, with black = −1 and white = +1, these correlations are sums and differences of the 16 probabilities that form the histogram in panel B (<xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>). (<bold>D</bold>) Each patch is assigned a vector of coordinate values that describes the histogram shown in (<bold>B</bold>). This coordinate vector defines a specific location in the multidimensional space of image statistics. The ensemble of patches is then described by the probability distribution of coordinate values. We compute the degree of variation (standard deviation) along different directions within this distribution (inset). 
(<bold>E</bold>) Along single coordinate axes, we find that the degree of variation is rank-ordered as <inline-formula><mml:math id="inf12"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mi>α</mml:mi><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, shown separately for different choices of the block-average factor <italic>N</italic> and patch size <italic>R</italic> used during image preprocessing.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.003">http://dx.doi.org/10.7554/eLife.03722.003</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722f001"/></fig><fig id="fig1s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.004</object-id><label>Figure 1—figure supplement 1.</label><caption><title>Two-component decomposition of natural image distribution.</title><p>(<bold>A</bold>) The 9-dimensional distribution of natural image statistics is shown projected onto the <inline-formula><mml:math 
id="inf265"><mml:mrow><mml:mi>α</mml:mi><mml:mo>−</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> plane, where each point represents a single image patch. Note that it is not possible to see all points in the distribution due to their overlap. (<bold>B</bold>) This distribution is well described by a mixture of two components in which each image patch is assigned to one of the two components. Inspection of the image patches assigned to each component reveals that one component (light gray) contains in-focus patches, while the other component (black) contains blurred patches. Note that the two components are separated in the full 9-dimensional space but appear overlapping when projected onto a single coordinate plane. Insets show semi-transparent versions of the out-of-focus (<bold>B</bold>-1) and in-focus (<bold>B</bold>-2) components. We highlight the coordinate values of specific images that are (<bold>C</bold>) fully in focus, (<bold>D</bold>) blurred due to variations in field of depth, and (<bold>E</bold>) blurred due to camera motion. Spatial distributions of patch assignments (left) and original image patches (right) are shown below each distribution. (<bold>C</bold>) A sharp image is composed of patches that are uniformly assigned to the ‘in-focus’ component. (<bold>D</bold>) An image that is partially out of focus due to variations in field of depth has patches that are assigned to each of the two components. 
(<bold>E</bold>) An image that is blurred due to camera motion is composed of patches that are uniformly assigned to the ‘blurry’ component.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.004">http://dx.doi.org/10.7554/eLife.03722.004</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs001"/></fig><fig id="fig1s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.005</object-id><label>Figure 1—figure supplement 2.</label><caption><title>Filtering via defocus or motion blur reassigns sharp image patches to the ‘blurry’ component.</title><p>(<bold>A</bold>) Images can be blurred due to variations in field of depth (upper row) or camera motion (lower row). A mixture of components (MOC) method separates blurry (black) from in-focus (gray) image patches. Patches assigned to the ‘blurry’ component have larger positive coordinate values (red), showing saturated values of second- and fourth-order coordinates. Blurring due to variations in field of depth tends to uniformly increase all second- and fourth-order statistics. In comparison, motion blurring tends to more strongly increase both the fourth-order statistic and the second-order statistic aligned with the direction of motion (here, <inline-formula><mml:math id="inf266"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>). 
(<bold>B</bold>) The application of a Gaussian blur filter (middle row) or a motion filter (bottom row) to an in-focus image (top row) produces similar effects; with a sufficiently strong filter (Gaussian blur of <inline-formula><mml:math id="inf267"><mml:mrow><mml:mi>σ</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula> pixels or motion of <inline-formula><mml:math id="inf268"><mml:mrow><mml:mtext>Δ</mml:mtext><mml:mi>h</mml:mi><mml:mo>=</mml:mo><mml:mn>6</mml:mn></mml:mrow></mml:math></inline-formula> pixels), all patches in the original ‘in-focus’ image are reassigned to the ‘blurry’ component. Furthermore, both the Gaussian blur and motion filters alter the distribution of image statistics in a consistent manner. Gaussian blur filters increase the values of all second- and fourth-order coordinates, while motion filters more strongly increase the values of the fourth-order coordinate and the second-order coordinate aligned with the direction of motion.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.005">http://dx.doi.org/10.7554/eLife.03722.005</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs002"/></fig><fig id="fig1s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.006</object-id><label>Figure 1—figure supplement 3.</label><caption><title>Image statistics along single coordinate axes for white-noise patches.</title><p>The robustly observed statistical structure of natural scenes (open circles) is completely absent from the same analysis performed on samples of white noise (shaded circles). 
The inset shows that this holds across analysis parameters.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.006">http://dx.doi.org/10.7554/eLife.03722.006</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs003"/></fig></fig-group></p><p>To summarize this distribution, we compute the degree of variation (standard deviation) along each coordinate axis (<xref ref-type="fig" rid="fig1">Figure 1E</xref>). As is shown, the degree of variation along different coordinate axes exhibits a characteristic rank-ordering, given by <inline-formula><mml:math id="inf13"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mi>α</mml:mi><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>; that is, the most variable correlations are pairwise correlations in the cardinal directions, followed by pairwise correlations in the oblique directions, followed by fourth-order correlations. Interestingly, third-order correlations are the least variable across image patches. 
An analogous analysis performed on white noise yields a flat distribution with considerably smaller standard deviation values (See ‘Materials and methods’<italic>, Analysis variants for Penn Natural Image Database</italic>, and <xref ref-type="fig" rid="fig1s3">Figure 1—figure supplement 3</xref> for comparison), and performing the analysis on a colored Gaussian noise (e.g. <inline-formula><mml:math id="inf14"><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:msup><mml:mi>f</mml:mi><mml:mi>k</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> spectrum) would also yield a flat distribution because of the whitening stage in the image preprocessing pipeline. These (and subsequent) findings are preserved across different choices of image analysis parameters (shown in <xref ref-type="fig" rid="fig1">Figure 1E</xref> for block-average factors <italic>N =</italic> 2, 4 and patch sizes <italic>R =</italic> 32, 48, 64; see ‘Materials and methods’<italic>, Analysis variants for Penn Natural Image Database</italic>, and <xref ref-type="fig" rid="fig3s5">Figure 3—figure supplement 5A</xref> for a larger set of parameters) and also across other collections of natural images (see ‘Materials and methods’<italic>, Comparison with van Hateren Database</italic>, and <xref ref-type="fig" rid="fig3s5">Figure 3—figure supplement 5B</xref> for a parallel analysis of the van Hateren image dataset (<xref ref-type="bibr" rid="bib52">van Hateren and van der Schaaf, 1998</xref>), which gives similar results).</p></sec><sec id="s2-2"><title>Characterizing visual sensitivity to local image statistics</title><p>To characterize perceptual sensitivity to different statistics, we isolated them in synthetic visual images and used a figure/ground segmentation task (<xref ref-type="fig" rid="fig2">Figure 2B</xref>). We used a four-alternative forced-choice task in which stimuli consisted of a textured target and a binary noise background (or vice-versa). 
Each stimulus was presented for 120ms and was followed by a noise mask. Subjects were then asked to identify the spatial location (top, bottom, left, or right) of the target. Experiments were carried out for synthetic stimuli in which the target or background was defined by first varying image statistic coordinates independently (<xref ref-type="fig" rid="fig2">Figure 2A</xref> shows examples of gamuts from which stimuli are built). Along each coordinate axis, threshold (1/sensitivity) was defined as the coordinate value required to support a criterion level of performance (<xref ref-type="fig" rid="fig2">Figure 2C</xref>, inset). We then performed further experiments in which the target or background was defined by simultaneously varying pairs of coordinates. For measurements involving each coordinate pair (to which we will refer as a ‘coordinate plane’), we traced out an isodiscrimination contour (<xref ref-type="fig" rid="fig2">Figure 2C</xref>) that describes the threshold values not only along the cardinal directions, but also along oblique directions. Measurements were collected for four individual subjects in each of 11 distinct coordinate planes (representing all distinct coordinate pairs up to 4-fold rotational symmetry; see ‘Materials and methods’<italic>, Psychophysical methods</italic>, for further details). Each subject performed 4320 judgements per plane, for a total of 47,520 trials per subject.<fig id="fig2" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.007</object-id><label>Figure 2.</label><caption><title>Measuring human sensitivity to image statistics.</title><p>(<bold>A</bold>) Synthetic binary images can be created that contain specified values of individual image statistic coordinates (as shown here) or specified values of pairs of coordinates (<xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>). 
(<bold>B</bold>) To measure human sensitivity to image statistics, we generate synthetic textures with prescribed coordinate values but no additional statistical structure, and we use these synthetic textures in a figure/ground segmentation task (See <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref> and ‘Materials and methods’<italic>, Psychophysical methods</italic>). (<bold>C</bold>) For measurements along coordinate axes, test stimuli are built out of homogeneous samples drawn from the gamuts shown in <bold>A</bold> (e.g. the target shown in B was generated from the portion of the gamut indicated by the red arrow in <bold>A</bold>; See ‘Materials and methods’<italic>, Psychophysical methods</italic>, and <xref ref-type="bibr" rid="bib58">Victor et al., 2005</xref>; <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>; <xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>). We assess the discriminability of these stimuli from white noise by measuring the threshold value of a coordinate required to achieve performance halfway between chance and perfect (inset). A similar approach is used to measure sensitivity in oblique directions; here, two coordinate values are specified to create the test stimuli. The threshold values along the axes and in oblique directions define an isodiscrimination contour (red dashed ellipse, main panel) in pairwise coordinate planes. 
(<bold>D</bold>) Along individual coordinate axes, we find that sensitivities (1/thresholds) are rank-ordered as <inline-formula><mml:math id="inf15"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mi>α</mml:mi><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, shown separately for four individual subjects. 
A single set of perceptual sensitivities is shown for <inline-formula><mml:math id="inf16"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf17"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf18"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, since human subjects are equally sensitive to rotationally-equivalent pairs of second-order coordinates and to all third-order coordinates (<xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.007">http://dx.doi.org/10.7554/eLife.03722.007</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722f002"/></fig></p><p><xref ref-type="fig" rid="fig2">Figure 2D</xref> shows perceptual sensitivities measured along each coordinate axis. 
For each of four subjects, a similar pattern emerges for sensitivities as was observed for variation in natural image statistics: sensitivities are rank-ordered as <inline-formula><mml:math id="inf19"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>></mml:mo><mml:mi>α</mml:mi><mml:mo>></mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>.</p><p>Note that the difference between the sensitivities in the horizontal and vertical directions (<inline-formula><mml:math id="inf20"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf21"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>) vs the diagonal directions (<inline-formula><mml:math id="inf22"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf23"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>) is not simply an ‘oblique effect’, that is, a greater sensitivity to cardinally- vs 
obliquely-oriented contours (<xref ref-type="bibr" rid="bib12">Campbell et al., 1966</xref>). Horizontal and vertical pairwise correlations differ from the diagonal pairwise correlations in more than just orientation: pixels involved in horizontal and vertical pairwise correlations share an edge, while pixels involved in diagonal pairwise correlations only share a corner. Correspondingly, the difference in sensitivities for horizontal and vertical correlations vs diagonal correlations is approximately 50%, which is much larger than the size of the classical oblique effect (10–20%) (<xref ref-type="bibr" rid="bib12">Campbell et al., 1966</xref>).</p></sec><sec id="s2-3"><title>Natural scenes predict human sensitivity along single coordinates</title><p><xref ref-type="fig" rid="fig1 fig2">Figures 1E and 2D</xref> show a rank-order correspondence between natural image statistics and perceptual sensitivities. This qualitative comparison can be converted to a quantitative one (<xref ref-type="fig" rid="fig3">Figure 3A</xref>), as a single scaling parameter aligns the standard deviation of natural image statistics with the corresponding perceptual sensitivities. In this procedure, each of the six image analyses is scaled by a single multiplicative factor that minimizes the squared error between the set of standard deviations and the set of subject-averaged sensitivities (see ‘Materials and methods’<italic>, Image preprocessing</italic>, and <xref ref-type="fig" rid="fig3s1">Figure 3—figure supplement 1</xref> for additional details regarding scaling). 
The agreement is very good, with the mismatch between image analyses and human psychophysics comparable to the variability from one image analysis to another, or from one human subject to another.<fig-group><fig id="fig3" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.008</object-id><label>Figure 3.</label><caption><title>Variation in natural images predicts human perceptual sensitivity.</title><p>(<bold>A</bold>) Scaled degree of variation (standard deviation) in natural image statistics along second- (<italic>β</italic>), third- (<italic>θ</italic>), and fourth-order (<italic>α</italic>) coordinate axes (blue circular markers) are shown in comparison to human perceptual sensitivities measured along the same coordinate axes (red square markers). Degree of variation in natural image statistics is separately shown for different choices of the block-average factor (<italic>N</italic>) and patch size (<italic>R</italic>) used during image preprocessing. Perceptual sensitivities are separately shown for four individual subjects. 
As in <xref ref-type="fig" rid="fig2">Figure 2D</xref>, a single set of perceptual sensitivities is shown for <inline-formula><mml:math id="inf24"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf25"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf26"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. (<bold>B</bold>) For each pair of coordinates, we compare the precision matrix (blue ellipses) extracted from natural scenes (using <italic>N</italic> = 2, <italic>R</italic> = 32) to human perceptual isodiscrimination contours (red ellipses). Coordinate planes are organized into a grid. The set of ellipses in each pairwise plane is scaled to maximally fill each portion of the grid; agreement between the variation along single coordinate axes and the corresponding human sensitivities (shown in <bold>A</bold>) guarantees that no information is lost by scaling. Across all 36 coordinate planes, there is a correspondence in the shape, size, and orientation of precision matrix contours and perceptual isodiscrimination contours. 
(<bold>C</bold>) Quantitative comparison of a single image analysis (<italic>N</italic> = 2, <italic>R</italic> = 32) with the subject-averaged psychophysical data. For single coordinates depicted in A, we report the standard deviation in natural image statistics (upper row) and perceptual sensitivities (middle row). For sets of coordinate planes depicted in (<bold>B</bold>), we report the (average eccentricity, angular tilt) of precision matrix contours from natural scenes (upper row) and isodiscrimination contours from psychophysical measurements (middle row). The degree of correspondence between predictions derived from natural image data and the psychophysical measurements can be conveniently summarized as a scalar product (see text), where 1 indicates a perfect match. In all cases, the correspondence is very high (0.938–0.999) and is highly statistically significant (<italic>p</italic> ≤ 0.0003 for both single coordinates and pairwise coordinate planes; see ‘Materials and methods’<italic>, Permutation tests</italic>, for details).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.008">http://dx.doi.org/10.7554/eLife.03722.008</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722f003"/></fig><fig id="fig3s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.009</object-id><label>Figure 3—figure supplement 1.</label><caption><title>Scaling of natural image analyses.</title><p>We scale each image analysis by a single scale factor that minimizes the squared error between the set of nine standard deviations and the set of nine psychophysical sensitivities. The scale factors are shown here as a function of block-average factor <italic>N</italic> for different choices of the patch size <italic>R</italic>. 
We find that the variance of image statistics decreases with increasing values of <italic>N</italic>, and thus larger values of <italic>N</italic> require a larger scale factor. Similarly, for a given value of <italic>N</italic>, the variance of image statistics decreases with increasing <italic>R</italic>, and thus larger values of <italic>R</italic> require a larger scale factor.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.009">http://dx.doi.org/10.7554/eLife.03722.009</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs004"/></fig><fig id="fig3s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.010</object-id><label>Figure 3—figure supplement 2.</label><caption><title>Covariation in natural image statistics predicts human isodiscrimination contours.</title><p>(<bold>A</bold>) For each pair of coordinates, we compare the precision matrix (blue ellipses) extracted from natural scenes (using <italic>N</italic> = 2, <italic>R</italic> = 32) to human perceptual isodiscrimination contours (red ellipses). Coordinate planes are organized into a grid, with subject-averaged and subject-specific isodiscrimination contours shown respectively above and below the diagonal of the grid. Across all 36 coordinate planes, there is a correspondence in the shape, size, and orientation of precision matrix contours and perceptual isodiscrimination contours. The quality of the match is quantified by computing the angular tilt (<bold>B</bold>) and eccentricity (<bold>C</bold>) of image-statistic contours (blue circular markers; shown for variations in the block-average factor (<italic>N</italic>) and patch size (<italic>R</italic>) used during image preprocessing) and of perceptual isodiscrimination contours (red square markers; shown for individual subjects). 
Since contours are highly similar within subsets of coordinate planes (denoted by blocks in A; e.g. the set of <inline-formula><mml:math id="inf269"><mml:mrow><mml:mi>θ</mml:mi><mml:mi>α</mml:mi></mml:mrow></mml:math></inline-formula> planes), contour properties have been averaged within such subsets. Angular tilt and eccentricity are highly consistent between precision matrix contours and perceptual isodiscrimination contours (except for near-circular contours, for which tilt is poorly-defined, as in the case denoted by an arrow).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.010">http://dx.doi.org/10.7554/eLife.03722.010</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs005"/></fig><fig id="fig3s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.011</object-id><label>Figure 3—figure supplement 3.</label><caption><title>Principal axes of variation in natural images predict principal axes of perceptual sensitivity.</title><p>Principal axes <inline-formula><mml:math id="inf270"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of variation in the distribution of natural image statistics are shown in comparison to the principal axes <inline-formula><mml:math id="inf271"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of human sensitivity. Each of the nine principal axes is represented by a vertical gray/white column. 
Markers (circular = variation in natural image coordinates; square = human perceptual sensitivity) represent the fractional power of the contributions of (<bold>A</bold>) second-order cardinal (<inline-formula><mml:math id="inf272"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>), (<bold>B</bold>) second-order oblique (<inline-formula><mml:math id="inf273"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>), (<bold>C</bold>) third-order (<italic>θ</italic>), and (<bold>D</bold>) fourth-order (<italic>α</italic>) coordinates to each principal axis; all contributions within each column sum to 1. Principal axes components, and the range of variability observed across image analysis variants or across subjects (see legend), are shown in blue for natural scene statistics and in red for perceptual sensitivities.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.011">http://dx.doi.org/10.7554/eLife.03722.011</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs006"/></fig><fig id="fig3s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.012</object-id><label>Figure 3—figure supplement 4.</label><caption><title>Mapping ellipse shapes to the quarter unit sphere.</title><p>We describe an ellipse by the unit vector <inline-formula><mml:math id="inf274"><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi><mml:mtext> cos </mml:mtext><mml:mi>δ</mml:mi><mml:mtext> </mml:mtext><mml:mrow><mml:mover accent="true"><mml:mi 
mathvariant="bold">x</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>+</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi><mml:mtext> sin </mml:mtext><mml:mi>δ</mml:mi><mml:mtext> </mml:mtext><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>+</mml:mo><mml:mtext>cos </mml:mtext><mml:mi>α</mml:mi><mml:mtext> </mml:mtext><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">z</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="inf275"><mml:mrow><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>=</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi></mml:mrow></mml:math></inline-formula> is the eccentricity and <italic>δ</italic> is the angular tilt. In spherical coordinates, the tilt <italic>δ</italic> is the azimuthal angle defined in the <italic>x−y</italic> plane, and the angle <inline-formula><mml:math id="inf276"><mml:mrow><mml:mi>α</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mtext>sin</mml:mtext></mml:mrow><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> is the polar angle measured from the <italic>z</italic>-direction. 
In this representation, the unit vector <inline-formula><mml:math id="inf277"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">z</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> corresponds to a circle, and the unit vectors <inline-formula><mml:math id="inf278"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">x</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf279"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> correspond, respectively, to the ellipses that have been maximally elongated (i.e., into lines) in the <inline-formula><mml:math id="inf280"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">x</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf281"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> directions. 
Points between the equator (in the <italic>x−y</italic> plane) and the pole correspond to ellipses of intermediate eccentricities.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.012">http://dx.doi.org/10.7554/eLife.03722.012</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs007"/></fig><fig id="fig3s5" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.013</object-id><label>Figure 3—figure supplement 5.</label><caption><title>Single coordinate axes: variation in natural images predicts human perceptual sensitivities.</title><p>Scaled variation in natural image statistics measured along second- (<italic>β</italic>), third- (<italic>θ</italic>), and fourth-order (<italic>α</italic>) coordinate axes (blue circular markers) are shown in comparison to human perceptual sensitivities measured along the same coordinates (red square markers). Natural image statistics are extracted from the Penn natural image database (<bold>A</bold>) and the van Hateren image database (<bold>B</bold>). Ranges of variation and human sensitivities are robustly rank-ordered as <inline-formula><mml:math id="inf282"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub><mml:mo>></mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub><mml:mo>></mml:mo><mml:mi>α</mml:mi><mml:mo>></mml:mo><mml:mi>θ</mml:mi></mml:mrow></mml:math></inline-formula>. 
When each image analysis is scaled by a single factor, ranges match sensitivities.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.013">http://dx.doi.org/10.7554/eLife.03722.013</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs008"/></fig><fig id="fig3s6" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.014</object-id><label>Figure 3—figure supplement 6.</label><caption><title>Pairwise coordinate planes in Penn Natural Image Database: covariation in natural images predicts human isodiscrimination contours.</title><p>(<bold>A</bold>) For each pair of coordinates, we compare the precision matrix (blue ellipses) extracted from natural scenes (using <italic>N</italic> = 2<italic>, R</italic> = 32) to human perceptual isodiscrimination contours (red ellipses). A precision matrix is represented by the contour lines of its inverse (the covariance matrix <italic>M</italic>); these are the points <italic>(x,y)</italic> at which <inline-formula><mml:math id="inf283"><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mi>x</mml:mi><mml:mi>y</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>y</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo></mml:mrow></mml:math></inline-formula> constant. A short distance of the blue contour from the origin thus indicates a large value of <italic>M</italic> and a small value of the precision matrix. This in turn denotes a direction in which prior knowledge of the image statistic is imprecise. 
Our prediction is that psychophysical thresholds (red ellipses) should match these contours. Coordinate planes are organized into a grid, with subject-averaged and subject-specific isodiscrimination contours shown respectively above and below the diagonal of the grid. Across all 36 pairwise coordinate planes, there is a correspondence in the shape, size, and orientation of precision matrix contours and perceptual isodiscrimination contours. The quality of the match is quantified by computing the (<bold>B</bold>) angular tilt and (<bold>C</bold>) eccentricity of image-statistic contours (circular markers) and of perceptual isodiscrimination contours (square markers). Since contours are highly similar within subsets of pairwise planes (denoted by blocks in A; e.g. the set of <inline-formula><mml:math id="inf284"><mml:mrow><mml:mi>θ</mml:mi><mml:mi>α</mml:mi></mml:mrow></mml:math></inline-formula> planes), contour properties have been averaged within such subsets. Angular tilt and eccentricity are highly consistent between precision matrix contours and perceptual isodiscrimination contours.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.014">http://dx.doi.org/10.7554/eLife.03722.014</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs009"/></fig><fig id="fig3s7" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.015</object-id><label>Figure 3—figure supplement 7.</label><caption><title>Pairwise coordinate planes in van Hateren Image Database: covariation in natural images predicts human isodiscrimination contours.</title><p>(<bold>A</bold>) For each pair of coordinates, we compare the precision matrix (blue ellipses) extracted from natural scenes (using <italic>N</italic> = 2, <italic>R</italic> = 32) to human perceptual isodiscrimination contours (red ellipses). 
A precision matrix is represented by the contour lines of its inverse (the covariance matrix <italic>M</italic>); these are the points <italic>(x,y)</italic> at which <inline-formula><mml:math id="inf285"><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mi>x</mml:mi><mml:mi>y</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>y</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo></mml:mrow></mml:math></inline-formula> constant. A short distance of the blue contour from the origin thus indicates a large value of <italic>M</italic> and a small value of the precision matrix. This in turn denotes a direction in which prior knowledge of the image statistic is imprecise. Our prediction is that psychophysical thresholds (red ellipses) should match these contours. Coordinate planes are organized into a grid, with subject-averaged and subject-specific isodiscrimination contours shown respectively above and below the diagonal of the grid. Across all 36 pairwise coordinate planes, there is a correspondence in the shape, size, and orientation of precision matrix contours and perceptual isodiscrimination contours. The quality of the match is quantified by computing the (<bold>B</bold>) angular tilt and (<bold>C</bold>) eccentricity of image-statistic contours (circular markers) and of perceptual isodiscrimination contours (square markers). Since contours are highly similar within subsets of pairwise planes (denoted by blocks in A; e.g. the set of <inline-formula><mml:math id="inf286"><mml:mrow><mml:mi>θ</mml:mi><mml:mi>α</mml:mi></mml:mrow></mml:math></inline-formula> planes), contour properties have been averaged within such subsets. 
Angular tilt and eccentricity are highly consistent between precision matrix contours and perceptual isodiscrimination contours. Coordinates extracted from the van Hateren database show larger variability in the <inline-formula><mml:math id="inf287"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf288"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> planes than those extracted from the Penn Natural Image Database (<xref ref-type="fig" rid="fig3s6">Figure 3—figure supplement 6</xref>), exhibiting a larger number of low-eccentricity contours for which tilt is poorly defined.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.015">http://dx.doi.org/10.7554/eLife.03722.015</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs010"/></fig><fig id="fig3s8" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.016</object-id><label>Figure 3—figure supplement 8.</label><caption><title>Principal axes of variation across natural images predict principal axes of human perceptual sensitivity in the full coordinate space.</title><p>Principal axes <inline-formula><mml:math id="inf289"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of variation in the distribution of natural image statistics are shown in comparison to the principal axes <inline-formula><mml:math id="inf290"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover 
accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of human sensitivity. Each of the nine principal axes is represented by a vertical gray/white column. Markers (circular = variation in natural image coordinates; square = human perceptual sensitivity) represent the fractional power of the contributions of (<bold>A</bold>, <bold>E</bold>) second-order cardinal (<inline-formula><mml:math id="inf291"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:msub><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>), (<bold>B</bold>, <bold>F</bold>) second-order oblique (<inline-formula><mml:math id="inf292"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>/</mml:mo><mml:mo>\</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>), (<bold>C</bold>, <bold>G</bold>) third-order (<italic>θ</italic>), and (<bold>D</bold>, <bold>H</bold>) fourth-order (<italic>α</italic>) coordinates to each principal axis; all contributions within each column sum to 1. Principal axes components, and the range of variability observed across image analysis variants or across subjects (see legend), are shown in blue for natural scene statistics and in red for perceptual sensitivities. 
There is an excellent match between the blue and red components for both the Penn and van Hateren image databases.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.016">http://dx.doi.org/10.7554/eLife.03722.016</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs011"/></fig><fig id="fig3s9" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.017</object-id><label>Figure 3—figure supplement 9.</label><caption><title>Asymmetries in natural image statistics.</title><p>(<bold>A</bold>) Probability distributions of natural image statistics. Projections of the distribution along second- and fourth-order coordinate axes are asymmetric about the origin, being shifted toward positive values. (<bold>B</bold>) We compare the ratio of standard deviations measured along positive vs negative coordinate axes (circular markers) to the ratio of human sensitivities measured along positive vs negative coordinate axes (square markers). Natural images show larger asymmetries in second- and fourth-order coordinate values than are observed in human sensitivities. This is particularly notable for the <italic>α</italic> coordinate, which shows a 2–6-fold asymmetry in natural image variation but at most a 1.2-fold asymmetry in human sensitivity.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.017">http://dx.doi.org/10.7554/eLife.03722.017</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs012"/></fig></fig-group></p><p>We quantify the correspondence between image analyses and psychophysical analyses by computing the scalar product between the normalized vector of standard deviations (extracted separately from each image analysis) and the normalized vector of subject-averaged sensitivities (extracted from the set of psychophysical analyses). 
A value of 1 indicates perfect correspondence, and 0 indicates no correspondence. This value ranges from 0.987 to 0.999 across image analyses and is consistently larger than the value measured under the null hypothesis that the apparent correspondence between statistics and sensitivities is chance (<italic>p</italic> ≤ .0003 for each image analysis; see <xref ref-type="table" rid="tbl1 tbl2">Tables 1–2</xref> and ‘Materials and methods’<italic>, Permutation tests</italic>, for details regarding statistical tests).</p><p>These findings support our hypothesis that human perceptual sensitivity measured along single coordinate axes (assessed using synthetic binary textures) is predicted by the degree of variation along the same coordinate axes in natural scenes.</p></sec><sec id="s2-4"><title>Natural scenes predict human sensitivity to joint variations of all pairs of coordinates</title><p>The correspondence shown in <xref ref-type="fig" rid="fig3">Figure 3A</xref> considers each image statistic coordinate in isolation. However, it is known that image statistics covary substantially in natural images (as diagrammed in <xref ref-type="fig" rid="fig1">Figure 1D</xref>) and also that they interact perceptually (as diagrammed in <xref ref-type="fig" rid="fig2">Figure 2C</xref>). When pairs of natural image statistics covary, thus sampling oblique directions not aligned with the coordinate axes in the space of image statistics, our hypothesis predicts that human perceptual sensitivity is matched to both the degree and the direction of that covariation (we are referring here to the orientation of a distribution in the coordinate plane of a pair of image statistics, and not to an orientation in physical space). To test this idea, we proceeded as follows.</p><p>First, we fit the distribution of image statistics with a multidimensional Gaussian. 
When projected into pairwise coordinate planes, the isoprobability contours of this Gaussian capture the in-plane shape and orientation of the covariation of the distribution. Along single coordinate axes, the variation in natural image statistics predicts human perceptual sensitivities, as we have shown (<xref ref-type="fig" rid="fig3">Figure 3A</xref>). More generally, we would predict that sensitivity should be high along directions in which the distribution of natural image statistics has high standard deviation, because in those directions, the position of a sample cannot be guessed. Within coordinate planes, the quantitative statement of this idea is that the inverse covariance matrix, or precision matrix, predicts perceptual isodiscrimination contours. Sensitivity is expected to be low (and therefore threshold high) along directions in which the precision matrix has a high value and the position of a sample can be guessed a priori.</p><p>Results in each coordinate plane are shown in <xref ref-type="fig" rid="fig3">Figure 3B</xref>. Across all subjects and all coordinate planes, we find that the shape and orientation of perceptual isodiscrimination contours (red ellipses) are predicted by the distribution of image statistics extracted from natural scenes (blue ellipses). 
As in <xref ref-type="fig" rid="fig3">Figure 3A</xref>, the correspondence is very good, with mismatch that is comparable to the variability observed across image analyses and across subjects.</p><p>To quantify the correspondence between natural image and psychophysical analyses, we describe each ellipse by a single vector <inline-formula><mml:math id="inf27"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> that combines information about shape (eccentricity) and orientation (angular tilt), and we compute the scalar product between the image analysis vector <inline-formula><mml:math id="inf28"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and the subject-averaged psychophysical vector <inline-formula><mml:math id="inf29"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>. This value, averaged across coordinate planes, ranges from 0.953 to 0.977 across image analyses. We compared this correspondence to that obtained under the null hypotheses that (<italic>i</italic>) the apparent correspondence between image statistic covariances and isodiscrimination contours is chance, or (<italic>ii</italic>) the apparent covariances in image statistics are due to chance. 
The observed correspondence is much greater than the value measured under either null hypothesis (<italic>p</italic> ≤ .0003 for each image analysis under both hypotheses; see ‘Materials and methods’<italic>, Analysis of image statistics in pairwise coordinate planes</italic>, and <xref ref-type="fig" rid="fig3s2">Figure 3—figure supplement 2</xref> for comparisons of eccentricity and tilt, and <xref ref-type="table" rid="tbl1 tbl2 tbl3">Tables 1–3</xref> and ‘Materials and methods’<italic>, Permutation tests</italic>, for statistical tests).</p><p>These findings confirm that the shape and orientation of human isodiscrimination contours, measured across all pairwise combinations of coordinates, can be quantitatively predicted from the covariation of image statistics extracted from natural scenes. The observed correspondence is maintained within the full 9-dimensional coordinate space (see ‘Materials and methods’<italic>, Analysis of the full 9-dimensional distribution of image statistics</italic>, and <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplement 3</xref> for principal component analyses, and <xref ref-type="table" rid="tbl1 tbl2 tbl3">Tables 1–3</xref> and ‘Materials and methods’, <italic>Permutation tests</italic>, for statistical tests), confirming that our hypothesis describes human sensitivity in the full 9-dimensional space of local image statistics extracted from natural scenes.</p></sec></sec><sec sec-type="discussion" id="s3"><title>Discussion</title><p>How should neural mechanisms be distributed to represent a diverse set of informative sensory features? We argued that, when performance requires inferences limited by sampling of the statistics of input features, resources should be devoted in proportion to feature variability. A basic idea here is that features that take a wider range of possible values are less predictable, and will better distinguish between contexts in the face of input noise. 
We used this hypothesis to successfully predict human sensitivity to elements of visual form arising from spatial multi-point correlations in images. This result is notable for several reasons. First, we successfully predicted dozens of independent parameters that describe human perceptual sensitivity. The only free parameter was a scale that converted between perceptual sensitivities and natural image statistics. Moreover, predictions about the rank ordering of sensitivities (<xref ref-type="fig" rid="fig3">Figure 3A</xref>) and the shape and orientation of isodiscrimination contours (<xref ref-type="fig" rid="fig3">Figure 3B</xref>) do not even require a scale factor. Second, the theoretical predictions and their psychophysical test were derived from two very different sources. Psychophysical stimuli consisted of mathematically-defined synthetic binary textures with precisely-controlled correlational structure that is unlikely to occur outside of the laboratory. In contrast, the efficient coding predictions were derived from calibrated photographs of natural scenes in which many types of correlations are simultaneously present. Third, predictions refer to multi-point (and not just pairwise) correlations, which are critical for defining local features such as lines and edges (<xref ref-type="bibr" rid="bib37">Oppenheim and Lim, 1981</xref>; <xref ref-type="bibr" rid="bib33">Morrone and Burr, 1988</xref>). 
In contrast, previous normative theories have mainly focused on explaining the linear receptive fields of neurons in primary visual (<xref ref-type="bibr" rid="bib35">Olshausen and Field, 1996</xref>; <xref ref-type="bibr" rid="bib8">Bell and Sejnowski, 1997</xref>; <xref ref-type="bibr" rid="bib51">van Hateren and Ruderman, 1998</xref>; <xref ref-type="bibr" rid="bib52">van Hateren and van der Schaaf, 1998</xref>; <xref ref-type="bibr" rid="bib24">Hyvarinen and Hoyer, 2000</xref>; <xref ref-type="bibr" rid="bib61">Vinje and Gallant, 2000</xref>; <xref ref-type="bibr" rid="bib25">Karklin and Lewicki, 2009</xref>) and auditory cortex (<xref ref-type="bibr" rid="bib13">Carlson and DeWeese, 2002</xref>, <xref ref-type="bibr" rid="bib14">2012</xref>), or on deriving symmetry- and coverage-based mesoscopic models of cortical map formation in V1 (<xref ref-type="bibr" rid="bib63">Wolf and Geisel, 1998</xref>; <xref ref-type="bibr" rid="bib47">Swindale et al., 2000</xref>; <xref ref-type="bibr" rid="bib27">Kaschube et al., 2011</xref>). Finally, the efficient coding prediction of greater sensitivity to more variable multipoint correlations is closely tied to the statistical structure of natural visual images. Specifically, this regime applies to highly variable multipoint correlations that cannot be predicted from simpler ones. Some other multipoint correlations (defined on configurations other than a 2 × 2 glider) are also highly variable, but they are predictable from simpler correlations. 
For these multipoint correlations, visual sensitivity is very low (<xref ref-type="bibr" rid="bib48">Tkačik et al., 2010</xref>), and efficient coding is not applicable in the form proposed here.</p><p>In sum, the surprising predictive power and the high statistical significance of our results provide strong support for the proposed application of the efficient coding hypothesis to cortical processing of complex sensory features.</p><sec id="s3-1"><title>Perceptual salience of multi-point correlations likely arises in cortex</title><p>Although we did not record cortical responses directly, several lines of evidence indicate that the perceptual thresholds we measured are determined by cortical processes. First, the stimuli had high contrast (100%) and consisted of pixels that were readily visible (14 arcmin), so retinal limitations of contrast sensitivity and resolution were eliminated. Second, the task requires pooling of information over wide areas (100–200 pixels, that is, a region whose diameter is 10–15 times the width of an image element; see Figure 7 in <xref ref-type="bibr" rid="bib56">Victor and Conte, 2005</xref>). Retinal receptive fields are unlikely to do this, as the ratio of their spatial extent (surround size) to their resolution (center size) is typically no more than 4:1 (<xref ref-type="bibr" rid="bib16">Croner and Kaplan, 1995</xref>; <xref ref-type="bibr" rid="bib28">Kremers et al., 1995</xref>). 
Third, to account for the specificity of sensitivity to three- and four-point correlations, a cascade of two linear-nonlinear stages is required (<xref ref-type="bibr" rid="bib55">Victor and Conte, 1991</xref>); retinal responses are quite well-captured by a single nonlinear stage (<xref ref-type="bibr" rid="bib34">Nirenberg and Pandarinath, 2012</xref>), and cat retinal populations show no sensitivity to the four-point correlations studied here (<xref ref-type="bibr" rid="bib60">Victor, 1986</xref>) while simultaneous cortical field potentials do. Conversely, macaque visual cortical neurons (<xref ref-type="bibr" rid="bib38">Purpura et al., 1994</xref>), especially those in V2, manifest responses to three- and four-point correlations (<xref ref-type="bibr" rid="bib65">Yu et al., 2013</xref>).</p></sec><sec id="s3-2"><title>Cortex faces a different class of challenges than the sensory periphery</title><p>Successive stages of sensory processing share the same broad goals: invest resources in encoding stimulus features that are sufficiently informative, and suppress less-informative ones. In the periphery, this is exemplified by the well-known suppression of very low spatial frequencies; in cortex, this is exemplified by insensitivity to high-order correlations that are predictable from lower-order ones. Previous work has shown that such higher-order correlations can be separated into two groups—informative and uninformative—and only the informative ones are encoded (<xref ref-type="bibr" rid="bib48">Tkačik et al., 2010</xref>). We used this finding to select an informative subspace for the present study, and we asked how resources should be efficiently allocated amongst features within this informative subspace.</p><p>A simple model of efficient coding by neural populations is shown in <xref ref-type="fig" rid="fig4">Figure 4A</xref> (details in ‘Materials and methods’<italic>, Two regimes of efficient coding</italic>). 
Here, to enable analytical calculations, we used linear filters of variable gain and subject to Gaussian noise to model a population of neural channels encoding different features. The optimal allocation of resources to maximize information transmitted by the population depends on the amount of input noise, the amount of output noise, the input signal variability, and the total resources available to the system, here quantified as a constraint on the total output power (i.e., sum of response variances) in the neural population. The constrained output power and the output noise together determine the ‘bandwidth’ of the system—that is, the expressive capacity of its outputs. Consider a neural population with input noise, output noise, and a fixed amount of output power. We find that when input signal variability is sufficiently large compared to the input noise, the gain of neurons should <italic>decrease</italic> with the variance of the input (regions to the right of the peaks in the right-hand panel of <xref ref-type="fig" rid="fig4">Figure 4A</xref>). This is a regime where the output bandwidth is low compared to the input range, and efficient coding predicts that signals should be ‘whitened’ by equalizing the variance in different channels. Conversely, consider input signals with a smaller range, which are thus more disrupted by input noise. In this case, the gain of neurons should <italic>increase</italic> with the variance of the input (regions to the left of the peaks in the right-hand panel of <xref ref-type="fig" rid="fig4">Figure 4A</xref>). This is a regime where the input noise dominates, and efficient coding predicts that the system should invest more resources in more variable, and hence more easily detectable, input signals. 
The relative sizes of input and output noise (controlled by <inline-formula><mml:math id="inf30"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> in <xref ref-type="fig" rid="fig4">Figure 4A</xref>) determine the input ranges over which the two qualitatively different regimes of efficient coding apply.<fig-group><fig id="fig4" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.018</object-id><label>Figure 4.</label><caption><title>Regimes of efficient coding.</title><p>(<bold>A</bold>) To analyze different regimes of efficient coding, we consider a set of channels, where the <inline-formula><mml:math id="inf31"><mml:mrow><mml:msup><mml:mi>k</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> channel carries an input signal with variability <inline-formula><mml:math id="inf32"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. Gaussian noise is added to the input. The result is passed through a linear filter with gain <inline-formula><mml:math id="inf33"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula>, and then Gaussian noise is added to the filter output. We impose a constraint on the total power output of all channels, that is, a constraint on its total resources. With these assumptions, the set of gains that maximizes the transmitted information can be determined (see ‘Materials and methods’<italic>, Two regimes of efficient coding</italic>, and (<xref ref-type="bibr" rid="bib53">van Hateren, 1992a</xref>; <xref ref-type="bibr" rid="bib17">Doi and Lewicki, 2011</xref>; <xref ref-type="bibr" rid="bib18">Doi and Lewicki, 2014</xref>)). 
This set of gains depends on the relative strengths of input and output noise and on the severity of the power constraint, quantified here by the dimensionless parameter <inline-formula><mml:math id="inf34"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> (right-hand panel). As <inline-formula><mml:math id="inf35"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> decreases from 1 to 0, the system moves from a regime in which output noise is limiting to one in which input noise is limiting. (<bold>B</bold>) The efficient coding model applied to the sensory periphery. Raw luminances from natural images are corrupted with noise (e.g. shot noise resulting from photon incidence) and passed through a linear filter. The resulting signal is carried by the optic nerve, which imposes a strong constraint on output capacity. In the bandwidth limited case where output noise dominates over input noise (e.g., under high light conditions when photon noise is not limiting), the optimal gain decreases as signal variability increases. Since channel input and channel gain vary reciprocally, channel outputs are approximately equalized, resulting in a ‘whitening’, or decorrelation. (<bold>C</bold>) The efficient coding model applied to cortical processing. Informative image features resulting from early cortical processing, caricatured by our preprocessing pipeline as applied to the retinal output, are sampled from a spatial region of the image. This sampling acts as a kind of input noise, because it only provides limited count-based estimates for the true statistical properties of the image source. When this input noise is limiting, the optimal gain <italic>increases</italic> as signal variability increases. Rather than whiten, the output signals preserve the correlational structure of the input. Note that in both regimes (<bold>B</bold>) and (<bold>C</bold>), there is a range of signals that are not encoded at all. 
These are the signals that are not sufficiently informative to warrant an allocation of resources.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.018">http://dx.doi.org/10.7554/eLife.03722.018</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722f004"/></fig><fig id="fig4s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.019</object-id><label>Figure 4—figure supplement 1.</label><caption><title>Schematic representation of channel optimization problem.</title><p>We consider a set of channels, each of which is dedicated to processing an independent signal <inline-formula><mml:math id="inf293"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. Sampling noise (taken here to be unity) is added to the signal <inline-formula><mml:math id="inf294"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, which is then passed through a linear filter <inline-formula><mml:math id="inf295"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> with gain <inline-formula><mml:math id="inf296"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula>. Channel noise (taken here to be unity) is added to the output of <inline-formula><mml:math id="inf297"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. 
The total dynamic range of all channels is constrained.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.019">http://dx.doi.org/10.7554/eLife.03722.019</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs013"/></fig><fig id="fig4s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.020</object-id><label>Figure 4—figure supplement 2.</label><caption><title>Optimal coding regimes.</title><p>Optimal gain <inline-formula><mml:math id="inf298"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula> is shown as a function of signal strength <inline-formula><mml:math id="inf299"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for different choices of the output constraint <inline-formula><mml:math id="inf300"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula>. For signals below a critical strength <inline-formula><mml:math id="inf301"><mml:mrow><mml:msqrt><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:mtext>Λ</mml:mtext></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula>, the optimal gain is zero, and signals are not encoded. The limit <inline-formula><mml:math id="inf302"><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>→</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> from below defines the transmission-limited regime, while the limit <inline-formula><mml:math id="inf303"><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>→</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> from above defines the sampling-limited regime. (<bold>A</bold>) Transmission-limited regime. 
For signal strengths much larger than the critical value, the main constraint is output power, and the optimal gain is inversely proportional to the signal strength (as indicated by the dotted line with negative slope). As <inline-formula><mml:math id="inf304"><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>→</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>, there is an increasingly sharp transition between signals that are not encoded, and signals that are encoded in inverse proportion to their size (‘whitened’). (<bold>B</bold>) Sampling-limited regime. As <inline-formula><mml:math id="inf305"><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>→</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>, there is a broadening of the transition between signals that are not encoded, and signals that are whitened. This broadening results in a regime in which sampling-noise is the dominant constraint, and the optimal gain <italic>increases</italic> with signal strength (as indicated by the dotted line with positive slope).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.020">http://dx.doi.org/10.7554/eLife.03722.020</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs014"/></fig><fig id="fig4s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.021</object-id><label>Figure 4—figure supplement 3.</label><caption><title>Noise-dependent transition between efficient coding regimes.</title><p>Total noise is the sum of sampling noise (x-axis) and channel noise (y-axis). In the case considered here (<italic>d</italic> = 2), channel noise cannot exceed 0.5, but sampling noise can. For total noise below 0.5, the optimal filter <bold>L</bold> is antialigned with the signal, and the optimal strategy is decorrelation via whitening (white region, transmission-limited regime). 
For total noise above 0.5, the optimal filter is aligned with the signal (black region, sampling-limited regime), consistent with our findings that perceptual sensitivity is tuned to the direction and degree of variation in natural image statistics.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.021">http://dx.doi.org/10.7554/eLife.03722.021</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs015"/></fig><fig id="fig4s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.03722.022</object-id><label>Figure 4—figure supplement 4.</label><caption><title>Optimal filter shape and orientation.</title><p>Tilt and eccentricity of the optimal linear filter <inline-formula><mml:math id="inf306"><mml:mrow><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>∗</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula> for random choices of the input signal <bold>s</bold>. As the magnitudes of sampling and channel noises vary, there emerge two regimes of efficient coding: a transmission-limited regime (<bold>A</bold>–<bold>B</bold>) and a sampling-limited regime (<bold>C</bold>–<bold>D</bold>). In the transmission-limited regime, the maximum filter eigendirection is aligned with the minimum signal eigendirection (and hence there is a difference in tilt of <inline-formula><mml:math id="inf307"><mml:mrow><mml:mi>π</mml:mi><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula>). In contrast, in the sampling-limited regime, the maximum filter eigendirection is aligned with the <italic>maximum</italic> signal eigendirection. Note that a direct comparison of eccentricities between these two regimes can be misleading, due to a reversal of the maximal eigendirections. 
(<bold>A</bold>) Sampling noise <inline-formula><mml:math id="inf308"><mml:mrow><mml:mtext>Ξ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>, channel noise <inline-formula><mml:math id="inf309"><mml:mrow><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0.2</mml:mn></mml:mrow></mml:math></inline-formula>. The optimal strategy is decorrelation via whitening using a filter aligned perpendicularly to the input signal (right panel) with an eccentricity that matches that of the input signal (dashed line, left panel). (<bold>B</bold>) Sampling noise <inline-formula><mml:math id="inf310"><mml:mrow><mml:mtext>Ξ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0.1</mml:mn></mml:mrow></mml:math></inline-formula>, channel noise <inline-formula><mml:math id="inf311"><mml:mrow><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>. At very low total noise, even with zero channel noise, the optimal strategy is still decorrelation (right panel) using a filter whose eccentricity is less than the eccentricity of the input signal. (<bold>C</bold>) Sampling noise <inline-formula><mml:math id="inf312"><mml:mrow><mml:mtext>Ξ</mml:mtext><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:math></inline-formula>, channel noise <inline-formula><mml:math id="inf313"><mml:mrow><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> (the low input SNR regime identified in (<xref ref-type="bibr" rid="bib53">van Hateren, 1992a</xref>)). The tilt of the optimal filter is aligned to the tilt of the signal (right panel), and the filter eccentricity is approaching the prediction of the square-root gain relation (curved dotted line, left panel) with decreasing SNR. 
(<bold>D</bold>) Sampling noise <inline-formula><mml:math id="inf314"><mml:mrow><mml:mtext>Ξ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0.4</mml:mn></mml:mrow></mml:math></inline-formula>, channel noise <inline-formula><mml:math id="inf315"><mml:mrow><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0.35</mml:mn></mml:mrow></mml:math></inline-formula> (dominating sampling noise). For increasing sampling noise strength, the filter eccentricities match the signal eccentricities (dashed line, left panel).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.022">http://dx.doi.org/10.7554/eLife.03722.022</ext-link></p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="elife03722fs016"/></fig></fig-group></p><p>To make these abstract considerations concrete, we first considered coding in the sensory periphery. A common strategy employed in the periphery is ‘whitening’, where relatively fewer resources are devoted (yielding lower gain) to features with more variation (<xref ref-type="bibr" rid="bib35">Olshausen and Field, 1996</xref>). As an example, within the spatial frequency range that the retina captures well, sensitivity is greater for high spatial frequencies than for low ones, that is, sensitivity is inversely related to the degree of variation in natural scenes (the well-known <inline-formula><mml:math id="inf36"><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:msup><mml:mi>f</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> power spectrum [<xref ref-type="bibr" rid="bib35">Olshausen and Field, 1996</xref>]). <xref ref-type="fig" rid="fig4">Figure 4B</xref> illustrates how this strategy can emerge from the simple efficient coding scheme discussed above as applied to peripheral sensory processing. Spatiotemporal correlations of light undergo filtering before passing through the optic nerve bottleneck (a constraint on bandwidth). 
Such a constraint on bandwidth is equivalently understood as a regime where output noise is relatively large compared to input noise. In this limit, where output noise dominates over input noise, the optimal strategy is whitening (see <xref ref-type="bibr" rid="bib45">Srinivasan et al., 1982</xref> and <xref ref-type="fig" rid="fig4">Figure 4A</xref>). Of course, real neural systems contend with both input and output noise; indeed recent work has shown that simply whitening to deal with output noise underestimates the optimal performance that the sensory periphery can achieve (<xref ref-type="bibr" rid="bib18">Doi and Lewicki, 2014</xref>).</p><p>An alternative regime arises when input noise limits performance. In this regime, relatively <italic>more</italic> resources are devoted to features with more variation. This regime was discussed in early work of <xref ref-type="bibr" rid="bib53">van Hateren, (1992a)</xref> and was also recognized in (<xref ref-type="bibr" rid="bib17">Doi and Lewicki, 2011</xref>, <xref ref-type="bibr" rid="bib18">2014</xref>), although it has received much less attention than the ‘whitening’ regime. Our results suggest that this is the regime relevant to cortex, where it predicts the relative allocation of resources to higher-order image statistics. <xref ref-type="fig" rid="fig4">Figure 4C</xref> illustrates the simple efficient coding scheme in this context. We use our image preprocessing pipeline to mimic early visual processing, and we consider the downstream coding of higher-order image features. Because these features must be sampled from a finite patch of an image, they are subject to <italic>input</italic> noise arising from fluctuations in statistical estimation. When such input noise is limiting, the ability to detect a signal from noise increases with the variability of that signal. 
In this limit, efficient coding predicts that resources should be allocated in proportion to feature variability (<xref ref-type="fig" rid="fig4">Figure 4C</xref>). This captures the intuition that when signal reliability is in question, more reliable signals warrant more resources. Furthermore, if two or more channels have covarying signals, resources should be devoted in relation to the direction and degree of maximum covariance (see ‘Materials and methods’<italic>, Two regimes of efficient coding</italic>, <xref ref-type="fig" rid="fig4s3">Figure 4—figure supplement 3</xref>, and <xref ref-type="fig" rid="fig4s4">Figure 4—figure supplement 4</xref>).</p><p>The difference between these two efficient coding regimes is a consequence of the form of noise—output vs input noise—that is limiting. Our finding that cortex operates in a different regime than the well-known peripheral whitening reflects the fact that different stages and kinds of processing can face different constraints. While information transmission by the visual periphery is limited by a bottleneck in the optic nerve, cortex faces no such transmission constraint. Furthermore, while faithful encoding may be an immediate goal of early visual processing, cortical circuits have to interpret image features from a complex and crowded visual scene and perform statistical inference. For example, to discriminate between various textures, the cortex cannot perform pixel-by-pixel comparisons, but must rely on the estimation of local correlations (image statistics) instead. Because these correlations must be sampled from a finite patch of the visual scene, any estimate will be limited by sampling fluctuations.</p></sec><sec id="s3-3"><title>Sampling constraints vs resource constraints</title><p>Sampling fluctuations constitute a source of input noise, the magnitude of which depends on the size of the sampled region. 
For natural images, this gives rise to a tradeoff: small regions lead to large fluctuations in the estimated statistics, while large regions blur over local details. This blurring may obscure the boundaries between objects with different surface properties. While the brain must implement such sampling, the size, scale, and potentially dynamic nature of the sampling region is not known. Interestingly, our predictions of human sensitivities do not change substantially over a wide range of spatial scales and image patch sizes, perhaps reflecting a scaling property of natural images (<xref ref-type="bibr" rid="bib46">Stephens et al., 2013</xref>). An avenue for future research is to determine whether there is an optimal region size, and if so, whether it could be estimated from images themselves.</p><p>Sampling limitations alone do not suffice to account for the observed differential sensitivity of the brain to local image statistics. Were sampling limitations the only consideration, perceptual sensitivity would be the same along each coordinate axis, and perceptual isodiscrimination contours would be circular in each coordinate plane. This follows from an ideal observer calculation (See Appendix B of <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>). In contrast, we find that human observers have a severalfold variability in sensitivity along different coordinate axes (<xref ref-type="fig" rid="fig3">Figure 3A</xref>) and have isodiscrimination contours that are elongated in oblique directions (<xref ref-type="fig" rid="fig3">Figure 3B</xref>). The efficient coding principle can account for these findings by taking into consideration the fact that a real observer has finite processing resources. 
In this context (finite resources <italic>and</italic> substantial input noise), the efficient coding principle predicts that resources are invested in relation to the range of signal values that are typically present (<xref ref-type="bibr" rid="bib53">van Hateren, 1992a</xref>), as we find. Interestingly, resource limitations seem to play an important role in the cortex despite the vast expansion in the number of neurons compared to the optic nerve. Presumably, this reflects the large number of complex features that could be computed and the corresponding need for a large overrepresentation of the stimulus space (<xref ref-type="bibr" rid="bib36">Olshausen and Field, 1997</xref>).</p></sec><sec id="s3-4"><title>Clues to neural mechanisms</title><p>While we find a close match between the variation in natural image statistics and human psychophysical performance, some aspects of the distribution of natural image statistics do not match psychophysical data.</p><p>These differences are not readily apparent when we examine the variances and covariances (<xref ref-type="fig" rid="fig3">Figure 3</xref>) of the distribution of natural image statistics but emerge only when one considers its detailed shape (see ‘Materials and methods’<italic>, Asymmetries in distributions of natural image statistics</italic>). For example, the distribution of <italic>α</italic>-coordinate values has a longer tail in the positive vs negative direction (see <xref ref-type="fig" rid="fig3s9">Figure 3—figure supplement 9</xref> and (<xref ref-type="bibr" rid="bib48">Tkačik et al., 2010</xref>)). 
In contrast, human perceptual sensitivity is symmetric, or very nearly so (within <inline-formula><mml:math id="inf37"><mml:mrow><mml:mo>∼</mml:mo><mml:mn>20</mml:mn><mml:mtext>%</mml:mtext></mml:mrow></mml:math></inline-formula>), for positive vs negative values of <italic>α</italic> (<xref ref-type="bibr" rid="bib58">Victor et al., 2005</xref>; <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>; <xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>). This suggests that limitations imposed by ‘neural hardware’ force the system to use heuristics instead of matching the natural image distribution exactly. For example, an opponent mechanism responsible for detecting variations along, say, the <italic>α</italic> coordinate, might be a useful and easy (although imperfect) way to process the asymmetric distribution of four-point correlations found in natural scenes. Such a mechanism could be matched to the variance of the natural image distribution along the <italic>α</italic> coordinate, but not to its skew or other odd moments. An opponent mechanism would necessarily give rise to equal sensitivities to positive vs negative values of <italic>α</italic>, as observed in psychophysical results. Further study of deviations from a perfect match to the distribution of natural image statistics might provide additional insight into these or other possible neural mechanisms, and into the goals of the computations. Independently, our results also raise an interesting theoretical question about the optimal representation of non-gaussian, multidimensional signals under resource-limited conditions.</p></sec><sec id="s3-5"><title>Outlook</title><p>Looking forward, we hypothesize that the principle of efficient coding might apply to cortical processing at higher levels. 
For example, more complex image features, such as shapes, are represented as conjunctions of contour fragments (<xref ref-type="bibr" rid="bib11">Brincat and Connor, 2004</xref>), where each contour fragment is a local image object defined by particular multi-point correlations. We might speculate that the joint statistics of contour fragments in natural scenes can predict, through appropriate formulation of the same efficient coding principle used here, the properties of neurons in area IT (<xref ref-type="bibr" rid="bib23">Hung et al., 2012</xref>; <xref ref-type="bibr" rid="bib64">Yau et al., 2012</xref>) or the associated perceptual sensitivities of human observers.</p><p>Finally, although we have focused on perception of image statistics, we do this with the premise that this process is in the service of inferring the materials and objects that created an image and ultimately, guiding action. Thus, it is notable that we found a tight correspondence between visual perception and natural scene statistics without considering a specific task or behavioral set. Indeed, the emergence of higher-order percepts without explicit task specification was the original hope of the efficient coding framework as first put forward by Barlow and Attneave (<xref ref-type="bibr" rid="bib3">Attneave, 1954</xref>; <xref ref-type="bibr" rid="bib5">Barlow, 1959</xref>, <xref ref-type="bibr" rid="bib6">1961</xref>). Doubtless, these ‘top-down’ factors also influence the visual computations that underlie perception, and the nature and site of this influence are an important focus of future research.</p></sec></sec><sec sec-type="materials|methods" id="s4"><title>Materials and methods</title><sec id="s4-1"><title>Image preprocessing</title><sec id="s4-1-1"><title>UPenn Natural Image Database</title><p>A database of images was collected in the Okavango Delta, a savannah habitat of Botswana (<xref ref-type="bibr" rid="bib49">Tkačik et al., 2011</xref>). 
Panoramic, eye-level shots were taken with a Nikon D70 camera during the dry season in midday illumination. Trichromatic images were then converted to equivalent log-luminance images. From this database, we selected a set of 924 images with minimal amounts of sky (see following paragraph).</p></sec><sec id="s4-1-2"><title>Image selection</title><p>Natural images were taken from two different databases: the UPenn Natural Image Database (shown in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref>) and the van Hateren Natural Image Dataset (shown in ‘Materials and methods’<italic>, Comparison with van Hateren Database</italic>). Images from the UPenn Natural Image Database were selected by hand to ensure that they contained no man-made objects. We required that images contained minimal (less than one-third of the total image area) amounts of sky, as the contribution of sky to the overall power spectrum of natural images is well-documented (<xref ref-type="bibr" rid="bib50">Torralba and Oliva, 2003</xref>) and is not the focus of the present study. Images from the van Hateren Natural Image Dataset were chosen subject to the additional constraint that scenery which was clearly the result of human landscaping (e.g. trees all in a line) be excluded. The analyses presented here were performed using the logarithms of the pixel intensities, a standard procedure in the study of natural images (<xref ref-type="bibr" rid="bib40">Ruderman and Bialek, 1994</xref>). However, the results were unchanged if absolute pixel intensities were used instead. 
For more details about the construction of the images from these sources, see (<xref ref-type="bibr" rid="bib49">Tkačik et al., 2011</xref>) (UPenn dataset) and <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:href="http://www.kyb.tuebingen.mpg.de/?id=227">http://www.kyb.tuebingen.mpg.de/?id=227</ext-link> (van Hateren dataset).</p></sec><sec id="s4-1-3"><title>Block averaging</title><p>Images of size <italic>L</italic><sub><italic>1</italic></sub> <italic>× L</italic><sub><italic>2</italic></sub> are block-averaged by a factor of <italic>N</italic>, which involves averaging the intensities of pixels arranged into contiguous <italic>N × N</italic> squares. The resulting image is of size <italic>L</italic><sub><italic>1</italic></sub><italic>/N × L</italic><sub><italic>2</italic></sub><italic>/N</italic>. To the extent that natural images are scale invariant (a well-supported hypothesis (<xref ref-type="bibr" rid="bib20">Field, 1987</xref>; <xref ref-type="bibr" rid="bib41">Ruderman, 1997</xref>; <xref ref-type="bibr" rid="bib46">Stephens et al., 2013</xref>)), this procedure leaves the underlying statistics invariant. In our analyses, we block average images by at least a factor of two (thereby eliminating the Nyquist frequencies) in order to avoid sampling artifacts imposed by the camera matrix during image acquisition. In <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref>, we presented two values of <italic>N</italic>: <italic>N</italic> = 2, 4. In ‘Materials and methods’<italic>, Analysis variants for Penn Natural Image Database</italic>, we show that our results are consistent when <italic>N</italic> is extended to include <italic>N =</italic> 8, 12, 16, 20.</p></sec><sec id="s4-1-4"><title>Fourier whitening</title><p>We divide each block-averaged image into square <italic>R × R</italic> patches. 
In <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref>, we presented results using three values of <italic>R</italic>: <italic>R =</italic> 32, 48, 64. In ‘Materials and methods’<italic>, Analysis variants for Penn Natural Image Database</italic>, we show that our results are consistent when <italic>R</italic> is extended to include <italic>R</italic> = 80, 128.</p><p>To remove global correlations in natural images, we whiten the set of image patches by flattening the Fourier power spectrum of the image patch ensemble. This procedure removes expected ensemble-average (and thus predictable) pairwise correlations, but non-zero pairwise correlations may still exist within individual patches; such correlations are the subject of this study. To carry out this procedure, the whitening filter is the inverse square-root of the ensemble-averaged Fourier power spectrum. For the natural image analyses presented here, the filter has a center-surround structure similar to that observed in the retina.</p><p>Following the whitening procedure, we binarize each image patch about its median pixel intensity. This creates image patches with equal numbers of black and white pixels.</p></sec><sec id="s4-1-5"><title>Removal of blurry images</title><p>In any image database, there will be blurring due to camera motion and focus artifacts. Because we are interested in the statistics computed from in-focus image patches, we use a mixture of components (MOC) method to separate blurred from in-focus image patches.</p><p>To perform this separation, we first examined the 9-dimensional distribution of natural image statistics (see <xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1A</xref> for the projection of the distribution onto the (α, β<sub>−</sub>) plane). When projected onto various coordinate planes, the structure of the distribution suggested that the distribution could be well-described by a weighted sum of two components. 
We explored this two-component description by running a standard maximum likelihood MOC inference that described each component by a Gaussian distribution. This inference method returned the mean, covariance, and relative weighting of each putative Gaussian component. In this process, each image patch was assigned to one of the two components (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1B</xref>; note that the two components are separated in the 9-dimensional space, although they appear overlapping in this particular projection). After inspecting the clustering of patches into each of the two components, we observed that one of the components contained image patches that are sharp (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1C</xref>), while the other contained patches that are blurry (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1D–E</xref>).</p><p>We performed several controls to show that this separation is precise and effective. We first confirmed, based on visual inspection of a large number of images, that this method reliably separates blurred from in-focus patches. For example, images that were uniformly composed of patches assigned to the ‘blurry’ component were fully blurred due, for example, to camera motion (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1E</xref>). Similarly, images in which a large percentage of patches were assigned to the ‘blurry’ component contained large regions that were blurred due to motion or camera focus artifacts (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1D</xref>). Furthermore, the spatial boundary between blurred and in-focus regions in the original image matched the boundary between patches assigned to the ‘blurry’ vs ‘in-focus’ component.</p><p>We additionally tested this method by incrementally removing images that were significantly blurred and then re-running the MOC method. 
After the removal of each subsequent image, the MOC method returned a mixture of components that was incrementally more strongly weighted toward the ‘in-focus’ component.</p><p>Finally, we tested this method by applying motion and Gaussian blur filters to sharp images (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2B</xref>). With a sufficiently strong blurring transformation, all of the patches within a sharp image changed assignment from the ‘in-focus’ to the ‘blurry’ component. Successive block averaging removes the effects of small blur, such that a larger blurring transformation is required to change the assignment of patches from the ‘in-focus’ to the ‘blurry’ component. Furthermore, the application of motion and blur filters altered the spatial distribution of natural image statistics in a manner consistent with the statistics observed in image patches assigned to the ‘blurry’ component via the MOC method (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2A</xref>). Both types of blurring increased the values of second- and fourth-order statistics, but they did so in different manners. Camera motion strongly increased both the fourth-order statistic and the second-order statistic aligned parallel to the direction of motion. In comparison, camera focus artifacts (arising, e.g., from variations in depth of field) more uniformly increased all second- and fourth-order statistics.</p></sec><sec id="s4-1-6"><title>Scaling image analyses</title><p>To compare between natural image and psychophysical analyses, we scale the set of 9 standard deviations extracted from a given image analysis by a multiplicative factor that minimizes the squared error between the set of nine standard deviations and the set of nine psychophysical sensitivities. 
<xref ref-type="fig" rid="fig3s1">Figure 3—figure supplement 1</xref> shows the value of the scale factor for different choices of the block-average factor <italic>N</italic> and patch size <italic>R</italic>. This scaling places the greatest weight on the match between statistics with high variation/sensitivity (i.e. <inline-formula><mml:math id="inf38"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf39"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>). Note that a different choice of scaling factor can shift this weight to different statistics; for example, a scaling factor that minimizes the least squares error between inverse standard deviation and thresholds will place larger weight on the match between statistics with low variation/sensitivity (i.e. <italic>θ</italic> components).</p></sec></sec><sec id="s4-2"><title>Psychophysical methods</title><p>We determined perceptual sensitivity to local image statistics via a texture segmentation paradigm adapted from (<xref ref-type="bibr" rid="bib15">Chubb et al., 2004</xref>), and in standard use in our lab (<xref ref-type="bibr" rid="bib58">Victor et al., 2005</xref>; <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>; <xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>); we describe it briefly here. These measurements were carried out in parallel with the natural scene analysis described above. 
Some of the psychophysical results have been previously reported (<xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>; <xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>); see ‘Subjects’ below.</p><sec id="s4-2-1"><title>Stimuli</title><p>The basic stimulus consisted of a 64 × 64 black-and-white array of square image elements (‘checks’), in which a target 16 × 64 rectangle of checks was embedded, positioned eight checks from one of the four edges of the array. The target was distinguished from the rest of the array by its local statistical structure (see <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref> for details on the synthesis of these images), which was varied as described below.</p><p>Individual experimental sessions consisted of threshold measurements for each of a pair of image statistic coordinates (i.e., two choices from <inline-formula><mml:math id="inf40"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>), and their pairwise interactions. 
For the trials used to determine the sensitivity along a coordinate axis, the coordinate was set to one of five equally spaced values; lower-order coordinates were set to 0, and higher-order coordinates were set to their maximum-entropy values (0 for all cases except the <inline-formula><mml:math id="inf41"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>β</mml:mi><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> pair; see (<xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>) for further details on this point). The highest coordinate value tested was determined from pilot experiments, and was set at 0.45 for <inline-formula><mml:math id="inf42"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf43"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, 0.75 for <inline-formula><mml:math id="inf44"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf45"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, 1.0 for the <italic>θ'</italic>s, and 0.85 for <italic>α</italic>. For the trials used to determine the sensitivity to pairwise combinations of coordinates, each coordinate was given a nonzero value; all sign combinations were used. The ratio of the coordinate magnitudes was fixed, and chosen in approximate proportion to the above maximum values. 
Two values for each sign combination were studied.</p><p>To ensure that the response was driven by figure/ground segmentation (rather than, say, a texture gradient), two kinds of trials were randomly intermixed: (1) trials in which the target contained the nonzero value(s) of the coordinates and the background was random (i.e., all coordinates set to 0), and (2) trials in which the background had the nonzero values, and the target was random. Targets were equally likely to appear in any of the four possible locations. All trials were intermixed. This amounted to a total of 288 trials per block along eight rays. We collected 15 such blocks per subject (4320 trials) for each coordinate pair, and tested 11 pairs, for a total of 47,520 trials per subject: <inline-formula><mml:math id="inf46"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf47"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf48"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf49"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, 
<inline-formula><mml:math id="inf50"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf51"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf52"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf53"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf54"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf55"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf56"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi 
mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. These pairs encompass all the distinct coordinate pairs, up to 4-fold rotational symmetry. Since there was no detectable dependence on the orientation of pairwise or third-order correlations related by rotational symmetry in pilot experiments, measurements along coordinate axes and coordinate planes related by rotation are pooled in <xref ref-type="fig" rid="fig3">Figure 3</xref> and in <xref ref-type="fig" rid="fig3s5 fig3s6 fig3s7 fig3s8">Figure 3—figure supplements 5–8</xref>.</p><p>Stimuli were presented on a mean-gray background, followed by a random mask. The display was an LCD monitor with control signals provided by a Cambridge Research ViSaGe system; mean luminance was 23 cd/m<sup>2</sup> and refresh rate was 100 Hz. The stimulus size was 15° × 15° (check size of 14 min), contrast was 1.0, and viewing distance was 1 m. Presentation time was 120 ms.</p></sec><sec id="s4-2-2"><title>Subjects</title><p>Four normal subjects (2 male, 2 female), ages 23 to 54, participated. One subject (MC) was a very experienced observer (several thousand hours); the other three had modest viewing experience (10–100 hr) prior to the experiment. JD and DF were naive to the purposes of the experiment. All subjects had visual acuities (corrected if necessary) of 20/20 or better. For subjects MC and DT, data from all coordinate planes other than the <inline-formula><mml:math id="inf57"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>-plane were previously reported (<xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>; <xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>). 
For subjects JD and DF, data from the seven pairs of coordinates not containing <italic>α</italic> were previously reported (<xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>).</p></sec><sec id="s4-2-3"><title>Procedure</title><p>Subjects were asked to indicate the position of the target (4-alternative forced choice), by pressing one of four buttons. They were informed that the target was equally likely to appear in any of four locations (top, right, bottom, left), and were shown examples of stimuli of both types (target structured/background random and target random/background structured) prior to the experiment. Subjects were instructed to fixate centrally and not scan the stimulus. During training but not data collection, auditory feedback for incorrect responses was given. After performance stabilized (approx. 3 hrs for a new subject), data collection began. Within blocks, trial order was random. Block order was counterbalanced across subjects.</p></sec><sec id="s4-2-4"><title>Determination of sensitivity</title><p>To summarize the psychophysical performance, we fit Weibull functions to the fraction correct (FC) for each subject and each kind of block (i.e., each pair of coordinates). 
In the first step of the analysis of each dataset, maximum-likelihood fits were obtained separately for each of its eight rays <italic>r</italic> (the rays consisted of the positive and negative values for the two coordinates, and the four diagonal directions):<disp-formula id="equ1"><label>(0.1)</label><mml:math id="m1"><mml:mrow><mml:mtext>FC</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mi>x</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>4</mml:mn></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>3</mml:mn><mml:mn>4</mml:mn></mml:mfrac><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mo>−</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>/</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>where <italic>x</italic> is the Euclidean distance from the coordinate vector to the origin, <italic>a</italic><sub><italic>r</italic></sub> is the distance at which FC = 0.625 (halfway between chance and perfect), and <inline-formula><mml:math id="inf58"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is a shape parameter, controlling the slope of the psychophysical curve. Since the shape parameter <italic>b</italic><sub><italic>r</italic></sub> was usually in the range 2.2–2.7 for each pairwise coordinate plane, we then fit the entire dataset within each plane by a set of Weibull functions constrained to share a common exponent <italic>b</italic>, but allowing the parameter <italic>a</italic><sub><italic>r</italic></sub> to vary across rays. 
For each on-axis ray, we averaged the value of <inline-formula><mml:math id="inf59"><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> obtained from all planes that included the ray (these were mutually consistent (<xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>)) to obtain a final value for the perceptual sensitivity.</p></sec><sec id="s4-2-5"><title>Determination of isodiscrimination ellipsoids</title><p>To determine the isodiscrimination ellipsoids, we first parameterized them by a quadratic <inline-formula><mml:math id="inf60"><mml:mrow><mml:mstyle displaystyle="true"><mml:msub><mml:mo>∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>Q</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi>c</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, where <italic>c</italic><sub><italic>i</italic></sub> and <italic>c</italic><sub><italic>j</italic></sub> each represent one of the local coordinates <inline-formula><mml:math id="inf61"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi 
mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and <italic>Q</italic><sub><italic>ij</italic></sub> is the symmetric matrix for which criterion performance (FC = 0.625) is reached at <inline-formula><mml:math id="inf62"><mml:mrow><mml:mstyle displaystyle="true"><mml:msub><mml:mo>∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>Q</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi>c</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>. The values of Q<sub>ij</sub> were obtained by minimizing:<disp-formula id="equ2"><label>(0.2)</label><mml:math id="m2"><mml:mrow><mml:mi>F</mml:mi><mml:mo>=</mml:mo><mml:munder><mml:mstyle displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mi>r</mml:mi></mml:munder><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:munder><mml:mstyle displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:munder><mml:mtext> 
</mml:mtext><mml:msub><mml:mi>Q</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>where <italic>T</italic><sub><italic>r</italic></sub> is the texture along the ray <italic>r</italic> at which criterion performance is reached (i.e., the texture at a distance <italic>a</italic><sub><italic>r</italic></sub> from the origin, where <italic>a</italic><sub><italic>r</italic></sub> is the threshold distance along the ray <italic>r</italic>, as determined above), and <inline-formula><mml:math id="inf63"><mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> is the value of the <italic>i</italic>th coordinate for the texture <italic>T</italic><sub><italic>r</italic></sub>. This minimization is a linear least-squares procedure in the <italic>Q</italic><sub><italic>ij</italic></sub>. 
Deviation of the fitted values of <inline-formula><mml:math id="inf64"><mml:mrow><mml:mstyle displaystyle="true"><mml:msub><mml:mo>∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>Q</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> from unity, which corresponds to deviation of the fitted ellipsoidal surface from the measured points of criterion performance, ranged from 7% to 10% (root-mean-squared) across subjects. The ellipses shown in <xref ref-type="fig" rid="fig3">Figure 3B</xref>, <xref ref-type="fig" rid="fig3s6">Figure 3—figure supplement 6</xref>, and <xref ref-type="fig" rid="fig3s7">Figure 3—figure supplement 7</xref> correspond to loci at which <inline-formula><mml:math id="inf65"><mml:mrow><mml:mstyle displaystyle="true"><mml:msub><mml:mo>∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>Q</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi>c</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is constant, and the eigenvectors described in <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplements 3</xref> and <xref ref-type="fig" rid="fig3s8">8</xref> are the eigenvectors of <italic>Q</italic>.</p></sec></sec><sec id="s4-3"><title>Analysis of image statistics in pairwise coordinate 
planes</title><p>In pairwise coordinate planes, our hypothesis predicts that the inverse covariance matrix, or precision matrix, matches human isodiscrimination contours. A precision matrix is represented by the contour lines of its inverse (the covariance matrix <italic>M</italic>); these are the points <italic>(x, y)</italic> at which <inline-formula><mml:math id="inf66"><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mi>x</mml:mi><mml:mi>y</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>y</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo></mml:mrow></mml:math></inline-formula> constant. A short distance of this contour from the origin thus indicates a large value of <italic>M</italic> and a small value of the precision matrix. This in turn denotes a direction in which prior knowledge of the image statistic is imprecise.</p><p><xref ref-type="fig" rid="fig3">Figure 3B</xref> shows a correspondence between contours of the precision matrix (extracted from natural images) and human isodiscrimination contours. This is shown again here in <xref ref-type="fig" rid="fig3s2">Figure 3—figure supplement 2A</xref> for subject-specific (lower half grid) and subject-averaged (upper half grid) isodiscrimination contours. This correspondence can be made quantitative by computing the angular tilt (<xref ref-type="fig" rid="fig3s2">Figure 3—figure supplement 2B</xref>) and eccentricity (<xref ref-type="fig" rid="fig3s2">Figure 3—figure supplement 2C</xref>) of each ellipse. 
Across all 36 pairwise coordinate planes, we find a detailed quantitative match between the shape and orientation of precision matrix contours and human isodiscrimination contours.</p></sec><sec id="s4-4"><title>Analysis of the full 9-dimensional distribution of image statistics</title><sec id="s4-4-1"><title>Principal component decomposition</title><p>Here, we verify our hypothesis within the full 9-dimensional space of image statistics using an approach that does not single out coordinate axes, either individually or in pairs. Just as the projections of the natural image distribution can be fit by a bivariate Gaussian in each coordinate plane, the entire distribution can be fit by a multivariate Gaussian in the full 9-dimensional space. Similarly, the full set of perceptual isodiscrimination contours can be fit by a single 9-dimensional ellipsoid. Our hypothesis predicts that these two 9-dimensional ellipsoids have the corresponding shape and orientation.</p><p>To test this, we compare the principal axes <inline-formula><mml:math id="inf67"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of variation in natural scenes with the principal axes <inline-formula><mml:math id="inf68"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of human sensitivity inferred from the ellipsoidal isodiscrimination surface (<xref ref-type="bibr" rid="bib59">Victor et al., 2013</xref>). 
To aid in this comparison, we first align the two sets of principal axes based on eigenvalue rank and symmetry considerations (discussed below). We then compute the fractional contribution <italic>f</italic> of sets of coordinates to each principal axis <inline-formula><mml:math id="inf69"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, thereby grouping coordinates with similar ranges of variation. <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplement 3A–D</xref> respectively show the fractional contributions <inline-formula><mml:math id="inf70"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf71"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf72"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi 
mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf73"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mi>α</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> to <inline-formula><mml:math id="inf74"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> (blue bars) vs <inline-formula><mml:math id="inf75"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> (red bars).</p><p>We find that the principal axes of variation in natural scenes match the principal axes of human sensitivity. 
As observed in <xref ref-type="fig" rid="fig3">Figure 3</xref>, the correspondence is within the range of variability observed across image analyses and human subjects.</p><p>We quantify the overlap between each image analysis and the set of psychophysical analyses by computing the scalar product between each principal component vector <inline-formula><mml:math id="inf76"><mml:mrow><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> extracted from natural images and the corresponding subject-averaged psychophysical vector <inline-formula><mml:math id="inf77"><mml:mrow><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="inf78"><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>θ</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>α</mml:mi></mml:msub><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>. 
This overlap, averaged across principal components, ranges from 0.991 to 0.996 across image analyses and is consistently larger than the overlap measured under null hypotheses in which patch labels and coordinate labels are independently shuffled (<italic>p</italic> ≤ 0.0004 for each image analysis under both hypotheses; see Appendix 4 for details).</p></sec><sec id="s4-4-2"><title>Alignment of principal components</title><p>As described in the previous subsection, we use principal component analysis for the multivariate comparison of natural image statistics and perceptual sensitivities. In addition to the standard approach of ordering components by percentage of variance explained within each dataset, followed by comparing components of corresponding rank, we use an additional tool: the symmetries in the definitions of the image statistic coordinates. As detailed below, we use these symmetries to group principal components into symmetry classes, and we then rank-order the components within each class. By matching components based on both symmetry and rank order of explained variance, we avoid ambiguities that would otherwise occur if only explained variance were considered. The four symmetry classes are defined as follows:</p><p>1. 4-D subspace in which statistics are invariant under 90<sup>°</sup> rotations in the plane (here, designated ‘SYM’). 
This is spanned by:<list list-type="simple"><list-item><p>(i) <inline-formula><mml:math id="inf80"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf81"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0,0,0,0,0,0,0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item><list-item><p>(ii) <inline-formula><mml:math id="inf82"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf83"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mn>0,0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0,0,0,0,0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item><list-item><p>(iii) <inline-formula><mml:math id="inf84"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math 
id="inf85"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mn>0,0,0,0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item><list-item><p>(iv) <inline-formula><mml:math id="inf86"><mml:mrow><mml:mi>α</mml:mi><mml:mo>≠</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>, all else 0 ([0,0,0,0,0,0,0,0,1])</p></list-item></list></p><p>2. 2-D subspace in which coordinate values are negated after a horizontal or vertical mirror (here, designated ‘HVI’). This is spanned by:<list list-type="simple"><list-item><p>(i) <inline-formula><mml:math id="inf87"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf88"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mn>0,0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0,0,0,0,0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item><list-item><p>(ii) <inline-formula><mml:math id="inf89"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi 
mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf90"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mn>0,0,0,0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item></list></p><p>3. 2-D subspace spanned by two vectors <inline-formula><mml:math id="inf91"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf92"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> for which a 90<sup>°</sup> rotation transforms <inline-formula><mml:math id="inf93"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> to <inline-formula><mml:math id="inf94"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf95"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> to <inline-formula><mml:math id="inf96"><mml:mrow><mml:mo>−</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> (here, designated ‘ROT’). 
This is spanned by:<list list-type="simple"><list-item><p>(i) <inline-formula><mml:math id="inf97"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf98"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mn>0,0,0,0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0,0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item><list-item><p>(ii) <inline-formula><mml:math id="inf99"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf100"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mn>0,0,0,0,0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item></list></p><p>4. 1-D subspace in which a diagonal mirror negates coordinates (here, designated ‘DII’). 
This is spanned by:<list list-type="simple"><list-item><p>(i) <inline-formula><mml:math id="inf101"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, all else 0 <inline-formula><mml:math id="inf102"><mml:mrow><mml:mo>(</mml:mo><mml:mo>[</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mn>2</mml:mn></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mn>0,0,0,0,0,0,0</mml:mn><mml:mo>]</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item></list></p><p>We compute the normalized principal axes <inline-formula><mml:math id="inf103"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of variability in natural image statistics and principal axes <inline-formula><mml:math id="inf104"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> of human perceptual sensitivity. We then assign each set of components to the above symmetry classes by maximizing the total overlap between <inline-formula><mml:math id="inf105"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and the above classes. 
This is accomplished by computing the size of the projection of each individual component <inline-formula><mml:math id="inf106"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> into each of the above subspaces, and then assigning the component into the subspace that contains the largest projection. In one case where two components with nearly degenerate eigenvalues could not clearly be assigned to symmetry classes (analysis <italic>N</italic> = 20, <italic>R</italic> = 32 in the PIDB, shown in <xref ref-type="fig" rid="fig3s8">Figure 3—figure supplement 8A–D</xref> below), we force symmetry by performing a 45<sup>°</sup> rotation in the plane spanned by the degenerate components.</p><p>Once all components have been assigned to symmetry classes, we rank-order components within each class. This resulted in unambiguous pairing between natural image dataset and psychophysics in all but one pair of components in three image analyses (out of a total of 9 components for each of 31 separate image analyses). 
In those analyses (image analyses <italic>N</italic> = 2, <italic>R</italic> = 48, 64, 128 in the van Hateren database), there were two nearly-degenerate SYM components in the image dataset; we paired these components with the psychophysics data by maximizing their overlap.</p><p>To compare between natural image and psychophysics analyses, we compute the fractional contribution <inline-formula><mml:math id="inf107"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>[</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>θ</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>α</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> of sets of coordinates to each principal component, where the components of <inline-formula><mml:math id="inf108"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> 
are given by:<disp-formula id="equ3"><label>(0.3)</label><mml:math id="m3"><mml:mrow><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></disp-formula><disp-formula id="equ4"><label>(0.4)</label><mml:math 
id="m4"><mml:mrow><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></disp-formula><disp-formula id="equ5"><label>(0.5)</label><mml:math id="m5"><mml:mrow><mml:msubsup><mml:mi>f</mml:mi><mml:mi>θ</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi 
mathvariant="normal">⌝</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></disp-formula><disp-formula id="equ6"><label>(0.6)</label><mml:math id="m6"><mml:mrow><mml:msubsup><mml:mi>f</mml:mi><mml:mi>α</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mi>ξ</mml:mi><mml:mi>α</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></disp-formula>and <inline-formula><mml:math 
id="inf109"><mml:mrow><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>θ</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>α</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> for each normalized component <inline-formula><mml:math id="inf110"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>.</p><p>The principal components shown in <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplement 3</xref> are rank-ordered within each symmetry class, where the four classes were ordered as follows: SYM (<inline-formula><mml:math id="inf111"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>−</mml:mo><mml:msup><mml:mrow><mml:mover 
accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>4</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>), HVI (<inline-formula><mml:math id="inf112"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>5</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf113"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>6</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>), ROT (<inline-formula><mml:math id="inf114"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>7</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf115"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>8</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>), DII (<inline-formula><mml:math id="inf116"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>9</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>). 
Note that while the comparisons between psychophysics and natural images are based on the squares of the principal component coordinates (<xref ref-type="disp-formula" rid="equ3 equ4 equ5 equ6">equations 0.3–0.6</xref>) and are insensitive to their signs, the classification of principal components by symmetry classes guarantees that we are only comparing psychophysical and natural-image components for which the signs within each coordinate set (<inline-formula><mml:math id="inf117"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf118"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf119"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>) covary in the same fashion.</p></sec></sec><sec id="s4-5"><title>Permutation tests</title><p>Our results, shown in <xref ref-type="fig" rid="fig3">Figure 3</xref> for single coordinates and pairwise coordinate planes, and extended to the full 9-dimensional distribution in <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplement 3</xref>, show a consistent match between the variation in natural image 
statistics and psychophysical sensitivities. We quantify this match by first assigning vectors to the quantities shown in <xref ref-type="fig" rid="fig3">Figure 3</xref> and <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplement 3</xref>, and then computing the overlap between natural image vectors and the corresponding psychophysical vectors. We consider the following vector quantities:<list list-type="order"><list-item><p>Single coordinates: We describe the range of variation in natural image statistics by the normalized 9-component vector of standard deviations <inline-formula><mml:math id="inf120"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>/</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="inf121"><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mover accent="true"><mml:mi>v</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes the L2 norm <inline-formula><mml:math id="inf122"><mml:mrow><mml:msqrt><mml:mrow><mml:msubsup><mml:mstyle displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:msubsup><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula> of a vector 
<inline-formula><mml:math id="inf123"><mml:mrow><mml:mover accent="true"><mml:mi>v</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>. Similarly, we describe the set of perceptual sensitivities by the normalized vector <inline-formula><mml:math id="inf124"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>s</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>/</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>s</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. In both cases, the vector components are measured with respect to the coordinates <inline-formula><mml:math id="inf125"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>.</p></list-item><list-item><p>Pairwise coordinate planes: We describe each ellipse by the unit vector <inline-formula><mml:math id="inf126"><mml:mrow><mml:mover 
accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> that is a combined measure of eccentricity (<italic>ϵ</italic>) and tilt (<italic>δ</italic>). We define <inline-formula><mml:math id="inf127"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> on one quarter of the unit sphere: <inline-formula><mml:math id="inf128"><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi><mml:mtext> cos </mml:mtext><mml:mi>δ</mml:mi><mml:mtext> </mml:mtext><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">x</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>+</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi><mml:mtext> sin </mml:mtext><mml:mi>δ</mml:mi><mml:mtext> </mml:mtext><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>+</mml:mo><mml:mtext>cos </mml:mtext><mml:mi>α</mml:mi><mml:mtext> </mml:mtext><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">z</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="inf129"><mml:mrow><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>=</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf130"><mml:mrow><mml:mtext>cos </mml:mtext><mml:mi>δ</mml:mi></mml:mrow></mml:math></inline-formula> are defined on the interval [0,1] (the second follows from the 180<sup>°</sup> rotational symmetry of ellipses). 
Note that this definition of <inline-formula><mml:math id="inf131"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> captures the ellipse property that when <inline-formula><mml:math id="inf132"><mml:mrow><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>=</mml:mo><mml:mtext>sin </mml:mtext><mml:mi>α</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> (circular ellipses), <italic>δ</italic> is not defined. See <xref ref-type="fig" rid="fig3s4">Figure 3—figure supplement 4</xref> for a schematic of this representation.</p></list-item><list-item><p>Principal components: We consider two related measures for describing principal components. As shown in <xref ref-type="fig" rid="fig3s3">Figure 3—figure supplement 3</xref>, we describe each principal component <inline-formula><mml:math id="inf133"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> by the normalized vector <inline-formula><mml:math id="inf134"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>/</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, which measures the fractional contribution of 
sets of statistics to the principal components <inline-formula><mml:math id="inf135"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>. For a more detailed comparison, we can similarly describe each principal component by the normalized vector <inline-formula><mml:math id="inf136"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>/</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="inf137"><mml:mrow><mml:msup><mml:mrow><mml:mover 
accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>[</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi 
mathvariant="normal">⌟</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>α</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>. This measures the fractional contribution of individual statistics (rather than sets of statistics) to the principal components <inline-formula><mml:math id="inf138"><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mi>ξ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>.</p></list-item></list></p><p>For each vector quantity (<inline-formula><mml:math id="inf139"><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf140"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf141"><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf142"><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>), we compute the scalar product between a given image analysis vector and the subject-averaged psychophysical vector. 
We then report the overlap values (scalar products) measured for the six image analyses considered in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref> (<italic>N</italic> = 2, 4 and <italic>R</italic> = 32, 48, 64). In computing the scalar product between <inline-formula><mml:math id="inf143"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf144"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, we report the overlap averaged over all 36 pairwise coordinate planes. Similarly, in computing the overlap between <inline-formula><mml:math id="inf145"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf146"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and between <inline-formula><mml:math id="inf147"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf148"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, we report the overlap averaged over all 9 principal components. 
Note that, for each vector <inline-formula><mml:math id="inf149"><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf150"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf151"><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf152"><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, the maximum overlap is 1.</p><p>We find that natural image analyses show consistently high overlap with the set of psychophysical results (see <xref ref-type="table" rid="tbl1 tbl2 tbl3">Tables 1–3</xref>). The overlap, as measured across image analyses, ranges from 0.988 to 0.999 for single coordinates (<inline-formula><mml:math id="inf153"><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>), from 0.953 to 0.977 for pairwise coordinate planes (<inline-formula><mml:math id="inf154"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>), from 0.987 to 0.993 for fractional principal axes (<inline-formula><mml:math id="inf155"><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>), and from 0.829 to 0.917 for the full principal axes (<inline-formula><mml:math id="inf156"><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>). We test the significance of this overlap by comparing our results to the following two null models:<list list-type="simple"><list-item><p>1A. Shuffled coordinate labels: sets of coordinates. 
This model (and model 1B) tests the null hypothesis that the apparent correspondence between image statistic covariances and isodiscrimination contours is chance. We examine the 23 permutations of the sets of coordinates <inline-formula><mml:math id="inf157"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. We apply these permutations to the psychophysical data, as human subjects are equally sensitive to coordinates within each set (<inline-formula><mml:math id="inf158"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf159"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and all <italic>θ</italic>'s). 
This shuffling creates a new set of subjects whose second-order cardinal, second-order oblique, third-order, and fourth-order coordinate values are randomly permuted (transforming the original vector <inline-formula><mml:math id="inf160"><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:mi>α</mml:mi><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> into, for example, the shuffled vector <inline-formula><mml:math id="inf161"><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>). 
If the correspondence between quantities derived from image analysis and psychophysics is statistically significant, we expect that the shuffled vectors <inline-formula><mml:math id="inf162"><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf163"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf164"><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf165"><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> will show less overlap with the image analysis vectors than do the original psychophysical vectors (note that the limited number of permutations restricts the minimum <italic>p</italic>-value to be 0.04).</p></list-item><list-item><p>1B. Shuffled coordinate labels: individual coordinates. 
Here, we expand the test described in 1a to randomly shuffle the full set of coordinate labels <inline-formula><mml:math id="inf166"><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> <inline-formula><mml:math id="inf167"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>. In an analogous manner to that described in 1a, we expect that the shuffled vectors <inline-formula><mml:math id="inf168"><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf169"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf170"><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf171"><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> will show less overlap with the image analysis vectors than do the original psychophysical vectors if the correspondence between quantities derived from image analysis and psychophysics is statistically significant.</p></list-item><list-item><p>2. 
Shuffled patch labels. This model tests the null hypothesis that the apparent covariances in image statistics are due to chance. For each coordinate, we randomly shuffle image patch labels. This shuffling creates a new set of null patches whose second-, third-, and fourth-order coordinate values are randomly drawn from a subset of the original image patches (e.g. a given null patch can be described by a <inline-formula><mml:math id="inf172"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>-value measured from patch <italic>m</italic> but an <italic>α</italic> value measured from patch <italic>n</italic>). This shuffling destroys correlations between coordinate values measured within individual patches. Note that this shuffling does not alter the range of variation measured along single coordinate axes and will therefore not alter the values of precision matrix ellipses measured along coordinate axes. As a result, this test is not applicable to <inline-formula><mml:math id="inf173"><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, which measures natural image variation and human sensitivities along individual coordinate axes. However, shuffling will destroy correlations along oblique directions in coordinate planes, thereby aligning each ellipse along a single coordinate axis. 
Note that the eccentricity of each ellipse (in, e.g., the A-B plane) is then trivially related to the ratio of variances <inline-formula><mml:math id="inf174"><mml:mrow><mml:msup><mml:mi>σ</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> measured along the corresponding coordinate axes: <inline-formula><mml:math id="inf175"><mml:mrow><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:msubsup><mml:mi>σ</mml:mi><mml:mi>A</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>/</mml:mo><mml:msubsup><mml:mi>σ</mml:mi><mml:mi>B</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula>. We therefore expect that this shuffling will most strongly affect the tilt and eccentricity in pairwise planes in which ellipses are oriented along oblique directions (<inline-formula><mml:math id="inf176"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf177"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf178"><mml:mrow><mml:mi>θ</mml:mi><mml:mi>θ</mml:mi></mml:mrow></mml:math></inline-formula> planes). Finally, in destroying correlations between pairs of coordinates, this shuffling creates a diagonal covariance matrix, such that principal components are aligned with single coordinate axes. 
If the correspondence between quantities derived from image analysis and psychophysics is statistically significant, we expect that the shuffled vectors <inline-formula><mml:math id="inf179"><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="inf180"><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="inf181"><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> will show less overlap with the psychophysical vectors than do the original image analysis vectors.</p></list-item></list></p><p>Each null model is constructed by randomly selecting permuted indices that independently shuffle coordinate labels for subject-averaged psychophysical data (Null Model 1) and independently shuffle image patch labels for a given statistic (Null Model 2). For null model 1a, we perform the full set of 23 non-identity permutations. 
For models 1B and 2, we perform 10,000 permutations.</p><p>For each permutation, we compute a set of shuffled vectors <inline-formula><mml:math id="inf182"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and we measure the overlap (defined as the scalar product <inline-formula><mml:math id="inf183"><mml:mrow><mml:msub><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mtext>*</mml:mtext><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">→</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mtext>*</mml:mtext><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">→</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>) between each shuffled vector and the corresponding subject-averaged psychophysical vector. Note that, when assigning shuffled principal components to symmetry classes, no hand-tuning was performed. However, as described previously, such hand-tuning was only applied to a very small fraction of components for select image analyses.</p><p>When repeated for many permutations, this procedure yields a distribution of shuffled overlap values against which we measure the significance of the true (observed) overlap. 
Significance values (p-values) are estimated by computing the fraction of permutations for which the shuffled overlap exceeds the true overlap.</p><p>We find that the original image analyses show significantly higher overlap with psychophysical data than do the analyses produced by either of the null models. Results are significant for each measure of overlap and for each of the six analyses presented in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref> (<italic>p</italic> <0.0005, or as small as possible given the number of possible permutations, in all cases); see <xref ref-type="table" rid="tbl1 tbl2 tbl3">Tables 1–3</xref> for full results.<table-wrap id="tbl1" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.023</object-id><label>Table 1.</label><caption><p>Permutation tests for null model 1a: shuffled coordinate labels</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.023">http://dx.doi.org/10.7554/eLife.03722.023</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th rowspan="2">Measures of overlap</th><th colspan="2" rowspan="2">Image analysis</th><th rowspan="2">Observed overlap</th><th colspan="4">Shuffled overlap Values</th><th rowspan="2">Significance</th></tr><tr><th>Mean</th><th>std</th><th>min</th><th>max</th></tr></thead><tbody><tr><td rowspan="6">Range/Sensitivity <inline-formula><mml:math id="inf316"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>s</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.999</td><td>0.859</td><td>0.9 × 10<sup>−1</sup></td><td>0.704</td><td>0.983</td><td><0.04</td></tr><tr><td>R = 
48</td><td>0.993</td><td>0.832</td><td>1.1 × 10<sup>−1</sup></td><td>0.651</td><td>0.978</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.987</td><td>0.809</td><td>1.1 × 10<sup>−1</sup></td><td>0.614</td><td>0.974</td><td><0.04</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.998</td><td>0.825</td><td>1.1 × 10<sup>−1</sup></td><td>0.638</td><td>0.969</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.994</td><td>0.812</td><td>1.1 × 10<sup>−1</sup></td><td>0.646</td><td>0.990</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.991</td><td>0.794</td><td>1.1 × 10<sup>−1</sup></td><td>0.617</td><td>0.985</td><td><0.04</td></tr><tr><td rowspan="6">Inverse Range/Threshold <inline-formula><mml:math id="inf317"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.971</td><td>0.709</td><td>1.5 × 10<sup>−1</sup></td><td>0.508</td><td>0.924</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.969</td><td>0.692</td><td>1.6 × 10<sup>−1</sup></td><td>0.469</td><td>0.924</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.953</td><td>0.685</td><td>1.7 × 10<sup>−1</sup></td><td>0.450</td><td>0.913</td><td><0.04</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.967</td><td>0.679</td><td>1.7 × 10<sup>−1</sup></td><td>0.447</td><td>0.908</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.975</td><td>0.632</td><td>1.5 × 10<sup>−1</sup></td><td>0.400</td><td>0.880</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.977</td><td>0.648</td><td>1.6 × 10<sup>−1</sup></td><td>0.411</td><td>0.894</td><td><0.04</td></tr><tr><td rowspan="6">Fractional Principal Components 
<inline-formula><mml:math id="inf318"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.994</td><td>0.382</td><td>1.5 × 10<sup>−1</sup></td><td>0.160</td><td>0.657</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.995</td><td>0.485</td><td>1.2 × 10<sup>−1</sup></td><td>0.287</td><td>0.727</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.991</td><td>0.487</td><td>0.7 × 10<sup>−1</sup></td><td>0.372</td><td>0.632</td><td><0.04</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.995</td><td>0.459</td><td>1.4 × 10<sup>−1</sup></td><td>0.238</td><td>0.732</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.996</td><td>0.444</td><td>1.0 × 10<sup>−1</sup></td><td>0.277</td><td>0.601</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.996</td><td>0.450</td><td>1.1 × 10<sup>−1</sup></td><td>0.279</td><td>0.614</td><td><0.04</td></tr><tr><td rowspan="6">Full Principal Components <inline-formula><mml:math id="inf319"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.917</td><td>0.316</td><td>1.3 × 10<sup>−1</sup></td><td>0.123</td><td>0.578</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.828</td><td>0.401</td><td>1.0 × 
10<sup>−1</sup></td><td>0.228</td><td>0.611</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.911</td><td>0.363</td><td>0.7 × 10<sup>−1</sup></td><td>0.282</td><td>0.532</td><td><0.04</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.882</td><td>0.376</td><td>1.2 × 10<sup>−1</sup></td><td>0.180</td><td>0.618</td><td><0.04</td></tr><tr><td>R = 48</td><td>0.917</td><td>0.362</td><td>1.0 × 10<sup>−1</sup></td><td>0.201</td><td>0.520</td><td><0.04</td></tr><tr><td>R = 64</td><td>0.919</td><td>0.357</td><td>1.0 × 10<sup>−1</sup></td><td>0.196</td><td>0.522</td><td><0.04</td></tr></tbody></table><table-wrap-foot><fn><p>We separately permute the sets of coordinate labels <inline-formula><mml:math id="inf320"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mo>−</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mo>\</mml:mo><mml:mo>/</mml:mo></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. We apply these permutations to the psychophysical data, therein examining all 23 non-identity permutations of the four labels. This shuffling significantly decreases the overlap between image analyses and psychophysical data. Results are significant across all six analyses considered in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref> (<italic>N</italic> = 2, 4 and <italic>R</italic> = 32, 48, 64). 
p-values, estimated as the fraction of permutations for which the shuffled overlap exceeds the true overlap, are less than 0.04 (the minimum value given 23 permutations) for each image analysis.</p></fn></table-wrap-foot></table-wrap><table-wrap id="tbl2" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.024</object-id><label>Table 2.</label><caption><p>Permutation Tests for null model 1b: shuffled coordinate labels</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.024">http://dx.doi.org/10.7554/eLife.03722.024</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th rowspan="2">Measures of overlap</th><th colspan="2" rowspan="2">Image analysis</th><th rowspan="2">Observed overlap</th><th colspan="4">Shuffled overlap Values</th><th rowspan="2">Significance</th></tr><tr><th>Mean</th><th>std</th><th>min</th><th>max</th></tr></thead><tbody><tr><td rowspan="6">Range/Sensitivity <inline-formula><mml:math id="inf184"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>s</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.999</td><td>0.806</td><td>6.8 × 10<sup>−2</sup></td><td>0.659</td><td>0.999</td><td>0.0003</td></tr><tr><td>R = 48</td><td>0.993</td><td>0.775</td><td>7.7 × 10<sup>−2</sup></td><td>0.610</td><td>0.993</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.987</td><td>0.762</td><td>8.0 × 10<sup>−2</sup></td><td>0.579</td><td>0.987</td><td><0.0001</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.998</td><td>0.828</td><td>6.0 × 10<sup>−2</sup></td><td>0.707</td><td>0.998</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.994</td><td>0.798</td><td>7.1 × 
10<sup>−2</sup></td><td>0.660</td><td>0.994</td><td>0.0002</td></tr><tr><td>R = 64</td><td>0.991</td><td>0.780</td><td>7.6 × 10<sup>−2</sup></td><td>0.630</td><td>0.991</td><td><0.0001</td></tr><tr><td rowspan="6">Inverse Range/Threshold <inline-formula><mml:math id="inf185"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.971</td><td>0.693</td><td>8.1 × 10<sup>−2</sup></td><td>0.499</td><td>0.972</td><td>0.0002</td></tr><tr><td>R = 48</td><td>0.969</td><td>0.682</td><td>8.4 × 10<sup>−2</sup></td><td>0.476</td><td>0.969</td><td>0.0003</td></tr><tr><td>R = 64</td><td>0.953</td><td>0.671</td><td>8.5 × 10<sup>−2</sup></td><td>0.446</td><td>0.954</td><td>0.0002</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.967</td><td>0.696</td><td>7.6 × 10<sup>−2</sup></td><td>0.521</td><td>0.964</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.975</td><td>0.692</td><td>8.0 × 10<sup>−2</sup></td><td>0.509</td><td>0.976</td><td>0.0002</td></tr><tr><td>R = 64</td><td>0.977</td><td>0.689</td><td>8.2 × 10<sup>−2</sup></td><td>0.493</td><td>0.978</td><td>0.0003</td></tr><tr><td rowspan="6">Fractional Principal Components <inline-formula><mml:math id="inf186"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover 
accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.994</td><td>0.592</td><td>1.2 × 10<sup>−1</sup></td><td>0.271</td><td>0.995</td><td>0.0003</td></tr><tr><td>R = 48</td><td>0.995</td><td>0.604</td><td>1.3 × 10<sup>−1</sup></td><td>0.281</td><td>0.995</td><td>0.0004</td></tr><tr><td>R = 64</td><td>0.991</td><td>0.591</td><td>1.2 × 10<sup>−1</sup></td><td>0.278</td><td>0.991</td><td>0.0003</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.995</td><td>0.590</td><td>1.2 × 10<sup>−1</sup></td><td>0.218</td><td>0.995</td><td>0.0001</td></tr><tr><td>R = 48</td><td>0.996</td><td>0.577</td><td>1.2 × 10<sup>−1</sup></td><td>0.251</td><td>0.996</td><td>0.0002</td></tr><tr><td>R = 64</td><td>0.996</td><td>0.581</td><td>1.2 × 10<sup>−1</sup></td><td>0.266</td><td>0.996</td><td>0.0004</td></tr><tr><td rowspan="6">Full Principal Components <inline-formula><mml:math id="inf187"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.917</td><td>0.391</td><td>1.2 × 10<sup>−1</sup></td><td>0.100</td><td>0.927</td><td>0.0002</td></tr><tr><td>R = 48</td><td>0.828</td><td>0.391</td><td>1.2 × 10<sup>−1</sup></td><td>0.086</td><td>0.856</td><td>0.0008</td></tr><tr><td>R = 64</td><td>0.911</td><td>0.396</td><td>1.2 × 10<sup>−1</sup></td><td>0.120</td><td>0.953</td><td>0.0003</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.882</td><td>0.381</td><td>1.2 × 
10<sup>−1</sup></td><td>0.066</td><td>0.989</td><td>0.0003</td></tr><tr><td>R = 48</td><td>0.917</td><td>0.380</td><td>1.2 × 10<sup>−1</sup></td><td>0.090</td><td>0.902</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.919</td><td>0.387</td><td>1.2 × 10<sup>−1</sup></td><td>0.095</td><td>0.937</td><td>0.0004</td></tr></tbody></table><table-wrap-foot><fn><p>We separately permute all nine coordinate labels <inline-formula><mml:math id="inf188"><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> <inline-formula><mml:math id="inf189"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>. This shuffling, applied to the psychophysical data, significantly decreases the overlap between image analyses and psychophysical data. Results are significant across all six analyses considered in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref> (<italic>N</italic> = 2, 4 and <italic>R</italic> = 32, 48, 64). 
p-values, estimated as the fraction of permutations for which the shuffled overlap exceeds the true overlap, are less than 0.0005 for all image analyses.</p></fn></table-wrap-foot></table-wrap><table-wrap id="tbl3" position="float"><object-id pub-id-type="doi">10.7554/eLife.03722.025</object-id><label>Table 3.</label><caption><p>Permutation tests for null model 2: shuffled patch labels</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.03722.025">http://dx.doi.org/10.7554/eLife.03722.025</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th rowspan="2">Comparisons</th><th colspan="2" rowspan="2">Image analysis</th><th rowspan="2">Observed overlap</th><th colspan="4">Shuffled overlap Values</th><th rowspan="2">Significance</th></tr><tr><th>Mean</th><th>std</th><th>min</th><th>max</th></tr></thead><tbody><tr><td rowspan="6">Inverse Range/Threshold <inline-formula><mml:math id="inf190"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.971</td><td>0.924</td><td>0.70 × 10<sup>−3</sup></td><td>0.921</td><td>0.926</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.969</td><td>0.921</td><td>1.1 × 10<sup>−3</sup></td><td>0.917</td><td>0.925</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.953</td><td>0.912</td><td>1.3 × 10<sup>−3</sup></td><td>0.908</td><td>0.917</td><td><0.0001</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.967</td><td>0.919</td><td>1.7 × 10<sup>−3</sup></td><td>0.914</td><td>0.926</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.975</td><td>0.922</td><td>1.9 × 
10<sup>−3</sup></td><td>0.916</td><td>0.930</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.977</td><td>0.924</td><td>2.8 × 10<sup>−3</sup></td><td>0.916</td><td>0.935</td><td><0.0001</td></tr><tr><td rowspan="6">Fractional Principal Components <inline-formula><mml:math id="inf191"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.994</td><td>0.806</td><td>9.1 × 10<sup>−6</sup></td><td>0.806</td><td>0.806</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.995</td><td>0.806</td><td>8.3 × 10<sup>−6</sup></td><td>0.806</td><td>0.806</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.991</td><td>0.806</td><td>3.7 × 10<sup>−6</sup></td><td>0.806</td><td>0.806</td><td><0.0001</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.995</td><td>0.807</td><td>2.5 × 10<sup>−4</sup></td><td>0.806</td><td>0.809</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.996</td><td>0.807</td><td>4.1 × 10<sup>−4</sup></td><td>0.806</td><td>0.810</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.996</td><td>0.807</td><td>3.5 × 10<sup>−4</sup></td><td>0.806</td><td>0.810</td><td><0.0001</td></tr><tr><td rowspan="6">Full Principal Components <inline-formula><mml:math id="inf192"><mml:mrow><mml:mo>〈</mml:mo><mml:msub><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>NI</mml:mtext></mml:mrow></mml:msub><mml:mo>⋅</mml:mo><mml:msub><mml:mrow><mml:mover 
accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mtext>PP</mml:mtext></mml:mrow></mml:msub><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula></td><td rowspan="3">N = 2</td><td>R = 32</td><td>0.917</td><td>0.448</td><td>5.8 × 10<sup>−2</sup></td><td>0.406</td><td>0.596</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.828</td><td>0.502</td><td>5.9 × 10<sup>−2</sup></td><td>0.408</td><td>0.675</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.911</td><td>0.458</td><td>4.8 × 10<sup>−2</sup></td><td>0.407</td><td>0.591</td><td><0.0001</td></tr><tr><td rowspan="3">N = 4</td><td>R = 32</td><td>0.881</td><td>0.489</td><td>4.9 × 10<sup>−2</sup></td><td>0.409</td><td>0.638</td><td><0.0001</td></tr><tr><td>R = 48</td><td>0.917</td><td>0.454</td><td>3.0 × 10<sup>−2</sup></td><td>0.408</td><td>0.637</td><td><0.0001</td></tr><tr><td>R = 64</td><td>0.919</td><td>0.492</td><td>4.2 × 10<sup>−2</sup></td><td>0.411</td><td>0.648</td><td><0.0001</td></tr></tbody></table><table-wrap-foot><fn><p>Within each image analyses, we separately permute image patch labels along individual coordinate axes. This shuffling does not alter the range of variation observed along individual coordinates; as a result, this test only applies to <inline-formula><mml:math id="inf193"><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>ω</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:math></inline-formula> ,<inline-formula><mml:math id="inf194"><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="inf195"><mml:mrow><mml:mrow><mml:mover accent="true"><mml:mi>F</mml:mi><mml:mo>→</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:math></inline-formula>. We find that this shuffling significantly decreases the overlap between image analyses and psychophysical data. 
Results are significant across all six analyses considered in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref> (<italic>N</italic> = 2, 4 and <italic>R</italic> = 32, 48, 64). p-values, estimated as the fraction of permutations for which the shuffled overlap exceeds the true overlap, are less than 0.0001 for each image analysis.</p></fn></table-wrap-foot></table-wrap></p></sec><sec id="s4-6"><title>Analysis variants for Penn Natural Image Database</title><p>In <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref>, we reported results using image analyses with varying values of the block-average factor <italic>N</italic> (<italic>N</italic> = 2, 4) and patch size <italic>R</italic> (<italic>R</italic> = 32, 48, 64). In <xref ref-type="fig" rid="fig1s3">Figure 1—figure supplement 3</xref>, we show that the relative variation in different image statistics (first shown in <xref ref-type="fig" rid="fig1">Figure 1E</xref>) is not an artifact of our image analysis pipeline, as the pattern of variation is destroyed if white-noise image patches are instead used. In Figure 3—figure supplements 5–8, we show that the comparison between natural image and psychophysical analyses is consistent across a wider range of image preprocessing parameters: <italic>N</italic> = 2, 4, 8, 12, 16, 20 and <italic>R</italic> = 32, 48, 64, 80, 128. Note that sampling limitations restrict some combinations of <italic>N</italic> and <italic>R</italic> (e.g. for sufficiently large <italic>N</italic>, we must choose sufficiently small <italic>R</italic> to have a statistically significant number of image patches).</p></sec><sec id="s4-7"><title>Comparison with van Hateren Database</title><p>All analyses reported in Results and shown in <xref ref-type="fig" rid="fig1 fig3">Figures 1 and 3</xref> were performed on a set of images from the UPenn Natural Image Database (<xref ref-type="bibr" rid="bib49">Tkačik et al., 2011</xref>). 
Here, we extend our analyses to a set of 2300 images from the van Hateren image database (<xref ref-type="bibr" rid="bib52">van Hateren and van der Schaaf, 1998</xref>), using the same set of parameters used to analyze images from the UPenn database, with block-average factors <italic>N</italic> = 2, 4, 8, 12, 16, 20 and patch sizes <italic>R</italic> = 32, 48, 64, 80, 128. Note that we are able to perform a larger number of analyses (specific combinations of <italic>N</italic> and <italic>R</italic>) than was performed using the Penn database, as we have a larger selection of images and therefore do not face the same sampling limitations. Figure 3—figure supplements 5–8 confirm that our results are consistent across image databases.</p></sec><sec id="s4-8"><title>Asymmetries in distributions of natural image statistics</title><p>We find systematic asymmetries in the distributions of natural image statistics when examined beyond their second moments. <xref ref-type="fig" rid="fig3s9">Figure 3—figure supplement 9</xref> shows the distributions of single coordinates for the image analysis <italic>N</italic> = 2, <italic>R</italic> = 32. All distributions are shifted toward positive coordinate values, and there is larger variation in positive vs negative coordinate values. We assess this asymmetry in natural image analyses by computing the ratio of the standard deviations measured along positive vs negative coordinate axes. We similarly assess asymmetry in psychophysical analyses by computing the ratio of human sensitivities to positive vs negative deviations of coordinate values. This comparison is shown in <xref ref-type="fig" rid="fig3s9">Figure 3—figure supplement 9</xref>. 
The mismatch provides potential clues for the neural mechanisms responsible for processing local image statistics (See Discussion).</p></sec><sec id="s4-9"><title>Two regimes of efficient coding</title><p>In this section, we illustrate how two contrasting regimes emerge from the efficient coding principle: (<italic>i</italic>) the well-known transmission-limited regime, in which ‘whitening’ is optimal, and (<italic>ii</italic>) the sampling-limited regime, which is the focus of this paper. To enable exact calculations of optimal behavior, we consider a simplified scenario, in which all signals and noises are Gaussian, and all filters are linear.</p><p>We consider a set of channels dedicated to processing independent signals of varying sizes. The channels, which are indexed by <italic>k</italic>, are abstract and general. For example, each <italic>k</italic> can represent a different spatial or temporal frequency in the input, as in the traditional analysis of visual coding in the periphery. Here, we take the signal on each channel <italic>k</italic> to represent a complex image feature, that is the result of a specific local nonlinear transformation applied to the input image.</p><p><xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref> shows the setup of a single channel dedicated to processing the signal <inline-formula><mml:math id="inf196"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. Sampling noise, which is assumed to be identical for each channel, is added to this signal; without loss of generality, we can take its value to be unity. 
Note that for the parametrization of local image statistics used here, sampling noise is in fact identical for each parameter at the origin of the parameter space (see Equations B19-B20 in <xref ref-type="bibr" rid="bib57">Victor and Conte, 2012</xref>).</p><p>The result is passed through a linear filter <inline-formula><mml:math id="inf197"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, characterized by a gain <inline-formula><mml:math id="inf198"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula>. The output of <inline-formula><mml:math id="inf199"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> then has intrinsic channel noise added, and the total dynamic range of all channels is constrained. All channels are assumed to have the same intrinsic noise. Again, without loss of generality, we take this value to be unity (as any scale associated with this noise can be absorbed into an overall multiplier for the filters <inline-formula><mml:math id="inf200"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and the constraint on total dynamic range of the channels).</p><p>We seek to find the optimal set of gains <inline-formula><mml:math id="inf201"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> that maximize the mutual information <inline-formula><mml:math id="inf202"><mml:mrow><mml:mstyle displaystyle="true"><mml:msub><mml:mo>∑</mml:mo><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:math></inline-formula> between the signals 
<inline-formula><mml:math id="inf203"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and the channel input, subject to a constraint <italic>Q</italic> on total output power. Using a Lagrange multiplier <inline-formula><mml:math id="inf204"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> for the constraint, the problem translates into extremizing <inline-formula><mml:math id="inf205"><mml:mrow><mml:mi>P</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:msub><mml:mo>∑</mml:mo><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mstyle><mml:mo>+</mml:mo><mml:mtext>Λ</mml:mtext><mml:mi>Q</mml:mi></mml:mrow></mml:math></inline-formula> by setting <inline-formula><mml:math id="inf206"><mml:mrow><mml:mo>∂</mml:mo><mml:mi>P</mml:mi><mml:mo>/</mml:mo><mml:mo>∂</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>.</p><p>The solution can be found in Equation 8 of <xref ref-type="bibr" rid="bib53">van Hateren (1992a)</xref>, noting the following correspondences between the setup of <xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref> and the scenario considered in that paper. Referring to the notation in (<xref ref-type="bibr" rid="bib53">van Hateren, 1992a</xref>), the input and channel noises, <italic>N</italic><sub><italic>p</italic></sub> and <italic>N</italic><sub>c</sub>, respectively correspond here to the sampling and channel noises (both taken to be unity). The prefiltered stimulus power <italic>S</italic><sub><italic>p</italic></sub> corresponds here to signal variance <inline-formula><mml:math id="inf207"><mml:mrow><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>. 
The power transfer function <italic>p</italic><sub><italic>n</italic></sub> of the neural filter corresponds here to the filter power <inline-formula><mml:math id="inf208"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msup><mml:mo>|</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula>. Finally, the negative Lagrange multiplier <inline-formula><mml:math id="inf209"><mml:mrow><mml:mo>−</mml:mo><mml:mi>λ</mml:mi></mml:mrow></mml:math></inline-formula> corresponds here to the positive Lagrange multiplier <inline-formula><mml:math id="inf210"><mml:mrow><mml:mo>+</mml:mo><mml:mtext>Λ</mml:mtext></mml:mrow></mml:math></inline-formula>. With these correspondences, the optimal filter for channel <italic>k</italic> has a gain <inline-formula><mml:math id="inf211"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula> given by:<disp-formula id="equ7"><label>(0.7)</label><mml:math 
id="m7"><mml:mrow><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo>−</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>2</mml:mn><mml:mo>+</mml:mo><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msqrt><mml:mrow><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>4</mml:mn></mml:msubsup><mml:mo>+</mml:mo><mml:mn>4</mml:mn><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>/</mml:mo><mml:mtext>Λ</mml:mtext></mml:mrow></mml:msqrt></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>provided that the above quantity is non-negative, and has a gain of zero otherwise. The range of values of <inline-formula><mml:math id="inf212"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for which the above quantity is <inline-formula><mml:math id="inf213"><mml:mrow><mml:mo>≤</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> corresponds to signals that are not worthwhile to code, because the signal-to-noise is too small given the constraint on the channel dynamic range. 
More specifically, the above quantity is positive (and hence <inline-formula><mml:math id="inf214"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula> is nonzero) provided that <inline-formula><mml:math id="inf215"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>></mml:mo><mml:msqrt><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:mtext>Λ</mml:mtext></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula>. Note that this critical value becomes infinite as <inline-formula><mml:math id="inf216"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> approaches one from below, indicating that <inline-formula><mml:math id="inf217"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> near one is the transmission-limited regime. Conversely, the critical value of <inline-formula><mml:math id="inf218"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> approaches zero as <inline-formula><mml:math id="inf219"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> approaches zero from above, indicating that this is the sampling-limited regime. We further discuss these regimes below.</p><sec id="s4-9-1"><title>Transmission-limited regime</title><p>As mentioned, the transmission-limited regime corresponds to the limit of <inline-formula><mml:math id="inf220"><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>→</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> from below. 
For signals below the critical level of <inline-formula><mml:math id="inf221"><mml:mrow><mml:msqrt><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:mtext>Λ</mml:mtext></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula>, the optimal gain is zero, and signals are not encoded. For signals that are large compared to this cutoff, the main limitation is output power. In this regime, the optimal gain is inversely proportional to the signal strength (<xref ref-type="fig" rid="fig4s2">Figure 4—figure supplement 2A</xref>), as the asymptotic behavior of <xref ref-type="disp-formula" rid="equ7">Equation 0.7</xref> in the limit of large signal strength <inline-formula><mml:math id="inf222"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is:<disp-formula id="equ8"><label>(0.8)</label><mml:math id="m8"><mml:mrow><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>∼</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mtext>Λ</mml:mtext><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula></p><p>This is the classic ‘whitening’ regime, namely small signals are enhanced so that output power is equalized across channels: <inline-formula><mml:math id="inf223"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:mtext> </mml:mtext><mml:mo>∼</mml:mo><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for large <inline-formula><mml:math 
id="inf224"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>.</p><p>Note that when <inline-formula><mml:math id="inf225"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> is close to 1, there is an abrupt transition between signals that are encoded in inverse proportion to their size, and signals that are too small to be encoded at all (<xref ref-type="fig" rid="fig4s2">Figure 4—figure supplement 2A</xref>).</p></sec><sec id="s4-9-2"><title>Sampling-limited regime</title><p>When <inline-formula><mml:math id="inf226"><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>→</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> from above, the transition between signals that are not encoded at all, and signals that are encoded in inverse proportion to their size, undergoes a broadening. This results in a regime in which the optimal gain <italic>increases</italic> with signal strength (<xref ref-type="fig" rid="fig4s2">Figure 4—figure supplement 2B</xref>). This regime covers signals that are only modestly above the critical level of <inline-formula><mml:math id="inf227"><mml:mrow><mml:msqrt><mml:mrow><mml:mtext>Λ</mml:mtext><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:mtext>Λ</mml:mtext></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula>, that is signals for which sampling noise (rather than output capacity) is the dominant constraint. 
The extent of this regime increases as the relative importance of the output constraint <inline-formula><mml:math id="inf228"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula> decreases toward 0.</p><p>We determine the limiting dependence of <inline-formula><mml:math id="inf229"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula> on <inline-formula><mml:math id="inf230"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> from the asymptotic behavior of <xref ref-type="disp-formula" rid="equ7">Equation 0.7</xref> in the limit of small <inline-formula><mml:math id="inf231"><mml:mtext>Λ</mml:mtext></mml:math></inline-formula>:<disp-formula id="equ9"><label>(0.9)</label><mml:math id="m9"><mml:mrow><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>∼</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:mfrac><mml:msqrt><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>Λ</mml:mtext></mml:mfrac></mml:mrow></mml:msqrt></mml:mrow></mml:math></disp-formula></p><p>For signals that are small compared to the sampling noise (<inline-formula><mml:math id="inf232"><mml:mrow><mml:msqrt><mml:mtext>Λ</mml:mtext></mml:msqrt><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&lt;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>), the optimal filter is proportional to the square root of the signal strength, <inline-formula><mml:math id="inf233"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:mtext> 
</mml:mtext><mml:mo>∼</mml:mo><mml:msubsup><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:msup><mml:mtext>Λ</mml:mtext><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>.</p></sec></sec><sec id="s4-10"><title>Correspondence with perceptual sensitivity to local image statistics</title><p>We interpret the gain <inline-formula><mml:math id="inf234"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula> as representing the amount of resources devoted to a given signal <inline-formula><mml:math id="inf235"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. Since it is a direct measure of signal-to-noise for a unit-size input, it therefore corresponds to perceptual sensitivity.</p><p>In the psychophysical experiments here, we measure sensitivity for each of the image statistic coordinates <inline-formula><mml:math id="inf236"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, using a highly 
artificial set of stimuli. As predicted from the sampling-limited regime, we find that gains <inline-formula><mml:math id="inf237"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></inline-formula> are larger for the channels in which the natural environment provides larger values of the signal <inline-formula><mml:math id="inf238"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>.</p><p>While this analysis provides a rigorous identification of a regime in which gain increases with signal strength, we caution that it is an asymptotic analysis of a simplified model of feature coding. It therefore stops short of making the quantitative prediction that gain (sensitivity) is proportional to the square root of the signal strength of each image statistic.</p><p>On the other hand, the analysis does translate into a quantitative prediction about perceptual axes (i.e., about the orientations of the isodiscrimination contours). 
As shown in <xref ref-type="fig" rid="fig3">Figure 3</xref> (blue contours), the image statistic coordinates <inline-formula><mml:math id="inf239"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mo>|</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>−</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>/</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mo>\</mml:mo></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌜</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌝</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌟</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi mathvariant="normal">⌞</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>α</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> have substantial covariances. A rotation of the coordinates will thus yield a new set of coordinates with zero covariance and independent sampling errors. If these new coordinates are independently coded, then the perceptual axes will coincide with the axes of the image statistics, which is what we find (<xref ref-type="fig" rid="fig3">Figure 3B</xref>).</p><sec id="s4-10-1"><title>Numerical optimization in two dimensions</title><p>Here, we numerically show that in the 2-dimensional case, the axes of the optimal encoder will be aligned with the principal axes of the input statistics. 
As shown in <xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref>, the response <bold>r</bold> is given by:<disp-formula id="equ10"><label>(0.10)</label><mml:math id="m10"><mml:mrow><mml:mi mathvariant="bold">r</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold">L</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold">s</mml:mi><mml:mo>+</mml:mo><mml:mi>ξ</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:mi>η</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>where <italic>ξ</italic> is the sampling noise, <italic>η</italic> is the intrinsic channel noise, and <bold>s</bold> is a <italic>d</italic>-dimensional signal from natural scenes (each dimension corresponds to one of our image statistic coordinates; for simplicity, let <italic>d =</italic> 2, that is, we examine one pairwise plane). <bold>L</bold> is the linear transformation that we are looking for: this is essentially a ‘gain’ plus ‘rotation’ transformation. The axes of perceptual isodiscrimination contours should then be given by the eigenvalues of <inline-formula><mml:math id="inf240"><mml:mrow><mml:mi mathvariant="bold">L</mml:mi><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula>. The covariance of the stimuli is <inline-formula><mml:math id="inf241"><mml:mrow><mml:mi mathvariant="bold">S</mml:mi><mml:mo>=</mml:mo><mml:mo>〈</mml:mo><mml:mi mathvariant="bold">s</mml:mi><mml:msup><mml:mi mathvariant="bold">s</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>〉</mml:mo></mml:mrow></mml:math></inline-formula>. 
Noise is assumed IID, given by <inline-formula><mml:math id="inf242"><mml:mrow><mml:mo>〈</mml:mo><mml:mi>ξ</mml:mi><mml:msup><mml:mi>ξ</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>〉</mml:mo><mml:mo>=</mml:mo><mml:mtext>Ξ</mml:mtext><mml:mi mathvariant="bold">I</mml:mi></mml:mrow></mml:math></inline-formula> at the input and <inline-formula><mml:math id="inf243"><mml:mrow><mml:mo>〈</mml:mo><mml:mi>η</mml:mi><mml:msup><mml:mi>η</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>〉</mml:mo><mml:mo>=</mml:mo><mml:mtext>Σ</mml:mtext><mml:mi mathvariant="bold">I</mml:mi></mml:mrow></mml:math></inline-formula> at the output, where <bold>I</bold> is a 2 × 2 identity matrix and <inline-formula><mml:math id="inf244"><mml:mtext>Ξ</mml:mtext></mml:math></inline-formula> and <inline-formula><mml:math id="inf245"><mml:mtext>Σ</mml:mtext></mml:math></inline-formula> are noise magnitudes. With this notation, the total noise covariance matrix of the output is given by:<disp-formula id="equ11"><label>(0.11)</label><mml:math id="m11"><mml:mrow><mml:mi mathvariant="bold">N</mml:mi><mml:mo>=</mml:mo><mml:mtext>Σ</mml:mtext><mml:mi mathvariant="bold">I</mml:mi><mml:mo>+</mml:mo><mml:mtext>Ξ</mml:mtext><mml:mi mathvariant="bold">L</mml:mi><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula></p><p>The total variance at the output is:<disp-formula id="equ12"><label>(0.12)</label><mml:math id="m12"><mml:mrow><mml:msup><mml:mi mathvariant="bold">r</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mi>d</mml:mi><mml:mtext>Σ</mml:mtext><mml:mo>+</mml:mo><mml:mtext>Ξ Tr </mml:mtext><mml:mi mathvariant="bold">L</mml:mi><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>+</mml:mo><mml:mtext>Tr </mml:mtext><mml:mi mathvariant="bold">L</mml:mi><mml:mi mathvariant="bold">S</mml:mi><mml:msup><mml:mi 
mathvariant="bold">L</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula></p><p>By analogy to the van Hateren derivation, we fix the output power. Without loss of generality, we choose its value to be unity, which sets the unit for all power measures in the system. The information for a Gaussian multivariate channel in a standard form, <inline-formula><mml:math id="inf246"><mml:mrow><mml:mi mathvariant="bold">r</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold">L</mml:mi><mml:mo>′</mml:mo><mml:mi mathvariant="bold">s</mml:mi><mml:mo>+</mml:mo><mml:mi>η</mml:mi></mml:mrow></mml:math></inline-formula> is:<disp-formula id="equ13"><label>(0.13)</label><mml:math id="m13"><mml:mrow><mml:mi>I</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mtext>log det</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold">I</mml:mi><mml:mo>+</mml:mo><mml:msup><mml:mi mathvariant="bold">S</mml:mi><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:msup><mml:mo>′</mml:mo><mml:mi>T</mml:mi></mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>′</mml:mo><mml:msup><mml:mi mathvariant="bold">S</mml:mi><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>but this is only valid when the noise <italic>η</italic> is IID unit variance. In the present study, this is not the case: first, the noise, <bold>N</bold>, is correlated in the two channels, because the sampling noise is mixed by <bold>L</bold>; second, the variances are not the same in the two channels. We can, however, make a change of variables, <bold>r</bold>′ = <bold>Or</bold>, such that the noise for the new output <bold>r</bold>′ is IID unit variance. 
To do this, we decompose <inline-formula><mml:math id="inf247"><mml:mrow><mml:mi mathvariant="bold">N</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold">V</mml:mi><mml:mi mathvariant="bold">D</mml:mi><mml:msup><mml:mi mathvariant="bold">V</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> into its eigensystem, make <inline-formula><mml:math id="inf248"><mml:mrow><mml:mi mathvariant="bold">O</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mi mathvariant="bold">D</mml:mi><mml:mrow><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:msup><mml:mi mathvariant="bold">V</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula>, and identify <inline-formula><mml:math id="inf249"><mml:mrow><mml:mi mathvariant="bold">L</mml:mi><mml:mo>′</mml:mo><mml:mo>=</mml:mo><mml:mi mathvariant="bold">O</mml:mi><mml:mi mathvariant="bold">L</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mi mathvariant="bold">D</mml:mi><mml:mrow><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:msup><mml:mi mathvariant="bold">V</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mi mathvariant="bold">L</mml:mi></mml:mrow></mml:math></inline-formula>, so that we can use the standard result given in <xref ref-type="disp-formula" rid="equ13">Equation (0.13)</xref>. 
The optimal linear filter is given by:<disp-formula id="equ14"><label>(0.14)</label><mml:math id="m14"><mml:mrow><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mtext>*</mml:mtext></mml:msup><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mtext>argmax</mml:mtext></mml:mrow><mml:mrow><mml:mi mathvariant="bold">L</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi mathvariant="bold">r</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mtext> </mml:mtext><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mtext>log det</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold">I</mml:mi><mml:mo>+</mml:mo><mml:msup><mml:mi mathvariant="bold">S</mml:mi><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:msup><mml:mo>′</mml:mo><mml:mi>T</mml:mi></mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>′</mml:mo><mml:msup><mml:mi mathvariant="bold">S</mml:mi><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula></p><p>Since the output power is limited to 1 and channel noise <inline-formula><mml:math id="inf250"><mml:mtext>Σ</mml:mtext></mml:math></inline-formula> feeds directly into the output power, there is no solution for <bold>L</bold> for <inline-formula><mml:math id="inf251"><mml:mrow><mml:mtext>Σ</mml:mtext><mml:mo>></mml:mo><mml:mn>0.5</mml:mn></mml:mrow></mml:math></inline-formula> (since <italic>d</italic> = 2 and <inline-formula><mml:math id="inf252"><mml:mtext>Σ</mml:mtext></mml:math></inline-formula> is the noise in each of the channels, the total output power is taken up by channel noise at <inline-formula><mml:math id="inf253"><mml:mrow><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0.5</mml:mn></mml:mrow></mml:math></inline-formula>). 
The magnitude of the sampling noise can be unbounded, since one can always select the gain in <bold>L</bold> to be low enough so that the constraint on total output power is satisfied. Because the gain rescales the input, we can fix the total power of the input signal (the trace of <bold>S</bold>) to be unity. With this choice, the remaining parameters of the problem are the magnitude of the channel noise (<inline-formula><mml:math id="inf254"><mml:mtext>Σ</mml:mtext></mml:math></inline-formula>) and the magnitude of the sampling noise relative to the input power (i.e. 1/SNR at the input).</p><p>Given these two parameters that determine the sampling and channel noise magnitudes, we generate input signal covariances <bold>S</bold> with total power of unity but with randomly selected ‘tilts’ (angles of the leading eigenvector of <bold>S</bold> measured relative to the horizontal) and ‘eccentricities’ (<inline-formula><mml:math id="inf255"><mml:mrow><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:msubsup><mml:mi>g</mml:mi><mml:mrow><mml:mtext>min</mml:mtext></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>/</mml:mo><mml:msubsup><mml:mi>g</mml:mi><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:msqrt></mml:mrow></mml:math></inline-formula>, where <italic>g</italic> are the eigenvalues of <bold>S</bold>); these quantities can be directly estimated from natural scenes. We then use constrained optimization to numerically identify the optimal transformation <inline-formula><mml:math id="inf256"><mml:mrow><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>∗</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula>. 
For each such solution for <inline-formula><mml:math id="inf257"><mml:mrow><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>∗</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula>, we compute the eigensystem of <inline-formula><mml:math id="inf258"><mml:mrow><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>∗</mml:mo></mml:msup><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mrow><mml:mo>∗</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, extract its eccentricity and tilt as described above, and compare these values to the eccentricity and tilt of the input signal.</p><p>We identify the following efficient coding regimes that depend on the total noise and on the relative magnitudes of sampling and channel noises (<xref ref-type="fig" rid="fig4s3">Figure 4—figure supplement 3</xref>):</p></sec></sec><sec id="s4-11"><title>Transmission-limited regime (total noise &lt;0.5)</title><p><inline-formula><mml:math id="inf259"><mml:mrow><mml:mn>0</mml:mn><mml:mo>≤</mml:mo><mml:mtext>Ξ</mml:mtext><mml:mo>≪</mml:mo><mml:mtext>Σ</mml:mtext></mml:mrow></mml:math></inline-formula> (dominating channel noise). 
The optimal strategy is decorrelation by whitening (<xref ref-type="fig" rid="fig4s4">Figure 4—figure supplement 4A</xref>); the tilt of the filter relative to the signal is <inline-formula><mml:math id="inf260"><mml:mrow><mml:mi>π</mml:mi><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula>, and the eccentricities are equal (i.e., the small eigenvalue of <inline-formula><mml:math id="inf261"><mml:mrow><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mo>∗</mml:mo></mml:msup><mml:msup><mml:mi mathvariant="bold">L</mml:mi><mml:mrow><mml:mo>∗</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> is proportional to the inverse of the large eigenvalue of <bold>S</bold> and vice versa, indicating that the gain scales as the inverse of the input power).</p><p><inline-formula><mml:math id="inf262"><mml:mrow><mml:mn>0</mml:mn><mml:mo>&lt;</mml:mo><mml:mtext>Ξ</mml:mtext><mml:mo>≪</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> (zero channel noise, small sampling noise). The optimal strategy is still decorrelation (<xref ref-type="fig" rid="fig4s4">Figure 4—figure supplement 4B</xref>) with signal components of higher power being suppressed by the gain, but the suppression does not follow the inverse law as above.</p></sec><sec id="s4-12"><title>Sampling-limited regime (total noise >0.5)</title><p><inline-formula><mml:math id="inf263"><mml:mrow><mml:mtext>Ξ</mml:mtext><mml:mo>≥</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mtext>Σ</mml:mtext><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> (zero channel noise, large sampling noise). The tilt of the filter matches the tilt of the signal, and the gain scales with input power. 
For high sampling noise and zero channel noise, the gain scales as the square-root of the input power (<xref ref-type="fig" rid="fig4s4">Figure 4—figure supplement 4C</xref>).</p><p><inline-formula><mml:math id="inf264"><mml:mrow><mml:mtext>Ξ</mml:mtext><mml:mo>></mml:mo><mml:mi>Σ</mml:mi><mml:mo>></mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> (dominating sampling noise). In a broad regime of noise strengths where sampling noise dominates over non-zero channel noise, the tilt of the gain matches the tilt of the signal, and the gain roughly scales with the input power (<xref ref-type="fig" rid="fig4s4">Figure 4—figure supplement 4D</xref>). This regime is consistent with the correspondence that we observe between the natural scenes statistics and the psychophysical measurements.</p></sec></sec></body><back><ack id="ack"><title>Acknowledgements</title><p>We thank Jason Prentice and Eizaburo Doi for valuable discussions. This work was supported by NIH EY07977, NSF PHY-1058202, FWF P25651, NEI Vision Training Grant 5-T32-EY007035-32, and the Fondation Pierre Gilles de Gennes.</p></ack><sec sec-type="additional-information"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="conflict" id="conf1"><p>The authors declare that no competing interests exist.</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>AMH, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con2"><p>JJB, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con3"><p>MMC, Conception and design, Acquisition of data, Analysis and interpretation of data, 
Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con4"><p>JDV, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con5"><p>VB, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con6"><p>GT, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn></fn-group><fn-group content-type="ethics-information"><title>Ethics</title><fn fn-type="other"><p>Human subjects: The human subjects research (visual psychophysics) was approved by the Institutional Review Board of the Weill Cornell Medical College, and was in accord with the World Medical Association Declaration of Helsinki. 
Informed consent was obtained from each subject prior to the experimental sessions, and consent to publish was obtained from Mary Conte (MC), the one subject who is potentially identifiable by the initials since she is also an author.</p></fn></fn-group></sec><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Atick</surname><given-names>J</given-names></name><name><surname>Redlich</surname><given-names>AN</given-names></name></person-group><year>1990</year><article-title>Toward a theory of early visual processing</article-title><source>Neural Computation</source><volume>2</volume><fpage>308</fpage><lpage>320</lpage><pub-id pub-id-type="doi">10.1162/neco.1990.2.3.308</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Atick</surname><given-names>J</given-names></name><name><surname>Li</surname><given-names>Z</given-names></name><name><surname>Redlich</surname><given-names>AN</given-names></name></person-group><year>1992</year><article-title>Understanding retinal color coding from first principles</article-title><source>Neural Computation</source><volume>4</volume><fpage>559</fpage><lpage>572</lpage><pub-id pub-id-type="doi">10.1162/neco.1992.4.4.559</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Attneave</surname><given-names>F</given-names></name></person-group><year>1954</year><article-title>Some informational aspects of visual perception</article-title><source>Psychological Review</source><volume>61</volume><fpage>183</fpage><lpage>193</lpage><pub-id pub-id-type="doi">10.1037/h0054663</pub-id></element-citation></ref><ref id="bib4"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Balasubramanian</surname><given-names>V</given-names></name><name><surname>Sterling</surname><given-names>P</given-names></name></person-group><year>2009</year><article-title>Receptive fields and the functional architecture in the retina</article-title><source>The Journal of Physiology</source><volume>587</volume><fpage>2753</fpage><lpage>2767</lpage><pub-id pub-id-type="doi">10.1113/jphysiol.2009.170704</pub-id></element-citation></ref><ref id="bib5"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Barlow</surname><given-names>HB</given-names></name></person-group><year>1959</year><article-title>Sensory mechanisms, the reduction of redundancy, and intelligence</article-title><comment>Proceedings of the 17 Symposium on the Mechanization of Thought Processes</comment><volume>Vol. 2</volume><publisher-name>HM Stationery Office, London</publisher-name><fpage>537</fpage><lpage>574</lpage></element-citation></ref><ref id="bib6"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Barlow</surname><given-names>HB</given-names></name></person-group><year>1961</year><article-title>Possible principles underlying the transformation of sensory messages</article-title><source>Sensory Communication</source><publisher-name>MIT Press, Cambridge</publisher-name><fpage>217</fpage><lpage>234</lpage></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barlow</surname><given-names>H</given-names></name></person-group><year>2001</year><article-title>Redundancy reduction revisited</article-title><source>Network</source><volume>12</volume><fpage>241</fpage><lpage>253</lpage><pub-id pub-id-type="doi">10.1088/0954-898X/12/3/301</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Bell</surname><given-names>AJ</given-names></name><name><surname>Sejnowski</surname><given-names>TJ</given-names></name></person-group><year>1997</year><article-title>The “independent components” of natural scenes are edge filters</article-title><source>Vision Research</source><volume>37</volume><fpage>3327</fpage><lpage>3338</lpage><pub-id pub-id-type="doi">10.1016/S0042-6989(97)00121-1</pub-id></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Borghuis</surname><given-names>BG</given-names></name><name><surname>Ratliff</surname><given-names>CP</given-names></name><name><surname>Smith</surname><given-names>RG</given-names></name><name><surname>Sterling</surname><given-names>P</given-names></name><name><surname>Balasubramanian</surname><given-names>V</given-names></name></person-group><year>2008</year><article-title>Design of a neuronal array</article-title><source>The Journal of Neuroscience</source><volume>28</volume><fpage>3178</fpage><lpage>3189</lpage><pub-id pub-id-type="doi">10.1523/JNEUROSCI.5259-07.2008</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brenner</surname><given-names>N</given-names></name><name><surname>Bialek</surname><given-names>W</given-names></name><name><surname>de Ruyter van Steveninck</surname><given-names>R</given-names></name></person-group><year>2000</year><article-title>Adaptive rescaling maximizes information transmission</article-title><source>Neuron</source><volume>26</volume><fpage>695</fpage><lpage>702</lpage><pub-id pub-id-type="doi">10.1016/S0896-6273(00)81205-2</pub-id></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Brincat</surname><given-names>SL</given-names></name><name><surname>Connor</surname><given-names>CE</given-names></name></person-group><year>2004</year><article-title>Underlying principles of visual shape selectivity in posterior inferotemporal cortex</article-title><source>Nature Neuroscience</source><volume>7</volume><fpage>880</fpage><lpage>886</lpage><pub-id pub-id-type="doi">10.1038/nn1278</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Campbell</surname><given-names>FW</given-names></name><name><surname>Kulikowski</surname><given-names>JJ</given-names></name><name><surname>Levinson</surname><given-names>J</given-names></name></person-group><year>1966</year><article-title>The effect of orientation on the visual resolution of gratings</article-title><source>The Journal of Physiology</source><volume>187</volume><fpage>427</fpage><lpage>436</lpage></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Carlson</surname><given-names>NL</given-names></name><name><surname>DeWeese</surname><given-names>MR</given-names></name></person-group><year>2002</year><article-title>Learning of sparse auditory receptive fields</article-title><comment>Proceedings of the 2002 International Joint Conference on Neural Networks</comment><volume>2</volume><fpage>1103</fpage><lpage>1108</lpage></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Carlson</surname><given-names>NL</given-names></name><name><surname>DeWeese</surname><given-names>MR</given-names></name></person-group><year>2012</year><article-title>Sparse codes for speech predict spectrotemporal receptive fields in the inferior colliculus</article-title><source>PLOS Computational 
Biology</source><volume>8</volume><fpage>e1002594</fpage><pub-id pub-id-type="doi">10.1371/journal.pcbi.1002594</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chubb</surname><given-names>C</given-names></name><name><surname>Landy</surname><given-names>MS</given-names></name><name><surname>Econopouly</surname><given-names>J</given-names></name></person-group><year>2004</year><article-title>A visual mechanism tuned to black</article-title><source>Vision Research</source><volume>44</volume><fpage>3223</fpage><lpage>3232</lpage><pub-id pub-id-type="doi">10.1016/j.visres.2004.07.019</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Croner</surname><given-names>LJ</given-names></name><name><surname>Kaplan</surname><given-names>E</given-names></name></person-group><year>1995</year><article-title>Receptive fields of p and m ganglion cells across the primate retina</article-title><source>Vision Research</source><volume>35</volume><fpage>7</fpage><lpage>24</lpage><pub-id pub-id-type="doi">10.1016/0042-6989(94)E0066-T</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Doi</surname><given-names>E</given-names></name><name><surname>Lewicki</surname><given-names>MS</given-names></name></person-group><year>2011</year><article-title>Characterization of minimum error linear coding with sensory and neural noise</article-title><source>Neural Computation</source><volume>23</volume><fpage>2498</fpage><lpage>2510</lpage><pub-id pub-id-type="doi">10.1162/NECO_a_00181</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Doi</surname><given-names>E</given-names></name><name><surname>Lewicki</surname><given-names>MS</given-names></name></person-group><year>2014</year><article-title>Simple model of optimal population coding for sensory systems</article-title><source>PLOS Computational Biology</source><volume>10</volume><fpage>e1003761</fpage><pub-id pub-id-type="doi">10.1371/journal.pcbi.1003761</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fairhall</surname><given-names>AL</given-names></name><name><surname>Lewen</surname><given-names>GD</given-names></name><name><surname>Bialek</surname><given-names>W</given-names></name><name><surname>de Ruyter van Steveninck</surname><given-names>R</given-names></name></person-group><year>2001</year><article-title>Efficiency and ambiguity in an adaptive neural code</article-title><source>Nature</source><volume>412</volume><fpage>787</fpage><lpage>792</lpage><pub-id pub-id-type="doi">10.1038/35090500</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Field</surname><given-names>DJ</given-names></name></person-group><year>1987</year><article-title>Relations between the statistics of natural images and the response properties of cortical cells</article-title><source>Journal of the Optical Society of America A, Optics and Image Science</source><volume>4</volume><fpage>2379</fpage><lpage>2394</lpage><pub-id pub-id-type="doi">10.1364/JOSAA.4.002379</pub-id></element-citation></ref><ref id="bib21"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ganguli</surname><given-names>D</given-names></name><name><surname>Simoncelli</surname><given-names>EP</given-names></name></person-group><year>2011</year><article-title>Implicit encoding of prior probabilities in optimal 
neural populations</article-title><source>Advances in Neural Information Processing Systems</source><person-group person-group-type="editor"><name><surname>Lafferty</surname><given-names>J</given-names></name><name><surname>Williams</surname><given-names>C</given-names></name><name><surname>Zemel</surname><given-names>R</given-names></name><name><surname>Shawe-Taylor</surname><given-names>J</given-names></name><name><surname>Culotta</surname><given-names>A</given-names></name></person-group><volume>Vol. 23</volume><publisher-name>MIT Press</publisher-name></element-citation></ref><ref id="bib22"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garrigan</surname><given-names>P</given-names></name><name><surname>Ratliff</surname><given-names>CP</given-names></name><name><surname>Klein</surname><given-names>JM</given-names></name><name><surname>Sterling</surname><given-names>P</given-names></name><name><surname>Brainard</surname><given-names>DH</given-names></name><name><surname>Balasubramanian</surname><given-names>V</given-names></name></person-group><year>2010</year><article-title>Design of a trichromatic cone array</article-title><source>PLOS Computational Biology</source><volume>6</volume><fpage>e1000677</fpage><pub-id pub-id-type="doi">10.1371/journal.pcbi.1000677</pub-id></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hung</surname><given-names>CC</given-names></name><name><surname>Carlson</surname><given-names>ET</given-names></name><name><surname>Connor</surname><given-names>CE</given-names></name></person-group><year>2012</year><article-title>Medial axis shape coding in macaque inferotemporal cortex</article-title><source>Neuron</source><volume>74</volume><fpage>1099</fpage><lpage>1113</lpage><pub-id pub-id-type="doi">10.1016/j.neuron.2012.04.029</pub-id></element-citation></ref><ref id="bib24"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Hyvarinen</surname><given-names>A</given-names></name><name><surname>Hoyer</surname><given-names>P</given-names></name></person-group><year>2000</year><article-title>Emergence of phase and shift invariant features by decomposition of natural images into independent feature subspaces</article-title><source>Neural Computation</source><volume>12</volume><fpage>1705</fpage><lpage>1720</lpage><pub-id pub-id-type="doi">10.1162/089976600300015312</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karklin</surname><given-names>Y</given-names></name><name><surname>Lewicki</surname><given-names>MS</given-names></name></person-group><year>2009</year><article-title>Emergence of complex cell properties by learning to generalize in natural scenes</article-title><source>Nature</source><volume>457</volume><fpage>83</fpage><lpage>86</lpage><pub-id pub-id-type="doi">10.1038/nature07481</pub-id></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karklin</surname><given-names>Y</given-names></name><name><surname>Simoncelli</surname><given-names>EP</given-names></name></person-group><year>2001</year><article-title>Efficient coding of natural images with a population of noisy, non-linear neurons</article-title><source>Advances in Neural Information Processing Systems</source><volume>24</volume><fpage>999</fpage><lpage>1007</lpage></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Kaschube</surname><given-names>M</given-names></name><name><surname>Schnabel</surname><given-names>M</given-names></name><name><surname>Lowel</surname><given-names>S</given-names></name><name><surname>Coppola</surname><given-names>DM</given-names></name><name><surname>White</surname><given-names>LE</given-names></name><name><surname>Wolf</surname><given-names>F</given-names></name></person-group><year>2011</year><article-title>Universality in the evolution of orientation columns in the visual cortex</article-title><source>Science</source><volume>330</volume><fpage>1113</fpage><lpage>1116</lpage><pub-id pub-id-type="doi">10.1126/science.1194869</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kremers</surname><given-names>J</given-names></name><name><surname>Lee</surname><given-names>BB</given-names></name><name><surname>Yeh</surname><given-names>T</given-names></name></person-group><year>1995</year><article-title>Receptive field dimensions of macaque retinal ganglion cells</article-title><source>Colour Vision Deficiencies</source><comment>Documenta Ophthalmologica Proceedings Series</comment><fpage>399</fpage><lpage>405</lpage></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kuang</surname><given-names>X</given-names></name><name><surname>Poletti</surname><given-names>M</given-names></name><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Rucci</surname><given-names>M</given-names></name></person-group><year>2012</year><article-title>Temporal encoding of spatial information during active visual fixation</article-title><source>Current Biology</source><volume>22</volume><fpage>510</fpage><lpage>514</lpage><pub-id pub-id-type="doi">10.1016/j.cub.2012.01.050</pub-id></element-citation></ref><ref 
id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Laughlin</surname><given-names>S</given-names></name></person-group><year>1981</year><article-title>A simple coding procedure enhances a neuron's information capacity</article-title><source>Zeitschrift für Naturforschung C A journal of biosciences</source><volume>36</volume><fpage>910</fpage><lpage>912</lpage></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lewicki</surname><given-names>MS</given-names></name></person-group><year>2002</year><article-title>Efficient coding of natural sounds</article-title><source>Nature Neuroscience</source><volume>5</volume><fpage>356</fpage><lpage>363</lpage></element-citation></ref><ref id="bib32"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>YS</given-names></name><name><surname>Stevens</surname><given-names>CF</given-names></name><name><surname>Sharpee</surname><given-names>TO</given-names></name></person-group><year>2009</year><article-title>Predictable irregularities in retinal receptive fields</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>106</volume><fpage>16499</fpage><lpage>16504</lpage><pub-id pub-id-type="doi">10.1073/pnas.0908926106</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morrone</surname><given-names>MC</given-names></name><name><surname>Burr</surname><given-names>DC</given-names></name></person-group><year>1988</year><article-title>Feature detection in human vision: a phase-dependent energy model</article-title><source>Proceedings of the Royal Society of London Series B, Containing Papers of a Biological 
Character</source><volume>235</volume><fpage>221</fpage><lpage>245</lpage></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nirenberg</surname><given-names>S</given-names></name><name><surname>Pandarinath</surname><given-names>C</given-names></name></person-group><year>2012</year><article-title>Retinal prosthetic strategy with the capacity to restore normal vision</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>109</volume><fpage>15012</fpage><lpage>15017</lpage><pub-id pub-id-type="doi">10.1073/pnas.1207035109</pub-id></element-citation></ref><ref id="bib35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Olshausen</surname><given-names>BA</given-names></name><name><surname>Field</surname><given-names>DJ</given-names></name></person-group><year>1996</year><article-title>Emergence of simple-cell receptive field properties by learning a sparse code for natural images</article-title><source>Nature</source><volume>381</volume><fpage>607</fpage><lpage>609</lpage><pub-id pub-id-type="doi">10.1038/381607a0</pub-id></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Olshausen</surname><given-names>BA</given-names></name><name><surname>Field</surname><given-names>DJ</given-names></name></person-group><year>1997</year><article-title>Sparse coding with an overcomplete basis set: a strategy employed by v1?</article-title><source>Vision Research</source><volume>37</volume><fpage>3311</fpage><lpage>3325</lpage><pub-id pub-id-type="doi">10.1016/S0042-6989(97)00169-7</pub-id></element-citation></ref><ref id="bib37"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Oppenheim</surname><given-names>AV</given-names></name><name><surname>Lim</surname><given-names>JS</given-names></name></person-group><year>1981</year><article-title>The importance of phase in signals</article-title><source>Proceedings of the IEEE</source><volume>69</volume><fpage>529</fpage><lpage>541</lpage><pub-id pub-id-type="doi">10.1109/PROC.1981.12022</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Purpura</surname><given-names>K</given-names></name><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Katz</surname><given-names>E</given-names></name></person-group><year>1994</year><article-title>Striate cortex extracts higher-order spatial correlations from visual textures</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>91</volume><fpage>8482</fpage><lpage>8486</lpage><pub-id pub-id-type="doi">10.1073/pnas.91.18.8482</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ratliff</surname><given-names>CP</given-names></name><name><surname>Borghuis</surname><given-names>BG</given-names></name><name><surname>Kao</surname><given-names>Y-H</given-names></name><name><surname>Sterling</surname><given-names>P</given-names></name><name><surname>Balasubramanian</surname><given-names>V</given-names></name></person-group><year>2010</year><article-title>Retina is structured to process an excess of darkness in natural scenes</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>107</volume><fpage>17368</fpage><lpage>17373</lpage><pub-id pub-id-type="doi">10.1073/pnas.1005846107</pub-id></element-citation></ref><ref id="bib40"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Ruderman</surname><given-names>DL</given-names></name><name><surname>Bialek</surname><given-names>W</given-names></name></person-group><year>1994</year><article-title>Statistics of natural images: scaling in the woods</article-title><source>Physical Review Letters</source><volume>73</volume><fpage>814</fpage><lpage>817</lpage><pub-id pub-id-type="doi">10.1103/PhysRevLett.73.814</pub-id></element-citation></ref><ref id="bib41"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ruderman</surname><given-names>DL</given-names></name></person-group><year>1997</year><article-title>Origins of scaling in natural images</article-title><source>Vision Research</source><volume>37</volume><fpage>3385</fpage><lpage>3398</lpage><pub-id pub-id-type="doi">10.1016/S0042-6989(97)00008-4</pub-id></element-citation></ref><ref id="bib42"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schwartz</surname><given-names>O</given-names></name><name><surname>Simoncelli</surname><given-names>EP</given-names></name></person-group><year>2001</year><article-title>Natural signal statistics and sensory gain control</article-title><source>Nature Neuroscience</source><volume>4</volume><fpage>819</fpage><lpage>825</lpage><pub-id pub-id-type="doi">10.1038/90526</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Simoncelli</surname><given-names>EP</given-names></name></person-group><year>2002</year><article-title>Vision and the statistics of the visual environment</article-title><source>Current Opinion in Neurobiology</source><volume>13</volume><fpage>144</fpage><lpage>149</lpage><pub-id pub-id-type="doi">10.1016/S0959-4388(03)00047-3</pub-id></element-citation></ref><ref id="bib44"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Smith</surname><given-names>EC</given-names></name><name><surname>Lewicki</surname><given-names>MS</given-names></name></person-group><year>2006</year><article-title>Efficient auditory coding</article-title><source>Nature</source><volume>439</volume><fpage>978</fpage><lpage>982</lpage><pub-id pub-id-type="doi">10.1038/nature04485</pub-id></element-citation></ref><ref id="bib45"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Srinivasan</surname><given-names>MV</given-names></name><name><surname>Laughlin</surname><given-names>SB</given-names></name><name><surname>Dubs</surname><given-names>A</given-names></name></person-group><year>1982</year><article-title>Predictive coding: a fresh view of inhibition in the retina</article-title><source>Proceedings of the Royal Society of London Series B, Containing papers of a Biological character</source><volume>216</volume><fpage>427</fpage><lpage>459</lpage><pub-id pub-id-type="doi">10.1098/rspb.1982.0085</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stephens</surname><given-names>GJ</given-names></name><name><surname>Mora</surname><given-names>T</given-names></name><name><surname>Tkacik</surname><given-names>G</given-names></name><name><surname>Bialek</surname><given-names>W</given-names></name></person-group><year>2013</year><article-title>Statistical thermodynamics of natural images</article-title><source>Physical Review Letters</source><volume>110</volume><fpage>018701</fpage><pub-id pub-id-type="doi">10.1103/PhysRevLett.110.018701</pub-id></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Swindale</surname><given-names>NV</given-names></name><name><surname>Shoham</surname><given-names>D</given-names></name><name><surname>Grinvald</surname><given-names>A</given-names></name><name><surname>Bonhoeffer</surname><given-names>T</given-names></name><name><surname>Hubener</surname><given-names>M</given-names></name></person-group><year>2000</year><article-title>Visual cortex maps are optimized for uniform coverage</article-title><source>Nature Neuroscience</source><volume>3</volume><fpage>822</fpage><lpage>826</lpage><pub-id pub-id-type="doi">10.1038/77731</pub-id></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tkačik</surname><given-names>G</given-names></name><name><surname>Prentice</surname><given-names>JS</given-names></name><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Balasubramanian</surname><given-names>V</given-names></name></person-group><year>2010</year><article-title>Local statistics in natural scenes predict the saliency of synthetic textures</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>107</volume><fpage>18149</fpage><lpage>18154</lpage><pub-id pub-id-type="doi">10.1073/pnas.0914916107</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Tkačik</surname><given-names>G</given-names></name><name><surname>Ratliff</surname><given-names>CP</given-names></name><name><surname>Milčinski</surname><given-names>G</given-names></name><name><surname>Klein</surname><given-names>JM</given-names></name><name><surname>Seyfarth</surname><given-names>LH</given-names></name><name><surname>Sterling</surname><given-names>P</given-names></name><name><surname>Brainard</surname><given-names>DH</given-names></name><name><surname>Balasubramanian</surname><given-names>V</given-names></name></person-group><year>2011</year><article-title>Natural images from the birthplace of the human eye</article-title><source>PLOS ONE</source><volume>6</volume><fpage>e20409</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0020409</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Torralba</surname><given-names>A</given-names></name><name><surname>Oliva</surname><given-names>A</given-names></name></person-group><year>2003</year><article-title>Statistics of natural image categories</article-title><source>Network</source><volume>14</volume><fpage>391</fpage><lpage>412</lpage><pub-id pub-id-type="doi">10.1088/0954-898X/14/3/302</pub-id></element-citation></ref><ref id="bib51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Hateren</surname><given-names>JH</given-names></name><name><surname>Ruderman</surname><given-names>DL</given-names></name></person-group><year>1998</year><article-title>Independent component analysis of natural image sequences yields spatiotemporal filters similar to simple cells in primary visual cortex</article-title><source>Proceedings of the Royal Society of London Series B, Containing papers of a Biological character</source><volume>265</volume><fpage>2315</fpage><lpage>2320</lpage></element-citation></ref><ref 
id="bib52"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Hateren</surname><given-names>JH</given-names></name><name><surname>van der Schaaf</surname><given-names>A</given-names></name></person-group><year>1998</year><article-title>Independent component filters of natural images compared with simple cells in primary visual cortex</article-title><source>Proceedings Biological sciences/The Royal Society</source><volume>265</volume><fpage>359</fpage><lpage>366</lpage></element-citation></ref><ref id="bib53"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Hateren</surname><given-names>JH</given-names></name></person-group><year>1992a</year><article-title>A theory of maximizing sensory information</article-title><source>Biological Cybernetics</source><volume>68</volume><fpage>23</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1007/BF00203134</pub-id></element-citation></ref><ref id="bib54"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van Hateren</surname><given-names>JH</given-names></name></person-group><year>1992b</year><article-title>Theoretical predictions of spatiotemporal receptive fields of fly lmcs, and experimental validation</article-title><source>Journal of Comparative Physiology A</source><volume>171</volume><fpage>157</fpage><lpage>170</lpage><pub-id pub-id-type="doi">10.1007/BF00188924</pub-id></element-citation></ref><ref id="bib55"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Conte</surname><given-names>MM</given-names></name></person-group><year>1991</year><article-title>Spatial organization of nonlinear interactions in form perception</article-title><source>Vision Research</source><volume>31</volume><fpage>1457</fpage><lpage>1488</lpage><pub-id 
pub-id-type="doi">10.1016/0042-6989(91)90125-O</pub-id></element-citation></ref><ref id="bib56"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Conte</surname><given-names>MM</given-names></name></person-group><year>2005</year><article-title>Local processes and spatial pooling in texture and symmetry detection</article-title><source>Vision Research</source><volume>45</volume><fpage>1063</fpage><lpage>1073</lpage><pub-id pub-id-type="doi">10.1016/j.visres.2004.10.012</pub-id></element-citation></ref><ref id="bib57"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Conte</surname><given-names>MM</given-names></name></person-group><year>2012</year><article-title>Local image statistics: maximum-entropy constructions and perceptual salience</article-title><source>Journal of the Optical Society of America A, Optics, Image Science, and Vision</source><volume>29</volume><fpage>1313</fpage><lpage>1345</lpage><pub-id pub-id-type="doi">10.1364/JOSAA.29.001313</pub-id></element-citation></ref><ref id="bib58"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Chubb</surname><given-names>C</given-names></name><name><surname>Conte</surname><given-names>MM</given-names></name></person-group><year>2005</year><article-title>Interaction of luminance and higher-order statistics in texture discrimination</article-title><source>Vision Research</source><volume>45</volume><fpage>311</fpage><lpage>328</lpage><pub-id pub-id-type="doi">10.1016/j.visres.2004.08.013</pub-id></element-citation></ref><ref id="bib59"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Victor</surname><given-names>JD</given-names></name><name><surname>Thengone</surname><given-names>DJ</given-names></name><name><surname>Conte</surname><given-names>MM</given-names></name></person-group><year>2013</year><article-title>Perception of second- and third-order orientation signals, and their interactions</article-title><source>Journal of Vision</source><volume>13</volume><fpage>1</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1167/13.4.21</pub-id></element-citation></ref><ref id="bib60"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Victor</surname><given-names>JD</given-names></name></person-group><year>1986</year><article-title>Isolation of components due to intracortical processing in the visual evoked potential</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>83</volume><fpage>7984</fpage><lpage>7988</lpage><pub-id pub-id-type="doi">10.1073/pnas.83.20.7984</pub-id></element-citation></ref><ref id="bib61"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vinje</surname><given-names>WE</given-names></name><name><surname>Gallant</surname><given-names>JL</given-names></name></person-group><year>2000</year><article-title>Sparse coding and decorrelation in primary visual cortex</article-title><source>Science</source><volume>287</volume><fpage>1273</fpage><lpage>1276</lpage><pub-id pub-id-type="doi">10.1126/science.287.5456.1273</pub-id></element-citation></ref><ref id="bib62"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>von der Heydt</surname><given-names>R</given-names></name><name><surname>Peterhans</surname><given-names>E</given-names></name><name><surname>Baumgartner</surname><given-names>G</given-names></name></person-group><year>1984</year><article-title>Illusory contours and cortical neuron 
responses</article-title><source>Science</source><volume>224</volume><fpage>1260</fpage><lpage>1262</lpage><pub-id pub-id-type="doi">10.1126/science.6539501</pub-id></element-citation></ref><ref id="bib63"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wolf</surname><given-names>F</given-names></name><name><surname>Geisel</surname><given-names>T</given-names></name></person-group><year>1998</year><article-title>Spontaneous pinwheel annihilation during visual development</article-title><source>Nature</source><volume>395</volume><fpage>73</fpage><lpage>78</lpage><pub-id pub-id-type="doi">10.1038/25736</pub-id></element-citation></ref><ref id="bib64"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yau</surname><given-names>JM</given-names></name><name><surname>Pasupathy</surname><given-names>A</given-names></name><name><surname>Brincat</surname><given-names>SL</given-names></name><name><surname>Connor</surname><given-names>CE</given-names></name></person-group><year>2012</year><article-title>Curvature processing dynamics in macaque area v4</article-title><source>Cerebral Cortex</source><volume>23</volume><fpage>198</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1093/cercor/bhs004</pub-id></element-citation></ref><ref id="bib65"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname><given-names>Y</given-names></name><name><surname>Schmid</surname><given-names>AM</given-names></name><name><surname>Victor</surname><given-names>JD</given-names></name></person-group><year>2013</year><article-title>The laminar origin of sensitivity to high-order image statistics in macaque visual cortex</article-title><comment>Proceedings of the CoSyNe Annual Meeting</comment></element-citation></ref></ref-list></back><sub-article article-type="article-commentary" id="SA1"><front-stub><article-id 
pub-id-type="doi">10.7554/eLife.03722.026</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Behrens</surname><given-names>Timothy</given-names></name><role>Reviewing editor</role><aff><institution>Oxford University</institution>, <country>United Kingdom</country></aff></contrib></contrib-group></front-stub><body><boxed-text><p>eLife posts the editorial decision letter and author response on a selection of the published articles (subject to the approval of the authors). An edited version of the letter sent to the authors after peer review is shown, indicating the substantive concerns or comments; minor concerns are not usually shown. Reviewers have the opportunity to discuss the decision before the letter is sent (see <ext-link ext-link-type="uri" xlink:href="http://elifesciences.org/review-process">review process</ext-link>). Similarly, the author response typically shows only responses to the major concerns raised by the reviewers.</p></boxed-text><p>Thank you for sending your work entitled “Variance is salience: efficient coding in central sensory processing” for consideration at <italic>eLife</italic>. Your article has been evaluated by Eve Marder (Senior editor), Timothy Behrens (Reviewing editor), and 3 reviewers.</p><p>The Reviewing editor and the other reviewers discussed their comments before we reached this decision, and the Reviewing editor has assembled the following comments to help you prepare a revision.</p><p>All reviewers found the data interesting, but all had concerns about whether they addressed the central hypotheses outlined in the abstract and introduction. In brief we do not find a convincing argument that the results 1) reflect a central mechanism, 2) relate to behavior, or 3) make different predictions than what one would predict for the sensory periphery given the efficient coding hypothesis. 
These questions are specified in detail in the reviews below. The consensus that emerged from the consultation session is that clarification and rewriting will be needed to ensure that the contribution of this work can be understood. Ordinarily <italic>eLife</italic> does not provide the full reviews, but in this case we believe it is important that you see the concerns of the reviewers as you prepare your revision.</p><p><italic>Reviewer #1</italic></p><p>The authors show that psychophysical detection thresholds for figure/ground segmentation of image patches in noise are extremely well predicted by the extent to which the features defining the patch are variable in natural images. Both follow a characteristic pattern in which cardinal, then oblique features are the most variable over images (and the most detectable), followed by fourth order statistics, followed by third order (L shapes in a 4-pixel glider). The correspondence between lines of isosensitivity and the precision matrix for pairwise comparisons in 10 dimensions are very striking. The authors interpret their findings as showing that efficient coding - heightened sensitivity to the most informative features of the world - occurs not only at a sensory level but at a central decision level.</p><p>I liked the paper and approach very much. I was a little more doubtful about the novelty of the findings. My concerns are as follows:</p><p>1) I wasn't sure how the paper demonstrated that efficient coding operates at a 'central' level, as the authors claim in the abstract. In fact, the paper doesn't really have anything to do with what most cognitive scientists think of as 'central processing'. It shows that we are most sensitive to patches defined by cardinal orientations, and that variability along this axis is an informative way to characterise natural scenes.</p><p>2) The psychophysical results seems in part to restate the well-known oblique effect. 
It is known that we are most sensitive to cardinal orientations, and the neural underpinnings of this effect (in visual cortex) have been much discussed. The work of Simoncelli and colleagues (cited appropriately here) has been key in pointing out that sensitivity to cardinal directions matches the prevalence of cardinal information in the world. I think the work here extends that finding by demonstrating a particularly good correspondence between the spatial correlations in image statistics and this sensitivity over a number of basic features.</p><p>In short, I think this is an interesting, well-presented paper that I would encourage others to read, and that I would cite in my own work. However, I am not sure whether it is telling us something we didn't know, or just demonstrating something we did know in a particularly elegant fashion.</p><p><italic>Reviewer #2</italic></p><p>This paper describes a robust and interesting observation that links properties of natural images to psychophysical properties of the visual system. While this observation is certainly worthwhile publishing, I'm less convinced about the authors' conclusion with regard to “variance is salience” as an efficient coding principle for central sensory processing.</p><p>While the authors correctly note that central processing must be concerned with behaviorally relevant aspects of the inputs, the 'variance' in their principle is completely independent of the organism's behavior and purely defined in terms of the stimulus. In fact, none of their discussion nor references to the earlier van Hateren papers that the authors invoke as a framework for interpreting their findings involves behavior. I suggest to simply drop that part of their claim and the suggestion that their principle might account for central processing and describe it as a principle governing early sensory processing where different constraints apply than in the sensory periphery. 
(Unless I'm missing a central part of their argument, of course. Example below.)</p><p>I think the paper would benefit greatly from a more explicit setup, ideally including a Figure, of the various potential constraints faced by early sensory processing and their respective implications for their experiments. While this aspect is not the main contribution of their paper, it would make it more explicit which alternative models/constraints are being excluded by their observation. It would also delineate better, what is inevitable conclusion within the efficient coding framework (finite resources and high input noise), and what is interpretation/speculation. Both are currently mixed in the Discussion section.</p><p>Psychophysical measurements necessarily characterize the entire system; from periphery to behavior. The authors suggest their results support an optimization principle separate and different from that in the periphery and while I agree on an intuitive level, I would welcome help in thinking through the implications of combining two different principles, applied after each other, for the aggregate quantities that are being measured. E.g. 
why does the high input noise constraint only refer to what enters cortex, not already the retina, overriding the output-bottleneck in the visual periphery?</p><p>Finally, I'd like to commend the authors on making excellent use of the possibility of including supplementary information.</p><p>In summary, I recommend publication of this paper subject to:</p><p>1) Providing a more explicit setup for the different hypotheses to be tested and how they derive from the efficient coding principle subject to different constraints.</p><p>2) Omitting the behaviorally relevant claims not supported by their framework.</p><p>3) Responses to more minor points of critique below.</p><p><italic>Reviewer #3</italic></p><p>The authors demonstrate that the level of variability in various local, multi-point statistics in natural scenes quantitatively predicts the relative salience of these statistics. I find their data largely convincing, and I feel that this is an important question and that the authors are careful and creative in their approach here. However, I have several concerns, which I suspect in part reflect my own ignorance or misunderstanding about the present paper and past work by these authors.</p><p>First, I am not clear on what is qualitatively new here beyond what was shown in the nice 2010 PNAS paper by several of the same authors. I can see that some of the details of their analyses are different, and the comparisons here are more quantitative, but it seems to me that the main ideas and conclusions were already in place in the previous work.</p><p>The authors pitch their results in terms of cortical processing, but the only data presented are from human psychophysics, used here as a proxy for cortical processing. The authors give arguments for why salience strongly reflects cortical processing. 
But I feel that the claims about cortical processing are stated too strongly in some parts of the paper given that they do not directly measure cortical activity nor perform any manipulations to cortex, for example.</p><p>It seems to me that the dichotomy set up by the authors between predictions for peripheral and central processing is somewhat overstated. For example, in the final sentence of the Abstract, they state that the efficient coding hypothesis applies in “a different guise” and makes “different predictions” for the central nervous system than the periphery. They predict that sensitivity in the CNS should be greater for highly variable stimuli, but it is not clear to me that this is not one of the predictions one would make for peripheral sensory neurons using this principle.</p><p>What was the exact task being performed by the subjects?</p><p>What would <xref ref-type="fig" rid="fig1">Figure 1E</xref> look like for IID samples from white (or colored) noise?</p><p>How do the results change for images pixelated along a grid that is 45° tilted relative to the vertical and horizontal directions?</p><p>Related to this, could the √2 differences in the degree of variation for the vertical/horizontal and diagonal beta data points in <xref ref-type="fig" rid="fig3">Figure 3A</xref> be due to the difference in the distance between the points in question given the orientation of the pixelation grid?</p></body></sub-article><sub-article article-type="reply" id="SA2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.03722.027</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><p><italic>All reviewers found the data interesting, but all had concerns about whether they addressed the central hypotheses outlined in the abstract and introduction. 
In brief we do not find a convincing argument that the results 1) reflect a central mechanism, 2) relate to behavior, or 3) make different predictions than what one would predict for the sensory periphery given the efficient coding hypothesis. These questions are specified in detail in the reviews below. The consensus that emerged from the consultation session is that clarification and rewriting will be needed to ensure that the contribution of this work</italic> can <italic>be understood. Ordinarily</italic> eLife <italic>does not provide the full reviews, but in this case we believe it is important that you see the concerns of the reviewers as you prepare your revision</italic>.</p><p>Response to broad concerns:</p><p><italic>1) To what extent do our results reflect a central mechanism?</italic></p><p>There are three lines of evidence, and we detail these below and in the manuscript. First, stimuli are all high contrast (100%) and of a readily visible size (14 arcmin), so retinal limitations of contrast sensitivity and resolution are eliminated. Second, the task requires pooling of information over wide areas. Third, extracting three- and four-point correlations requires a kind of nonlinear processing (two stages of nonlinearity) that is not generally considered to be present in the retina; physiologic recordings show that neural responses to these correlations are not present in the thalamus, but are present in visual cortex, and much more so in V2 than in V1. We acknowledge that these lines are indirect; we do not directly record cortical activity, and we now state this explicitly in the manuscript (see response to comments raised by Reviewer 3). 
But we believe that together, these lines of evidence justify the statement that the thresholds are determined by limitations of central processing.</p><p><italic>2) To what extent do our results relate to behavior?</italic></p><p>The goal of the visual system is to make discriminations that are useful to guide action, rather than to reconstruct the image per se. We had used the term “behaviorally relevant” to highlight this idea, once in the Abstract and once in the manuscript; we have now removed it from the Abstract, where we see how it might have been confusing, and we expand on the point in the Introduction.</p><p>With regard to the comments raised by Reviewer 2, obviously our analysis of natural image statistics was without regard to task, and the psychophysical task was not designed to mimic a real-life behavior. But we think this is a strong point, not a weakness, as it shows that the allocation of resources in early cortical visual processing can be accounted for in a general framework. We now comment on this in the text.</p><p><italic>3) To what extent do our results make different predictions than one would predict for the sensory periphery?</italic></p><p>We now clarify this at several points in the manuscript, and we add a new Figure. Briefly, the contrast is not between peripheral and central processing per se, but the difference between two efficient coding regimes: one that is limited by noise in the output (i.e., limited capacity) and one regime that is limited by noise in the input (i.e., limited sampling). The periphery is typically considered to be characterized by the first regime, at least when it comes to thinking about receptive field properties, and we hypothesize that central processing is characterized by the second. 
The new Figure illustrates the qualitative difference between these regimes: when output noise is limiting, efficient coding predicts a whitening (reduced sensitivity for more variable input components); when input noise is limiting, efficient coding predicts the opposite (increased sensitivity for more variable input components).</p><p>Since central processing follows the peripheral processing, one could wonder how the two stages of the same processing stream can be subject to different constraints. The reason is as follows: the “input signal” in the context of efficient coding for the retina is directly the raw light intensity, and the efficient coding regime is derived from the spatiotemporal correlation structure of light intensity. In contrast, the input signals for central coding are nonlinear functions of the original image (image statistic coordinates); this means that the efficient coding regime is derived from the covariance structure of image statistics. Since the formal inputs to the encoders are very different (light or contrast in the periphery, image statistic estimates centrally), a different regime of efficient coding emerges.</p><p>Reviewer #1</p><p><italic>[…] In short, I think this is an interesting, well-presented paper that I would encourage others to read, and that I would cite in my own work. However, I am not sure whether it is telling us something we didn't know, or just demonstrating something we did know in a particularly elegant fashion</italic>.</p><p>We are very pleased that Reviewer 1 found our paper to be both interesting and well-presented. We do feel that the paper is telling us something new and rather exciting, as we explain below in responses to the specific comments.</p><p><italic>1) I wasn’t sure how the paper demonstrated that efficient coding operates at a ‘central’ level, as the authors claim in the abstract. 
In fact, the paper doesn’t really have anything to do with what most cognitive scientists think of as ‘central processing’.</italic></p><p>With regard to ‘central processing’: We use this term to distinguish our focus – cortical sensory processing – from processing in the sensory periphery. We think this is a reasonable use of the term, even though there is (of course) much additional central processing that we do not consider. But we understand the potential for confusion, and we now clarify the use of the term in the Introduction: “To test this hypothesis, we focus on early stages of central visual processing. Here, early visual cortex (V1 and V2) is charged with extracting edges, shapes, and other complex correlations of light between multiple points in space...”</p><p>We feel that there is strong evidence that the computations that determine task performance occur in primary visual cortex, both V1 and V2, and we expand on this point in the paper. Specifically, we now state:</p><p>Discussion: “Although we did not record cortical responses directly, several lines of evidence indicate that the perceptual thresholds we measured are determined by cortical processes. […] Conversely, macaque visual cortical neurons (Purpura et al., 1994), especially those in V2, manifest responses to three- and four-point correlations (Yu et al., 2013). 
”</p><p><italic>It shows that we are most sensitive to patches defined by cardinal orientations, and that variability along this axis is an informative way to characterise natural scenes</italic>.</p><p>Our results about sensitivities to pairwise statistics in horizontal, vertical, and diagonal directions go substantially beyond a demonstration of previously-known findings, in several ways.</p><p>First, the “on-axis” sensitivities are not obvious consequences of the orientation tuning of cortical neurons (since we consider both positive and negative correlations) or the oblique effect (our observed differences are substantially larger, likely because these configurations differ in more than orientation). For further discussion of the oblique effect, please see our response to point 2 below.</p><p>Second, only a small portion of our findings (a prediction of four relative sensitivities) relate to individual “axes.” Most of our findings (more than a dozen free parameters) relate to how the axes interact – again, not something that is a consequence of orientation-tuning or the oblique effect.</p><p>Finally, it is possible that there is a misunderstanding about the axes themselves: they are not the spatial axes within an image, but abstract axes within a space of image statistics. We can see how this might have been confusing, and we now clarify it with the following text:</p><p>Results: “When pairs of natural image statistics covary, thus sampling oblique directions not aligned with the coordinate axes in the space of image statistics, our hypothesis predicts that human perceptual sensitivity is matched to both the degree and the direction of that covariation (we are referring here to the orientation of a distribution in the coordinate plane of a pair of image statistics, and not to an orientation in physical space).”</p><p>An important way in which our results extend beyond what is already known relates to the predictions of normative theories. 
To date, normative theories applied to cortical processing have been focused on predicting the response properties of single cells, considered as linear or quasilinear filters. Here, we ask how resources should be distributed across a population of cells to represent a diverse set of nonlinear, higher-order features that cover a multidimensional domain. We highlight this goal within the Introduction:</p><p>Introduction: “Can we extend such theories beyond the sensory periphery to describe cortical sensitivity to complex sensory features? […] We will show that these ideas predict a specific organizing principle for aggregate sensitivities arising in cortex: the perceptual salience of complex sensory signals increases with the variability, or unpredictability, of the corresponding signals over the ensemble of natural stimuli.”</p><p>We additionally emphasize that the goal of our study is to measure how sensitivities are distributed across a set of features, and is not limited to identifying features for which sensitivity is greatest. We now highlight this within the Introduction and Discussion:</p><p>Introduction: “We compare the spatial variation of local patterns of light across natural images with human sensitivity to manipulations of the same patterns in synthetic images. This allows us to determine how sensitivity is distributed across many different features, rather than simply determining the most salient ones. To this end...”</p><p>Discussion: “How should neural mechanisms be distributed to represent a diverse set of informative sensory features? We argued that...”</p><p><italic>2) The psychophysical results seems in part to restate the well-known oblique effect. It is known that we are most sensitive to cardinal orientations, and the neural underpinnings of this effect (in visual cortex) have been much discussed. 
The work of Simoncelli and colleagues (cited appropriately here) has been key in pointing out that sensitivity to cardinal directions matches the prevalence of cardinal information in the world. I think the work here extends that finding by demonstrating a particularly good correspondence between the spatial correlations in image statistics and this sensitivity over a number of basic features</italic>.</p><p>While we can see how our findings might at first glance be considered to be part of an “oblique effect,” they are actually quite different. The effect size we report is much larger: sensitivities are about 50% higher for cardinal directions vs diagonal ones; in the classical oblique effect, sensitivity for oblique- vs cardinal-direction gratings differs by 10–20% in the midrange of spatial frequencies (Campbell et al., 1966). The reason that the effect size we observe is much larger is that the horizontal and vertical pairwise correlations differ from the diagonal pairwise correlations in more ways than just orientation: checks involved in horizontal and vertical pairwise correlations share an edge, while checks involved in diagonal pairwise correlations only share a corner. Furthermore, the image property relevant to explaining the oblique effect in the work of Simoncelli and colleagues is the local statistics of image gradients / oriented edges. While pairwise correlations between pixels (that we study) can influence image gradients (responsible for explaining the oblique effect) and vice versa, there is no simple one-to-one mapping between the two sets of statistics (e.g., high beta-vertical does not by itself imply a lot of vertical edge fragments). 
We now explain how our findings differ from the oblique effect:</p><p>Results: “Note that the difference between the sensitivities in the horizontal and vertical directions (β− and β|) vs the diagonal directions (β\ and β/) is not simply an “oblique effect”, i.e., a greater sensitivity for cardinally- vs obliquely-oriented contours (Campbell et al., 1966). Horizontal and vertical pairwise correlations differ from the diagonal pairwise configurations in more than just orientation: checks involved in horizontal and vertical pairwise correlations share an edge, while checks involved in diagonal pairwise correlations only share a corner. Correspondingly, the difference in sensitivities for horizontal and vertical correlations vs diagonal correlations is approximately 50%, which is much larger than the size of the classical oblique effect (10–20%) (Campbell et al., 1966).”</p><p>Additionally, much of our study is devoted to third and fourth-order correlations and the interactions of the texture statistics. 
These statistics are qualitatively different from any pairwise correlations that might be relevant for the oblique effect, and are crucial for our demonstration that efficient coding also applies beyond the pairwise order.</p><p>In sum, we agree that the main finding of the work can be framed in the context of the match between image statistics and visual sensitivities (as in the Simoncelli framework), but we think that it is important to emphasize that we are not looking at an “oblique effect.”</p><p>Reviewer #2</p><p><italic>[…] In summary, I recommend publication of this paper subject to:</italic></p><p><italic>1) Providing a more explicit setup for the different hypotheses to be tested and how they derive from the efficient coding principle subject to different constraints</italic>.</p><p><italic>2) Omitting the behaviorally relevant claims not supported by their framework</italic>.</p><p><italic>3) Responses to more minor points of critique below</italic>.</p><p>We are very pleased that Reviewer 2 recommends publication of our paper subject to his/her outlined changes. Below, we address each of his/her concerns: (<bold>A</bold>) is addressed in points 1-2 below, (<bold>B</bold>) is addressed in points 3 and 9 below, and (<bold>C</bold>) is addressed in points 4-8 below.</p><p><italic>1) I think the paper would benefit greatly from a more explicit setup, ideally including a figure, of the various potential constraints faced by early sensory processing and their respective implications for their experiments. While this aspect is not the main contribution of their paper, it would make it more explicit which alternative models/constraints are being excluded by their observation. It would also delineate better, what is inevitable conclusion within the efficient coding framework (finite resources and high input noise), and what is interpretation/speculation. Both are currently mixed in the Discussion section</italic>.</p><p>We agree. 
We added substantial clarifying material and a new figure (<xref ref-type="fig" rid="fig4">Figure 4</xref>), as described below.</p><p><italic>2) Psychophysical measurements necessarily characterize the entire system - from periphery to behavior. The authors suggest their results support an optimization principle separate and different from that in the periphery and while I agree on an intuitive level, I would welcome help in thinking through the implications of combining two different principles, applied after each other, for the aggregate quantities that are being measured. E.g. why does the high input noise constraint only refer to what enters cortex, not already the retina, overriding the output-bottleneck in the visual periphery?</italic></p><p>If we understand the reviewer’s comment, it can be rephrased as follows: how can the cortex be operating in a regime dominated by input noise, when it follows peripheral processing, which sees the same input but is dominated by output noise?</p><p>The resolution of this apparent paradox is twofold. First, the critical issue is not the amount of input noise, but rather its size relative to the output noise. So, even though there is input noise at the level of the visual input, its severe output restriction (the transmission bottleneck) means that peripheral processing may be operating in a regime dominated by output noise. For cortex, which does not have the same output bottleneck, the same amount of input noise can dominate. The second consideration is the following. As we and others suggest, we view the “job” of early cortical processing as making inferences about surfaces and materials from visual texture (an idea that we formalize by measuring sensitivity to local image statistics). 
Given this goal, the fact that cortex only has access to a limited sample of a texture (i.e., the sample presented by a particular object) is also a form of input noise.</p><p>To illustrate this point, consider the following “toy visual system” whose task is to detect and estimate the local coherent motion of small, randomly moving point-like objects that are of high contrast (for example, the classic task of Newsome and Pare (1988), but with the dots placed on a 1/f background). Neurons in the periphery efficiently code the dots, making use of decorrelation. Centrally, however, the object of interest is the correlation structure of moving dots in small image patches, to extract the local coherent motion component. For this task, central processes must compare counts of how many dots move in each direction; at low density of dots, these estimates will be subject to large sampling errors due to random arrival and departure of dots from the region that is being sampled. Importantly, no matter what the SNR of the input is (even in 100% contrast vision where each dot is perfectly resolved), the local coherence estimates will be noisy due to the fact that local dot detections come as rare (and random) events; and the efficient central processing will thus be subject to large input noise. In our case, the local image statistics play the role of local dot counts, but the basic idea, that sampling is a form of noise, is identical. In both cases, because the signals relevant to periphery and cortex are different, one can end up in a different efficient coding regime at each stage of visual processing.</p><p>As suggested by the reviewer, we have added a new figure (<xref ref-type="fig" rid="fig4">Figure 4</xref>) to help clarify these points. <xref ref-type="fig" rid="fig4">Figure 4</xref> illustrates the emergence of two efficient coding regimes in a system that operates under a total power (resource) constraint. 
The relevant regime, which depends on the relative strengths of input and output noise, reflects the constraints faced by the system under consideration. For our system (modeled as in <xref ref-type="fig" rid="fig4">Figure 4C</xref>), we take inputs to be nonlinear image features that have already undergone peripheral preprocessing, as caricatured by our image analysis pipeline. We then apply the efficient coding framework to this set of nonlinear features, and we ask how sensitivities should be distributed among them.</p><p>The key point is that because these image features (counts of glider colorings) must be sampled from a spatial region of an image, any estimation of these features will be limited by sampling noise. We hypothesize that this sampling noise, which is a form of input noise, dominates. This predicts (as shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>) that sensitivity should increase with signal variability. We test this prediction and show that it holds.</p><p>We reorganized and added to the text in order to illustrate this:</p><p>Discussion: “A simple model of efficient coding by neural populations is shown in <xref ref-type="fig" rid="fig4">Figure 4A</xref> (details in Methods, Two regimes of efficient coding). Here, to enable analytical calculations, we used linear filters of variable gain and subject to Gaussian noise to model a population of neural channels encoding different features. […] The relative sizes of input and output noise (controlled by Λ in <xref ref-type="fig" rid="fig4">Figure 4A</xref>) determines the input ranges over which the two qualitatively different regimes of efficient coding apply.”</p><p>In addition, we clarified our language throughout the Discussion (see Discussion, Cortex faces a different class of challenges than the periphery) to emphasize that the efficient coding regimes are a general feature of information optimization, and neither regime is a priori restricted to peripheral vs central processing. 
Thus, even in sensory periphery, there exist specific scenarios in which the signal is input-noise dominated. The best known example is perhaps that of night vision, when photon shot noise is no longer negligible and qualitatively reshapes peripheral information processing. Instead of decorrelation by center-surround receptive fields, receptive fields average the signal, i.e., they apply gain that enhances the correlations already present in the input in order to fight the detrimental effects of input noise.</p><p>We would like to emphasize clearly that the constraints considered here (nonlinear image features subject to sampling noise) thus differ from those considered by some of the most successful applications of efficient coding in the periphery. Because of these constraints, our finding that sensitivity increases with signal variability differs qualitatively from the findings of these studies, where whitening is optimal (or near optimal, see Doi and Lewicki (2014)) and sensitivity decreases with signal variability. Moreover, since feature extraction is thought to be one of the main tasks of central vision, the sampling noise constraint is likely to represent a general aspect of cortical processing, rather than a special case as for night vision in the periphery. And finally, the predictions made are quite different; we predict sensitivity to a wide variety of nonlinear features and their combinations, not the effective filtering behavior (i.e., shape) of a quasilinear receptive field.</p><p><italic>3) While the authors correctly note that central processing must be concerned with behaviorally relevant aspects of the inputs, the ‘variance’ in their principle is completely independent of the organism’s behavior and purely defined in terms of the stimulus. In fact, none of their discussion nor references to the earlier van Hateren papers that the authors invoke as a framework for interpreting their findings involves behavior. 
I suggest to simply drop that part of their claim and the suggestion that their principle might account for central processing and describe it as a principle governing early sensory processing where different constraints apply than in the sensory periphery. (Unless I’m missing a central part of their argument, of course. Example below</italic>.<italic>)</italic></p><p>We don’t intend to suggest that we are studying behavior, and we have edited the manuscript so that this is completely clear. We also added material to the Introduction and Discussion, including a new figure, that frames the work as consequences of differing constraints on peripheral and central processing.</p><p>Even though our focus is on visual processing and perception, the fact that visual processing must ultimately guide action plays a fundamental role: it motivates the idea that the visual system needs to make useful distinctions about the outside world, rather than reconstruct the image. But we can see that the term “behaviorally relevant” might have been misleading, so we have removed the term from the abstract, and we have explained the idea more carefully in the introduction:</p><p>“Can we extend such theories beyond the sensory periphery to describe cortical sensitivity to complex sensory features? Normative theories have been successful in predicting the response properties of single cells, including receptive fields in V1 (Olshausen and Field, 1996; Bell and Sejnowski, 1997; van Hateren and van der Schaaf, 1998; van Hateren and Ruderman, 1998; Hyvarinen and Hoyer, 2000; Vinje and Gallant, 2000; Karklin and Lewicki, 2009) and spectro-temporal receptive fields in primary auditory cortex (Carlson and DeWeese, 2002, 2012). 
[…] We will show that these ideas predict a specific organizing principle for aggregate sensitivities arising in cortex: the perceptual salience of complex sensory signals increases with the variability, or unpredictability, of the corresponding signals over the ensemble of natural stimuli.”</p><p>Also (as mentioned in the response to the editors), we think that the fact that the image analysis and psychophysics was without regard to task is a strong point, not a weakness. We find it quite remarkable that we see such a striking match between natural image statistics and perceptual sensitivity without including any notion of “value” as related to behavior. This finding, that higher order percepts emerge from efficient coding without explicit specification of the goal, was the original hope of the efficient coding framework as first put forward by Barlow and Attneave (Barlow, 1959, 1961; Attneave, 1954). It remains an interesting direction for future research to ask how the inclusion of value would shape the framework studied here. We now mention this explicitly in the Discussion:</p><p>“Finally, we emphasize that although we have focused on perception of image statistics, we do this with the premise that this process is in the service of inferring the materials and objects that created an image and ultimately, guiding action. Thus, it is notable that we found a tight correspondence between visual perception and natural scene statistics without considering a specific task or behavioral set, and indeed, the emergence of higher order percepts without explicit specification of a task was the original hope of the efficient coding framework as first put forward by Barlow and Attneave (Barlow, 1959, 1961; Attneave, 1954). 
Doubtless, these “top-down” factors influence the visual computations that underlie perception, and the nature and site of this influence are an important focus of future research.”</p><p><italic>4) In my understanding, the black/white asymmetry is the main systematic deviation from the prediction that was empirically found. I think this should be included (with Figure) and discussed in the main manuscript and not just in the SI</italic>.</p><p>We feel that the reviewer might have misunderstood our results in this regard: the main systematic deviation from prediction concerns positive and negative values of the fourth-order statistic α, not black/white asymmetry (the latter was eliminated by our preprocessing, in which we binarize image patches at the pixel intensity median such that each patch has equal numbers of black and white pixels (see Results, Analyzing local image statistics in natural scenes)).</p><p>Specifically, there is an asymmetry of the distribution of α-values extracted from natural images that is not mirrored in psychophysical sensitivities. We mention this in Discussion, Clues to neural mechanisms, where we reference the relevant figure (<xref ref-type="fig" rid="fig3s8">Figure 3–figure supplement 8</xref>). We now also include the discussion of this asymmetry within the main manuscript, in Methods, Asymmetries in distributions of natural image statistics.</p><p><italic>5) If your “glider” is anything more than a 2 × 2 window, then please explain in more detail.</italic></p><p>The reviewer is correct: the glider is a 2 × 2 window. 
We now clarify this in two places in the main text:</p><p>Results: “As we recently showed, some informative local correlations of natural scenes are captured by the configurations of luminances seen through a “glider”, i.e., a window defined by a 2 × 2 square arrangement of pixels (Tkačik et al., 2010).”</p><p>Results: “We characterize each patch by the histogram of 16 binary colorings (2<sup>2×2</sup>) seen through a square 2 × 2 pixel glider.”</p><p><italic>6) Block average should be explained better in the main text; wouldn’t have understood it without resorting to SI</italic>.</p><p>We now clarify the block-averaging process within the main text:</p><p>Results: “We preprocess the image patches as shown in <xref ref-type="fig" rid="fig1">Figure 1A</xref>. This involves first averaging pixel luminances over a square region of N × N pixels, which converts an image of size L<sub>1</sub> × L<sub>2</sub> pixels into an image of reduced size L<sub>1</sub>/N × L<sub>2</sub>/N pixels. Images are then divided into R × R square patches of these downsampled pixels and whitened (see Methods, Image preprocessing, for further details).”</p><p><italic>7) “Single scaling parameter for each image analysis”: what exactly does that mean? How many parameters per subject and comparison are needed? Can you say anything interesting about their distribution and the correlations between them?</italic></p><p>We clarify these points in the main text, including a rewording of the quoted phrase. As we explain, we carried out image analyses for several choices of two size parameters: a block-average factor N (where N × N image pixels are averaged at the first step of the processing pipeline) and a patch size R (where R × R is the size of a patch in N × N pixels in which statistics are determined). There were 2 choices of N (2 and 4) and 3 choices of R (32, 48, and 64), for a total of 6 parallel image analyses (in the SI, we show that our results hold over a wider range too). 
For each of these analyses, there was just one scale factor (the same for all statistics and all subjects), adjusted to minimize the least-squares error between the set of standard deviations and the set of psychophysical sensitivities:</p><p>Results: “This qualitative comparison can be converted to a quantitative one (<xref ref-type="fig" rid="fig3">Figure 3A</xref>), as a single scaling parameter aligns the standard deviation of natural image statistics with the corresponding perceptual sensitivities. In this procedure, each of the six image analyses is scaled by a single multiplicative factor that minimizes the squared error between the set of standard deviations and the set of subject-averaged sensitivities (see Methods, Image preprocessing, and <xref ref-type="fig" rid="fig3s1">Figure 3–figure supplement 1</xref> for additional details regarding scaling).”</p><p>With regard to a relationship between the scale factors and the analysis parameters (the block-average factor N and patch size R), there is (as one might expect) a systematic relationship. We now show this in detail in Methods, Image preprocessing, with a new figure and new text:</p><p>Methods: “To compare between natural image and psychophysical analyses, we scale the set of 9 standard deviations extracted from a given image analysis by a multiplicative factor that minimizes the squared error between the set of 9 standard deviations and the set of 9 psychophysical sensitivities. <xref ref-type="fig" rid="fig3s1">Figure 3–figure supplement 1</xref> shows the value of the scale factor for different choices of the block-average factor N and patch size R.”</p><p><xref ref-type="fig" rid="fig3s1">Figure 3–figure supplement 1</xref>. Scaling of natural image analyses. We scale each image analysis by a single scale factor that minimizes the squared error between the set of 9 standard deviations and the set of 9 psychophysical sensitivities. 
The scale factors are shown here as a function of block-average factor N for different choices of the patch size R. We find that the variance of image statistics decreases with increasing values of N, and thus larger values of N require a larger scale factor. Similarly, for a given value of N, the variance of image statistics increases with increasing R, and thus larger values of R require a larger scale factor.</p><p><italic>8) I’d welcome it if the authors could spell out the Null-hypothesis for their significance tests, rather than saying what they did mechanistically (“coordinate labels independently shuffled”).</italic></p><p>We now do this within the main text by replacing the mechanistic description of the significance tests with a description of each null hypothesis:</p><p>Results: “This value ranges from 0.987 to 0.999 across image analyses and was consistently larger than the value measured under the null hypothesis that the apparent correspondence between statistics and sensitivities is chance (p ≤ 0.0003 for each image analysis; see Methods, Permutation tests, for details regarding statistical tests).”</p><p>Results: “This value, averaged across coordinate planes, ranges from 0.953 to 0.977 across image analyses. We compared this correspondence to that obtained under the null hypotheses that either (i) the apparent correspondence between image statistic covariances and isodiscrimination contours is chance, or (ii) the apparent covariances in image statistics are due to chance. 
The observed correspondence is much greater than the value measured under either null hypothesis (p ≤ 0.0003 for each image analysis under both hypotheses; see Methods, Analysis of image statistics in pairwise coordinate planes, and <xref ref-type="fig" rid="fig3s2">Figure 3–figure supplement 2</xref> for comparisons of eccentricity and tilt, and Methods, Permutation tests, for statistical tests).”</p><p>We also made corresponding additions to Methods, Permutation tests, in order to supplement the mechanistic description of the significance tests with the full descriptions of the null hypotheses.</p><p><italic>9) Discussion: “Cortical mechanisms should be...”: This is in clear contradiction to the behaviorally relevant argument elsewhere in the paper. Very variable features may be behaviorally relevant demanding cortical resources, or they may not be asking for them to be factored/normalized out (e.g. overall brightness). The theoretical framework is all about maximizing information with respect to the inputs, not with respect to behaviorally relevant outputs</italic>.</p><p>We agree, this sentence did not properly explain our ideas. We rewrote this section as follows:</p><p>Discussion: “How should neural mechanisms be distributed to represent a diverse set of informative sensory features? We argued that, when performance requires inferences limited by sampling of the statistics of input features, resources should be devoted in proportion to feature variability. A basic idea here is that features that range over a wider range of possible values are less predictable, and will better distinguish between contexts in the face of input noise. We used this hypothesis to successfully predict...”</p><p>Reviewer #3</p><p><italic>The authors demonstrate that the level of variability in various local, multi-point statistics in natural scenes quantitatively predicts the relative salience of these statistics. 
I find their data largely convincing, and I feel that this is an important question and that the authors are careful and creative in their approach here. However, I have several concerns, which I suspect in part reflect my own ignorance or misunderstanding about the present paper and past work by these authors</italic>.</p><p><italic>First, I am not clear on what is qualitatively new here beyond what was shown in the nice 2010 PNAS paper by several of the same authors. I</italic> can <italic>see that some of the details of their analyses are different, and the comparisons here are more quantitative, but it seems to me that the main ideas and conclusions were already in place in the previous work</italic>.</p><p>Briefly, the previous work (Tkačik et al., 2010) demonstrated a close relationship between which image statistics are analyzed by the visual system, and the statistics of natural images: resources are devoted to image statistics that cannot be predicted from simpler ones (Tkačik et al., 2010). Here, we build on this, and examine allocation of resources within this identified set of informative images. The previous work made no attempt to predict this allocation, and the current work is not applicable to image statistics that are uninformative (the distinction made in the previous work).</p><p>Previous work considered natural scene statistics defined based on local correlations defined over different spatial configurations of pixels. It demonstrated that these statistics can be divided into two groups, informative and uninformative. Informative features – higher-order statistics whose value cannot be deduced from lower-order statistics – are encoded, while uninformative features are not. Indeed, the visual system would be wasteful if it were to invest in mechanisms to encode uninformative statistics, and evidence from Tkačik et al. (2010) suggests that this does not happen. 
This advance was crucial; as one looks at high-order statistics, there is an exponential explosion of correlations that we (or the brain) could compute. Tkačik et al. (2010) showed that it only makes sense to focus on ones that are informative, and that “informativeness” can be determined from natural scenes alone.</p><p>The current manuscript takes this finding as a starting point for exploring the relative allocation of resources to encoding the set of nine informative statistics defined by the 2 × 2 square glider. Unlike Tkačik et al. (2010), which simply identified this set of statistics as informative, here we make and confirm dozens of quantitative predictions concerning how resources are allocated to encode them. The fact that an application of the efficient coding principle is successful in characterizing resource allocation is by no means a straightforward consequence of Tkačik et al. (2010), as any allocation of resources within this parameter set would have been consistent with those previous results.</p><p>To clarify this distinction, we have schematized the existence of informative vs uninformative features in a newly-added figure (<xref ref-type="fig" rid="fig4">Figure 4</xref>), and in the associated text. We note that the existence of uninformative statistics was the focus of previous work, and we note that the present work focuses on the optimal allocation of resources among those statistics that are informative:</p><p>Results: “Successive stages of sensory processing share the same broad goals: invest resources in encoding stimulus features that are sufficiently informative, and suppress less-informative ones. In the periphery, this is exemplified by the well-known suppression of very low spatial frequencies; in cortex, this is exemplified by insensitivity to high-order correlations that are predictable from lower-order ones. 
Previous work has shown that such higher-order correlations can be separated into two groups – informative and uninformative – and only the informative ones are encoded (Tkačik et al., 2010). We used this finding to select an informative subspace for the present study, and we asked how resources should be efficiently allocated amongst features within this informative subspace.”</p><p><italic>The authors pitch their results in terms of cortical processing, but the only data presented are from human psychophysics, used here as a proxy for cortical processing. The authors give arguments for why salience strongly reflects cortical processing. But I feel that the claims about cortical processing are stated too strongly in some parts of the paper given that they do not directly measure cortical activity nor perform any manipulations to cortex, for example</italic>.</p><p>We rewrote this section, acknowledging that the inference is indirect and explicitly stating that we did not record cortical responses. But we do feel that the evidence that psychophysical performance reflects cortical processing is compelling, as it is based on independent lines of evidence that, individually, are strong: briefly, stimuli are many times above contrast and resolution thresholds; task performance requires pooling over wide areas; the computations required for the task are more complex than what is considered to occur in the retina; and physiologic recordings show that neural responses to high-order correlations are not present in the thalamus, but are present in visual cortex, and much more so in V2 than in V1.</p><p>Specifically, we state:</p><p>Results: “Although we did not record cortical responses directly, several lines of evidence indicate that the perceptual thresholds we measured are determined by cortical processes. 
[…] Conversely, macaque visual cortical neurons (Purpura et al., 1994), especially those in V2, manifest responses to three- and four-point correlations (Yu et al., 2013).”</p><p>Because this evidence is admittedly indirect, we added the modifier “likely” to the subheader for this section, to read “Perceptual salience of multi-point correlations likely arises in cortex.”</p><p><italic>It seems to me that the dichotomy set up by the authors between predictions for peripheral and central processing is somewhat overstated. For example, in the final sentence of the Abstract, they state that the efficient coding hypothesis applies in “a different guise” and makes “different predictions” for the central nervous system than the periphery. They predict that sensitivity in the CNS should be greater for highly variable stimuli, but it is not clear to me that this is not one of the predictions one would make for peripheral sensory neurons using this principle</italic>.</p><p>We see how a ‘dichotomy’ is an oversimplification, and a fairer description is that of two qualitatively different regimes (“whitening” and input-noise limiting) depending on the relative strengths of input and output noise.</p><p>As mentioned in our response to Reviewer 1, we clarified our language throughout the Discussion to emphasize that the efficient coding regimes are a general feature of information optimization, and neither regime is a priori restricted to peripheral vs central processing. Thus, even in sensory periphery, there might exist special regimes when the signal is input-noise dominated. The best known example is perhaps that of night vision, when photon shot noise is no longer negligible and qualitatively reshapes peripheral information processing. Instead of decorrelation by center-surround receptive fields, receptive fields average the signal, i.e., they apply gain that enhances the correlations already present in the input in order to fight the detrimental effects of input noise. 
These special considerations, which are relevant for night vision, do not apply generally to the extensively-studied area of daylight vision.</p><p>We would like to emphasize clearly that the constraints considered here (nonlinear image features subject to sampling noise) thus differ from those considered by some of the most successful applications of efficient coding in the periphery. Because of these constraints, our finding that sensitivity increases with signal variability differs qualitatively from the findings of these studies, where whitening is optimal (or near optimal, see Doi and Lewicki (2014)) and sensitivity decreases with signal variability. Moreover, since feature extraction is thought to be one of the main tasks of central vision, the sampling noise constraint is likely to represent a general aspect of cortical processing, rather than a special case as for night vision in the periphery.</p><p>In addition to changes in the text, we have added a fourth figure that illustrates these two regimes in a simple parallel-channel model. As an illustration of the regime in which output noise dominates, we discuss the well-known suppression of low spatial frequencies (Discussion). We then hypothesize that the other regime (input noise dominating) is relevant for central processing (Discussion). We discuss the specific application of efficient coding in this context, which differs from an application in the sensory periphery. Here, we apply efficient coding to complex nonlinear features (counts of glider colorings), where sampling fluctuations in glider counts provide a source of input noise.</p><p>We removed language that dichotomizes peripheral vs central processing, such as “it applies in a different guise and makes different predictions” (formerly the final sentence of the Abstract). 
Instead, we call upon the previous successes of efficient coding in the periphery in order to highlight one particular regime of efficient coding (whitening), and we use these examples to contrast the qualitative findings of our study.</p><p><italic>What was the exact task being performed by the subjects?</italic></p><p>We added the following clarification about the psychophysical task, and we refer the reader to Methods, Psychophysical methods, for additional details:</p><p>Results: “To characterize perceptual sensitivity to different statistics, we isolated them in synthetic visual images and used a figure/ground segmentation task (<xref ref-type="fig" rid="fig2">Figure 2B</xref>). We used a four-alternative forced-choice task in which stimuli consisted of a textured target and a binary noise background (or vice-versa). Each stimulus was presented for 120 ms and was followed by a noise mask. Subjects were then asked to identify the spatial location (top, bottom, left, or right) of the target. Experiments were carried out for synthetic stimuli in which the target or background was defined by varying image statistic coordinates independently (<xref ref-type="fig" rid="fig2">Figure 2A</xref> shows examples of gamuts from which stimuli are built).”</p><p><italic>What would</italic> <xref ref-type="fig" rid="fig1"><italic>Figure 1</italic>E</xref> <italic>look like for IID samples from white (or colored) noise?</italic></p><p>When natural images are replaced with white noise, the variation is identical across individual image statistics. (The same is true for colored noise, since our processing pipeline includes whitening.) While this is mathematically guaranteed, it may not be obvious, and we agree that showing it is helpful. We now include a sentence within the main manuscript:</p><p>Results: “Interestingly, third-order correlations are the least variable across image patches. 
An analogous analysis performed on white noise yields a flat distribution with considerably smaller standard deviation values (See Methods, Analysis variants for Penn Natural Image Database, and <xref ref-type="fig" rid="fig1s3">Figure 1–figure supplement 3</xref> for comparison). These (and subsequent) findings are preserved across different choices of image analysis parameters...”</p><p>We have also included an additional Figure in Methods, Analysis variants for Penn Natural Image Database that illustrates this comparison, along with a corresponding addition to the text:</p><p>Methods: “In <xref ref-type="fig" rid="fig1s3">Figure 1–figure supplement 3</xref>, we show that the relative variation in different image statistics (first shown in <xref ref-type="fig" rid="fig1">Figure 1E</xref>) is not an artifact of our image analysis pipeline, as the pattern of variation is destroyed if white-noise image patches are instead used.”</p><p><xref ref-type="fig" rid="fig1s3">Figure 1–figure supplement 3</xref>. Image statistics along single coordinate axes for white-noise patches. The robustly observed statistical structure of natural scenes (open circles) is completely absent from the same analysis performed on samples of white noise (shaded circles). The inset shows that this holds across analysis parameters.</p><p><italic>How do the results change for images pixelated along a grid that is 45 degrees tilted relative to the vertical and horizontal directions?</italic></p><p>Of course we predict that there would still be a close correspondence between image statistics and psychophysics. But testing this, and doing so in a way that accurately isolates what would likely be a subtle effect of the grid rotation, is unfortunately not practical. Re-photographing the images with an oblique sensor would be required to avoid the artifacts that would arise from digital rotation and resampling. 
Collecting a parallel set of psychophysical data would require approximately 200,000 additional psychophysical judgments (the better part of a year at a humane pace). We hope the reviewer understands.</p><p><italic>Related to this, could the √2 differences in the degree of variation for the vertical/horizontal and diagonal beta data points in</italic> <xref ref-type="fig" rid="fig3"><italic>Figure 3</italic>A</xref> <italic>be due to the difference in the distance between the points in question given the orientation of the pixelation grid?</italic></p><p>It is an interesting observation, but there is more going on than just a difference in center points: for vertical/horizontal two-point sensitivities, the checks involved share a common edge, while for the diagonal case, they only share a corner. Conversely, other experiments show that increasing the spacing between checks (in either the cardinal or diagonal directions) has very little effect on the sensitivities, over a fivefold range including the range used here (Conte et al., 2014). So we think that the finding that the ratio is approximately √2 is likely to be a coincidence. We’d therefore prefer not to mention this point,
as mentioning it but then describing the above evidence would likely be viewed as a distraction.</p><p>But we do now emphasize that the diagonal and cardinal directions differ by configuration and not just orientation, so that it is clear that the difference in sensitivities is not simply an “oblique effect.” We have clarified this with the following addition to the main text:</p><p>Results: “Note that the difference between the sensitivities in the horizontal and vertical directions (β− and β|) vs the diagonal directions (β\ and β/) is not simply an “oblique effect”, i.e., a greater sensitivity to cardinally- vs obliquely-oriented contours (Campbell et al., 1966). Horizontal and vertical pairwise correlations differ from the diagonal pairwise correlations in more than just orientation: checks involved in horizontal and vertical pairwise correlations share an edge, while checks involved in diagonal pairwise correlations only share a corner. Correspondingly, the difference in sensitivities for horizontal and vertical correlations vs diagonal correlations is approximately 50%, which is much larger than the size of the classical oblique effect (10–20%, see Campbell et al. (1966)).”</p></body></sub-article></article> |