<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN" "JATS-archivearticle1.dtd"><article article-type="article-commentary" dtd-version="1.1d1" xmlns:xlink="http://www.w3.org/1999/xlink"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">01136</article-id><article-id pub-id-type="doi">10.7554/eLife.01136</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Insight</subject></subj-group><subj-group subj-group-type="heading"><subject>Neuroscience</subject></subj-group><subj-group subj-group-type="sub-display-channel"><subject>Psychophysics</subject></subj-group></article-categories><title-group><article-title>Time is of the essence for auditory scene analysis</article-title></title-group><contrib-group><contrib contrib-type="author" id="author-1347"><name><surname>Dykstra</surname><given-names>Andrew R</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="conf1"/><x> is at the </x></contrib><contrib contrib-type="author" corresp="yes" id="author-4378"><name><surname>Gutschalk</surname><given-names>Alexander</given-names></name><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="conf1"/><x> is at the </x></contrib><aff id="aff1"><institution content-type="dept">Auditory Cognition Lab, Department of Neurology</institution>, <institution>Ruprecht-Karls-Universität Heidelberg</institution>, <addr-line><named-content content-type="city">Heidelberg</named-content></addr-line>, 
<country>Germany</country> <email>andrew.dykstra@med.uni-heidelberg.de</email></aff><aff id="aff2"><institution content-type="dept">Auditory Cognition Lab, Department of Neurology</institution>, <institution>Ruprecht-Karls-Universität Heidelberg</institution>, <addr-line><named-content content-type="city">Heidelberg</named-content></addr-line>, <country>Germany</country> <email>alexander.gutschalk@med.uni-heidelberg.de</email></aff></contrib-group><pub-date date-type="pub" publication-format="electronic"><day>23</day><month>07</month><year>2013</year></pub-date><pub-date pub-type="collection"><year>2013</year></pub-date><volume>2</volume><elocation-id>e01136</elocation-id><permissions><copyright-statement>© 2013, Dykstra and Gutschalk</copyright-statement><copyright-year>2013</copyright-year><copyright-holder>Dykstra and Gutschalk</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/3.0/"><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife01136.pdf"/><related-article ext-link-type="doi" id="ra1" related-article-type="commentary-article" xlink:href="10.7554/eLife.00699"/><abstract><p>Using computational models and stimuli that resemble natural acoustic signals, auditory scientists explore how we segregate competing streams of sound.</p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>auditory scene analysis</kwd><kwd>temporal coherence</kwd><kwd>psychophysics</kwd><kwd>segregation</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>Human</kwd></kwd-group></article-meta></front><body><boxed-text><p><bold>Related research 
article</bold> Teki S, Chait M, Kumar S, Shamma S, Griffiths TD. 2013. Segregation of complex acoustic scenes based on temporal coherence. <italic>eLife</italic> <bold>2</bold>:e00699. doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.7554/eLife.00699">10.7554/eLife.00699</ext-link></p><p><bold>Image</bold> An acoustic stimulus in which elements of the target (black box) overlap in time and frequency with those of the background</p><p><inline-graphic xlink:href="elife01136inf001"/></p></boxed-text><p>On a busy street corner or in a crowded bar, sounds from many different sources mix together before entering the ear canal. However, despite possessing just two ears, humans and other animals are remarkably adept at sorting out which sounds belong to which source. This process, known as auditory scene analysis (<xref ref-type="bibr" rid="bib1">Bregman, 1990</xref>), is thought to underlie our ability to selectively listen to a single auditory ‘stream’ amidst competing streams: the so-called ‘cocktail party problem’ (<xref ref-type="bibr" rid="bib3">Cherry, 1953</xref>; <xref ref-type="bibr" rid="bib2">Broadbent, 1954</xref>). The loss of this ability is one of the most significant difficulties faced by individuals with hearing loss or damage to the auditory system, and may also be affected by the normal ageing process.</p><p>In contrast to the complexity of the acoustic environments we encounter on a daily basis, the vast majority of laboratory investigations into auditory scene analysis have used quite simple signals, often consisting of only a few elements (<xref ref-type="fig" rid="fig1">Figure 1A</xref>). Such stimuli have been used in an extensive body of research, including behavioural studies, neuroimaging experiments, and direct neuronal recordings. 
This research has told us a lot about the fundamental ways in which humans process sound, but some have questioned how relevant such simple stimuli are in understanding how we appreciate music or perceive speech at a cocktail party. Now, in <italic>eLife</italic>, Timothy Griffiths and co-workers—including Sundeep Teki and Maria Chait as joint first authors—report how they have used a new stimulus that more closely approximates natural acoustic signals to demonstrate that temporal coherence (that is, the coincidence of sound elements in and across time) is fundamental to auditory scene analysis in humans (<xref ref-type="bibr" rid="bib10">Teki et al., 2013</xref>).<fig id="fig1" position="float"><label>Figure 1.</label><caption><title>Representations of the relationship between time and frequency in three different types of stimuli that have been used to study auditory scene analysis.</title><p>(<bold>A</bold>) The galloping ABA_ paradigm introduced by <xref ref-type="bibr" rid="bib12">Van Noorden (1975)</xref>: when subjects are played two tones that differ little in frequency (lower panel), they report hearing a single, galloping stream. Conversely, when the difference in frequency is large and the low and high tones are out of synch (upper panel), listeners report hearing two regular streams simultaneously. (<bold>B</bold>) The jittered informational masking paradigm introduced by <xref ref-type="bibr" rid="bib6">Gutschalk et al. (2008)</xref>: although the blue target tones are easy to discriminate visually from the multi-tone background, listeners do not always hear them. (<bold>C</bold>) The stochastic figure-ground stimuli introduced by <xref ref-type="bibr" rid="bib11">Teki et al. 
(2011)</xref> (blue bars) contain elements of different frequencies, making them more like the sounds we encounter in everyday life than <bold>A</bold> and <bold>B</bold>.</p></caption><graphic xlink:href="elife01136f001"/></fig></p><p>The first models of our ability to segregate sound sources were based on data from behavioural, neurophysiological and imaging experiments in which subjects listened to various acoustic stimuli similar to those in <xref ref-type="fig" rid="fig1">Figure 1A</xref> and were asked to report whether they heard one or two streams of sound. The results of many such experiments are consistent with a model of auditory scene analysis in which the perception of a stream of sound is associated with the activity of a particular population of neurons, which can be readily distinguished from the activity of other populations (for a review see <xref ref-type="bibr" rid="bib7">Micheyl et al., 2007</xref>). However, recent work has shown that sounds that clearly activate distinct neuronal populations can, when synchronous, result in the percept of a single stream (<xref ref-type="bibr" rid="bib5">Elhilali et al., 2009</xref>). This led to the proposal that, subsequent to the auditory input being broken down into features such as pitch or spatial location, the sound from a single source is bound back together by temporal coherence between the neuronal populations representing its constituent features (<xref ref-type="bibr" rid="bib5">Elhilali et al., 2009</xref>; <xref ref-type="bibr" rid="bib9">Shamma et al., 2011</xref>).</p><p>Teki, Chait and co-workers—who are based at University College London, Newcastle University and the University of Maryland—extend previous work by devising a new ‘stochastic figure-ground’ stimulus (<xref ref-type="fig" rid="fig1">Figure 1C</xref>) that <italic>requires</italic> listeners to integrate information across time and frequency in order to perceive the blue ‘figure’ as separate from the background. 
They find that human listeners are quite sensitive to such figures. Furthermore, using computational modelling, they demonstrate that temporal coherence can at least qualitatively account for the results of behavioural experiments—which models based purely on the activation of separate populations struggle to explain. Because competing streams of speech also overlap in time and frequency, the data obtained with these stimuli further suggest that the brain could use this approach to solve the cocktail-party problem.</p><p>Although the current work is a substantial advance, and indicates that the human auditory system likely performs temporal coherence analysis, several questions remain unanswered. We know little about how or where this analysis might be performed in the brain, or how the results of such an analysis might be utilized by other brain regions. An earlier fMRI study revealed that activity in a region of the brain called the intraparietal sulcus increased when these new stimuli were perceived (<xref ref-type="bibr" rid="bib11">Teki et al., 2011</xref>). Teki and co-workers therefore propose that the intraparietal sulcus either carries out temporal coherence computations or represents their output. This leaves open the possibility that these stimuli, and auditory streams generally, are segregated at a relatively early stage of processing, perhaps in auditory cortex. This would be consistent with recent research using other types of stimuli (<xref ref-type="fig" rid="fig1">Figure 1B</xref>) (<xref ref-type="bibr" rid="bib6">Gutschalk et al., 2008</xref>; <xref ref-type="bibr" rid="bib4">Dykstra, 2011</xref>) as well as updated versions of the temporal-coherence model (<xref ref-type="bibr" rid="bib8">Shamma et al., 2013</xref>).</p><p>Moreover, there are several phenomena that indicate that mechanisms other than, or in addition to, temporal coherence are required to fully explain how we perceptually organize sound. 
Bistable perception—whereby identical stimuli can give rise to two or more distinct percepts—is a particularly relevant example. On its own, temporal coherence cannot account for the fact that the same stimulus in the classical streaming paradigm (<xref ref-type="fig" rid="fig1">Figure 1A</xref>) can be heard as either one or two streams, or that the targets in an informational-masking stimulus (<xref ref-type="fig" rid="fig1">Figure 1B</xref>) are only sometimes perceived. The complex relationship between these sounds and the percepts they generate likely depends on additional mechanisms, acting both before and after the brain computes temporal coherence. However, this model provides a new framework within which to examine such questions, and should spark exciting new avenues of research in auditory scene analysis.</p></body><back><fn-group content-type="competing-interest"><fn fn-type="conflict" id="conf1"><label>Competing interests:</label><p>The authors declare that no competing interests exist.</p></fn></fn-group><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bregman</surname><given-names>AS</given-names></name></person-group><year>1990</year><source>Auditory scene analysis: the perceptual organization of sound</source><publisher-loc>Cambridge</publisher-loc><publisher-name>MIT Press</publisher-name></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Broadbent</surname><given-names>D</given-names></name></person-group><year>1954</year><article-title>The role of auditory localization in attention and memory span</article-title><source>J Exp Psychol</source><volume>47</volume><fpage>191</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1037/h0054182</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Cherry</surname><given-names>EC</given-names></name></person-group><year>1953</year><article-title>Some experiments on the recognition of speech, with one and with two ears</article-title><source>J Acoust Soc Am</source><volume>25</volume><fpage>975</fpage><pub-id pub-id-type="doi">10.1121/1.1907229</pub-id></element-citation></ref><ref id="bib4"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Dykstra</surname><given-names>AR</given-names></name></person-group><year>2011</year><source>Neural correlates of auditory perceptual organization measured with direct cortical recordings in humans [Thesis]</source><publisher-name>Massachusetts Institute of Technology</publisher-name><ext-link ext-link-type="uri" xlink:href="http://dspace.mit.edu/handle/1721.1/68451">http://dspace.mit.edu/handle/1721.1/68451</ext-link><fpage>148</fpage><lpage>69</lpage></element-citation></ref><ref id="bib5"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elhilali</surname><given-names>M</given-names></name><name><surname>Ma</surname><given-names>L</given-names></name><name><surname>Micheyl</surname><given-names>C</given-names></name><name><surname>Oxenham</surname><given-names>AJ</given-names></name><name><surname>Shamma</surname><given-names>SA</given-names></name></person-group><year>2009</year><article-title>Temporal coherence in the perceptual organization and cortical representation of auditory scenes</article-title><source>Neuron</source><volume>61</volume><fpage>317</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1016/j.neuron.2008.12.005</pub-id></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Gutschalk</surname><given-names>A</given-names></name><name><surname>Micheyl</surname><given-names>C</given-names></name><name><surname>Oxenham</surname><given-names>AJ</given-names></name></person-group><year>2008</year><article-title>Neural correlates of auditory perceptual awareness under informational masking</article-title><source>PLOS Biol</source><volume>6</volume><fpage>e138</fpage><pub-id pub-id-type="doi">10.1371/journal.pbio.0060138</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Micheyl</surname><given-names>C</given-names></name><name><surname>Carlyon</surname><given-names>RP</given-names></name><name><surname>Gutschalk</surname><given-names>A</given-names></name><name><surname>Melcher</surname><given-names>JR</given-names></name><name><surname>Oxenham</surname><given-names>AJ</given-names></name><name><surname>Rauschecker</surname><given-names>JP</given-names></name><etal/></person-group><year>2007</year><article-title>The role of auditory cortex in the formation of auditory streams</article-title><source>Hear Res</source><volume>229</volume><fpage>116</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1016/j.heares.2007.01.007</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Shamma</surname><given-names>S</given-names></name><name><surname>Elhilali</surname><given-names>M</given-names></name><name><surname>Ma</surname><given-names>L</given-names></name><name><surname>Micheyl</surname><given-names>C</given-names></name><name><surname>Oxenham</surname><given-names>AJ</given-names></name><name><surname>Pressnitzer</surname><given-names>D</given-names></name><etal/></person-group><year>2013</year><article-title>Temporal coherence and the streaming of complex sounds</article-title><person-group 
person-group-type="editor"><name><surname>Moore</surname><given-names>BCJ</given-names></name><name><surname>Carlyon</surname><given-names>RP</given-names></name><name><surname>Patterson</surname><given-names>RD</given-names></name><name><surname>Gockel</surname><given-names>HE</given-names></name><name><surname>Winter</surname><given-names>IM</given-names></name></person-group><source>Basic aspects of hearing: physiology and perception</source><publisher-loc>New York</publisher-loc><publisher-name>Springer</publisher-name><fpage>535</fpage><lpage>44</lpage></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shamma</surname><given-names>SA</given-names></name><name><surname>Elhilali</surname><given-names>M</given-names></name><name><surname>Micheyl</surname><given-names>C</given-names></name></person-group><year>2011</year><article-title>Temporal coherence and attention in auditory scene analysis</article-title><source>Trends Neurosci</source><volume>34</volume><fpage>114</fpage><lpage>23</lpage><pub-id pub-id-type="doi">10.1016/j.tins.2010.11.002</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Teki</surname><given-names>S</given-names></name><name><surname>Chait</surname><given-names>M</given-names></name><name><surname>Kumar</surname><given-names>S</given-names></name><name><surname>Shamma</surname><given-names>S</given-names></name><name><surname>Griffiths</surname><given-names>TD</given-names></name></person-group><year>2013</year><article-title>Segregation of complex acoustic scenes based on temporal coherence</article-title><source>eLife</source><volume>2</volume><fpage>e00699</fpage><pub-id pub-id-type="doi">10.7554/eLife.00699</pub-id></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Teki</surname><given-names>S</given-names></name><name><surname>Chait</surname><given-names>M</given-names></name><name><surname>Kumar</surname><given-names>S</given-names></name><name><surname>von Kriegstein</surname><given-names>K</given-names></name><name><surname>Griffiths</surname><given-names>TD</given-names></name></person-group><year>2011</year><article-title>Brain bases for auditory stimulus-driven figure-ground segregation</article-title><source>J Neurosci</source><volume>31</volume><fpage>164</fpage><lpage>71</lpage><pub-id pub-id-type="doi">10.1523/JNEUROSCI.3788-10.2011</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Van Noorden</surname><given-names>L</given-names></name></person-group><year>1975</year><source>Temporal coherence in the perception of tone sequences [Thesis]</source><publisher-loc>Eindhoven, The Netherlands</publisher-loc><publisher-name>Technical University Eindhoven</publisher-name></element-citation></ref></ref-list></back></article>