Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
2 lines (1 sloc) 195 KB
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN" "JATS-archivearticle1.dtd"><article article-type="research-article" dtd-version="1.1d1" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">00522</article-id><article-id pub-id-type="doi">10.7554/eLife.00522</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research article</subject></subj-group><subj-group subj-group-type="heading"><subject>Developmental biology and stem cells</subject></subj-group><subj-group subj-group-type="heading"><subject>Genomics and evolutionary biology</subject></subj-group></article-categories><title-group><article-title>Cellular resolution models for <italic>even skipped</italic> regulation in the entire <italic>Drosophila</italic> embryo</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" id="author-3730"><name><surname>Ilsley</surname><given-names>Garth R</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-3"/><xref ref-type="other" rid="par-6"/><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-3731"><name><surname>Fisher</surname><given-names>Jasmin</given-names></name><xref ref-type="aff" 
rid="aff3"/><xref ref-type="aff" rid="aff4"/><xref ref-type="other" rid="par-5"/><xref ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-3733"><name><surname>Apweiler</surname><given-names>Rolf</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-3732"><name><surname>DePace</surname><given-names>Angela H</given-names></name><xref ref-type="aff" rid="aff5"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-1392"><name><surname>Luscombe</surname><given-names>Nicholas M</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="aff" rid="aff6"/><xref ref-type="aff" rid="aff7"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-2"/><xref ref-type="other" rid="par-3"/><xref ref-type="other" rid="par-7"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf1"/></contrib><aff id="aff1"><institution>European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus</institution>, <addr-line><named-content content-type="city">Cambridge</named-content></addr-line>, <country>United Kingdom</country></aff><aff id="aff2"><institution>Okinawa Institute of Science and Technology Graduate University</institution>, <addr-line><named-content content-type="city">Okinawa</named-content></addr-line>, <country>Japan</country></aff><aff id="aff3"><institution>Microsoft Research Cambridge</institution>, <addr-line><named-content content-type="city">Cambridge</named-content></addr-line>, <country>United 
Kingdom</country></aff><aff id="aff4"><institution content-type="dept">Department of Biochemistry</institution>, <institution>University of Cambridge</institution>, <addr-line><named-content content-type="city">Cambridge</named-content></addr-line>, <country>United Kingdom</country></aff><aff id="aff5"><institution content-type="dept">Department of Systems Biology</institution>, <institution>Harvard Medical School</institution>, <addr-line><named-content content-type="city">Boston</named-content></addr-line>, <country>United States</country></aff><aff id="aff6"><institution content-type="dept">UCL Genetics Institute, Department of Genetics, Evolution, and Environment</institution>, <institution>University College London</institution>, <addr-line><named-content content-type="city">London</named-content></addr-line>, <country>United Kingdom</country></aff><aff id="aff7"><institution>London Research Institute, Cancer Research UK</institution>, <addr-line><named-content content-type="city">London</named-content></addr-line>, <country>United Kingdom</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Guigo</surname><given-names>Roderic</given-names></name><role>Reviewing editor</role><aff><institution>Center for Genomic Regulation</institution>, <country>Spain</country></aff></contrib></contrib-group><author-notes><corresp id="cor1"><label>*</label>For correspondence: <email>garth.ilsley@oist.jp</email></corresp><fn fn-type="con" id="equal-contrib"><label>†</label><p>These authors contributed equally to this work</p></fn></author-notes><pub-date date-type="pub" publication-format="electronic"><day>06</day><month>08</month><year>2013</year></pub-date><pub-date pub-type="collection"><year>2013</year></pub-date><volume>2</volume><elocation-id>e00522</elocation-id><history><date date-type="received"><day>07</day><month>01</month><year>2013</year></date><date 
date-type="accepted"><day>17</day><month>06</month><year>2013</year></date></history><permissions><copyright-statement>© 2013, Ilsley et al</copyright-statement><copyright-year>2013</copyright-year><copyright-holder>Ilsley et al</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/3.0/"><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife00522.pdf"/><related-article ext-link-type="doi" id="ra1" related-article-type="commentary" xlink:href="10.7554/eLife.01135"/><abstract><object-id pub-id-type="doi">10.7554/eLife.00522.001</object-id><p>Transcriptional control ensures genes are expressed in the right amounts at the correct times and locations. Understanding quantitatively how regulatory systems convert input signals to appropriate outputs remains a challenge. For the first time, we successfully model <italic>even skipped</italic> (<italic>eve</italic>) stripes 2 and 3+7 across the entire fly embryo at cellular resolution. A straightforward statistical relationship explains how transcription factor (TF) concentrations define <italic>eve</italic>’s complex spatial expression, without the need for pairwise interactions or cross-regulatory dynamics. Simulating thousands of TF combinations, we recover known regulators and suggest new candidates. Finally, we accurately predict the intricate effects of perturbations including TF mutations and misexpression. Our approach imposes minimal assumptions about regulatory function; instead we infer underlying mechanisms from models that best fit the data, like the lack of TF-specific thresholds and the positional value of homotypic interactions. 
Our study provides a general and quantitative method for elucidating the regulation of diverse biological systems.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.001">http://dx.doi.org/10.7554/eLife.00522.001</ext-link></p></abstract><abstract abstract-type="executive-summary"><object-id pub-id-type="doi">10.7554/eLife.00522.002</object-id><title>eLife digest</title><p>The transcription of genes into messenger RNA (mRNA) molecules is one of the most important processes in biology, but our present understanding of this process is largely qualitative. Molecules such as transcription factors and regions of DNA other than the region that codes for the mRNA are known to interact with each other to influence the onset of transcription, and also the rate at which it occurs. However, given the cellular concentrations of transcription factors in a developing organism, it is not known if it is possible to accurately predict their effects on transcription. Being able to make such predictions would greatly improve our understanding of how transcription and the development of an organism are controlled.</p><p>Ilsley et al. have tackled this problem by analysing a large volume of data called the Virtual Embryo dataset: produced by the Berkeley <italic>Drosophila</italic> Transcription Network Project, this dataset includes the results of mRNA expression measurements on 95 different genes at six different times during the early development of <italic>Drosophila melanogaster</italic>, a species of fruit fly. In particular, Ilsley et al. focussed on the expression at one point in time of the <italic>even skipped (eve)</italic> gene, a widely studied gene that is important for embryo development in these fruit flies. 
The <italic>eve</italic> gene is one of the genes responsible for dividing the fly into segments which form part of its body plan.</p><p>Without making any assumptions about the biological mechanisms that might be involved, Ilsley et al. built a statistical model that was able to predict the pattern of gene expression for a fruit fly, given the concentrations of the relevant transcription factors in the various cells within the embryo as input. The model was also able to predict the patterns of gene expression observed in other experiments involving mutations and the misexpression of fruit fly genes. Moreover, Ilsley et al. have made various predictions involving the genes Bicoid and Hunchback that can be tested experimentally in future studies.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.002">http://dx.doi.org/10.7554/eLife.00522.002</ext-link></p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>transcriptional regulation</kwd><kwd>logistic regression</kwd><kwd>fly embryo</kwd><kwd>developmental patterning</kwd><kwd>positional information</kwd><kwd>even skipped</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd><italic>D. 
melanogaster</italic></kwd></kwd-group><funding-group><award-group id="par-1"><funding-source><institution-wrap><institution>EMBL</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Ilsley</surname><given-names>Garth R</given-names></name><name><surname>Apweiler</surname><given-names>Rolf</given-names></name><name><surname>Luscombe</surname><given-names>Nicholas M</given-names></name></principal-award-recipient></award-group><award-group id="par-2"><funding-source><institution-wrap><institution>Cancer Research UK</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Luscombe</surname><given-names>Nicholas M</given-names></name></principal-award-recipient></award-group><award-group id="par-3"><funding-source><institution-wrap><institution>Okinawa Institute of Science and Technology</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Ilsley</surname><given-names>Garth R</given-names></name><name><surname>Luscombe</surname><given-names>Nicholas M</given-names></name></principal-award-recipient></award-group><award-group id="par-4"><funding-source><institution-wrap><institution>National Institutes of Health</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Apweiler</surname><given-names>Rolf</given-names></name><name><surname>DePace</surname><given-names>Angela H</given-names></name></principal-award-recipient></award-group><award-group id="par-5"><funding-source><institution-wrap><institution>Microsoft Research</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Fisher</surname><given-names>Jasmin</given-names></name></principal-award-recipient></award-group><award-group id="par-6"><funding-source><institution-wrap><institution>Peterhouse, Cambridge</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Ilsley</surname><given-names>Garth 
R</given-names></name></principal-award-recipient></award-group><award-group id="par-7"><funding-source><institution-wrap><institution>University College London</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Luscombe</surname><given-names>Nicholas M</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection and interpretation, or the decision to submit the work for publication.</funding-statement></funding-group><custom-meta-group><custom-meta><meta-name>elife-xml-version</meta-name><meta-value>2</meta-value></custom-meta><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>A statistical model is able to predict patterns of gene expression output in <italic>Drosophila</italic> embryos using only the cellular concentrations of transcription factors as input.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>A detailed knowledge of transcriptional control will have profound consequences for our understanding of myriad biological processes, including development, homeostasis, and evolution of new phenotypes. To this end, through a combination of genomic, genetic, and molecular experiments, the field continues to accumulate considerable information documenting components of regulatory systems and regulator-target interactions (<xref ref-type="bibr" rid="bib19">Gerstein et al., 2010</xref>; <xref ref-type="bibr" rid="bib62">The modENCODE Consortium, 2010</xref>; <xref ref-type="bibr" rid="bib15">The ENCODE Project Consortium, 2012</xref>). At present however, many of these descriptions are qualitative. 
A major goal going forward is to interpret these data in a quantitative manner (<xref ref-type="bibr" rid="bib80">Wilczynski and Furlong, 2010</xref>): how do regulators and regulatory interactions convert input signals to the appropriate output expression pattern? In general, answering these questions remains a significant challenge. The experiments needed to probe regulatory functions in detail are technically demanding; moreover, many systems involve multiple layers of control that cannot be investigated within a single experimental set-up. Theoretical models can help advance experimental investigations by providing a framework for deriving general principles and developing testable hypotheses (<xref ref-type="bibr" rid="bib60">Reeves et al., 2006</xref>; <xref ref-type="bibr" rid="bib77">Tomlin and Axelrod, 2007</xref>; <xref ref-type="bibr" rid="bib44">Lewis, 2008</xref>; <xref ref-type="bibr" rid="bib52">Oates et al., 2009</xref>; <xref ref-type="bibr" rid="bib13">Davidson, 2010</xref>). An effective model should be able to define and predict expression accurately by describing how and by how much regulators influence target gene expression (<xref ref-type="bibr" rid="bib23">Hasty et al., 2001</xref>; <xref ref-type="bibr" rid="bib67">Segal and Widom, 2009</xref>).</p><p>Transcription in animals is controlled by interaction among transcription factors (TFs), enhancers, core promoters, silencers, insulators, and chromatin structure (<xref ref-type="bibr" rid="bib40">Lemon and Tjian, 2000</xref>; <xref ref-type="bibr" rid="bib3">Arnosti, 2003</xref>; <xref ref-type="bibr" rid="bib43">Levine, 2010</xref>; <xref ref-type="bibr" rid="bib53">Ohler and Wassarman, 2010</xref>; <xref ref-type="bibr" rid="bib14">Dean, 2011</xref>). 
It is thought that core promoter elements and chromatin structure provide general competence for transcription at transcription start sites (<xref ref-type="bibr" rid="bib41">Lenhard et al., 2012</xref>), whereas more distant enhancers up-regulate expression of genes under specific conditions (<xref ref-type="bibr" rid="bib10">Bulger and Groudine, 2011</xref>; <xref ref-type="bibr" rid="bib54">Ong and Corces, 2011</xref>). A single gene can be regulated by multiple enhancers, each directing a portion of the overall gene expression pattern in space and time. Enhancers operate by binding TFs, which in turn recruit regulatory co-factors and/or interact directly with the core promoter where RNA polymerase acts (<xref ref-type="bibr" rid="bib73">Spitz and Furlong, 2012</xref>). A comprehensive model of transcriptional regulation would therefore include many factors, such as regulatory DNA sequence, DNA conformation, TF concentrations and nucleosome position among others (<xref ref-type="bibr" rid="bib67">Segal and Widom, 2009</xref>). However, many of the parameters in such a model are currently impossible to measure. In the absence of such measurements, a partial yet predictive model based on available data is still valuable.</p><p>Here, we propose models of transcriptional control that are highly predictive of target gene expression given only TF concentrations at cellular resolution. Our goal is to make few assumptions about the underlying molecular mechanism. Instead, by generating models that predict experimental measurements as accurately as possible, we infer probable biological mechanisms and insights suggested by the parameters of the models. To achieve this, we focus on modeling the functional link between TF inputs and the resulting output (i.e., the ‘regulatory input function’). These models are specific to individual enhancers: they capture how genomic loci interpret TF concentrations to control the output expression level of their target genes. 
Though multiple previous modeling studies have explicitly included protein–DNA interactions (e.g., in <italic>Drosophila</italic>, see <xref ref-type="bibr" rid="bib24">He et al., 2010</xref>; <xref ref-type="bibr" rid="bib32">Janssens et al., 2006</xref>; <xref ref-type="bibr" rid="bib33">Junion et al., 2012</xref>; <xref ref-type="bibr" rid="bib35">Kazemian et al., 2010</xref>; <xref ref-type="bibr" rid="bib66">Segal et al., 2008</xref>; <xref ref-type="bibr" rid="bib86">Zinzen et al., 2009</xref>), here, we choose to model the relationship between inputs and outputs directly as this offers several advantages. First and most importantly, this type of model encapsulates numerous relevant levels of biophysical interactions (i.e., TF-DNA, TF-TF, enhancer-promoter etc). Second, it enables us to evaluate the utility of higher-order interactions between TFs, propose potential regulators and consider alternative hypotheses of experimental results. Third, in the context of developmental biology, it allows us to explore the minimal information required to define positional information in the early embryo. Finally, focusing on input and output measurements means that the approach is applicable to relatively uncharacterized systems, for instance where enhancer regions have not yet been identified, or in assessing the conservation of regulatory input functions between species (<xref ref-type="bibr" rid="bib84">Wunderlich et al., 2012</xref>).</p><p>We develop and test our models in the context of the well-studied <italic>even skipped</italic> (<italic>eve</italic>) enhancers in order to demonstrate their accuracy and utility. <italic>eve</italic> is expressed in a symmetrical pattern of seven stripes that subdivide the embryo along the anteroposterior axis (<xref ref-type="bibr" rid="bib51">Nüsslein-Volhard and Wieschaus, 1980</xref>). 
Each stripe is only a few nuclei wide and any regulatory input function of an enhancer must define at least two borders at a high level of precision. A number of well-characterized enhancers direct expression of the seven <italic>eve</italic> stripes individually or in pairs (<xref ref-type="bibr" rid="bib20">Goto et al., 1989</xref>; <xref ref-type="bibr" rid="bib21">Harding et al., 1989</xref>; <xref ref-type="bibr" rid="bib18">Fujioka et al., 1999</xref>). Here, we focus on the enhancers <italic>eve 2</italic> and <italic>eve 3+7</italic>, which have been shown to control stripe 2 and stripes 3 and 7 respectively (<xref ref-type="bibr" rid="bib20">Goto et al., 1989</xref>; <xref ref-type="bibr" rid="bib21">Harding et al., 1989</xref>; <xref ref-type="bibr" rid="bib74">Stanojevic et al., 1991</xref>; <xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>, <xref ref-type="bibr" rid="bib71">1996</xref>). Many of the input TFs and their roles in regulating <italic>eve</italic> expression have been defined; however, there remain unexplained properties underlying their regulation. An advantage of modeling <italic>eve</italic> is that we can use the available information as independent validations of our ability to recover known regulators and predict the outcome of regulatory perturbations, while also producing new insights. 
It is notable that though there has been some success in simulating the simpler gap gene expression pattern and in predicting <italic>eve</italic> expression on a portion of the anteroposterior axis, modeling pair-rule expression accurately across the whole embryo has remained a significant challenge (<xref ref-type="bibr" rid="bib29">Jaeger et al., 2004</xref>; <xref ref-type="bibr" rid="bib32">Janssens et al., 2006</xref>; <xref ref-type="bibr" rid="bib56">Papatsenko and Levine, 2008</xref>; <xref ref-type="bibr" rid="bib66">Segal et al., 2008</xref>; <xref ref-type="bibr" rid="bib35">Kazemian et al., 2010</xref>; <xref ref-type="bibr" rid="bib39">Kim et al., 2013</xref>).</p><p>To fit regulatory input functions, we require accurate measurements of expression levels for both the regulating TFs and <italic>eve</italic>. The Virtual Embryo from the Berkeley <italic>Drosophila</italic> Transcription Network Project provides the best available data for this purpose (<xref ref-type="bibr" rid="bib16">Fowlkes et al., 2008</xref>). It is a cellular resolution, spatiotemporal atlas of gene expression and morphology for a whole <italic>Drosophila melanogaster</italic> blastoderm embryo. 
The dataset contains the three-dimensional coordinates for 6078 nuclei, along with mRNA expression measurements of 95 different genes at six time points during the 50 min leading to gastrulation: these genes include critical TFs that direct patterning in the early <italic>Drosophila</italic> embryo.</p><p>Using our modeling framework, we (i) predict expression patterns with accuracy and explanatory power at cellular resolution across the whole embryo; (ii) recover previously described regulatory relationships and test whether they provide sufficient positional information to define the resulting expression pattern; (iii) propose potential new regulatory relationships by comparing alternative models; and (iv) predict expression patterns under perturbation of input TFs, capturing the outcome of knockdown and misexpression experiments. Given the high level of accuracy of our models, we conclude with observations regarding mechanism and principles of enhancer function.</p></sec><sec id="s2" sec-type="results"><title>Results</title><sec id="s2-1"><title>Approach of this study</title><p>Our strategy is to find the logistic regression coefficients that most accurately describe the relationship between measured regulator concentrations and specific stripes of <italic>eve</italic> expression (<xref ref-type="fig" rid="fig1">Figure 1</xref>). We first train our models using the known regulators described in the literature to evaluate if they are sufficient for determining <italic>eve</italic> expression. At this stage, we also test the model for consistency across different subsets of the data. Next, we ask generally which regulators are able to specify <italic>eve</italic> expression (regulator discovery) and consider the plausibility of concentration-dependent dual regulation. 
Finally, we assess whether our models are able to predict beyond the conditions of the training data: specifically, we test whether our models can predict expression under perturbation, such as mutation of TFs and their cognate enhancer binding sites, by comparing our predictions with independently published experimental results.<fig-group><fig id="fig1" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.003</object-id><label>Figure 1.</label><caption><title>Schematic representation of method used to model <italic>eve</italic> expression.</title><p>(<bold>A</bold>) Logistic regression is used to calculate the probability <italic>p</italic><sub><italic>i</italic></sub> that <italic>eve</italic> is ON in a given nucleus <italic>i</italic>, given TF concentrations. A logistic model linearly combines the values of independent variables (in this case, the concentrations, <italic>x</italic><sub><italic>ki</italic></sub>, of regulators 1 to <italic>k</italic>) to produce a prediction; the predictor, <italic>η</italic><sub><italic>i</italic></sub>, is then transformed by the logistic function to give the probability, <italic>p</italic><sub><italic>i</italic></sub>, of <italic>eve</italic> being ON. The weight parameters <italic>β</italic><sub><italic>k</italic></sub> are optimized to provide the best fit with the training data: positive weights indicate activators and negative weights indicate repressors. (<bold>B</bold>) Schematic representation of the data preparation, model training and prediction steps using <italic>eve</italic> stripe 2 as an example. The plots represent how logistic regression operates; the lateral perspectives of the embryo show the Virtual Embryo and processed expression data for <italic>eve</italic> and four regulators (Bcd, Hb, Kr, and Gt). In Step 1, <italic>eve</italic>’s expression is discretized whereas TF concentrations are retained as continuous values. Each nucleus corresponds to a data point. 
In Step 2, the logistic model is trained to classify whether <italic>eve</italic> is ON or OFF using all nuclei in stripe 2, and all OFF nuclei in the embryo. In Step 3, the trained model is used to predict <italic>eve</italic> expression in every nucleus of the entire embryo using the concentrations of the relevant regulators within them (shown in green for activators, and purple for repressors). In Step 4, the effects of perturbations are predicted by adjusting the concentration of the regulator under consideration (in this case, Hb), but without changing any model parameters.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.003">http://dx.doi.org/10.7554/eLife.00522.003</ext-link></p></caption><graphic xlink:href="elife00522f001"/></fig><fig id="fig1s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.004</object-id><label>Figure 1—figure supplement 1.</label><caption><title>Expression of <italic>eve</italic> across the anteroposterior axis.</title><p>Each strip shows the expression of <italic>eve</italic> in a narrow lateral band (10 μm either side of the lateral midline) along the anteroposterior (A-P) axis for each time point (1–6) in the Virtual Embryo. The interval between the time points is approximately 10 min. The horizontal line is the threshold of 0.2 used in training the models. 
The rug plot indicates which nuclei are considered within the stripes according to this threshold.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.004">http://dx.doi.org/10.7554/eLife.00522.004</ext-link></p></caption><graphic xlink:href="elife00522fs001"/></fig></fig-group></p></sec><sec id="s2-2"><title>A classification approach for modeling <italic>eve</italic> expression</title><sec id="s2-2-1"><title>Preparing the data</title><p>We trained our models on a single time point: this has the advantage of eliminating uncertainties regarding nuclei assignments across time points in the Virtual Embryo dataset, yet still provides sufficient data for fitting. All models were trained using the third time point corresponding to ∼30 min before gastrulation, when according to the Virtual Embryo data, the borders are sharpening and the stripes are not moving dramatically (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1</xref>). This gave us confidence that <italic>eve</italic> is transcriptionally active in the relevant nuclei. At this time point, <italic>eve</italic>’s expression changes from high to low over only a few nuclei across all seven stripes along the anteroposterior axis. Thus we categorized each nucleus as ON or OFF depending on whether <italic>eve</italic> is above or below a value of 0.2 since this defines the stripe borders reasonably (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1</xref>). The Virtual Embryo dataset contains expression measurements normalized to a range of 0 to just over 1 across the entire embryo and time points; thus 0.2 corresponds to ∼20% of the maximal expression.</p><p>We made use of mRNA measurements for 34 regulatory genes and protein measurements for an additional four genes (<italic>bicoid</italic>, <italic>hunchback</italic>, <italic>Kruppel</italic> and <italic>giant</italic>). 
Since our model does not require absolute concentration measurements, mRNA expression is a reasonable proxy for protein assuming that the spatial distribution of the two is similar. We distinguish between protein and mRNA measurements by indicating the regulator name in italics for mRNA (e.g., <italic>gt</italic>) or normal case for protein (e.g., Gt). In contrast to <italic>eve</italic>, the expression profiles of these regulators were retained as continuous measurements because many of them are expressed in a graded fashion. Four pair-rule genes (<italic>fushi tarazu</italic>, <italic>odd skipped</italic>, <italic>hairy</italic> and <italic>paired</italic>) that have similar stripe patterns to <italic>eve</italic> were excluded from the data set; although some of them might help modulate the expression of <italic>eve</italic>, they were removed so that we could assess whether <italic>eve</italic>’s complex spatial pattern could be derived directly from simpler patterns of the regulators upstream of the pair-rule genes. Moreover, <italic>eve</italic> expression looks qualitatively normal in these TF mutants (<xref ref-type="bibr" rid="bib63">Schroeder et al., 2011</xref>).</p></sec><sec id="s2-2-2"><title>Modeling <italic>eve</italic> expression using logistic regression</title><p>We selected logistic regression for modeling <italic>eve</italic> expression because it provides a framework for linking continuous input variables (i.e., the regulator concentrations) to a binary output (i.e., <italic>eve</italic>’s expression state). Like linear regression, a logistic model linearly combines the values of the independent variables to produce a prediction; but the linear predictor is further transformed by the logistic function to give the probability, <italic>p</italic>, of <italic>eve</italic> being ON (<xref ref-type="fig" rid="fig1">Figure 1</xref> for a schematic of ‘Methods’). 
As in any regression model, the weight parameters are optimized so that the output shows the greatest agreement with the training data. The weight assigned to each TF indicates its regulatory role, with positive weights indicating activators and negative weights indicating repressors. Importantly, since each regulator in the linear combination has independent weight parameters, the model needs only relative concentration measurements. Models were trained for classification using the nuclei of the stripe(s) under consideration as well as all OFF nuclei in the embryo. It is important to note that although this can be viewed as a training step, the ability of the model to classify at this stage is of direct interest to us: do the regulators contain sufficient positional information to explain <italic>eve</italic> expression in the given stripe(s)? We then use the model to predict <italic>eve</italic> expression in every nucleus across the entire embryo using the concentrations of the relevant regulators within them. This step reveals the model’s applicability across the whole embryo, rather than just for the nuclei that were used for training.</p></sec></sec><sec id="s2-3"><title>Linear logistic modeling accurately recapitulates <italic>eve 2</italic> expression</title><p>First we focused on the expression of the second stripe of <italic>eve</italic>, as it is directed by a very well-characterized enhancer, <italic>eve 2</italic>. Through detailed molecular analysis, it is known that <italic>eve</italic> stripe 2 is controlled by the gap genes (<xref ref-type="bibr" rid="bib17">Frasch and Levine, 1987</xref>; <xref ref-type="bibr" rid="bib74">Stanojevic et al., 1991</xref>; <xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>), a class of TFs that are present in broad regions of the early embryo. 
In the generally accepted minimal mechanism, two activators Hunchback (Hb) and Bicoid (Bcd) enable broad permissive <italic>eve</italic> expression in the region of stripe 2, while two repressors Giant (Gt) and Kruppel (Kr) define the anterior and posterior borders respectively by suppressing <italic>eve</italic> outside the stripe.</p><sec id="s2-3-1"><title>Modeling with known regulators recapitulates <italic>eve 2</italic> expression</title><p>We trained the logistic model to define the expression of <italic>eve</italic> stripe 2 using a linear combination of the measured concentrations of Hb, Bcd, Gt, and Kr (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). <xref ref-type="fig" rid="fig2">Figure 2B</xref> shows the model’s output for every nucleus plotted from two perspectives according to their coordinates in the Virtual Embryo. Every nucleus is assigned a probability of <italic>eve</italic> expression and the color scale ranges from light (p=0) to dark (p=1); nuclei within the stripes (defined by actual <italic>eve</italic> expression) are shown in grey-scale from white to black, and predictions outside stripes are presented on a red-scale with peach for values near 0. <xref ref-type="fig" rid="fig2">Figure 2C</xref> depicts the probability of <italic>eve</italic> expression being above the threshold in the nuclei of the lateral midline along the anteroposterior axis. It is immediately apparent that the model successfully combines the four known regulators to define precisely the location of <italic>eve</italic> stripe 2. 
(Bcd’s role as a potential repressor is discussed below).<fig-group><fig id="fig2" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.005</object-id><label>Figure 2.</label><caption><title>Logistic models accurately predict <italic>eve 2</italic> expression.</title><p>(<bold>A</bold>) Lateral perspectives of the <italic>Drosophila</italic> embryo depicting the contribution of four regulators (Bcd, Hb, Gt, and Kr) to the model output. Embryos are drawn with the anterior (A) to the left and posterior (P) to the right, along with regulator names and corresponding coefficients in the model. Each nucleus is shaded to indicate the level of contribution by regulators, with darker colors signifying stronger effects (in this case, due to higher regulator concentrations): green represents a positive, activating effect and purple a negative, repressive one. Inputs are continuous, but drawn using a discrete color scale for simplicity. (<bold>B</bold>) Lateral and 3D perspectives of the embryo show the model prediction of <italic>eve</italic> stripe 2 expression. Each nucleus is colored from light to dark for low to high probability of <italic>eve</italic> being ON: within stripes the color scale is from white to black and outside the stripes it is on a red scale, with peach for values below 0.15. (<bold>C</bold>) A ribbon plot showing the probability of <italic>eve</italic> expression (y-axis) for nuclei within 10 μm of the lateral midline along the anteroposterior axis (x-axis). The plot demonstrates that the stripe borders are sharply defined. It also allows easy comparisons with other models that are generally performed in one dimension. (<bold>D</bold>) For regulator discovery, for every possible pair of regulators, we determined the best-scoring model of four regulators containing the pair. 
The 38 regulators in the dataset are shown on the x- and y-axes of the heat map, and the highest scores for every pair are depicted in the intersecting cell on a color scale from light (minimum score in the heat map) to dark (highest score in the heat map). Regulators making consistently informative contributions to models can be identified by the dark bands running across the heat map. Using linear logistic models, Gt, Hb and Bcd can be clearly seen to be informative regulators (highlighted in black). (<bold>E</bold>–<bold>G</bold>) Prediction made using a quadratic logistic model, in which Bcd is assigned a concentration-dependent dual regulatory activity: it is an activator (green) at low concentrations in the region of stripe 2, and a repressor (purple) at higher concentrations everywhere else. The model outputs a better prediction for stripe 2 expression as shown in <xref ref-type="table" rid="tbl1">Table 1</xref>. Most importantly, it reconciles Bcd’s apparently paradoxical behavior compared with the literature (<xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>; <xref ref-type="bibr" rid="bib1">Andrioli et al., 2002</xref>). (<bold>H</bold>) Regulator discovery using quadratic models identifies Gt, Hb, Bcd, and Kr as informative regulators (highlighted in black).</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.005">http://dx.doi.org/10.7554/eLife.00522.005</ext-link></p></caption><graphic xlink:href="elife00522f002"/></fig><fig id="fig2s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.006</object-id><label>Figure 2—figure supplement 1.</label><caption><title>Training the linear model without the anteriormost region gives Bcd an activating role.</title><p>Regulator inputs and model output are shown as in <xref ref-type="fig" rid="fig2">Figure 2</xref>. (<bold>B</bold>) and (<bold>C</bold>) The stripe is still sharply defined when the anteriormost region is excluded. 
(<bold>A</bold>) Bcd is activating in stripe 2, but more strongly in the anterior.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.006">http://dx.doi.org/10.7554/eLife.00522.006</ext-link></p></caption><graphic xlink:href="elife00522fs002"/></fig><fig id="fig2s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.007</object-id><label>Figure 2—figure supplement 2.</label><caption><title>Consistency of the <italic>eve 2</italic> linear and quadratic models (DV, AP and cross-validation).</title><p>The linear and quadratic models are trained as in the main text, but with the dataset initially restricted. DV is restricted to 20 μm on either side of the lateral mid-line. AP includes only the stripe and its immediately neighboring nuclei. Cross-validation is the average of 100 predictions each trained on a random subset of 50 nuclei (out of 2936). (Max) makes use of all 38 candidate regulators, but with the same training data as the models described in the main text.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.007">http://dx.doi.org/10.7554/eLife.00522.007</ext-link></p></caption><graphic xlink:href="elife00522fs003"/></fig><fig id="fig2s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.008</object-id><label>Figure 2—figure supplement 3.</label><caption><title>The linear logistic regression model is not unreasonably flexible: a given set of regulators cannot fit any stripe well.</title><p>A logistic model for Hb, Bcd, Kr, and Gt is fit using training data that is selected as described in the main text for each stripe (1–7, right axis). 
The predictions (left axis) for each nucleus are plotted along the whole anteroposterior axis, but for clarity, only the predictions for the nuclei within 10 μm either side of the lateral midline are shown.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.008">http://dx.doi.org/10.7554/eLife.00522.008</ext-link></p></caption><graphic xlink:href="elife00522fs004"/></fig><fig id="fig2s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.009</object-id><label>Figure 2—figure supplement 4.</label><caption><title>The quadratic logistic regression model is not unreasonably flexible: a given set of regulators cannot fit any stripe well.</title><p>A logistic model for Hb, Bcd, Kr and Gt, including a quadratic term for Bcd, is fit using training data that is selected as described in the main text for each stripe (1–7, right axis). The predictions (left axis) for each nucleus are plotted along the whole anteroposterior axis, but for clarity, only the predictions for the nuclei within 10 μm either side of the lateral midline are shown.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.009">http://dx.doi.org/10.7554/eLife.00522.009</ext-link></p></caption><graphic xlink:href="elife00522fs005"/></fig></fig-group></p><p>To the best of our knowledge, this is the first time that <italic>eve 2</italic>’s expression has been predicted so accurately across the entire embryo including the anteriormost region. Most nuclei inside the stripe are correctly classified as having a high probability of being ON, and there is minimal ‘over-spill’ either side of the stripe (<xref ref-type="fig" rid="fig2">Figure 2A–C</xref> and <xref ref-type="table" rid="tbl1">Table 1</xref>). 
The model defines <italic>eve</italic> expression around the entire circumference of the embryo, following the dorsal-ventral curvature of the stripe: this demonstrates that the four standard regulators of <italic>eve 2</italic> already encode this information, implying that dorsoventral factors are not required to provide this information directly to <italic>eve 2</italic>. Finally, it is notable that the model predicts a small amount of expression near stripe 7; ectopic expression of stripe 7 is sometimes observed in transgenic reporters driven by <italic>eve 2</italic> enhancers (<xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>; <xref ref-type="bibr" rid="bib32">Janssens et al., 2006</xref>; <xref ref-type="bibr" rid="bib22">Hare et al., 2008</xref>).<table-wrap id="tbl1" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.010</object-id><label>Table 1.</label><caption><p>Measurements quantifying the accuracy of <italic>eve 2</italic> predictions</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.010">http://dx.doi.org/10.7554/eLife.00522.010</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th>Model</th><th>In the stripe (%)</th><th>Immediate neighbors (%)</th><th>2<sup>nd</sup> degree neighbors (%)</th></tr></thead><tbody><tr><td><italic>eve 2</italic> linear logistic</td><td align="char" char=".">86</td><td align="char" char=".">23</td><td align="char" char=".">2</td></tr><tr><td><italic>eve 2</italic> quadratic logistic</td><td align="char" char=".">93</td><td align="char" char=".">21</td><td align="char" char=".">2</td></tr></tbody></table><table-wrap-foot><fn><p>The table shows the percentage of nuclei with predicted <italic>eve</italic> expression (p&gt;0.5). This is a stringent measure of the accuracy of classification, and is particularly useful for assessing the accuracy of the stripe borders. 
Nuclei with <italic>eve</italic> expression&gt;0.2 are defined as those ‘in the stripe’; neighboring nuclei are outside this thresholded region, either immediately adjacent to it, or two nuclei away. A perfect prediction should identify all stripe 2 nuclei as having a high probability of ON with the probability of being ON dropping off rapidly further from the stripe. Though both linear and quadratic models output excellent predictions, the latter provides a slightly more accurate fit to the data.</p></fn></table-wrap-foot></table-wrap></p><p>It is worth noting here that the model’s performance is most reliably assessed by visually comparing the predicted and actual distributions of <italic>eve</italic> expression as in <xref ref-type="fig" rid="fig2">Figure 2</xref>; this enables one to evaluate thousands of individual predictions, as well as the overall shape of the prediction, which are not easily captured in a single statistical measure. Nonetheless, <xref ref-type="table" rid="tbl1">Table 1</xref> quantifies the accuracy of the model—particularly in defining the borders of the stripe—by calculating the percentage of nuclei with a high fitted probability of <italic>eve</italic> being ON (threshold p&gt;0.5; see ‘Methods’ for description of p). Almost all nuclei within the stripe are correctly identified as ON, and the percentage of nuclei having a high fitted probability quickly drops off further from the stripe.</p></sec><sec id="s2-3-2"><title>The model performs consistently across different subsets of the data</title><p>We were interested in the extent to which our model fit is dependent on the subset of the embryo chosen as training data (<xref ref-type="fig" rid="fig2s2">Figure 2—figure supplement 2</xref>). We found that the model performs well in a cross-validation test in which we averaged 100 predictions with the training data restricted to 50 randomly selected nuclei; that is, less than 2% of the training data. 
Conversely, we also assessed whether the model is overly flexible in being able to train and predict the expression of any arbitrary stripe in the embryo using the above four regulators. We found this is not the case, suggesting that the positional information provided by these regulators is specific for the <italic>eve 2</italic> enhancer and that our model interprets this information accurately (<xref ref-type="fig" rid="fig2s3">Figure 2—figure supplement 3</xref>).</p></sec></sec><sec id="s2-4"><title>Regulator discovery ascertains the known regulators</title><p>Having successfully applied the model using known regulators, we next developed a method to identify a parsimonious set of regulators from the dataset informative for the target enhancer’s expression. Such techniques are broadly applicable in discovering potential regulators of uncharacterized enhancers, and therefore useful in producing testable hypotheses. We tested a stepwise selection process, but found that it generally includes more regulators than necessary for a good visual fit (e.g., a stepwise selection procedure for <italic>eve 2</italic> with the Bayesian information criterion finds 11 regulators). The stopping point (i.e., the penalty for adding an extra parameter) is effectively arbitrary in this case, or at least difficult to determine a priori in a justifiable manner. Additionally, stepwise selection does not consider all models exhaustively.</p><p>Instead, since we are particularly interested in identifying parsimonious models that can explain <italic>eve</italic> expression, and logistic models are fast to fit, we took the approach of fitting all possible models of four regulators out of the possible 38 in the dataset (73,815 models) and used the log likelihood of each fitted model as its score (or equivalently here, the Akaike information criterion). 
Gratifyingly, the best-scoring model comprises the known regulators Hb, Bcd, Gt, and Kr.</p><p>To make use of the scores more generally, we developed a method that summarizes the scores for all 73,815 predictions and highlights regulators that work well together (<xref ref-type="fig" rid="fig2">Figure 2D</xref>). For each possible pair of regulators (the fixed pair), we determined the best-scoring model of four regulators containing the pair. The two regulators of the fixed pair are shown on the axes of the heat map, with the highest score for the pair depicted in the intersecting cell on a color scale from light (the minimum score on the heat map) to dark (the maximum score). Dark bands crossing the heat map highlight individual regulators that consistently make informative contributions to stripe 2 expression. Hence, it is clear that Bcd, Hb, and Gt are key regulators. Although Kr is actually in the top-scoring model, the heat map does not show it as consistently informative.</p></sec><sec id="s2-5"><title>A quadratic logistic model suggests a dual regulatory role for Bcd</title><p>The linear model successfully recapitulates stripe 2 expression; however, it identifies Bcd as a repressor, whereas most existing literature defines the TF as an activator. Despite the apparent consensus, Bcd’s function is not straightforward. The need for Bcd-binding sites for successful <italic>eve</italic> expression suggests an activating function (<xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>); but this does not explain why the enhancer is inactive in the anteriormost region of the embryo despite Bcd being present at high concentrations and the known repressors Gt and Kr having low concentrations (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). 
Our linear models reflect this apparent paradox: Bcd is highlighted as one of the most important TFs during regulator discovery in spite of consistently having a negative coefficient, but a model trained by excluding the anterior region of the embryo assigns Bcd an activating function (<xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref>). These observations strongly suggest that Bcd—as both a repressor and activator—provides useful positional information to <italic>eve</italic>.</p><p>We asked whether these two functions could be reconciled if Bcd’s regulatory effect were dependent on its concentration, either directly, or mediated through other factors or post-translational modifications (<xref ref-type="bibr" rid="bib30">Janody et al., 2000</xref>, <xref ref-type="bibr" rid="bib31">2001</xref>; <xref ref-type="bibr" rid="bib1">Andrioli et al., 2002</xref>). This is readily modeled by adding a single parameter: a quadratic term for Bcd (<xref ref-type="fig" rid="fig2">Figure 2E–G</xref>). The result is clear: the modified model retains a repressive function for Bcd in the anterior of the embryo where it is present in high concentrations, but enables an activating function in the region of stripe 2 where it has lower concentrations (<xref ref-type="fig" rid="fig2">Figure 2E</xref>). The modification doesn’t lead to over-fitting on small training subsets and in fact improves the model’s ability to generalize to the whole embryo from an anteroposteriorly restricted training subset (<xref ref-type="fig" rid="fig2s2 fig2s4">Figure 2—figure supplements 2 and 4</xref>). 
In addition, regulator discovery now identifies all four TFs as important, with a more consistently informative role for Kr than in the simple linear model (<xref ref-type="fig" rid="fig2">Figure 2H</xref>).</p></sec><sec id="s2-6"><title>Independent experiments validate <italic>eve 2</italic> model predictions</title><p>We next tested whether our model is predictive of experimental perturbations. We considered experiments that test the role of <italic>eve 2</italic> regulators by either knocking down the input TF (<xref ref-type="bibr" rid="bib74">Stanojevic et al., 1991</xref>), or by mutating binding sites for that TF in the <italic>eve 2</italic> enhancer (<xref ref-type="bibr" rid="bib4">Arnosti et al., 1996</xref>). To simulate these perturbations, we set the concentrations of Bcd or Hb to zero without further adjustment of the coefficients. Strictly speaking, this models the direct effect of the perturbation and is akin to the removal of the relevant binding sites from the enhancer.</p><p>The results of these perturbations are shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>. Only the quadratic model correctly predicts the expression pattern in a Bcd null mutant (<xref ref-type="fig" rid="fig3">Figure 3C</xref>). In the linear model, Bcd is designated a repressor and so its mutant causes broad <italic>eve</italic> expression in the anterior of the embryo in contrast to the experimental result (<xref ref-type="fig" rid="fig3">Figure 3C</xref>). In the quadratic model the lack of either activator (Bcd or Hb) abolishes the expression of stripe 2 as expected (<xref ref-type="fig" rid="fig3">Figure 3C,D</xref>). In both the linear and quadratic models, the loss of the repressors Gt or Kr causes <italic>eve</italic> expression to extend towards the anterior and posterior of the embryo respectively, in line with their roles in defining the stripe borders (<xref ref-type="fig" rid="fig3">Figure 3A,B</xref>). 
<fig id="fig3" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.011</object-id><label>Figure 3.</label><caption><title>The quadratic model accurately predicts <italic>eve 2</italic> expression under perturbation of input TFs.</title><p>The effects of regulatory perturbations on stripe 2 expression are predicted by altering regulator concentrations but keeping all the model coefficients unchanged; for TF deletion or binding site mutants, this involves setting the relevant regulator’s concentrations to 0. Predictions are made for perturbations using the linear and quadratic models. Comparisons to experiments provide robust, independent validations of model predictions. Loss of (<bold>A</bold>) <italic>gt</italic> or (<bold>B</bold>) <italic>Kr</italic> causes <italic>eve</italic> expression to extend towards the anterior and posterior of the embryo respectively, in excellent agreement with experimental evidence. (<bold>C</bold>) For the <italic>bcd</italic> mutant, the linear model predicts expression at the anterior of the embryo, something that is not observed in experiments. In contrast, the quadratic model does not suffer from this. (<bold>D</bold>) Perturbing <italic>hb</italic> leads to complete loss of <italic>eve</italic> stripe 2 for both models. The better agreement between predictions and experimental evidence suggests that the quadratic is a more plausible model of <italic>eve 2</italic> regulation.</p><p>In situ images in panels 1, 2, and 6 are reproduced from Figure 4B–C and 6C, <xref ref-type="bibr" rid="bib70">Small et al. 
(1992)</xref>, <italic>The EMBO Journal</italic>; Nature Publishing Group has granted permission to reproduce these images under the terms of the Creative Commons Attribution 3.0 Unported License (<ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">CC BY 3.0</ext-link>).</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.011">http://dx.doi.org/10.7554/eLife.00522.011</ext-link></p></caption><graphic xlink:href="elife00522f003"/><permissions><copyright-statement>© 1991, Cold Spring Harbor Laboratory Press, All Rights Reserved</copyright-statement><copyright-year>1991</copyright-year><copyright-holder>Cold Spring Harbor Laboratory Press</copyright-holder><license><license-p>The in situ image in panel 3 is reprinted with permission from Figure 2D, <xref ref-type="bibr" rid="bib72">Small et al. (1991)</xref>, <italic>Genes &amp; Development</italic>.</license-p></license></permissions><permissions><copyright-statement>© 1991, American Association for the Advancement of Science, All Rights Reserved</copyright-statement><copyright-year>1991</copyright-year><copyright-holder>American Association for the Advancement of Science</copyright-holder><license><license-p>In situ images in panels 4 and 5 are reprinted with permission from Figure 3A and 3C, <xref ref-type="bibr" rid="bib74">Stanojevic et al. (1991)</xref>, <italic>Science</italic>.</license-p></license></permissions><permissions><copyright-statement>© 1996, The Company of Biologists, All Rights Reserved</copyright-statement><copyright-year>1996</copyright-year><copyright-holder>The Company of Biologists</copyright-holder><license><license-p>The in situ image in panel 7 is reproduced with permission from Figure 6B, <xref ref-type="bibr" rid="bib4">Arnosti et al. 
(1996)</xref>, <italic>Development</italic>.</license-p></license></permissions></fig></p><p>Both models predict the observed response to binding site mutations: the expansions of stripe 2 in the correct directions and extent along the length of the embryo. The models demonstrate that the extent of posterior extension in the Kr mutant is restricted because of decreasing activator concentrations (<xref ref-type="fig" rid="fig3">Figure 3B</xref>). For the anterior extension in the Gt mutant, the restriction requires a repressor since activator concentrations remain high to the end of the embryo (<xref ref-type="fig" rid="fig3">Figure 3A</xref>; <xref ref-type="bibr" rid="bib1">Andrioli et al., 2002</xref>). Bcd can provide this repression in both linear and quadratic models: however, only the quadratic can reconcile this with Bcd’s known activating function. For the linear model to work with a Bcd activator, one would need a fifth regulator as an anterior repressor. Indeed, multiple studies have searched for a repressor in this region, and multiple candidates have been identified though none have been conclusive (<xref ref-type="bibr" rid="bib6">Bellaïche et al., 1996</xref>; <xref ref-type="bibr" rid="bib30">Janody et al., 2000</xref>; <xref ref-type="bibr" rid="bib1">Andrioli et al., 2002</xref>; <xref ref-type="bibr" rid="bib85">Zhao et al., 2002</xref>; <xref ref-type="bibr" rid="bib69">Singh et al., 2005</xref>).</p></sec><sec id="s2-7"><title>Models successfully predict <italic>eve 3+7</italic> expression</title><p><italic>eve</italic> stripes 3 and 7 are regulated together by a single enhancer (<xref ref-type="bibr" rid="bib71">Small et al., 1996</xref>; <xref ref-type="bibr" rid="bib11">Clyde et al., 2003</xref>; <xref ref-type="bibr" rid="bib75">Struffi et al., 2011</xref>). Such an arrangement requires appropriate TF concentrations for <italic>eve</italic> activation to be present in nuclei separated by some distance. 
We tested whether our modeling framework can contend with the challenge of specifying two extra stripe borders using the available regulator concentrations.</p><sec id="s2-7-1"><title>A combination of modeling and regulator discovery suggests two plausible models</title><p>We first fit our models using only the known regulators of <italic>eve</italic> stripes 3 and 7, Hb and Kni. Kni is thought to repress the region between the stripes and Hb is thought to repress in the anterior and posterior regions outside the stripes (<xref ref-type="bibr" rid="bib11">Clyde et al., 2003</xref>). The measured concentrations of Hb (protein) and <italic>kni</italic> (mRNA) alone are not sufficient for our models of stripe 3 and 7 expression; in particular, the concentration of Hb is too low to repress expression to the posterior of stripe 7 (<xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref>).</p><p>Using regulator discovery (<xref ref-type="fig" rid="fig4s6 fig4s7">Figure 4—figure supplements 6 and 7</xref>), we identified two alternative models that are able to define stripes 3 and 7 (<xref ref-type="fig" rid="fig4">Figure 4</xref>). The first is a linear logistic model that includes two additional gap genes, Giant (Gt) and <italic>tailless</italic> (<italic>tll</italic>); including both Gt (protein) and <italic>tll</italic> (mRNA) improves predictions over including <italic>tll</italic> alone (<xref ref-type="fig" rid="fig4s2">Figure 4—figure supplement 2</xref>). In this model, all regulators function as repressors: the model has a positive intercept which can represent a ubiquitous activator (<xref ref-type="fig" rid="fig4">Figure 4A</xref>). Our second model is a quadratic logistic regression model that treats Hb as a dual regulator, in a similar manner to Bcd for <italic>eve 2</italic> (<xref ref-type="fig" rid="fig4">Figure 4D</xref>). 
Concentration-dependent regulation by Hb—as an activator at low concentrations and repressor at higher levels—has been suggested by previous experimental work (<xref ref-type="bibr" rid="bib26">Hülskamp et al., 1990</xref>, <xref ref-type="bibr" rid="bib25">1994</xref>; <xref ref-type="bibr" rid="bib87">Zuo et al., 1991</xref>; <xref ref-type="bibr" rid="bib65">Schulz and Tautz, 1994</xref>); and used to model stripe 3 expression (<xref ref-type="bibr" rid="bib56">Papatsenko and Levine, 2008</xref>) and gap gene regulation (<xref ref-type="bibr" rid="bib8">Bieler et al., 2011</xref>). Using regulator discovery, we again identified <italic>tll</italic> as the top candidate for repressing expression posterior to stripe 7 (<xref ref-type="fig" rid="fig4s7">Figure 4—figure supplement 7</xref>). <italic>tll</italic> has previously been proposed as a regulator of stripe 7, in some cases as an activator (<xref ref-type="bibr" rid="bib71">Small et al., 1996</xref>) and in others as a repressor (<xref ref-type="bibr" rid="bib32">Janssens et al., 2006</xref>; <xref ref-type="bibr" rid="bib49">Morán and Jiménez, 2006</xref>). Both our linear and quadratic models output good predictions of <italic>eve</italic> stripes 3 and 7 (<xref ref-type="fig" rid="fig4">Figure 4B,C,E,F</xref> and <xref ref-type="table" rid="tbl2">Table 2</xref>). As with predictions for <italic>eve 2</italic>, the high probability predictions are within the stripes and the models successfully replicate <italic>eve</italic> expression around the embryo. 
Further, using these chosen regulators, the models are not able to train and predict the expression of any arbitrary pair of stripes (<xref ref-type="fig" rid="fig4s4 fig4s5">Figure 4—figure supplements 4 and 5</xref>).<fig-group><fig id="fig4" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.012</object-id><label>Figure 4.</label><caption><title>Linear and quadratic logistic models accurately predict <italic>eve 3+7</italic> expression.</title><p>Regulator inputs and model output are shown as in <xref ref-type="fig" rid="fig2">Figure 2</xref>. (<bold>A</bold>–<bold>C</bold>) The linear model including Hb, <italic>kni</italic>, <italic>tll,</italic> and Gt; (<bold>D</bold>–<bold>F</bold>) the quadratic model comprises Hb, <italic>kni,</italic> and <italic>tll</italic>, with a quadratic term for Hb as a concentration-dependent dual regulator. Both models clearly define the two stripes, though the midline ribbon plots show that the quadratic model defines the sharpest borders. The initial predictions therefore suggest that the quadratic model provides the best output.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.012">http://dx.doi.org/10.7554/eLife.00522.012</ext-link></p></caption><graphic xlink:href="elife00522f004"/></fig><fig id="fig4s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.013</object-id><label>Figure 4—figure supplement 1.</label><caption><title>Hb and <italic>kni</italic> are not sufficient for a good model fit.</title><p>Regulator inputs and model output are shown as in <xref ref-type="fig" rid="fig2">Figure 2</xref>. 
(<bold>A–C</bold>) The linear logistic model with only Hb and <italic>kni</italic> does not repress expression to the posterior of stripe 7.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.013">http://dx.doi.org/10.7554/eLife.00522.013</ext-link></p></caption><graphic xlink:href="elife00522fs006"/></fig><fig id="fig4s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.014</object-id><label>Figure 4—figure supplement 2.</label><caption><title>A linear logistic model with Hb, <italic>kni</italic> and <italic>tll</italic> does not have sharp stripe borders.</title><p>Regulator inputs and model output are shown as in <xref ref-type="fig" rid="fig2">Figure 2</xref>. (<bold>A–C</bold>) The linear logistic model with Hb, <italic>kni</italic> and <italic>tll</italic> does not produce as sharp borders as the models in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.014">http://dx.doi.org/10.7554/eLife.00522.014</ext-link></p></caption><graphic xlink:href="elife00522fs007"/></fig><fig id="fig4s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.015</object-id><label>Figure 4—figure supplement 3.</label><caption><title>Consistency of the <italic>eve 3+7</italic> linear and quadratic models.</title><p>(Stripe 3, Stripe 7, DV, AP and Cross-validation) The linear and quadratic models are trained as in the main text, but with the dataset initially restricted. Stripe 3 is trained using stripe 3 and the nuclei that are outside of the stripes (OFF). Stripe 7 is trained using stripe 7 and the nuclei that are outside of the stripes (OFF). DV is restricted to 20 μm on either side of the lateral mid-line. AP includes only the stripes and their immediately neighboring nuclei. Cross-validation is the average of 100 predictions each trained on a random subset of 50 nuclei (out of 3481). 
(Max) makes use of all 38 candidate regulators, but with the same training data as the models described in the main text.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.015">http://dx.doi.org/10.7554/eLife.00522.015</ext-link></p></caption><graphic xlink:href="elife00522fs008"/></fig><fig id="fig4s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.016</object-id><label>Figure 4—figure supplement 4.</label><caption><title>The linear logistic regression model is not unreasonably flexible: a given set of regulators cannot fit any pair of stripes well.</title><p>A logistic model for Hb, <italic>kni</italic>, <italic>tll,</italic> and Gt is fit using training data that is selected as described in the main text for each pair of stripes excluding stripe 1 (right axis). The predictions (left axis) for each nucleus are plotted along the whole anteroposterior axis, but for clarity, only the predictions for the nuclei within 10 μm either side of the lateral midline are shown.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.016">http://dx.doi.org/10.7554/eLife.00522.016</ext-link></p></caption><graphic xlink:href="elife00522fs009"/></fig><fig id="fig4s5" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.017</object-id><label>Figure 4—figure supplement 5.</label><caption><title>The quadratic logistic regression model is not unreasonably flexible: a given set of regulators cannot fit any pair of stripes well.</title><p>A logistic model for Hb, <italic>kni,</italic> and <italic>tll</italic>, including a quadratic term for Hb, is fit using training data that is selected as described in the main text for each pair of stripes excluding stripe 1 (right axis). 
The predictions (left axis) for each nucleus are plotted along the whole anteroposterior axis, but for clarity, only the predictions for the nuclei within 10 μm either side of the lateral midline are shown.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.017">http://dx.doi.org/10.7554/eLife.00522.017</ext-link></p></caption><graphic xlink:href="elife00522fs010"/></fig><fig id="fig4s6" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.018</object-id><label>Figure 4—figure supplement 6.</label><caption><title>Regulatory discovery for a linear logistic model of <italic>eve 3+7.</italic></title><p>Regulatory discovery is as shown in <xref ref-type="fig" rid="fig2">Figure 2D</xref>. For every possible pair of regulators, we determined the best-scoring model of four regulators containing the pair. The 38 regulators in the dataset are shown on the x- and y-axes of the heat map, and the highest scores for every pair are depicted in the intersecting cell on a color scale from light (minimum score in the heat map) to dark (highest score in the heat map). Regulators making consistently informative contributions to models can be identified by the dark bands running across the heat map. Here, Gt, <italic>kni,</italic> and <italic>tll</italic> can be seen to be informative regulators.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.018">http://dx.doi.org/10.7554/eLife.00522.018</ext-link></p></caption><graphic xlink:href="elife00522fs011"/></fig><fig id="fig4s7" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.019</object-id><label>Figure 4—figure supplement 7.</label><caption><title>Regulatory discovery for a quadratic logistic model of <italic>eve 3+7.</italic></title><p>Regulatory discovery is as shown in <xref ref-type="fig" rid="fig2">Figure 2H</xref>. 
For every possible pair of regulators, we determined the best-scoring model of four regulators containing the pair. The 38 regulators in the dataset are shown on the x- and y-axes of the heat map, and the highest scores for every pair are depicted in the intersecting cell on a color scale from light (minimum score in the heat map) to dark (highest score in the heat map). Regulators making consistently informative contributions to models can be identified by the dark bands running across the heat map. Here, Hb, <italic>kni</italic>, Kr, and <italic>tll</italic> can be seen to be informative regulators.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.019">http://dx.doi.org/10.7554/eLife.00522.019</ext-link></p></caption><graphic xlink:href="elife00522fs012"/></fig></fig-group><table-wrap id="tbl2" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.020</object-id><label>Table 2.</label><caption><p>Measurements quantifying the accuracy of <italic>eve 3+7</italic> predictions</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.020">http://dx.doi.org/10.7554/eLife.00522.020</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th>Model</th><th>In the stripe (%)</th><th>Immediate neighbors (%)</th><th>2<sup>nd</sup> degree neighbors (%)</th></tr></thead><tbody><tr><td colspan="4"><italic>eve 3+7</italic> linear logistic</td></tr><tr><td><italic>kni</italic> and Hb</td><td align="char" char=".">0</td><td align="char" char=".">0</td><td align="char" char=".">0</td></tr><tr><td><italic>kni,</italic> Hb, and <italic>tll</italic></td><td align="char" char=".">56</td><td align="char" char=".">39</td><td align="char" char=".">27</td></tr><tr><td><italic>kni,</italic> Hb, <italic>tll</italic>, and <italic>gt</italic></td><td align="char" char=".">76</td><td align="char" char=".">37</td><td align="char" char=".">17</td></tr><tr><td><italic>eve 3+7</italic> quadratic 
logistic</td><td align="char" char=".">86</td><td align="char" char=".">39</td><td align="char" char=".">9</td></tr></tbody></table><table-wrap-foot><fn><p>The table shows similar measures of accuracy for stripes 3 and 7 as in <xref ref-type="table" rid="tbl1">Table 1</xref>. It is clear that the quadratic model and the 4-regulator linear model provide the best predictions, with the most sharply defined borders.</p></fn></table-wrap-foot></table-wrap></p></sec><sec id="s2-7-2"><title>There are reasons to favor the quadratic model</title><p>We prefer the quadratic model over the linear for a variety of reasons. First, it is simpler: the quadratic requires only three regulators, compared to four in the linear model. Both models have five parameters, which include three shared regulators (Hb, <italic>kni</italic>, <italic>tll</italic>) and the intercept. Second, the quadratic model has more clearly defined stripe borders than the linear model. Third, the quadratic model is more robust to the choice of training data (<xref ref-type="fig" rid="fig4s3">Figure 4—figure supplement 3</xref>), indicating that it describes the regulatory relationship uniformly across the embryo: the model performs consistently whether it is trained on either of the two stripes, a restricted region around the lateral midline, or only on the stripes and their immediately neighboring nuclei. Finally, the quadratic model retains accurate expression of the stripes even when all 38 candidate regulators are included; by contrast the prediction from the linear model begins to fragment spatially, which suggests localized over-fitting.</p></sec></sec><sec id="s2-8"><title>Independent experimental perturbations are consistent with the quadratic model</title><p>As with <italic>eve 2</italic>, we can further compare the models by predicting the outcomes of regulatory perturbations of input TFs (<xref ref-type="fig" rid="fig5">Figure 5</xref>). 
Here we consider perturbations of <italic>kni</italic> and <italic>hb</italic>, the best characterized regulators of <italic>eve 3+7</italic>. It is again important to distinguish between expression in a mutant background, which reveals both direct and indirect interactions, and corresponding binding site mutations within the <italic>eve 3+7</italic> enhancer, which probe only direct interactions.<fig-group><fig id="fig5" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.021</object-id><label>Figure 5.</label><caption><title>Linear and quadratic logistic models accurately predict <italic>eve 3+7</italic> expression under perturbation of Kni and Hb.</title><p>The effects of regulatory perturbations on <italic>eve 3+7</italic> expression are predicted as described in the main text. (<bold>A</bold>) Perturbation of <italic>kni</italic> and its binding sites causes full reporter expression between the stripes. The linear model predicts this observed extension, but the quadratic does not. (<bold>B</bold>) Perturbation of <italic>hb</italic> causes stripe 3 to expand and move anteriorly, and stripe 7 to expand slightly. Binding site mutations show similar effects, though perhaps without the anterior shift of stripe 3. The linear model provides good prediction of both stripes. The quadratic produces a good stripe 3 prediction, including its anterior shift, but fails to predict any expression in stripe 7. (<bold>C–F</bold>) Given the initial preference for the quadratic, we considered minor and biologically plausible assumptions that allow the model to make accurate predictions. For the <italic>kni</italic> mutants, these are (<bold>C</bold>) the minor adjustment of the intercept and (<bold>D</bold>) inclusion of indirect effects of <italic>kni</italic> on <italic>hb</italic> by increasing Hb by 50% of wild-type <italic>kni</italic>. 
For the <italic>hb</italic> mutants, these are (<bold>E</bold>) the inclusion of residual maternal Hb in the posterior and (<bold>F</bold>) simulating the effects of residual Hb binding sites.</p><p>In situ images in panels 1 and 3 are reprinted with permission from Figure 4B–C, <xref ref-type="bibr" rid="bib71">Small et al. (1996)</xref>, <italic>Developmental Biology</italic> (© copyright Elsevier, 1996, All Rights Reserved). In situ images in panels 2 and 4 are reproduced with permission from Figures 4H and 6D, <xref ref-type="bibr" rid="bib75">Struffi et al. (2011)</xref>, <italic>Development</italic> (© copyright The Company of Biologists, 2011, All Rights Reserved).</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.021">http://dx.doi.org/10.7554/eLife.00522.021</ext-link></p></caption><graphic xlink:href="elife00522f005"/></fig><fig id="fig5s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.022</object-id><label>Figure 5—figure supplement 1.</label><caption><title>Indirect effects and the quadratic model can explain the expansion and retreat of expression observed in the <italic>eve 3+7</italic> reporter in a <italic>kni</italic> mutant.</title><p>Since Kni represses Hb, the loss of <italic>kni</italic> may lead to an increase of Hb towards steady state. To approximate this, Hb was added in increasing proportion, from 20% to 150%, of wild-type <italic>kni</italic> expression. 
The resulting <italic>eve 3+7</italic> quadratic prediction is shown in a <italic>kni</italic> mutant.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.022">http://dx.doi.org/10.7554/eLife.00522.022</ext-link></p></caption><graphic xlink:href="elife00522fs013"/></fig><fig id="fig5s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.023</object-id><label>Figure 5—figure supplement 2.</label><caption><title>An adjustment to the intercept in the quadratic logistic model for <italic>eve 3+7</italic> results in a slight expansion of expression between the stripes.</title><p>This prediction shows the effect of increasing the intercept by 4.5. This change in the intercept corresponds to potential differences in expression between the endogenous gene and the transgenic reporter.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.023">http://dx.doi.org/10.7554/eLife.00522.023</ext-link></p></caption><graphic xlink:href="elife00522fs014"/></fig><fig id="fig5s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.024</object-id><label>Figure 5—figure supplement 3.</label><caption><title>Hb binding site mutants may dampen or remove Hb repression at higher concentrations.</title><p>Dampening the effect of Hb at higher concentrations, for example by <inline-formula><mml:math id="inf4"><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:msup><mml:mrow><mml:mtext> e</mml:mtext></mml:mrow><mml:mrow><mml:mo>−</mml:mo><mml:mn>2</mml:mn><mml:mtext>Hb</mml:mtext></mml:mrow></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> as shown in the center plot, changes the regulatory impact of Hb in the embryo (left and right plots). 
The left plot shows Hb activating at low concentrations and repressing at high concentrations; the right plot shows an attenuation of this effect leading instead to a weakening in activation. The corresponding predictions are shown below.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.024">http://dx.doi.org/10.7554/eLife.00522.024</ext-link></p></caption><graphic xlink:href="elife00522fs015"/></fig></fig-group></p><sec id="s2-8-1"><title>Perturbing <italic>kni</italic></title><p>In the <italic>kni</italic> mutant, expression of an <italic>eve 3+7</italic> reporter transgene extends fully between the two stripes before partially retreating towards wild-type expression (<xref ref-type="bibr" rid="bib71">Small et al., 1996</xref>). Similarly, when Kni-binding sites in the <italic>eve 3+7</italic> enhancer are removed, the expression pattern matches the <italic>kni</italic> null mutant (<xref ref-type="bibr" rid="bib75">Struffi et al., 2011</xref>), although an earlier transgenic reporter with fewer mutated Kni-binding sites showed only partial extension (<xref ref-type="bibr" rid="bib11">Clyde et al., 2003</xref>).</p><p>To mimic both of these types of perturbations, we eliminated <italic>kni</italic> as an input. Under these conditions, the linear model predicts the observed full extension between the two stripes, whereas the quadratic does not (<xref ref-type="fig" rid="fig5">Figure 5A</xref>). However, given our reasons for preferring the quadratic model described above, it is worth considering some minor and biologically plausible assumptions to reconcile these perturbation experiments with the quadratic model (<xref ref-type="fig" rid="fig5">Figure 5C,D</xref>). We discuss these in terms of direct and indirect effects below.</p><sec id="s2-8-1-1"><title>Direct effects</title><p>The direct effects can be understood by considering two related minor adjustments (<xref ref-type="fig" rid="fig5">Figure 5C</xref>). 
First, we can assume that the Kni protein is ubiquitously expressed in the embryo at low concentrations, but that this is not reflected in the Virtual Embryo dataset; it is possible that in situ hybridization was not sufficiently sensitive for these low-level transcripts or that the protein has a slightly different profile to the <italic>kni</italic> mRNA. If we increase <italic>kni</italic> concentrations in the Virtual Embryo dataset by just 0.1 (∼10% of the maximum measured value across all time points), the retrained quadratic model predicts full extension between the stripes. The prediction for wild-type expression is not affected and this adjustment is sufficient for explaining both the <italic>kni</italic> mutant and the <italic>eve 3+7</italic> binding site mutations. Second, since this adjustment produces a model with identical coefficients (i.e., <italic>β</italic><sub><italic>k</italic></sub>) except for the intercept (i.e., <italic>β</italic><sub><italic>0</italic></sub>)—which increases from −8.1 to −3.6—we can change the intercept directly in the quadratic model. This adjustment is also sufficient to predict full extension between the stripes, though it alters the wild-type prediction slightly (<xref ref-type="fig" rid="fig5s2">Figure 5—figure supplement 2</xref>). This change in the intercept corresponds to potential differences in expression between the endogenous gene and the transgenic reporter. For instance, the reporter might have a lower barrier to activation and be more efficiently transcribed relative to the endogenous enhancer. Alternatively, the mutations in the transgenic enhancer may have abolished Kni repression, but then introduced the binding of another weak, ubiquitous activator.</p></sec><sec id="s2-8-1-2"><title>Indirect effects</title><p>Next, we consider the effects of Kni on downstream regulators (<xref ref-type="fig" rid="fig5">Figure 5D</xref>). 
There is strong evidence that <italic>kni</italic> is a repressor of <italic>hb</italic>, as its loss causes <italic>hb</italic> expression to extend from stripe 7 towards stripe 3 (<xref ref-type="bibr" rid="bib26">Hülskamp et al., 1990</xref>; <xref ref-type="bibr" rid="bib11">Clyde et al., 2003</xref>). We simulated this indirect interaction by increasing Hb concentration in proportion to the relative loss of <italic>kni</italic>. Since Hb is an activator at low concentrations in the quadratic model, this indirect effect can drive <italic>eve</italic> expression between the stripes. This adjustment is not relevant to binding site mutants, but interestingly it does provide a tentative explanation for the partial retreat of <italic>eve</italic>’s extension towards a wild-type expression pattern: as Hb concentrations increase over time, the TF eventually switches from an activator to a repressor of <italic>eve</italic> between the stripes (<xref ref-type="fig" rid="fig5s1">Figure 5—figure supplement 1</xref>).</p></sec></sec><sec id="s2-8-2"><title>Perturbing <italic>hb</italic></title><p><italic>hb</italic> is both maternally and zygotically expressed. In embryos null for <italic>hb</italic> zygotic expression, <italic>eve</italic> stripe 3 moves anteriorly and expands, whereas stripe 7 shows more limited widening (<xref ref-type="bibr" rid="bib71">Small et al., 1996</xref>). Mutating Hb-binding sites in the <italic>eve 3+7</italic> enhancer leads to similar expansion, though perhaps without the anterior shift of stripe 3 (<xref ref-type="bibr" rid="bib75">Struffi et al., 2011</xref>). Maternally deposited <italic>hb</italic> mRNA is ubiquitous but differentially translated in the anterior (<xref ref-type="bibr" rid="bib27">Hülskamp et al., 1989</xref>). 
Zygotically, <italic>hb</italic> is transcribed in both an anterior domain that largely overlaps the maternal <italic>hb</italic> pattern and in a posterior stripe (<xref ref-type="bibr" rid="bib48">Margolis et al., 1995</xref>). Thus, the zygotic mutant likely contains residual Hb protein in the anterior at the time point we use in this study.</p><p>We simulated the zygotic mutant by eliminating Hb altogether in the posterior and decreasing its expression to 20% in the anterior domain (<xref ref-type="fig" rid="fig5">Figure 5B</xref>). In these conditions, the linear model produces a good prediction for both stripes. The quadratic model arguably produces a better prediction for stripe 3, capturing its movement towards the anterior; however, it predicts zero expression in stripe 7. Based on our preference for the quadratic model as described in previous sections, and on the evidence presented in the following section, we again consider minor adjustments to reconcile it with experimental results (<xref ref-type="fig" rid="fig5">Figure 5E,F</xref>).</p><sec id="s2-8-2-1"><title>Direct effects</title><p>The first adjustment is relevant for the zygotic mutant, and assumes some active Hb in the posterior around stripe 7 (<xref ref-type="fig" rid="fig5">Figure 5E</xref>). Specifically, having as little as 0.15 of Hb (∼15% of maximal expression) in this region is sufficient for a good prediction of stripe 7. Next, we simulated the effects of having some residual Hb-binding sites in the enhancer, as a mutagenesis experiment may not abolish all binding (e.g., see <xref ref-type="bibr" rid="bib11">Clyde et al., 2003</xref> compared to <xref ref-type="bibr" rid="bib75">Struffi et al., 2011</xref>). A simple way to model this is to have the same low level of Hb activity as in the zygotic mutant, which produces a good prediction as just described. 
As an alternative, we also considered whether incomplete mutagenesis could affect Hb’s dual-regulatory behavior. <xref ref-type="bibr" rid="bib56">Papatsenko and Levine (2008)</xref> proposed that the dual role is facilitated by adjacently bound Hb molecules masking each other’s active sites; in this scenario, we would expect dual regulation to be attenuated as binding sites are lost through mutagenesis. We simulated this by dampening the regulatory effect of Hb at higher concentrations and found that the predicted expression patterns agree with experimental results (<xref ref-type="fig" rid="fig5">Figure 5F</xref>, <xref ref-type="fig" rid="fig5s3">Figure 5—figure supplement 3</xref>). Notably, this prediction does not show movement of stripe 3 towards the anterior or expansion in stripe 7, correctly reflecting the experimental results of binding site mutagenesis.</p></sec></sec></sec><sec id="s2-9"><title>Models predict <italic>eve 2</italic> and <italic>3+7</italic> expression at earlier time points</title><p>We also tested the linear and quadratic models for both <italic>eve 2</italic> and <italic>eve 3+7</italic> on the two previous time points in the Virtual Embryo, which were not used for training (<xref ref-type="fig" rid="fig6">Figure 6</xref>). The results for <italic>eve 2</italic> show a wider stripe forming before it narrows to the boundaries of stripe 2. This mirrors published results for an <italic>eve 2</italic> reporter as well as the endogenous expression of <italic>eve</italic> (<xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>; <xref ref-type="bibr" rid="bib1">Andrioli et al., 2002</xref>). The predictions for <italic>eve 3+7</italic> are also consistent in terms of the positions of the stripes, although they have stripe 3 appearing earlier than stripe 7. 
This timing difference is not obvious in the endogenous <italic>eve</italic> expression recorded in the Virtual Embryo, although stripe 7 does appear relatively weak in some transgenic reporters (<xref ref-type="bibr" rid="bib71">Small et al., 1996</xref>). At the earlier time points the difference in sharpness of the stripe borders between the quadratic and linear model is more pronounced suggesting that the interpretation of positional information by the quadratic model is more stable and precise.<fig id="fig6" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.025</object-id><label>Figure 6.</label><caption><title>Models predict <italic>eve 2</italic> and <italic>3+7</italic> expression at earlier time points.</title><p>Model predictions for earlier time points in the Virtual Embryo are shown for the (<bold>A</bold>) <italic>eve 2</italic> and (<bold>B</bold>) <italic>eve 3+7</italic> linear and quadratic models. The time points are labeled from the start of the dataset; the third time point is the one used throughout the main text. For <italic>eve 2,</italic> the linear and quadratic models show a wider stripe at the second time point and a well-defined stripe at time point 3. This matches the in situ images below from <xref ref-type="bibr" rid="bib1">Andrioli et al. (2002)</xref> which show a transgenic reporter at early and mid cycle. The predictions for <italic>eve 3+7</italic> are consistent in terms of the positions of the stripes, with stripe 3 appearing earlier than stripe 7. At the earlier time points the difference in sharpness of the stripe borders between the quadratic and linear model is more pronounced suggesting that the interpretation of positional information by the quadratic model is more stable and precise.</p><p>The in situ image for <italic>eve 3+7</italic> is reprinted with permission from Figure 2C, <xref ref-type="bibr" rid="bib71">Small et al. 
(1996)</xref>, <italic>Developmental Biology</italic> (© copyright Elsevier, 1996, All Rights Reserved). In situ images for <italic>eve 2</italic> are reproduced with permission from Figure 4A,B, <xref ref-type="bibr" rid="bib1">Andrioli et al. (2002)</xref>, <italic>Development</italic> (© copyright The Company of Biologists, 2002, All Rights Reserved).</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.025">http://dx.doi.org/10.7554/eLife.00522.025</ext-link></p></caption><graphic xlink:href="elife00522f006"/></fig></p></sec><sec id="s2-10"><title>Quadratic <italic>eve 2</italic> and <italic>3+7</italic> models predict TF misexpression results better than linear models</title><p>As a final test of our models, we compare our predictions to the misexpression work of Clyde and colleagues (<xref ref-type="fig" rid="fig7">Figure 7</xref>, <xref ref-type="fig" rid="fig7s1">Figure 7—figure supplement 1</xref>; <xref ref-type="bibr" rid="bib11">Clyde et al., 2003</xref>). In this study, the authors constructed transgenes with a <italic>snail</italic> promoter that misexpressed <italic>hb</italic> or <italic>kni</italic> in the ventral region of the embryo and recorded the effects of expressing one or two copies of these transgenes. The experiments confirmed that Hb and Kni repress <italic>eve 3+7</italic> and <italic>4+6</italic> at different concentrations as suggested by <xref ref-type="bibr" rid="bib18">Fujioka et al. (1999)</xref>. However, the experiments also revealed some curious observations that are not easily explained. First, stripes 3 and 7 respond differently to the same additional concentrations of Hb despite being regulated by the same enhancer. Secondly, and most intriguingly, the results show substantial bending of stripes: in the presence of one copy of the <italic>hb</italic> transgene, stripe 3 extends towards the posterior of the embryo, and with two copies, stripe 7 bends towards the anterior. 
These behaviors cannot be explained readily by simple, qualitative inspection of the embryos and they were not explored in the original study.<fig-group><fig id="fig7" position="float"><object-id pub-id-type="doi">10.7554/eLife.00522.026</object-id><label>Figure 7.</label><caption><title>Quadratic models accurately predict fine-scale features of expression patterns due to input misexpression.</title><p>The study by <xref ref-type="bibr" rid="bib11">Clyde et al. (2003)</xref> misexpressed <italic>hb</italic> and <italic>kni</italic> along the ventral surface of the embryo using transgenes driven by a <italic>snail</italic> promoter and recorded the effects of one or two copies of these transgenes on <italic>eve</italic> expression. We replicated these experiments using quadratic models for <italic>eve 2</italic> and <italic>eve 3+7</italic> (trained on stripe 3), by adding Hb and <italic>kni</italic> in proportion to the distribution of <italic>snail</italic> in the Virtual Embryo dataset. As described in the main text, we also added an indirect effect from Hb activating <italic>Kr</italic>. (<bold>A</bold>) With <italic>kni</italic> misexpression, the model accurately predicts the thinning (x1 transgene), then cutting of stripe 3 (x2). (<bold>B</bold>) With Hb misexpression, the model successfully predicts the bulging, then cutting and bending of stripe 3 (x2), and the bulging of stripe 7 (x2). Stripe 2 remains unaffected in both perturbations, in agreement with the experimental results. The accuracy of the predictions indicates that the quadratic model for <italic>eve 3+7</italic> can explain the experimental results very well. In contrast the linear models are unable to predict these results.</p><p>In situ images are reproduced from Figures 1F–H and 1K–M, <xref ref-type="bibr" rid="bib11">Clyde et al. 
(2003)</xref>, published in <italic>Nature</italic>; Nature Publishing Group has granted permission to reproduce these images under the terms of the Creative Commons Attribution 3.0 Unported License (<ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">CC BY 3.0</ext-link>).</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.026">http://dx.doi.org/10.7554/eLife.00522.026</ext-link></p></caption><graphic xlink:href="elife00522f007"/></fig><fig id="fig7s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.00522.027</object-id><label>Figure 7—figure supplement 1.</label><caption><title>Supplementary misexpression predictions of the <italic>eve 2</italic> and <italic>eve 3+7</italic> linear (A,B) and quadratic (C–E) models.</title><p>(<bold>A</bold>, <bold>C</bold> and <bold>E</bold>) have no indirect effects, whereas (<bold>B</bold> and <bold>D</bold>) include a hypothetical indirect effect mediated by Hb activating Kr. This is modeled by adding Kr in proportion (50%) to the increase of Hb. The <italic>eve 3+7</italic> model in (<bold>A</bold>–<bold>D</bold>) was trained on stripes 3 and 7, whereas in (<bold>E</bold>) it was trained on stripe 3 only. The predictions are shown for one (x1) and two (x2) copies of the <italic>hb</italic> and <italic>kni</italic> transgenes as described in the main text.</p><p><bold>DOI:</bold><ext-link ext-link-type="doi" xlink:href="10.7554/eLife.00522.027">http://dx.doi.org/10.7554/eLife.00522.027</ext-link></p></caption><graphic xlink:href="elife00522fs016"/></fig></fig-group></p><p>To model this experiment, we added Hb and <italic>kni</italic> to the Virtual Embryo in proportion to the measured distribution of <italic>snail</italic> at different concentrations ranging from 0.1 to 0.4 (∼10% to ∼40% of maximal expression). 
We simulated the responses of <italic>eve 2</italic> and <italic>eve 3+7</italic> using both the linear and quadratic models above and a quadratic model trained on stripe 3. <xref ref-type="fig" rid="fig7">Figure 7</xref> and <xref ref-type="fig" rid="fig7s1">Figure 7—figure supplement 1</xref> display the predictions of these models at 0.2 and 0.4 of Hb and <italic>kni</italic>, simulating the effects of one or two copies of the transgene respectively. <xref ref-type="fig" rid="fig7">Figure 7</xref> includes a putative indirect effect on <italic>eve 2</italic> via Kr (see below).</p><p>The quadratic models predict the fine-scale effects of this misexpression experiment with remarkable accuracy whereas the linear models do not (compare <xref ref-type="fig" rid="fig7">Figure 7</xref> to <xref ref-type="fig" rid="fig7s1">Figure 7—figure supplement 1A,B</xref>). Specifically, the quadratic model trained on stripe 3 successfully reproduces the bending and bulging of both stripes 3 and 7 under <italic>hb</italic> misexpression (<xref ref-type="fig" rid="fig7">Figure 7B</xref>), as well as the repression seen with <italic>kni</italic> misexpression (<xref ref-type="fig" rid="fig7">Figure 7A</xref>). Our models for <italic>eve 2</italic> predict that since Hb is an activator, increasing the concentration of Hb should lead to increased activation of <italic>eve 2</italic> and a resultant broadening of the stripe (<xref ref-type="fig" rid="fig7s1">Figure 7—figure supplement 1A,C,E</xref>); but this is not in fact observed in the misexpression study. Indirect effects of Hb on another factor, such as Kr, can resolve this discrepancy: adding Kr in proportion (50%) to the increase of Hb does indeed prevent stripe 2 from expanding, but only in the quadratic model and not in the linear (<xref ref-type="fig" rid="fig7">Figure 7</xref> and <xref ref-type="fig" rid="fig7s1">Figure 7—figure supplement 1B,D</xref>). 
Given all the evidence provided here, on balance we conclude that the quadratic models are the most likely for both <italic>eve 2</italic> and <italic>eve 3+7</italic>.</p></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><sec id="s3-1"><title>Summary</title><p>Our goal was to understand the regulatory system underlying spatiotemporal patterning in the early <italic>Drosophila</italic> embryo by fitting regulatory input functions to the output of individual enhancers. Our models are accurate, predictive and simple to apply and interpret. We showed that simple functional forms relating TF concentrations to <italic>eve</italic> expression outputs are highly predictive of wild-type and mutant expression patterns. In doing so, we have demonstrated that precise positional information—in other words, information interpreted by individual nuclei to produce an expression pattern—is available in the early embryo. We determined whether TFs are most informative when serving as activators or repressors of each enhancer, and we also explored whether a dual-regulatory role for some TFs improved expression predictions. Here we discuss our work in relation to other models of regulatory function, the insights our models provide into transcriptional regulation and positional information in the embryo, and the experimentally testable hypotheses proposed by our models.</p></sec><sec id="s3-2"><title>Previous models</title><p>The regulation underlying anteroposterior patterning of the <italic>Drosophila</italic> blastoderm has long been a favorite system for modeling work; for recent reviews, see for example <xref ref-type="bibr" rid="bib28">Jaeger et al. (2009)</xref> and <xref ref-type="bibr" rid="bib54a">Papatsenko (2009)</xref>. 
Some models have been successful in reproducing the gap gene patterns (<xref ref-type="bibr" rid="bib29">Jaeger et al., 2004</xref>, <xref ref-type="bibr" rid="bib28">2007</xref>; <xref ref-type="bibr" rid="bib8">Bieler et al., 2011</xref>; <xref ref-type="bibr" rid="bib55">Papatsenko and Levine, 2011</xref>), but none have succeeded in accurately predicting precise stripes of <italic>even skipped</italic> across the whole anteroposterior axis of the embryo (<xref ref-type="bibr" rid="bib42">Levine, 2008</xref>). In general, previous models have focused on utilizing information contained in the <italic>cis</italic>-regulatory sequence; for example predicting expression and evaluating potential TFs of a 1.7kb region of regulatory DNA upstream of <italic>eve</italic> (<xref ref-type="bibr" rid="bib32">Janssens et al., 2006</xref>), fitting models to fusions of <italic>eve</italic> enhancers and predicting expression from different regulatory DNA (<xref ref-type="bibr" rid="bib39">Kim et al., 2013</xref>), testing models of TF binding and synergy by predicting expression across many enhancers (<xref ref-type="bibr" rid="bib66">Segal et al., 2008</xref>; <xref ref-type="bibr" rid="bib24">He et al., 2010</xref>) and identifying enhancer sequences within the genome based on the fit between predicted and observed expression patterns (<xref ref-type="bibr" rid="bib35">Kazemian et al., 2010</xref>).</p><p>Our choices in modeling the regulatory function of enhancers differ from these previous studies in a number of important respects. First, our models are highly accurate in fitting the <italic>eve</italic> expression pattern in the entire embryo. This is in part because we chose to model the regulatory function of each enhancer separately, rather than fitting a single model that applies across many enhancers simultaneously. By defining parameters that are specific for each enhancer, we are able to assign the regulatory roles for TFs in a context-specific manner. 
Second, our models also perform well because, unlike previous studies, we do not impose any biological mechanisms on our models (e.g., a ‘thermodynamic score’ for protein–DNA interactions). Instead we worked the other way round: we tested models that fit data as accurately as possible and then inferred the underlying mechanisms. This simple framework nonetheless allows us to propose experimentally testable hypotheses. Third, our modeling framework is quick to apply. This allowed us to search comprehensively for informative regulators, a property that is particularly valuable for studying poorly characterized enhancers.</p></sec><sec id="s3-3"><title>Inferred mechanisms for regulatory input function</title><p>Since the models are accurate and predictive, they may reflect the underlying molecular mechanism for transcriptional regulation. Further, the models are relatively easy to interpret, so we can infer what they mean in terms of biological mechanism. Here we highlight three features.</p><sec id="s3-3-1"><title>Thresholding a combination of TFs is sufficient for positional information</title><p>One of the important questions in animal development is how each cell determines its position in the embryo. Early work on positional information in the <italic>Drosophila</italic> embryo was inspired by the idea of a morphogen gradient that is interpreted by the nuclei according to a set threshold (<xref ref-type="bibr" rid="bib5">Ashe and Briscoe, 2006</xref>; <xref ref-type="bibr" rid="bib12">Crick, 1970</xref>; <xref ref-type="bibr" rid="bib81">Wolpert, 1969</xref>, <xref ref-type="bibr" rid="bib82">1996</xref>). More recently, it has been concluded that a lone-acting morphogen is insufficient for providing precise positional information to the embryo, especially since no gradient with this characteristic has been measured in an embryo (<xref ref-type="bibr" rid="bib38">Kerszberg and Wolpert, 2007</xref>; <xref ref-type="bibr" rid="bib83">Wolpert, 2011</xref>). 
Our model, however, shows that the combined action of multiple morphogens and a corresponding interpretative threshold is indeed able to read positional information from measured gradients alone. In particular, it succeeds by applying the threshold to the overall balance of activators and repressors rather than to each factor individually.</p><p>Focusing on the contributions of individual TFs also tends to emphasize the role of repressors in providing positional information to the <italic>eve</italic> stripes. Since repressors are often crucial in defining the borders of the stripes, it is natural to suppose that the activators are merely permissive, and that precision in positional information is provided by the repressors. Here we show that an alternative view is compatible with the data. In particular, activators and repressors contribute symmetrically to positional information: they work to increase or decrease the probability of transcription, but neither class acts separately according to a threshold that is independent of the concentrations of other factors. Thus, if more activators are present in a nucleus, a higher concentration of repressors will be required to reduce transcription to the same level. This means that positional information cannot be defined by any one factor in isolation, and nor can mutant results be interpreted reliably in the absence of data on other factors.</p></sec><sec id="s3-3-2"><title>Pairwise cooperative interactions between TFs are not necessary for synergy</title><p>Our model can help clarify the concept of synergy, where the effect of one regulator depends on the concentration of another. 
This has been proposed in the context of transcriptional activators in general (<xref ref-type="bibr" rid="bib76">Struhl, 2001</xref>) and observed between Hb and Bcd in controlling expression of <italic>eve 2</italic> (<xref ref-type="bibr" rid="bib74">Stanojevic et al., 1991</xref>; <xref ref-type="bibr" rid="bib68">Simpson-Brose et al., 1994</xref>). Our model shows that this effect is observed with a linear combination of concentrations: that is, without any pairwise interactions between Hb and Bcd or other factors. Thus, our model is compatible with the early findings of <xref ref-type="bibr" rid="bib4">Arnosti et al. (1996)</xref>, which suggest that <italic>eve</italic> transcription is controlled by the total balance of activators and repressors rather than through complex and intricate combinatorial interactions between TFs. However, this is not to say that cooperative interactions do not take place, or are not important in other contexts, but rather that it is necessary to distinguish between synergistic interactions that can be explained by independent binding of multiple factors (as in our model), and those that occur as a result of pairwise interactions between TFs. We expect pairwise interactions between TFs on the DNA to require particular arrangements of binding sites. Therefore, the success of our model without pairwise interactions suggests that the ordering and exact spacing of binding sites are not important, except potentially in the case of dual regulation. This agrees well with multiple observations about the flexibility of enhancer sequences, which can tolerate rearrangement over evolutionary time while maintaining their function (reviewed in <xref ref-type="bibr" rid="bib9">Borok et al., 2010</xref>).</p></sec><sec id="s3-3-3"><title>Dual regulatory function of Hb and Bcd</title><p>Hb has a dual role: it acts as a repressor in some enhancers (e.g., <italic>eve 3+7</italic>) and an activator in others (e.g. 
<italic>eve 2</italic>) (<xref ref-type="bibr" rid="bib70">Small et al., 1992</xref>, <xref ref-type="bibr" rid="bib71">1996</xref>). Here, like <xref ref-type="bibr" rid="bib56">Papatsenko and Levine (2008)</xref>, we model a dual role for Hb in the context of a single enhancer. In our model of <italic>eve 3+7</italic>, including a quadratic term for concentration-dependent dual regulation produces better wild-type predictions, explains experimental perturbations accurately (with certain assumptions), and produces consistent fits across different training subsets. Although <xref ref-type="bibr" rid="bib35">Kazemian et al. (2010)</xref> did not find a quadratic term for Hb generally useful for fitting logistic models to <italic>Drosophila</italic> expression patterns, they did find this to be true for Bcd, particularly for the anterior parts of the expression patterns. In our work, this term is not needed for a good fit, but we add it for <italic>eve 2</italic> to show how a repressive role in the anterior can be reconciled with an activating role around stripe 2 (<xref ref-type="bibr" rid="bib85">Zhao et al., 2002</xref>; <xref ref-type="bibr" rid="bib69">Singh et al., 2005</xref>).</p><p>Concentration-dependent regulatory activities have been observed in other systems: for instance in humans, at low concentrations, Sp1 acts as an activator of the folate receptor gene in conjunction with Ets TFs; at higher concentrations it becomes a repressor by blocking Ets binding (<xref ref-type="bibr" rid="bib36">Kelley et al., 2003</xref>). Our model does not reveal how Hb and Bcd achieve dual-regulatory activity, and it is quite possible that they make use of different mechanisms. One possibility is a change in protein–protein interactions, through formation of homo-oligomers or interactions with co-factors (e.g., <xref ref-type="bibr" rid="bib31">Janody et al., 2001</xref>). 
Alternatively, as with Sp1, changes in DNA occupancies may alter how regulators interact with adjacent TF molecules. We discuss experimental tests of these possibilities below. Regardless of mechanism, however, we propose that concentration-dependent effects are important, in contrast to the hypothesis that concentrations above a predefined threshold are neutral in effect. Moreover, we suggest that similar analysis techniques could be used to test potential dual-regulatory capabilities of other regulators, such as Gli and Lef/Tcf in the Hedgehog and Wnt signaling pathways (<xref ref-type="bibr" rid="bib46">Logan and Nusse, 2004</xref>; <xref ref-type="bibr" rid="bib2">Arce et al., 2006</xref>; <xref ref-type="bibr" rid="bib78">Varjosalo and Taipale, 2008</xref>; <xref ref-type="bibr" rid="bib79">Whitington et al., 2011</xref>).</p></sec></sec><sec id="s3-4"><title>Experimentally testable hypotheses</title><p>Our models predict which input TFs are relevant for a given enhancer, and whether they act as activators or repressors. In the case of <italic>eve 2</italic> and <italic>eve 3+7</italic>, we showed that many of these predictions are confirmed by independent experiments already in the literature. These studies involve either perturbing a candidate regulator by mutation, over-expression or misexpression, or mutagenizing binding sites for a candidate regulator in an enhancer sequence, and then measuring the expression of <italic>eve</italic>. To confirm our predictions, we made qualitative comparisons between published data (in the form of a single representative image) and our model predictions. Having validated our modeling framework on these well-characterized enhancers, we can now broadly apply this framework to discover regulators for less well-characterized enhancers in this system. 
While many enhancers in this network have been mapped by computational studies and functional genomics (<xref ref-type="bibr" rid="bib7">Berman et al., 2002</xref>; <xref ref-type="bibr" rid="bib64">Schroeder et al., 2004</xref>; <xref ref-type="bibr" rid="bib35">Kazemian et al., 2010</xref>; <xref ref-type="bibr" rid="bib50">Négre et al., 2011</xref>; <xref ref-type="bibr" rid="bib63">Schroeder et al., 2011</xref>), our knowledge of most of their regulatory input functions remains incomplete. Our modeling framework complements existing functional genomic and bioinformatics approaches: combined they will allow a comprehensive description of the relevant inputs of each of these enhancers, and how those inputs work together to produce an output expression pattern.</p><p>Our models also point to a role for concentration-dependent effects of Hb and Bcd on their targets. We hypothesize that this is due to concentration-dependent differences in protein-protein interactions, perhaps mediated by the arrangement of TF binding sites in an enhancer, as has been proposed for Hb (<xref ref-type="bibr" rid="bib56">Papatsenko and Levine, 2008</xref>). To test whether binding site arrangements are important, the binding sites for Bcd and Hb can be rearranged within the <italic>eve 2</italic> and <italic>eve 3+7</italic> enhancers, and the output of these mutated enhancers measured. To test which parts of the TFs are involved in mediating protein-protein interactions, the TFs themselves can be mutated, and protein–protein interactions can be assayed by in vitro binding studies. Finally, to test the concentration-dependent effects directly, the concentration of Hb and Bcd can be manipulated in vivo by over-expression, knock-down and misexpression. Our modeling framework is especially useful in this last case, as predictions with and without concentration-dependent effects can be compared. 
We propose that misexpression studies are likely to be particularly informative, based on the fine-scale differences such as stripe bending and bulging that we were able to predict.</p><p>Instead of making qualitative comparisons to experimental data, it would be ideal to test our models quantitatively at cellular resolution. This is possible if we create additional Virtual Embryo data where perturbations, both to input TFs and enhancer sequences, are measured. For knock-down, over-expression or misexpression of TFs, we will need to create a new Virtual Embryo for each perturbation. This will capture all of the direct and indirect consequences of perturbing the TF. We can assess the consequences of mutating enhancer sequences by integrating transgenic reporters into any given Virtual Embryo dataset, as in <xref ref-type="bibr" rid="bib84">Wunderlich et al., 2012</xref>. Creating these new datasets is not a trivial undertaking technically but it would provide the framework for us to directly compare the output of our model predictions to experimental data at cellular resolution to detect fine-scale differences, and without making assumptions about indirect effects. For example, this would allow us to test our proposed role for <italic>tll</italic> in repressing the posterior border of <italic>eve</italic> stripe 7, where classic experiments have been inconclusive and to validate future predictions for other enhancers in the segmentation network. 
We fully anticipate that analyzing this type of data will lead to further refinements of our models.</p></sec><sec id="s3-5"><title>General applicability of our modeling approach</title><p>Clearly, our model depends on the quality of the data in the Virtual Embryo, which was derived from many in situ hybridization images of the <italic>Drosophila</italic> blastoderm (<xref ref-type="bibr" rid="bib37">Keränen et al., 2006</xref>; <xref ref-type="bibr" rid="bib47">Luengo Hendriks et al., 2006</xref>; <xref ref-type="bibr" rid="bib16">Fowlkes et al., 2008</xref>). To predict spatiotemporal expression patterns, it’s important that the measurements are quantitative and at the resolution of individual cells. One advantage of the blastoderm is that the relevant nuclei are near the surface of the embryo, making it easier to segment the overall fluorescence signal and assign it to individual nuclei. However, microscopy and other techniques such as single-cell transcriptomics are continually improving (<xref ref-type="bibr" rid="bib34">Kalisky et al., 2011</xref>); we anticipate that many comparable datasets will become available over time, both for other developmental time points in <italic>Drosophila</italic>, and in other model systems. Our study demonstrates how theoretical models can be applied to such data in order to make new biological discoveries.</p></sec></sec><sec id="s4" sec-type="methods"><title>Methods</title><sec id="s4-1"><title>Virtual Embryo dataset</title><p>Release 2.0 of the Virtual Embryo dataset was downloaded from the Berkeley <italic>Drosophila</italic> Transcription Network Project website (<ext-link ext-link-type="uri" xlink:href="http://bdtnp.lbl.gov/Fly-Net/">http://bdtnp.lbl.gov/Fly-Net/</ext-link>) (<xref ref-type="bibr" rid="bib16">Fowlkes et al., 2008</xref>). The release contains composited mRNA expression measurements for 95 genes in 6078 nuclei at six time points (or ‘cohorts’). 
Also provided are protein expression data for four gene products (Bcd, Hb, Kr, and Gt) for some of the time points. Data for the current study were extracted from a ‘comma-separated values’ (CSV) format Virtual Embryo file (D_mel_wt__alas_r2.vpc): each row corresponds to a nucleus in the embryo, with columns containing measurements including three-dimensional coordinates, average expression level for a given gene, time point for measurement etc. Expression measurements are provided as relative values for each nucleus, ranging from ‘0’ for minimum expression across all six time points to a little over ‘1’ for maximum expression (e.g., the maximum for <italic>eve</italic> is 1.11 and for Hb it is 1.05). The variability in the maximum is a result of the method used to determine the relative variation between nuclei across different time points in the Virtual Embryo (<xref ref-type="bibr" rid="bib16">Fowlkes et al., 2008</xref>).</p><p>The coordinates of the Virtual Embryo are along the anteroposterior (x), left-right (y) and dorsoventral (z) axes. The difference between the minimum and maximum is 404 μm for the x-coordinate, 154 μm for the y-coordinate and 155 μm for the z-coordinate.</p></sec><sec id="s4-2"><title>Training data preparation</title><p>Training was performed using expression measurements at the third time point (Cohort 3). All 6078 nuclei were classed as ON (2444) or OFF (3634) depending on whether <italic>eve</italic>’s expression was above or below the threshold of 0.2 (approximately 20% of maximum). Nuclei were grouped into the seven <italic>eve</italic> stripes making use of the neighboring nuclei information provided in the <italic>Virtual Embryo</italic> (stripe 2 = 348 nuclei, stripe 3 = 342 nuclei, stripe 7 = 383 nuclei). 
mRNA expression measurements for 34 genes were included in the training data (<italic>brk, bun, cad, CG10924, CG17786, CG4702, cnc, croc, Cyp310a1, D, Dfd, Doc2, emc, fj, fkh, hkb, kni, knrl, oc, path, rho, sala, slp1, slp2, sna, sob, srp, term, tll, Traf1, trn, tsh, twi, zen</italic>). For four TFs (Bcd, Hb, Kr, Gt), we used the protein expression measurements instead.</p></sec><sec id="s4-3"><title>Training logistic regression models</title><p>Logistic regression was used to model <italic>eve</italic> expression by linking the regulator concentrations as continuous input variables, to <italic>eve</italic>’s expression state as the binary output. For a nucleus <italic>i</italic>, the predictor, <italic>η</italic><sub><italic>i</italic></sub>, is calculated as a linear combination of concentrations:<disp-formula id="equ1"><mml:math id="m1"><mml:mrow><mml:msub><mml:mi>η</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mn>...</mml:mn><mml:mo>+</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></disp-formula>where <italic>x</italic><sub><italic>ki</italic></sub> is the expression measurement of the <italic>k</italic>th gene for the <italic>i</italic>th nucleus with the <italic>β</italic> to be estimated. 
For the quadratic models, a single quadratic term was added for the regulator, q, in question:</p><disp-formula id="equ2"><mml:math id="m2"><mml:mrow><mml:msub><mml:mi>η</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mn>...</mml:mn><mml:mo>+</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>q</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow></mml:math></disp-formula><p>The predictor is linked to the estimated probability <italic>p</italic><sub><italic>i</italic></sub> of <italic>eve</italic> being ON in the <italic>i</italic>th nucleus:</p><disp-formula id="equ3"><mml:math id="m3"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:msub><mml:mrow><mml:mo>−</mml:mo><mml:mi>η</mml:mi></mml:mrow><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula><p>Models for <italic>eve 2</italic> were trained using the 348 nuclei defined as ON in stripe 2, as well as the nuclei defined as OFF excluding the nuclei of other stripes and their immediate neighbors (2588 nuclei). 
Similarly, models for <italic>eve 3+7</italic> were trained using 725 ON nuclei in stripes 3 and 7, and 2756 OFF nuclei.</p><p>The models were fitted using the R function glm from the stats package, which uses Iteratively Re-weighted Least Squares. For our best fitting models, glm issued a fitting and evaluation warning message. This was because most of the logistic models that classify the <italic>eve</italic> stripes successfully have some fitted probabilities very near 0 or 1. (The nuclei on the borders of the stripes have intermediate values). Although this can suggest problems in certain situations, here, in agreement with Ripley (<xref ref-type="bibr" rid="bib61">Ripley, 2008</xref>) it is viewed as a desirable outcome of classification. The trained model was then used to predict <italic>eve</italic> expression in all 6078 nuclei across the entire embryo, using the concentrations of the relevant regulators.</p></sec><sec id="s4-4"><title>Tests of model consistency across different training subsets</title><p>The consistency of the model across different training subsets was tested in several ways. (i) Each model was trained on a subset of the training dataset and then used to predict <italic>eve</italic> expression for the whole embryo. Subsets used included: nuclei within 20 μm either side of the lateral midline; nuclei within the relevant stripe(s) and only their immediate neighbors; and a cross-validation test, which was the average of 100 predictions each trained on a random subset of 50 nuclei. For <italic>eve 3+7</italic>, two extra subsets excluded the ON nuclei from either stripe 3 or 7. Less consistent models produce poor predictions after training on some subsets. (ii) Each model was trained using all 38 regulators and then used to predict <italic>eve</italic> expression for the whole embryo. Models suffering from localized over-fitting show fragmented <italic>eve</italic> expression. 
(iii) Models were trained for each of the stripes in turn, using the regulators of the best-fitting models (such as Bcd, Hb, Gt, and Kr for <italic>eve 2</italic>). This showed that the given regulators are not able to fit any arbitrary stripe well.</p></sec><sec id="s4-5"><title>Predictions of regulatory perturbations</title><p>The effects of regulatory perturbations were simulated by adjusting the concentrations of the relevant regulator without changing any model parameters (i.e., without retraining), and then predicting <italic>eve</italic> expression across the whole embryo. Binding site mutations and null mutants were simulated by setting the regulator concentration to ‘0’ in all nuclei. Where indicated, indirect effects were simulated by adjusting the expression level of downstream regulators and again, predicting <italic>eve</italic> expression without any model adjustments. Other types of regulatory perturbations, such as the misexpression studies, were performed similarly by adjusting regulator concentrations as described in the main text.</p></sec><sec id="s4-6"><title>Visual display of model outputs</title><p>Model predictions of wild-type and mutant <italic>eve</italic> expression are displayed graphically for each nucleus in the embryo. The Virtual Embryo contains three-dimensional coordinates for each nucleus, making it possible to show the predictions in their spatial context. In most figures, embryos are shown from two perspectives: lateral and three-dimensional. In the lateral perspective, each nucleus is plotted using the (<italic>x</italic>,<italic>z</italic>) coordinate, ignoring the <italic>y</italic> coordinate. The <italic>x-</italic> and <italic>z-</italic>axes are aligned to the anteroposterior (left to right) and dorsoventral (top to bottom) axes respectively, so showing a view from the left side of the embryo. 
Since predictions for the left and right sides are similar, all nuclei (i.e., both left and right) are plotted in one composite view from the left side of the embryo. The three-dimensional perspective is plotted using the cloud function from the lattice package in R, similarly from an anterior perspective. Nuclei are colored according to the model’s prediction, from p=0 (light) to p=1 (dark). The color scale for predictions within stripes is grey-scale and predictions outside of stripes are shown on a red scale, with peach for values below 0.15.</p></sec><sec id="s4-7"><title>Calculating the accuracy of model outputs</title><p>To accompany the visual display of wild-type predictions, we also calculated percentage accuracies to aid comparison between alternative models. These values provide good indications of model performance in predicting the stripe boundaries. For each model, we calculated the proportion of nuclei predicted as being ON (p&gt;0.5) within the stripe(s) under consideration (i.e., true positives), in nuclei immediately adjacent to the stripe nuclei, and two nuclei away (i.e., false positives). The identities of neighboring nuclei are provided by the Virtual Embryo dataset.</p></sec><sec id="s4-8"><title>Regulator discovery</title><p>For <italic>eve 2</italic>, we trained all possible linear models using four out of 38 regulators in the dataset (total 73,815 models), using the log likelihood of each fitted model as its score. A similar approach was used for exploring quadratic models, except that any model containing Bcd and/or Hb also included the corresponding quadratic term(s). The results are summarized as heat maps as shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>.</p></sec><sec id="s4-9"><title>Software</title><p>Analysis was performed with R version 2.15.1 (<xref ref-type="bibr" rid="bib58">R Core Team, 2012</xref>), using colors from the ColorBrewer palettes in the RColorBrewer package. 
Plots made use of the lattice, ggplot2 and RBGL packages. The graph package was used to select neighboring nuclei.</p></sec></sec></body><back><ack id="ack"><title>Acknowledgements</title><p>We would like to thank the members of the DePace laboratory, in particular Tara Lydiard-Martin, Max V Staller, Zeba Wunderlich, and Ben Vincent, for insightful discussions throughout the project.</p></ack><sec sec-type="additional-information"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="conflict" id="conf1"><p>The authors declare that no competing interests exist.</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>GRI, Selection and preparation of data, Conception and design, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con2"><p>JF, Supervisory role, Discussions, Drafting or revising the article</p></fn><fn fn-type="con" id="con3"><p>RA, Supervisory role, Discussions, Drafting or revising the article</p></fn><fn fn-type="con" id="con4"><p>AHD, Supervisory role, Discussions, Interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con5"><p>NML, Supervisory role, Discussions, Interpretation of data, Drafting or revising the article</p></fn></fn-group></sec><sec sec-type="supplementary-material"><title>Additional files</title><sec sec-type="datasets"><title>Major dataset</title><p>The following previously published dataset was used:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro1"><name><surname>Fowlkes</surname><given-names>CC</given-names></name>, <name><surname>Hendriks</surname><given-names>CL</given-names></name>, <name><surname>Keränen</surname><given-names>SV</given-names></name>, 
<name><surname>Weber</surname><given-names>GH</given-names></name>, <name><surname>Rübel</surname><given-names>O</given-names></name>, <name><surname>Huang</surname><given-names>MY</given-names></name>, <etal/>, <year>2008</year><x>, </x><source>A quantitative spatiotemporal atlas of gene expression in the <italic>Drosophila</italic> blastoderm</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://bdtnp.lbl.gov/Fly-Net/bidatlas.jsp">http://bdtnp.lbl.gov/Fly-Net/bidatlas.jsp</ext-link><x>, </x><comment>Publicly available at <ext-link ext-link-type="uri" xlink:href="http://bdtnp.lbl.gov/">http://bdtnp.lbl.gov/</ext-link>.</comment></related-object></p></sec></sec><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Andrioli</surname><given-names>LP</given-names></name><name><surname>Vasisht</surname><given-names>V</given-names></name><name><surname>Theodosopoulou</surname><given-names>E</given-names></name><name><surname>Oberstein</surname><given-names>A</given-names></name><name><surname>Small</surname><given-names>S</given-names></name></person-group><year>2002</year><article-title>Anterior repression of a <italic>Drosophila</italic> stripe enhancer requires three position-specific mechanisms</article-title><source>Development</source><volume>129</volume><fpage>4931</fpage><lpage>40</lpage><ext-link ext-link-type="uri" xlink:href="http://dev.biologists.org/content/129/21/4931.long">http://dev.biologists.org/content/129/21/4931.long</ext-link></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Arce</surname><given-names>L</given-names></name><name><surname>Yokoyama</surname><given-names>NN</given-names></name><name><surname>Waterman</surname><given-names>ML</given-names></name></person-group><year>2006</year><article-title>Diversity of LEF/TCF action in development and 
disease</article-title><source>Oncogene</source><volume>25</volume><fpage>7492</fpage><lpage>504</lpage><pub-id pub-id-type="doi">10.1038/sj.onc.1210056</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Arnosti</surname><given-names>DN</given-names></name></person-group><year>2003</year><article-title>Analysis and function of transcriptional regulatory elements: insights from <italic>Drosophila</italic></article-title><source>Annu Rev Entomol</source><volume>48</volume><fpage>579</fpage><lpage>602</lpage><pub-id pub-id-type="doi">10.1146/annurev.ento.48.091801.112749</pub-id></element-citation></ref><ref id="bib4"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Arnosti</surname><given-names>DN</given-names></name><name><surname>Barolo</surname><given-names>S</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name><name><surname>Small</surname><given-names>S</given-names></name></person-group><year>1996</year><article-title>The eve stripe 2 enhancer employs multiple modes of transcriptional synergy</article-title><source>Development</source><volume>122</volume><fpage>205</fpage><lpage>14</lpage><ext-link ext-link-type="uri" xlink:href="http://dev.biologists.org/content/122/1/205.abstract">http://dev.biologists.org/content/122/1/205.abstract</ext-link></element-citation></ref><ref id="bib5"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ashe</surname><given-names>HL</given-names></name><name><surname>Briscoe</surname><given-names>J</given-names></name></person-group><year>2006</year><article-title>The interpretation of morphogen gradients</article-title><source>Development</source><volume>133</volume><fpage>385</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1242/dev.02238</pub-id></element-citation></ref><ref 
id="bib6"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bellaïche</surname><given-names>Y</given-names></name><name><surname>Bandyopadhyay</surname><given-names>R</given-names></name><name><surname>Desplan</surname><given-names>C</given-names></name><name><surname>Dostatni</surname><given-names>N</given-names></name></person-group><year>1996</year><article-title>Neither the homeodomain nor the activation domain of Bicoid is specifically required for its down-regulation by the Torso receptor tyrosine kinase cascade</article-title><source>Development</source><volume>122</volume><fpage>3499</fpage><lpage>508</lpage></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Berman</surname><given-names>BP</given-names></name><name><surname>Nibu</surname><given-names>Y</given-names></name><name><surname>Pfeiffer</surname><given-names>BD</given-names></name><name><surname>Tomancak</surname><given-names>P</given-names></name><name><surname>Celniker</surname><given-names>SE</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name><etal/></person-group><year>2002</year><article-title>Exploiting transcription factor binding site clustering to identify cis-regulatory modules involved in pattern formation in the <italic>Drosophila</italic> genome</article-title><source>Proc Natl Acad Sci USA</source><volume>99</volume><fpage>757</fpage><lpage>62</lpage><pub-id pub-id-type="doi">10.1073/pnas.231608898</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bieler</surname><given-names>J</given-names></name><name><surname>Pozzorini</surname><given-names>C</given-names></name><name><surname>Naef</surname><given-names>F</given-names></name></person-group><year>2011</year><article-title>Whole-embryo modeling of early 
segmentation in <italic>Drosophila</italic> identifies robust and fragile expression domains</article-title><source>Biophys J</source><volume>101</volume><fpage>287</fpage><lpage>96</lpage><pub-id pub-id-type="doi">10.1016/j.bpj.2011.05.060</pub-id></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Borok</surname><given-names>MJ</given-names></name><name><surname>Tran</surname><given-names>DA</given-names></name><name><surname>Ho</surname><given-names>MC</given-names></name><name><surname>Drewell</surname><given-names>RA</given-names></name></person-group><year>2010</year><article-title>Dissecting the regulatory switches of development: lessons from enhancer evolution in <italic>Drosophila</italic></article-title><source>Development</source><volume>137</volume><fpage>5</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1242/dev.036160</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bulger</surname><given-names>M</given-names></name><name><surname>Groudine</surname><given-names>M</given-names></name></person-group><year>2011</year><article-title>Functional and mechanistic diversity of distal transcription enhancers</article-title><source>Cell</source><volume>144</volume><fpage>327</fpage><lpage>39</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2011.01.024</pub-id></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Clyde</surname><given-names>DE</given-names></name><name><surname>Corado</surname><given-names>MS</given-names></name><name><surname>Wu</surname><given-names>X</given-names></name><name><surname>Pare</surname><given-names>A</given-names></name><name><surname>Papatsenko</surname><given-names>D</given-names></name><name><surname>Small</surname><given-names>S</given-names></name></person-group><year>2003</year><article-title>A self-organizing system of repressor gradients establishes segmental complexity in <italic>Drosophila</italic></article-title><source>Nature</source><volume>426</volume><fpage>849</fpage><lpage>53</lpage><pub-id pub-id-type="doi">10.1038/nature02189</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Crick</surname><given-names>F</given-names></name></person-group><year>1970</year><article-title>Diffusion in embryogenesis</article-title><source>Nature</source><volume>225</volume><fpage>420</fpage><lpage>2</lpage><pub-id pub-id-type="doi">10.1038/225420a0</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Davidson</surname><given-names>EH</given-names></name></person-group><year>2010</year><article-title>Emerging properties of animal gene regulatory networks</article-title><source>Nature</source><volume>468</volume><fpage>911</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1038/nature09645</pub-id></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dean</surname><given-names>A</given-names></name></person-group><year>2011</year><article-title>In the loop: long range chromatin interactions and gene regulation</article-title><source>Brief Funct Genomics</source><volume>10</volume><fpage>3</fpage><lpage>10</lpage><pub-id 
pub-id-type="doi">10.1093/bfgp/elq033</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><collab>The ENCODE Project Consortium</collab><year>2012</year><article-title>An integrated encyclopedia of DNA elements in the human genome</article-title><source>Nature</source><volume>489</volume><fpage>57</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1038/nature11247</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fowlkes</surname><given-names>CC</given-names></name><name><surname>Hendriks</surname><given-names>CL</given-names></name><name><surname>Keränen</surname><given-names>SV</given-names></name><name><surname>Weber</surname><given-names>GH</given-names></name><name><surname>Rübel</surname><given-names>O</given-names></name><name><surname>Huang</surname><given-names>MY</given-names></name><etal/></person-group><year>2008</year><article-title>A quantitative spatiotemporal atlas of gene expression in the <italic>Drosophila</italic> blastoderm</article-title><source>Cell</source><volume>133</volume><fpage>364</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2008.01.053</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Frasch</surname><given-names>M</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>1987</year><article-title>Complementary patterns of even-skipped and fushi tarazu expression involve their differential regulation by a common set of segmentation genes in <italic>Drosophila</italic></article-title><source>Genes Dev</source><volume>1</volume><fpage>981</fpage><lpage>95</lpage><pub-id pub-id-type="doi">10.1101/gad.1.9.981</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Fujioka</surname><given-names>M</given-names></name><name><surname>Emi-Sarker</surname><given-names>Y</given-names></name><name><surname>Yusibova</surname><given-names>GL</given-names></name><name><surname>Goto</surname><given-names>T</given-names></name><name><surname>Jaynes</surname><given-names>JB</given-names></name></person-group><year>1999</year><article-title>Analysis of an even-skipped rescue transgene reveals both composite and discrete neuronal and early blastoderm enhancers, and multi-stripe positioning by gap gene repressor gradients</article-title><source>Development</source><volume>126</volume><fpage>2527</fpage><lpage>38</lpage></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gerstein</surname><given-names>MB</given-names></name><name><surname>Lu</surname><given-names>ZJ</given-names></name><name><surname>Van Nostrand</surname><given-names>EL</given-names></name><name><surname>Cheng</surname><given-names>C</given-names></name><name><surname>Arshinoff</surname><given-names>BI</given-names></name><name><surname>Liu</surname><given-names>T</given-names></name><etal/></person-group><year>2010</year><article-title>Integrative analysis of the <italic>Caenorhabditis elegans</italic> genome by the modENCODE project</article-title><source>Science</source><volume>330</volume><fpage>1775</fpage><lpage>87</lpage><pub-id pub-id-type="doi">10.1126/science.1196914</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Goto</surname><given-names>T</given-names></name><name><surname>Macdonald</surname><given-names>P</given-names></name><name><surname>Maniatis</surname><given-names>T</given-names></name></person-group><year>1989</year><article-title>Early and late periodic patterns of even skipped expression are controlled by distinct 
regulatory elements that respond to different spatial cues</article-title><source>Cell</source><volume>57</volume><fpage>413</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1016/0092-8674(89)90916-1</pub-id></element-citation></ref><ref id="bib21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Harding</surname><given-names>K</given-names></name><name><surname>Hoey</surname><given-names>T</given-names></name><name><surname>Warrior</surname><given-names>R</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>1989</year><article-title>Autoregulatory and gap gene response elements of the even-skipped promoter of <italic>Drosophila</italic></article-title><source>EMBO J</source><volume>8</volume><fpage>1205</fpage><lpage>12</lpage></element-citation></ref><ref id="bib22"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hare</surname><given-names>EE</given-names></name><name><surname>Peterson</surname><given-names>BK</given-names></name><name><surname>Iyer</surname><given-names>VN</given-names></name><name><surname>Meier</surname><given-names>R</given-names></name><name><surname>Eisen</surname><given-names>MB</given-names></name></person-group><year>2008</year><article-title>Sepsid even-skipped enhancers are functionally conserved in <italic>Drosophila</italic> despite lack of sequence conservation</article-title><source>PLOS Genet</source><volume>4</volume><fpage>e1000106</fpage><pub-id pub-id-type="doi">10.1371/journal.pgen.1000106</pub-id></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Hasty</surname><given-names>J</given-names></name><name><surname>McMillen</surname><given-names>D</given-names></name><name><surname>Isaacs</surname><given-names>F</given-names></name><name><surname>Collins</surname><given-names>JJ</given-names></name></person-group><year>2001</year><article-title>Computational studies of gene regulatory networks: in numero molecular biology</article-title><source>Nat Rev Genet</source><volume>2</volume><fpage>268</fpage><lpage>79</lpage><pub-id pub-id-type="doi">10.1038/35066056</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>He</surname><given-names>X</given-names></name><name><surname>Samee</surname><given-names>MA</given-names></name><name><surname>Blatti</surname><given-names>C</given-names></name><name><surname>Sinha</surname><given-names>S</given-names></name></person-group><year>2010</year><article-title>Thermodynamics-based models of transcriptional regulation by enhancers: the roles of synergistic activation, cooperative binding and short-range repression</article-title><source>PLOS Comput Biol</source><volume>6</volume><fpage>e1000935</fpage><pub-id pub-id-type="doi">10.1371/journal.pcbi.1000935</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hülskamp</surname><given-names>M</given-names></name><name><surname>Lukowitz</surname><given-names>W</given-names></name><name><surname>Beermann</surname><given-names>A</given-names></name><name><surname>Glaser</surname><given-names>G</given-names></name><name><surname>Tautz</surname><given-names>D</given-names></name></person-group><year>1994</year><article-title>Differential regulation of target genes by different alleles of the segmentation gene hunchback in 
<italic>Drosophila</italic></article-title><source>Genetics</source><volume>138</volume><fpage>125</fpage><lpage>34</lpage></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hülskamp</surname><given-names>M</given-names></name><name><surname>Pfeifle</surname><given-names>C</given-names></name><name><surname>Tautz</surname><given-names>D</given-names></name></person-group><year>1990</year><article-title>A morphogenetic gradient of hunchback protein organizes the expression of the gap genes Krüppel and knirps in the early <italic>Drosophila</italic> embryo</article-title><source>Nature</source><volume>346</volume><fpage>577</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1038/346577a0</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hülskamp</surname><given-names>M</given-names></name><name><surname>Schröder</surname><given-names>C</given-names></name><name><surname>Pfeifle</surname><given-names>C</given-names></name><name><surname>Jäckle</surname><given-names>H</given-names></name><name><surname>Tautz</surname><given-names>D</given-names></name></person-group><year>1989</year><article-title>Posterior segmentation of the <italic>Drosophila</italic> embryo in the absence of a maternal posterior organizer gene</article-title><source>Nature</source><volume>338</volume><fpage>629</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1038/338629a0</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jaeger</surname><given-names>J</given-names></name><name><surname>Sharp</surname><given-names>DH</given-names></name><name><surname>Reinitz</surname><given-names>J</given-names></name></person-group><year>2007</year><article-title>Known maternal gradients are not sufficient for the 
establishment of gap domains in <italic>Drosophila melanogaster</italic></article-title><source>Mech Dev</source><volume>124</volume><fpage>108</fpage><lpage>28</lpage><pub-id pub-id-type="doi">10.1016/j.mod.2006.11.001</pub-id></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jaeger</surname><given-names>J</given-names></name><name><surname>Surkova</surname><given-names>S</given-names></name><name><surname>Blagov</surname><given-names>M</given-names></name><name><surname>Janssens</surname><given-names>H</given-names></name><name><surname>Kosman</surname><given-names>D</given-names></name><name><surname>Kozlov</surname><given-names>KN</given-names></name><etal/></person-group><year>2004</year><article-title>Dynamic control of positional information in the early <italic>Drosophila</italic> embryo</article-title><source>Nature</source><volume>430</volume><fpage>368</fpage><lpage>71</lpage><pub-id pub-id-type="doi">10.1038/nature02678</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Janody</surname><given-names>F</given-names></name><name><surname>Sturny</surname><given-names>R</given-names></name><name><surname>Catala</surname><given-names>F</given-names></name><name><surname>Desplan</surname><given-names>C</given-names></name><name><surname>Dostatni</surname><given-names>N</given-names></name></person-group><year>2000</year><article-title>Phosphorylation of bicoid on MAP-kinase sites: contribution to its interaction with the torso pathway</article-title><source>Development</source><volume>127</volume><fpage>279</fpage><lpage>89</lpage></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Janody</surname><given-names>F</given-names></name><name><surname>Sturny</surname><given-names>R</given-names></name><name><surname>Schaeffer</surname><given-names>V</given-names></name><name><surname>Azou</surname><given-names>Y</given-names></name><name><surname>Dostatni</surname><given-names>N</given-names></name></person-group><year>2001</year><article-title>Two distinct domains of Bicoid mediate its transcriptional downregulation by the Torso pathway</article-title><source>Development</source><volume>128</volume><fpage>2281</fpage><lpage>90</lpage></element-citation></ref><ref id="bib32"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Janssens</surname><given-names>H</given-names></name><name><surname>Hou</surname><given-names>S</given-names></name><name><surname>Jaeger</surname><given-names>J</given-names></name><name><surname>Kim</surname><given-names>AR</given-names></name><name><surname>Myasnikova</surname><given-names>E</given-names></name><name><surname>Sharp</surname><given-names>D</given-names></name><etal/></person-group><year>2006</year><article-title>Quantitative and predictive model of transcriptional control of the <italic>Drosophila melanogaster</italic> even skipped gene</article-title><source>Nat Genet</source><volume>38</volume><fpage>1159</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1038/ng1886</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Junion</surname><given-names>G</given-names></name><name><surname>Spivakov</surname><given-names>M</given-names></name><name><surname>Girardot</surname><given-names>C</given-names></name><name><surname>Braun</surname><given-names>M</given-names></name><name><surname>Gustafson</surname><given-names>EH</given-names></name><name><surname>Birney</surname><given-names>E</given-names></name><etal/></person-group><year>2012</year><article-title>A transcription factor collective defines cardiac cell fate and reflects lineage history</article-title><source>Cell</source><volume>148</volume><fpage>473</fpage><lpage>86</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2012.01.030</pub-id></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kalisky</surname><given-names>T</given-names></name><name><surname>Blainey</surname><given-names>P</given-names></name><name><surname>Quake</surname><given-names>SR</given-names></name></person-group><year>2011</year><article-title>Genomic analysis at the single-cell level</article-title><source>Annu Rev Genet</source><volume>45</volume><fpage>431</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.1146/annurev-genet-102209-163607</pub-id></element-citation></ref><ref id="bib35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kazemian</surname><given-names>M</given-names></name><name><surname>Blatti</surname><given-names>C</given-names></name><name><surname>Richards</surname><given-names>A</given-names></name><name><surname>McCutchan</surname><given-names>M</given-names></name><name><surname>Wakabayashi-Ito</surname><given-names>N</given-names></name><name><surname>Hammonds</surname><given-names>AS</given-names></name><etal/></person-group><year>2010</year><article-title>Quantitative analysis of the <italic>Drosophila</italic> segmentation regulatory network 
using pattern generating potentials</article-title><source>PLOS Biol</source><volume>8</volume><fpage>e1000456</fpage><pub-id pub-id-type="doi">10.1371/journal.pbio.1000456</pub-id></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kelley</surname><given-names>KM</given-names></name><name><surname>Wang</surname><given-names>H</given-names></name><name><surname>Ratnam</surname><given-names>M</given-names></name></person-group><year>2003</year><article-title>Dual regulation of ets-activated gene expression by SP1</article-title><source>Gene</source><volume>307</volume><fpage>87</fpage><lpage>97</lpage><pub-id pub-id-type="doi">10.1016/S0378-1119(03)00445-1</pub-id></element-citation></ref><ref id="bib37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Keränen</surname><given-names>SV</given-names></name><name><surname>Fowlkes</surname><given-names>CC</given-names></name><name><surname>Luengo Hendriks</surname><given-names>CL</given-names></name><name><surname>Sudar</surname><given-names>D</given-names></name><name><surname>Knowles</surname><given-names>DW</given-names></name><name><surname>Malik</surname><given-names>J</given-names></name><etal/></person-group><year>2006</year><article-title>Three-dimensional morphology and gene expression in the <italic>Drosophila</italic> blastoderm at cellular resolution II: dynamics</article-title><source>Genome Biol</source><volume>7</volume><fpage>R124</fpage><pub-id pub-id-type="doi">10.1186/gb-2006-7-12-r124</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kerszberg</surname><given-names>M</given-names></name><name><surname>Wolpert</surname><given-names>L</given-names></name></person-group><year>2007</year><article-title>Specifying positional information in the embryo: looking beyond 
morphogens</article-title><source>Cell</source><volume>130</volume><fpage>205</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2007.06.038</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>AR</given-names></name><name><surname>Martinez</surname><given-names>C</given-names></name><name><surname>Ionides</surname><given-names>J</given-names></name><name><surname>Ramos</surname><given-names>AF</given-names></name><name><surname>Ludwig</surname><given-names>MZ</given-names></name><name><surname>Ogawa</surname><given-names>N</given-names></name><etal/></person-group><year>2013</year><article-title>Rearrangements of 2.5 kilobases of noncoding DNA from the <italic>Drosophila</italic> even-skipped locus define predictive rules of genomic cis-regulatory logic</article-title><source>PLOS Genet</source><volume>9</volume><fpage>e1003243</fpage><pub-id pub-id-type="doi">10.1371/journal.pgen.1003243</pub-id></element-citation></ref><ref id="bib40"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lemon</surname><given-names>B</given-names></name><name><surname>Tjian</surname><given-names>R</given-names></name></person-group><year>2000</year><article-title>Orchestrated response: a symphony of transcription factors for gene control</article-title><source>Genes Dev</source><volume>14</volume><fpage>2551</fpage><lpage>69</lpage><pub-id pub-id-type="doi">10.1101/gad.831000</pub-id></element-citation></ref><ref id="bib41"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lenhard</surname><given-names>B</given-names></name><name><surname>Sandelin</surname><given-names>A</given-names></name><name><surname>Carninci</surname><given-names>P</given-names></name></person-group><year>2012</year><article-title>Metazoan promoters: emerging characteristics 
and insights into transcriptional regulation</article-title><source>Nat Rev Genet</source><volume>13</volume><fpage>233</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.1038/nrg3163</pub-id></element-citation></ref><ref id="bib42"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>2008</year><article-title>A systems view of <italic>Drosophila</italic> segmentation</article-title><source>Genome Biol</source><volume>9</volume><fpage>207</fpage><pub-id pub-id-type="doi">10.1186/gb-2008-9-2-207</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>2010</year><article-title>Transcriptional enhancers in animal development and evolution</article-title><source>Curr Biol</source><volume>20</volume><fpage>R754</fpage><lpage>63</lpage><pub-id pub-id-type="doi">10.1016/j.cub.2010.06.070</pub-id></element-citation></ref><ref id="bib44"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lewis</surname><given-names>J</given-names></name></person-group><year>2008</year><article-title>From signals to patterns: space, time, and mathematics in developmental biology</article-title><source>Science</source><volume>322</volume><fpage>399</fpage><lpage>403</lpage><pub-id pub-id-type="doi">10.1126/science.1166154</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Logan</surname><given-names>CY</given-names></name><name><surname>Nusse</surname><given-names>R</given-names></name></person-group><year>2004</year><article-title>The Wnt signaling pathway in development and disease</article-title><source>Annu Rev Cell Dev 
Biol</source><volume>20</volume><fpage>781</fpage><lpage>810</lpage><pub-id pub-id-type="doi">10.1146/annurev.cellbio.20.010403.113126</pub-id></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Luengo Hendriks</surname><given-names>CL</given-names></name><name><surname>Keränen</surname><given-names>SV</given-names></name><name><surname>Fowlkes</surname><given-names>CC</given-names></name><name><surname>Simirenko</surname><given-names>L</given-names></name><name><surname>Weber</surname><given-names>GH</given-names></name><name><surname>DePace</surname><given-names>AH</given-names></name><etal/></person-group><year>2006</year><article-title>Three-dimensional morphology and gene expression in the <italic>Drosophila</italic> blastoderm at cellular resolution I: data acquisition pipeline</article-title><source>Genome Biol</source><volume>7</volume><fpage>R123</fpage><pub-id pub-id-type="doi">10.1186/gb-2006-7-12-r123</pub-id></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Margolis</surname><given-names>JS</given-names></name><name><surname>Borowsky</surname><given-names>ML</given-names></name><name><surname>Steingrimsson</surname><given-names>E</given-names></name><name><surname>Shim</surname><given-names>CW</given-names></name><name><surname>Lengyel</surname><given-names>JA</given-names></name><name><surname>Posakony</surname><given-names>JW</given-names></name></person-group><year>1995</year><article-title>Posterior stripe expression of hunchback is driven from two promoters by a common enhancer element</article-title><source>Development</source><volume>121</volume><fpage>3067</fpage><lpage>77</lpage></element-citation></ref><ref id="bib62"><element-citation publication-type="journal"><collab>The modENCODE Consortium</collab><year>2010</year><article-title>Identification of functional 
elements and regulatory circuits by <italic>Drosophila</italic> modENCODE</article-title><source>Science</source><volume>330</volume><fpage>1787</fpage><lpage>97</lpage><pub-id pub-id-type="doi">10.1126/science.1198374</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morán</surname><given-names>E</given-names></name><name><surname>Jiménez</surname><given-names>G</given-names></name></person-group><year>2006</year><article-title>The tailless nuclear receptor acts as a dedicated repressor in the early <italic>Drosophila</italic> embryo</article-title><source>Mol Cell Biol</source><volume>26</volume><fpage>3446</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1128/MCB.26.9.3446-3454.2006</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nègre</surname><given-names>N</given-names></name><name><surname>Brown</surname><given-names>CD</given-names></name><name><surname>Ma</surname><given-names>L</given-names></name><name><surname>Bristow</surname><given-names>CA</given-names></name><name><surname>Miller</surname><given-names>SW</given-names></name><name><surname>Wagner</surname><given-names>U</given-names></name><etal/></person-group><year>2011</year><article-title>A cis-regulatory map of the <italic>Drosophila</italic> genome</article-title><source>Nature</source><volume>471</volume><fpage>527</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1038/nature09990</pub-id></element-citation></ref><ref id="bib51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nüsslein-Volhard</surname><given-names>C</given-names></name><name><surname>Wieschaus</surname><given-names>E</given-names></name></person-group><year>1980</year><article-title>Mutations affecting segment number and polarity in 
<italic>Drosophila</italic></article-title><source>Nature</source><volume>287</volume><fpage>795</fpage><lpage>801</lpage><pub-id pub-id-type="doi">10.1038/287795a0</pub-id></element-citation></ref><ref id="bib52"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Oates</surname><given-names>AC</given-names></name><name><surname>Gorfinkiel</surname><given-names>N</given-names></name><name><surname>González-Gaitán</surname><given-names>M</given-names></name><name><surname>Heisenberg</surname><given-names>CP</given-names></name></person-group><year>2009</year><article-title>Quantitative approaches in developmental biology</article-title><source>Nat Rev Genet</source><volume>10</volume><fpage>517</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.1038/nrg2548</pub-id></element-citation></ref><ref id="bib53"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ohler</surname><given-names>U</given-names></name><name><surname>Wassarman</surname><given-names>DA</given-names></name></person-group><year>2010</year><article-title>Promoting developmental transcription</article-title><source>Development</source><volume>137</volume><fpage>15</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1242/dev.035493</pub-id></element-citation></ref><ref id="bib54"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ong</surname><given-names>CT</given-names></name><name><surname>Corces</surname><given-names>VG</given-names></name></person-group><year>2011</year><article-title>Enhancer function: new insights into the regulation of tissue-specific gene expression</article-title><source>Nat Rev Genet</source><volume>12</volume><fpage>283</fpage><lpage>93</lpage><pub-id pub-id-type="doi">10.1038/nrg2957</pub-id></element-citation></ref><ref id="bib54a"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Papatsenko</surname><given-names>D</given-names></name></person-group><year>2009</year><article-title>Stripe formation in the early fly embryo: principles, models, and networks</article-title><source>BioEssays</source><volume>31</volume><fpage>1172</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1002/bies.200900096</pub-id></element-citation></ref><ref id="bib55"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Papatsenko</surname><given-names>D</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>2011</year><article-title>The <italic>Drosophila</italic> gap gene network is composed of two parallel toggle switches</article-title><source>PLOS ONE</source><volume>6</volume><fpage>e21145</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0021145</pub-id></element-citation></ref><ref id="bib56"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Papatsenko</surname><given-names>D</given-names></name><name><surname>Levine</surname><given-names>MS</given-names></name></person-group><year>2008</year><article-title>Dual regulation by the Hunchback gradient in the <italic>Drosophila</italic> embryo</article-title><source>Proc Natl Acad Sci USA</source><volume>105</volume><fpage>2901</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1073/pnas.0711941105</pub-id></element-citation></ref><ref id="bib58"><element-citation publication-type="journal"><collab>R Core Team</collab><year>2012</year><article-title>R: a language and environment for statistical computing</article-title><comment>(Vienna: R Foundation for Statistical Computing)</comment><ext-link ext-link-type="uri" xlink:href="http://www.r-project.org/">http://www.r-project.org/</ext-link></element-citation></ref><ref id="bib60"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Reeves</surname><given-names>GT</given-names></name><name><surname>Muratov</surname><given-names>CB</given-names></name><name><surname>Schüpbach</surname><given-names>T</given-names></name><name><surname>Shvartsman</surname><given-names>SY</given-names></name></person-group><year>2006</year><article-title>Quantitative models of developmental pattern formation</article-title><source>Dev Cell</source><volume>11</volume><fpage>289</fpage><lpage>300</lpage><pub-id pub-id-type="doi">10.1016/j.devcel.2006.08.006</pub-id></element-citation></ref><ref id="bib61"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ripley</surname><given-names>BD</given-names></name></person-group><year>2008</year><source>Pattern Recognition and Neural Networks</source><publisher-loc>Cambridge</publisher-loc><publisher-name>Cambridge University Press</publisher-name></element-citation></ref><ref id="bib63"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schroeder</surname><given-names>MD</given-names></name><name><surname>Greer</surname><given-names>C</given-names></name><name><surname>Gaul</surname><given-names>U</given-names></name></person-group><year>2011</year><article-title>How to make stripes: deciphering the transition from non-periodic to periodic patterns in <italic>Drosophila</italic> segmentation</article-title><source>Development</source><volume>138</volume><fpage>3067</fpage><lpage>78</lpage><pub-id pub-id-type="doi">10.1242/dev.062141</pub-id></element-citation></ref><ref id="bib64"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Schroeder</surname><given-names>MD</given-names></name><name><surname>Pearce</surname><given-names>M</given-names></name><name><surname>Fak</surname><given-names>J</given-names></name><name><surname>Fan</surname><given-names>H</given-names></name><name><surname>Unnerstall</surname><given-names>U</given-names></name><name><surname>Emberly</surname><given-names>E</given-names></name><etal/></person-group><year>2004</year><article-title>Transcriptional control in the segmentation gene network of <italic>Drosophila</italic></article-title><source>PLOS Biol</source><volume>2</volume><fpage>E271</fpage><pub-id pub-id-type="doi">10.1371/journal.pbio.0020271</pub-id></element-citation></ref><ref id="bib65"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schulz</surname><given-names>C</given-names></name><name><surname>Tautz</surname><given-names>D</given-names></name></person-group><year>1994</year><article-title>Autonomous concentration-dependent activation and repression of kruppel by hunchback in the <italic>Drosophila</italic> embryo</article-title><source>Development</source><volume>120</volume><fpage>3043</fpage><lpage>9</lpage></element-citation></ref><ref id="bib66"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Segal</surname><given-names>E</given-names></name><name><surname>Raveh-Sadka</surname><given-names>T</given-names></name><name><surname>Schroeder</surname><given-names>M</given-names></name><name><surname>Unnerstall</surname><given-names>U</given-names></name><name><surname>Gaul</surname><given-names>U</given-names></name></person-group><year>2008</year><article-title>Predicting expression patterns from regulatory sequence in <italic>Drosophila</italic> segmentation</article-title><source>Nature</source><volume>451</volume><fpage>535</fpage><lpage>40</lpage><pub-id 
pub-id-type="doi">10.1038/nature06496</pub-id></element-citation></ref><ref id="bib67"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Segal</surname><given-names>E</given-names></name><name><surname>Widom</surname><given-names>J</given-names></name></person-group><year>2009</year><article-title>From DNA sequence to transcriptional behaviour: a quantitative approach</article-title><source>Nat Rev Genet</source><volume>10</volume><fpage>443</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1038/nrg2591</pub-id></element-citation></ref><ref id="bib68"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Simpson-Brose</surname><given-names>M</given-names></name><name><surname>Treisman</surname><given-names>J</given-names></name><name><surname>Desplan</surname><given-names>C</given-names></name></person-group><year>1994</year><article-title>Synergy between the hunchback and bicoid morphogens is required for anterior patterning in <italic>Drosophila</italic></article-title><source>Cell</source><volume>78</volume><fpage>855</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1016/S0092-8674(94)90622-X</pub-id></element-citation></ref><ref id="bib69"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname><given-names>N</given-names></name><name><surname>Zhu</surname><given-names>W</given-names></name><name><surname>Hanes</surname><given-names>SD</given-names></name></person-group><year>2005</year><article-title>Sap18 is required for the maternal gene bicoid to direct anterior patterning in <italic>Drosophila melanogaster</italic></article-title><source>Dev Biol</source><volume>278</volume><fpage>242</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1016/j.ydbio.2004.11.011</pub-id></element-citation></ref><ref id="bib70"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Small</surname><given-names>S</given-names></name><name><surname>Blair</surname><given-names>A</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>1992</year><article-title>Regulation of even-skipped stripe 2 in the <italic>Drosophila</italic> embryo</article-title><source>EMBO J</source><volume>11</volume><fpage>4047</fpage><lpage>57</lpage></element-citation></ref><ref id="bib71"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Small</surname><given-names>S</given-names></name><name><surname>Blair</surname><given-names>A</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>1996</year><article-title>Regulation of two pair-rule stripes by a single enhancer in the <italic>Drosophila</italic> embryo</article-title><source>Dev Biol</source><volume>175</volume><fpage>314</fpage><lpage>24</lpage><pub-id pub-id-type="doi">10.1006/dbio.1996.0117</pub-id></element-citation></ref><ref id="bib72"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Small</surname><given-names>S</given-names></name><name><surname>Kraut</surname><given-names>R</given-names></name><name><surname>Hoey</surname><given-names>T</given-names></name><name><surname>Warrior</surname><given-names>R</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>1991</year><article-title>Transcriptional regulation of a pair-rule stripe in <italic>Drosophila</italic></article-title><source>Genes Dev</source><volume>5</volume><fpage>827</fpage><lpage>39</lpage><pub-id pub-id-type="doi">10.1101/gad.5.5.827</pub-id></element-citation></ref><ref id="bib73"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Spitz</surname><given-names>F</given-names></name><name><surname>Furlong</surname><given-names>EE</given-names></name></person-group><year>2012</year><article-title>Transcription factors: from enhancer binding to developmental control</article-title><source>Nat Rev Genet</source><volume>13</volume><fpage>613</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1038/nrg3207</pub-id></element-citation></ref><ref id="bib74"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stanojevic</surname><given-names>D</given-names></name><name><surname>Small</surname><given-names>S</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name></person-group><year>1991</year><article-title>Regulation of a segmentation stripe by overlapping activators and repressors in the <italic>Drosophila</italic> embryo</article-title><source>Science</source><volume>254</volume><fpage>1385</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1126/science.1683715</pub-id></element-citation></ref><ref id="bib75"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Struffi</surname><given-names>P</given-names></name><name><surname>Corado</surname><given-names>M</given-names></name><name><surname>Kaplan</surname><given-names>L</given-names></name><name><surname>Yu</surname><given-names>D</given-names></name><name><surname>Rushlow</surname><given-names>C</given-names></name><name><surname>Small</surname><given-names>S</given-names></name></person-group><year>2011</year><article-title>Combinatorial activation and concentration-dependent repression of the <italic>Drosophila</italic> even skipped stripe 3+7 enhancer</article-title><source>Development</source><volume>138</volume><fpage>4291</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1242/dev.065987</pub-id></element-citation></ref><ref id="bib76"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Struhl</surname><given-names>K</given-names></name></person-group><year>2001</year><article-title>Gene regulation. A paradigm for precision</article-title><source>Science</source><volume>293</volume><fpage>1054</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1126/science.1064050</pub-id></element-citation></ref><ref id="bib77"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tomlin</surname><given-names>CJ</given-names></name><name><surname>Axelrod</surname><given-names>JD</given-names></name></person-group><year>2007</year><article-title>Biology by numbers: mathematical modelling in developmental biology</article-title><source>Nat Rev Genet</source><volume>8</volume><fpage>331</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1038/nrg2098</pub-id></element-citation></ref><ref id="bib78"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Varjosalo</surname><given-names>M</given-names></name><name><surname>Taipale</surname><given-names>J</given-names></name></person-group><year>2008</year><article-title>Hedgehog: functions and mechanisms</article-title><source>Genes Dev</source><volume>22</volume><fpage>2454</fpage><lpage>72</lpage><pub-id pub-id-type="doi">10.1101/gad.1693608</pub-id></element-citation></ref><ref id="bib79"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Whitington</surname><given-names>T</given-names></name><name><surname>Jolma</surname><given-names>A</given-names></name><name><surname>Taipale</surname><given-names>J</given-names></name></person-group><year>2011</year><article-title>Beyond the balance of activator and repressor</article-title><source>Sci Signal</source><volume>4</volume><fpage>pe29</fpage><pub-id pub-id-type="doi">10.1126/scisignal.2002183</pub-id></element-citation></ref><ref 
id="bib80"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wilczynski</surname><given-names>B</given-names></name><name><surname>Furlong</surname><given-names>EE</given-names></name></person-group><year>2010</year><article-title>Challenges for modeling global gene regulatory networks during development: insights from <italic>Drosophila</italic></article-title><source>Dev Biol</source><volume>340</volume><fpage>161</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1016/j.ydbio.2009.10.032</pub-id></element-citation></ref><ref id="bib81"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wolpert</surname><given-names>L</given-names></name></person-group><year>1969</year><article-title>Positional information and the spatial pattern of cellular differentiation</article-title><source>J Theor Biol</source><volume>25</volume><fpage>1</fpage><lpage>47</lpage><pub-id pub-id-type="doi">10.1016/S0022-5193(69)80016-0</pub-id></element-citation></ref><ref id="bib82"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wolpert</surname><given-names>L</given-names></name></person-group><year>1996</year><article-title>One hundred years of positional information</article-title><source>Trends Genet</source><volume>12</volume><fpage>359</fpage><lpage>64</lpage><pub-id pub-id-type="doi">10.1016/S0168-9525(96)80019-9</pub-id></element-citation></ref><ref id="bib83"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wolpert</surname><given-names>L</given-names></name></person-group><year>2011</year><article-title>Positional information and patterning revisited</article-title><source>J Theor Biol</source><volume>269</volume><fpage>359</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1016/j.jtbi.2010.10.034</pub-id></element-citation></ref><ref id="bib84"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Wunderlich</surname><given-names>Z</given-names></name><name><surname>Bragdon</surname><given-names>MD</given-names></name><name><surname>Eckenrode</surname><given-names>KB</given-names></name><name><surname>Lydiard-Martin</surname><given-names>T</given-names></name><name><surname>Pearl-Waserman</surname><given-names>S</given-names></name><name><surname>DePace</surname><given-names>AH</given-names></name></person-group><year>2012</year><article-title>Dissecting sources of quantitative gene expression pattern divergence between <italic>Drosophila</italic> species</article-title><source>Mol Syst Biol</source><volume>8</volume><fpage>604</fpage><pub-id pub-id-type="doi">10.1038/msb.2012.35</pub-id></element-citation></ref><ref id="bib85"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname><given-names>C</given-names></name><name><surname>York</surname><given-names>A</given-names></name><name><surname>Yang</surname><given-names>F</given-names></name><name><surname>Forsthoefel</surname><given-names>DJ</given-names></name><name><surname>Dave</surname><given-names>V</given-names></name><name><surname>Fu</surname><given-names>D</given-names></name><etal/></person-group><year>2002</year><article-title>The activity of the <italic>Drosophila</italic> morphogenetic protein bicoid is inhibited by a domain located outside its homeodomain</article-title><source>Development</source><volume>129</volume><fpage>1669</fpage><lpage>80</lpage></element-citation></ref><ref id="bib86"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zinzen</surname><given-names>RP</given-names></name><name><surname>Girardot</surname><given-names>C</given-names></name><name><surname>Gagneur</surname><given-names>J</given-names></name><name><surname>Braun</surname><given-names>M</given-names></name><name><surname>Furlong</surname><given-names>EE</given-names></name></person-group><year>2009</year><article-title>Combinatorial binding predicts spatio-temporal cis-regulatory activity</article-title><source>Nature</source><volume>462</volume><fpage>65</fpage><lpage>70</lpage><pub-id pub-id-type="doi">10.1038/nature08531</pub-id></element-citation></ref><ref id="bib87"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zuo</surname><given-names>P</given-names></name><name><surname>Stanojević</surname><given-names>D</given-names></name><name><surname>Colgan</surname><given-names>J</given-names></name><name><surname>Han</surname><given-names>K</given-names></name><name><surname>Levine</surname><given-names>M</given-names></name><name><surname>Manley</surname><given-names>JL</given-names></name></person-group><year>1991</year><article-title>Activation and repression of transcription by the gap proteins hunchback and kruppel in cultured <italic>Drosophila</italic> cells</article-title><source>Genes Dev</source><volume>5</volume><fpage>254</fpage><lpage>64</lpage><pub-id pub-id-type="doi">10.1101/gad.5.2.254</pub-id></element-citation></ref></ref-list></back><sub-article article-type="article-commentary" id="SA1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.00522.028</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Guigo</surname><given-names>Roderic</given-names></name><role>Reviewing editor</role><aff><institution>Center for Genomic Regulation</institution>, 
<country>Spain</country></aff></contrib></contrib-group></front-stub><body><boxed-text><p>eLife posts the editorial decision letter and author response on a selection of the published articles (subject to the approval of the authors). An edited version of the letter sent to the authors after peer review is shown, indicating the substantive concerns or comments; minor concerns are not usually shown. Reviewers have the opportunity to discuss the decision before the letter is sent (see <ext-link ext-link-type="uri" xlink:href="http://elife.elifesciences.org/review-process">review process</ext-link>). Similarly, the author response typically shows only responses to the major concerns raised by the reviewers.</p></boxed-text><p>Thank you for sending your work entitled “Cellular resolution models for <italic>even skipped</italic> regulation in the entire <italic>Drosophila</italic> embryo” for consideration at <italic>eLife</italic>. Your article has been favorably evaluated by a Senior editor and 3 reviewers, one of whom is a member of our Board of Reviewing Editors.</p><p>The following individuals responsible for the peer review of your submission each wish to reveal their identity: Roderic Guigo (Reviewing editor); Mike Levine (peer reviewer).</p><p>The Reviewing editor and the other reviewers discussed their comments before we reached this decision, and the Reviewing editor has assembled the following comments to help you prepare a revised submission.</p><p>1) Two of the reviewers raised concerns about the statistical methods employed to validate the model. Specifically, one of the reviewers stated that the conclusions should be drawn from a blind test or N-fold cross validation; however, the authors used this to test the model over-fitting as a separate section. Nevertheless, if we understood them correctly, the main conclusions presented are drawn from using the entire set of data points, including those used for training. 
This is circular, and the authors should clarify this point in the main text. The over-fitting assessment is not generally necessary here, as the number of parameters is considerably below the number of independent data points.</p><p>2) Two of the reviewers also suggested that the authors should further investigate the correlation between the input variables and test models including fewer variables. One of the reviewers suggested that you might use stepwise logistic regression as a way to select those variables that are truly informative. Using all combinations of 4 variables (in the case of stripe 2) did not seem the optimal way to infer the minimum combination of maximally informative variables.</p><p>3) Two of the reviewers also raised concerns about the way you attempted to simulate the effects of perturbation by manipulating the input signal of the regression model without changing its learned coefficients. One of the referees specifically believes that this practice is incorrect, simply because the model was learned and optimized based on 3 input signals, and should be re-optimized if you decided to remove one input. Again, the model comparison should be conducted by a blind test, not on the entire dataset used for training.</p><p>4) Two of the reviewers asked themselves whether it would not be of interest to test the stability of the models during development. One of the referees specifically encouraged the authors to extend their models to earlier stages of development, namely the first 20 min of nuclear cleavage cycle 14.</p><p>5) The utility of the models relies on their capacity to generate testable hypotheses. In this regard, one of the referees asked whether any novel prediction that could be experimentally tested has been derived from the model. 
This is an issue that should be made more explicit by the authors.</p></body></sub-article><sub-article article-type="reply" id="SA2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.00522.029</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><p><italic>1) Two of the reviewers raised concerns about the statistical methods employed to validate the model. Specifically, one of the reviewers stated that the conclusions should be drawn from a blind test or N-fold cross validation; however, the authors used this to test the model over-fitting as a separate section. Nevertheless, if we understood them correctly, the main conclusions presented are drawn from using the entire set of data points, including those used for training. This is circular, and the authors should clarify this point in the main text. The over-fitting assessment is not generally necessary here, as the number of parameters is considerably below the number of independent data points</italic>.</p><p>We thank the reviewers for their comments. Briefly, two reviewers suggest that the conclusions of the study should not be based on how well our model fits the training data (and hence recommend cross-validation or a blind test). Their concern is that this is circular, but whether it is or not depends on the conclusions being drawn. In this matter, it is perhaps best to understand our work as a regression model that describes the relationship observed between variables in a dataset—the Virtual Embryo—rather than as a machine-learning classifier whose expected performance on other data needs to be assessed in an unbiased way. For this reason, the first part of our work draws conclusions based on the fit to the data.</p><p>These data include the measurement of expression in the early embryo in the <italic>eve</italic> stripe of interest as well as in the nuclei that are outside any stripe. 
Our most important conclusion is that <italic>eve</italic> expression in these nuclei can be correctly separated into two classes using the measured concentrations of various transcription factors alone. This is biologically interesting and relevant—for instance, in its implications for positional information in the early embryo. Perhaps what is confusing is that in justifying these conclusions we make use of predictions across the whole embryo, rather than just on the training data. However, the intention here is not to test the generality of the model or its validity as a classifier of independent data, but rather to assess the applicability of the model across the whole embryo; that is, whether the information content in the other stripes is consistent with the current model (or, biologically-speaking, whether a different source of information would be necessary to control activation of the enhancer in these regions e.g. additional signaling, epigenetic state, etc). We have now clarified this in the main text.</p><p>We agree that an over-fitting assessment is not generally necessary, but nevertheless, it is useful to ask whether the statistical relationship expressed in the model is consistent across different subsets of the data. This was the primary point of the separate section where we presented the results of the random subsampling cross-validation test, as well as the tests where we considered other subsets, e.g., a narrow strip along the dorsoventral axis and the stripe and its neighbouring nuclei. Further, although our model up to this point in the manuscript demonstrates that transcription factor concentrations alone provide sufficient positional information, we were interested in whether the ability of our model to fit the data might be constrained in biologically relevant ways. It was for this reason that we also tested whether our model, with the known regulators, could fit any stripe. 
We presented these findings under the heading of “over-fitting”, but in light of the reviewers’ comments we clarified this in the section renamed “The model performs consistently across different subsets of the data”.</p><p>Going beyond these conclusions, we then make use of independent verifications, such as a comparison of our models’ predictions with mutant and misexpression data. In addition, in response to a reviewer’s request, we have added the predictions of our models at earlier time points. Although these data are from the BDTNP, they were not used during our models’ development, and hence this can be considered an independent test.</p><p><italic>2) Two of the reviewers also suggested that the authors should further investigate the correlation between the input variables and test models including fewer variables. One of the reviewers suggested that you might use stepwise logistic regression as a way to select those variables that are truly informative. Using all combinations of 4 variables (in the case of stripe 2) did not seem the optimal way to infer the minimum combination of maximally informative variables</italic>.</p><p>We did, in fact, explore stepwise logistic regression during model development, although these results were not reported in the original paper. We found that stepwise selection was a successful procedure for finding putative regulators, but that it generally includes more regulators than necessary for a good visual fit (for example, a stepwise selection procedure for a linear model of <italic>eve 3+7</italic> with the Bayesian Information Criterion finds 15 regulators). The stopping point (i.e., the penalty for adding an extra parameter) is effectively arbitrary in this case, or at least difficult to determine <italic>a priori</italic> in a justifiable manner. 
Since we are interested in a minimal model that can explain the observed expression and found that four regulators are sufficient, we could put an upper bound on the number of regulators to consider in our models, at least as a starting point.</p><p>In practice, though, we are also interested in models that contain the key regulators, and in the case of <italic>eve 2</italic>, there are four. So, for example, the best scoring model for <italic>eve 2</italic> with three regulators (Hb, Gt and Bcd) can produce a reasonable fit, although a less sharp one than when Kr is included, but making use of this model would restrict our ability to consider <italic>Kr</italic> mutant data. For <italic>eve 3+7</italic>, the best scoring linear model with three regulators (Gt, kni and tll) does not include Hb, an important regulator. It is also worth noting that these three regulators are the same as those picked up in our regulatory discovery method (<xref ref-type="fig" rid="fig4s6">Figure 4—figure supplement 6</xref>).</p><p>Further, the goal of our discovery method was not simply to pick the best four (or three) regulator models (although these are reasonable). Instead, we are interested to see which regulators work together to control the spatial pattern observed, such as when two repressors define the borders of a stripe. In this case, it is valuable to consider how a transcription factor performs in the context of other transcription factors. This is not taken into account in stepwise selection. Instead, our approach rigorously compares the contribution of each pair of transcription factors in the context of the most informative two-regulator model from the remaining regulators. This, as we show, clearly highlights informative regulators, which are indeed able to predict expression in the early embryo. 
Nevertheless, we do not claim to have found all informative regulators, but rather propose a minimal model that works under the conditions analysed.</p><p><italic>3) Two of the reviewers also raised concerns about the way you attempted to simulate the effects of perturbation by manipulating the input signal of the regression model without changing its learned coefficients. One of the referees specifically believes that this practice is incorrect, simply because the model was learned and optimized based on 3 input signal, and should be re-optimized if you decided to remove one input. Again the model comparison should be conducted by a blind test, not all entire dataset used for training</italic>.</p><p>Briefly, two reviewers suggest that if we wish to manipulate the input signals (i.e., in the perturbation tests), we should re-optimise the parameters under these new conditions. We agree this would give the models a fairer chance of classifying the data points correctly; this would be particularly important if our goal was to consider how a model with fewer inputs might perform, or how the classification approach might work with different data.</p><p>However, at this point in our analysis we have already accepted the models as plausible, and we are rather considering the hypothesis that the selected models represent the underlying biological processes. The purpose in modifying the inputs to these models is to compare their predictions with the results of experimental perturbations. In these experiments the intention is to modify one of the molecular inputs to the enhancer while leaving the others unchanged. For this reason, to compare like with like, it is important that we do not change the other parameters of the models, and hence we do not re-optimise them.</p><p><italic>4) Two of the reviewers asked themselves whether it would not be of interest to test the stability of the models during development. 
One of the referees specifically encouraged the authors to extend their models to earlier stages of development, namely the first 20 min of nuclear cleavage cycle 14</italic>.</p><p>We thank the reviewers for this suggestion. The Virtual Embryo dataset comprises 6 time points during nuclear cleavage cycle 14, each about 10 minutes apart. Most of the presented work was performed using data from time point 3, when <italic>eve</italic> expression becomes sharply defined. To address the reviewers’ suggestion, we selected the best performing models for <italic>eve 2</italic> and <italic>eve 3+7</italic> trained on time point 3, and applied them to data from time points 1 and 2. As shown in the new <xref ref-type="fig" rid="fig6">Figure 6</xref> and described in the section “Models predict <italic>eve 2</italic> and <italic>3+7</italic> expression in early time points”, the models successfully reproduce the stripe formation apparent in the Virtual Embryo; the outputs are also consistent with previously published <italic>in situ</italic> hybridisations.</p><p><italic>5) The utility of the models rely on their capacity to generate testable hypotheses. In this regard, one of the referees asked whether any novel prediction that could be experimentally tested has been derived from the model. This is an issue that should be made more explicit by the authors</italic>.</p><p>We have now included a modified section (“Experimentally testable hypotheses”) describing: (i) specific experiments to clarify which regulatory mechanisms apply for the <italic>eve 2</italic> and <italic>eve 3+7</italic> enhancers (e.g., linear vs quadratic models); and (ii) further general experimental approaches that would advance our understanding of gene regulation in early fly embryos.</p></body></sub-article></article>