Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
1 lines (1 sloc) 232 KB
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN" "JATS-archivearticle1.dtd"><article article-type="research-article" dtd-version="1.1d1" xmlns:xlink="http://www.w3.org/1999/xlink"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">02725</article-id><article-id pub-id-type="doi">10.7554/eLife.02725</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research article</subject></subj-group><subj-group subj-group-type="heading"><subject>Genomics and evolutionary biology</subject></subj-group><subj-group subj-group-type="heading"><subject>Human biology and medicine</subject></subj-group></article-categories><title-group><article-title>Mismatch repair deficiency endows tumors with a unique mutation signature and sensitivity to DNA double-strand breaks</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes" id="author-12352"><name><surname>Zhao</surname><given-names>Hui</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-7"/><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf2"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro2"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-12398"><name><surname>Thienpont</surname><given-names>Bernard</given-names></name><contrib-id 
contrib-id-type="orcid">http://orcid.org/0000-0002-8772-6845</contrib-id><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-7"/><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-12399"><name><surname>Yesilyurt</surname><given-names>Betül Tuba</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-8"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-12400"><name><surname>Moisse</surname><given-names>Matthieu</given-names></name><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0001-8880-9311</contrib-id><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="other" rid="par-8"/><xref ref-type="fn" rid="con6"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12653"><name><surname>Reumers</surname><given-names>Joke</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12401"><name><surname>Coenegrachts</surname><given-names>Lieve</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="other" rid="par-7"/><xref ref-type="fn" rid="con9"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12405"><name><surname>Sagaert</surname><given-names>Xavier</given-names></name><xref ref-type="aff" rid="aff4"/><xref ref-type="fn" rid="con7"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" 
id="author-12406"><name><surname>Schrauwen</surname><given-names>Stefanie</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="fn" rid="con12"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12407"><name><surname>Smeets</surname><given-names>Dominiek</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="con13"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12409"><name><surname>Matthijs</surname><given-names>Gert</given-names></name><xref ref-type="aff" rid="aff5"/><xref ref-type="fn" rid="con10"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-3436"><name><surname>Aerts</surname><given-names>Stein</given-names></name><xref ref-type="aff" rid="aff5"/><xref ref-type="fn" rid="con14"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12410"><name><surname>Cools</surname><given-names>Jan</given-names></name><xref ref-type="aff" rid="aff5"/><xref ref-type="aff" rid="aff6"/><xref ref-type="fn" rid="con11"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12411"><name><surname>Metcalf</surname><given-names>Alex</given-names></name><xref ref-type="aff" rid="aff7"/><xref ref-type="fn" rid="con15"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-12412"><name><surname>Spurdle</surname><given-names>Amanda</given-names></name><xref ref-type="aff" rid="aff7"/><xref ref-type="other" rid="par-3"/><xref ref-type="other" rid="par-4"/><xref ref-type="other" rid="par-5"/><xref ref-type="other" rid="par-6"/><xref ref-type="other" rid="par-10"/><xref ref-type="fn" rid="con8"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="groupAu1"><contrib-id contrib-id-type="group-author-key">group-author-id1</contrib-id><collab>ANECS</collab><xref ref-type="aff" 
rid="aff8"/></contrib><contrib contrib-type="author" id="author-12414"><name><surname>Amant</surname><given-names>Frederic</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="fn" rid="con16"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" corresp="yes" id="author-11338"><name><surname>Lambrechts</surname><given-names>Diether</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-2"/><xref ref-type="other" rid="par-9"/><xref ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf1"/></contrib><aff id="aff1"><institution>VIB Vesalius Research Center, KU Leuven</institution>, <addr-line><named-content content-type="city">Leuven</named-content></addr-line>, <country>Belgium</country></aff><aff id="aff2"><institution content-type="dept">Department of Oncology</institution>, <institution>KU Leuven</institution>, <addr-line><named-content content-type="city">Leuven</named-content></addr-line>, <country>Belgium</country></aff><aff id="aff3"><institution content-type="dept">Division of Gynaecologic Oncology, Department of Obstetrics and Gynaecology</institution>, <institution>University Hospital Gasthuisberg</institution>, <addr-line><named-content content-type="city">Leuven</named-content></addr-line>, <country>Belgium</country></aff><aff id="aff4"><institution content-type="dept">Division of Pathology</institution>, <institution>University Hospital Gasthuisberg</institution>, <addr-line><named-content content-type="city">Leuven</named-content></addr-line>, <country>Belgium</country></aff><aff id="aff5"><institution content-type="dept">Department of Human Genetics</institution>, <institution>KU Leuven</institution>, <addr-line><named-content content-type="city">Leuven</named-content></addr-line>, <country>Belgium</country></aff><aff id="aff6"><institution>VIB Center for the Biology of 
Disease, KU Leuven</institution>, <addr-line><named-content content-type="city">Leuven</named-content></addr-line>, <country>Belgium</country></aff><aff id="aff7"><institution content-type="dept">Division of Genetics and Computational Biology</institution>, <institution>Queensland Institute of Medical Research</institution>, <addr-line><named-content content-type="city">Brisbane</named-content></addr-line>, <country>Australia</country></aff><aff id="aff8"><institution content-type="dept">The Australian National Endometrial Cancer Study</institution>, <institution>PO Royal Brisbane Hospital</institution>, <addr-line><named-content content-type="city">Brisbane</named-content></addr-line>, <country>Australia</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Stamatoyannopoulos</surname><given-names>John</given-names></name><role>Reviewing editor</role><aff><institution>University of Washington</institution>, <country>United States</country></aff></contrib></contrib-group><author-notes><corresp id="cor1"><label>*</label>For correspondence: <email>diether.lambrechts@vib-kuleuven.be</email></corresp><fn fn-type="con" id="equal-contrib"><label>†</label><p>These authors contributed equally to this work</p></fn></author-notes><pub-date date-type="pub" publication-format="electronic"><day>01</day><month>08</month><year>2014</year></pub-date><pub-date pub-type="collection"><year>2014</year></pub-date><volume>3</volume><elocation-id>e02725</elocation-id><history><date date-type="received"><day>08</day><month>03</month><year>2014</year></date><date date-type="accepted"><day>30</day><month>07</month><year>2014</year></date></history><permissions><copyright-statement>© 2014, Zhao et al</copyright-statement><copyright-year>2014</copyright-year><copyright-holder>Zhao et al</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/4.0/"><license-p>This article is distributed under the terms of the 
<ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife02725.pdf"/><abstract><object-id pub-id-type="doi">10.7554/eLife.02725.001</object-id><p>DNA replication errors that persist as mismatch mutations make up the molecular fingerprint of mismatch repair (MMR)-deficient tumors and convey them with resistance to standard therapy. Using whole-genome and whole-exome sequencing, we here confirm an MMR-deficient mutation signature that is distinct from other tumor genomes, but surprisingly similar to germ-line DNA, indicating that a substantial fraction of human genetic variation arises through mutations escaping MMR. Moreover, we identify a large set of recurrent indels that may serve to detect microsatellite instability (MSI). Indeed, using endometrial tumors with immunohistochemically proven MMR deficiency, we optimize a novel marker set capable of detecting MSI and show it to have greater specificity and selectivity than standard MSI tests. Additionally, we show that recurrent indels are enriched for the ‘DNA double-strand break repair by homologous recombination’ pathway. Consequently, DSB repair is reduced in MMR-deficient tumors, triggering a dose-dependent sensitivity of MMR-deficient tumor cultures to DSB inducers.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.001">http://dx.doi.org/10.7554/eLife.02725.001</ext-link></p></abstract><abstract abstract-type="executive-summary"><object-id pub-id-type="doi">10.7554/eLife.02725.002</object-id><title>eLife digest</title><p>Before a cell divides, it must first copy all of its genetic material. Any mistakes that are made during this process are called mutations. 
Mutations can give rise to new traits but are mostly harmful to the cells, or cause cancer; therefore, cells have evolved tools that can efficiently spot these mistakes and repair them. One of the main tools is called mismatch repair (MMR).</p><p>Defects in the cell's mismatch repair tools can wreak havoc as this allows many mutations to accumulate. Zhao et al. looked at the genomes of tumors where mismatch repair was not working properly to see what makes these ‘MMR-deficient tumors’ different from other tumors. This revealed that MMR-deficient tumors have similar patterns of mutations to those seen in egg and sperm cells. This was unexpected and suggests that mutations that are not corrected by mismatch repair are an important source of the genetic differences found between different humans, and between humans and their ancestors.</p><p>Identifying cancerous tumors that are MMR-deficient is vital, as these tumors tend not to respond to commonly used cancer treatments. However, current clinical methods to identify MMR-deficient tumors often fail or produce results that are difficult to interpret. MMR-deficient tumors commonly contain mutations called indels, where short fragments of DNA are inserted into or deleted from longer DNA sequences. Zhao et al. have found 59 indels that can be used to detect MMR-deficient tumors, where each indel had been identified in several tumors taken from different tissues. This new approach allowed MMR-deficiency to be identified in several types of tumor, including colon and ovarian cancers, with greater sensitivity and accuracy than the existing methods.</p><p>Zhao et al. also found that the indels in MMR-deficient tumors reduce the ability of the tumors to repair a type of DNA damage called double-strand breaks. In these, both strands of DNA that make up the double helix are broken and the DNA chain is severed. 
As this kind of damage is very harmful to a cell, making more double-strand breaks could therefore form part of a more effective treatment against MMR-deficient tumors; further research is needed to investigate this possibility.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.002">http://dx.doi.org/10.7554/eLife.02725.002</ext-link></p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>whole-genome sequencing</kwd><kwd>mismatch repair deficiency</kwd><kwd>mutation pattern</kwd><kwd>MSI</kwd><kwd>DNA double-strand breaks</kwd><kwd>DSB inducers</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>human</kwd></kwd-group><funding-group><award-group id="par-1"><funding-source><institution-wrap><institution>Stichting tegen Kanker</institution></institution-wrap></funding-source><award-id>ZKC6069</award-id><principal-award-recipient><name><surname>Lambrechts</surname><given-names>Diether</given-names></name></principal-award-recipient></award-group><award-group id="par-2"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100004040</institution-id><institution>KU Leuven</institution></institution-wrap></funding-source><award-id>PFV/10/016 SymBioSys</award-id><principal-award-recipient><name><surname>Lambrechts</surname><given-names>Diether</given-names></name></principal-award-recipient></award-group><award-group id="par-3"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100000925</institution-id><institution>National Health and Medical Research Council</institution></institution-wrap></funding-source><award-id>#339435</award-id><principal-award-recipient><name><surname>Spurdle</surname><given-names>Amanda</given-names></name></principal-award-recipient></award-group><award-group 
id="par-4"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100001168</institution-id><institution>Cancer Council Queensland</institution></institution-wrap></funding-source><award-id>#4196615</award-id><principal-award-recipient><name><surname>Spurdle</surname><given-names>Amanda</given-names></name></principal-award-recipient></award-group><award-group id="par-5"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100001169</institution-id><institution>Cancer Council Tasmania</institution></institution-wrap></funding-source><award-id>#403031, #457636</award-id><principal-award-recipient><name><surname>Spurdle</surname><given-names>Amanda</given-names></name></principal-award-recipient></award-group><award-group id="par-6"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100001111</institution-id><institution>Cancer Australia</institution></institution-wrap></funding-source><award-id>1010859</award-id><principal-award-recipient><name><surname>Spurdle</surname><given-names>Amanda</given-names></name></principal-award-recipient></award-group><award-group id="par-7"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100003130</institution-id><institution>Fonds Wetenschappelijk Onderzoek</institution></institution-wrap></funding-source><award-id>Postdoctoral Fellowship</award-id><principal-award-recipient><name><surname>Zhao</surname><given-names>Hui</given-names></name><name><surname>Thienpont</surname><given-names>Bernard</given-names></name><name><surname>Coenegrachts</surname><given-names>Lieve</given-names></name></principal-award-recipient></award-group><award-group id="par-8"><funding-source><institution-wrap><institution-id 
institution-id-type="FundRef">http://dx.doi.org/10.13039/501100003130</institution-id><institution>Fonds Wetenschappelijk Onderzoek</institution></institution-wrap></funding-source><award-id>PhD fellowship</award-id><principal-award-recipient><name><surname>Yesilyurt</surname><given-names>Betül Tuba</given-names></name><name><surname>Moisse</surname><given-names>Matthieu</given-names></name></principal-award-recipient></award-group><award-group id="par-9"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100003130</institution-id><institution>Fonds Wetenschappelijk Onderzoek</institution></institution-wrap></funding-source><award-id>G.0772.13N</award-id><principal-award-recipient><name><surname>Lambrechts</surname><given-names>Diether</given-names></name></principal-award-recipient></award-group><award-group id="par-10"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100000925</institution-id><institution>National Health and Medical Research Council</institution></institution-wrap></funding-source><award-id>Senior Research Fellowship</award-id><principal-award-recipient><name><surname>Spurdle</surname><given-names>Amanda</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection and interpretation, or the decision to submit the work for publication.</funding-statement></funding-group><custom-meta-group><custom-meta><meta-name>elife-xml-version</meta-name><meta-value>2</meta-value></custom-meta><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>A comprehensive catalogue of somatic mutations accumulating in MMR-deficient tumors highlights their relevance in the context of human genetic evolution, for the diagnosis of microsatellite instability and the provision of targeted treatment 
options.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>MMR-deficiency represents a well-established cause of Lynch syndrome, which is an autosomal dominantly inherited disorder of cancer susceptibility triggered by loss-of-function mutations in MMR genes (<italic>MLH1</italic>, <italic>MSH2</italic>, or <italic>MSH6</italic>) (<xref ref-type="bibr" rid="bib25">Jiricny, 2006</xref>). Lynch syndrome is responsible for 2–5% of endometrial (EM) or colorectal (CRC) tumors. Additionally, epigenetic silencing of <italic>MLH1</italic> contributes to another 15–28% of these tumors (<xref ref-type="bibr" rid="bib44">Parsons et al., 2012</xref>; <xref ref-type="bibr" rid="bib45">Peltomaki, 2014</xref>). Deficiency of the MMR machinery leads to DNA replication errors in the tumor tissue, but not the normal surrounding tissue. In particular, errors often accumulate as indel mutations in mono- and di-nucleotide repeats—a phenomenon referred to as microsatellite instability (MSI) (<xref ref-type="bibr" rid="bib47">Pinol et al., 2005</xref>).</p><p>MMR-deficient tumors exhibit a different prognosis and therapeutic outcome after standard chemotherapy (<xref ref-type="bibr" rid="bib40">Ng and Schrag, 2010</xref>). Untreated CRC patients with MMR-deficient tumors have a modestly better prognosis, but do not seem to benefit from 5-fluorouracil-based adjuvant chemotherapy, which is the first-choice chemotherapy for CRC. In particular, in MMR-deficient tumors, mismatches induced by 5-fluorouracil are tolerated, leading to failure to induce cell death (<xref ref-type="bibr" rid="bib16">Fischer et al., 2007</xref>). MMR-deficient tumors are also resistant to cisplatin and carboplatin, which are frequently used chemotherapies in EM cancer (<xref ref-type="bibr" rid="bib22">Hewish et al., 2010</xref>). 
Furthermore, MMR-deficient tumors can be resistant to targeted therapies, because they acquire secondary mutations in genes that activate alternative or downstream signaling pathways (e.g., <italic>PIK3CA</italic>). Another possibility is that epigenetic silencing of <italic>MLH1</italic> coincides with particular mutations, such as the <italic>BRAF</italic> V600E mutation (<xref ref-type="bibr" rid="bib11">Donehower et al., 2013</xref>), which represents an established negative predictor of response to targeted anti-EGFR therapies in advanced CRC (<xref ref-type="bibr" rid="bib52">Richman et al., 2009</xref>).</p><p>Efforts to individualize the treatment of MMR-deficient tumors have focused on identifying synthetic lethal interactions within the MMR pathway. In particular, increased oxidative damage (by methotrexate exposure or <italic>PINK1</italic> silencing [<xref ref-type="bibr" rid="bib33">Martin et al., 2011</xref>]) and interference with the base excision repair (BER) pathway (by DNA polymerase γ or β inhibition [<xref ref-type="bibr" rid="bib32">Martin et al., 2010</xref>]) can sensitize MMR-deficient tumors. Until now, these findings failed, however, to translate into clinically effective treatment options. Alternatively, as highlighted above, secondary mutations occurring because of MMR-deficiency may also critically determine therapeutic efficacy (<xref ref-type="bibr" rid="bib12">Dorard et al., 2011</xref>). These secondary mutation spectra have, however, been poorly characterized, mainly because studies often focused on one or a few reporter loci, or exclusively on mutations at known hotspot sequences. More recently, the first whole-exome sequencing of MMR-deficient tumors was performed, highlighting the clearly distinct mutational landscape of these tumors (<xref ref-type="bibr" rid="bib60">TCGA, 2012</xref>), whereas at the whole-genome level, <xref ref-type="bibr" rid="bib27">Kim et al. 
(2013)</xref> revealed overrepresentation of MSI in euchromatic and intronic regions compared to heterochromatic and intergenic regions.</p><p>To generate a more comprehensive picture of the mutation spectra arising in MMR-deficient tumors, and in particular, to interpret their clinical relevance with respect to diagnostically assessing MSI and therapeutically targeting MMR-deficient tumors, we sequenced another comprehensive set of MMR-deficient tumors. In particular, whole-genome and whole-exome sequencing was applied to 5 and 28 tumor–normal pairs, of which respectively 3 and 22 were MMR-deficient.</p></sec><sec id="s2" sec-type="results"><title>Results</title><sec id="s2-1"><title>Whole-genome sequencing of MMR-deficient tumors</title><p>To select MMR-deficient tumors for whole-genome sequencing, standard diagnostic tests were used, including immunohistochemistry of MMR proteins (MLH1, MSH2, and MSH6), assessment of MSI using the extended Bethesda panel and methylation profiling of the <italic>MLH1</italic> promoter. Three chemo-naive EM tumors, either deficient for MLH1, MSH2, or MSH6 and thus covering the full spectrum of MMR-deficiency, as well as two MMR-proficient EM tumors were selected (<xref ref-type="table" rid="tbl1">Table 1</xref>). Different sequencing technologies were leveraged to avoid potential technology biases in assessing mutation patterns in MMR-deficient tumor genomes, that is, Complete Genomics (CG) and Illumina short-read sequencing. We obtained high coverage sequencing data (30–120x) for tumor and matched normal samples (<xref ref-type="table" rid="tbl1">Table 1</xref>). 
Application of a standard annotation and filtering pipeline, as previously described (<xref ref-type="bibr" rid="bib51">Reumers et al., 2011</xref>), revealed that each MMR-deficient tumor exhibited a clear hypermutator phenotype, containing on average 50 times more novel somatic mutations than MMR-proficient tumors (<xref ref-type="fig" rid="fig1">Figure 1A</xref>, <xref ref-type="supplementary-material" rid="SD1-data">Figure 1—source data 1</xref>, <xref ref-type="supplementary-material" rid="SD2-data">Figure 1—source data 2</xref>). Orthogonal technologies validated 98% of substitutions and 88% of indels in the three MMR-deficient tumors, while only 62% of substitutions and 11% of indels were validated in the two MMR-proficient tumors (<xref ref-type="supplementary-material" rid="SD3-data">Figure 1—source data 3</xref>). This difference in validation rates between MMR-deficient and MMR-proficient tumors is probably due to the fact that in normal genomes, as well as MMR-proficient tumor genomes, the number of true-positive indels is low in comparison to the number of false-positive indels. However, in MMR-deficient tumors, due to their specific hypermutator phenotype, the number of true-positive indels is vastly increased, thereby rendering the false positive fraction proportionally much smaller. 
Notably, all tumors were negative for <italic>POLE</italic> mutations (<xref ref-type="bibr" rid="bib26">Kandoth et al., 2013</xref>; <xref ref-type="bibr" rid="bib42">Palles et al., 2013</xref>).<table-wrap id="tbl1" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.003</object-id><label>Table 1.</label><caption><p>Standard diagnostic tests to assess MMR-deficiency</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.003">http://dx.doi.org/10.7554/eLife.02725.003</ext-link></p></caption><table frame="hsides" rules="groups"><thead><tr><th rowspan="2">Tumor</th><th rowspan="2">Histopathology</th><th rowspan="2">Grade</th><th rowspan="2">Stage</th><th colspan="2">Coverage</th><th align="center" colspan="3">IHC</th><th rowspan="2">MSI</th><th rowspan="2"><italic>MLH1</italic> hyper-methylation</th></tr><tr><th>Tumor</th><th>Germ-line</th><th>MLH1</th><th>MSH2</th><th>MSH6</th></tr></thead><tbody><tr><td>MMR− 1</td><td>Endometrioid</td><td align="char" char=".">3</td><td>IIIc</td><td align="char" char=".">87.1</td><td align="char" char=".">81.1</td><td>+</td><td>+</td><td>−(<xref ref-type="table-fn" rid="tblfn1">*</xref>)</td><td>+</td><td>−</td></tr><tr><td>MMR− 2</td><td>Serous/clear cell</td><td align="char" char=".">3</td><td>Ib</td><td align="char" char=".">24.8</td><td align="char" char=".">21.9</td><td>+</td><td>−</td><td>−</td><td>−</td><td>−</td></tr><tr><td>MMR− 3</td><td>Endometrioid</td><td align="char" char=".">2</td><td>Ib</td><td align="char" char=".">28.5</td><td align="char" char=".">30.0</td><td>−</td><td>+</td><td>+</td><td>+</td><td>+</td></tr><tr><td>MMR+ 1</td><td>Endometrioid</td><td align="char" char=".">3</td><td>I</td><td align="char" char=".">119.4</td><td align="char" char=".">73.1</td><td>+</td><td>+</td><td>+</td><td>−</td><td>+</td></tr><tr><td>MMR+ 2</td><td>Serous</td><td align="char" char=".">3</td><td>Ia</td><td align="char" char=".">79.2</td><td align="char" 
char=".">77.0</td><td>+</td><td>+</td><td>+</td><td>−</td><td>−</td></tr></tbody></table><table-wrap-foot><fn><p>Tumors and matched germ-line were whole-genome sequenced using either Complete Genomics or Illumina sequencing technology. For each tumor, microsatellite instability (MSI) using the extended Bethesda panel, standard immunohistochemistry of MMR proteins (MLH1, MSH2, and MSH6), and methylation status of the <italic>MLH1</italic> promoter are shown.</p></fn><fn id="tblfn1"><label>*</label><p>a weak positive nuclear staining in the minority of the tumor cells.</p></fn></table-wrap-foot></table-wrap><fig-group><fig id="fig1" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.004</object-id><label>Figure 1.</label><caption><title>Somatic mutations in MMR-deficient tumors.</title><p>(<bold>A</bold>) The average frequency of mutations, indels, and substitutions in MMR-deficient tumors vs MMR-proficient tumors, expressed as number of mutations per base (mpb). (<bold>B</bold>) The fraction of indels and substitutions observed in microsatellites, homopolymers (length over 5 bp), short homopolymers (length of 3–5 bp), and ‘not in repeat regions’ compared to their expected fraction in these regions. (<bold>C</bold> and <bold>D</bold>) Frequencies of substitutions (<bold>C</bold>) and indels (<bold>D</bold>) in MMR-deficient tumors stratified into exonic, intergenic, and intronic regions. (<bold>E</bold>) Indel frequencies corrected for homopolymer number, length, and base composition. Indel frequencies in MMR-deficient tumors represent estimates only, as orthogonal technologies revealed false-positive rates of 12%, while false-negative rates in CG and Illumina whole-genomes were estimated to be 27.7% and 0.5%, respectively, by <xref ref-type="bibr" rid="bib68">Zook et al. (2014)</xref>. 
In MMR-proficient tumors all detected somatic indels were independently validated.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.004">http://dx.doi.org/10.7554/eLife.02725.004</ext-link></p><p><supplementary-material id="SD1-data"><object-id pub-id-type="doi">10.7554/eLife.02725.005</object-id><label>Figure 1—source data 1.</label><caption><title>Sequence statistics of MMR-proficient and MMR-deficient whole genome sequenced tumour samples, and a list of somatic substitutions detected therein.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.005">http://dx.doi.org/10.7554/eLife.02725.005</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s001.xlsx"/></supplementary-material></p><p><supplementary-material id="SD2-data"><object-id pub-id-type="doi">10.7554/eLife.02725.006</object-id><label>Figure 1—source data 2.</label><caption><title>List of somatic indels detected in the MMR-proficient and MMR-deficient, whole genome sequenced tumour samples.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.006">http://dx.doi.org/10.7554/eLife.02725.006</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s002.xlsx"/></supplementary-material></p><p><supplementary-material id="SD3-data"><object-id pub-id-type="doi">10.7554/eLife.02725.007</object-id><label>Figure 1—source data 3.</label><caption><title>List and overview of validated somatic mutations, detected in the MMR-proficient and MMR-deficient whole genome sequenced tumour samples.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.007">http://dx.doi.org/10.7554/eLife.02725.007</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s003.xlsx"/></supplementary-material></p></caption><graphic xlink:href="elife02725f001"/></fig><fig id="fig1s1" 
position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.008</object-id><label>Figure 1—figure supplement 1.</label><caption><title>The fraction of indels (left panel) and substitutions (right panel) observed in microsatellites, homopolymers, short homopolymers and in nonrepeat regions compared to their expected fraction in these regions.</title><p>Data are shown for the individual MMR-deficient tumors. In all three tumors, substitutions predominantly affected non-repeat regions, while indels were mainly confined to homopolymers.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.008">http://dx.doi.org/10.7554/eLife.02725.008</ext-link></p></caption><graphic xlink:href="elife02725fs001"/></fig><fig id="fig1s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.009</object-id><label>Figure 1—figure supplement 2.</label><caption><title>The relative indel frequency defined as the number of indels divided by the total bases of non-homopolymer regions in MMR-deficient tumors stratified into intergenic, exonic, 5′UTR, 3′UTR, and intronic regions is shown.</title><p>Indel frequencies in homopolymers are shown in the left panel, whereas indel frequencies in non-homopolymer regions are shown in the right panel. The algorithm we used to correct for homopolymer content, composition, and length can be found in the ‘Materials and methods’ section under the header ‘Evidence of negative clonal selection’. In homopolymer regions, there was a 16% decrease in indel frequency in exonic regions. In non-homopolymer regions, a clear decrease was also observed for exonic regions, confirming that the decrease in exonic indels is not only due to differences in homopolymer characteristics between exonic regions and the rest of the genome. 
This reveals apparent negative selection in exonic regions, independent of homopolymer content, composition or length of the homopolymers.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.009">http://dx.doi.org/10.7554/eLife.02725.009</ext-link></p></caption><graphic xlink:href="elife02725fs002"/></fig><fig id="fig1s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.010</object-id><label>Figure 1—figure supplement 3.</label><caption><title>Copy number status of the 5 whole-genomes assessed by Illumina Human-Omni1 and CytoSNP-12 chips.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.010">http://dx.doi.org/10.7554/eLife.02725.010</ext-link></p></caption><graphic xlink:href="elife02725fs003"/></fig></fig-group></p></sec><sec id="s2-2"><title>Somatic mutation patterns in MMR-deficient hypermutators</title><p>Studies in model organisms and cell lines have shown that somatic mutations arising due to MMR-deficiency mostly involve indels affecting microsatellite sequences (di- to hexa-nucleotide repeats with a minimal length of six bases and at least two repeat units) and homopolymers (mononucleotide repeats with a minimal length of six bases) (<xref ref-type="bibr" rid="bib14">Ellegren, 2004</xref>). We observed that indels were indeed more frequent than single basepair substitutions in all three MMR-deficient tumors (<xref ref-type="fig" rid="fig1">Figure 1A</xref>). Indels predominantly affected homopolymers (40-fold enrichment over expected by chance) and to a lesser extent also microsatellites (2.3-fold enrichment; <xref ref-type="fig" rid="fig1">Figure 1B</xref>, <xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1</xref>). Substitutions were only slightly enriched in homopolymers and microsatellites (3- and 1.5-fold enrichment, respectively; <xref ref-type="fig" rid="fig1">Figure 1B</xref>). 
Mutations occurred as frequently in introns as in the rest of the genome, but were clearly less frequent in exons (excluding 5′ and 3′ untranslated regions [UTRs]). This decrease was caused by indels that were 91% less frequent in exons (<xref ref-type="fig" rid="fig1">Figure 1C,D</xref>). Correction for the number of homopolymers, the length of homopolymers or their basepair composition in exons versus other regions weakened this effect, but failed to completely alleviate it (<xref ref-type="fig" rid="fig1">Figure 1E</xref>, <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>). Since 92% of exonic indels resulted in frameshift mutations, which have a greater functional impact than substitutions (<xref ref-type="bibr" rid="bib38">Montgomery et al., 2013</xref>), this suggests that exonic indels are prone to negative clonal selection during tumorigenesis.</p></sec><sec id="s2-3"><title>Somatic substitutions in MMR-deficient hypermutators</title><p>There is extraordinary variation in the frequency and spectrum of somatic mutations affecting different cancers, shedding light on the underlying mutational processes and disease etiology of these tumors (<xref ref-type="bibr" rid="bib66">Wheeler and Whang, 2013</xref>). When assessing somatic substitutions in MMR-deficient tumors, we observed that 74% of all substitutions represent transitions (i.e., purine-to-purine or pyrimidine-to-pyrimidine substitutions), which is similar to the patterns observed in the matched germ-line of these tumors (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). This is surprising, since tumor genomes generally display patterns distinct from those found in the germ-line. 
Indeed, when extending these analyses to other hypermutators, that is, UV-light-induced melanoma (<xref ref-type="bibr" rid="bib48">Pleasance et al., 2010</xref>), tobacco smoke-induced small cell lung adenocarcinoma (SCLC) (<xref ref-type="bibr" rid="bib49">Pleasance et al., 2010</xref>), as well as breast tumors deficient for BRCA1 (<xref ref-type="bibr" rid="bib41">Nik-Zainal et al., 2012</xref>) or EM tumors proficient for MMR, patterns were clearly dissimilar from the matched germ-line (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). On the other hand, de novo germ-line substitutions identified through whole-genome sequencing of parent–offspring trios (<xref ref-type="bibr" rid="bib5">Campbell et al., 2012</xref>; <xref ref-type="bibr" rid="bib28">Kong et al., 2012</xref>), common genetic variation as catalogued by the 1000 Genomes Project (1 KG) (<xref ref-type="bibr" rid="bib1">1000 Genomes Project Consortium, 2012</xref>), and substitutions that occurred in the human lineage during the divergence of humans and chimpanzees correlated strongly to the MMR-deficient tumor genome (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). 
Given these remarkable parallels, we hypothesized that MMR-deficient genomes hypermutate in a way that mirrors the processes driving genetic variation on a population level, albeit somatically and on a shorter time scale.<fig-group><fig id="fig2" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.011</object-id><label>Figure 2.</label><caption><title>Somatic substitution patterns in MMR-deficient tumors.</title><p>(<bold>A</bold>) Somatic substitution patterns in whole-genome sequences of MMR-deficient endometrial tumors (MMR−), matched germ-line (peripheral white blood cell) DNA from MMR-deficient tumors (MMR-germ-line), de novo mutations as identified in parent-offspring trios (de novo), 1000 Genomes Project (1 KG), the human–chimpanzee divergence panel (Divergence), melanoma and small-cell lung cancer (SCLC), BRCA-deficient breast tumors (BRCA−), MMR-proficient endometrial tumors (MMR+). (<bold>B</bold>) Somatic substitution frequency per million dinucleotides and per million substitutions. The first row lists the base following the mutated base, the second row lists the base that was mutated, and the third row lists the new base. Gray boxes indicate transitions. Frequencies are depicted color-coded following a logarithmic distribution as shown by the gradient on the left. (<bold>C</bold> and <bold>D</bold>) Squared coefficients of correlation (<italic>R</italic><sup>2</sup>) between dinucleotide substitution patterns (<bold>C</bold>) and between the number of intergenic substitutions per 1 Mb window (<bold>D</bold>). Substitutions in MMR-proficient and de novo data sets were too sparse for correlations at a 1 Mb scale. (<bold>E</bold>) Multivariate linear regression modeling of genomic features predicting substitutions frequencies per 1 Mb window in MMR-deficient tumors, and the outcome of the same multivariate linear regression modeling in the germ-line genetic variability panels. 
T-values resulting from the linear model are displayed as bar plots and indicate direction and significance of correlation (shaded grey box equals p &gt; 0.05, Bonferroni-corrected per model). The de novo substitution frequency was too low to be modeled at this resolution. (<bold>F</bold>) Frequency of transitions (excluding G:C&gt;A:T in CG) and transversions per 1 Mb window, binned per replication time. Frequencies are displayed relative to the earliest replicating bin. Linear regression analysis was performed to assess whether observed increases were significant and independent of other genomic features. All Bonferroni-corrected p-values were significant (p &lt; 2.0E−5) except for transversions in MMR-deficient tumors, which were not significant (NS; p = 0.23). (<bold>G</bold>) Effect of homopolymer nucleotide composition (A<sub>n</sub>, T<sub>n</sub>, C<sub>n</sub>, or G<sub>n</sub>) on substitutions immediately flanking a homopolymer. For example, the nucleotide B next to the poly-A repeat 'NNB(A)<sub>n</sub>BNN' is mostly converted to an A (NNB(A)<sub>n</sub>ANN) and not to a C, G, or T. The modest increase in A substitutions next to C<sub>n</sub> homopolymers and T substitutions near G<sub>n</sub> homopolymers is caused by C:G&gt;T:A transitions in a CpG context. (<bold>H</bold>) Substitution frequency in and outside CpG islands, relative to genome-wide substitution frequencies. 
Data combined for all three MMR-deficient genomes are represented for (<bold>B</bold>, <bold>E</bold>–<bold>H</bold>), but individual MMR-deficient genomes display similar patterns (<xref ref-type="fig" rid="fig2s1 fig2s2 fig2s3 fig2s4 fig2s5">Figure 2—figure supplements 1–5</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.011">http://dx.doi.org/10.7554/eLife.02725.011</ext-link></p></caption><graphic xlink:href="elife02725f002"/></fig><fig id="fig2s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.012</object-id><label>Figure 2—figure supplement 1.</label><caption><title>Somatic substitution frequency per million dinucleotides and per million substitutions for the individual MMR-deficient genomes.</title><p>The first row lists the base following the mutated base, the second row the base that was mutated, and the third row the new base. Transitions are indicated by grey boxes. Frequencies are depicted color-coded following a logarithmic distribution as shown by the gradient on the right. The average R<sup>2</sup> between the MMR-deficient tumors is 0.75.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.012">http://dx.doi.org/10.7554/eLife.02725.012</ext-link></p></caption><graphic xlink:href="elife02725fs004"/></fig><fig id="fig2s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.013</object-id><label>Figure 2—figure supplement 2.</label><caption><title>Multivariate linear regression modeling of genome features predicting substitutions frequencies per 1 Mb window in the individual MMR-deficient genomes.</title><p>T-values resulting from the linear model are displayed for each genome feature in the bar plots and indicate significance (shaded grey box equals p &gt; 0.05, Bonferroni-corrected per model) and direction of the correlation. 
High concordance between the individual tumors is observed.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.013">http://dx.doi.org/10.7554/eLife.02725.013</ext-link></p></caption><graphic xlink:href="elife02725fs005"/></fig><fig id="fig2s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.014</object-id><label>Figure 2—figure supplement 3.</label><caption><title>Frequency of transitions (excluding G:C&gt;A:T in CG) and transversions per 1 Mb window, binned per replication time, relative to the earliest replicating bin.</title><p>Mutations are divided in 7 bins (left to right bins represent early to late replication timing events). Linear regression analysis was performed to assess whether observed increases were significant and independent of other genomic features. Bonferroni-corrected p-values were significant (p &lt; 2.0E−5) for transitions and nonsignificant for transversions. In none of the individual MMR-deficient genomes, transversions were significantly correlated to replication timing, whereas transitions correlated for each of the MMR-deficient genomes.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.014">http://dx.doi.org/10.7554/eLife.02725.014</ext-link></p></caption><graphic xlink:href="elife02725fs006"/></fig><fig id="fig2s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.015</object-id><label>Figure 2—figure supplement 4.</label><caption><title>Effect of homopolymer nucleotide composition (A<sub>n</sub>, T<sub>n</sub>, C<sub>n</sub>, or G<sub>n</sub>) on substitutions immediately flanking a homopolymer in the individual MMR-deficient genomes.</title><p>The slight increase in A substitutions next to C<sub>n</sub> homopolymers and T substitutions near G<sub>n</sub> homopolymers is exclusively caused by C:G&gt;T:A transitions in a CpG context, indicating they are likely deaminations of 
methylated cytosines.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.015">http://dx.doi.org/10.7554/eLife.02725.015</ext-link></p></caption><graphic xlink:href="elife02725fs007"/></fig><fig id="fig2s5" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.016</object-id><label>Figure 2—figure supplement 5.</label><caption><title>Frequency of transitions and transversions in and outside of CpG Islands in the individual MMR-deficient genomes.</title><p>The frequency of transitions and transversions inside and outside CpG islands was determined as the number of mutations divided by the total size of each of the features, and expressed relative to the general, genome-wide frequencies of transitions and transversions. Individual genomes display similar patterns.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.016">http://dx.doi.org/10.7554/eLife.02725.016</ext-link></p></caption><graphic xlink:href="elife02725fs008"/></fig></fig-group></p><p>To further assess the similarities between MMR-deficient mutation patterns and germ-line genetic variability, we analyzed small-scale and large-scale context-dependent effects on substitution patterns. At the small-scale level, when assessing the effect of flanking nucleotides on substitution frequencies, the patterns of all four sets of germ-line genetic variants were highly correlated to MMR-deficient tumors (average <italic>R</italic><sup>2</sup> = 0.77), but less to the four other cancer genomes (average <italic>R</italic><sup>2</sup> = 0.45; <xref ref-type="fig" rid="fig2">Figure 2B,C</xref>), providing further support for our hypothesis. 
On a large-scale context, the number of intergenic substitutions per 1 Mb in germ-line genetic variability databases was similarly highly correlated to those in MMR-deficient genomes (average <italic>R</italic><sup>2</sup> = 0.67), but not to those in other cancer genomes (average <italic>R</italic><sup>2</sup> = 0.42; <xref ref-type="fig" rid="fig2">Figure 2D</xref>). This suggests that also on a large scale, substitutions are comparably distributed in MMR-deficient tumor genomes as in germ-line genomes. At the large-scale level, nine genomic features are linked with genetic variability (<xref ref-type="bibr" rid="bib23">Hodgkinson and Eyre-Walker, 2011</xref>). Each of these features correlated significantly with substitution frequencies in MMR-deficient tumors and germ-line genomes. Linear modeling revealed that six of these independently correlated with substitution rates in MMR-deficient tumors as well as with germ-line substitutions (<xref ref-type="fig" rid="fig2">Figure 2E</xref>). Overall, the types as well as the narrow and broad context-dependencies of substitutions thus appear to be largely shared between germ-line and MMR-deficient genomes, suggesting that a considerable fraction of human genetic diversity arises through mismatches escaping MMR.</p><p>Since MMR-proficient tumors carried 50 times fewer substitutions and displayed more disparate substitution patterns than MMR-deficient tumors, the observed correlations can almost exclusively be attributed to the MMR-deficient phenotype of these tumors. As such, these correlations also provide novel insights into the functioning of the MMR system. First, replication timing correlated with transitions but not transversions in all three MMR-deficient tumors (<xref ref-type="fig" rid="fig2">Figure 2F</xref>). 
This contrasts with the increase in late S phase transversions observed in all other genomes studied here (<xref ref-type="fig" rid="fig2">Figure 2F</xref>), as well as in lymphoblastoid cell lines (<xref ref-type="bibr" rid="bib29">Koren et al., 2012</xref>). The increase in MMR-proficient but not MMR-deficient cells suggests a reduced fidelity of DNA repair in late S phase, leading to an increase in transversions. Potential causes include a decreased MMR-activity in late S phase, or a longer window of time available for the repair of early vs late transversions in MMR-proficient cells (<xref ref-type="bibr" rid="bib24">Hombauer et al., 2011</xref>). In contrast, DNA repair fidelity in MMR-deficient cells is invariably low and therefore not affected by replication time. Secondly, a positive association with simple repeat content was noted. Indeed, a 1.6-fold increase in substitutions at bases immediately flanking simple repeats was noted, with a threefold increase next to homopolymers and a 1.3-fold increase next to microsatellites (<xref ref-type="fig" rid="fig2">Figure 2G</xref>). These substitutions for the vast majority converted the base flanking the repeat, to the base constituting the repeat (<xref ref-type="fig" rid="fig2">Figure 2G</xref>). They are thus probably the result of polymerase slippage events, following a mechanism akin to the previously described bacterial dislocation mutagenesis (<xref ref-type="bibr" rid="bib30">Kunkel and Soni, 1988</xref>). Thirdly, G:C&gt;A:T transitions in CpG sites strongly depend on CpG content, but are inversely correlated with the fraction of CpG islands (<xref ref-type="fig" rid="fig2">Figure 2E</xref>). Spontaneous, replication-independent deaminations of methyl-C to T underlie such transitions. 
Here, the much larger increase in CG&gt;TG transitions observed in MMR-deficient compared to MMR-proficient tumors (3449 vs 145) demonstrates that replication-independent MMR, recently described at the molecular level (<xref ref-type="bibr" rid="bib54">Shell et al., 2007</xref>; <xref ref-type="bibr" rid="bib46">Pena-Diaz et al., 2012</xref>), is also involved in deamination repair in vivo (<xref ref-type="bibr" rid="bib7">Chen et al., 2014</xref>). Finally, overall substitution frequencies correlated inversely with CpG islands. Indeed, irrespective of dinucleotide context, bases outside CpG islands were nearly two times more likely to undergo mutation than those inside CpG islands (<xref ref-type="fig" rid="fig2">Figure 2H</xref>). As CpG islands are generally unmethylated, DNA methylation thus appears to contribute to the mutagenic process. Explanations for this observation include the polymerase stalling that DNA methylation may induce (<xref ref-type="bibr" rid="bib56">Song et al., 2012</xref>), and the repair of spontaneously deaminated methyl-Cs, which is error-prone and thus mutagenic on its own (<xref ref-type="bibr" rid="bib7">Chen et al., 2014</xref>).</p></sec><sec id="s2-4"><title>Somatic indels in MMR-deficient hypermutators</title><p>We also evaluated somatic indel patterns in MMR-deficient tumors. As expected, since the majority of indels was located in homopolymers, a strong correlation between simple repeats and indel frequency was observed (<xref ref-type="fig" rid="fig3">Figure 3A</xref>). Indels were also predominantly 1 or 2 bps in length (<xref ref-type="fig" rid="fig3">Figure 3B</xref>). Although the minority of homopolymers consists of C or G bases (7%), an even smaller fraction of indels affected C:G homopolymers (1.9%; <xref ref-type="fig" rid="fig3">Figure 3C</xref>), suggesting that C:G homopolymers are less likely to accumulate indels. 
As observed in other MMR-deficient tumors and also in MMR-deficient <italic>Caenorhabditis elegans</italic> (<xref ref-type="bibr" rid="bib9">Denver et al., 2005</xref>; <xref ref-type="bibr" rid="bib27">Kim et al., 2013</xref>), deletions were remarkably more frequent than insertions (81% vs 19%), confirming that DNA polymerases are more prone to remove than to add a base during DNA synthesis.<fig-group><fig id="fig3" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.017</object-id><label>Figure 3.</label><caption><title>Somatic indel patterns in MMR-deficient tumors.</title><p>(<bold>A</bold>) Impact of genomic features in MMR-deficient tumors on indel frequency as assessed by multivariate linear regression modeling. T-values resulting from the linear model are displayed for each genomic feature in the bar plots and indicate significance (shaded grey box equals p &gt; 0.05, Bonferroni-corrected per model) and direction of the correlation. (<bold>B</bold>) Fraction of all indels inserting or deleting the indicated number of bases. 
(<bold>C</bold>) Fraction of homopolymers affected by an indel stratified per nucleotide, compared to the genome-wide fraction of homopolymers with that nucleotide content.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.017">http://dx.doi.org/10.7554/eLife.02725.017</ext-link></p></caption><graphic xlink:href="elife02725f003"/></fig><fig id="fig3s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.018</object-id><label>Figure 3—figure supplement 1.</label><caption><title>The distance between a somatic substitution and the nearest somatic indel (top left), substitution (top right), repeat (bottom left), or homopolymer (bottom right) in the individual MMR-deficient genomes, and the expected distance based on 200 random models.</title><p>The substitutions located nearby indels and substitutions were enriched respectively within a range of ∼30 bp and ∼200 bp, whereas substitutions near repeats were enriched only at the base immediately flanking the repeat.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.018">http://dx.doi.org/10.7554/eLife.02725.018</ext-link></p></caption><graphic xlink:href="elife02725fs009"/></fig></fig-group></p></sec><sec id="s2-5"><title>Exome-sequencing of additional MMR-deficient tumors</title><p>Next, we selected 13 additional MMR-deficient tumors, as well as four MMR-proficient tumors, collected from different tissues (i.e., endometrium, colon, and ovarium). Of these, six represented primary tumor cultures of low passage, which we preferred over cell lines, because the latter due to their hypermutator phenotype are no longer representative of the original tumor (<xref ref-type="supplementary-material" rid="SD5-data">Figure 4—source data 2</xref>). 
Exome-sequencing of tumor and matched germ-line DNA at an average coverage of 44x revealed that each MMR-deficient tumor contained ∼2015 somatic events vs 39 for MMR-proficient tumors (52-fold increase; <xref ref-type="fig" rid="fig4">Figure 4A</xref>, <xref ref-type="supplementary-material" rid="SD4-data">Figure 4—source data 1</xref>, <xref ref-type="supplementary-material" rid="SD5-data">Figure 4—source data 2</xref>). Validation rates for substitutions and indels were respectively 87% and 86%. Clustering analysis of all 13 MMR-deficient tumors for the genes affected by either a somatic substitution or indel in the coding regions revealed no obvious subgroups in terms of cancer of origin or between primary tumors and cell cultures (<xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref>). Presumably, because of negative clonal selection and differences in homopolymer content in exons vs other genomic regions, exonic substitutions outnumbered indels (<xref ref-type="fig" rid="fig4">Figure 4A</xref>, <xref ref-type="fig" rid="fig4s2">Figure 4—figure supplement 2</xref>), similar to what we observed in the MMR-deficient whole-genomes (<xref ref-type="fig" rid="fig1">Figure 1C,D</xref>). Only a minority of these indels affected microsatellites, confirming that homopolymers were most frequently affected by indels.<fig-group><fig id="fig4" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.019</object-id><label>Figure 4.</label><caption><title>Recurrent somatic indels.</title><p>(<bold>A</bold>) The average mutation frequencies in the exons of 13 MMR-deficient tumors and four MMR-proficient tumors. No obvious difference was observed between MLH1-, MSH2-, and MSH6- deficiency in terms of the mutation frequencies, substitution patterns, and indel compositions (<xref ref-type="fig" rid="fig4s5">Figure 4—figure supplement 5</xref>). 
(<bold>B</bold>) Fraction of homopolymers affected by an indel in function of the homopolymer length stratified for exons, 5′ and 3′UTRs. (<bold>C</bold>) The fraction of homopolymers in exons, 5′ and 3′UTRs that are affected by an indel in function of the homopolymer length. (<bold>D</bold>) Average somatic indel frequencies in exons, 5′ and 3′UTRs of 16 MMR-deficient tumors. (<bold>E</bold>) The enrichment of observed over expected frequencies of recurrent indels. Enrichments were stratified by length of the affected homopolymer and calculated for recurrent indels in 2, 3, 4, and 5 or more out of 16 MMR-deficient tumors.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.019">http://dx.doi.org/10.7554/eLife.02725.019</ext-link></p><p><supplementary-material id="SD4-data"><object-id pub-id-type="doi">10.7554/eLife.02725.020</object-id><label>Figure 4—source data 1.</label><caption><title>Sample info and sequence statistics of MMR-deficient whole exome sequenced tumour samples, a list of somatic substitutions detected therein and results of validation of somatic substitutions.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.020">http://dx.doi.org/10.7554/eLife.02725.020</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s004.xlsx"/></supplementary-material></p><p><supplementary-material id="SD5-data"><object-id pub-id-type="doi">10.7554/eLife.02725.021</object-id><label>Figure 4—source data 2.</label><caption><title>A list of somatic indels detected in the MMR-deficient whole exome sequenced tumour samples, and results of their validation.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.021">http://dx.doi.org/10.7554/eLife.02725.021</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s005.xlsx"/></supplementary-material></p><p><supplementary-material 
id="SD6-data"><object-id pub-id-type="doi">10.7554/eLife.02725.022</object-id><label>Figure 4—source data 3.</label><caption><title>Somatic substitutions and indels in homopolymers together with their recurrence rate as identified by whole-exome and whole-genome sequencing.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.022">http://dx.doi.org/10.7554/eLife.02725.022</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s006.xlsx"/></supplementary-material></p></caption><graphic xlink:href="elife02725f004"/></fig><fig id="fig4s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.023</object-id><label>Figure 4—figure supplement 1.</label><caption><title>Clustering analysis of 13 MMR-deficient exomes for the genes affected by either a somatic substitution or indel in the coding regions.</title><p>No obvious subgroups in terms of cancer of origin or between primary tumors and cell cultures were observed.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.023">http://dx.doi.org/10.7554/eLife.02725.023</ext-link></p></caption><graphic xlink:href="elife02725fs010"/></fig><fig id="fig4s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.024</object-id><label>Figure 4—figure supplement 2.</label><caption><title>The fraction of indels (left panel) and substitutions (right panel) identified by whole-exome sequencing, as observed in microsatellites, homopolymers (length over 5 bp), short homopolymers (length of 3–5 bp) and ‘not in repeat regions’ compared to their expected fraction in these regions.</title><p>Indels mainly affected homopolymers (59.0%), whereas microsatellites and short homopolymers were affected at a frequency that was expected based on their genome-wide occurrence. In contrast, indels were depleted in non-repeat regions. 
Substitutions affected the exome independent of repeat composition. These distributions mirror our observations in the MMR-deficient tumors undergoing whole-genome sequencing.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.024">http://dx.doi.org/10.7554/eLife.02725.024</ext-link></p></caption><graphic xlink:href="elife02725fs011"/></fig><fig id="fig4s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.025</object-id><label>Figure 4—figure supplement 3.</label><caption><title>Characteristics of the exonic homopolymers recurrently affected.</title><p>For the 477 homopolymers affected in at least 2 out of 16 tumors, respectively 176, 135, 85, and 81 consisted of A, T, G, or C stretches. Out of the 34 homopolymers affected in at least 6 out of 16 tumors, 15, 15, 1, and 3 consisted of A, T, G, or C stretches, respectively. The length of recurrently affected homopolymers (in at least 2 out of 16 tumors) varied from 6 nucleotides to 25 nucleotides, but recurrence was biased towards homopolymers with length 7–9 nucleotides.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.025">http://dx.doi.org/10.7554/eLife.02725.025</ext-link></p></caption><graphic xlink:href="elife02725fs012"/></fig><fig id="fig4s4" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.026</object-id><label>Figure 4—figure supplement 4.</label><caption><title>The observed and expected frequencies of indels recurrently affected in homopolymers (in at least 2 out of 16 tumors) stratified for homopolymer length and for those affecting coding exonic regions and the 3′UTR.</title><p>The difference between observed and expected recurrent indels is high for short homopolymers, but non-existent for long homopolymers.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" 
xlink:href="10.7554/eLife.02725.026">http://dx.doi.org/10.7554/eLife.02725.026</ext-link></p></caption><graphic xlink:href="elife02725fs013"/></fig><fig id="fig4s5" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.027</object-id><label>Figure 4—figure supplement 5.</label><caption><title>Mutation patterns obtained from MLH1-deficient, MSH2-deficient, and MSH6-deficient exomes.</title><p>(<bold>A</bold>–<bold>C</bold>) Mutation frequencies. (<bold>D</bold>) Somatic substitution patterns. (<bold>E</bold>–<bold>G</bold>) Indel compositions. No obvious difference is observed.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.027">http://dx.doi.org/10.7554/eLife.02725.027</ext-link></p></caption><graphic xlink:href="elife02725fs014"/></fig></fig-group></p><p>Remarkably, 1.6% of homopolymers was recurrently affected by an indel in the 16 MMR-deficient tumors that underwent whole-genome or exome sequencing (i.e., 2244 out of 29,663 homopolymers were affected at least once, whereas 477 were affected at least twice; <xref ref-type="fig" rid="fig4s3">Figure 4—figure supplement 3</xref>). Furthermore, 34 and 10 homopolymers were affected in ≥6 or ≥8 tumors (<xref ref-type="supplementary-material" rid="SD6-data">Figure 4—source data 3</xref>). In contrast, only 55 substitutions were recurrent, three of which were found in ≥3 tumors (i.e., two substitutions affecting <italic>KRAS</italic> codon 12 and 13 were found in three and four tumors [<xref ref-type="bibr" rid="bib61">Tie et al., 2011</xref>], whereas a substitution in <italic>ZNF648</italic> affected three tumors). When comparing homopolymer content of coding regions vs UTRs, long homopolymers (&gt;10 bps) were more frequent in UTRs than in coding regions (<xref ref-type="fig" rid="fig4">Figure 4B</xref>). 
Because these long homopolymers were also more frequently affected (<xref ref-type="fig" rid="fig4">Figure 4C</xref>), the overall indel rate in coding regions was lower than in UTRs (<xref ref-type="fig" rid="fig4">Figure 4D</xref>). As a consequence of this difference, recurrent indels also occurred more frequently in UTRs than coding regions (31,438 vs 1337; <xref ref-type="supplementary-material" rid="SD6-data">Figure 4—source data 3</xref>). Remarkably, however, recurrent indels were more frequently observed than expected based on indel frequency in short, but not in long homopolymers (<xref ref-type="fig" rid="fig4">Figure 4E</xref>, <xref ref-type="fig" rid="fig4s4">Figure 4—figure supplement 4</xref>). This suggests that features other than homopolymer length underlie indel recurrence rates. Positive clonal selection of indels affecting short homopolymers, which are predominant in coding regions, represents a possible explanation. Very similar results were obtained when the analysis was repeated only on the 13 whole-exomes, indicating that exonic mutations identified from whole-genome sequences did not introduce any bias.</p></sec><sec id="s2-6"><title>Recurrent indels reliably detect MSI in various cancer types</title><p>The extended Bethesda panel, which consists of eight microsatellite and two homopolymer markers, is currently used to diagnostically assess MSI (<xref ref-type="bibr" rid="bib47">Pinol et al., 2005</xref>). This panel was historically compiled from a limited set of markers known to be variable. Due to their length and variability, these markers are notoriously difficult to analyze and interpret. As a consequence, the Bethesda panel has reduced sensitivity to detect MSI. In an effort to improve MSI testing, we randomly selected 59 recurrent indels affecting ≥6 out of 16 tumors; 50 markers were in 5′ or 3′UTRs and 9 were in coding regions (<xref ref-type="supplementary-material" rid="SD7-data">Figure 5—source data 1</xref>). 
Furthermore, each of the markers was detected in both MMR-deficient EM and CRC. To facilitate high-throughput genotyping, the maximal length of affected homopolymers was restricted to 12 bps. First, we applied these 59 markers to a discovery set of 236 EM tumors for which MMR immunohistochemistry (IHC) data were available. This allowed us to determine three positive markers as the threshold with the best Matthews correlation coefficient to detect MMR-deficiency based on IHC and thus to define MSI (<xref ref-type="fig" rid="fig5">Figure 5A,B</xref>). At this threshold, our markers detected 40 out of 41 tumors MMR-deficient on IHC (sensitivity ∼98%), while only 1 out of 184 MMR-normal tumors on IHC was identified as MSI (specificity &gt; 99%). Notably, the latter patient had a familial history of cancer within the Lynch spectrum, suggesting that the tumor indeed exhibited MSI. Secondly, after having optimized the marker threshold, a head-to-head comparison against the Bethesda panel was performed in 114 independent EM tumors as a validation. When observing discordances, we assessed MMR-deficiency using IHC to determine which of the two MSI panels was correct. Briefly, each MSI tumor on Bethesda (&gt;2 markers positive) was also MSI with the 59-marker panel (<xref ref-type="fig" rid="fig5">Figure 5C</xref>). However, 12 tumors were positive in the 59-marker panel, but negative in Bethesda. 
IHC on the nine discordant tumors for which a paraffin block was available confirmed that each of them was MMR-deficient either for MLH1 or MSH2, indicating that the 59-marker panel has a higher sensitivity compared to Bethesda.<fig id="fig5" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.028</object-id><label>Figure 5.</label><caption><title>The 59-marker panel for MSI testing.</title><p>(<bold>A</bold>) Receiver–operator curve assessing the impact of the number of positive homopolymer markers (out of 59) on the sensitivity and specificity of MSI testing, based on a panel of 236 EM tumors immunohistochemically characterized for their MMR status. (<bold>B</bold>) The Matthews Correlation Coefficient (MCC) of the ROC curve was calculated for each threshold, and a threshold of 3 resulted in the highest MCC-value (MCC = 0.97). (<bold>C</bold> and <bold>D</bold>) The extended Bethesda panel and the 59-marker panel were compared in an independent series of 114 unselected primary endometrial tumors (<bold>C</bold>) and 126 stage II or III CRC tumors (<bold>D</bold>). Results were color-coded according to high microsatellite instability (MSI-H; more than 1 marker positive), low microsatellite instability (MSI-L; 1 marker positive), or microsatellite stable status (MSS; 0 markers positive) as determined with the extended Bethesda panel. For endometrial tumors, 71 tumors (62%) were defined as MSS/MSI-L and 43 tumors (38%) as MSI-H by the 59-marker panel. Out of these 43 MSI-H tumors, Bethesda identified 31 tumors as MSI-H (&gt;2 markers positive), 7 tumors as MSI-L, and 5 tumors as MSS. Vice versa, Bethesda did not identify any MSI-H tumor that was not identified by our panel. For colorectal tumors, there were 97 MSS tumors in our 59-marker panel that were concordantly called MSS or MSI-L by the Bethesda panel. The remaining 29 samples were detected as MSI in the 59-marker panel. 
28 of these were also called MSI-H by the Bethesda panel, whereas one was called MSS by the Bethesda panel.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.028">http://dx.doi.org/10.7554/eLife.02725.028</ext-link></p><p><supplementary-material id="SD7-data"><object-id pub-id-type="doi">10.7554/eLife.02725.029</object-id><label>Figure 5—source data 1.</label><caption><title>Recurrent indels selected for the 59-marker MSI panel and the results of a logistic regression analysis to detect differences between MSI-H and MSI-L/MSS tumors.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.029">http://dx.doi.org/10.7554/eLife.02725.029</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s007.xlsx"/></supplementary-material></p><p><supplementary-material id="SD8-data"><object-id pub-id-type="doi">10.7554/eLife.02725.030</object-id><label>Figure 5—source data 2.</label><caption><title>Clinical information, MMR-mutation status and sequencing statistics for ovarian tumors and leukemias.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.030">http://dx.doi.org/10.7554/eLife.02725.030</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s008.xlsx"/></supplementary-material></p></caption><graphic xlink:href="elife02725f005"/></fig></p><p>Likewise, we assessed MSI in 126 stage II or III CRC tumors. Each of the 28 MSI tumors on Bethesda was also positive with our 59-marker panel. In contrast, one tumor was MSI-positive in the 59-marker panel but not in the Bethesda panel (<xref ref-type="fig" rid="fig5">Figure 5D</xref>). This tumor contained a V600E BRAF mutation and was MLH1 hypermethylated, indicating that it was MMR-deficient and that our panel was also more sensitive for CRC (<xref ref-type="bibr" rid="bib8">Deng et al., 2004</xref>). 
Finally, we also assessed whether our 59-marker panel can detect MSI in other cancer types. In a limited set of ovarian tumors and leukemias, we indeed correctly identified MSI in each of the samples tested (<xref ref-type="supplementary-material" rid="SD8-data">Figure 5—source data 2</xref>).</p></sec><sec id="s2-7"><title>MMR-deficient tumors are enriched in indels affecting DSB repair</title><p>Since we observed clear signs of clonal indel selection in MMR-deficient tumors, we assessed whether specific pathways were enriched for indels. We focused on frameshift indels in exons and exon/intron boundaries as they represent loss-of-function mutations (<xref ref-type="bibr" rid="bib17">Ham et al., 2006</xref>), and thus have a less ambiguous functional impact than indels in UTRs. On average, each MMR-deficient tumor contained 472 such indels, 59 of which were recurrent indels. Pathway analyses using IPA of all genes affected by a somatic indel, excluding the core MMR genes, ranked the ‘<italic>Role of BRCA1 in DNA damage response</italic>’ as the top enriched pathway. IPA analysis of genes affected by recurrent indels moreover revealed that the ‘<italic>Double-strand break repair by homologous recombination</italic>’ pathway (<italic>DSBR by HR</italic>) ranked top (<xref ref-type="table" rid="tbl2">Table 2</xref>). We also performed pathway analyses using the more advanced GenomeMuSiC, which takes background mutation rates into account and assigns weights depending on the number of tumors and genes affected in a given pathway. GenomeMuSiC analyses based on either the independently assembled BioCarta or Reactome pathway databases ranked respectively the ‘<italic>ATR/BRCA pathway</italic>’ and the ‘<italic>DNA repair</italic>’ pathway first, with the more specific ‘<italic>Homologous recombination repair</italic>’ pathway ranking third in the latter (<xref ref-type="table" rid="tbl2">Table 2</xref>). 
Based on an expert curated DNA repair database (DNARepairDB), ‘<italic>Homologous recombination’</italic> represented the only DNA repair pathway that was significantly enriched in indels. Since each pathway database differed with respect to the genes included, we finally compiled a literature-based set of genes with proven involvement in <italic>DSBR by HR</italic>, allowing us to more accurately estimate that each MMR-deficient tumor on average contained 3.3 ± 0.4 indels in the ‘<italic>DSBR by HR’</italic> pathway (<xref ref-type="table" rid="tbl2">Table 2</xref>, <xref ref-type="supplementary-material" rid="SD9-data">Table 2—source data 1</xref>). Notably, none of the top-ranking pathways for any of the databases contained significantly more homopolymers in their genes than expected.<table-wrap id="tbl2" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.031</object-id><label>Table 2.</label><caption><p>Pathways most significantly affected by exonic indels</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.031">http://dx.doi.org/10.7554/eLife.02725.031</ext-link></p><p><supplementary-material id="SD9-data"><object-id pub-id-type="doi">10.7554/eLife.02725.032</object-id><label>Table 2—source data 1.</label><caption><title>Results of pathway enrichments, custom definition of the DSBR by HR pathway and the allelic frequencies of mutations in HR genes.</title><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.032">http://dx.doi.org/10.7554/eLife.02725.032</ext-link></p></caption><media mime-subtype="xlsx" mimetype="application" xlink:href="elife02725s009.xlsx"/></supplementary-material></p></caption><table frame="hsides" rules="groups"><thead><tr><th align="center">Database</th><th align="center">Pathway</th><th align="center">Rank</th><th align="center">FDR</th><th align="center">Affected samples (n = 16)</th><th align="center">Mutations per 
sample</th></tr></thead><tbody><tr><td/><td><xref ref-type="table-fn" rid="tblfn2">*</xref>DSBR by HR (custom definition)</td><td>n.a.</td><td>n.a.</td><td align="char" char=".">16</td><td align="char" char=".">3.25</td></tr><tr><td rowspan="5">BioCarta (ranking by GenomeMusic)</td><td><xref ref-type="table-fn" rid="tblfn2">*</xref>ATR/BRCA pathway</td><td align="char" char=".">1</td><td>1.0E−16</td><td align="char" char=".">15</td><td align="char" char=".">3.50</td></tr><tr><td>ATM pathway</td><td align="char" char=".">2</td><td>5.9E−11</td><td align="char" char=".">15</td><td align="char" char=".">2.69</td></tr><tr><td>G2 pathway</td><td align="char" char=".">3</td><td>7.2E−08</td><td align="char" char=".">15</td><td align="char" char=".">2.81</td></tr><tr><td>IL10 pathway</td><td align="char" char=".">4</td><td>2.2E−05</td><td align="char" char=".">12</td><td align="char" char=".">1.75</td></tr><tr><td>CARM1 and regulation of the Estrogen Receptor pathway</td><td align="char" char=".">5</td><td>2.2E−05</td><td align="char" char=".">14</td><td align="char" char=".">3.19</td></tr><tr><td rowspan="4">DNA Repair DB (ranking by GenomeMusic)</td><td><xref ref-type="table-fn" rid="tblfn2">*</xref>Homologous recombination pathway</td><td align="char" char=".">1</td><td>1.3E−04</td><td align="char" char=".">13</td><td align="char" char=".">1.56</td></tr><tr><td>Base excision repair pathway</td><td align="char" char=".">2</td><td>9.0E−02</td><td align="char" char=".">10</td><td align="char" char=".">0.75</td></tr><tr><td>Non-homologous end joining pathway</td><td align="char" char=".">3</td><td>1.7E−01</td><td align="char" char=".">9</td><td align="char" char=".">0.69</td></tr><tr><td>Nucleotide excision repair pathway</td><td align="char" char=".">4</td><td>8.3E−01</td><td align="char" char=".">7</td><td align="char" char=".">0.50</td></tr><tr><td rowspan="8">Reactome (ranking by GenomeMusic)</td><td>DNA repair</td><td align="char" char=".">1</td><td>2.5E−11</td><td 
align="char" char=".">15</td><td align="char" char=".">6.69</td></tr><tr><td>Double strand break repair</td><td align="char" char=".">2</td><td>7.2E−08</td><td align="char" char=".">15</td><td align="char" char=".">2.94</td></tr><tr><td><xref ref-type="table-fn" rid="tblfn2">*</xref>Homologous recombination repair</td><td align="char" char=".">3</td><td>1.9E−07</td><td align="char" char=".">15</td><td align="char" char=".">2.31</td></tr><tr><td>G2/M checkpoints</td><td align="char" char=".">4</td><td>2.3E−07</td><td align="char" char=".">15</td><td align="char" char=".">3.50</td></tr><tr><td>Cell cycle checkpoints</td><td align="char" char=".">5</td><td>4.5E−05</td><td align="char" char=".">15</td><td align="char" char=".">4.75</td></tr><tr><td>Base excision repair</td><td align="char" char=".">15</td><td>8.3E−03</td><td align="char" char=".">10</td><td align="char" char=".">0.94</td></tr><tr><td>Non-homologous end joining</td><td align="char" char=".">59</td><td>1.0E+00</td><td align="char" char=".">8</td><td align="char" char=".">0.63</td></tr><tr><td>Nucleotide excision repair</td><td align="char" char=".">61</td><td>5.9E−01</td><td align="char" char=".">10</td><td align="char" char=".">1.50</td></tr><tr><td rowspan="6">IPA (ranking by IPA)</td><td><xref ref-type="table-fn" rid="tblfn2">*</xref>DNA double-strand break repair by homologous recombination</td><td align="char" char=".">1</td><td>4.7E−03</td><td align="char" char=".">15</td><td align="char" char=".">1.56</td></tr><tr><td>Ovarian cancer signaling</td><td align="char" char=".">2</td><td>4.7E−03</td><td align="char" char=".">16</td><td align="char" char=".">5.75</td></tr><tr><td>Role of NFAT in cardiac hypertrophy</td><td align="char" char=".">3</td><td>6.8E−03</td><td align="char" char=".">14</td><td align="char" char=".">3.88</td></tr><tr><td>Cell cycle: G2/M DNA damage checkpoint regulation</td><td align="char" char=".">4</td><td>1.3E−02</td><td align="char" char=".">15</td><td align="char" 
char=".">2.88</td></tr><tr><td>PPARα/RXRα activation</td><td align="char" char=".">5</td><td>1.4E−02</td><td align="char" char=".">15</td><td align="char" char=".">4.63</td></tr><tr><td>DNA double-strand break repair by non-homologous end joining</td><td align="char" char=".">60</td><td>1.7E−01</td><td align="char" char=".">14</td><td align="char" char=".">1.50</td></tr></tbody></table><table-wrap-foot><fn><p>The five top ranking pathways are listed, as well as all annotated pathways relevant for DNA repair. The custom definition used throughout this manuscript was added for illustrative purposes. n.a. = not applicable.</p></fn><fn id="tblfn2"><label>*</label><p>The DSBR by HR pathway.</p></fn></table-wrap-foot></table-wrap></p><p>In an effort to replicate these findings, we analyzed mutation data of 27 CRC and 65 EM tumors with MSI sequenced by The Cancer Genome Atlas (<xref ref-type="bibr" rid="bib60">TCGA, 2012</xref>; <xref ref-type="bibr" rid="bib26">Kandoth et al., 2013</xref>). Although most of these tumors were sequenced at low coverage depth, we identified 2183 and 3138 mutated genes from respectively the CRC and EM tumor data sets. IPA analysis confirmed that the <italic>Role of BRCA1 in DNA damage response</italic> was again amongst the top enriched pathways for each of the data sets. The corresponding p-values were 9.06E−3 and 2.97E−4, although only the latter survived multiple testing correction (p = 0.022; <xref ref-type="supplementary-material" rid="SD9-data">Table 2—source data 1</xref>). As raw data sets were not accessible, the more sensitive GenomeMuSiC could not be used.</p></sec><sec id="s2-8"><title>Reduced DSBR by HR activity in primary MMR-deficient cells</title><p>Homozygous mutations affecting genes in the <italic>DSBR by HR</italic> pathway cause DSB repair defects reminiscent of BRCA1 or BRCA2 loss, a phenotypic feature dubbed <italic>BRCAness</italic> (<xref ref-type="bibr" rid="bib34">McCabe et al., 2006</xref>). 
Having established that MMR-deficient tumors are enriched in heterozygous frameshift mutations in the <italic>DSBR by HR</italic> pathway, we investigated the functional impact of these events. First, we confirmed that indels affecting the <italic>DSBR by HR</italic> pathway were located in the major tumor subclone (<xref ref-type="table" rid="tbl2">Table 2</xref>, <xref ref-type="supplementary-material" rid="SD9-data">Table 2—source data 1</xref>). Then, we analyzed HR in seven MMR-deficient and four MMR-proficient patient-derived primary tumor cultures. We exposed these cultures to the PARP inhibitor olaparib, which induces DSBs upon DNA replication through single-strand break repair inhibition, and to mitomycin C, which induces DSBs through DNA cross-links and replication fork collapse (<xref ref-type="bibr" rid="bib4">Bunting et al., 2012</xref>). We then quantified the relative number of cells with γH2AX- and RAD51-positive foci, respectively, as a measure of induced DSBs and ongoing HR. Exposure to olaparib or mitomycin C triggered an increase in γH2AX-foci in all tumor cultures, regardless of MMR status. In contrast, although RAD51 foci formation was evident in MMR-deficient and MMR-proficient cultures, the increase was far less pronounced in MMR-deficient cultures (<xref ref-type="fig" rid="fig6">Figure 6A,B</xref>), and this for both olaparib (p = 0.021) and mitomycin C (p = 0.006) exposure. The reduction in RAD51 foci could not be ascribed to differences in RAD51 protein expression or differences in cell cycle between MMR-deficient and -proficient cells, as these were similar between both sets of cultures, under both treated and untreated conditions (<xref ref-type="fig" rid="fig6s1 fig6s2 fig6s3">Figure 6—figure supplements 1–3</xref>). 
Since RAD51 foci are completely absent upon PARP inhibition in cells with homozygous loss of <italic>BRCA1</italic>, but not affected in heterozygous mutation carriers (<xref ref-type="bibr" rid="bib15">Farmer et al., 2005</xref>), these ex vivo data suggest that the accumulation of indels in MMR-deficient tumors gradually impairs the <italic>DSBR by HR</italic> pathway to a level that is intermediate to that of cells heterozygous- and homozygous-deficient for BRCA1.<fig-group><fig id="fig6" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.033</object-id><label>Figure 6.</label><caption><title>Reduced DSBR by HR activity in MMR-deficient cells.</title><p>(<bold>A</bold>) Representative confocal images of MMR-deficient and MMR-proficient primary tumor cells exposed for 24 hr to vehicle, 26 μM olaparib, or 300 nM mitomycin C stained for the homologous repair marker RAD51 (green), the DNA damage marker γH2AX (red), and counterstained with DAPI (blue). The bar is 10 µm wide. (<bold>B</bold>) Quantification of cells containing &gt;5 RAD51 or γH2AX foci. 
Averages are shown for MMR-deficient and MMR-proficient primary tumor cultures after 24 hr of treatment with vehicle, 26 μM olaparib or 300 nM mitomycin C.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.033">http://dx.doi.org/10.7554/eLife.02725.033</ext-link></p></caption><graphic xlink:href="elife02725f006"/></fig><fig id="fig6s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.034</object-id><label>Figure 6—figure supplement 1.</label><caption><title>Cell cycle distribution in untreated MMR-deficient and MMR-proficient cell cultures.</title><p>No difference was observed in G1, S, or G2/M phase frequency between 7 MMR-deficient and 4 MMR-proficient cultures (p = 0.45, 0.30 or 0.94).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.034">http://dx.doi.org/10.7554/eLife.02725.034</ext-link></p></caption><graphic xlink:href="elife02725fs015"/></fig><fig id="fig6s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.035</object-id><label>Figure 6—figure supplement 2.</label><caption><title>MMR-deficient tumor cultures were challenged with olaparib (26 μM), camptothecin (30 nM), or mitomycin C (300 nM) for 24 hr, pulsed with BrdU for 2 hr and analyzed for cell cycle by propidium iodide staining (DNA content analysis) using flow cytometry.</title><p>The bar plot shows the fraction of unlabeled (arrested) cells in S and G2/M, normalized to the G1 fraction; bars indicate SEM; data represent the results from 7 cultures. All experiments were repeated twice. DNA damage provoked by exposure to camptothecin consistently increased stalled (BrdU-negative) cells in S phase (average 13-fold increase; p = 5.23E−5). Mitomycin C caused an increase of stalled cells in S phase (3.08-fold; p = 5.8E−3) and in G2/M phase (3.12-fold; p = 2.2E−7). 
Olaparib induced, as expected, an increase in stalled cells in S and G2/M (respectively, a 3.35 and a 2.54-fold increase; p = 2.1E−3 and 5.2E−4). Overall, this indicates that MMR-deficient cultures did not exhibit any loss of G2/M cell cycle checkpoints or DNA damage signaling.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.035">http://dx.doi.org/10.7554/eLife.02725.035</ext-link></p></caption><graphic xlink:href="elife02725fs016"/></fig><fig id="fig6s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.036</object-id><label>Figure 6—figure supplement 3.</label><caption><title>Example of a 2 hr BrdU pulse-labeled MMR-deficient cell culture, demonstrating S-phase stalling and G2/M stalling upon mitomycin C exposure, S-phase stalling upon camptothecin exposure and S-phase stalling and G2/M stalling upon olaparib exposure.</title><p>Cell cycle phases in unlabeled (stalled) fractions were determined as described by Watson et al.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.036">http://dx.doi.org/10.7554/eLife.02725.036</ext-link></p></caption><graphic xlink:href="elife02725fs017"/></fig></fig-group></p></sec><sec id="s2-9"><title>DSB inducers sensitize MMR-deficient tumors</title><p>As MMR-deficient tumors are compromised in their <italic>DSBR by HR</italic> activity, we wondered whether these tumors, similar to BRCA1-deficient tumors (<xref ref-type="bibr" rid="bib15">Farmer et al., 2005</xref>), are more sensitive to agents that induce DSBs. First, since PARP inhibitors are already used in clinical practice, all seven MMR-deficient and four MMR-proficient cultures were dose-dependently exposed to olaparib. This revealed that MMR-deficient cultures exhibited a dose-dependent decrease in proliferation upon exposure to olaparib, whereas MMR-proficient cultures were only affected at higher concentrations. 
Likewise, cell cytotoxicity assays revealed a dose-dependent sensitivity of MMR-deficient cells to olaparib that was more pronounced than in MMR-proficient cells (50% growth inhibition [GI50] was reached at 26 µM vs 129 µM, respectively, p = 0.0064; <xref ref-type="fig" rid="fig7">Figure 7A,B</xref>, <xref ref-type="fig" rid="fig7s1">Figure 7—figure supplement 1</xref>). Other DSB-inducing compounds such as mitomycin C or ionizing radiation similarly proved more detrimental for MMR-deficient than MMR-proficient cells (<xref ref-type="fig" rid="fig7">Figure 7B</xref>). In contrast, cytotoxicities of other chemotherapeutic compounds such as paclitaxel were comparable between both groups.<fig-group><fig id="fig7" position="float"><object-id pub-id-type="doi">10.7554/eLife.02725.037</object-id><label>Figure 7.</label><caption><title>MMR-deficient cells are sensitive to PARP inhibition.</title><p>(<bold>A</bold>) Dosimetry experiments assessing the effect of decreasing concentrations of olaparib on in vitro cell proliferation relative to the corresponding untreated cultures as measured by sulforhodamine B assays. (<bold>B</bold>) Cytotoxicity of olaparib, mitomycin C, ionizing radiation and paclitaxel as measured by sulforhodamine B assays. Displayed are the average concentrations (μM) or dose (Gray, Gy) that inhibit 50% of the normal growth. p-values are 0.0077, 0.040, and 0.038 for olaparib, mitomycin C, and ionizing radiation, while p-value is not significant (NS) for paclitaxel. (<bold>C</bold>) Effect of knock-down of BRCA1, BRCA2, and ATR mRNA on olaparib sensitivity of the MMR-proficient, HR-proficient MCF7 cell line. Cells were transfected with the indicated siRNA concentration (X axis), and after 24 hr incubated with 26 µM olaparib or vehicle. Another 48 hr later, cell viability was assessed using the sulforhodamine B assay. The siRNA concentration corresponding to a growth inhibition of 50% was subsequently assessed for the level of knock-down induced. 
The resulting values are indicated on the plots and are expressed as %. Values plotted were normalized to vehicle-treated cells transfected with a scrambled siRNA of matching concentration.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.037">http://dx.doi.org/10.7554/eLife.02725.037</ext-link></p></caption><graphic xlink:href="elife02725f007"/></fig><fig id="fig7s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.02725.038</object-id><label>Figure 7—figure supplement 1.</label><caption><title>Cell proliferation of MMR-deficient cultures was measured in real-time using the xCELLigence RTCA DP system (for up to 48 hr after treatment).</title><p>Values are normalized to the vehicle-treated control. Error bars represent SEM. The average cell proliferation of 7 MMR-deficient cells (<bold>A</bold>) and 4 MMR-proficient cells (<bold>B</bold>) with increasing concentrations of olaparib (1 μM, 3 μM, 10 μM) is shown. Overall, MMR-deficient cultures were characterized by a dose-dependent decrease in proliferation, whereas MMR-proficient cells did not respond to olaparib.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.038">http://dx.doi.org/10.7554/eLife.02725.038</ext-link></p></caption><graphic xlink:href="elife02725fs018"/></fig></fig-group></p><p>Finally, in order to more accurately measure the level of HR-deficiency in MMR-deficient tumors, we assessed the level of knock-down of BRCA1, BRCA2, and ATR needed to achieve an olaparib sensitivity similar to that observed in MMR-deficient cells, that is, a GI50 of 26 µM. BRCA1, BRCA2, or ATR expression was dose-dependently reduced using siRNAs in the MMR- and HR-proficient cell line, MCF7. 
A growth inhibition of 50% was reached in MCF7 cells when applying 5.9 nM ATR, 0.88 nM BRCA1 or 0.41 nM BRCA2 siRNA, corresponding respectively to a reduction in expression of 69.5 ± 1.1%, 76.1 ± 4.4%, and 80.0 ± 2.4% (<xref ref-type="fig" rid="fig7">Figure 7C</xref>). These data thus suggest that the loss of <italic>DSBR by HR</italic> activity in MMR-deficient tumors corresponds to a loss of about 75–80% BRCA1 or BRCA2 expression.</p></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><p>Here, we surveyed whole-genomes of MMR-deficient tumors to provide a comprehensive picture of the mutations associated with human MMR-deficiency. With respect to somatic substitutions, we observed that the majority represented transitions and not transversions, and that adjacent nucleotides and various genomic features had an important context-dependent effect on determining which nucleotides were affected. Remarkably, the observed substitution pattern, in particular how it was impacted by small and large-scale contexts, was very similar to that in the germ-line at different time scales: for germ-line substitutions as they currently arise (de novo), as they have accumulated in the human population or as they served as a substrate for human-chimpanzee divergence (<xref ref-type="bibr" rid="bib23">Hodgkinson and Eyre-Walker, 2011</xref>). Our observations thus suggest that, similar to bacterial populations and other lower organisms (<xref ref-type="bibr" rid="bib53">Saint-Ruf and Matic, 2006</xref>), incomplete mismatch repair in humans contributes significantly to genetic variability and probably also to natural selection through genetic adaptation. Additionally, our data provide fundamental insights into the function of the MMR machinery. 
We observed, for instance, a higher number of substitutions in methylated CpG sequences, implicating MMR in the repair of methylated cytosine deamination and demonstrating that MMR disconnected from the replication fork is also critical to maintain genomic integrity.</p><p>At the whole-genome level, ∼80% of somatic mutations represented indels. Although indel detection using high-throughput sequencing is burdened with high false-positive rates, 88.0% of the indels identified here validated favorably using orthogonal technologies. When focusing on the clinical relevance of indel mutation patterns to diagnose MSI, we observed that indels specifically affected homopolymer stretches, which is relevant as the extended Bethesda panel consists of eight microsatellite and only two homopolymer markers and possibly therefore has only limited sensitivity relative to IHC (∼75% for both EM and CRC tumors [<xref ref-type="bibr" rid="bib18">Hampel et al., 2005</xref>, <xref ref-type="bibr" rid="bib19">2006</xref>, <xref ref-type="bibr" rid="bib20">2008</xref>]). Our 59-marker panel consisting only of markers in homopolymers was clearly more sensitive than Bethesda, yielding sensitivity rates of 87% relative to IHC. This was not due to the fact that we genotyped more markers than Bethesda, as restricting our panel to 10 markers still resulted in a sensitivity rate of 85% (data not shown). Furthermore, since our panel was based on recurrent mutations present in both CRC and EM, and since 50 out of 59 markers were located in UTRs, which are less likely to drive clonal selection and thus to represent tissue-specific events, it could be used to detect MSI in cancers affecting various tissues. Finally, since all markers were located in homopolymers ≤12 bps in length, they are, in contrast to the 25 or 26 bps markers from Bethesda, compatible with various low- to high-throughput genotyping technologies, thereby greatly facilitating their clinical adoption. 
For instance, we were able to multiplex all 59 markers in just five PCR amplification reactions compatible with Sequenom MassArray genotyping.</p><p>Pathway analyses on all genes affected by exonic indels further revealed that the <italic>DSBR by HR</italic> pathway was enriched for somatic indels. Although mutations in genes involved in this pathway, such as <italic>MRE11A</italic> or <italic>RAD50</italic>, have previously been reported in MMR-deficient tumors, these studies focused on specific mutations in individual genes rather than on pathways, and for this reason could establish that only a fraction of MMR-deficient tumors was affected by mutations in these genes (<xref ref-type="bibr" rid="bib37">Miquel et al., 2007</xref>). In contrast, our study identified that every MMR-deficient tumor was affected by on average 3.3 somatic indels in the <italic>DSBR by HR</italic> pathway. Furthermore, although it is well established that cells deficient in BRCA1, BRCA2, Fanconi anemia, or other HR-related genes are hypersensitive to DSB inducers (<xref ref-type="bibr" rid="bib39">Murai et al., 2012</xref>), as for instance, synthetic lethality in BRCA1- or BRCA2-deficient tumors through PARP inhibition is already approved as therapy in breast and ovarian cancer (<xref ref-type="bibr" rid="bib36">Metzger-Filho et al., 2012</xref>), data demonstrating sensitivity of MMR-deficient cells to DSB inducers have not been conclusive (<xref ref-type="bibr" rid="bib58">Takahashi et al., 2011</xref>; <xref ref-type="bibr" rid="bib64">Vilar et al., 2011</xref>; <xref ref-type="bibr" rid="bib43">Park et al., 2013</xref>). For instance, although there are some reports highlighting the sensitivity of MSH3-deficient cell lines to DSB inducers, this appeared to occur through a non-canonical MMR pathway, as MLH1 was not involved in this process (<xref ref-type="bibr" rid="bib58">Takahashi et al., 2011</xref>; <xref ref-type="bibr" rid="bib43">Park et al., 2013</xref>). 
Furthermore, the only clinical study set up so far to explore efficacy of PARP inhibitors as a single-agent therapy in previously treated patients with metastatic CRC stratified by MSI status, was unfortunately delayed due to patient accrual issues.</p><p>Our hypothesis-free discovery that <italic>DSBR by HR</italic> is the top pathway affected by heterozygous loss-of-function mutations in MMR-deficient tumors, both in our own data set and TCGA, also suggests that mutations in <italic>DSBR by HR</italic> genes converge in an oligogenic model, wherein the number of indels dose-dependently decreases <italic>DSBR by HR</italic> activity, thereby rendering them gradually more sensitive to DSB inducers. As a result of this double-hit, our ex vivo culture experiments are, however, difficult to compare to experiments relying on genotype-matched cells that have a single hit in the MMR or HR pathway. In addition, since <italic>MMR</italic> and <italic>DSBR by HR</italic> pathway activities are not characterized in a clinical setting, it is difficult to relate our data to clinical studies assessing the outcome of therapeutics such as cisplatin or 5-fluorouracil, which have potential opposing activities on MMR- and HR-deficient tumors.</p><p>Clinical studies are therefore needed to assess whether DSB inducers, such as PARP inhibitors, are indeed also effective in MSI tumors. In particular, since on average 3.3 heterozygous loss-of-function mutations only partially inactivate the DSB repair by HR pathway (∼80% inactivation), it remains to be seen whether, compared to BRCA1- or BRCA2-deficient tumors, in which the HR pathway is completely inactivated, clinically relevant benefits are also achievable in MSI tumors. Possibly, only those MMR-deficient tumors containing large numbers of indels (≥5) in the <italic>DSBR by HR</italic> pathway will show a significant response. Nevertheless, there is a great clinical need for novel treatment options in MSI tumors. 
Indeed, although stage II or III CRC tumors with MSI are characterized by a modestly improved prognosis, MSI tumors in the advanced setting are generally associated with more peritoneal metastasis and a worse overall survival independent of the chemotherapy regimen (<xref ref-type="bibr" rid="bib55">Smith et al., 2013</xref>; <xref ref-type="bibr" rid="bib67">Yoon et al., 2013</xref>). Our observations thus clearly warrant novel clinical studies assessing the therapeutic efficacy of DSB inducers in MMR-deficient tumors.</p></sec><sec id="s4" sec-type="materials|methods"><title>Materials and methods</title><sec id="s4-1"><title>Standard diagnostic tests for MMR-deficiency</title><p>To assess MLH1-, MSH2-, and MSH6-deficiency, immunohistochemistry using monoclonal antibodies against MLH1 (clone ES05; DAKO, Heverlee, Belgium), MSH2 (clone G219-1129; BD Pharmingen, Erembodegem, Belgium), and MSH6 (clone EP49; Epitomics, Burlingame, USA) was applied. Absence of nuclear staining in tumor cells and normal staining in the surrounding normal tissue were considered as MMR-deficient. Methylation of the <italic>MLH1</italic> promoter was determined using the SALSA MS-MLPA KIT (MRC-Holland, Amsterdam, The Netherlands). PCR reaction fragments covering the Deng C and Deng D regions were separated by capillary gel electrophoresis (ABI 3130; Applied Biosystems, Ghent, Belgium) and quantified using the Genemarker (v1.91) software (Softgenetics). MSI status was detected by the extended Bethesda panel using capillary gel electrophoresis, as described previously (<xref ref-type="bibr" rid="bib10">Dietmaier et al., 1997</xref>; <xref ref-type="bibr" rid="bib3">Boland et al., 1998</xref>).</p></sec><sec id="s4-2"><title>Sample selection and preparation</title><p>We selected 17 endometrial, three colorectal, and two ovarian tumor–normal pairs for either whole-genome or whole-exome sequencing. Samples were all chemo-naive. DNA was derived from fresh frozen, primary tumors. 
Matched normal DNA for these 22 samples was extracted from peripheral white blood cells.</p></sec><sec id="s4-3"><title>Whole-genome sequencing, analysis, and annotation</title><p>Five tumor–normal pairs were selected for whole-genome sequencing. Paired-end sequencing was performed using the Complete Genomics service (CG, Mountain View, California, USA) as described in <xref ref-type="bibr" rid="bib13">Drmanac et al. (2010)</xref> or by Illumina HiSeq2000. For CG sequencing, reads were initially mapped to the reference genome (hg18) using Complete Genomics' CGAtools. Between 207 and 338 Gb of sequencing data were obtained, resulting in a haploid coverage between 73× and 119×. Approximately, 2.7 × 10<sup>9</sup> bases were called in each genome, representing ∼95% of the total genome and ∼97% of the exome. Substitutions and indels were called by the variant caller in the CGAtools. On average, 3,132,715 substitutions and 357,153 indels were detected in each genome. The CGAtool (v1.0.3.9) <italic>calldiff</italic> method was used to detect somatic mutations in the tumor–normal pairs. For Illumina sequencing, 2 × 100 bp paired-end sequencing was performed, yielding 25–30x coverage per sample. Burrows-Wheeler Alignment (BWA) was used to align the raw reads to the reference genome (hg19) (<xref ref-type="bibr" rid="bib31">Li and Durbin, 2010</xref>). PCR duplicates were removed with Picard MarkDuplicates (v1.32). Base recalibration, local realignment around indels and single nucleotide variant calling were performed using the GenomeAnalysisToolKit (GATK v1.0.4487) (<xref ref-type="bibr" rid="bib35">McKenna et al., 2010</xref>). Small indels were detected using Dindel (v1.01) (<xref ref-type="bibr" rid="bib2">Albers et al., 2011</xref>). Substitutions and indels with quality score &gt;Q30 were considered. On average, 3,977,086 substitutions and 837,915 indels were detected in each genome. 
Somatic mutations were detected by means of the <italic>intersectBed</italic> command of BEDTools (v2.12.0) (<xref ref-type="bibr" rid="bib50">Quinlan and Hall, 2010</xref>). Raw data for all whole-genomes are available under restricted access in the European Genome-Phenome Archive (EGA) with accession number EGAS00001000182.</p><p>Sequence data were annotated using ANNOVAR (v2013Jun21) and the UCSC RefGene annotation track. Germ-line substitutions and indels were eliminated from the list of somatic mutations using the following publicly available datasets: (i) common SNPs in dbSNP (v132) with a minor allele frequency of &gt;1%, (ii) substitutions identified in the November 2010 release of the 1000 Genomes Project, (iii) the Axiom Genotype Data Set containing common SNPs from 1261 HapMap3 individuals in 11 populations, and (iv) variant data identified in 46 HapMap individuals (CG diversity panel). Somatic mutations were validated using Sequenom MassARRAY genotyping, as previously described (<xref ref-type="bibr" rid="bib51">Reumers et al., 2011</xref>). Details of validation experiments are shown in <xref ref-type="supplementary-material" rid="SD3-data">Figure 1—source data 3</xref>. A quality score method to enrich for true somatic mutations by defining a threshold that differentiates false-positive and true-positive variants based on Sequenom validation data was applied to CG genomes and increased the validation rate for substitutions from 93.5%, 71.4%, and 55.6% to 97.7%, 100%, and 73.3% for MMR− 1, MMR+ 1, and MMR+ 2, respectively. Detailed data of all somatic mutations are in <xref ref-type="supplementary-material" rid="SD1-data">Figure 1—source data 1</xref> and <xref ref-type="supplementary-material" rid="SD2-data">Figure 1—source data 2</xref>. Copy number status of the sequenced tumors was determined by Illumina CytoSNP-12 chips and analyzed using the ASCAT algorithm (<xref ref-type="bibr" rid="bib62">Van Loo et al., 2010</xref>). 
Copy number status of the five whole-genomes is shown in <xref ref-type="fig" rid="fig1s3">Figure 1—figure supplement 3</xref>.</p></sec><sec id="s4-4"><title>Genome annotation</title><p>The genome was annotated into the following functional genomic regions: (coding) exonic regions (1.12%), intronic regions (34.01%), 3′ untranslated regions (3′UTR, 0.78%), 5′ untranslated regions (5′UTR, 0.14%), noncoding RNA (ncRNA, 2.81%), upstream genic regions (defined as 1 kb before the start of the gene, 0.58%), downstream genic regions (defined as 1 kb after the end of the gene, 0.58%), and intergenic regions (59.98%).</p></sec><sec id="s4-5"><title>Evidence of negative clonal selection</title><p>Overall mutation frequencies were defined as the number of somatic mutations per base (mpb) in a given genomic region. To assess negative selection in the exome, we checked whether (i) there was a lower mutation frequency in the exome relative to the whole-genome, and whether (ii) the frequency of somatic mutations was more prominently decreased in the exome. As homopolymers in exomes have characteristics that differ from those in the rest of the genome in terms of number, base composition and length, we corrected indel frequencies for these confounding factors. We calculated the frequency of affected homopolymers for each genomic location (<bold>t</bold>: exonic, 5′UTR, 3′UTR, intronic, intergenic, or genomic), for each type of homopolymer (<bold>AT</bold> or CG composition) and each homopolymer length (6, 7, 8, etc. [<bold>l</bold>]). <sup><italic>AT</italic></sup><italic>Freq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> <italic>=</italic> <sup><italic>AT</italic></sup><sub><italic>aff</italic></sub><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub><italic>/</italic><sup><italic>AT</italic></sup><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub>. 
Next, we calculate the relative increase of observed frequencies relative to the frequency observed at the genome-wide level: <sup><italic>AT</italic></sup><italic>rFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> = <sup><italic>AT</italic></sup><italic>Freq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub><italic>/</italic><sup><italic>AT</italic></sup><italic>Freq</italic><sup><italic>genome</italic></sup><sub><italic>l</italic></sub>. The frequency <sup><italic>AT</italic></sup><italic>rFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> was normalized for the number of homopolymers of a given length l, for each genomic location t and for homopolymer composition (<sup><italic>AT</italic></sup><italic>wrFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> = <sup><italic>AT</italic></sup><italic>rFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> <italic>×</italic> <sup><italic>AT</italic></sup><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub><italic>/∑</italic> <sup><italic>AT</italic></sup><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub>), and further normalized for the number of AT (or GC) homopolymers for each genomic location and homopolymer length (<sup><italic>AT</italic></sup><italic>nwrFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> = <sup><italic>AT</italic></sup><italic>wrFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> × <sup><italic>AT</italic></sup><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub><italic>/(</italic><sup><italic>AT</italic></sup><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> <italic>+</italic> <sup><italic>CG</italic></sup><italic>n</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub><italic>)</italic>). 
All the weighted frequencies are then summed for every genomic location (<italic>cFreq</italic><sup><italic>t</italic></sup> = <italic>∑</italic> <sup><italic>AT</italic></sup><italic>nwrFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub> <italic>+ ∑</italic> <sup><italic>CG</italic></sup><italic>nwrFreq</italic><sup><italic>t</italic></sup><sub><italic>l</italic></sub>) and divided by the overall summed genomic frequency (<italic>rFreq = cFreq</italic><sup><italic>t</italic></sup><italic>/cFreq</italic><sup><italic>genomic</italic></sup>).</p></sec><sec id="s4-6"><title>Data sets of germ-line and somatic variants</title><p>The following datasets were used: (i) the 1000 Genomes Project containing common variants with a minor allelic frequency &gt;10%, (ii) all germ-line variants identified in the 3 MMR-deficient tumors sequenced in this study, (iii) de novo mutations from 83 trios as published by <xref ref-type="bibr" rid="bib5">Campbell et al. (2012)</xref> and <xref ref-type="bibr" rid="bib28">Kong et al. (2012)</xref>, and (iv) a human-chimp divergence set of substitutions as previously described (<xref ref-type="bibr" rid="bib57">Stamatoyannopoulos et al., 2009</xref>). Somatic mutations identified in other tumor whole-genomes were: (i) BRCA-deficient breast cancer tumors as published by <xref ref-type="bibr" rid="bib41">Nik-Zainal et al. (2012)</xref>, (ii) MMR-proficient endometrial tumors sequenced in this study, (iii) melanoma genomes as published by <xref ref-type="bibr" rid="bib48">Pleasance et al. (2010)</xref>, and (iv) small cell lung cancer (SCLC) as published by <xref ref-type="bibr" rid="bib49">Pleasance et al. (2010)</xref>.</p></sec><sec id="s4-7"><title>Genomic features postulated to underlie the systematic variation of mutation rates</title><p>The distance to telomere was defined as the distance from the middle of the 1 Mb window to the beginning or the end of the chromosome whichever was the shortest. 
Replication time was considered as published by <xref ref-type="bibr" rid="bib6">Chen et al. (2010)</xref>. Simple repeats represented the number of homopolymer and microsatellite bases. GC% was calculated as (G+C)/(A+T+G+C), CpG content as the number of CG dinucleotide bases, CpG islands as the number of bases belonging to CpG islands, gene content as the number of bases belonging to each genomic region. DNase hypersensitivity (DNAseI size) and nuclear lamina binding sites were downloaded from UCSC and the number of bases per site was counted for both.</p></sec><sec id="s4-8"><title>Exome-sequencing, analysis, and annotation</title><p>We sequenced 11 tumor–normal pairs, 6 primary cell cultures (PC) and their matched normal DNA samples. Detailed clinical information is shown in <xref ref-type="supplementary-material" rid="SD4-data">Figure 4—source data 1</xref>. Exomes were captured using Illumina's TruSeq Exome Enrichment Kit. The TruSeq capture regions encompass 62 Mb, consisting of 94.4%, 83.9%, and 91.9% of the exonic, 5′UTR and 3′UTR regions respectively. 2 × 75 bp paired-end sequencing reactions were used for all EM tumors, while 2 × 100 bp paired-end sequencing was performed on CRC tumors and PC samples. Analysis, annotation, and validation were performed similarly as for whole-genome sequencing. On average, the coverage was 44.5× and 95.1% of bases were called in the captured regions, yielding 51,782 substitutions and 30,290 indels per sample. Raw data are available under restricted access in EGA under accession number EGAS00001000182. 
Details of validated somatic mutations are available in <xref ref-type="supplementary-material" rid="SD4-data">Figure 4—source data 1</xref> and <xref ref-type="supplementary-material" rid="SD5-data">Figure 4—source data 2</xref>.</p></sec><sec id="s4-9"><title>Recurrent somatic mutations</title><p>The 13 MMR-deficient whole-exomes and whole-exome data extracted from 3 MMR-deficient whole-genomes were screened for recurrent mutations. Random selection and validation of 24 indels occurring in 6 or more samples revealed a validation rate of 100%. Given the high validation rate for somatic indels per se, and the even higher rate for recurrent indels, we considered all recurrent indels as true-positives. Subsequent analyses were limited to indels recurrently affecting homopolymer regions, that is, 29,663 Illumina TruSeq-captured exonic homopolymers. Details of recurrent mutations in these homopolymers are available in <xref ref-type="supplementary-material" rid="SD6-data">Figure 4—source data 3</xref>. We also screened 5430 and 60,942 homopolymers located in the exome-captured 5′ and 3′ UTRs for recurrent indels. Details of these recurrent indels are in <xref ref-type="supplementary-material" rid="SD6-data">Figure 4—source data 3</xref>. Recurrent indels meeting the following criteria were considered for a targeted Sequenom panel assessing MSI: (i) occurring in 6 or more samples, (ii) detected in both EM and CRC exomes, (iii) the maximal length of affected homopolymer &lt;12 bp. After extensive optimization experiments, 59 markers were chosen. Detailed information about each indel is given in <xref ref-type="supplementary-material" rid="SD7-data">Figure 5—source data 1</xref>.</p></sec><sec id="s4-10"><title>MSI panel</title><p>236 EM tumors used to establish MSI thresholds were drawn from the Australian National Endometrial Cancer Study (ANECS). 
IHC analyses of these tumors were independently performed at the Molecular Cancer Epidemiology Laboratory in Brisbane, Australia as described (<xref ref-type="bibr" rid="bib59">Tan et al., 2013</xref>). 11 out of 236 tumors were excluded for the 59-marker panel due to their low tumor percentage (≤10%). By varying the marker threshold, we calculated the number of true-positives and false-positives identified by our MSI panel relative to the IHC data. A ROC curve was constructed based on these values. The Matthew Correlation Coefficient of the ROC curve was calculated for each threshold. Tumors were considered MSI when they had three markers positive. We did not distinguish between MSI-low and microsatellite stable (MSS), as this is currently not clinically relevant. All tumors with less than three positive markers were thus considered MSS/MSI-L. For the Bethesda panel, we defined three categories as follows: microsatellite stable (MSS, 0 out of 10 markers), low microsatellite instability (MSI-L, 1–2 out of 10 markers), and high microsatellite instability (MSI-H, 3 or more out of 10 markers). Two sets of data (114 EM tumors and 97 CRC tumors) were used for the comparison. Details of these sample sets are given in <xref ref-type="supplementary-material" rid="SD7-data">Figure 5—source data 1</xref>.</p></sec><sec id="s4-11"><title>Mutation signatures in other tumor types</title><p>The 59-marker panel was applied to ovarian tumors and leukemia. Four samples with proven MSI status were selected, including one ovarian tumor (OV) and three leukemia cell lines (DND41, CCRF-CEM, and SUPT1). The MSI-H OV tumor, two MSS OV tumors, and their matched normal samples, as well as three MSI-H leukemia cell lines and a MSS leukemia cell line (RPMI-8402) were exome-sequenced. Detailed information for all samples can be found in <xref ref-type="supplementary-material" rid="SD8-data">Figure 5—source data 2</xref>. 
Raw data are available in EGA under the accession number EGAS00001000182.</p></sec><sec id="s4-12"><title>Pathway analyses for recurrent mutations</title><p>Two pathway tools (IPA and GenomeMuSiC) and three pathway databases (IPA, BioCarta, and Reactome [<xref ref-type="bibr" rid="bib21">Haw et al., 2011</xref>]) were used. We first selected all genes with somatic exonic indels, and then extended our mutation calling to indels occurring 25 bp up- or downstream of each exon. Mutation calling and filtering for the latter set of mutations was done as described above. In total, 1989 additional indels in exon/intron boundaries were detected. These were combined with the previously described indels in exonic regions, which—after the removal of indels in MMR genes—yielded 7546 indels in 4116 genes. As a validation, we selected 27 CRC and 65 EM tumors with MSI sequenced by The Cancer Genome Atlas (<xref ref-type="bibr" rid="bib60">TCGA, 2012</xref>; <xref ref-type="bibr" rid="bib26">Kandoth et al., 2013</xref>). We selected genes recurrently affected not only by frameshift indels but also by non-synonymous substitutions. There were 2183 and 3138 genes from the CRC and EM tumor data sets, respectively. Detailed results of pathway analyses are given in <xref ref-type="supplementary-material" rid="SD9-data">Table 2—source data 1</xref>.</p></sec><sec id="s4-13"><title>Establishment of primary tumor cell cultures</title><p>11 primary endometrial and ovarian tumor cell cultures were established from tumors of patients undergoing surgery at the Division of Gynecologic Oncology, UZ Leuven (Belgium). Tissue was washed with PBS supplemented with penicillin/streptomycin and fungizone, digested with collagenase type IV (1 mg/ml; Roche, Vilvoorde, Belgium) and DNAse I (0.1 mg/ml; Roche) in RPMI+ medium. Single cell suspensions were prepared by filtration through a 70-µm filter. Red blood cells were lysed using ammonium chloride (Stem Cell Technologies, Grenoble, France). 
Single cells were plated into a 25-cm<sup>2</sup> culture flask. After 1–3 weeks, when cells reached 60–70% confluency, fibroblasts were removed using mouse anti-human CD90 (Clone AS02; Dianova, Hamburg, Germany) bound to Mouse Pan IgG Dynabeads (Life Technologies, Erembodegem, Belgium). Cell cultures were subsequently passaged at 70–90% confluency and banked at −80°C. Primary tumor cell cultures were grown in RPMI Medium 1640 supplemented with 20% fetal bovine serum (FBS), 2 mM L-Glutamine, 100 U/ml penicillin, 100 μg/ml streptomycin, 1 μg/ml fungizone, and 10 μg/ml gentamicin (Life Technologies) up to 25 passages.</p></sec><sec id="s4-14"><title>Immunofluorescent double staining for γH2AX and RAD51</title><p>Cells were seeded in 8-well Lab-tek Permanox Chamber slides (Nunc, Zellik, Belgium), treated for 24 hr, fixed in 4% paraformaldehyde for 15 min at room temperature, and ice-cold methanol for 5 min. Primary antibodies recognizing γH2AX (JBW301, Millipore, Overijse, Belgium) and RAD51 (PC130, Merck, Darmstadt, Germany) followed by secondary antibodies conjugated to Alexa Fluor 647 and 488 (Life Technologies) were used. Images were acquired using an A1R Eclipse Ti inverted confocal microscope (Nikon, Brussels, Belgium) and processed using Fiji software, with compound or vehicle-treated cells being processed identically. Nuclei with &gt;5 foci were scored as positive, and at least 200 nuclei were counted per condition by two independent individuals, blinded to the genotypes.</p></sec><sec id="s4-15"><title>Cell cycle analysis with BrdU and propidium iodide</title><p>Cells were treated for 24 hr with 26 µM olaparib, 0.3 µM mitomycin C, 0.03 µM camptothecin or carrier, and incubated for 90 min with BrdU (10 µM) before harvesting. Cells were resuspended in ice-cold PBS and ice-cold ethanol was slowly added to 70%. 
Cells were fixed for 5 min at room temperature, treated with 2 M HCl for 30 min and stained with FITC-conjugated anti-BrdU antibody (BD). Cells were washed, resuspended in PI/RNase staining buffer (BD), and analyzed on a BD Biosciences FACSVerse flow cytometer. Cell cycle distributions were modeled using FlowJo software, and the fraction of cells in S-phase, G2/M and G1 determined as described by <xref ref-type="bibr" rid="bib65">Watson et al. (1987)</xref>.</p></sec><sec id="s4-16"><title>Cytotoxicity screening</title><p>5,000 cells/well were seeded in 96-well plates. After 24 hr, cells were treated in quadruplicate, incubated for 48 hr at 37°C and analyzed using the In Vitro Toxicology Assay Kit, Sulforhodamine B-based (Sigma, Diegem, Belgium) as per the manufacturer's instructions. Growth inhibition was calculated as described (<xref ref-type="bibr" rid="bib63">Vichai and Kirtikara, 2006</xref>).</p></sec><sec id="s4-17"><title>siRNA knockdown</title><p>siRNA ON-TARGETplus SMART pools (Thermo) were diluted in Optimem I reduced serum medium using Lipofectamine RNAiMAX (Life technologies) to reverse transfect MCF7 cells. For cytotoxicity screening, transfections were in 96-well format and medium was changed 14 hr after transfection. Cells were treated with olaparib (26 μM) and after 48 hr processed for cytotoxicity screening. Simultaneously, siRNA transfections in 12-well plates were done to quantify knockdown.</p></sec><sec id="s4-18"><title>Gene expression</title><p>Total RNA was extracted using the RNeasy Mini kit (Qiagen, Venlo, The Netherlands) and reverse transcribed using the SuperScript III reverse transcription system (Life technologies). Quantitative RT-PCR (qRT-PCR) with <italic>ACTB</italic> as an internal control was performed using TaqMan gene expression assay probes and 5 μl TaqMan Fast Universal PCR master mix (Life technologies). 
Reactions were amplified in a Roche LightCycler 480 using the following cycles: 50°C (2 min), 95°C (30 s), and 40 cycles of 95°C (3 s), 60°C (30 s).</p></sec><sec id="s4-19"><title>Antibodies, compounds, and other reagents</title><p>Mouse anti-phospho-Histone H2A.X (Ser139) monoclonal antibody (clone JBW301) was from Millipore Corporation, Billerica, MA, USA. Rabbit anti-Rad51 (PC130) polyclonal antibody was from Calbiochem/Merck, Darmstadt, Germany. Rabbit anti-ACTB (#4967) polyclonal antibody was from Cell Signalling, Danvers, MA, USA. FITC-conjugated anti-BrdU antibody (347583) was from Becton–Dickinson, San Jose, CA, USA. Alexa Fluor 647 goat anti-mouse IgG (A-21235) and Alexa Fluor 488 goat anti-rabbit IgG (A-11034) were from Life technologies, Carlsbad, CA, USA. Olaparib (AZD-2281, batch JSAR104) was purchased from JS Research Chemicals Trading, Schleswig Holstein, Germany. Cis-platinum (II) diammine dichloride (P4394), paclitaxel (T7402), mitomycin C (M4287), (S)-(+)-camptothecin (C9911) and carmustine (C0400) were purchased from Sigma-Aldrich, St. Louis, MO, USA, and prepared and stored according to the manufacturer's recommendations. siRNA ON-TARGETplus SMART pools were purchased from Thermo Scientific Dharmacon, Chicago, IL, USA: Non-targeting (D-001810-10-05); ATM (L-003201-00-0005); ATR (L-003202-00-0005); BRCA1 (L-003461-00-0005); and BRCA2 (L-003462-00-0005). TaqMan gene expression assays (Life technologies, Carlsbad, CA, USA) used in this study were as follows: ATM: Hs01112355_g1; ATR: Hs00992123_m1; BRCA1: Hs01556193_m1; BRCA2: Hs00609073_m1; ACTB: Hs99999903_m1. Normal goat serum (005-000-121) was from Jackson Immunoresearch Labs, West Grove, PA, USA.</p></sec></sec></body><back><ack id="ack"><title>Acknowledgements</title><p>We greatly appreciate the assistance of Mark Veugelers and Stéphane Plaisance of the VIB Technology Watch team. We acknowledge the contributions of Gilian Peuteman and Thomas Van Brussel for Sequenom validation experiments. 
We thank Penelope Webb, Daniel Buchanan, Kaltin Ferguson, Mike Walsh, Joanne Young, as well as ANECS collaborators, ANECS staff and participating Institutions (<ext-link ext-link-type="uri" xlink:href="http://www.anecs.org.au/html">http://www.anecs.org.au/html</ext-link>) for their roles in ANECS study setup and/or characterization of ANECS endometrial tumors. We are grateful to the Verelst Fund and Reliable Cancer Therapies. The research was funded by grants from the Fund for Scientific Research Flanders (FWO-F), the ‘Stichting tegen Kanker’ and the KULeuven (KUL PFV/10/016 SymBioSys). ANECS patient recruitment, data collection, biospecimen collection, and IHC analysis was supported by funding from the National Health and Medical Research Council (NHMRC) of Australia (Grant ID#339435); The Cancer Council Queensland (ID#4196615); and Cancer Council Tasmania (IDs#403031, #457636), and Cancer Australia (ID1010859). HZ, BT, LC, and JR hold a FWO postdoctoral fellowship, BTY and MM hold a FWO PhD fellowship. AS is supported by an NHMRC Senior Research Fellowship.</p></ack><sec sec-type="additional-information"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="conflict" id="conf1"><p>DL, an inventor on a patent application regarding the use of recurrent indels to detect MSI. The VIB is owner of this patent application, and the said patent application has been licensed to an outside company. Neither VIB nor any of the authors have equity stakes in the company. 
However, VIB stands to eventually receive royalties.</p></fn><fn fn-type="conflict" id="conf2"><p>The other authors declare that no competing interests exist.</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>HZ, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con2"><p>DL, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con3"><p>BT, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con4"><p>JR, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con5"><p>BTY, Conception and design, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con6"><p>MM, Conception and design, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con7"><p>XS, Conception and design, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con8"><p>AS, Conception and design, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con9"><p>LC, Conception and design, Acquisition of data</p></fn><fn fn-type="con" id="con10"><p>GM, Conception and design, Acquisition of data</p></fn><fn fn-type="con" id="con11"><p>JC, Conception and design, Acquisition of data</p></fn><fn fn-type="con" id="con12"><p>SS, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con13"><p>DS, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con14"><p>SA, Acquisition of 
data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con15"><p>AM, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con16"><p>FA, Conception and design, Acquisition of data, Analysis and interpretation of data, Contributed unpublished essential data or reagents</p></fn></fn-group><fn-group content-type="ethics-information"><title>Ethics</title><fn fn-type="other"><p>Human subjects: Informed consent and consent to publish was obtained from all patients. Ethical approval was obtained at the ethical committee of University Hospital Gasthuisberg of Leuven with identifier ML2266.</p></fn></fn-group></sec><sec sec-type="supplementary-material"><title>Additional files</title><supplementary-material id="SD10-data"><object-id pub-id-type="doi">10.7554/eLife.02725.039</object-id><label>Supplementary file 1.</label><caption><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.02725.039">http://dx.doi.org/10.7554/eLife.02725.039</ext-link></p></caption><media mime-subtype="pdf" mimetype="application" xlink:href="elife02725s010.pdf"/></supplementary-material><sec sec-type="datasets"><title>Major datasets</title><p>The following datasets were generated:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro1"><name><surname>Zhao</surname><given-names>Hui</given-names></name>, <year>2013</year><x>, </x><source>Signatures of mismatch repair deficiency in cancer genomes</source><x>, </x><ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ega/studies/EGAS00001000182">https://www.ebi.ac.uk/ega/studies/EGAS00001000182</ext-link><x>, </x><comment>Access to datasets must be approved by the specified Data Access Committee (DAC).</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" 
id="dataro2"><name><surname>Zhao</surname><given-names>Hui</given-names></name>, <year>2012</year><x>, </x><source>Complete Genomics paired end sequencing; Ovarian cancer</source><x>, </x><ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ega/studies/EGAS00001000158">https://www.ebi.ac.uk/ega/studies/EGAS00001000158</ext-link><x>, </x><comment>Access to datasets must be approved by the specified Data Access Committee (DAC).</comment></related-object></p><p>The following previously published dataset was used:</p><p><related-object content-type="existing-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro3"><collab>The Cancer Genome Atlas Network</collab>, <year>2013</year><x>, </x><source>Data from: Comprehensive molecular characterization of human colon and rectal cancer</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature11252">http://dx.doi.org/10.1038/nature11252</ext-link><x>, </x><comment>Available for download as a supplementary file.</comment></related-object></p></sec></sec><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><collab>1000 Genomes Project Consortium</collab></person-group><year>2012</year><article-title>An integrated map of genetic variation from 1,092 human genomes</article-title><source>Nature</source><volume>491</volume><fpage>56</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1038/nature11632</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Albers</surname><given-names>CA</given-names></name><name><surname>Lunter</surname><given-names>G</given-names></name><name><surname>MacArthur</surname><given-names>DG</given-names></name><name><surname>McVean</surname><given-names>G</given-names></name><name><surname>Ouwehand</surname><given-names>WH</given-names></name><name><surname>Durbin</surname><given-names>R</given-names></name></person-group><year>2011</year><article-title>Dindel: accurate indel calls from short-read data</article-title><source>Genome Research</source><volume>21</volume><fpage>961</fpage><lpage>973</lpage><pub-id pub-id-type="doi">10.1101/gr.112326.110</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Boland</surname><given-names>CR</given-names></name><name><surname>Thibodeau</surname><given-names>SN</given-names></name><name><surname>Hamilton</surname><given-names>SR</given-names></name><name><surname>Sidransky</surname><given-names>D</given-names></name><name><surname>Eshleman</surname><given-names>JR</given-names></name><name><surname>Burt</surname><given-names>RW</given-names></name><name><surname>Meltzer</surname><given-names>SJ</given-names></name><name><surname>Rodriguez-Bigas</surname><given-names>MA</given-names></name><name><surname>Fodde</surname><given-names>R</given-names></name><name><surname>Ranzani</surname><given-names>GN</given-names></name><name><surname>Srivastava</surname><given-names>S</given-names></name></person-group><year>1998</year><article-title>A National Cancer Institute Workshop on Microsatellite Instability for cancer detection and familial predisposition: development of international criteria for the determination of microsatellite instability in colorectal cancer</article-title><source>Cancer Research</source><volume>58</volume><fpage>5248</fpage><lpage>5257</lpage></element-citation></ref><ref 
id="bib4"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bunting</surname><given-names>SF</given-names></name><name><surname>Callén</surname><given-names>E</given-names></name><name><surname>Kozak</surname><given-names>ML</given-names></name><name><surname>Kim</surname><given-names>JM</given-names></name><name><surname>Wong</surname><given-names>N</given-names></name><name><surname>López-Contreras</surname><given-names>AJ</given-names></name><name><surname>Ludwig</surname><given-names>T</given-names></name><name><surname>Baer</surname><given-names>R</given-names></name><name><surname>Faryabi</surname><given-names>RB</given-names></name><name><surname>Malhowski</surname><given-names>A</given-names></name><name><surname>Chen</surname><given-names>HT</given-names></name><name><surname>Fernandez-Capetillo</surname><given-names>O</given-names></name><name><surname>D'Andrea</surname><given-names>A</given-names></name><name><surname>Nussenzweig</surname><given-names>A</given-names></name></person-group><year>2012</year><article-title>BRCA1 functions independently of homologous recombination in DNA interstrand crosslink repair</article-title><source>Molecular Cell</source><volume>46</volume><fpage>125</fpage><lpage>135</lpage><pub-id pub-id-type="doi">10.1016/j.molcel.2012.02.015</pub-id></element-citation></ref><ref id="bib5"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Campbell</surname><given-names>CD</given-names></name><name><surname>Chong</surname><given-names>JX</given-names></name><name><surname>Malig</surname><given-names>M</given-names></name><name><surname>Ko</surname><given-names>A</given-names></name><name><surname>Dumont</surname><given-names>BL</given-names></name><name><surname>Han</surname><given-names>L</given-names></name><name><surname>Vives</surname><given-names>L</given-names></name><name><surname>O'Roak</surname><given-names>BJ</given-names></name><name><surname>Sudmant</surname><given-names>PH</given-names></name><name><surname>Shendure</surname><given-names>J</given-names></name><name><surname>Abney</surname><given-names>M</given-names></name><name><surname>Ober</surname><given-names>C</given-names></name><name><surname>Eichler</surname><given-names>EE</given-names></name></person-group><year>2012</year><article-title>Estimating the human mutation rate using autozygosity in a founder population</article-title><source>Nature Genetics</source><volume>44</volume><fpage>1277</fpage><lpage>1281</lpage><pub-id pub-id-type="doi">10.1038/ng.2418</pub-id></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Chen</surname><given-names>CL</given-names></name><name><surname>Rappailles</surname><given-names>A</given-names></name><name><surname>Duquenne</surname><given-names>L</given-names></name><name><surname>Huvet</surname><given-names>M</given-names></name><name><surname>Guilbaud</surname><given-names>G</given-names></name><name><surname>Farinelli</surname><given-names>L</given-names></name><name><surname>Audit</surname><given-names>B</given-names></name><name><surname>d'Aubenton-Carafa</surname><given-names>Y</given-names></name><name><surname>Arneodo</surname><given-names>A</given-names></name><name><surname>Hyrien</surname><given-names>O</given-names></name><name><surname>Thermes</surname><given-names>C</given-names></name></person-group><year>2010</year><article-title>Impact of replication timing on non-CpG and CpG substitution rates in mammalian genomes</article-title><source>Genome Research</source><volume>20</volume><fpage>447</fpage><lpage>457</lpage><pub-id pub-id-type="doi">10.1101/gr.098947.109</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>J</given-names></name><name><surname>Miller</surname><given-names>BF</given-names></name><name><surname>Furano</surname><given-names>AV</given-names></name></person-group><year>2014</year><article-title>Repair of naturally occurring mismatches can induce mutations in flanking DNA</article-title><source>eLife</source><volume>3</volume><fpage>e02001</fpage><pub-id pub-id-type="doi">10.7554/eLife.02001</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Deng</surname><given-names>G</given-names></name><name><surname>Bell</surname><given-names>I</given-names></name><name><surname>Crawley</surname><given-names>S</given-names></name><name><surname>Gum</surname><given-names>J</given-names></name><name><surname>Terdiman</surname><given-names>JP</given-names></name><name><surname>Allen</surname><given-names>BA</given-names></name><name><surname>Truta</surname><given-names>B</given-names></name><name><surname>Sleisenger</surname><given-names>MH</given-names></name><name><surname>Kim</surname><given-names>YS</given-names></name></person-group><year>2004</year><article-title>BRAF mutation is frequently present in sporadic colorectal cancer with methylated hMLH1, but not in hereditary nonpolyposis colorectal cancer</article-title><source>Clinical Cancer Research</source><volume>10</volume><fpage>191</fpage><lpage>195</lpage><pub-id pub-id-type="doi">10.1158/1078-0432.CCR-1118-3</pub-id></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Denver</surname><given-names>DR</given-names></name><name><surname>Feinberg</surname><given-names>S</given-names></name><name><surname>Estes</surname><given-names>S</given-names></name><name><surname>Thomas</surname><given-names>WK</given-names></name><name><surname>Lynch</surname><given-names>M</given-names></name></person-group><year>2005</year><article-title>Mutation rates, spectra and hotspots in mismatch repair-deficient <italic>Caenorhabditis elegans</italic></article-title><source>Genetics</source><volume>170</volume><fpage>107</fpage><lpage>113</lpage><pub-id pub-id-type="doi">10.1534/genetics.104.038521</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Dietmaier</surname><given-names>W</given-names></name><name><surname>Wallinger</surname><given-names>S</given-names></name><name><surname>Bocker</surname><given-names>T</given-names></name><name><surname>Kullmann</surname><given-names>F</given-names></name><name><surname>Fishel</surname><given-names>R</given-names></name><name><surname>Rüschoff</surname><given-names>J</given-names></name></person-group><year>1997</year><article-title>Diagnostic microsatellite instability: definition and correlation with mismatch repair protein expression</article-title><source>Cancer Research</source><volume>57</volume><fpage>4749</fpage><lpage>4756</lpage></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Donehower</surname><given-names>LA</given-names></name><name><surname>Creighton</surname><given-names>CJ</given-names></name><name><surname>Schultz</surname><given-names>N</given-names></name><name><surname>Shinbrot</surname><given-names>E</given-names></name><name><surname>Chang</surname><given-names>K</given-names></name><name><surname>Gunaratne</surname><given-names>PH</given-names></name><name><surname>Muzny</surname><given-names>D</given-names></name><name><surname>Sander</surname><given-names>C</given-names></name><name><surname>Hamilton</surname><given-names>SR</given-names></name><name><surname>Gibbs</surname><given-names>RA</given-names></name><name><surname>Wheeler</surname><given-names>D</given-names></name></person-group><year>2013</year><article-title>MLH1-silenced and non-silenced subgroups of hypermutated colorectal carcinomas have distinct mutational landscapes</article-title><source>The Journal of Pathology</source><volume>229</volume><fpage>99</fpage><lpage>110</lpage><pub-id pub-id-type="doi">10.1002/path.4087</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Dorard</surname><given-names>C</given-names></name><name><surname>de Thonel</surname><given-names>A</given-names></name><name><surname>Collura</surname><given-names>A</given-names></name><name><surname>Marisa</surname><given-names>L</given-names></name><name><surname>Svrcek</surname><given-names>M</given-names></name><name><surname>Lagrange</surname><given-names>A</given-names></name><name><surname>Jego</surname><given-names>G</given-names></name><name><surname>Wanherdrick</surname><given-names>K</given-names></name><name><surname>Joly</surname><given-names>AL</given-names></name><name><surname>Buhard</surname><given-names>O</given-names></name><name><surname>Gobbo</surname><given-names>J</given-names></name><name><surname>Penard-Lacronique</surname><given-names>V</given-names></name><name><surname>Zouali</surname><given-names>H</given-names></name><name><surname>Tubacher</surname><given-names>E</given-names></name><name><surname>Kirzin</surname><given-names>S</given-names></name><name><surname>Selves</surname><given-names>J</given-names></name><name><surname>Milano</surname><given-names>G</given-names></name><name><surname>Etienne-Grimaldi</surname><given-names>MC</given-names></name><name><surname>Bengrine-Lefèvre</surname><given-names>L</given-names></name><name><surname>Louvet</surname><given-names>C</given-names></name><name><surname>Tournigand</surname><given-names>C</given-names></name><name><surname>Lefèvre</surname><given-names>JH</given-names></name><name><surname>Parc</surname><given-names>Y</given-names></name><name><surname>Tiret</surname><given-names>E</given-names></name><name><surname>Fléjou</surname><given-names>JF</given-names></name><name><surname>Gaub</surname><given-names>MP</given-names></name><name><surname>Garrido</surname><given-names>C</given-names></name><name><surname>Duval</surname><given-names>A</given-names></name></person-group><year>2011</year><article-title>Expression of a mutant HSP110 
sensitizes colorectal cancer cells to chemotherapy and improves disease prognosis</article-title><source>Nature Medicine</source><volume>17</volume><fpage>1283</fpage><lpage>1289</lpage><pub-id pub-id-type="doi">10.1038/nm.2457</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Drmanac</surname><given-names>R</given-names></name><name><surname>Sparks</surname><given-names>AB</given-names></name><name><surname>Callow</surname><given-names>MJ</given-names></name><name><surname>Halpern</surname><given-names>AL</given-names></name><name><surname>Burns</surname><given-names>NL</given-names></name><name><surname>Kermani</surname><given-names>BG</given-names></name><name><surname>Carnevali</surname><given-names>P</given-names></name><name><surname>Nazarenko</surname><given-names>I</given-names></name><name><surname>Nilsen</surname><given-names>GB</given-names></name><name><surname>Yeung</surname><given-names>G</given-names></name><name><surname>Dahl</surname><given-names>F</given-names></name><name><surname>Fernandez</surname><given-names>A</given-names></name><name><surname>Staker</surname><given-names>B</given-names></name><name><surname>Pant</surname><given-names>KP</given-names></name><name><surname>Baccash</surname><given-names>J</given-names></name><name><surname>Borcherding</surname><given-names>AP</given-names></name><name><surname>Brownley</surname><given-names>A</given-names></name><name><surname>Cedeno</surname><given-names>R</given-names></name><name><surname>Chen</surname><given-names>L</given-names></name><name><surname>Chernikoff</surname><given-names>D</given-names></name><name><surname>Cheung</surname><given-names>A</given-names></name><name><surname>Chirita</surname><given-names>R</given-names></name><name><surname>Curson</surname><given-names>B</given-names></name><name><surname>Ebert</surname><given-names>JC</given-names></name><name><surname>Hacker</surna
me><given-names>CR</given-names></name><name><surname>Hartlage</surname><given-names>R</given-names></name><name><surname>Hauser</surname><given-names>B</given-names></name><name><surname>Huang</surname><given-names>S</given-names></name><name><surname>Jiang</surname><given-names>Y</given-names></name><name><surname>Karpinchyk</surname><given-names>V</given-names></name><name><surname>Koenig</surname><given-names>M</given-names></name><name><surname>Kong</surname><given-names>C</given-names></name><name><surname>Landers</surname><given-names>T</given-names></name><name><surname>Le</surname><given-names>C</given-names></name><name><surname>Liu</surname><given-names>J</given-names></name><name><surname>McBride</surname><given-names>CE</given-names></name><name><surname>Morenzoni</surname><given-names>M</given-names></name><name><surname>Morey</surname><given-names>RE</given-names></name><name><surname>Mutch</surname><given-names>K</given-names></name><name><surname>Perazich</surname><given-names>H</given-names></name><name><surname>Perry</surname><given-names>K</given-names></name><name><surname>Peters</surname><given-names>BA</given-names></name><name><surname>Peterson</surname><given-names>J</given-names></name><name><surname>Pethiyagoda</surname><given-names>CL</given-names></name><name><surname>Pothuraju</surname><given-names>K</given-names></name><name><surname>Richter</surname><given-names>C</given-names></name><name><surname>Rosenbaum</surname><given-names>AM</given-names></name><name><surname>Roy</surname><given-names>S</given-names></name><name><surname>Shafto</surname><given-names>J</given-names></name><name><surname>Sharanhovich</surname><given-names>U</given-names></name><name><surname>Shannon</surname><given-names>KW</given-names></name><name><surname>Sheppy</surname><given-names>CG</given-names></name><name><surname>Sun</surname><given-names>M</given-names></name><name><surname>Thakuria</surname><given-names>JV</given-names></name><name><surname>Tran</su
rname><given-names>A</given-names></name><name><surname>Vu</surname><given-names>D</given-names></name><name><surname>Zaranek</surname><given-names>AW</given-names></name><name><surname>Wu</surname><given-names>X</given-names></name><name><surname>Drmanac</surname><given-names>S</given-names></name><name><surname>Oliphant</surname><given-names>AR</given-names></name><name><surname>Banyai</surname><given-names>WC</given-names></name><name><surname>Martin</surname><given-names>B</given-names></name><name><surname>Ballinger</surname><given-names>DG</given-names></name><name><surname>Church</surname><given-names>GM</given-names></name><name><surname>Reid</surname><given-names>CA</given-names></name></person-group><year>2010</year><article-title>Human genome sequencing using unchained base reads on self-assembling DNA nanoarrays</article-title><source>Science</source><volume>327</volume><fpage>78</fpage><lpage>81</lpage><pub-id pub-id-type="doi">10.1126/science.1181498</pub-id></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ellegren</surname><given-names>H</given-names></name></person-group><year>2004</year><article-title>Microsatellites: simple sequences with complex evolution</article-title><source>Nature Reviews Genetics</source><volume>5</volume><fpage>435</fpage><lpage>445</lpage><pub-id pub-id-type="doi">10.1038/nrg1348</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Farmer</surname><given-names>H</given-names></name><name><surname>McCabe</surname><given-names>N</given-names></name><name><surname>Lord</surname><given-names>CJ</given-names></name><name><surname>Tutt</surname><given-names>AN</given-names></name><name><surname>Johnson</surname><given-names>DA</given-names></name><name><surname>Richardson</surname><given-names>TB</given-names></name><name><surname>Santarosa</surname><given-names>M</given-names></name><name><surname>Dillon</surname><given-names>KJ</given-names></name><name><surname>Hickson</surname><given-names>I</given-names></name><name><surname>Knights</surname><given-names>C</given-names></name><name><surname>Martin</surname><given-names>NM</given-names></name><name><surname>Jackson</surname><given-names>SP</given-names></name><name><surname>Smith</surname><given-names>GC</given-names></name><name><surname>Ashworth</surname><given-names>A</given-names></name></person-group><year>2005</year><article-title>Targeting the DNA repair defect in BRCA mutant cells as a therapeutic strategy</article-title><source>Nature</source><volume>434</volume><fpage>917</fpage><lpage>921</lpage><pub-id pub-id-type="doi">10.1038/nature03445</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fischer</surname><given-names>F</given-names></name><name><surname>Baerenfaller</surname><given-names>K</given-names></name><name><surname>Jiricny</surname><given-names>J</given-names></name></person-group><year>2007</year><article-title>5-Fluorouracil is efficiently removed from DNA by the base excision and mismatch repair systems</article-title><source>Gastroenterology</source><volume>133</volume><fpage>1858</fpage><lpage>1868</lpage><pub-id pub-id-type="doi">10.1053/j.gastro.2007.09.003</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Ham</surname><given-names>MF</given-names></name><name><surname>Takakuwa</surname><given-names>T</given-names></name><name><surname>Luo</surname><given-names>WJ</given-names></name><name><surname>Liu</surname><given-names>A</given-names></name><name><surname>Horii</surname><given-names>A</given-names></name><name><surname>Aozasa</surname><given-names>K</given-names></name></person-group><year>2006</year><article-title>Impairment of double-strand breaks repair and aberrant splicing of ATM and MRE11 in leukemia-lymphoma cell lines with microsatellite instability</article-title><source>Cancer Science</source><volume>97</volume><fpage>226</fpage><lpage>234</lpage><pub-id pub-id-type="doi">10.1111/j.1349-7006.2006.00165.x</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hampel</surname><given-names>H</given-names></name><name><surname>Frankel</surname><given-names>WL</given-names></name><name><surname>Martin</surname><given-names>E</given-names></name><name><surname>Arnold</surname><given-names>M</given-names></name><name><surname>Khanduja</surname><given-names>K</given-names></name><name><surname>Kuebler</surname><given-names>P</given-names></name><name><surname>Clendenning</surname><given-names>M</given-names></name><name><surname>Sotamaa</surname><given-names>K</given-names></name><name><surname>Prior</surname><given-names>T</given-names></name><name><surname>Westman</surname><given-names>JA</given-names></name><name><surname>Panescu</surname><given-names>J</given-names></name><name><surname>Fix</surname><given-names>D</given-names></name><name><surname>Lockman</surname><given-names>J</given-names></name><name><surname>LaJeunesse</surname><given-names>J</given-names></name><name><surname>Comeras</surname><given-names>I</given-names></name><name><surname>de la 
Chapelle</surname><given-names>A</given-names></name></person-group><year>2008</year><article-title>Feasibility of screening for Lynch syndrome among patients with colorectal cancer</article-title><source>Journal of Clinical Oncology</source><volume>26</volume><fpage>5783</fpage><lpage>5788</lpage><pub-id pub-id-type="doi">10.1200/JCO.2008.17.5950</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hampel</surname><given-names>H</given-names></name><name><surname>Frankel</surname><given-names>WL</given-names></name><name><surname>Martin</surname><given-names>E</given-names></name><name><surname>Arnold</surname><given-names>M</given-names></name><name><surname>Khanduja</surname><given-names>K</given-names></name><name><surname>Kuebler</surname><given-names>P</given-names></name><name><surname>Nakagawa</surname><given-names>H</given-names></name><name><surname>Sotamaa</surname><given-names>K</given-names></name><name><surname>Prior</surname><given-names>TW</given-names></name><name><surname>Westman</surname><given-names>J</given-names></name><name><surname>Panescu</surname><given-names>J</given-names></name><name><surname>Fix</surname><given-names>D</given-names></name><name><surname>Lockman</surname><given-names>J</given-names></name><name><surname>Comeras</surname><given-names>I</given-names></name><name><surname>de la Chapelle</surname><given-names>A</given-names></name></person-group><year>2005</year><article-title>Screening for the Lynch syndrome (Hereditary nonpolyposis colorectal cancer)</article-title><source>The New England Journal of Medicine</source><volume>352</volume><fpage>1851</fpage><lpage>1860</lpage><pub-id pub-id-type="doi">10.1056/NEJMoa043146</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Hampel</surname><given-names>H</given-names></name><name><surname>Frankel</surname><given-names>W</given-names></name><name><surname>Panescu</surname><given-names>J</given-names></name><name><surname>Lockman</surname><given-names>J</given-names></name><name><surname>Sotamaa</surname><given-names>K</given-names></name><name><surname>Fix</surname><given-names>D</given-names></name><name><surname>Comeras</surname><given-names>I</given-names></name><name><surname>La Jeunesse</surname><given-names>J</given-names></name><name><surname>Nakagawa</surname><given-names>H</given-names></name><name><surname>Westman</surname><given-names>JA</given-names></name><name><surname>Prior</surname><given-names>TW</given-names></name><name><surname>Clendenning</surname><given-names>M</given-names></name><name><surname>Penzone</surname><given-names>P</given-names></name><name><surname>Lombardi</surname><given-names>J</given-names></name><name><surname>Dunn</surname><given-names>P</given-names></name><name><surname>Cohn</surname><given-names>DE</given-names></name><name><surname>Copeland</surname><given-names>L</given-names></name><name><surname>Eaton</surname><given-names>L</given-names></name><name><surname>Fowler</surname><given-names>J</given-names></name><name><surname>Lewandowski</surname><given-names>G</given-names></name><name><surname>Vaccarello</surname><given-names>L</given-names></name><name><surname>Bell</surname><given-names>J</given-names></name><name><surname>Reid</surname><given-names>G</given-names></name><name><surname>de la Chapelle</surname><given-names>A</given-names></name></person-group><year>2006</year><article-title>Screening for Lynch syndrome (hereditary nonpolyposis colorectal cancer) among endometrial cancer patients</article-title><source>Cancer Research</source><volume>66</volume><fpage>7810</fpage><lpage>7817</lpage><pub-id pub-id-type="doi">10.1158/0008-5472.CAN-06-1114</pub-id></element-citation></ref><ref 
id="bib21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Haw</surname><given-names>RA</given-names></name><name><surname>Croft</surname><given-names>D</given-names></name><name><surname>Yung</surname><given-names>CK</given-names></name><name><surname>Ndegwa</surname><given-names>N</given-names></name><name><surname>D'Eustachio</surname><given-names>P</given-names></name><name><surname>Hermjakob</surname><given-names>H</given-names></name><name><surname>Stein</surname><given-names>LD</given-names></name></person-group><year>2011</year><article-title>The reactome BioMart</article-title><source>Database</source><volume>2011</volume><fpage>bar031</fpage><pub-id pub-id-type="doi">10.1093/database/bar031</pub-id></element-citation></ref><ref id="bib22"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hewish</surname><given-names>M</given-names></name><name><surname>Lord</surname><given-names>CJ</given-names></name><name><surname>Martin</surname><given-names>SA</given-names></name><name><surname>Cunningham</surname><given-names>D</given-names></name><name><surname>Ashworth</surname><given-names>A</given-names></name></person-group><year>2010</year><article-title>Mismatch repair deficient colorectal cancer in the era of personalized treatment</article-title><source>Nature Reviews Clinical Oncology</source><volume>7</volume><fpage>197</fpage><lpage>208</lpage><pub-id pub-id-type="doi">10.1038/nrclinonc.2010.18</pub-id></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hodgkinson</surname><given-names>A</given-names></name><name><surname>Eyre-Walker</surname><given-names>A</given-names></name></person-group><year>2011</year><article-title>Variation in the mutation rate across mammalian genomes</article-title><source>Nature Reviews 
Genetics</source><volume>12</volume><fpage>756</fpage><lpage>766</lpage><pub-id pub-id-type="doi">10.1038/nrg3098</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hombauer</surname><given-names>H</given-names></name><name><surname>Campbell</surname><given-names>CS</given-names></name><name><surname>Smith</surname><given-names>CE</given-names></name><name><surname>Desai</surname><given-names>A</given-names></name><name><surname>Kolodner</surname><given-names>RD</given-names></name></person-group><year>2011</year><article-title>Visualization of eukaryotic DNA mismatch repair reveals distinct recognition and repair intermediates</article-title><source>Cell</source><volume>147</volume><fpage>1040</fpage><lpage>1053</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2011.10.025</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jiricny</surname><given-names>J</given-names></name></person-group><year>2006</year><article-title>The multifaceted mismatch-repair system</article-title><source>Nature Reviews Molecular Cell Biology</source><volume>7</volume><fpage>335</fpage><lpage>346</lpage><pub-id pub-id-type="doi">10.1038/nrm1907</pub-id></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kandoth</surname><given-names>C</given-names></name><collab>Cancer Genome Atlas Research 
Network</collab><name><surname>Schultz</surname><given-names>N</given-names></name><name><surname>Cherniack</surname><given-names>AD</given-names></name><name><surname>Akbani</surname><given-names>R</given-names></name><name><surname>Liu</surname><given-names>Y</given-names></name><name><surname>Shen</surname><given-names>H</given-names></name><name><surname>Robertson</surname><given-names>AG</given-names></name><name><surname>Pashtan</surname><given-names>I</given-names></name><name><surname>Shen</surname><given-names>R</given-names></name><name><surname>Benz</surname><given-names>CC</given-names></name><name><surname>Yau</surname><given-names>C</given-names></name><name><surname>Laird</surname><given-names>PW</given-names></name><name><surname>Ding</surname><given-names>L</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Mills</surname><given-names>GB</given-names></name><name><surname>Kucherlapati</surname><given-names>R</given-names></name><name><surname>Mardis</surname><given-names>ER</given-names></name><name><surname>Levine</surname><given-names>DA</given-names></name></person-group><year>2013</year><article-title>Integrated genomic characterization of endometrial carcinoma</article-title><source>Nature</source><volume>497</volume><fpage>67</fpage><lpage>73</lpage><pub-id pub-id-type="doi">10.1038/nature12113</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>TM</given-names></name><name><surname>Laird</surname><given-names>PW</given-names></name><name><surname>Park</surname><given-names>PJ</given-names></name></person-group><year>2013</year><article-title>The landscape of microsatellite instability in colorectal and endometrial cancer genomes</article-title><source>Cell</source><volume>155</volume><fpage>858</fpage><lpage>868</lpage><pub-id 
pub-id-type="doi">10.1016/j.cell.2013.10.015</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kong</surname><given-names>A</given-names></name><name><surname>Frigge</surname><given-names>ML</given-names></name><name><surname>Masson</surname><given-names>G</given-names></name><name><surname>Besenbacher</surname><given-names>S</given-names></name><name><surname>Sulem</surname><given-names>P</given-names></name><name><surname>Magnusson</surname><given-names>G</given-names></name><name><surname>Gudjonsson</surname><given-names>SA</given-names></name><name><surname>Sigurdsson</surname><given-names>A</given-names></name><name><surname>Jonasdottir</surname><given-names>A</given-names></name><name><surname>Jonasdottir</surname><given-names>A</given-names></name><name><surname>Wong</surname><given-names>WS</given-names></name><name><surname>Sigurdsson</surname><given-names>G</given-names></name><name><surname>Walters</surname><given-names>GB</given-names></name><name><surname>Steinberg</surname><given-names>S</given-names></name><name><surname>Helgason</surname><given-names>H</given-names></name><name><surname>Thorleifsson</surname><given-names>G</given-names></name><name><surname>Gudbjartsson</surname><given-names>DF</given-names></name><name><surname>Helgason</surname><given-names>A</given-names></name><name><surname>Magnusson</surname><given-names>OT</given-names></name><name><surname>Thorsteinsdottir</surname><given-names>U</given-names></name><name><surname>Stefansson</surname><given-names>K</given-names></name></person-group><year>2012</year><article-title>Rate of de novo mutations and the importance of father's age to disease risk</article-title><source>Nature</source><volume>488</volume><fpage>471</fpage><lpage>475</lpage><pub-id pub-id-type="doi">10.1038/nature11396</pub-id></element-citation></ref><ref id="bib29"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Koren</surname><given-names>A</given-names></name><name><surname>Polak</surname><given-names>P</given-names></name><name><surname>Nemesh</surname><given-names>J</given-names></name><name><surname>Michaelson</surname><given-names>JJ</given-names></name><name><surname>Sebat</surname><given-names>J</given-names></name><name><surname>Sunyaev</surname><given-names>SR</given-names></name><name><surname>McCarroll</surname><given-names>SA</given-names></name></person-group><year>2012</year><article-title>Differential relationship of DNA replication timing to different forms of human mutation and variation</article-title><source>American Journal of Human Genetics</source><volume>91</volume><fpage>1033</fpage><lpage>1040</lpage><pub-id pub-id-type="doi">10.1016/j.ajhg.2012.10.018</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kunkel</surname><given-names>TA</given-names></name><name><surname>Soni</surname><given-names>A</given-names></name></person-group><year>1988</year><article-title>Mutagenesis by transient misalignment</article-title><source>The Journal of Biological Chemistry</source><volume>263</volume><fpage>14784</fpage><lpage>14789</lpage></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>H</given-names></name><name><surname>Durbin</surname><given-names>R</given-names></name></person-group><year>2010</year><article-title>Fast and accurate long-read alignment with Burrows-Wheeler transform</article-title><source>Bioinformatics</source><volume>26</volume><fpage>589</fpage><lpage>595</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btp698</pub-id></element-citation></ref><ref id="bib32"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Martin</surname><given-names>SA</given-names></name><name><surname>McCabe</surname><given-names>N</given-names></name><name><surname>Mullarkey</surname><given-names>M</given-names></name><name><surname>Cummins</surname><given-names>R</given-names></name><name><surname>Burgess</surname><given-names>DJ</given-names></name><name><surname>Nakabeppu</surname><given-names>Y</given-names></name><name><surname>Oka</surname><given-names>S</given-names></name><name><surname>Kay</surname><given-names>E</given-names></name><name><surname>Lord</surname><given-names>CJ</given-names></name><name><surname>Ashworth</surname><given-names>A</given-names></name></person-group><year>2010</year><article-title>DNA polymerases as potential therapeutic targets for cancers deficient in the DNA mismatch repair proteins MSH2 or MLH1</article-title><source>Cancer Cell</source><volume>17</volume><fpage>235</fpage><lpage>248</lpage><pub-id pub-id-type="doi">10.1016/j.ccr.2009.12.046</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Martin</surname><given-names>SA</given-names></name><name><surname>Hewish</surname><given-names>M</given-names></name><name><surname>Sims</surname><given-names>D</given-names></name><name><surname>Lord</surname><given-names>CJ</given-names></name><name><surname>Ashworth</surname><given-names>A</given-names></name></person-group><year>2011</year><article-title>Parallel high-throughput RNA interference screens identify PINK1 as a potential therapeutic target for the treatment of DNA mismatch repair-deficient cancers</article-title><source>Cancer Research</source><volume>71</volume><fpage>1836</fpage><lpage>1848</lpage><pub-id pub-id-type="doi">10.1158/0008-5472.CAN-10-2836</pub-id></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>McCabe</surname><given-names>N</given-names></name><name><surname>Turner</surname><given-names>NC</given-names></name><name><surname>Lord</surname><given-names>CJ</given-names></name><name><surname>Kluzek</surname><given-names>K</given-names></name><name><surname>Bialkowska</surname><given-names>A</given-names></name><name><surname>Swift</surname><given-names>S</given-names></name><name><surname>Giavara</surname><given-names>S</given-names></name><name><surname>O'Connor</surname><given-names>MJ</given-names></name><name><surname>Tutt</surname><given-names>AN</given-names></name><name><surname>Zdzienicka</surname><given-names>MZ</given-names></name><name><surname>Smith</surname><given-names>GC</given-names></name><name><surname>Ashworth</surname><given-names>A</given-names></name></person-group><year>2006</year><article-title>Deficiency in the repair of DNA damage by homologous recombination and sensitivity to poly(ADP-ribose) polymerase inhibition</article-title><source>Cancer Research</source><volume>66</volume><fpage>8109</fpage><lpage>8115</lpage><pub-id pub-id-type="doi">10.1158/0008-5472.CAN-06-0140</pub-id></element-citation></ref><ref id="bib35"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>McKenna</surname><given-names>A</given-names></name><name><surname>Hanna</surname><given-names>M</given-names></name><name><surname>Banks</surname><given-names>E</given-names></name><name><surname>Sivachenko</surname><given-names>A</given-names></name><name><surname>Cibulskis</surname><given-names>K</given-names></name><name><surname>Kernytsky</surname><given-names>A</given-names></name><name><surname>Garimella</surname><given-names>K</given-names></name><name><surname>Altshuler</surname><given-names>D</given-names></name><name><surname>Gabriel</surname><given-names>S</given-names></name><name><surname>Daly</surname><given-names>M</given-names></name><name><surname>DePristo</surname><given-names>MA</given-names></name></person-group><year>2010</year><article-title>The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data</article-title><source>Genome Research</source><volume>20</volume><fpage>1297</fpage><lpage>1303</lpage><pub-id pub-id-type="doi">10.1101/gr.107524.110</pub-id></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Metzger-Filho</surname><given-names>O</given-names></name><name><surname>Tutt</surname><given-names>A</given-names></name><name><surname>de Azambuja</surname><given-names>E</given-names></name><name><surname>Saini</surname><given-names>KS</given-names></name><name><surname>Viale</surname><given-names>G</given-names></name><name><surname>Loi</surname><given-names>S</given-names></name><name><surname>Bradbury</surname><given-names>I</given-names></name><name><surname>Bliss</surname><given-names>JM</given-names></name><name><surname>Azim</surname><given-names>HA</given-names><suffix>Jnr</suffix></name><name><surname>Ellis</surname><given-names>P</given-names></name><name><surname>Di 
Leo</surname><given-names>A</given-names></name><name><surname>Baselga</surname><given-names>J</given-names></name><name><surname>Sotiriou</surname><given-names>C</given-names></name><name><surname>Piccart-Gebhart</surname><given-names>M</given-names></name></person-group><year>2012</year><article-title>Dissecting the heterogeneity of triple-negative breast cancer</article-title><source>Journal of Clinical Oncology</source><volume>30</volume><fpage>1879</fpage><lpage>1887</lpage><pub-id pub-id-type="doi">10.1200/JCO.2011.38.2010</pub-id></element-citation></ref><ref id="bib37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Miquel</surname><given-names>C</given-names></name><name><surname>Jacob</surname><given-names>S</given-names></name><name><surname>Grandjouan</surname><given-names>S</given-names></name><name><surname>Aimé</surname><given-names>A</given-names></name><name><surname>Viguier</surname><given-names>J</given-names></name><name><surname>Sabourin</surname><given-names>JC</given-names></name><name><surname>Sarasin</surname><given-names>A</given-names></name><name><surname>Duval</surname><given-names>A</given-names></name><name><surname>Praz</surname><given-names>F</given-names></name></person-group><year>2007</year><article-title>Frequent alteration of DNA damage signalling and repair pathways in human colorectal cancers with microsatellite instability</article-title><source>Oncogene</source><volume>26</volume><fpage>5919</fpage><lpage>5926</lpage><pub-id pub-id-type="doi">10.1038/sj.onc.1210419</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Montgomery</surname><given-names>SB</given-names></name><name><surname>Goode</surname><given-names>DL</given-names></name><name><surname>Kvikstad</surname><given-names>E</given-names></name><name><surname>Albers</surname><given-names>CA</given-names></name><name><surname>Zhang</surname><given-names>ZD</given-names></name><name><surname>Mu</surname><given-names>XJ</given-names></name><name><surname>Ananda</surname><given-names>G</given-names></name><name><surname>Howie</surname><given-names>B</given-names></name><name><surname>Karczewski</surname><given-names>KJ</given-names></name><name><surname>Smith</surname><given-names>KS</given-names></name><name><surname>Anaya</surname><given-names>V</given-names></name><name><surname>Richardson</surname><given-names>R</given-names></name><name><surname>Davis</surname><given-names>J</given-names></name>, <collab>1000 Genomes Project Consortium</collab><name><surname>MacArthur</surname><given-names>DG</given-names></name><name><surname>Sidow</surname><given-names>A</given-names></name><name><surname>Duret</surname><given-names>L</given-names></name><name><surname>Gerstein</surname><given-names>M</given-names></name><name><surname>Makova</surname><given-names>KD</given-names></name><name><surname>Marchini</surname><given-names>J</given-names></name><name><surname>McVean</surname><given-names>G</given-names></name><name><surname>Lunter</surname><given-names>G</given-names></name></person-group><year>2013</year><article-title>The origin, evolution, and functional impact of short insertion-deletion variants identified in 179 human genomes</article-title><source>Genome Research</source><volume>23</volume><fpage>749</fpage><lpage>761</lpage><pub-id pub-id-type="doi">10.1101/gr.148718.112</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Murai</surname><given-names>J</given-names></name><name><surname>Huang</surname><given-names>SY</given-names></name><name><surname>Das</surname><given-names>BB</given-names></name><name><surname>Renaud</surname><given-names>A</given-names></name><name><surname>Zhang</surname><given-names>Y</given-names></name><name><surname>Doroshow</surname><given-names>JH</given-names></name><name><surname>Ji</surname><given-names>J</given-names></name><name><surname>Takeda</surname><given-names>S</given-names></name><name><surname>Pommier</surname><given-names>Y</given-names></name></person-group><year>2012</year><article-title>Trapping of PARP1 and PARP2 by clinical PARP inhibitors</article-title><source>Cancer Research</source><volume>72</volume><fpage>5588</fpage><lpage>5599</lpage><pub-id pub-id-type="doi">10.1158/0008-5472.CAN-12-2753</pub-id></element-citation></ref><ref id="bib40"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ng</surname><given-names>K</given-names></name><name><surname>Schrag</surname><given-names>D</given-names></name></person-group><year>2010</year><article-title>Microsatellite instability and adjuvant fluorouracil chemotherapy: a mismatch?</article-title><source>Journal of Clinical Oncology</source><volume>28</volume><fpage>3207</fpage><lpage>3210</lpage><pub-id pub-id-type="doi">10.1200/JCO.2010.28.9314</pub-id></element-citation></ref><ref id="bib41"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nik-Zainal</surname><given-names>S</given-names></name><name><surname>Alexandrov</surname><given-names>LB</given-names></name><name><surname>Wedge</surname><given-names>DC</given-names></name><name><surname>Van 
Loo</surname><given-names>P</given-names></name><name><surname>Greenman</surname><given-names>CD</given-names></name><name><surname>Raine</surname><given-names>K</given-names></name><name><surname>Jones</surname><given-names>D</given-names></name><name><surname>Hinton</surname><given-names>J</given-names></name><name><surname>Marshall</surname><given-names>J</given-names></name><name><surname>Stebbings</surname><given-names>LA</given-names></name><name><surname>Menzies</surname><given-names>A</given-names></name><name><surname>Martin</surname><given-names>S</given-names></name><name><surname>Leung</surname><given-names>K</given-names></name><name><surname>Chen</surname><given-names>L</given-names></name><name><surname>Leroy</surname><given-names>C</given-names></name><name><surname>Ramakrishna</surname><given-names>M</given-names></name><name><surname>Rance</surname><given-names>R</given-names></name><name><surname>Lau</surname><given-names>KW</given-names></name><name><surname>Mudie</surname><given-names>LJ</given-names></name><name><surname>Varela</surname><given-names>I</given-names></name><name><surname>McBride</surname><given-names>DJ</given-names></name><name><surname>Bignell</surname><given-names>GR</given-names></name><name><surname>Cooke</surname><given-names>SL</given-names></name><name><surname>Shlien</surname><given-names>A</given-names></name><name><surname>Gamble</surname><given-names>J</given-names></name><name><surname>Whitmore</surname><given-names>I</given-names></name><name><surname>Maddison</surname><given-names>M</given-names></name><name><surname>Tarpey</surname><given-names>PS</given-names></name><name><surname>Davies</surname><given-names>HR</given-names></name><name><surname>Papaemmanuil</surname><given-names>E</given-names></name><name><surname>Stephens</surname><given-names>PJ</given-names></name><name><surname>McLaren</surname><given-names>S</given-names></name><name><surname>Butler</surname><given-names>AP</given-names></name><name><surn
ame>Teague</surname><given-names>JW</given-names></name><name><surname>Jönsson</surname><given-names>G</given-names></name><name><surname>Garber</surname><given-names>JE</given-names></name><name><surname>Silver</surname><given-names>D</given-names></name><name><surname>Miron</surname><given-names>P</given-names></name><name><surname>Fatima</surname><given-names>A</given-names></name><name><surname>Boyault</surname><given-names>S</given-names></name><name><surname>Langerød</surname><given-names>A</given-names></name><name><surname>Tutt</surname><given-names>A</given-names></name><name><surname>Martens</surname><given-names>JW</given-names></name><name><surname>Aparicio</surname><given-names>SA</given-names></name><name><surname>Borg</surname><given-names>Å</given-names></name><name><surname>Salomon</surname><given-names>AV</given-names></name><name><surname>Thomas</surname><given-names>G</given-names></name><name><surname>Børresen-Dale</surname><given-names>AL</given-names></name><name><surname>Richardson</surname><given-names>AL</given-names></name><name><surname>Neuberger</surname><given-names>MS</given-names></name><name><surname>Futreal</surname><given-names>PA</given-names></name><name><surname>Campbell</surname><given-names>PJ</given-names></name><name><surname>Stratton</surname><given-names>MR</given-names></name></person-group><person-group person-group-type="author"><collab>Breast Cancer Working Group of the International Cancer Genome Consortium</collab></person-group><year>2012</year><article-title>Mutational processes molding the genomes of 21 breast cancers</article-title><source>Cell</source><volume>149</volume><fpage>979</fpage><lpage>993</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2012.04.024</pub-id></element-citation></ref><ref id="bib42"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Palles</surname><given-names>C</given-names></name><name><surname>Cazier</surname><given-names>JB</given-names></name><name><surname>Howarth</surname><given-names>KM</given-names></name><name><surname>Domingo</surname><given-names>E</given-names></name><name><surname>Jones</surname><given-names>AM</given-names></name><name><surname>Broderick</surname><given-names>P</given-names></name><name><surname>Kemp</surname><given-names>Z</given-names></name><name><surname>Spain</surname><given-names>SL</given-names></name><name><surname>Guarino</surname><given-names>E</given-names></name><name><surname>Salguero</surname><given-names>I</given-names></name><name><surname>Sherborne</surname><given-names>A</given-names></name><name><surname>Chubb</surname><given-names>D</given-names></name><name><surname>Carvajal-Carmona</surname><given-names>LG</given-names></name><name><surname>Ma</surname><given-names>Y</given-names></name><name><surname>Kaur</surname><given-names>K</given-names></name><name><surname>Dobbins</surname><given-names>S</given-names></name><name><surname>Barclay</surname><given-names>E</given-names></name><name><surname>Gorman</surname><given-names>M</given-names></name><name><surname>Martin</surname><given-names>L</given-names></name><name><surname>Kovac</surname><given-names>MB</given-names></name><name><surname>Humphray</surname><given-names>S</given-names></name>, <collab>CORGI Consortium</collab>, <collab>WGS500 
Consortium</collab><name><surname>Lucassen</surname><given-names>A</given-names></name><name><surname>Holmes</surname><given-names>CC</given-names></name><name><surname>Bentley</surname><given-names>D</given-names></name><name><surname>Donnelly</surname><given-names>P</given-names></name><name><surname>Taylor</surname><given-names>J</given-names></name><name><surname>Petridis</surname><given-names>C</given-names></name><name><surname>Roylance</surname><given-names>R</given-names></name><name><surname>Sawyer</surname><given-names>EJ</given-names></name><name><surname>Kerr</surname><given-names>DJ</given-names></name><name><surname>Clark</surname><given-names>S</given-names></name><name><surname>Grimes</surname><given-names>J</given-names></name><name><surname>Kearsey</surname><given-names>SE</given-names></name><name><surname>Thomas</surname><given-names>HJ</given-names></name><name><surname>McVean</surname><given-names>G</given-names></name><name><surname>Houlston</surname><given-names>RS</given-names></name><name><surname>Tomlinson</surname><given-names>I</given-names></name></person-group><year>2013</year><article-title>Germline mutations affecting the proofreading domains of POLE and POLD1 predispose to colorectal adenomas and carcinomas</article-title><source>Nature Genetics</source><volume>45</volume><fpage>136</fpage><lpage>144</lpage><pub-id pub-id-type="doi">10.1038/ng.2503</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Park</surname><given-names>JM</given-names></name><name><surname>Huang</surname><given-names>S</given-names></name><name><surname>Tougeron</surname><given-names>D</given-names></name><name><surname>Sinicrope</surname><given-names>FA</given-names></name></person-group><year>2013</year><article-title>MSH3 mismatch repair protein regulates sensitivity to cytotoxic drugs and a histone deacetylase inhibitor in human colon carcinoma 
cells</article-title><source>PLOS ONE</source><volume>8</volume><fpage>e65369</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0065369</pub-id></element-citation></ref><ref id="bib44"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Parsons</surname><given-names>MT</given-names></name><name><surname>Buchanan</surname><given-names>DD</given-names></name><name><surname>Thompson</surname><given-names>B</given-names></name><name><surname>Young</surname><given-names>JP</given-names></name><name><surname>Spurdle</surname><given-names>AB</given-names></name></person-group><year>2012</year><article-title>Correlation of tumour BRAF mutations and MLH1 methylation with germline mismatch repair (MMR) gene mutation status: a literature review assessing utility of tumour features for MMR variant classification</article-title><source>Journal of Medical Genetics</source><volume>49</volume><fpage>151</fpage><lpage>157</lpage><pub-id pub-id-type="doi">10.1136/jmedgenet-2011-100714</pub-id></element-citation></ref><ref id="bib45"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Peltomaki</surname><given-names>P</given-names></name></person-group><year>2014</year><article-title>Epigenetic mechanisms in the pathogenesis of Lynch syndrome</article-title><source>Clinical Genetics</source><volume>85</volume><fpage>403</fpage><lpage>412</lpage><pub-id pub-id-type="doi">10.1111/cge.12349</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Pena-Diaz</surname><given-names>J</given-names></name><name><surname>Bregenhorn</surname><given-names>S</given-names></name><name><surname>Ghodgaonkar</surname><given-names>M</given-names></name><name><surname>Follonier</surname><given-names>C</given-names></name><name><surname>Artola-Borán</surname><given-names>M</given-names></name><name><surname>Castor</surname><given-names>D</given-names></name><name><surname>Lopes</surname><given-names>M</given-names></name><name><surname>Sartori</surname><given-names>AA</given-names></name><name><surname>Jiricny</surname><given-names>J</given-names></name></person-group><year>2012</year><article-title>Noncanonical mismatch repair as a source of genomic instability in human cells</article-title><source>Molecular Cell</source><volume>47</volume><fpage>669</fpage><lpage>680</lpage><pub-id pub-id-type="doi">10.1016/j.molcel.2012.07.006</pub-id></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pinol</surname><given-names>V</given-names></name><name><surname>Castells</surname><given-names>A</given-names></name><name><surname>Andreu</surname><given-names>M</given-names></name><name><surname>Castellví-Bel</surname><given-names>S</given-names></name><name><surname>Alenda</surname><given-names>C</given-names></name><name><surname>Llor</surname><given-names>X</given-names></name><name><surname>Xicola</surname><given-names>RM</given-names></name><name><surname>Rodríguez-Moranta</surname><given-names>F</given-names></name><name><surname>Payá</surname><given-names>A</given-names></name><name><surname>Jover</surname><given-names>R</given-names></name><name><surname>Bessa</surname><given-names>X</given-names></name>, <collab>Gastrointestinal Oncology Group of the Spanish Gastroenterological Association</collab></person-group><year>2005</year><article-title>Accuracy of revised Bethesda guidelines, 
microsatellite instability, and immunohistochemistry for the identification of patients with hereditary nonpolyposis colorectal cancer</article-title><source>The Journal of the American Medical Association</source><volume>293</volume><fpage>1986</fpage><lpage>1994</lpage></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pleasance</surname><given-names>ED</given-names></name><name><surname>Cheetham</surname><given-names>RK</given-names></name><name><surname>Stephens</surname><given-names>PJ</given-names></name><name><surname>McBride</surname><given-names>DJ</given-names></name><name><surname>Humphray</surname><given-names>SJ</given-names></name><name><surname>Greenman</surname><given-names>CD</given-names></name><name><surname>Varela</surname><given-names>I</given-names></name><name><surname>Lin</surname><given-names>ML</given-names></name><name><surname>Ordóñez</surname><given-names>GR</given-names></name><name><surname>Bignell</surname><given-names>GR</given-names></name><name><surname>Ye</surname><given-names>K</given-names></name><name><surname>Alipaz</surname><given-names>J</given-names></name><name><surname>Bauer</surname><given-names>MJ</given-names></name><name><surname>Beare</surname><given-names>D</given-names></name><name><surname>Butler</surname><given-names>A</given-names></name><name><surname>Carter</surname><given-names>RJ</given-names></name><name><surname>Chen</surname><given-names>L</given-names></name><name><surname>Cox</surname><given-names>AJ</given-names></name><name><surname>Edkins</surname><given-names>S</given-names></name><name><surname>Kokko-Gonzales</surname><given-names>PI</given-names></name><name><surname>Gormley</surname><given-names>NA</given-names></name><name><surname>Grocock</surname><given-names>RJ</given-names></name><name><surname>Haudenschild</surname><given-names>CD</given-names></name><name><surname>Hims</surname><given-names>MM</given
-names></name><name><surname>James</surname><given-names>T</given-names></name><name><surname>Jia</surname><given-names>M</given-names></name><name><surname>Kingsbury</surname><given-names>Z</given-names></name><name><surname>Leroy</surname><given-names>C</given-names></name><name><surname>Marshall</surname><given-names>J</given-names></name><name><surname>Menzies</surname><given-names>A</given-names></name><name><surname>Mudie</surname><given-names>LJ</given-names></name><name><surname>Ning</surname><given-names>Z</given-names></name><name><surname>Royce</surname><given-names>T</given-names></name><name><surname>Schulz-Trieglaff</surname><given-names>OB</given-names></name><name><surname>Spiridou</surname><given-names>A</given-names></name><name><surname>Stebbings</surname><given-names>LA</given-names></name><name><surname>Szajkowski</surname><given-names>L</given-names></name><name><surname>Teague</surname><given-names>J</given-names></name><name><surname>Williamson</surname><given-names>D</given-names></name><name><surname>Chin</surname><given-names>L</given-names></name><name><surname>Ross</surname><given-names>MT</given-names></name><name><surname>Campbell</surname><given-names>PJ</given-names></name><name><surname>Bentley</surname><given-names>DR</given-names></name><name><surname>Futreal</surname><given-names>PA</given-names></name><name><surname>Stratton</surname><given-names>MR</given-names></name></person-group><year>2010</year><article-title>A comprehensive catalogue of somatic mutations from a human cancer genome</article-title><source>Nature</source><volume>463</volume><fpage>191</fpage><lpage>196</lpage><pub-id pub-id-type="doi">10.1038/nature08658</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Pleasance</surname><given-names>ED</given-names></name><name><surname>Stephens</surname><given-names>PJ</given-names></name><name><surname>O'Meara</surname><given-names>S</given-names></name><name><surname>McBride</surname><given-names>DJ</given-names></name><name><surname>Meynert</surname><given-names>A</given-names></name><name><surname>Jones</surname><given-names>D</given-names></name><name><surname>Lin</surname><given-names>ML</given-names></name><name><surname>Beare</surname><given-names>D</given-names></name><name><surname>Lau</surname><given-names>KW</given-names></name><name><surname>Greenman</surname><given-names>C</given-names></name><name><surname>Varela</surname><given-names>I</given-names></name><name><surname>Nik-Zainal</surname><given-names>S</given-names></name><name><surname>Davies</surname><given-names>HR</given-names></name><name><surname>Ordoñez</surname><given-names>GR</given-names></name><name><surname>Mudie</surname><given-names>LJ</given-names></name><name><surname>Latimer</surname><given-names>C</given-names></name><name><surname>Edkins</surname><given-names>S</given-names></name><name><surname>Stebbings</surname><given-names>L</given-names></name><name><surname>Chen</surname><given-names>L</given-names></name><name><surname>Jia</surname><given-names>M</given-names></name><name><surname>Leroy</surname><given-names>C</given-names></name><name><surname>Marshall</surname><given-names>J</given-names></name><name><surname>Menzies</surname><given-names>A</given-names></name><name><surname>Butler</surname><given-names>A</given-names></name><name><surname>Teague</surname><given-names>JW</given-names></name><name><surname>Mangion</surname><given-names>J</given-names></name><name><surname>Sun</surname><given-names>YA</given-names></name><name><surname>McLaughlin</surname><given-names>SF</given-names></name><name><surname>Peckham</surname><given-names>HE</given-names></name><name><surname>Tsung</surname><given-
names>EF</given-names></name><name><surname>Costa</surname><given-names>GL</given-names></name><name><surname>Lee</surname><given-names>CC</given-names></name><name><surname>Minna</surname><given-names>JD</given-names></name><name><surname>Gazdar</surname><given-names>A</given-names></name><name><surname>Birney</surname><given-names>E</given-names></name><name><surname>Rhodes</surname><given-names>MD</given-names></name><name><surname>McKernan</surname><given-names>KJ</given-names></name><name><surname>Stratton</surname><given-names>MR</given-names></name><name><surname>Futreal</surname><given-names>PA</given-names></name><name><surname>Campbell</surname><given-names>PJ</given-names></name></person-group><year>2010</year><article-title>A small-cell lung cancer genome with complex signatures of tobacco exposure</article-title><source>Nature</source><volume>463</volume><fpage>184</fpage><lpage>190</lpage><pub-id pub-id-type="doi">10.1038/nature08629</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Quinlan</surname><given-names>AR</given-names></name><name><surname>Hall</surname><given-names>IM</given-names></name></person-group><year>2010</year><article-title>BEDTools: a flexible suite of utilities for comparing genomic features</article-title><source>Bioinformatics</source><volume>26</volume><fpage>841</fpage><lpage>842</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btq033</pub-id></element-citation></ref><ref id="bib51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Reumers</surname><given-names>J</given-names></name><name><surname>De 
Rijk</surname><given-names>P</given-names></name><name><surname>Zhao</surname><given-names>H</given-names></name><name><surname>Liekens</surname><given-names>A</given-names></name><name><surname>Smeets</surname><given-names>D</given-names></name><name><surname>Cleary</surname><given-names>J</given-names></name><name><surname>Van Loo</surname><given-names>P</given-names></name><name><surname>Van Den Bossche</surname><given-names>M</given-names></name><name><surname>Catthoor</surname><given-names>K</given-names></name><name><surname>Sabbe</surname><given-names>B</given-names></name><name><surname>Despierre</surname><given-names>E</given-names></name><name><surname>Vergote</surname><given-names>I</given-names></name><name><surname>Hilbush</surname><given-names>B</given-names></name><name><surname>Lambrechts</surname><given-names>D</given-names></name><name><surname>Del-Favero</surname><given-names>J</given-names></name></person-group><year>2011</year><article-title>Optimized filtering reduces the error rate in detecting genomic variants by short-read sequencing</article-title><source>Nature Biotechnology</source><volume>30</volume><fpage>61</fpage><lpage>68</lpage><pub-id pub-id-type="doi">10.1038/nbt.2053</pub-id></element-citation></ref><ref id="bib52"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Richman</surname><given-names>SD</given-names></name><name><surname>Seymour</surname><given-names>MT</given-names></name><name><surname>Chambers</surname><given-names>P</given-names></name><name><surname>Elliott</surname><given-names>F</given-names></name><name><surname>Daly</surname><given-names>CL</given-names></name><name><surname>Meade</surname><given-names>AM</given-names></name><name><surname>Taylor</surname><given-names>G</given-names></name><name><surname>Barrett</surname><given-names>JH</given-names></name><name><surname>Quirke</surname><given-names>P</given-names></name></person-group><year>2009</year><article-title>KRAS and BRAF mutations in advanced colorectal cancer are associated with poor prognosis but do not preclude benefit from oxaliplatin or irinotecan: results from the MRC FOCUS trial</article-title><source>Journal of Clinical Oncology</source><volume>27</volume><fpage>5931</fpage><lpage>5937</lpage><pub-id pub-id-type="doi">10.1200/JCO.2009.22.4295</pub-id></element-citation></ref><ref id="bib53"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Saint-Ruf</surname><given-names>C</given-names></name><name><surname>Matic</surname><given-names>I</given-names></name></person-group><year>2006</year><article-title>Environmental tuning of mutation rates</article-title><source>Environmental Microbiology</source><volume>8</volume><fpage>193</fpage><lpage>199</lpage><pub-id pub-id-type="doi">10.1111/j.1462-2920.2005.00968.x</pub-id></element-citation></ref><ref id="bib54"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shell</surname><given-names>SS</given-names></name><name><surname>Putnam</surname><given-names>CD</given-names></name><name><surname>Kolodner</surname><given-names>RD</given-names></name></person-group><year>2007</year><article-title>The N terminus of <italic>Saccharomyces cerevisiae</italic> 
Msh6 is an unstructured tether to PCNA</article-title><source>Molecular Cell</source><volume>26</volume><fpage>565</fpage><lpage>578</lpage><pub-id pub-id-type="doi">10.1016/j.molcel.2007.04.024</pub-id></element-citation></ref><ref id="bib55"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname><given-names>CG</given-names></name><name><surname>Fisher</surname><given-names>D</given-names></name><name><surname>Claes</surname><given-names>B</given-names></name><name><surname>Maughan</surname><given-names>TS</given-names></name><name><surname>Idziaszczyk</surname><given-names>S</given-names></name><name><surname>Peuteman</surname><given-names>G</given-names></name><name><surname>Harris</surname><given-names>R</given-names></name><name><surname>James</surname><given-names>MD</given-names></name><name><surname>Meade</surname><given-names>A</given-names></name><name><surname>Jasani</surname><given-names>B</given-names></name><name><surname>Adams</surname><given-names>RA</given-names></name><name><surname>Kenny</surname><given-names>S</given-names></name><name><surname>Kaplan</surname><given-names>R</given-names></name><name><surname>Lambrechts</surname><given-names>D</given-names></name><name><surname>Cheadle</surname><given-names>JP</given-names></name></person-group><year>2013</year><article-title>Somatic profiling of the epidermal growth factor receptor pathway in tumors from patients with advanced colorectal cancer treated with chemotherapy ± cetuximab</article-title><source>Clinical Cancer Research</source><volume>19</volume><fpage>4104</fpage><lpage>4113</lpage><pub-id pub-id-type="doi">10.1158/1078-0432.CCR-12-2581</pub-id></element-citation></ref><ref id="bib56"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Song</surname><given-names>CX</given-names></name><name><surname>Clark</surname><given-names>TA</given-names></name><name><surname>Lu</surname><given-names>XY</given-names></name><name><surname>Kislyuk</surname><given-names>A</given-names></name><name><surname>Dai</surname><given-names>Q</given-names></name><name><surname>Turner</surname><given-names>SW</given-names></name><name><surname>He</surname><given-names>C</given-names></name><name><surname>Korlach</surname><given-names>J</given-names></name></person-group><year>2012</year><article-title>Sensitive and specific single-molecule sequencing of 5-hydroxymethylcytosine</article-title><source>Nature Methods</source><volume>9</volume><fpage>75</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.1038/nmeth.1779</pub-id></element-citation></ref><ref id="bib57"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stamatoyannopoulos</surname><given-names>JA</given-names></name><name><surname>Adzhubei</surname><given-names>I</given-names></name><name><surname>Thurman</surname><given-names>RE</given-names></name><name><surname>Kryukov</surname><given-names>GV</given-names></name><name><surname>Mirkin</surname><given-names>SM</given-names></name><name><surname>Sunyaev</surname><given-names>SR</given-names></name></person-group><year>2009</year><article-title>Human mutation rate associated with DNA replication timing</article-title><source>Nature Genetics</source><volume>41</volume><fpage>393</fpage><lpage>395</lpage><pub-id pub-id-type="doi">10.1038/ng.363</pub-id></element-citation></ref><ref id="bib58"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Takahashi</surname><given-names>M</given-names></name><name><surname>Koi</surname><given-names>M</given-names></name><name><surname>Balaguer</surname><given-names>F</given-names></name><name><surname>Boland</surname><given-names>CR</given-names></name><name><surname>Goel</surname><given-names>A</given-names></name></person-group><year>2011</year><article-title>MSH3 mediates sensitization of colorectal cancer cells to cisplatin, oxaliplatin, and a poly(ADP-ribose) polymerase inhibitor</article-title><source>The Journal of Biological Chemistry</source><volume>286</volume><fpage>12157</fpage><lpage>12165</lpage><pub-id pub-id-type="doi">10.1074/jbc.M110.198804</pub-id></element-citation></ref><ref id="bib59"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname><given-names>YY</given-names></name><name><surname>McGaughran</surname><given-names>J</given-names></name><name><surname>Ferguson</surname><given-names>K</given-names></name><name><surname>Walsh</surname><given-names>MD</given-names></name><name><surname>Buchanan</surname><given-names>DD</given-names></name><name><surname>Young</surname><given-names>JP</given-names></name><name><surname>Webb</surname><given-names>PM</given-names></name><name><surname>Obermair</surname><given-names>A</given-names></name><name><surname>Spurdle</surname><given-names>AB</given-names></name>, <collab>ANECS Group</collab></person-group><year>2013</year><article-title>Improving identification of lynch syndrome patients: a comparison of research data with clinical records</article-title><source>International Journal of Cancer</source><volume>132</volume><fpage>2876</fpage><lpage>2883</lpage><pub-id pub-id-type="doi">10.1002/ijc.27978</pub-id></element-citation></ref><ref id="bib60"><element-citation publication-type="journal"><person-group 
person-group-type="author"><collab>TCGA</collab></person-group><year>2012</year><article-title>Comprehensive molecular characterization of human colon and rectal cancer</article-title><source>Nature</source><volume>487</volume><fpage>330</fpage><lpage>337</lpage><pub-id pub-id-type="doi">10.1038/nature11252</pub-id></element-citation></ref><ref id="bib61"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tie</surname><given-names>J</given-names></name><name><surname>Lipton</surname><given-names>L</given-names></name><name><surname>Desai</surname><given-names>J</given-names></name><name><surname>Gibbs</surname><given-names>P</given-names></name><name><surname>Jorissen</surname><given-names>RN</given-names></name><name><surname>Christie</surname><given-names>M</given-names></name><name><surname>Drummond</surname><given-names>KJ</given-names></name><name><surname>Thomson</surname><given-names>BN</given-names></name><name><surname>Usatoff</surname><given-names>V</given-names></name><name><surname>Evans</surname><given-names>PM</given-names></name><name><surname>Pick</surname><given-names>AW</given-names></name><name><surname>Knight</surname><given-names>S</given-names></name><name><surname>Carne</surname><given-names>PW</given-names></name><name><surname>Berry</surname><given-names>R</given-names></name><name><surname>Polglase</surname><given-names>A</given-names></name><name><surname>McMurrick</surname><given-names>P</given-names></name><name><surname>Zhao</surname><given-names>Q</given-names></name><name><surname>Busam</surname><given-names>D</given-names></name><name><surname>Strausberg</surname><given-names>RL</given-names></name><name><surname>Domingo</surname><given-names>E</given-names></name><name><surname>Tomlinson</surname><given-names>IP</given-names></name><name><surname>Midgley</surname><given-names>R</given-names></name><name><surname>Kerr</surname><given-names>D</given-names></name><name><surname>Sieber</
surname><given-names>OM</given-names></name></person-group><year>2011</year><article-title>KRAS mutation is associated with lung metastasis in patients with curatively resected colorectal cancer</article-title><source>Clinical Cancer Research</source><volume>17</volume><fpage>1122</fpage><lpage>1130</lpage><pub-id pub-id-type="doi">10.1158/1078-0432.CCR-10-1720</pub-id></element-citation></ref><ref id="bib62"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Van Loo</surname><given-names>P</given-names></name><name><surname>Nordgard</surname><given-names>SH</given-names></name><name><surname>Lingjærde</surname><given-names>OC</given-names></name><name><surname>Russnes</surname><given-names>HG</given-names></name><name><surname>Rye</surname><given-names>IH</given-names></name><name><surname>Sun</surname><given-names>W</given-names></name><name><surname>Weigman</surname><given-names>VJ</given-names></name><name><surname>Marynen</surname><given-names>P</given-names></name><name><surname>Zetterberg</surname><given-names>A</given-names></name><name><surname>Naume</surname><given-names>B</given-names></name><name><surname>Perou</surname><given-names>CM</given-names></name><name><surname>Børresen-Dale</surname><given-names>AL</given-names></name><name><surname>Kristensen</surname><given-names>VN</given-names></name></person-group><year>2010</year><article-title>Allele-specific copy number analysis of tumors</article-title><source>Proceedings of the National Academy of Sciences of USA</source><volume>107</volume><fpage>16910</fpage><lpage>16915</lpage><pub-id pub-id-type="doi">10.1073/pnas.1009843107</pub-id></element-citation></ref><ref id="bib63"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Vichai</surname><given-names>V</given-names></name><name><surname>Kirtikara</surname><given-names>K</given-names></name></person-group><year>2006</year><article-title>Sulforhodamine B colorimetric assay for cytotoxicity screening</article-title><source>Nature Protocols</source><volume>1</volume><fpage>1112</fpage><lpage>1116</lpage><pub-id pub-id-type="doi">10.1038/nprot.2006.179</pub-id></element-citation></ref><ref id="bib64"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vilar</surname><given-names>E</given-names></name><name><surname>Bartnik</surname><given-names>CM</given-names></name><name><surname>Stenzel</surname><given-names>SL</given-names></name><name><surname>Raskin</surname><given-names>L</given-names></name><name><surname>Ahn</surname><given-names>J</given-names></name><name><surname>Moreno</surname><given-names>V</given-names></name><name><surname>Mukherjee</surname><given-names>B</given-names></name><name><surname>Iniesta</surname><given-names>MD</given-names></name><name><surname>Morgan</surname><given-names>MA</given-names></name><name><surname>Rennert</surname><given-names>G</given-names></name><name><surname>Gruber</surname><given-names>SB</given-names></name></person-group><year>2011</year><article-title>MRE11 deficiency increases sensitivity to poly(ADP-ribose) polymerase inhibition in microsatellite unstable colorectal cancers</article-title><source>Cancer Research</source><volume>71</volume><fpage>2632</fpage><lpage>2642</lpage><pub-id pub-id-type="doi">10.1158/0008-5472.CAN-10-1120</pub-id></element-citation></ref><ref id="bib65"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Watson</surname><given-names>JV</given-names></name><name><surname>Chambers</surname><given-names>SH</given-names></name><name><surname>Smith</surname><given-names>PJ</given-names></name></person-group><year>1987</year><article-title>A pragmatic approach to the analysis of DNA histograms with a definable G1 peak</article-title><source>Cytometry</source><volume>8</volume><fpage>1</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1002/cyto.990080101</pub-id></element-citation></ref><ref id="bib66"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wheeler</surname><given-names>DA</given-names></name><name><surname>Wang</surname><given-names>L</given-names></name></person-group><year>2013</year><article-title>From human genome to cancer genome: the first decade</article-title><source>Genome Research</source><volume>23</volume><fpage>1054</fpage><lpage>1062</lpage><pub-id pub-id-type="doi">10.1101/gr.157602.113</pub-id></element-citation></ref><ref id="bib67"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Yoon</surname><given-names>K</given-names></name><name><surname>Lee</surname><given-names>S</given-names></name><name><surname>Han</surname><given-names>TS</given-names></name><name><surname>Moon</surname><given-names>SY</given-names></name><name><surname>Yun</surname><given-names>SM</given-names></name><name><surname>Kong</surname><given-names>SH</given-names></name><name><surname>Jho</surname><given-names>S</given-names></name><name><surname>Choe</surname><given-names>J</given-names></name><name><surname>Yu</surname><given-names>J</given-names></name><name><surname>Lee</surname><given-names>HJ</given-names></name><name><surname>Park</surname><given-names>JH</given-names></name><name><surname>Kim</surname><given-names>HM</given-names></name><name><surname>Lee</surname><given-names>SY</given-names></name><name><surname>Park</surname><given-names>J</given-names></name><name><surname>Kim</surname><given-names>WH</given-names></name><name><surname>Bhak</surname><given-names>J</given-names></name><name><surname>Yang</surname><given-names>HK</given-names></name><name><surname>Kim</surname><given-names>SJ</given-names></name></person-group><year>2013</year><article-title>Comprehensive genome- and transcriptome-wide analyses of mutations associated with microsatellite instability in Korean gastric cancers</article-title><source>Genome Research</source><volume>23</volume><fpage>1109</fpage><lpage>1117</lpage><pub-id pub-id-type="doi">10.1101/gr.145706.112</pub-id></element-citation></ref><ref id="bib68"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zook</surname><given-names>JM</given-names></name><name><surname>Chapman</surname><given-names>B</given-names></name><name><surname>Wang</surname><given-names>J</given-names></name><name><surname>Mittelman</surname><given-names>D</given-names></name><name><surname>Hofmann</surname><given-names>O</given-names></name><name><surname>Hide</surname><given-names>W</given-names></name><name><surname>Salit</surname><given-names>M</given-names></name></person-group><year>2014</year><article-title>Integrating human sequence data sets provides a resource of benchmark SNP and indel genotype calls</article-title><source>Nature Biotechnology</source><volume>32</volume><fpage>246</fpage><lpage>251</lpage><pub-id pub-id-type="doi">10.1038/nbt.2835</pub-id></element-citation></ref></ref-list></back><sub-article article-type="article-commentary" id="SA1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.02725.040</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Stamatoyannopoulos</surname><given-names>John</given-names></name><role>Reviewing editor</role><aff><institution>University of Washington</institution>, <country>United States</country></aff></contrib></contrib-group></front-stub><body><boxed-text><p>eLife posts the editorial decision letter and author response on a selection of the published articles (subject to the approval of the authors). An edited version of the letter sent to the authors after peer review is shown, indicating the substantive concerns or comments; minor concerns are not usually shown. Reviewers have the opportunity to discuss the decision before the letter is sent (see <ext-link ext-link-type="uri" xlink:href="http://elifesciences.org/review-process">review process</ext-link>). 
Similarly, the author response typically shows only responses to the major concerns raised by the reviewers.</p></boxed-text><p>Thank you for sending your work entitled “Mismatch repair deficiency endows tumors with a unique mutation signature and sensitivity to DNA double-strand breaks” for consideration at <italic>eLife.</italic> Your article has been favorably evaluated by Stylianos Antonarakis (Senior editor), a Reviewing editor, and 2 reviewers, one of whom, Thilo Dörk, has agreed to reveal his identity.</p><p>The Senior editor has assembled the following comments to help you prepare a revised submission.</p><p>1) There is a major issue of mapping and calling the in-dels within short repeats that are typical of MSI + cancers. This can lead to unmapped reads, mismapping and type 1 and type 2 errors. This problem especially affects the gapped reads in the Complete Genomics platform. Strategies to mitigate these issues are not mentioned. I have considerable doubts about the inclusion of the whole-genome data in this manuscript - after all, N = 3 is a very small number anyway – and I suggest that the issue should be addressed in exome data (even if some of those data are from the WGS cancers).</p><p>2) The sample set is heterogeneous in terms of cancer of origin and derivation from primary cancers and cultures that are likely to have been subjected to considerable in vitro selection pressure and/or founder effects.</p><p>3) The similarity between germline and somatic mutation spectra might, in part be caused by many somatic mutations occurring in normal progenitors prior to loss of MMR. Is there a way of investigating this in comparison with non-MMR tumors (e.g. examining effects of age)?</p><p>4) The 59-marker exomic MSI panel is useful, especially for Lynch syndrome, and appears to perform well.</p><p>5) The pathway analysis presumably incorporated all detected variants. 
Whilst strongly suggestive, does filtering for variants with strong evidence of functionality alter these conclusions? Moreover, the burden of mutations in these pathways might relate to redundant function – noting that MSI + cancers are usually near-diploid - rather than positive selection. This may or may not matter for therapeutic purposes, but can it be checked in some way?</p></body></sub-article><sub-article article-type="reply" id="SA2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.02725.041</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><p><italic>1) There is a major issue of mapping and calling the in-dels within short repeats that are typical of MSI + cancers. This can lead to unmapped reads, mismapping and type 1 and type 2 errors. This problem especially affects the gapped reads in the Complete Genomics platform. Strategies to mitigate these issues are not mentioned. I have considerable doubts about the inclusion of the whole-genome data in this manuscript - after all, N=3 is a very small number anyway – and I suggest that the issue should be addressed in exome data (even if some of those data are from the WGS cancers).</italic></p><p>We thank the reviewer for this comment. Indeed, different sequencing platforms are each characterized by their specific false-positive and false-negative variant detection rates. For example, 10.5% of indels were false-positive in Complete Genomics (CG) datasets, whereas 27.7% of indels were missed (false-negatives), as discussed by Zook et al. (<xref ref-type="bibr" rid="bib68">Zook et al., 2014</xref>). For Illumina genomes, 6.9% of indels were false-positive and 0.5% of indels were false-negative.</p><p>First, to address the issue of false-positives (type 1 error), we expanded the number of randomly selected indels from the CG-sequenced MMR-deficient tumor (MMR-1) and Illumina-sequenced MMR-deficient tumor (MMR-2), respectively. 
We chose an orthogonal validation technology, i.e., Sequenom MassARRAY, to validate a total number of 391 indels. The overall validation rate that we obtained for indels was 90.3% in CG genomes and 85.9% in Illumina-sequenced genomes (see <xref ref-type="table" rid="tbl3">Table 1</xref> below and <xref ref-type="supplementary-material" rid="SD3-data">Figure 1—source data 3</xref>). These validation rates are thus very similar between both platforms. The validation rates are also much higher than those that we observed for indels in MMR-proficient tumors and those the reviewer is correctly referring to in the literature (<xref ref-type="bibr" rid="bib44">Parsons et al., 2012</xref>; <xref ref-type="bibr" rid="bib45">Peltomaki, 2014</xref>). The low validation rates for indels in the MMR-proficient whole-genome tumors most probably reflect the fact that in germ-line genomes, as well as MMR-proficient tumor genomes, the number of true-positive indels is low in comparison to the number of false-positive indels that are detected. However, in MMR-deficient tumors, due to their specific hypermutator phenotype, the number of true-positive indels is vastly increased, thereby rendering the false positive fraction proportionally much smaller. In the revised manuscript, we have therefore highlighted the possibility of having false-positive and false-negative findings in the whole-genomes and indicate that this may affect observed indel rates. We also discuss that validation rates in MMR-deficient tumors are much higher than in the MMR-proficient tumors and explain the reasons for this. As highlighted by the reviewer, sequence reads containing both indels and substitutions (i.e., reads with a relatively high percentage of mismatches) are more prone to mismapping than sequence reads containing only substitutions or indels. 
We acknowledge this issue, and have in response deleted the paragraph describing that somatic indels and substitutions are often located close to each other in the 3 MMR-deficient tumors that were whole-genome sequenced. Indeed, such observations are prone to contamination by false-positives.</p><p><table-wrap id="tbl3" position="anchor"><label>Table 1.</label><caption><p>Validation rate of somatic indels detected upon whole-genome sequencing. These additional validation experiments for indels have been included in the revised manuscript</p></caption><table frame="hsides" rules="groups"><thead><tr><th rowspan="2">Tumor</th><th colspan="3">Somatic Indels</th></tr><tr><th>Confirmed</th><th>Not confirmed</th><th>Validation rate</th></tr></thead><tbody><tr><td>MMR- 1</td><td>187</td><td>20</td><td>90.3%</td></tr><tr><td>MMR- 2</td><td>103</td><td>21</td><td>83.1%</td></tr><tr><td>MMR- 3</td><td>54</td><td>6</td><td>90.0%</td></tr><tr><td>MMR+ 1</td><td>0</td><td>9</td><td>0.0%</td></tr><tr><td>MMR+ 2</td><td>2</td><td>7</td><td>22.2%</td></tr></tbody></table></table-wrap></p><p>Secondly, we have followed the reviewer’s suggestion and have removed the 3 whole-genomes from the analyses aimed at identifying recurrent mutations and constructing a novel MSI panel. In particular, we repeated all analyses with only the 13 genomes subjected to Illumina exome-sequencing. We observed that in coding regions, 1.4% of homopolymers were affected at least once (i.e. in 2073 homopolymers out of a total of 29,663), whereas 414 were affected at least twice. Furthermore, 47 homopolymers were affected in ≥5 samples. In 3’UTR and 5’UTRs, 2296 and 105 homopolymers were affected in ≥5 samples respectively. When randomly selecting recurrent indels to design a panel of recurrent markers capable of assessing MSI, 54 out of the 59 originally selected markers were still selected, as they affected ≥5 out of 13 tumors (compared to 59 recurrent indels affecting ≥6 out of 16 tumors). 
Of these 54 markers, 45 markers were in UTRs and 9 were in coding regions. Applying the 54-marker panel on the same discovery set of 236 EM tumors as described in the original manuscript, we also found 3 positive markers as the threshold with the best Matthews correlation coefficient (<xref ref-type="fig" rid="fig8">Author response image 1A and 1B</xref>).</p><p>When comparing our 54 markers against the Bethesda panel, we equally found that the 54-marker panel had a higher sensitivity compared to Bethesda. Specifically, we applied the 54-marker panel to a set of 114 endometrial tumors and a set of 126 colorectal tumors as described in the original manuscript. For the EM tumors, 73 tumors (64%) were defined as MSS/MSI-L and 41 tumors (36%) as MSI-H. Out of these 41 MSI-H tumors, Bethesda identified 29 tumors as MSI-H (&gt;2 markers positive), 7 tumors as MSI-L and 5 tumors as MSS. Vice versa, Bethesda did not identify any MSI-H tumor that was not identified by our novel MSI panel (<xref ref-type="fig" rid="fig8">Author response image 1C</xref>). IHC on 9 out of 12 discordant samples confirmed that each of these samples was deficient either for MLH1 or MSH2, and thus MMR-deficient. No tumor slides were available for the remaining 3 samples. The 9 discordant samples we had access to were confirmed as true positives by IHC. For CRC tumors, there were 97 MSS tumors in our 54-marker panel that were concordantly called MSS or MSI-L by the Bethesda panel. The remaining 29 samples were detected as MSI in the 54-marker panel (<xref ref-type="fig" rid="fig8">Author response image 1D</xref>). 28 of these were also called MSI-H by the Bethesda panel, whereas one was called MSS by the Bethesda panel. 
It had a <italic>BRAF</italic> mutation and was <italic>MLH1</italic> hypermethylated, thereby confirming MMR-deficiency and correct classification by the 54-marker panel.</p><p>Finally, we also repeated our pathway analyses on the genes affected by indels in the 13 exomes, rather than on the whole set of 16 exomes, 3 of which were generated by whole-genome sequencing. Pathway analyses on the 3856 genes affected by a somatic indel using IPA<sup>®</sup> revealed that the “<italic>Role of BRCA1 in DNA damage response</italic>” was the top enriched pathway (<italic>P</italic>=4.2E-04). IPA<sup>®</sup> analysis of 1302 genes affected by recurrent indels revealed that the <italic>“DNA double-strand break (DSB) repair by Homologous Recombination (HR)”</italic> was the top enriched pathway (<italic>P</italic>=4.7E-03). Pathway analyses of 6736 indels in MMR-deficient tumors using GenomeMuSiC revealed that the “<italic>ATR/BRCA pathway</italic>”, “<italic>Homologous recombination repair</italic>” and “<italic>DNA repair</italic>” pathways were ranked highest in BioCarta, DNARepairDB and Reactome databases respectively (<italic>P</italic>=4.9E-13, <italic>P</italic>=1.8E-03 and <italic>P</italic>=5.7E-08, respectively). Overall, these results are nearly identical to the data generated on the 16 exomes, as presented in the original manuscript.</p><p>In conclusion, since our data were not significantly affected or did not change any of our conclusions, depending on whether we analyzed 13 or 16 genomes, we chose to present the data of the 16 exomes as the main analysis. However, in the revised manuscript, we have now added a sentence highlighting that data and conclusions did not change when the analysis was limited to the 13 exomes generated by Illumina exome-sequencing only. 
Furthermore, since the response to this comment will be published in parallel to the manuscript, a critical reader will be able to assess in full detail that data did not change after removing the 3 whole-genomes from the analysis.<fig id="fig8" position="float"><label>Author response image 1.</label><caption><p>The 54-marker panel generated from 13 Illumina-sequenced exomes for MSI testing.</p></caption><graphic xlink:href="elife02725f008"/></fig></p><p><italic>2) The sample set is heterogeneous in terms of cancer of origin and derivation from primary cancers and cultures that are likely to have been subjected to considerable</italic> in vitro <italic>selection pressure and/or founder effects</italic>.</p><p>We agree with the reviewer that it is important to consider the heterogeneity of the tumors in terms of cancer of origin or derivation procedure. We have therefore performed a clustering analysis of all MMR-deficient tumors that we sequenced for the genes affected by either a somatic substitution or indel in the coding region. As can be appreciated from <xref ref-type="fig" rid="fig9">Author response image 2</xref> below, no obvious subgroup in terms of cancer of origin was observed. In the revised manuscript, this figure has also been added as <xref ref-type="fig" rid="fig4s1">Figure 4–figure supplement 1</xref>.<fig id="fig9" position="float"><label>Author response image 2.</label><caption><p>Clustering analysis of all samples based on the genes carrying somatic mutations in their coding regions.</p></caption><graphic xlink:href="elife02725f009"/></fig></p><p>The figure also shows that there is no distinct difference between primary tumors and data generated on the primary cell cultures. As mentioned in the revised manuscript, we specifically chose to use primary tumor cultures of low passage rather than tumor cell lines, because the latter have been subject to much more selective pressure than primary tumor cultures of low passage. 
In addition to the above cluster analysis, we also performed pathway analysis on the 10 primary tumor tissues only. Pathway analyses of all genes affected by a somatic indel using IPA<sup>®</sup> revealed that the “<italic>Role of BRCA1 in DNA damage response</italic>” and “<italic>DNA double-strand break (DSB) repair by Homologous Recombination (HR)</italic>” were the top enriched pathways (<italic>P</italic>=6.5E-03 and <italic>P</italic>=1.1E-02, respectively). IPA<sup>®</sup> analysis of genes affected by recurrent indels revealed that the “<italic>Role of BRCA1 in DNA damage response</italic>” was also enriched (<italic>P</italic>=2.0E-03). Pathway analyses of all indels in MMR-deficient tumors using GenomeMuSiC revealed that the “<italic>ATR/BRCA pathway</italic>”, “<italic>Homologous recombination repair</italic>” and “<italic>DNA repair</italic>” pathways were ranked highest in BioCarta, DNARepairDB and Reactome databases respectively (<italic>P</italic>=1.0E-09, <italic>P</italic>=0.4E-02 and <italic>P</italic>=3.4E-06, respectively). The results derived only from the primary tumors suggest that MMR-deficient tumors are indeed enriched in indels affecting the DSB repair pathway. Data are thus highly concordant with the results shown in the manuscript.</p><p><italic>3) The similarity between germline and somatic mutation spectra might, in part be caused by many somatic mutations occurring in normal progenitors prior to loss of MMR. Is there a way of investigating this in comparison with non-MMR tumors (e.g</italic>. <italic>examining effects of age)?</italic></p><p>We thank the reviewer for this insightful hypothesis. However, we found no correlation between age at diagnosis and the number of mutations detectable (<italic>P</italic>=0.86). Moreover, although the age at diagnosis of patients with MMR-proficient and MMR-deficient tumors was very comparable (67 and 62 years respectively), MMR-deficient tumors carried &gt;55 times more mutations than MMR-proficient tumors. 
When compared to MMR-deficient tumors, MMR-proficient somatic mutations thus comprise at most only 2% of all mutations in MMR-deficient tumors, a fraction that is unlikely to contribute significantly to the similarity in patterns between MMR-deficient and germline mutation patterns.</p><p>Finally, as described in the original manuscript (<xref ref-type="fig" rid="fig2">Figure 2A</xref> in the original manuscript), no extensive similarity was noted between MMR-proficient and germline mutation patterns. Consequently, even if somatic mutations (as reflected in MMR-proficient mutations) would contribute significantly to MMR-deficient mutation patterns, they would not display extensive similarity to germline variation patterns and could therefore not be responsible for the patterns observed in MMR-deficient tumors. In the revised manuscript, we have indicated this.</p><p><italic>4) The 59-marker exomic MSI panel is useful, especially for Lynch syndrome, and appears to perform well</italic>.</p><p>We are happy to read that the reviewers appreciate our work.</p><p><italic>5) The pathway analysis presumably incorporated all detected variants. Whilst strongly suggestive, does filtering for variants with strong evidence of functionality alter these conclusions? Moreover, the burden of mutations in these pathways might relate to redundant function – noting that MSI+ cancers are usually near-diploid - rather than positive selection</italic>. <italic>This may or may not matter for therapeutic purposes, but can it be checked in some way?</italic></p><p>We apologize for not more clearly explaining our pathway analyses in the original manuscript. We described two types of pathway analyses: the first involved somatic frameshift indels in exons, the second involved somatic indels both in exons and exon/intron boundaries. We thus already restricted the presented pathway analyses to variants with strong evidence of functionality. 
Indeed, in the first analysis each of the selected somatic indels in exons already represented an out-of-frame mutation, thus conferring a heterozygous loss-of-function mutation on the gene affected in the tumor.</p><p>In an effort to further enrich for mutations with a functional effect in the tumor, we additionally restricted our pathway analyses to genes expressed in endometrial or colorectal normal tissue. RNA-sequencing data generated on normal endometrium and colon tissue were downloaded from TCGA (<xref ref-type="bibr" rid="bib47">Pinol et al., 2005</xref>; <xref ref-type="bibr" rid="bib40">Ng et al., 2010</xref>). For both EM and CRC datasets, we calculated the mean normalized read count for each gene in 12 normal endometrial samples and 40 normal colorectal samples respectively. Transcripts with over 10 reads per kb and per million reads were considered expressed. This resulted in 12,851 and 12,293 genes that were expressed in endometrial and colorectal tissues respectively. We then limited the pathway analyses to indels affecting genes expressed either in normal endometrium or in normal colon tissue. Pathway analysis using IPA<sup>®</sup> of 2,126 expressed genes affected by a somatic indel ranked the “<italic>Role of BRCA1 in DNA damage response</italic>” as the top enriched pathway. IPA<sup>®</sup> analysis of 851 expressed genes affected by recurrent indels revealed that the “<italic>Double-strand break repair by homologous recombination</italic>” pathway ranked top. GenomeMuSiC ranked “<italic>ATR/BRCA pathway</italic>”, “<italic>DNA repair</italic>” and “<italic>Homologous recombination</italic>” pathways as the top pathways for BioCarta, Reactome and DNARepairDB respectively. 
By restricting ourselves to frameshift indels affecting genes that are expressed in endometrial tissue, similar results were thus obtained.</p><p>In order not to burden the reader with too many pathway analyses, we have chosen not to present these data in the revised manuscript. Furthermore, since the response to this comment will be published online, critical readers will be able to appreciate in detail that the outcome was not altered by removing genes that are affected by indels but not expressed in the normal corresponding tissue.</p><p><bold>References</bold></p><p>1. Zook, J. M. et al. Integrating human sequence data sets provides a resource of benchmark SNP and indel genotype calls. Nat Biotechnol 32, 246–251, <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nbt.2835">doi:10.1038/nbt.2835</ext-link> (2014).</p><p>2. Jia, P. et al. Consensus rules in variant detection from next-generation sequencing data. PLoS One 7, e38470, <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pone.0038470">doi:10.1371/journal.pone.0038470</ext-link> (2012).</p><p>3. O'Rawe, J. et al. Low concordance of multiple variant-calling pipelines: practical implications for exome and genome sequencing. Genome medicine 5, 28, <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1186/gm432">doi:10.1186/gm432</ext-link> (2013).</p><p>4. TCGA. Comprehensive molecular characterization of human colon and rectal cancer. Nature 487, 330–337, <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature11252">doi:10.1038/nature11252</ext-link> (2012).</p><p>5. Cancer Genome Atlas Research, N. et al. Integrated genomic characterization of endometrial carcinoma. Nature 497, 67–73, <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature12113">doi:10.1038/nature12113</ext-link> (2013).</p></body></sub-article></article>