-
Notifications
You must be signed in to change notification settings - Fork 0
/
introduction.html
295 lines (254 loc) · 18.4 KB
/
introduction.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>2 Introduction | Statistical Analysis of Microarray data</title>
<meta name="description" content="2 Introduction | Statistical Analysis of Microarray data" />
<meta name="generator" content="bookdown 0.17.2 and GitBook 2.6.7" />
<meta property="og:title" content="2 Introduction | Statistical Analysis of Microarray data" />
<meta property="og:type" content="book" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:title" content="2 Introduction | Statistical Analysis of Microarray data" />
<meta name="author" content="Based on Gonzalo, Ricardo and Sanchez-Pla, Alex (2019)" />
<meta name="date" content="2020-03-29" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black" />
<link rel="prev" href="summaryabstract.html"/>
<link rel="next" href="materials.html"/>
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
<style type="text/css">
a.sourceLine { display: inline-block; line-height: 1.25; }
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
a.sourceLine:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
code.sourceCode { white-space: pre-wrap; }
a.sourceLine { text-indent: -1em; padding-left: 1em; }
}
pre.numberSource a.sourceLine
{ position: relative; left: -4em; }
pre.numberSource a.sourceLine::before
{ content: attr(data-line-number);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; pointer-events: all; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
a.sourceLine::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
</head>
<body>
<div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
<div class="book-summary">
<nav role="navigation">
<ul class="summary">
<li class="chapter" data-level="1" data-path="summaryabstract.html"><a href="summaryabstract.html"><i class="fa fa-check"></i><b>1</b> Summary/Abstract</a><ul>
<li class="chapter" data-level="1.1" data-path="summaryabstract.html"><a href="summaryabstract.html#key-words"><i class="fa fa-check"></i><b>1.1</b> Key Words</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="introduction.html"><a href="introduction.html"><i class="fa fa-check"></i><b>2</b> Introduction</a></li>
<li class="chapter" data-level="3" data-path="materials.html"><a href="materials.html"><i class="fa fa-check"></i><b>3</b> Materials</a><ul>
<li class="chapter" data-level="3.1" data-path="materials.html"><a href="materials.html#software"><i class="fa fa-check"></i><b>3.1</b> Software</a></li>
<li class="chapter" data-level="3.2" data-path="materials.html"><a href="materials.html#data"><i class="fa fa-check"></i><b>3.2</b> Data</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="methods.html"><a href="methods.html"><i class="fa fa-check"></i><b>4</b> Methods</a><ul>
<li class="chapter" data-level="4.1" data-path="methods.html"><a href="methods.html#environment-preparation"><i class="fa fa-check"></i><b>4.1</b> Environment preparation:</a></li>
<li class="chapter" data-level="4.2" data-path="methods.html"><a href="methods.html#prepare-the-data-for-the-analysis"><i class="fa fa-check"></i><b>4.2</b> Prepare the data for the analysis</a></li>
<li class="chapter" data-level="4.3" data-path="methods.html"><a href="methods.html#packages-installation-in-r"><i class="fa fa-check"></i><b>4.3</b> Packages installation in R</a></li>
<li class="chapter" data-level="4.4" data-path="methods.html"><a href="methods.html#read-the-cel-files"><i class="fa fa-check"></i><b>4.4</b> Read the CEL files</a></li>
<li class="chapter" data-level="4.5" data-path="methods.html"><a href="methods.html#quality-control-of-raw-data"><i class="fa fa-check"></i><b>4.5</b> Quality control of raw data</a></li>
<li class="chapter" data-level="4.6" data-path="methods.html"><a href="methods.html#data-normalization"><i class="fa fa-check"></i><b>4.6</b> Data normalization</a></li>
<li class="chapter" data-level="4.7" data-path="methods.html"><a href="methods.html#quality-control-of-normalized-data"><i class="fa fa-check"></i><b>4.7</b> Quality control of normalized data</a></li>
<li class="chapter" data-level="4.8" data-path="methods.html"><a href="methods.html#batch-detection"><i class="fa fa-check"></i><b>4.8</b> Batch detection</a></li>
<li class="chapter" data-level="4.9" data-path="methods.html"><a href="methods.html#detecting-most-variable-genes"><i class="fa fa-check"></i><b>4.9</b> Detecting most variable genes</a></li>
<li class="chapter" data-level="4.10" data-path="methods.html"><a href="methods.html#filtering-least-variable-genes"><i class="fa fa-check"></i><b>4.10</b> Filtering least variable genes</a></li>
<li class="chapter" data-level="4.11" data-path="methods.html"><a href="methods.html#saving-normalized-and-filtered-data"><i class="fa fa-check"></i><b>4.11</b> Saving normalized and filtered data</a></li>
<li class="chapter" data-level="4.12" data-path="methods.html"><a href="methods.html#defining-the-experimental-setup-the-design-matrix"><i class="fa fa-check"></i><b>4.12</b> Defining the experimental setup: The design matrix</a></li>
<li class="chapter" data-level="4.13" data-path="methods.html"><a href="methods.html#defining-comparisons-with-the-contrasts-matrix"><i class="fa fa-check"></i><b>4.13</b> Defining comparisons with the Contrasts Matrix</a></li>
<li class="chapter" data-level="4.14" data-path="methods.html"><a href="methods.html#model-estimation-and-gene-selection"><i class="fa fa-check"></i><b>4.14</b> Model estimation and gene selection</a></li>
<li class="chapter" data-level="4.15" data-path="methods.html"><a href="methods.html#obtaining-lists-of-differentially-expressed-genes"><i class="fa fa-check"></i><b>4.15</b> Obtaining lists of differentially expressed genes</a></li>
<li class="chapter" data-level="4.16" data-path="methods.html"><a href="methods.html#gene-annotation"><i class="fa fa-check"></i><b>4.16</b> Gene Annotation</a></li>
<li class="chapter" data-level="4.17" data-path="methods.html"><a href="methods.html#visualizing-differential-expression"><i class="fa fa-check"></i><b>4.17</b> Visualizing differential expression</a></li>
<li class="chapter" data-level="4.18" data-path="methods.html"><a href="methods.html#multiple-comparisons"><i class="fa fa-check"></i><b>4.18</b> Multiple Comparisons</a></li>
<li class="chapter" data-level="4.19" data-path="methods.html"><a href="methods.html#heatmaps"><i class="fa fa-check"></i><b>4.19</b> Heatmaps</a></li>
<li class="chapter" data-level="4.20" data-path="methods.html"><a href="methods.html#biological-significance-of-results"><i class="fa fa-check"></i><b>4.20</b> Biological Significance of results</a></li>
<li class="chapter" data-level="4.21" data-path="methods.html"><a href="methods.html#summary-of-results"><i class="fa fa-check"></i><b>4.21</b> Summary of results</a></li>
</ul></li>
<li class="chapter" data-level="5" data-path="notes.html"><a href="notes.html"><i class="fa fa-check"></i><b>5</b> Notes</a></li>
<li class="chapter" data-level="" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i>References</a></li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Statistical Analysis of Microarray data</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<section class="normal" id="section-">
<div id="introduction" class="section level1">
<h1><span class="header-section-number">2</span> Introduction</h1>
<p>Microarray data analysis is one of the clearest cases where interaction between bioinformatics and statistics has been highly beneficial for both disciplines. <span class="citation">Efron (<a href="#ref-Efron2013">2013</a>)</span> even calls the 21st century the century of microarrays.</p>
<p>What is generically described as “microarray data analysis” is a process that starts with the design of the experiment intended to answer one or more biological questions and ends with a tentative answer to these questions. Statistics is involved at every step of this process, for preparing, transforming, visualizing or analyzing data. And, of course, every step can be done in different ways that use either classical statistics or new methods developed ad hoc for these often high-dimensional problems. The detailed description of these steps is beyond the scope of this chapter, and the reader is assumed to be familiar with them. In particular, it is assumed that the reader is already familiar with microarrays as they are introduced in <span class="citation">Sánchez-Pla (<a href="#ref-Sanchez-Pla2014">2014</a>)</span> and also with the general ideas of microarray data analysis such as can be found in <span class="citation">Draghici (<a href="#ref-Draghici2012">2012</a>)</span>. In any case, for the sake of completeness, basic ideas will be briefly introduced and citations provided the first time they are discussed.</p>
<p>For our objectives we can assume that a microarray dataset is a matrix of continuous values that represent the expressions of a set of genes (one gene per row), in a variety of conditions or samples (one sample per column). See figure <a href="introduction.html#fig:dataset">2.1</a> for an example.</p>
<div class="figure"><span id="fig:dataset"></span>
<img src="figures/Figure1.jpg" alt="A simplified view of a gene expression matrix" />
<p class="caption">
Figure 2.1: A simplified view of a gene expression matrix
</p>
</div>
<p>Note that we have described the row contents as “genes”. Strictly speaking, depending on the type of array, each row may correspond to a distinct but related entity: a “probeset” or a “transcript”.</p>
<ul>
<li>A transcript describes how the gene has been transcribed into messenger RNA. If transcription were unique there would be a single transcript per gene. However, due to the phenomenon of alternative splicing, <span class="citation">Sánchez-Pla et al. (<a href="#ref-Sanchez-Pla2012">2012</a>)</span>, there may be different transcriptions of the same gene (the associated proteins are called “isoforms”). That is, there may be multiple transcripts per gene.</li>
<li>A probeset is, as indicated by its name, a set of “probes”, which are designed to map different fragments of a given gene. Altogether it is expected that each set of probes, or probeset, uniquely characterizes one gene. However, given that this characterization is not always possible, it may be convenient to have more than one probeset per gene. That is, although it is common to use the terms “probeset” and “gene” interchangeably, it is important to be aware that there may be several probesets per gene.</li>
</ul>
<p>In practice, given that either probesets or transcripts map to genes, it is common to describe the array rows as “genes”.</p>
<p>Our main goal is to describe a workflow, a series of ordered steps that takes us from the raw data, the digitized images as produced by the hybridization system, to one or more lists of genes that can be used to help answer a certain biological question.
This can be done in distinct ways. What we present here is an approach that has become very popular over the last decades, based on analyzing the data from the images to the lists of genes using the R statistical language and some of the packages developed specifically for this in the Bioconductor project.</p>
<p>A summary of the process can be found in figure <a href="introduction.html#fig:MDAProcess">2.2</a>.</p>
<div class="figure"><span id="fig:MDAProcess"></span>
<img src="figures/Figure2.png" alt="The microarray data analysis process" width="500" />
<p class="caption">
Figure 2.2: The microarray data analysis process
</p>
</div>
</div>
<h3>References</h3>
<div id="refs" class="references">
<div id="ref-Draghici2012">
<p>Draghici, Sorin. 2012. <em>Statistics and data analysis for microarrays using R and Bioconductor</em>. CRC Press. <a href="https://www.crcpress.com/Statistics-and-Data-Analysis-for-Microarrays-Using-R-and-Bioconductor/Draghici/p/book/9781439809754">https://www.crcpress.com/Statistics-and-Data-Analysis-for-Microarrays-Using-R-and-Bioconductor/Draghici/p/book/9781439809754</a>.</p>
</div>
<div id="ref-Efron2013">
<p>Efron, Bradley. 2013. <em>Large-scale inference : empirical Bayes methods for estimation, testing, and prediction</em>. Cambridge University Press. <a href="http://admin.cambridge.org/academic/subjects/statistics-probability/statistical-theory-and-methods/large-scale-inference-empirical-bayes-methods-estimation-testing-and-prediction">http://admin.cambridge.org/academic/subjects/statistics-probability/statistical-theory-and-methods/large-scale-inference-empirical-bayes-methods-estimation-testing-and-prediction</a>.</p>
</div>
<div id="ref-Sanchez-Pla2014">
<p>Sánchez-Pla, Alex. 2014. “DNA Microarrays Technology: Overview and Current Status.” In <em>Fundamentals of Advanced Omics Technologies: From Genes to Metabolites</em>, edited by Alejandro Cifuentes, Carolina Simó, and Virginia García-Cañas, Volume 63:1–23. Elsevier. <a href="http://www.sciencedirect.com/science/article/pii/B9780444626516000015">http://www.sciencedirect.com/science/article/pii/B9780444626516000015</a>.</p>
</div>
<div id="ref-Sanchez-Pla2012">
<p>Sánchez-Pla, Alex, Ferran Reverter, M. Carme Ruíz de Villa, and Manuel Comabella. 2012. “Transcriptomics: mRNA and alternative splicing.” <em>Journal of Neuroimmunology</em> 248 (1-2): 23–31. <a href="https://doi.org/10.1016/j.jneuroim.2012.04.008">https://doi.org/10.1016/j.jneuroim.2012.04.008</a>.</p>
</div>
</div>
</section>
</div>
</div>
</div>
<a href="summaryabstract.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="materials.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
</div>
</div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Resolve the "gitbook" module, then start it with this page's settings.
// Each settings group is built as its own object literal and handed to
// gitbook.start() in the same key order as before.
gitbook.require(["gitbook"], function(gitbook) {
  // Per-network flags for the sharing plugin, plus the list passed as "all".
  var sharingOptions = {
    "github": false,
    "facebook": true,
    "twitter": true,
    "linkedin": false,
    "weibo": false,
    "instapaper": false,
    "vk": false,
    "all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
  };

  // Initial values for the font settings plugin.
  var fontOptions = {
    "theme": "white",
    "family": "sans",
    "size": 2
  };

  // The edit / history / view entries carry no link or label on this page.
  // They are kept as three separate objects, exactly as in the original call.
  var editOptions = { "link": null, "text": null };
  var historyOptions = { "link": null, "text": null };
  var viewOptions = { "link": null, "text": null };

  var tocOptions = { "collapse": "subsection" };

  gitbook.start({
    "sharing": sharingOptions,
    "fontsettings": fontOptions,
    "edit": editOptions,
    "history": historyOptions,
    "view": viewOptions,
    "download": null,
    "toc": tocOptions,
    "search": false
  });
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Append a <script> element to <head> at runtime so the MathJax library is
// fetched after the page itself has been parsed.
(function () {
  var DEFAULT_MATHJAX_URL = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";

  // "true" or "" selects the default URL above; any other value is used as the
  // script URL unchanged.
  // NOTE(review): the literal looks like a value filled in by the page
  // generator - confirm before hand-editing it.
  var src = "true";
  if (src === "" || src === "true") src = DEFAULT_MATHJAX_URL;

  // Keep the explicit https: scheme. The earlier version stripped the scheme on
  // every non-file: page, producing a protocol-relative URL that was fetched
  // over plain HTTP whenever the page itself was served over HTTP.
  var script = document.createElement("script");
  script.src = src;
  document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>