forked from bigdatagenomics/adam
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Coverage.scala
136 lines (125 loc) · 4.28 KB
/
Coverage.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.models
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.sql.{ Feature => FeatureProduct }
import org.bdgenomics.formats.avro.Feature
/**
 * Companion object providing factory methods that build Coverage from a
 * ReferenceRegion, from an Avro Feature, or from an RDD of Avro Features.
 */
private[adam] object Coverage {

  /**
   * Builds a Coverage record whose coordinates are copied from a ReferenceRegion.
   *
   * @param region ReferenceRegion supplying the reference name, start and end.
   * @param count Coverage value to assign across the region.
   * @param name Optional name to attach to the Coverage record.
   * @return Coverage spanning the given ReferenceRegion.
   */
  def apply(region: ReferenceRegion, count: Double, name: Option[String]): Coverage =
    new Coverage(region.referenceName, region.start, region.end, count, name)

  /**
   * Builds a Coverage record from an Avro Feature. The feature score becomes
   * the coverage count and the feature name (if any) becomes the coverage name.
   *
   * @param feature Feature to convert. It must carry a non-empty contig name,
   *   a start, an end and a score; otherwise the require checks below fail
   *   with an IllegalArgumentException.
   * @return Coverage spanning the given feature.
   */
  def apply(feature: Feature): Coverage = {
    val contig = feature.getContigName
    require(contig != null && contig.nonEmpty,
      "Features must have Contig name to convert to Coverage")

    val featureStart = feature.getStart
    val featureEnd = feature.getEnd
    require(featureStart != null && featureEnd != null,
      "Features must have valid position data to convert to Coverage")

    val featureScore = feature.getScore
    require(featureScore != null,
      "Features must have valid score to convert to Coverage")

    // Option(...) turns an unset (null) feature name into None
    new Coverage(contig,
      featureStart,
      featureEnd,
      featureScore,
      Option(feature.getName))
  }

  /**
   * Converts every Feature in an RDD into a Coverage record.
   *
   * @param rdd RDD of Features to convert.
   * @return RDD holding one Coverage per input Feature.
   */
  def apply(rdd: RDD[Feature]): RDD[Coverage] =
    rdd.map(Coverage(_))
}
/**
 * Coverage record for CoverageRDD.
 *
 * Contains Region indexed by contig name, start and end, as well as the average
 * coverage at each base pair in that region.
 *
 * @param contigName The chromosome that this coverage was observed on.
 * @param start The start coordinate of the region where this coverage value was
 *   observed.
 * @param end The end coordinate of the region where this coverage value was
 *   observed.
 * @param count The average coverage across this region.
 * @param name Optional name for this coverage record. It is carried over to
 *   the name attribute of the Features produced by toFeature and toSqlFeature.
 *   Defaults to None.
 */
case class Coverage(contigName: String, start: Long, end: Long, count: Double, name: Option[String] = None) {

  /**
   * Converts Coverage to an Avro Feature, storing the coverage count in the
   * score attribute.
   *
   * @return Feature built from this Coverage. The name attribute is only set
   *   when this Coverage has a name.
   */
  def toFeature: Feature = {
    val featureBuilder = Feature.newBuilder()
      .setContigName(contigName)
      .setStart(start)
      .setEnd(end)
      .setScore(count)

    // set name, if applicable; foreach avoids the isDefined / get pair
    name.foreach(n => featureBuilder.setName(n))

    featureBuilder.build()
  }

  /**
   * Converts Coverage to a Feature case class, for use with Spark SQL.
   *
   * @return Feature product with contig name, start, end, score and name
   *   populated from this Coverage. Every other field is left empty.
   */
  def toSqlFeature: FeatureProduct = {
    new FeatureProduct(featureId = None,
      name = name,
      source = None,
      featureType = None,
      contigName = Some(contigName),
      start = Some(start),
      end = Some(end),
      strand = None,
      phase = None,
      frame = None,
      // the coverage count is stored as the feature score
      score = Some(count),
      geneId = None,
      transcriptId = None,
      exonId = None,
      aliases = Seq.empty,
      parentIds = Seq.empty,
      target = None,
      gap = None,
      derivesFrom = None,
      notes = Seq.empty,
      dbxrefs = Seq.empty,
      ontologyTerms = Seq.empty,
      circular = None,
      attributes = Map.empty)
  }
}