-
Notifications
You must be signed in to change notification settings - Fork 42
/
cigar-utils.go
94 lines (82 loc) · 3.02 KB
/
cigar-utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// elPrep: a high-performance tool for preparing SAM/BAM files.
// Copyright (c) 2017, 2018 imec vzw.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version, and Additional Terms
// (see below).
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public
// License and Additional Terms along with this program. If not, see
// <https://github.com/ExaScience/elprep/blob/master/LICENSE.txt>.
package filters
import (
"log"
"github.com/exascience/elprep/v4/sam"
)
var (
cigarConsumesReadBases = map[byte]int32{'M': 1, 'I': 1, 'S': 1, '=': 1, 'X': 1}
cigarConsumesReferenceBases = map[byte]int32{'M': 1, 'D': 1, 'N': 1, '=': 1, 'X': 1}
)
// Sums the lengths of all CIGAR operations that consume reference
// bases.
func end(aln *sam.Alignment, cigars []sam.CigarOperation) int32 {
var length int32
for _, op := range cigars {
length += cigarConsumesReferenceBases[op.Operation] * op.Length
}
return aln.POS + length - 1
}
var (
operatorConsumesReadBases = map[byte]bool{'M': true, 'I': true, 'S': true, '=': true, 'X': true}
operatorConsumesReferenceBases = map[byte]bool{'M': true, 'D': true, 'N': true, '=': true, 'X': true}
)
// Sums the lengths of all CIGAR operations that consume read bases.
func readLengthFromCigar(cigars []sam.CigarOperation) int32 {
var length int32
for _, op := range cigars {
length += cigarConsumesReadBases[op.Operation] * op.Length
}
return length
}
func elementStradlessClippedRead(newCigar []sam.CigarOperation, operator byte, relativeClippingPosition, clippedBases int32) []sam.CigarOperation {
if operatorConsumesReadBases[operator] {
if operatorConsumesReferenceBases[operator] {
if relativeClippingPosition > 0 {
newCigar = append(newCigar, sam.CigarOperation{
Length: relativeClippingPosition,
Operation: operator,
})
}
} else {
clippedBases += relativeClippingPosition
}
} else if relativeClippingPosition != 0 {
log.Fatal("Unexpected non-0 relative clipping position in CleanSam.")
}
return append(newCigar, sam.CigarOperation{
Length: clippedBases,
Operation: 'S',
})
}
func softClipEndOfRead(clipFrom int32, cigars []sam.CigarOperation) []sam.CigarOperation {
var pos int32
clipFrom--
var newCigar []sam.CigarOperation
for _, op := range cigars {
endPos := pos
endPos += cigarConsumesReadBases[op.Operation] * op.Length
if endPos < clipFrom {
newCigar = append(newCigar, op)
} else {
clippedBases := readLengthFromCigar(cigars) + clipFrom
newCigar = elementStradlessClippedRead(newCigar, op.Operation, clipFrom-pos, clippedBases)
break
}
pos += endPos
}
return newCigar
}