/
utils.go
140 lines (124 loc) · 3.02 KB
/
utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package protein
import (
"fmt"
"math"
"github.com/evolbioinfo/goalign/align"
)
type seqpairdist struct {
i, j int
seq1, seq2 []uint8
seq1Ambigu []bool
seq2Ambigu []bool
}
/* Returns the sites of the alignments that contains only nucleotides and no gaps */
func selectedSites(al align.Alignment, weights []float64, removeGappedPositions bool) (float64, []bool) {
selectedSites := make([]bool, al.Length())
numSites := 0.0
for l := 0; l < al.Length(); l++ {
w := 1.0
if weights != nil {
w = weights[l]
}
selectedSites[l] = true
for i := 0; i < al.NbSequences() && removeGappedPositions; i++ {
seq, _ := al.GetSequenceCharById(i)
if al.AlphabetCharToIndex(seq[l]) == -1 || seq[l] == '*' || seq[l] == '?' || seq[l] == '-' {
selectedSites[l] = false
}
}
if selectedSites[l] {
numSites += w
}
}
return numSites, selectedSites
}
func aaFrequency(a align.Alignment, weights []float64, selected []bool) ([]float64, error) {
var i, j int
var w, sum float64
if a.Alphabet() != align.AMINOACIDS {
return nil, fmt.Errorf("Alphabet is not AminoAcids")
}
ns := len(a.AlphabetCharacters())
var num []float64 = make([]float64, 20)
var freq []float64 = make([]float64, 20)
for i = 0; i < ns; i++ {
freq[i] = 1. / float64(ns)
num[i] = 0.0
}
// Count occurences of different amino acids
a.IterateChar(func(name string, sequence []uint8) bool {
for j = 0; j < len(sequence); j++ {
if selected[j] {
w = weights[j]
idx := a.AlphabetCharToIndex(sequence[j])
if idx >= 0 {
num[idx] += w
} else {
for i = 0; i < ns; i++ {
num[i] = w * freq[i]
}
}
}
}
return false
})
// if at least one frequency equals 0 then add a pseudo-count
// as these are doubles, cannot test equality to 0, then test less than minimum value it can have (1./20)
oneLessThanCutoff := false
for _, v := range num {
if v < 1./float64(ns) {
oneLessThanCutoff = true
break
}
}
for i, v := range num {
if oneLessThanCutoff {
num[i] = v + 1.0
}
sum += num[i]
}
for i, _ := range num {
freq[i] = num[i] / sum
}
return freq, nil
}
func isAmbigu(c uint8) bool {
return (c == align.GAP || c == align.POINT || c == align.OTHER || c == align.ALL_AMINO)
}
func sign(a, b float64) float64 {
if b > .0 {
return math.Abs(a)
} else {
return -math.Abs(a)
}
}
func shift(a, b, c, d *float64) {
(*a) = (*b)
(*b) = (*c)
(*c) = (*d)
}
func checkAmbiguities(pair *seqpairdist, stepsize int) (ambig []bool) {
var j int
pair.seq1Ambigu = make([]bool, len(pair.seq1))
pair.seq2Ambigu = make([]bool, len(pair.seq2))
for j = 0; j < len(pair.seq1); j += stepsize {
pair.seq1Ambigu[j] = false
pair.seq2Ambigu[j] = false
if isAmbigu(pair.seq1[j]) {
pair.seq1Ambigu[j] = true
}
if isAmbigu(pair.seq2[j]) {
pair.seq2Ambigu[j] = true
}
}
return
}
func check2SequencesDiff(pair *seqpairdist) (ret bool) {
var i int
for i = 0; i < len(pair.seq1); i++ {
if (!pair.seq1Ambigu[i] && !pair.seq2Ambigu[i]) && (pair.seq1[i] != pair.seq2[i]) {
return true
}
}
return false
}