Skip to content

Commit

Permalink
Modified Parsimony / number of steps
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Nov 13, 2020
1 parent c525fd7 commit 29e8076
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 52 deletions.
28 changes: 23 additions & 5 deletions acr/acr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,14 @@ func TestACRDELTRANParsimony(t *testing.T) {
}

// Test UPPASS
err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices)
nsteps := 0
nsteps, err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices)
if err != nil {
t.Error(err)
}
if nsteps != 4 {
t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4))
}
testCheckStates(t, 2, ni, "t1", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t2", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t3", states, stateIndices, "B")
Expand Down Expand Up @@ -172,10 +176,15 @@ func TestACRACCTRANParsimony(t *testing.T) {
}

// Test UPPASS
err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices)
nsteps := 0
nsteps, err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices)
if err != nil {
t.Error(err)
}
if nsteps != 4 {
t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4))
}

testCheckStates(t, 2, ni, "t1", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t2", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t3", states, stateIndices, "B")
Expand Down Expand Up @@ -255,11 +264,14 @@ func TestALLDELTRAN(t *testing.T) {
t.Error(err)
}

statemap, err := ParsimonyAcr(tr, tipstates, ALGO_DELTRAN, false)
statemap, nsteps, err := ParsimonyAcr(tr, tipstates, ALGO_DELTRAN, false)

if err != nil {
t.Error(err)
}
if nsteps != 4 {
t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4))
}
testCheckMap(t, "t6", statemap, "A")
testCheckMap(t, "t7", statemap, "A")
testCheckMap(t, "t11", statemap, "A")
Expand Down Expand Up @@ -303,10 +315,13 @@ func TestALLDOWNPASS(t *testing.T) {
t.Error(err)
}

statemap, err := ParsimonyAcr(tr, tipstates, ALGO_DOWNPASS, false)
statemap, nsteps, err := ParsimonyAcr(tr, tipstates, ALGO_DOWNPASS, false)
if err != nil {
t.Error(err)
}
if nsteps != 4 {
t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4))
}
testCheckMap(t, "t6", statemap, "A", "B")
testCheckMap(t, "t7", statemap, "A", "B")
testCheckMap(t, "t11", statemap, "A", "B")
Expand Down Expand Up @@ -351,10 +366,13 @@ func TestALLACCTRAN(t *testing.T) {
t.Error(err)
}

statemap, err := ParsimonyAcr(tr, tipstates, ALGO_ACCTRAN, false)
statemap, nsteps, err := ParsimonyAcr(tr, tipstates, ALGO_ACCTRAN, false)
if err != nil {
t.Error(err)
}
if nsteps != 4 {
t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4))
}
testCheckMap(t, "t6", statemap, "B")
testCheckMap(t, "t7", statemap, "B")
testCheckMap(t, "t11", statemap, "A")
Expand Down
61 changes: 39 additions & 22 deletions acr/parsimony.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package acr

import (
"bytes"
"errors"
"fmt"
"math/rand"
"sort"
Expand All @@ -29,9 +28,9 @@ const (
// If ALGO_ACCTRAN, then executes UPPASS and ACCTRAN
// Returns a map with the states of all nodes. If a node has a name, key is its name, if a node has no name,
// the key will be its id in the deep first traversal of the tree.
// Also returns the number of parsimony steps
// if randomResolve is true, then in the second pass, each ambiguities will be resolved randomly
func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, randomResolve bool) (map[string]string, error) {
var err error
func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, randomResolve bool) (nametostates map[string]string, nsteps int, err error) {
var nodes []*tree.Node = t.Nodes()
var states []AncestralState = make([]AncestralState, len(nodes)) // Downside states of each node
var upstates []AncestralState = make([]AncestralState, len(nodes)) // Upside states of each node
Expand All @@ -54,9 +53,9 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando
upstates[i] = make(AncestralState, len(alphabet))
}

err = parsimonyUPPASS(t.Root(), nil, tipCharacters, states, stateIndices)
nsteps, err = parsimonyUPPASS(t.Root(), nil, tipCharacters, states, stateIndices)
if err != nil {
return nil, err
return
}

switch algo {
Expand All @@ -70,36 +69,40 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando
case ALGO_NONE:
// No pass after uppass
default:
return nil, fmt.Errorf("Parsimony algorithm %d unkown", algo)
err = fmt.Errorf("Parsimony algorithm %d unkown", algo)
return
}

nametostates := buildInternalNamesToStatesMap(t, states, alphabet)
nametostates = buildInternalNamesToStatesMap(t, states, alphabet)
assignStatesToTree(t, states, alphabet)
return nametostates, nil
return
}

// First step of the parsimony computatation: From tips to root
func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, states []AncestralState, stateIndices map[string]int) error {
func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, states []AncestralState, stateIndices map[string]int) (nsteps int, err error) {
nsteps = 0
tempsteps := 0
// If it is a tip, we initialize the ancestral state using the current
// state in the alignment. If no such tip name exists in the mapping file,
// then returns an error
if cur.Tip() {
state, ok := tipCharacters[cur.Name()]
if !ok {
return errors.New(fmt.Sprintf("Tip %s does not exist in the tip/state mapping file", cur.Name()))
return 0, fmt.Errorf("Tip %s does not exist in the tip/state mapping file", cur.Name())
}
stateindex, ok := stateIndices[state]
if ok {
states[cur.Id()][stateindex] = 1
} else {
return errors.New(fmt.Sprintf("State %s does not exist in the alphabet, ignoring the state", state))
return 0, fmt.Errorf("State %s does not exist in the alphabet, ignoring the state", state)
}
} else {
for _, child := range cur.Neigh() {
if child != prev {
if err := parsimonyUPPASS(child, cur, tipCharacters, states, stateIndices); err != nil {
return err
if tempsteps, err = parsimonyUPPASS(child, cur, tipCharacters, states, stateIndices); err != nil {
return 0, err
}
nsteps += tempsteps
}
}

Expand All @@ -117,9 +120,27 @@ func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, stat
nchild++
}
}
maxState := 0
max := 0.0
for k, c := range states[cur.Id()] {
if c > max {
maxState = k
max = c
}
}
computeParsimony(states[cur.Id()], states[cur.Id()], nchild)
// We keep one of the states having the max occurence
// We sum the number of children that does not have this state
// it will give the additionnal number of parsimony steps
for _, child := range cur.Neigh() {
if child != prev {
if states[child.Id()][maxState] == 0 {
nsteps++
}
}
}
}
return nil
return nsteps, nil
}

// Second step of the parsimony computation: From root to tips
Expand Down Expand Up @@ -194,7 +215,8 @@ func parsimonyDOWNPASS(cur, prev *tree.Node,

// Will set the most parsimonious states in the "currentStates" slice
// using the neighbor states "neighborStates", and the number of neighbors
func computeParsimony(neighborStates AncestralState, currentStates AncestralState, nchild int) {
func computeParsimony(neighborStates AncestralState, currentStates AncestralState, nchild int) (nsteps int) {

// If intersection of states of children and parent is emtpy:
// then State of cur node == Union of intersection of nodes 2 by 2
// If state is still empty, then state of cur node is union of all states
Expand All @@ -205,20 +227,15 @@ func computeParsimony(neighborStates AncestralState, currentStates AncestralStat
}
}
for k, c := range neighborStates {
if int(max) == nchild && c == max {
if c == max {
// We have a characters shared by all neighbors and parent: Intersection ok
currentStates[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
currentStates[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
currentStates[k] = 1
} else {
// Else we do not take it
currentStates[k] = 0
}
}
return
}

// Third step of the parsimony computation for resolving ambiguities
Expand Down
56 changes: 33 additions & 23 deletions asr/parsimony.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package asr

import (
"bytes"
"errors"
"fmt"
"math/rand"

Expand All @@ -22,8 +21,7 @@ const (
// Computed using parsimony
// Sequences will be located in the comment field of each node
// at the first index
func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) error {
var err error
func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) (nsteps []int, err error) {
var nodes []*tree.Node = t.Nodes()
var seqs []*AncestralSequence = make([]*AncestralSequence, len(nodes))
var upseqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) // Upside seqs of each node
Expand All @@ -34,21 +32,23 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool)
charToIndex[c] = i
}

nsteps = make([]int, a.Length())

// We initialize all ancestral sequences
// And sequences at tips
for i, n := range nodes {
n.SetId(i)
if seqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil {
return err
return nil, err
}
if upseqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil {
return err
return nil, err
}
}

err = parsimonyUPPASS(t.Root(), nil, a, seqs, charToIndex)
err = parsimonyUPPASS(t.Root(), nil, a, seqs, nsteps, charToIndex)
if err != nil {
return err
return
}

switch algo {
Expand All @@ -60,36 +60,38 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool)
case ALGO_ACCTRAN:
parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve)
default:
return fmt.Errorf("Parsimony algorithm %d unkown", algo)
err = fmt.Errorf("Parsimony algorithm %d unkown", algo)
return
}

assignSequencesToTree(t, seqs, a.AlphabetCharacters())
return nil
return
}

// First step of the parsimony computatation: From tips to root
func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) error {
func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, nsteps []int, charToIndex map[rune]int) (err error) {
// If it is a tip, we initialize the ancestral sequences using the current
// Sequence in the alignment. If no such sequence exists in the alignment,
// then returns an error
if cur.Tip() {
seq, ok := a.GetSequenceChar(cur.Name())
if !ok {
return errors.New(fmt.Sprintf("Sequence %s does not exist in the alignment", cur.Name()))
err = fmt.Errorf("Sequence %s does not exist in the alignment", cur.Name())
return
}
for j, c := range seq {
charindex, ok := charToIndex[c]
if ok {
seqs[cur.Id()].seq[j].counts[charindex] = 1
} else {
io.LogWarning(errors.New(fmt.Sprintf("Character %c does not exist in the alphabet, ignoring the state", c)))
io.LogWarning(fmt.Errorf("Character %c does not exist in the alphabet, ignoring the state", c))
}
}
} else {
for _, child := range cur.Neigh() {
if child != prev {
if err := parsimonyUPPASS(child, cur, a, seqs, charToIndex); err != nil {
return err
if err = parsimonyUPPASS(child, cur, a, seqs, nsteps, charToIndex); err != nil {
return
}
}
}
Expand All @@ -108,11 +110,25 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS
nchild++
}
}

maxState := 0
max := 0.0
for k, c := range seqs[cur.Id()].seq[j].counts {
if c > max {
maxState = k
max = c
}
}
computeParsimony(ances, ances, nchild)
for _, child := range cur.Neigh() {
if child != prev {
if seqs[child.Id()].seq[j].counts[maxState] == 0 {
nsteps[j]++
}
}
}
}
}
return nil
return
}

// Second step of the parsimony computatation: From root to tips
Expand Down Expand Up @@ -201,15 +217,9 @@ func computeParsimony(neighborStates AncestralState, currentStates AncestralStat
}
}
for k, c := range neighborStates.counts {
if int(max) == nchild && c == max {
if c == max {
// We have a characters shared by all neighbors and parent: Intersection ok
currentStates.counts[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
currentStates.counts[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
currentStates.counts[k] = 1
} else {
// Else we do not take it
currentStates.counts[k] = 0
Expand Down
Loading

0 comments on commit 29e8076

Please sign in to comment.