Skip to content

Commit

Permalink
Fix hcat of hcat with non-identical names
Browse files Browse the repository at this point in the history
  • Loading branch information
diegozea committed Jun 3, 2021
1 parent 2146ae3 commit f42cf9b
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 7 deletions.
10 changes: 9 additions & 1 deletion NEWS.md
@@ -1,10 +1,18 @@
## MIToS.jl Release Notes

### Changes from v2.8.1 to v2.8.5

* Fix bugs when concatenating concatenated MSAs using `hcat`.

### Changes from v2.8.1 to v2.8.4

* Ensure that `gaussdca` uses the correct project file.

### Changes from v2.8.1 to v2.8.3

* Increase `PairwiseListMatrices` required version.

* Fix bug when concatenating concatenated MSAs using `hcat`.
* Fix bugs when concatenating concatenated MSAs using `hcat`.

### Changes from v2.8.0 to v2.8.1

Expand Down
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "MIToS"
uuid = "51bafb47-8a16-5ded-8b04-24ef4eede0b5"
version = "2.8.4"
version = "2.8.5"

[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
Expand Down
13 changes: 8 additions & 5 deletions src/MSA/Concatenation.jl
Expand Up @@ -62,11 +62,14 @@ function _concatenate_annotfile(data::Annotations...)
annotfile
end

function _get_seqname_mapping(concatenated_seqnames)
function _get_seqname_mapping(concatenated_seqnames, msas...)
mapping = Dict{Tuple{Int, String}, String}()
for concatenated_seqname in concatenated_seqnames
for (i, seqname) in enumerate(split(concatenated_seqname, "_&_"))
mapping[(i, seqname)] = concatenated_seqname
seq_names = hcat([sequencenames(msa) for msa in msas]...)
nseq, nmsa = size(seq_names)
@assert nseq == length(concatenated_seqnames)
for i in 1:nseq
for j in 1:nmsa
mapping[(j, seq_names[i, j])] = concatenated_seqnames[i]
end
end
mapping
Expand Down Expand Up @@ -156,7 +159,7 @@ function Base.hcat(msa::T...) where T <: AnnotatedAlignedObject
colnames = _concatenated_col_names(msa...)
setnames!(concatenated_msa, seqnames, 1)
setnames!(concatenated_msa, colnames, 2)
seqname_mapping = _get_seqname_mapping(seqnames)
seqname_mapping = _get_seqname_mapping(seqnames, msa...)
seq_lengths = _get_seq_lengths(msa...)
old_annot = annotations.([msa...])
new_annot = Annotations(
Expand Down
43 changes: 43 additions & 0 deletions test/MSA/Concatenation.jl
Expand Up @@ -91,9 +91,12 @@

@testset "Inception" begin
concatenated_in = hcat(msa, msa_2)
concatenated_diff_a = hcat(msa[[2, 1], :], msa_2)
concatenated_diff_b = hcat(msa_2, msa[[2, 1], :])

@testset "concatenated concatenated" begin
concatenated_out = hcat(concatenated_in, concatenated_in)
concat_ab = hcat(concatenated_diff_a, concatenated_diff_b)

@test size(concatenated_out) == (2, 8)
@test sequencenames(concatenated_out) == ["ONE", "TWO"]
Expand All @@ -111,6 +114,27 @@
@test getannotresidue(concatenated_out, "TWO", "OnlyTWO") == "yyyyyyyy"
@test getannotcolumn(concatenated_out, "example") == " HE HE"
@test gethcatmapping(concatenated_out) == [1, 1, 2, 2, 3, 3, 4, 4]

@test size(concat_ab) == (2, 8)
@test sequencenames(concat_ab) == [
"TWO_&_ONE_&_ONE_&_TWO", "ONE_&_TWO_&_TWO_&_ONE"]
@test columnnames(concat_ab) == [
"1_1", "1_2", "2_1", "2_2", "3_1", "3_2", "4_1", "4_2"]
@test getcolumnmapping(concat_ab) == [1, 2, 1, 2, 1, 2, 1, 2]
@test getsequencemapping(concat_ab,
"TWO_&_ONE_&_ONE_&_TWO") == [1, 2, 1, 2, 1, 2, 1, 2]
@test getsequencemapping(concat_ab,
"ONE_&_TWO_&_TWO_&_ONE") == [1, 2, 1, 2, 1, 2, 1, 2]
@test getannotresidue(concat_ab,
"TWO_&_ONE_&_ONE_&_TWO", "example") == "cdababcd"
@test getannotresidue(concat_ab,
"ONE_&_TWO_&_TWO_&_ONE", "example") == "abcdcdab"
@test getannotresidue(concat_ab,
"TWO_&_ONE_&_ONE_&_TWO", "OnlyONE") == " xxxx "
@test getannotresidue(concat_ab,
"TWO_&_ONE_&_ONE_&_TWO", "OnlyTWO") == "yy yy"
@test getannotcolumn(concat_ab, "example") == " HEHE "
@test gethcatmapping(concat_ab) == [1, 1, 2, 2, 3, 3, 4, 4]
end

@testset "concatenated non_concatenated" begin
Expand Down Expand Up @@ -138,6 +162,25 @@
end
@test gethcatmapping(concatenated_out) == [1, 1, 2, 2, 3, 3]
end

concat_a = hcat(concatenated_diff_a, msa)

@test size(concat_a) == (2, 6)
@test sequencenames(concat_a) == [
"TWO_&_ONE_&_ONE", "ONE_&_TWO_&_TWO"]
@test columnnames(concat_a) == [
"1_1", "1_2", "2_1", "2_2", "3_1", "3_2"]
@test getcolumnmapping(concat_a) == [1, 2, 1, 2, 1, 2]
@test getsequencemapping(concat_a, "TWO_&_ONE_&_ONE") == [1, 2, 1, 2, 1, 2]
@test getsequencemapping(concat_a, "ONE_&_TWO_&_TWO") == [1, 2, 1, 2, 1, 2]
@test getannotresidue(concat_a, "TWO_&_ONE_&_ONE", "example") == "cdabab"
@test getannotresidue(concat_a, "ONE_&_TWO_&_TWO", "example") == "abcdcd"
@test getannotresidue(concat_a,
"TWO_&_ONE_&_ONE", "OnlyONE") == " xxxx"
@test getannotresidue(concat_a,
"TWO_&_ONE_&_ONE", "OnlyTWO") == "yy "
@test getannotcolumn(concat_a, "example") == " HE "
@test gethcatmapping(concat_a) == [1, 1, 2, 2, 3, 3]
end
end
end

2 comments on commit f42cf9b

@diegozea
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/38126

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v2.8.5 -m "<description of version>" f42cf9b115823cb6dc526541d294f1a00e129446
git push origin v2.8.5

Please sign in to comment.