diff --git a/NEWS.md b/NEWS.md index 16d479da..9dbd9d61 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,18 @@ ## MIToS.jl Release Notes +### Changes from v2.8.1 to v2.8.5 + +* Fix bugs when concatenating concatenated MSAs using `hcat`. + +### Changes from v2.8.1 to v2.8.4 + +* Ensure that `gaussdca` uses the correct project file. + ### Changes from v2.8.1 to v2.8.3 * Increase `PairwiseListMatrices` required version. -* Fix bug when concatenating concatenated MSAs using `hcat`. +* Fix bugs when concatenating concatenated MSAs using `hcat`. ### Changes from v2.8.0 to v2.8.1 diff --git a/Project.toml b/Project.toml index d0457e5e..b8202c9a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "MIToS" uuid = "51bafb47-8a16-5ded-8b04-24ef4eede0b5" -version = "2.8.4" +version = "2.8.5" [deps] ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" diff --git a/src/MSA/Concatenation.jl b/src/MSA/Concatenation.jl index 99108d3d..e120c5ad 100644 --- a/src/MSA/Concatenation.jl +++ b/src/MSA/Concatenation.jl @@ -62,11 +62,14 @@ function _concatenate_annotfile(data::Annotations...) annotfile end -function _get_seqname_mapping(concatenated_seqnames) +function _get_seqname_mapping(concatenated_seqnames, msas...) mapping = Dict{Tuple{Int, String}, String}() - for concatenated_seqname in concatenated_seqnames - for (i, seqname) in enumerate(split(concatenated_seqname, "_&_")) - mapping[(i, seqname)] = concatenated_seqname + seq_names = hcat([sequencenames(msa) for msa in msas]...) + nseq, nmsa = size(seq_names) + @assert nseq == length(concatenated_seqnames) + for i in 1:nseq + for j in 1:nmsa + mapping[(j, seq_names[i, j])] = concatenated_seqnames[i] end end mapping @@ -156,7 +159,7 @@ function Base.hcat(msa::T...) where T <: AnnotatedAlignedObject colnames = _concatenated_col_names(msa...) 
setnames!(concatenated_msa, seqnames, 1) setnames!(concatenated_msa, colnames, 2) - seqname_mapping = _get_seqname_mapping(seqnames) + seqname_mapping = _get_seqname_mapping(seqnames, msa...) seq_lengths = _get_seq_lengths(msa...) old_annot = annotations.([msa...]) new_annot = Annotations( diff --git a/test/MSA/Concatenation.jl b/test/MSA/Concatenation.jl index cc27f9dd..c6fe0039 100644 --- a/test/MSA/Concatenation.jl +++ b/test/MSA/Concatenation.jl @@ -91,9 +91,12 @@ @testset "Inception" begin concatenated_in = hcat(msa, msa_2) + concatenated_diff_a = hcat(msa[[2, 1], :], msa_2) + concatenated_diff_b = hcat(msa_2, msa[[2, 1], :]) @testset "concatenated concatenated" begin concatenated_out = hcat(concatenated_in, concatenated_in) + concat_ab = hcat(concatenated_diff_a, concatenated_diff_b) @test size(concatenated_out) == (2, 8) @test sequencenames(concatenated_out) == ["ONE", "TWO"] @@ -111,6 +114,27 @@ @test getannotresidue(concatenated_out, "TWO", "OnlyTWO") == "yyyyyyyy" @test getannotcolumn(concatenated_out, "example") == " HE HE" @test gethcatmapping(concatenated_out) == [1, 1, 2, 2, 3, 3, 4, 4] + + @test size(concat_ab) == (2, 8) + @test sequencenames(concat_ab) == [ + "TWO_&_ONE_&_ONE_&_TWO", "ONE_&_TWO_&_TWO_&_ONE"] + @test columnnames(concat_ab) == [ + "1_1", "1_2", "2_1", "2_2", "3_1", "3_2", "4_1", "4_2"] + @test getcolumnmapping(concat_ab) == [1, 2, 1, 2, 1, 2, 1, 2] + @test getsequencemapping(concat_ab, + "TWO_&_ONE_&_ONE_&_TWO") == [1, 2, 1, 2, 1, 2, 1, 2] + @test getsequencemapping(concat_ab, + "ONE_&_TWO_&_TWO_&_ONE") == [1, 2, 1, 2, 1, 2, 1, 2] + @test getannotresidue(concat_ab, + "TWO_&_ONE_&_ONE_&_TWO", "example") == "cdababcd" + @test getannotresidue(concat_ab, + "ONE_&_TWO_&_TWO_&_ONE", "example") == "abcdcdab" + @test getannotresidue(concat_ab, + "TWO_&_ONE_&_ONE_&_TWO", "OnlyONE") == " xxxx " + @test getannotresidue(concat_ab, + "TWO_&_ONE_&_ONE_&_TWO", "OnlyTWO") == "yy yy" + @test getannotcolumn(concat_ab, "example") == " HEHE " + @test 
gethcatmapping(concat_ab) == [1, 1, 2, 2, 3, 3, 4, 4] end @testset "concatenated non_concatenated" begin @@ -138,6 +162,25 @@ end @test gethcatmapping(concatenated_out) == [1, 1, 2, 2, 3, 3] end + + concat_a = hcat(concatenated_diff_a, msa) + + @test size(concat_a) == (2, 6) + @test sequencenames(concat_a) == [ + "TWO_&_ONE_&_ONE", "ONE_&_TWO_&_TWO"] + @test columnnames(concat_a) == [ + "1_1", "1_2", "2_1", "2_2", "3_1", "3_2"] + @test getcolumnmapping(concat_a) == [1, 2, 1, 2, 1, 2] + @test getsequencemapping(concat_a, "TWO_&_ONE_&_ONE") == [1, 2, 1, 2, 1, 2] + @test getsequencemapping(concat_a, "ONE_&_TWO_&_TWO") == [1, 2, 1, 2, 1, 2] + @test getannotresidue(concat_a, "TWO_&_ONE_&_ONE", "example") == "cdabab" + @test getannotresidue(concat_a, "ONE_&_TWO_&_TWO", "example") == "abcdcd" + @test getannotresidue(concat_a, + "TWO_&_ONE_&_ONE", "OnlyONE") == " xxxx" + @test getannotresidue(concat_a, + "TWO_&_ONE_&_ONE", "OnlyTWO") == "yy " + @test getannotcolumn(concat_a, "example") == " HE " + @test gethcatmapping(concat_a) == [1, 1, 2, 2, 3, 3] end end end \ No newline at end of file