Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge Dev branch #10

Merged
merged 6 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version = "0.2.0"
[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[compat]
AbstractTrees = "0.4.2"
Expand Down
7 changes: 4 additions & 3 deletions src/Taxonomy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ import DataAPI,
DataAPI.All,
DataAPI.Between,
DataAPI.Cols
export CanonicalRank,
export Rank, CanonicalRank, UnCanonicalRank, CanonicalRankSet, CanonicalRanks,
AbstractTaxon, Taxon, UnclassifiedTaxon,
Lineage,
taxid, name, rank, parent, get, children, lca,
print_tree,
taxid, name, rank, parent, get, children, lca, source,
reformat, print_lineage, isdescendant, isancestor,
All, Between, Cols,
From, Until

include("DataAPI.jl")
include("Utils.jl")
include("database.jl")
include("taxon.jl")
include("rank.jl")
include("lineage.jl")
include("lca.jl")

Expand Down
11 changes: 0 additions & 11 deletions src/Utils.jl

This file was deleted.

3 changes: 1 addition & 2 deletions src/lineage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ function Lineage(taxon::Taxon)
reverse!(line)
rankline = map(rank, line)
index = Dict{Symbol,Int}()
for crank in CanonicalRank
for crank in CanonicalRanks
position = findfirst(x -> x == crank, rankline)
position === nothing ? continue : index[crank] = position
end
Expand Down Expand Up @@ -153,7 +153,6 @@ Base.show(io::IO, lineage::Lineage) = print_lineage(io, lineage)
isdescendant(descendant::Taxon, ancestor::Taxon)

Return true if the former taxon is a descendant of the latter taxon.
This function is overloaded because native AbstractTrees.isdescendant is too slow
"""
function AbstractTrees.isdescendant(descendant::Taxon, ancestor::Taxon)
    # Membership test against the lineage built for `descendant`.
    return ancestor in Lineage(descendant)
end

Expand Down
59 changes: 59 additions & 0 deletions src/rank.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using InteractiveUtils

"""
    Rank

Supertype of every taxonomic rank object.
"""
abstract type Rank end

"""
    CanonicalRank <: Rank

Supertype of the singleton rank types whose names are listed in `CanonicalRanks`.
"""
abstract type CanonicalRank <: Rank end

# Names of the canonical ranks. The entries from `:species` onwards are turned into
# singleton types by the loop below and receive the integer levels 1 (`:species`)
# through 8 (`:superkingdom`), in list order.
const CanonicalRanks = [
    :strain, :subspecies, :species, :genus, :family,
    :order, :class, :phylum, :kingdom, :superkingdom,
]

# Generate `struct <name> <: CanonicalRank end` plus its `rank` and `Integer` methods
# for every rank from `:species` upwards.
for (level, rankname) in enumerate(CanonicalRanks[3:end])
    @eval begin
        struct $rankname <: CanonicalRank end
        # Interpolate the name as a quoted symbol. `Symbol($rankname)` would instead
        # call `Symbol` on the generated *type*, whose printed form can carry a module
        # prefix, so the returned symbol would not always equal the rank name.
        rank(::$rankname) = $(QuoteNode(rankname))
        Base.Integer(::$rankname) = $level
    end
end

# `strain` and `subspecies` are defined apart from the other canonical ranks because
# both are assigned the same integer level, 0.
for lowrank in (:strain, :subspecies)
    @eval begin
        struct $lowrank <: CanonicalRank end
        rank(::$lowrank) = $(QuoteNode(lowrank))
        Base.Integer(::$lowrank) = 0
    end
end

# A rank *type* converts to the same integer as its singleton instance.
Base.Integer(::Type{T}) where {T<:CanonicalRank} = Integer(T())

"""
    Rank(s::Symbol)

Return the rank object named `s`: an instance of the singleton type of that name when
`s` is listed in `CanonicalRanks`, otherwise `UnCanonicalRank(s)`.
"""
function Rank(s::Symbol)
    s in CanonicalRanks || return UnCanonicalRank(s)
    # Resolve the singleton type through the module binding instead of `@eval`:
    # the same type is found, without evaluating a fresh expression on every call.
    return getfield(@__MODULE__, s)()
end

# Subtypes of `CanonicalRank` as reported by `subtypes` (from InteractiveUtils) when
# this statement is evaluated; rank types defined after this line are not included.
const CanonicalRankSet = subtypes(CanonicalRank)

"""
    UnCanonicalRank(rank::Symbol)

Rank object that simply stores its name as a `Symbol`. `Rank(s)` returns one for any
name that is not listed in `CanonicalRanks`.
"""
struct UnCanonicalRank <: Rank
    rank::Symbol
end

function rank(ucr::UnCanonicalRank)
    return ucr.rank
end

# Every rank is displayed as its bare name.
function Base.show(io::IO, r::Rank)
    return print(io, String(rank(r)))
end

# Canonical ranks are ordered by their integer level.
function Base.isless(x1::CanonicalRank, x2::CanonicalRank)
    return Integer(x1) < Integer(x2)
end

# Rank types compare like their singleton instances.
function Base.isless(x1::Type{<:CanonicalRank}, x2::Type{<:CanonicalRank})
    return isless(x1(), x2())
end

"""
    isless(x1::AbstractTaxon, x2::CanonicalRank)
    isless(x1::AbstractTaxon, x2::Type{<:CanonicalRank})

Compare the rank of taxon `x1` with the canonical rank `x2`.

If `x1` itself has a canonical rank, the two ranks are compared directly. Otherwise
the ancestors of `x1` are visited through `AbstractTrees.parent`; the first ancestor
with a canonical rank decides the result, with `x1` counted as one integer level below
that ancestor. Return `false` when no ancestor has a canonical rank.
"""
function Base.isless(x1::AbstractTaxon, x2::CanonicalRank)
    r = Rank(rank(x1))
    r isa CanonicalRank && return isless(r, x2)
    # Walk towards the root. The `nothing` check runs before every use of `node`, so
    # `parent` is never called on `nothing` once the walk runs out of ancestors.
    # NOTE(review): assumes `AbstractTrees.parent` returns `nothing` at the root — confirm.
    node = AbstractTrees.parent(x1)
    while !isnothing(node)
        r = Rank(rank(node))
        r isa CanonicalRank && return isless(Integer(r) - 1, Integer(x2))
        node = AbstractTrees.parent(node)
    end
    return false
end

Base.isless(x1::AbstractTaxon, x2::Type{<:CanonicalRank}) = isless(x1, x2())
64 changes: 30 additions & 34 deletions src/taxon.jl
Original file line number Diff line number Diff line change
@@ -1,20 +1,38 @@
abstract type AbstractTaxon end

struct Taxon <: AbstractTaxon
struct Taxon <: AbstractTaxon
taxid::Int
name::String
rank::Symbol
db::DB
function Taxon(idx::Int, db::DB)
haskey(db.names, idx) || KeyError(idx) |> throw
return new(idx, db)
end

function Taxon(name::String, db::DB)
taxid_canditates = findall(isequal(name), db.names)
length(taxid_canditates) == 0 && error("There is no candidates for ",name)
length(taxid_canditates) == 1 && return new(taxid_canditates |> first, db)
length(taxid_canditates) > 1 && error("There are several candidates for ",name)
end
end

"""
taxid(taxon::Taxon)

Return the taxid of the given `Taxon` object.
"""
taxid(taxon::Taxon) = taxon.taxid

"""
    name(taxon::Taxon)

Return the name stored under the taxon's taxid in its database.
"""
name(taxon::Taxon) = taxon.db.names[taxid(taxon)]

"""
    rank(taxon::Taxon)

Return the rank stored under the taxon's taxid in its database, or
`Symbol("no rank")` when the database holds no rank entry for that taxid.
"""
# The fallback is spelled "no rank" (lower case), the same symbol the previous
# `Taxon(taxid, db)` constructor stored for taxa without a rank entry.
rank(taxon::Taxon) = get(taxon.db.ranks, taxid(taxon), Symbol("no rank"))

# AbstractTrees traits for `Taxon`: parent links are declared as stored
# (`StoredParents`), children are declared as indexable (`IndexedChildren`), and the
# tree is declared to have a known node type (`HasNodeType`), namely `Taxon`.
AbstractTrees.ParentLinks(::Type{Taxon}) = StoredParents()
AbstractTrees.ChildIndexing(::Type{Taxon}) = IndexedChildren()
AbstractTrees.NodeType(::Type{Taxon}) = HasNodeType()
AbstractTrees.nodetype(::Type{Taxon}) = Taxon

"""
parent(taxon::Taxon)
AbstractTrees.parent(taxon::Taxon)

Return the `Taxon` object that is the parent of the given `Taxon` object.
"""
Expand All @@ -38,22 +56,9 @@ function AbstractTrees.children(taxon::Taxon)
return children_taxon
end

Base.show(io::IO, taxon::Taxon) = print(io, "$(taxon.taxid) [$(String(taxon.rank))] $(taxon.name)")
Base.show(io::IO, taxon::Taxon) = print(io, "$(taxid(taxon)) [$(rank(taxon))] $(name(taxon))")
AbstractTrees.printnode(io::IO, taxon::Taxon) = print(io, taxon)

function Taxon(taxid::Int, db::DB)
name = db.names[taxid]
rank = get(db.ranks, taxid, Symbol("no rank"))
return Taxon(taxid, name, rank, db)
end

function Taxon(name::String, db::DB)
taxid_canditates = findall(isequal(name), db.names)
length(taxid_canditates) == 0 && error("There is no candidates for ",name)
length(taxid_canditates) == 1 && return Taxon(taxid_canditates[1],db)
length(taxid_canditates) > 1 && error("There are several candidates for ",name)
end

"""
get(db::Taxonomy.DB, taxid::Int, default)

Expand All @@ -80,42 +85,33 @@ function Base.get(db::DB, name::String, default)
end
end

"""
taxid(taxon::Taxon)

Return the taxid of the given `Taxon` object.
"""
taxid(taxon::Taxon) = taxon.taxid

"""
    UnclassifiedTaxon(name::String, rank::Symbol, source::Taxon)

Taxon that carries its own `name` and `rank` together with the `Taxon` it was
derived from (`source`).
"""
struct UnclassifiedTaxon <: AbstractTaxon
    name::String
    rank::Symbol
    source::Taxon
end

"""
    UnclassifiedTaxon(rank, source)

Build an `UnclassifiedTaxon` of the given `rank` derived from `source`. Its name is
`"unclassified "`, the name of `source`, a space, and the rank name, concatenated.
"""
function UnclassifiedTaxon(rank, source)
    # The local must not be called `name`: that would shadow the `name` accessor
    # that is applied to `source` on the same line.
    label = "unclassified " * name(source) * " " * String(rank)
    return UnclassifiedTaxon(label, rank, source)
end

Base.show(io::IO, taxon::UnclassifiedTaxon) = print(io, "Unclassified [$(String(taxon.rank))] $(taxon.name)")
Base.show(io::IO, taxon::UnclassifiedTaxon) = print(io, "Unclassified [$(rank(taxon))] $(taxon.name)")

"""
rank(taxon::AbstractTaxon)

Return the rank of the given `Taxon` object.
It also works for an `UnclassifiedTaxon` object.
"""
function rank(taxon::AbstractTaxon)
taxon.rank
end
rank(taxon::UnclassifiedTaxon) = taxon.rank

"""
name(taxon::AbstractTaxon)

Return the name of the given `Taxon` object.
It also works for an `UnclassifiedTaxon` object.
"""
function name(taxon::AbstractTaxon)
taxon.name
end
name(taxon::UnclassifiedTaxon) = taxon.name

source(taxon::UnclassifiedTaxon) = taxon.source
30 changes: 22 additions & 8 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@ db = Taxonomy.DB("db/nodes.dmp", "db/names.dmp")

human = Taxon(9606,db)
@test typeof(human) == Taxon
@test human.name == "Homo sapiens"
@test human.taxid == 9606
@test human.rank == :species
@test taxid(human) == 9606
@test name(human) == "Homo sapiens"
@test rank(human) == :species
@test sprint(io -> show(io, human)) == "9606 [species] Homo sapiens"

@test get(db, 9606, nothing) == human

@test taxid(human) == human.taxid
@test rank(human) == human.rank
@test AbstractTrees.parent(human) == Taxon(9605,db)
@test children(human) == [Taxon(741158,db), Taxon(63221,db)]
denisova = Taxon(741158, db)
Expand All @@ -27,9 +25,9 @@ db = Taxonomy.DB("db/nodes.dmp", "db/names.dmp")

unclassified_human_subspecies = UnclassifiedTaxon(:subspecies, human)
@test typeof(unclassified_human_subspecies) == UnclassifiedTaxon
@test unclassified_human_subspecies.name == "unclassified Homo sapiens subspecies"
@test unclassified_human_subspecies.rank == :subspecies
@test unclassified_human_subspecies.source == human
@test name(unclassified_human_subspecies) == "unclassified Homo sapiens subspecies"
@test rank(unclassified_human_subspecies) == :subspecies
@test source(unclassified_human_subspecies) == human
@test sprint(io -> show(io, unclassified_human_subspecies)) == "Unclassified [subspecies] unclassified Homo sapiens subspecies"

@test rank(unclassified_human_subspecies) == unclassified_human_subspecies.rank
Expand Down Expand Up @@ -61,6 +59,22 @@ end
@test [n for n in Leaves(human)] == [denisova, neanderthalensis]
end

# Ordering of rank objects, and of taxa against canonical ranks.
@testset "rank.jl" begin
human = Taxon(9606, db)
denisova = Taxon(741158, db)
homininae = Taxon(314295, db)

# Canonical ranks compare by their integer level.
@test Rank(:strain) < Rank(:species) < Rank(:genus)
# A taxon is below a higher rank but not below its own rank.
@test human < Rank(:genus)
@test !(human < Rank(:species))
# presumably denisova carries a rank below species in the test database — TODO confirm
@test denisova < Rank(:species)
# presumably homininae has a non-canonical rank, so these exercise the ancestor
# walk of isless(::AbstractTaxon, ::CanonicalRank) — TODO confirm
@test homininae < Rank(:order)
@test !(homininae < Rank(:species))

# An UnclassifiedTaxon is compared through its own stored rank.
unclassified_human_subspecies = UnclassifiedTaxon(:subspecies, human)
@test unclassified_human_subspecies < Rank(:species)
end

@testset "lineage.jl" begin
human = Taxon(9606,db)
lineage = Lineage(human)
Expand Down