In [1]:
using CSV, DataFrames, BioSequences, FASTX, Plots, Plots.Measures, Serialization

In [2]:
# Column index of each nucleotide; T and U share index 4.
bases = Dict(zip("ACGTU", (1, 2, 3, 4, 4)))

# Index (1-20) of each amino acid, in alphabetical order of the one-letter code.
aas = Dict(aa => idx for (idx, aa) in enumerate("ACDEFGHIKLMNPQRSTVWY"))

# Motif names that are kept by `read_motifs`: the seven core names followed by
# the same names carrying an "i" suffix.
aligned_motifs = let core = ["P1", "L1", "S1", "P2", "L2", "S2", "SS"]
    vcat(core, core .* "i")
end

# Directory/file prefix => full species name used as the plot label.
species = Dict(
    "phylloglossum" => "Phylloglossum drummondii",
    "phlegmariurus" => "Phlegmariurus squarrosus",
    "huperzia" => "Huperzia serrata",
)

# Genes for which one figure each is produced.
genes = ["cob", "nad1", "nad5", "rpl2"]

4-element Vector{String}:
 "cob"
 "nad1"
 "nad5"
 "rpl2"

In [3]:
# Scoring tables keyed by motif name, loaded from a file written with
# `Serialization.serialize`.
# NOTE(review): the Serialization format is not guaranteed to be readable
# across Julia versions, and the file should come from a trusted source.
scoring_tables = deserialize("scoring_tables_5L.dict")

# Largest absolute score over all tables (0.0 when there are no tables); used
# by `match_colour` to scale scores.
# Written as a single reduction instead of reassigning the global inside a
# top-level `for` loop: that pattern only works under the notebook/REPL
# soft-scope rule and fails when the same code is run as a script.
max_abs_val = maximum(st -> maximum(abs, st), values(scoring_tables); init = 0.0)

"""
    match_colour(motif, fifth, last, base)

Return the `:bam` gradient colour for the score of nucleotide `base` against a
motif of type `motif` whose fifth and last residues are `fifth` and `last`.

The score is read from `scoring_tables[motif]`: the row is the index of the
(`fifth`, `last`) amino-acid pair (20 rows per `fifth` residue) and the column
is the nucleotide index from `bases`. The score is divided by the global
`max_abs_val` and offset by 0.5, so a score of zero selects the gradient midpoint.
"""
function match_colour(motif::AbstractString, fifth::Char, last::Char, base::Char)
    table = scoring_tables[motif]
    pair_row = (aas[fifth] - 1) * 20 + aas[last]
    base_col = bases[base]
    score = table[pair_row, base_col]
    # NOTE(review): score / max_abs_val spans [-1, 1], so the gradient index
    # spans [-0.5, 1.5]; this presumably relies on the gradient clamping
    # out-of-range values - confirm that saturation is intended rather than
    # 0.5 + score / (2 * max_abs_val).
    return cgrad(:bam)[0.5 + score / max_abs_val]
end


match_colour (generic function with 1 method)

In [4]:
# Fill colour used for the motif box of each motif type.
motif_colours = Dict(
    "P1" => :salmon,
    "L1" => :lightgoldenrod,
    "S1" => :darkseagreen2,
    "P2" => :plum3,
    "L2" => :lemonchiffon,
    "S2" => :slategray3,
)

Dict{String, Symbol} with 6 entries:
  "L1" => :lightgoldenrod
  "L2" => :lemonchiffon
  "S1" => :darkseagreen2
  "S2" => :slategray3
  "P2" => :plum3
  "P1" => :salmon

In [5]:
"""
    read_motifs(species, gene)

Read the motif table for `gene` of `species` from
`<species>_KP/<species>_dywkp_<gene>_motifs.txt`.

Return `nothing` when the file does not exist. Otherwise return a `DataFrame`
with the columns `protein`, `start`, `end`, `score`, `second`, `fifth`, `last`
and `motif`, restricted to rows whose `motif` is listed in `aligned_motifs`.
`second`, `fifth` and `last` are reduced to their first element, and `protein`
is truncated at the first `"."`.
"""
function read_motifs(species::String, gene::String)
    infile = joinpath(species * "_KP", join([species, "dywkp", gene, "motifs"], "_") * ".txt")
    isfile(infile) || return nothing
    # The file has no header row; column names are supplied here.
    motifs = CSV.File(infile, header=["protein", "start", "end", "score", "sequence", "second", "fifth", "last", "motif"]) |> DataFrame
    motifs.second = getindex.(motifs.second, 1)
    motifs.fifth = getindex.(motifs.fifth, 1)
    motifs.last = getindex.(motifs.last, 1)
    motifs.protein = first.(split.(motifs.protein, "."))
    # `select` returns a new table and leaves its argument untouched, so its
    # result was previously discarded; `select!` drops the column in place.
    select!(motifs, Not(:sequence))
    return filter!(x -> x.motif ∈ aligned_motifs, motifs)
end

"""
    read_target(species, gene)

Read the first record of `<species>_KP/<species>_dywkp_<gene>_target.fasta`
and return its sequence, converted from DNA to RNA, as a `String`.

Return `nothing` when the file does not exist.
"""
function read_target(species::String, gene::String)
    fasta_name = join([species, "dywkp", gene, "target"], "_") * ".fasta"
    fasta_path = joinpath(species * "_KP", fasta_name)
    isfile(fasta_path) || return nothing
    return FASTA.Reader(open(fasta_path)) do reader
        # Only the first record of the file is used.
        record = first(reader)
        dna = sequence(LongDNA{4}, record)
        string(convert(LongRNA{2}, dna))
    end
end

"""
    rectangle(w, h, x, y)

Return a `Plots.Shape` for the axis-aligned rectangle of width `w` and height
`h` whose lower-left corner is at (`x`, `y`).
"""
function rectangle(w, h, x, y)
    # Corners in order: lower-left, lower-right, upper-right, upper-left.
    xs = x .+ [0, w, w, 0]
    ys = y .+ [0, 0, h, h]
    return Shape(xs, ys)
end

rectangle (generic function with 1 method)

In [27]:
# For every gene, draw one row per species and save the figure as "<gene>.svg".
# In each row the upper strip holds one box per motif (coloured by motif type,
# labelled with its fifth and last residues) and the lower strip holds the
# aligned target nucleotide (coloured by `match_colour`).
for gene in genes
    # Motif tables of every species that has a motifs file for this gene.
    motifs = Dict{String, DataFrame}()
    for spec in keys(species)
        df = read_motifs(spec, gene)
        isnothing(df) && continue
        motifs[spec] = df
    end
    # Without any motif table there is nothing to draw, and `maximum` would
    # throw on the empty collection.
    isempty(motifs) && continue
    maxmotifs = maximum(nrow, values(motifs))
    subplots = Any[]
    # Fixed species order, so the rows do not depend on Dict iteration order.
    for spec in ["phylloglossum", "phlegmariurus", "huperzia"]
        # A row needs both a motif table and a target sequence; previously a
        # species with a target but no motif table raised a KeyError below.
        haskey(motifs, spec) || continue
        target = read_target(spec, gene)
        isnothing(target) && continue
        # The `maxmotifs` nucleotides that end four positions before the end
        # of the target.
        target = target[end - maxmotifs - 3:end - 4]
        # `plot` makes the new subplot current; `plot!`/`annotate!` below draw on it.
        push!(subplots, plot(legend=false, axis=false, left_margin = 60mm))
        # Shorter motif tables are right-aligned: the first `offset` columns
        # have no motif and are drawn in silver.
        offset = maxmotifs - nrow(motifs[spec])
        for i in maxmotifs:-1:1
            motif = i - offset >= 1 ? motifs[spec][i - offset, :] : nothing
            if isnothing(motif)
                plot!(rectangle(2,1,2 * i - 1,1), color = :silver)
                plot!(rectangle(2,1,2 * i - 1,0), color = :silver, alpha=0.8)
            else
                # `motif_colours` has no entry for some names allowed by
                # `aligned_motifs` ("SS" and the "...i" variants); fall back
                # to silver instead of raising a KeyError.
                plot!(rectangle(2,1,2 * i - 1,1), color = get(motif_colours, motif.motif, :silver))
                annotate!(2 * i, 1.5, Plots.text(join([motif.fifth, motif.last]), :black, :center))
                plot!(rectangle(2,1,2 * i - 1,0), color = match_colour(motif.motif, motif.fifth, motif.last, target[i]), alpha=0.8)
            end
            annotate!(2 * i, 0.5, Plots.text(target[i], :black, :center))
        end
        # Species name to the left of the row.
        annotate!(-0.5, 1, Plots.text(species[spec], "Helvetica Oblique", :black, :right))
    end
    isempty(subplots) && continue
    # One layout row per species actually drawn (was hard-coded to three).
    plot(subplots..., size=(800,300), layout = (length(subplots), 1))
    savefig(gene * ".svg")
end

In [None]:
# Stand-alone colour bar for the :bam gradient used by `match_colour`.
l = @layout [a{0.95w} b]
cmap = cgrad(:bam)
# NOTE(review): lims = (-1, 0) presumably moves the random 2x2 cells out of
# view so that only the colour bar remains visible - confirm.
heatmap(
    rand(2, 2);
    c = cmap,
    cbar = true,
    clims = (0, 1),
    lims = (-1, 0),
    framestyle = :none,
)

In [None]:
# Save the current plot (no plot object is passed) to "cbar.svg".
savefig("cbar.svg")