In [None]:
# Blank out LD_LIBRARY_PATH in this process's environment before anything else runs.
# NOTE(review): presumably this keeps the external tools launched later in the notebook
# from loading conflicting shared libraries — confirm the motivation.
ENV["LD_LIBRARY_PATH"] = ""

In [None]:
import Pkg
# Activate (and update) the project environment in the current working directory.
Pkg.activate(".")
Pkg.update()

# Registered packages that the notebook refers to by their module name.
# ProgressMeter is included because the assembly loop below calls
# `ProgressMeter.@showprogress`; without it that cell fails with an UndefVarError.
pkgs = [
    "FASTX",
    "Graphs",
    "MetaGraphs",
    "ProgressMeter",
]
Pkg.add(pkgs)
Pkg.develop(url="https://github.com/cjprybol/Mycelia.git")

# `import` rather than `using`, so every call stays qualified with its module name.
# The import expression is built from a Symbol instead of parsing a code string.
for pkg in pkgs
    @eval import $(Symbol(pkg))
end
import Mycelia

In [None]:
# The "data" directory is a sibling of the directory this notebook is run from.
data_dir = let parent_of_cwd = dirname(pwd())
    joinpath(parent_of_cwd, "data")
end

In [None]:
# Entries under data/SRA that hold a `trim_galore` subdirectory.
# Jupyter checkpoint folders are excluded, and so are plain files: calling `readdir`
# on a stray file in data/SRA would throw instead of simply skipping it.
SRR_paths = filter(readdir(joinpath(data_dir, "SRA"), join=true)) do path
    !occursin(".ipynb_checkpoints", path) &&
        isdir(path) &&
        isdir(joinpath(path, "trim_galore"))
end

In [None]:
# For every selected sample directory, derive a chain of files from the assembly at
# `<sample>/megahit/final.contigs.fa`:
#   final.contigs.fastg                  (megahit_toolkit contig2fastg)
#   final.contigs.fastg.gfa              (Bandage reduce)
#   final.contigs.fastg.gfa.fna          (one FASTA record per graph vertex)
#   final.contigs.fastg.gfa.bandage.jpg  (Bandage image)
# Each step is skipped when its output file already exists. Outputs produced by this
# process itself are written to a temporary path and moved into place only on success,
# so a failed step cannot leave a truncated file that later runs would mistake for a
# finished one.
ProgressMeter.@showprogress for SRR_path in SRR_paths
    out_dir = joinpath(SRR_path, "megahit")

    initial_assembled_fasta = "$(out_dir)/final.contigs.fa"
    # Anchor the substitution to the end of the path so that a ".fa" occurring in any
    # directory name is left alone.
    assembled_fastg = replace(initial_assembled_fasta, r"\.fa$" => ".fastg")

    if !isfile(assembled_fastg)
        # The final k length is recovered from the contig identifiers: the text before
        # the first '_' with a leading "k" stripped (assumes identifiers of the form
        # `k<K>_<n>` — confirm against the assembler's output). The FASTA is only
        # scanned when the conversion is actually needed.
        final_k_lengths = unique([
            replace(first(split(FASTX.identifier(record), '_')), r"^k" => "")
            for record in Mycelia.open_fastx(initial_assembled_fasta)
        ])
        length(final_k_lengths) == 1 || error(
            "expected exactly one k length among the contig identifiers of " *
            "$(initial_assembled_fasta), found $(length(final_k_lengths))",
        )
        final_k_length = parse(Int, only(final_k_lengths))

        # stdout of the converter is the FASTG content; capture it in a temp file first.
        tmp_fastg = assembled_fastg * ".tmp"
        run(pipeline(`megahit_toolkit contig2fastg $(final_k_length) $(initial_assembled_fasta)`, tmp_fastg))
        mv(tmp_fastg, assembled_fastg; force=true)
    end

    assembled_gfa = "$(assembled_fastg).gfa"
    if !isfile(assembled_gfa)
        run(`Bandage reduce $(assembled_fastg) $(assembled_gfa)`)
    end

    assembled_fasta = assembled_gfa * ".fna"
    if !isfile(assembled_fasta)
        # Parse before opening any output so a parse failure creates no file at all.
        gfa_graph = Mycelia.parse_gfa(assembled_gfa)
        tmp_fasta = assembled_fasta * ".tmp"
        open(tmp_fasta, "w") do io
            fastx_io = FASTX.FASTA.Writer(io)
            try
                for v in Graphs.vertices(gfa_graph)
                    vertex_props = gfa_graph.vprops[v]
                    record = FASTX.FASTA.Record(vertex_props[:identifier], vertex_props[:sequence])
                    write(fastx_io, record)
                end
            finally
                # Close the writer even if a record fails to serialize.
                close(fastx_io)
            end
        end
        mv(tmp_fasta, assembled_fasta; force=true)
    end

    # Generate a Bandage plot of the assembly graph.
    bandage_outfile = "$(assembled_gfa).bandage.jpg"
    if !isfile(bandage_outfile)
        run(`Bandage image $(assembled_gfa) $bandage_outfile`)
    end
end