In [1]:
# Working directory for this analysis: "<home>/<DATE>-<TASK>", created if missing.
DATE = "2021-05-16"
TASK = "phage-k-pangenome"
# joinpath supplies the platform's path separator instead of a hard-coded "/".
DIR = mkpath(joinpath(homedir(), "$(DATE)-$(TASK)"))

"/Users/cameronprybol/2021-05-16-phage-k-pangenome"

In [2]:
import LightGraphs
import MetaGraphs
import BioSequences

In [3]:
import uCSV
import DataFrames
import FASTX
import HTTP
import CodecZlib
import DataStructures

In [4]:
import Pkg

In [5]:
# Record `evidence` on vertex `index` of `kmer_graph`.
# The first observation creates the vertex's `:evidence` set; later ones are
# pushed into the existing set.
function add_evidence!(kmer_graph, index::Int, evidence)
    if !MetaGraphs.has_prop(kmer_graph, index, :evidence)
        return MetaGraphs.set_prop!(kmer_graph, index, :evidence, Set([evidence]))
    end
    return push!(kmer_graph.vprops[index][:evidence], evidence)
end

# Record `evidence` on `edge` of `kmer_graph`.
# The first observation creates the edge's `:evidence` set; later ones are
# pushed into the existing set.
function add_evidence!(kmer_graph, edge::LightGraphs.SimpleGraphs.AbstractSimpleEdge, evidence)
    if !MetaGraphs.has_prop(kmer_graph, edge, :evidence)
        return MetaGraphs.set_prop!(kmer_graph, edge, :evidence, Set([evidence]))
    end
    return push!(kmer_graph.eprops[edge][:evidence], evidence)
end

add_evidence! (generic function with 2 methods)

In [6]:
# Count the forward-strand k-mers of type `KMER_TYPE` in `sequence`.
# Returns an OrderedDict mapping each k-mer to its number of occurrences, keyed
# in order of first appearance.
function count_kmers(::Type{KMER_TYPE}, sequence::BioSequences.LongSequence) where KMER_TYPE
    counts = DataStructures.OrderedDict{KMER_TYPE, Int}()
    for observed in BioSequences.each(KMER_TYPE, sequence)
        forward_kmer = observed.fw
        counts[forward_kmer] = get(counts, forward_kmer, 0) + 1
    end
    return counts
end

# Count k-mers in a FASTA/FASTQ record by delegating to the sequence method.
function count_kmers(::Type{KMER_TYPE}, record::R) where {KMER_TYPE, R <: Union{FASTX.FASTA.Record, FASTX.FASTQ.Record}}
    record_sequence = FASTX.sequence(record)
    return count_kmers(KMER_TYPE, record_sequence)
end

# Count k-mers across every element of `sequences`, summing the per-element
# counts, and return the combined OrderedDict sorted by k-mer.
function count_kmers(::Type{KMER_TYPE}, sequences) where KMER_TYPE
    totals = DataStructures.OrderedDict{KMER_TYPE, Int}()
    for item in sequences
        merge!(+, totals, count_kmers(KMER_TYPE, item))
    end
    return sort!(totals)
end

count_kmers (generic function with 3 methods)

In [7]:
# Return the position of `kmer` in the sorted collection `kmers`.
#
# Throws an `AssertionError` whose message is the k-mer when it is not present.
# The bounds guard matters because `searchsortedfirst` returns
# `lastindex(kmers) + 1` when `kmer` sorts after every element (or `kmers` is
# empty); indexing with that value would raise a `BoundsError` instead.
function get_kmer_index(kmers, kmer)
    index = searchsortedfirst(kmers, kmer)
    found = index <= lastindex(kmers) && kmers[index] == kmer
    found || throw(AssertionError("$kmer"))
    return index
end

get_kmer_index (generic function with 1 method)

In [8]:
"""
	function get_sequence(;db=""::String, accession=""::String, ftp=""::String)

Get dna (db = "nuccore") or protein (db = "protein") sequences from NCBI
or get fasta directly from FTP site
"""
function get_sequence(;db=""::String, accession=""::String, ftp=""::String)
    # NOTE: the `::String` annotations above assert the type of the default
    # literals only; they do not constrain what callers pass.
    if !isempty(db) && !isempty(accession)
        # API will block if we request more than 3 times per second, so set a 1/2 second sleep to set max of 2 requests per second when looping
        sleep(0.5)
        url = "https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?db=$(db)&report=fasta&id=$(accession)"
        return FASTX.FASTA.Reader(IOBuffer(HTTP.get(url).body))
    elseif !isempty(ftp)
        # The FTP payload is gzip-compressed, so decompress it while reading.
        return FASTX.FASTA.Reader(CodecZlib.GzipDecompressorStream(IOBuffer(HTTP.get(ftp).body)))
    else
        # Fail here rather than logging and returning `nothing`, which would
        # only surface later as an unrelated error in the caller.
        throw(ArgumentError("invalid call: provide both `db` and `accession`, or `ftp`"))
    end
end

get_sequence

In [9]:
import Eisenia

┌ Info: Precompiling Eisenia [453d265d-8292-4a7b-a57c-dce3f9ae6acd]
└ @ Base loading.jl:1278
│ - If you have Eisenia checked out for development and have
│   added MetaGraphs as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with Eisenia


In [10]:
# Write `graph` to `outfile` in GFA 1.0 text form.
#
# One "S" line is written per vertex (its `:sequence`, plus an `RC:i:` tag
# holding the size of its `:evidence` set, or 1 when it has none). One "L"
# line is written per edge, using the edge's `:orientations` property for the
# +/- signs and `k - 1` (from the graph-level `:k` property) as the overlap.
function graph_to_gfa(;graph, outfile)
    open(outfile, "w") do io
        println(io, "H\tVN:Z:1.0")
        for vertex in LightGraphs.vertices(graph)
            vertex_props = graph.vprops[vertex]
            sequence = vertex_props[:sequence]
            depth = haskey(vertex_props, :evidence) ? length(vertex_props[:evidence]) : 1
            segment = ("S", "$vertex", sequence, "RC:i:$(depth)")
            println(io, join(segment, '\t'))
        end
        for edge in LightGraphs.edges(graph)
            overlap = graph.gprops[:k] - 1
            orientations = graph.eprops[edge][:orientations]
            source_sign = orientations.source_orientation ? '+' : '-'
            destination_sign = orientations.destination_orientation ? '+' : '-'
            link = ("L", edge.src, source_sign, edge.dst, destination_sign, "$(overlap)M")
            println(io, join(link, '\t'))
        end
    end
end

graph_to_gfa (generic function with 1 method)

In [11]:
# Git_directory = homedir() * "/" * first(filter(x -> occursin(r"^git$"i, x), readdir(homedir())))
# path = "$(Git_directory)/Eisenia"
# push!(LOAD_PATH, path); Pkg.activate(path); Pkg.update(); Pkg.instantiate(); Pkg.activate()
# import Eisenia

In [12]:
# Load metadata/ncbi-staph-phage.csv (relative to the parent of the current
# working directory) into a DataFrame.
ncbi_staph_phage_metadata = DataFrames.DataFrame(
    uCSV.read(
        "$(dirname(pwd()))/metadata/ncbi-staph-phage.csv";
        header = 1,
        quotes = '"',
        typedetectrows = 100
    )...
)

Unnamed: 0_level_0,Accession,SRA_Accession,Submitters
Unnamed: 0_level_1,String,String,String
1,NC_048634,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
2,NC_048635,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
3,NC_048636,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
4,NC_048644,,"Escobar-Perez,J., Reyes,N., Marquez-Ortiz,A., Rebollo,J., Pinzon,H., Tovar,C., Moreno,J.C., Corredor,Z.R., Castro,B.C., Moncada,M.G., Vanegas,N.G."
5,NC_048657,,"Sun,Q., Zhang,X., Xing,S., Tong,Y.-G."
6,NC_048658,,"Kim,D."
7,NC_048681,,"O'Brien,F.G., Baines,S.L., Howden,B.P., Coombs,G.W."
8,NC_048710,,"Dmitrenko,O.A., Tikhomirov,T., Balbutskaya,A., Fedorova,N., Alkhovsky,S.V."
9,NC_048711,,"Dmitrenko,O.A., Tikhomirov,T., Balbutskaya,A., Fedorova,N., Alkhovsky,S.V."
10,NC_048713,,"Dmitrenko,O.A., Balbutskaya,A., Alkhovsky,S.V."


In [13]:
# Keep only rows whose Nuc_Completeness is "complete".
ncbi_staph_phage_metadata = ncbi_staph_phage_metadata[
    ncbi_staph_phage_metadata.Nuc_Completeness .== "complete",
    :,
]

Unnamed: 0_level_0,Accession,SRA_Accession,Submitters
Unnamed: 0_level_1,String,String,String
1,NC_048634,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
2,NC_048635,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
3,NC_048636,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
4,NC_048644,,"Escobar-Perez,J., Reyes,N., Marquez-Ortiz,A., Rebollo,J., Pinzon,H., Tovar,C., Moreno,J.C., Corredor,Z.R., Castro,B.C., Moncada,M.G., Vanegas,N.G."
5,NC_048657,,"Sun,Q., Zhang,X., Xing,S., Tong,Y.-G."
6,NC_048658,,"Kim,D."
7,NC_048681,,"O'Brien,F.G., Baines,S.L., Howden,B.P., Coombs,G.W."
8,NC_048710,,"Dmitrenko,O.A., Tikhomirov,T., Balbutskaya,A., Fedorova,N., Alkhovsky,S.V."
9,NC_048711,,"Dmitrenko,O.A., Tikhomirov,T., Balbutskaya,A., Fedorova,N., Alkhovsky,S.V."
10,NC_048713,,"Dmitrenko,O.A., Balbutskaya,A., Alkhovsky,S.V."


In [14]:
# Keep only rows whose Sequence_Type is "RefSeq".
ncbi_staph_phage_metadata = ncbi_staph_phage_metadata[
    ncbi_staph_phage_metadata.Sequence_Type .== "RefSeq",
    :,
]

Unnamed: 0_level_0,Accession,SRA_Accession,Submitters
Unnamed: 0_level_1,String,String,String
1,NC_048634,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
2,NC_048635,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
3,NC_048636,,"Kraushaar,B., Hammerl,J.A., Kienol,M., Heinig,M.L., Sperling,N., Dinh Thanh,M., Reetz,J., Jackel,C., Fetsch,A., Hertwig,S."
4,NC_048644,,"Escobar-Perez,J., Reyes,N., Marquez-Ortiz,A., Rebollo,J., Pinzon,H., Tovar,C., Moreno,J.C., Corredor,Z.R., Castro,B.C., Moncada,M.G., Vanegas,N.G."
5,NC_048657,,"Sun,Q., Zhang,X., Xing,S., Tong,Y.-G."
6,NC_048658,,"Kim,D."
7,NC_048681,,"O'Brien,F.G., Baines,S.L., Howden,B.P., Coombs,G.W."
8,NC_048710,,"Dmitrenko,O.A., Tikhomirov,T., Balbutskaya,A., Fedorova,N., Alkhovsky,S.V."
9,NC_048711,,"Dmitrenko,O.A., Tikhomirov,T., Balbutskaya,A., Fedorova,N., Alkhovsky,S.V."
10,NC_048713,,"Dmitrenko,O.A., Balbutskaya,A., Alkhovsky,S.V."


In [15]:
# Order the metadata rows in place by ascending Length.
sort!(ncbi_staph_phage_metadata, :Length)

Unnamed: 0_level_0,Accession,SRA_Accession,Submitters
Unnamed: 0_level_1,String,String,String
1,NC_007045,,"Kwan,T., Liu,J., DuBow,M., Gros,P., Pelletier,J."
2,NC_004678,,"Vybiral,D., Takac,M., Loessner,M., Witte,A., von Ahsen,U., Blasi,U., Blaesi,U."
3,NC_048159,,"Cha,Y., Chun,J., Son,B., Ryu,S."
4,NC_047855,,"Kraushaar,B., Dinh Thanh,M., Reetz,J., Fetsch,A., Hammerl,J.A., Hertwig,S., Hammerl,J."
5,NC_047919,,"Gozdek,A., Glowacka-Rutkowska,A., Gawor,J., Empel,J., Gromadka,R., Lobocka,M.B."
6,NC_031046,,"Mandeville,R."
7,NC_048107,,"Bari,S.M.N., Hernandez,A.C., Hatoum-Aslan,A."
8,NC_031008,,"Wang,Z., Zheng,P."
9,NC_023550,,"Swift,S.M., Nelson,D.C."
10,NC_009875,,"Son,J.S., Lee,S.J., Jun,S.Y., Yoon,S.J., Kang,S.H., Paik,H.R., Kang,J.O., Choi,Y.J."


In [16]:
# Print the table with every column shown.
show(stdout, ncbi_staph_phage_metadata; allcols = true)

111×21 DataFrame
│ Row │ Accession │ SRA_Accession │
│     │ [90mString[39m    │ [90mString[39m        │
├─────┼───────────┼───────────────┤
│ 1   │ NC_007045 │               │
│ 2   │ NC_004678 │               │
│ 3   │ NC_048159 │               │
│ 4   │ NC_047855 │               │
│ 5   │ NC_047919 │               │
│ 6   │ NC_031046 │               │
│ 7   │ NC_048107 │               │
│ 8   │ NC_031008 │               │
│ 9   │ NC_023550 │               │
│ 10  │ NC_009875 │               │
⋮
│ 101 │ NC_025416 │               │
│ 102 │ NC_023573 │               │
│ 103 │ NC_047728 │               │
│ 104 │ NC_047722 │               │
│ 105 │ NC_027991 │               │
│ 106 │ NC_047727 │               │
│ 107 │ NC_047726 │               │
│ 108 │ NC_005880 │               │
│ 109 │ NC_047725 │               │
│ 110 │ NC_047724 │               │
│ 111 │ NC_047948 │               │

│ Row │ Submitters                                                                              

In [18]:
# Return the neighbors of `vertex` reachable by an edge that leaves `vertex`
# in the given `orientation`. Self-loops are excluded.
function find_downstream_vertices(kmer_graph, vertex, orientation)
    downstream = Int[]
    for neighbor in LightGraphs.neighbors(kmer_graph, vertex)
        edge = LightGraphs.Edge(vertex, neighbor)
        # Looked up for every neighbor, self-loops included, before filtering.
        leaves_as = kmer_graph.eprops[edge][:orientations].source_orientation
        if neighbor != vertex && leaves_as == orientation
            push!(downstream, neighbor)
        end
    end
    return downstream
end

find_downstream_vertices (generic function with 1 method)

In [19]:
# Return the single downstream neighbor of `vertex` (as a one-element vector)
# when the step is unbranched in both directions: `vertex` has exactly one
# downstream neighbor in `orientation`, and walking back out of that neighbor
# in the flipped arrival orientation leads only to `vertex`.
# Returns an empty vector otherwise.
function find_unbranched_neighbors(kmer_graph, vertex, orientation)
    successors = find_downstream_vertices(kmer_graph, vertex, orientation)
    length(successors) == 1 || return Int[]

    successor = first(successors)
    step = LightGraphs.Edge(vertex, successor)
    arrival_orientation = kmer_graph.eprops[step][:orientations].destination_orientation
    predecessors = find_downstream_vertices(kmer_graph, successor, !arrival_orientation)
    return predecessors == [vertex] ? successors : Int[]
end

find_unbranched_neighbors (generic function with 1 method)

In [20]:
# Walk from `vertex`, leaving it in `orientation`, for as long as each step is
# unbranched (see `find_unbranched_neighbors`). Returns the traversed edges in
# order; the result is empty when the first step is already branched.
#
# The next step depends only on the current (vertex, orientation) state, so a
# repeated state means the walk has closed a cycle and would loop forever.
# The walk therefore stops before taking an edge into a state it has already
# occupied. Walks that never repeat a state are unaffected.
function oriented_unbranching_walk(kmer_graph, vertex, orientation)
    walk = []
    seen_states = Set([(vertex, orientation)])
    viable_neighbors = find_unbranched_neighbors(kmer_graph, vertex, orientation)
    while length(viable_neighbors) == 1
        viable_neighbor = first(viable_neighbors)
        edge = LightGraphs.Edge(vertex, viable_neighbor)
        next_orientation = kmer_graph.eprops[edge][:orientations].destination_orientation
        next_state = (edge.dst, next_orientation)
        # Closing a cycle: stop instead of traversing it again.
        next_state in seen_states && break
        push!(seen_states, next_state)
        push!(walk, edge)
        vertex, orientation = next_state
        viable_neighbors = find_unbranched_neighbors(kmer_graph, vertex, orientation)
    end
    return walk
end

oriented_unbranching_walk (generic function with 1 method)

In [17]:
import Primes

In [25]:
# For every prime k from 11 through 31:
#   1. download the first accession in the metadata table and collect its canonical k-mers,
#   2. build a k-mer graph (one vertex per canonical k-mer, edges from observed (k+1)-mers),
#   3. write that graph to "test.<k>.gfa",
#   4. collapse unbranched paths into untigs and write the result to "test.<k>.simplified.gfa".
for k in Primes.primes(11, 31)
    @show k
    KMER_TYPE = BioSequences.DNAMer{k}
    # Two adjacent k-mers are observed together as a single (k+1)-mer.
    EDGE_MER = BioSequences.DNAMer{k+1}

    accession = ncbi_staph_phage_metadata[1, "Accession"]
    fastx = collect(get_sequence(db = "nuccore", accession = accession))
    kmers = Set(collect(keys(Eisenia.count_canonical_kmers(KMER_TYPE, fastx))))
    # for accession in ncbi_staph_phage_metadata[2:end, "Accession"]
    #     fastx = collect(get_sequence(db = "nuccore", accession = accession))
    #     kmers = union!(kmers, collect(keys(Eisenia.count_canonical_kmers(KMER_TYPE, fastx))))
    # end
    # for kmer in kmers
    #     push!(kmers, BioSequences.reverse_complement(kmer))
    # end
    # Set elements are already distinct, so sorting alone yields the sorted unique k-mer list.
    # A k-mer's position in this vector is its vertex id.
    kmers = sort!(collect(kmers))

    kmer_graph = MetaGraphs.MetaDiGraph(length(kmers))
    MetaGraphs.set_prop!(kmer_graph, :k, k)

    for (vertex, kmer) in enumerate(kmers)
        MetaGraphs.set_prop!(kmer_graph, vertex, :sequence, kmer)
    end

    # Only the first record of the download is threaded through the graph.
    record = first(fastx)
    sequence = FASTX.sequence(record)
    record_identifier = FASTX.identifier(record)
    edge_iterator = BioSequences.each(EDGE_MER, sequence)

    for sequence_edge in edge_iterator
        forward_sequence_edge = BioSequences.LongDNASeq(sequence_edge.fw)

        # The (k+1)-mer's first k bases are the source k-mer, its last k the destination.
        observed_source_kmer = BioSequences.DNAMer(forward_sequence_edge[1:end-1])
        observed_destination_kmer = BioSequences.DNAMer(forward_sequence_edge[2:end])

        # orientation is true when the k-mer was observed in its canonical form.
        oriented_source_kmer =
            (canonical_kmer = BioSequences.canonical(observed_source_kmer),
             orientation = BioSequences.iscanonical(observed_source_kmer))

        oriented_destination_kmer =
            (canonical_kmer = BioSequences.canonical(observed_destination_kmer),
             orientation = BioSequences.iscanonical(observed_destination_kmer))

        oriented_source_vertex =
            (vertex = searchsortedfirst(kmers, oriented_source_kmer.canonical_kmer),
             orientation = oriented_source_kmer.orientation)

        oriented_destination_vertex =
            (vertex = searchsortedfirst(kmers, oriented_destination_kmer.canonical_kmer),
             orientation = oriented_destination_kmer.orientation)

        source_evidence =
            (record = record_identifier,
             index = sequence_edge.position,
             orientation = oriented_source_vertex.orientation)

        destination_evidence =
            (record = record_identifier,
             index = sequence_edge.position + 1,
             orientation = oriented_destination_vertex.orientation)

        add_evidence!(kmer_graph, oriented_source_vertex.vertex, source_evidence)
        add_evidence!(kmer_graph, oriented_destination_vertex.vertex, destination_evidence)

        # Edge as observed: source => destination.
        forward_edge = LightGraphs.Edge(oriented_source_vertex.vertex, oriented_destination_vertex.vertex)
        LightGraphs.add_edge!(kmer_graph, forward_edge)

        forward_edge_orientations =
            (source_orientation = oriented_source_vertex.orientation,
             destination_orientation = oriented_destination_vertex.orientation)

        MetaGraphs.set_prop!(kmer_graph, forward_edge, :orientations, forward_edge_orientations)

        forward_edge_evidence = (
            record = record_identifier,
            index = sequence_edge.position,
            orientation = true
        )

        add_evidence!(kmer_graph, forward_edge, forward_edge_evidence)

        # Mirror edge: destination => source with both orientations flipped.
        reverse_edge = LightGraphs.Edge(oriented_destination_vertex.vertex, oriented_source_vertex.vertex)
        LightGraphs.add_edge!(kmer_graph, reverse_edge)

        reverse_edge_orientations =
            (source_orientation = !oriented_destination_vertex.orientation,
             destination_orientation = !oriented_source_vertex.orientation)

        MetaGraphs.set_prop!(kmer_graph, reverse_edge, :orientations, reverse_edge_orientations)

        reverse_edge_evidence = (
            record = record_identifier,
            index = sequence_edge.position,
            orientation = false
        )

        add_evidence!(kmer_graph, reverse_edge, reverse_edge_evidence)
    end

    # NOTE!! here 1 => 1 has both + => + and - => - but only the - is recorded because the value over-wrote the initial
    # may need to make orientations a set
    graph_to_gfa(graph=kmer_graph, outfile="test.$(k).gfa")

    # Collect untigs: repeatedly take the lowest-numbered vertex not yet on any untig and
    # extend it in both orientations along unbranched edges. A BitVector tracks visited
    # vertices so the visited set is not rebuilt from all untigs on every iteration.
    untigs = Vector{Int}[]
    visited = falses(LightGraphs.nv(kmer_graph))
    first_unvisited = findfirst(!, visited)
    while first_unvisited !== nothing
        forward_walk = oriented_unbranching_walk(kmer_graph, first_unvisited, true)
        reverse_walk = oriented_unbranching_walk(kmer_graph, first_unvisited, false)
        # Flip the reverse walk so it leads into the start vertex rather than away from it.
        inverted_reverse_walk = [LightGraphs.Edge(e.dst, e.src) for e in reverse(reverse_walk)]
        edge_path = vcat(inverted_reverse_walk, forward_walk)
        if !isempty(edge_path)
            untig = vcat(first(edge_path).src, [e.dst for e in edge_path])
        else
            untig = [first_unvisited]
        end
        push!(untigs, untig)
        visited[untig] .= true
        # Every vertex below first_unvisited is already visited, so resume the scan from here.
        first_unvisited = findnext(!, visited, first_unvisited)
    end

    # Spell out each untig's sequence by overlapping consecutive k-mers, then store it in
    # canonical form together with the matching vertex path and per-vertex orientations.
    oriented_untigs = []

    for path in untigs
        untig_sequence = BioSequences.LongDNASeq(kmers[first(path)])
        if length(path) == 1
            orientations = [true]
        elseif length(path) > 1
            initial_edge = LightGraphs.Edge(path[1], path[2])
            initial_orientation = kmer_graph.eprops[initial_edge][:orientations].source_orientation
            orientations = [initial_orientation]
            if !initial_orientation
                untig_sequence = BioSequences.reverse_complement(untig_sequence)
            end

            for (src, dst) in zip(path[1:end-1], path[2:end])
                edge = LightGraphs.Edge(src, dst)
                destination = BioSequences.LongDNASeq(kmers[edge.dst])
                destination_orientation = kmer_graph.eprops[edge][:orientations].destination_orientation
                push!(orientations, destination_orientation)
                if !destination_orientation
                    destination = BioSequences.reverse_complement(destination)
                end
                # Consecutive k-mers must overlap by k-1 bases; only the last base is new.
                sequence_suffix = untig_sequence[end-length(destination)+2:end]
                destination_prefix = destination[1:end-1]
                @assert sequence_suffix == destination_prefix
                push!(untig_sequence, destination[end])
            end
        end

        # If the spelled sequence is not canonical, store its canonical form and reverse
        # the path and flip the orientations to match.
        oriented_untig =
        (
            sequence = BioSequences.canonical(untig_sequence),
            path = BioSequences.iscanonical(untig_sequence) ? path : reverse(path),
            orientations = BioSequences.iscanonical(untig_sequence) ? orientations : reverse(.!orientations)
        )

        push!(oriented_untigs, oriented_untig)
    end

    simplified_graph = MetaGraphs.MetaDiGraph(length(oriented_untigs))

    MetaGraphs.set_prop!(simplified_graph, :k, k)

    for (vertex, untig) in enumerate(oriented_untigs)
        MetaGraphs.set_prop!(simplified_graph, vertex, :sequence, untig.sequence)
        MetaGraphs.set_prop!(simplified_graph, vertex, :path, untig.path)
        MetaGraphs.set_prop!(simplified_graph, vertex, :orientations, untig.orientations)
    end

    # Summarise each untig as (first k-mer => its orientation) => (last k-mer => its orientation).
    simplified_untigs = []

    for vertex in LightGraphs.vertices(simplified_graph)
        in_kmer = simplified_graph.vprops[vertex][:path][1] => simplified_graph.vprops[vertex][:orientations][1]
        out_kmer = simplified_graph.vprops[vertex][:path][end] => simplified_graph.vprops[vertex][:orientations][end]
        push!(simplified_untigs, in_kmer => out_kmer)
    end

    # Ways untig u can connect to untig v, tried in this order. Each entry is
    # (end of u that is left, flip its orientation?, end of v that is entered, flip its orientation?).
    # All matches write to the same (ui, vi) edge, so the last matching case sets :orientations.
    junction_cases = (
        (last,  false, first, false),  # + => +
        (last,  false, last,  true),   # + => -
        (first, true,  first, false),  # - => +
        (first, true,  last,  true),   # - => -
    )

    for (ui, u) in enumerate(simplified_untigs)
        for (vi, v) in enumerate(simplified_untigs)
            for (source_end, flip_source, destination_end, flip_destination) in junction_cases
                source_kmer_index, source_orientation = source_end(u)
                destination_kmer_index, destination_orientation = destination_end(v)
                source_orientation = xor(source_orientation, flip_source)
                destination_orientation = xor(destination_orientation, flip_destination)

                edge = LightGraphs.Edge(source_kmer_index, destination_kmer_index)
                LightGraphs.has_edge(kmer_graph, edge) || continue

                observed_orientations = kmer_graph.eprops[edge][:orientations]
                source_orientation_matches = (observed_orientations.source_orientation == source_orientation)
                destination_orientation_matches = (observed_orientations.destination_orientation == destination_orientation)
                if source_orientation_matches && destination_orientation_matches
                    simplified_graph_edge = LightGraphs.Edge(ui, vi)

                    LightGraphs.add_edge!(simplified_graph, simplified_graph_edge)
                    edge_orientations = (
                        source_orientation = source_orientation,
                        destination_orientation = destination_orientation
                    )
                    MetaGraphs.set_prop!(simplified_graph, simplified_graph_edge, :orientations, edge_orientations)
                end
            end
        end
    end

    graph_to_gfa(graph=simplified_graph, outfile="test.$(k).simplified.gfa")
end

k = 11
k = 13
k = 17
k = 19
k = 23
k = 29
k = 31


In [27]:
# 11 is too small, minimum should be k=13
# List the prime k values from 13 through 61 (inclusive).
Primes.primes(13, 61)

13-element Array{Int64,1}:
 13
 17
 19
 23
 29
 31
 37
 41
 43
 47
 53
 59
 61