In [1]:
DATE = "2021-06-24"
TASK = "simplified-kmer-graph"
DIR = "$(DATE)-$(TASK)"
DIR = mkpath("$(homedir())/$(DIR)")

"/Users/cameronprybol/2021-06-24-simplified-kmer-graph"

In [2]:
pkgs = [
"LightGraphs",
"MetaGraphs",
"BioSequences",
"uCSV",
"DataFrames",
"FASTX",
"Random",
"ProgressMeter",
"Revise"
]

import Pkg
Pkg.add(pkgs)
for pkg in pkgs
    eval(Meta.parse("import $(basename(pkg))"))
end

import Mycelia



In [3]:
# set a random seed
seed = Random.seed!(0)

MersenneTwister(0)

In [4]:
# randomly generate a dna sequence of 100bp
genome = BioSequences.randdnaseq(seed, 100)

100nt DNA Sequence:
AAGGGTGCGGTCTAGGTGCACTGCTTATGGTCCCCGACA…TCTACCTCTTTGTTGAAACGTATTCTTCTCTTAACACCT

In [5]:
# define error rate
error_rate = 0.01

0.01

In [6]:
# generate 100x coverage fastq file
# put accuracy rate into fastq file
coverage = 10
fastq_file = "$(DIR)/$(DATE)-$(TASK).fastq"
open(fastq_file, "w") do io
    fastq_writer = FASTX.FASTQ.Writer(io)
    for i in 1:coverage
        observed_sequence = Mycelia.observe(genome, error_rate=error_rate)
        q = -10 * log10(error_rate)
        quality_scores = fill(q, length(observed_sequence))
        fastq_record = FASTX.FASTQ.Record("i", observed_sequence, quality_scores)
        write(fastq_writer, fastq_record)
    end
end

In [7]:
k = 11

11

In [8]:
kmer_type = BioSequences.BigDNAMer{k}

BioSequences.BigDNAMer{11} (alias for BioSequences.BigMer{BioSequences.DNAAlphabet{2}, 11})

In [20]:
simple_kmer_graph = Mycelia.fastx_to_simple_kmer_graph(kmer_type, fastq_file)

┌ Info: creating graph
└ @ Mycelia /Users/cameronprybol/.julia/dev/Mycelia/src/Mycelia.jl:2958


{120, 240} directed Int64 metagraph with Float64 weights defined by :weight (default weight 1.0)

In [21]:
# visualize
gfa_file = fastq_file * ".k-$k.gfa"
Mycelia.graph_to_gfa(simple_kmer_graph, gfa_file)

"/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.fastq.k-11.gfa"

In [22]:
run(`/Applications/Bandage.app/Contents/MacOS/Bandage image $(gfa_file) $(gfa_file).svg --depwidth 1 --deppower 1`)
# --nodewidth <float> Average node width (0.5 to 1000, default: 5)
# --depwidth <float>  Depth effect on width (0 to 1, default: 0.5)
# --deppower <float>  Power of depth effect on width (0 to 1, default: 0.5)

Process(`[4m/Applications/Bandage.app/Contents/MacOS/Bandage[24m [4mimage[24m [4m/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.fastq.k-11.gfa[24m [4m/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.fastq.k-11.gfa.svg[24m [4m--depwidth[24m [4m1[24m [4m--deppower[24m [4m1[24m`, ProcessExited(0))

In [23]:
html_path_to_svg = "./" * repeat("../", length(split(pwd(), '/')) - 3)
html_path_to_svg *= replace("$(gfa_file).svg", "$(homedir())/" => "")

"./../../../../2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.fastq.k-11.gfa.svg"

In [24]:
x = display("text/html", "<img src=$(html_path_to_svg)>")

In [25]:
output_fastq_file = Mycelia.polish_fastq(simple_kmer_graph, fastq_file)

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:19[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m


"/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.k11.fastq"

In [26]:
simple_kmer_graph = Mycelia.fastx_to_simple_kmer_graph(kmer_type, output_fastq_file)

┌ Info: creating graph
└ @ Mycelia /Users/cameronprybol/.julia/dev/Mycelia/src/Mycelia.jl:2958


{90, 178} directed Int64 metagraph with Float64 weights defined by :weight (default weight 1.0)

In [27]:
# visualize
gfa_file = output_fastq_file * ".k-$k.gfa"
Mycelia.graph_to_gfa(simple_kmer_graph, gfa_file)

"/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.k11.fastq.k-11.gfa"

In [28]:
run(`/Applications/Bandage.app/Contents/MacOS/Bandage image $(gfa_file) $(gfa_file).svg --depwidth 1 --deppower 1`)
# --nodewidth <float> Average node width (0.5 to 1000, default: 5)
# --depwidth <float>  Depth effect on width (0 to 1, default: 0.5)
# --deppower <float>  Power of depth effect on width (0 to 1, default: 0.5)

Process(`[4m/Applications/Bandage.app/Contents/MacOS/Bandage[24m [4mimage[24m [4m/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.k11.fastq.k-11.gfa[24m [4m/Users/cameronprybol/2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.k11.fastq.k-11.gfa.svg[24m [4m--depwidth[24m [4m1[24m [4m--deppower[24m [4m1[24m`, ProcessExited(0))

In [29]:
html_path_to_svg = "./" * repeat("../", length(split(pwd(), '/')) - 3)
html_path_to_svg *= replace("$(gfa_file).svg", "$(homedir())/" => "")

"./../../../../2021-06-24-simplified-kmer-graph/2021-06-24-simplified-kmer-graph.k11.fastq.k-11.gfa.svg"

In [30]:
x = display("text/html", "<img src=$(html_path_to_svg)>")