Skip to content

Commit

Permalink
Merge 8d7864a into 0594e74
Browse files Browse the repository at this point in the history
  • Loading branch information
dfornika committed Mar 27, 2018
2 parents 0594e74 + 8d7864a commit 491e601
Show file tree
Hide file tree
Showing 5 changed files with 1,559 additions and 1,561 deletions.
1 change: 1 addition & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ requirements:
- hdf5
- mpfr
- libxml2
- unzip

test:
commands:
Expand Down
2 changes: 1 addition & 1 deletion galaxy/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ owner: dfornika
description: MLST caller designed to handle large typing schemes.
homepage_url: https://github.com/WGS-TB/MentaLiST
long_description:
remote_repository_url: https://github.com/WGS-TB/MentaLiST/tree/master/galaxy
remote_repository_url: https://github.com/WGS-TB/MentaLiST/tree/mentalist_v0.2
type: unrestricted
11 changes: 5 additions & 6 deletions galaxy/tools/mentalist_call/mentalist_call.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
$output_votes
$output_special
#if $input_type.sPaired == "paired":
-s $input_type.pInput1.name $input_type.pInput1 $input_type.pInput2
-1 $input_type.pInput1 -2 $input_type.pInput2
#elif $input_type.sPaired == "collections":
-s $input_type.fastq_collection.name $input_type.fastq_collection.forward $input_type.fastq_collection.reverse
-1 $input_type.fastq_collection.forward -2 $input_type.fastq_collection.reverse
#end if
]]></command>
<inputs>
Expand Down Expand Up @@ -45,13 +45,12 @@
<param name="kmer_threshold" type="integer" label="Kmer threshold"
help="Minimum number of times a kmer is seen to be considered present in the sample."
value="10" min="1" max="25" />
<parapm name="output_votes" type="boolean" label="Output votes"
argument="--output_votes" truevalue="--output_votes" falsevalue=""
help="Outputs the results for the original voting algorithm" />
<param name="output_votes" type="boolean" label="Output votes"
argument="--output_votes" truevalue="--output_votes" falsevalue=""
help="Outputs the results for the original voting algorithm" />
<param name="output_special" type="boolean" label="Output special"
argument="--output_special" truevalue="--output_special" falsevalue=""
help="Outputs a fasta file with the alleles from special cases such as incomplete coverage, novel and multiple alleles." />

</inputs>
<outputs>
<data name="output_file" format="tabular" />
Expand Down
64 changes: 31 additions & 33 deletions src/mlst_download_functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,50 +181,48 @@ function download_enterobase_scheme(scheme, s_type, output_dir, overwrite=false)
return loci_files
end

# NOTE(review): this span is a rendered diff hunk, not runnable code. Removed and added
# lines are interleaved without +/- markers and indentation has been lost, which is why
# two `function` headers, two download calls and two `return` statements appear below.
# Which version a given line belongs to is inferred from its variable names
# (`gzip_alleles`/`loci_files`/`current_fasta_fh` vs. `scheme_zip_file`/`locus_files`);
# confirm against the repository before relying on that reading.
#
# download_cgmlst_scheme resolves `target_id` through `_find_cgmlst_id`, downloads the
# allele file for that id from cgmlst.org into `output_dir`, writes per-locus FASTA
# files and returns the vector of their paths. If the id lookup yields `nothing`, a
# warning is logged and the process exits with status -1.
function download_cgmlst_scheme(target_id, output_dir, overwrite=false)
# NOTE(review): presumably the replacement signature; it drops the `overwrite`
# positional argument, so any caller still passing three arguments would fail — verify.
function download_cgmlst_scheme(target_id, output_dir)
id = _find_cgmlst_id(target_id)
# NOTE(review): `== nothing` works here, but `=== nothing` is the idiomatic identity test.
if id == nothing
Lumberjack.warn("Id/species ($target_id) not found!")
exit(-1)
end
info("Downloading cgMLST scheme ...")
gzip_alleles = _download_to_folder("http://www.cgmlst.org/ncs/schema/$id/alleles",output_dir)
# unzip file to one FASTA per locus:
loci_files = String[]
current_fasta_fh = nothing
scheme_zip_file = _download_to_folder("http://www.cgmlst.org/ncs/schema/$id/alleles", output_dir)
locus_files = String[]
locus = ""
fh = GZip.open(gzip_alleles)
info("Unzipping cgMLST scheme into individual FASTA files for each loci ...")
n_locus = 0
for l in eachline(fh)
# skip empty lines or "="
if length(strip(l)) == 0 || startswith(l, "=")
continue
end
if startswith(l,"#")
# print to show progress:
if n_locus % 200 == 0
info("Unzipping cgMLST scheme into individual FASTA files for each locus ...")
scheme_dirname = dirname(scheme_zip_file)
# Shells out to the external `unzip` program, extracting into a `tmp` subdirectory
# next to the downloaded file; the downloaded archive is deleted right after.
run(`unzip -oq $scheme_zip_file -d $scheme_dirname/tmp`)
rm(scheme_zip_file)
scheme_files = readdir(joinpath(scheme_dirname, "tmp"))

for scheme_file in scheme_files
# print to show progress:
if length(locus_files) % 200 == 0
print(".")
end
# Output goes to `scheme_dirname` under the same file name; the input is read from
# the `tmp` subdirectory (see the `eachline` call below), so the two paths differ.
scheme_file_path = joinpath(scheme_dirname, scheme_file)
push!(locus_files, scheme_file_path)
# get locus ID from filename
# (the part of the file name before the first ".")
locus = split(scheme_file, ".")[1]
# NOTE(review): `fh` is closed by the explicit `close(fh)` further down; if a write
# throws first, the handle is not closed — an `open(...) do fh ... end` block would be.
fh = open(scheme_file_path, "w")
for l in eachline(joinpath(scheme_dirname, "tmp", scheme_file))
if length(strip(l)) == 0
continue
end
n_locus += 1
locus = strip(l[2:end]) # remove the starting '#'
if current_fasta_fh != nothing
close(current_fasta_fh)
end
fasta = joinpath(output_dir, "$locus.fa")
push!(loci_files, fasta)
current_fasta_fh = open(fasta, "w")
elseif startswith(l,">")
# substitute the number only to locus_number
id = strip(l[2:end])
write(current_fasta_fh, ">$(locus)_$id\n")
else #
if current_fasta_fh != nothing
write(current_fasta_fh, "$l\n")
# Header lines (first character '>') are rewritten as ">" * locus * "_" * rest-of-line;
# every other non-empty line is copied through with a trailing newline.
if l[1] == '>'
l = '>' * locus * "_" * l[2:end] * "\n"
write(fh, l)
else
write(fh, l * "\n")
end
end
close(fh)
end
println()
info("$n_locus loci found.")
return loci_files
total_loci = length(locus_files)
info("$total_loci loci found.")
# Remove the temporary extraction directory and everything in it.
rm(joinpath(scheme_dirname, "tmp"), recursive=true)
return locus_files
end
Loading

0 comments on commit 491e601

Please sign in to comment.