diff --git a/.gitignore b/.gitignore index 24f80cf..a042367 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1,5 @@ -*.gem -.yardoc doc -gfapy_doc/_build -doc/_build -pdfdoc/index.html -pdfdoc/rgfa*.pdf -pdfdoc/cover.html -cheatsheet/*.aux -cheatsheet/*_latexmk -cheatsheet/*.fls -cheatsheet/*.log -cheatsheet/*.pdf -cheatsheet/version -manual/manual.pdf + # Compiled python modules *.pyc @@ -22,3 +9,6 @@ manual/manual.pdf # Python egg metadata, regenerated from source files by setuptools /*.egg-info /*.egg + +# Wheel data +build diff --git a/CHANGELOG b/CHANGELOG index ac9a135..758951e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,60 +1,3 @@ -== GFA2 == - -- Trace alignments -- Custom record types -- More precise fields terminology: --- required fields => positional fields --- optional fields => tags - -== HEAD == - -- improve handling of comment lines - (now: stored in RGFA object; edit, deletion, output) - -== 1.3.1 == - -- fix: add file missing from gemspec - -== 1.3 == - -major changes: -- changes in GFA specification: --- P lines: cigars field is now overlaps --- comment lines --- forbid +, and -, in segment names -- .rb suffix removed from bin/* scripts - -minor changes: -- improved links terminology - (normal link -> canonical; reverse link/CIGAR -> complement) -- definition of canonical link simplified - -== 1.2.1 == - -- support new segment tags SH and UR -- update cheatsheet - -== 1.2 == - -- merge RGFATools into the main RGFA gem - -== 1.1 == - -- performance and code organization improvements -- Line code rewritten: --- uses an Hash instead of Array for fields data --- support lazy parsing of some field types --- simpler code for subclasses --- define a datatype for required fields --- Field[Parser|Validator|Writer] -- Optfield class replaced -- use symbols instead of strings when appropriate - -== 1.0.1 == - -- complete YARD documentation -- remove redundant/unused code - -== 1.0 == +== 1.0.0 == - initial release diff --git a/CONTRIBUTORS b/CONTRIBUTORS 
index f3c4de1..fbf3cf9 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1,4 +1,6 @@ -The following contributors helped to develop RGFA. Please drop a note to +The following contributors helped to develop gfapy. Please drop a note to gonnella@zbh.uni-hamburg.de if I left someone out or missed something. +- Tim Weber (translation of parts of the code from Ruby to Python) - Stefan Kurtz (advises) + diff --git a/LICENSE b/LICENSE index 17c66c3..230ef64 100644 --- a/LICENSE +++ b/LICENSE @@ -1,10 +1,10 @@ -All code of RGFA is released under the following ISC license. +All code of gfapy is released under the following ISC license. It is functionally equivalent to a two-term BSD copyright with language removed that is made unnecessary by the Berne convention. See http://openbsd.org/policy.html for more information on copyrights. -Copyright (c) 2016 Giorgio Gonnella and CONTRIBUTORS -Copyright (c) 2016 Center for Bioinformatics, University of Hamburg +Copyright (c) 2017 Giorgio Gonnella and CONTRIBUTORS +Copyright (c) 2017 Center for Bioinformatics, University of Hamburg Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..565d924 --- /dev/null +++ b/Makefile @@ -0,0 +1,31 @@ +default: tests + +.PHONY: manual tests + +PYTHON=python3 +PIP=pip3 + +# Install using pip +install: + ${PIP} install . 
--upgrade --user + +# Source distribution +sdist: + ${PYTHON} setup.py sdist + +# Pure Python Wheel +wheel: + ${PYTHON} setup.py bdist_wheel + +# Create the manual +manual: + cd manual; \ + cat chapters | xargs pandoc -o gfapy-manual.pdf + +# Run unit tests +tests: + ${PYTHON} -m unittest discover + +# Remove distribution files +cleanup: + rm -rf dist/ build/ gfapy.egg-info/ diff --git a/README.md b/README.md deleted file mode 100644 index 03875fd..0000000 --- a/README.md +++ /dev/null @@ -1,57 +0,0 @@ -The Graphical Fragment Assembly (GFA) is a proposed format which allow -to describe the product of sequence assembly. -This gem implements the proposed specifications for the GFA format -described under https://github.com/GFA-spec/GFA-spec/blob/master/GFA-spec.md -as close as possible. - -The library allows to create a RGFA object from a file in the GFA format -or from scratch, to enumerate the graph elements (segments, links, -containments, paths and header lines), to traverse the graph (by -traversing all links outgoing from or incoming to a segment), to search for -elements (e.g. which links connect two segments) and to manipulate the -graph (e.g. to eliminate a link or a segment or to duplicate a segment -distributing the read counts evenly on the copies). 
- -## Installation - -The latest release of the gem can be installed from the rubygems repository -using: -```gem install rgfa``` - -Alternatively this git repository can be cloned or the source code -installed from a release archive, and then the gem created and installed -using: -```rake install``` - -## Usage - -To use the library in your Ruby scripts, just require it as follows: -```require "rgfa"``` - -Additional functionality, which -requires custom tags and additional conventions, is included in a separate -part of the code named {RGFATools} and can be accessed with: -```require "rgfatools"``` - -## Documentation - -A cheatsheet is available as pdf under -https://github.com/ggonnella/rgfa/blob/master/cheatsheet/rgfa-cheatsheet-1.3.1.pdf - -The full API documentation is available as pdf under -https://github.com/ggonnella/rgfa/blob/master/pdfdoc/rgfa-api-1.3.1.pdf -or in HTML format (http://www.rubydoc.info/github/ggonnella/rgfa/master/RGFA). - -The main class of the library is {RGFA}, which is a good starting point -when reading the documentation. - -## References - -The manuscript describing the library has been presented at the -German Conference on Bioinformatics 2016. Currently it is under review and -available as a Peer Journal preprint: - -Gonnella G, Kurtz S. (2016) RGFA: powerful and convenient handling of -assembly graphs. PeerJ Preprints 4:e2381v1 -https://doi.org/10.7287/peerj.preprints.2381v1 - diff --git a/README.rst b/README.rst index 37c7d99..68378c9 100644 --- a/README.rst +++ b/README.rst @@ -1,2 +1,63 @@ -The README.md of RGFA should be translated in reStructuredText. -Github supports both formats. +The Graphical Fragment Assembly (GFA) are formats for the representation +of sequence graphs, including assembly, variation and splicing graphs. 
+Two versions of GFA have been defined (GFA1 and GFA2) and several sequence +analysis programs have been adopting the formats as an interchange format, +which makes it easy to combine different sequence analysis tools. + +This library implements the GFA1 and GFA2 specifications +described at https://github.com/GFA-spec/GFA-spec/blob/master/GFA-spec.md. +It allows the user to create a Gfa object from a file in the GFA format +or from scratch, to enumerate the graph elements (segments, links, +containments, paths and header lines), to traverse the graph (by +traversing all links outgoing from or incoming to a segment), to search for +elements (e.g. which links connect two segments) and to manipulate the +graph (e.g. to eliminate a link or a segment or to duplicate a segment +distributing the read counts evenly on the copies). + +The GFA format can be easily extended by users by defining their own custom +tags and record types. In Gfapy, it is easy to write extension modules, +which allow defining custom record types and datatypes for the parsing +and validation of custom fields. The custom lines can be connected, using +references, to each other and to lines of the standard record types. + +Requirements +============ + +Gfapy has been written for Python 3 and tested using Python version 3.3. +It does not require any additional Python packages or other software. + +Installation +============ + +Gfapy is distributed as a Python package and can be installed using +the Python package manager pip. 
+ +The following command installs the current stable version from the Python +Package Index:: + + pip install gfapy + +If you would like to install the current development version from GitHub, +use the following command:: + + pip install -e git+https://github.com/ggonnella/gfapy.git#egg=gfapy + +Usage +===== + +If you installed gfapy as described above, you can import it in your script +using the conventional Python syntax:: + + import gfapy + +Documentation +============= + +A user manual is available at +https://github.com/ggonnella/gfapy/blob/master/manual/gfapy-manual.pdf + +References +========== + +The manuscript describing Gfapy has been submitted and is currently under +review. This section will be updated as soon as the publication is available. diff --git a/Rakefile b/Rakefile deleted file mode 100644 index 053de7d..0000000 --- a/Rakefile +++ /dev/null @@ -1,78 +0,0 @@ -require "rake/testtask" - -$rgfaversion=Gem::Specification.load("rgfa.gemspec").version.to_s - -Rake::TestTask.new do |t| - t.libs << 'test' - t.pattern = "test/test_*.rb" - t.verbose = true -end - -desc "Run tests" -task :default => :test - -desc "Build gem" -task :build do - system("gem build rgfa.gemspec") -end - -desc "Install gem" -task :install => :build do - system("gem install rgfa") -end - -desc "Rm files created by rake build" -task :clean do - system("rm -f rgfa-*.gem") -end - -# make documentation generation tasks -# available only if yard gem is installed -begin - require "yard" - YARD::Tags::Library.define_tag("Developer notes", :developer) - YARD::Rake::YardocTask.new do |t| - t.files = ['lib/**/*.rb'] - t.stats_options = ['--list-undoc'] - end -rescue LoadError -end - -desc "Typeset cheatsheet" -task :cheatsheet do - system("echo #$rgfaversion > cheatsheet/version") - system("latexmk cheatsheet/rgfa-cheatsheet.tex "+ - "-pdf -outdir=cheatsheet") - system("mv cheatsheet/rgfa-cheatsheet.pdf"+ - " cheatsheet/rgfa-cheatsheet-#$rgfaversion.pdf") -end - -desc "Create a PDF 
documentation" -task :pdf do - require "erb" - File.open("pdfdoc/cover.html", "w") do |f| - f.puts ERB.new(IO.read("pdfdoc/cover.html.erb")).result(binding) - end - system("yard2.0 --one-file --no-api private -o pdfdoc") - system("wkhtmltopdf cover pdfdoc/cover.html "+ - "toc "+ - "pdfdoc/index.html "+ - "--user-style-sheet pdfdoc/print.css "+ - "pdfdoc/rgfa-api-#$rgfaversion.pdf") -end - -desc "Create the RGFA manual" -task :manual do - system("cd manual; pandoc $(cat chapters) -o manual.pdf") -end - -desc "Create the gfapy manual" -task :pymanual do - system("cd gfapy_manual; pandoc $(cat chapters) -o gfapy-manual.pdf") -end - -desc "Run python tests" -task :pytest do - system("python3 -m unittest discover") -end - diff --git a/TODO b/TODO deleted file mode 100644 index 24622c6..0000000 --- a/TODO +++ /dev/null @@ -1 +0,0 @@ -- write complicated/long tests using data in a separate repo diff --git a/benchmarks/backtrace.rb b/benchmarks/backtrace.rb deleted file mode 100755 index cee4fdd..0000000 --- a/benchmarks/backtrace.rb +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# from: http://stackoverflow.com/questions/3955688/how-do-i-debug-ruby-scripts - -# %-PURPOSE-% -# force a running ruby process to output the backtrace, then continue -echo 'call (void)rb_backtrace()' | gdb -p $(pgrep -nf ruby) diff --git a/benchmarks/profiler.rb b/benchmarks/profiler.rb deleted file mode 100755 index 4e8ad41..0000000 --- a/benchmarks/profiler.rb +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env ruby -require "rgfa" - -mem = ARGV.delete("--mem") -if mem - require "memory_profiler" - profklass = MemoryProfiler - proflabel = "memory" - profmethod = :report - printer = lambda do |result| - result.pretty_print - end -else - require "ruby-prof" - profklass = RubyProf - proflabel = "running time" - profmethod = :profile - printer = lambda do |result| - RubyProf::FlatPrinter.new(result).print(STDOUT) - end -end - -merge = ARGV.delete("--merge") -help = ARGV.delete("--help") - -if ARGV.size 
!= 1 or help - STDERR.puts "Running time and memory profiler for RGFA" - STDERR.puts - STDERR.puts "Usage: #$0 [options] " - STDERR.puts - STDERR.puts "The default action is to parse the GFA file and output it again" - STDERR.puts "(to /dev/null). This can be changed using the following options." - STDERR.puts - STDERR.puts "Actions:" - STDERR.puts " --merge: merge linear paths" - STDERR.puts - STDERR.puts "Profiling options:" - STDERR.puts " --mem: memory profiling (default: running time profiling)" - exit 1 -end - -actions = ["parse input file", "output GFA to /dev/null"] -if merge - actions.insert(1, "merge linear paths") -end - -filename = ARGV[0] -#if !File.exist?(filename) -# STDERR.puts "Specified file does not exist: #{filename}" -# exit 1 -#end - -puts "# --- RGFA profiler ---" -puts "# Input file: #{filename}" -puts "# Profiling: #{proflabel}" -puts "# Actions: #{actions.join(", ")}" -puts "# Date: #{`date`}" -puts "# Host: #{`hostname`}" -gitbranch = `git rev-parse --abbrev-ref HEAD 2> /dev/null` -if $?.exitstatus == 0 - gitlog = `git log --oneline -n 1 2> /dev/null` - puts "# Branch: #{gitbranch}" - puts "# Last Commit: #{gitlog}" -end -puts "# ---" -puts - -result = profklass.send(profmethod) do - g = RGFA.new - g.enable_progress_logging(part: 0.001) - g.read_file(filename) - if merge - g.merge_linear_paths - end - g.to_file("/dev/null") -end -printer.call(result) diff --git a/bin/gfadiff b/bin/gfadiff index 52e8550..e39add2 100755 --- a/bin/gfadiff +++ b/bin/gfadiff @@ -1,413 +1,3 @@ -#!/usr/bin/env ruby - -require "rgfa" - -rt = [] -all_rt = %W[-h -s -l -c -p] -all_rt.each do |rtopt| - rt << ARGV.delete(rtopt) -end -rt.compact! -rt = all_rt if rt.empty? 
- -out_identical = ARGV.delete("-i") - -out_script = ARGV.delete("-script") - -if ARGV.size != 2 - STDERR.puts "Compare two RGFA files" - STDERR.puts - STDERR.puts "Usage: #$0 [-h] [-s] [-l] [-c] [-p] [-i] [-script] " - STDERR.puts - STDERR.puts "If a combination of -h,-s,-l,-c and/or -p is specified, then" - STDERR.puts "only record of the specified type [h=headers, s=segments, " - STDERR.puts "l=links, c=containments, p=paths] are compared. " - STDERR.puts "(default: -h -s -l -c -p)" - STDERR.puts - STDERR.puts "Other options:" - STDERR.puts " -i: output msg if identical" - STDERR.puts " -script: create ruby script to transform gfa1 in gfa2" - exit 1 -end - -if out_script - puts "#!/usr/bin/env ruby" - puts - puts "#" - puts "# This script was automatically generated using gfadiff.rb" - puts "#" - puts "# Purpose: edit gfa1 into gfa2" - puts "#" - puts "# gfa1: #{ARGV[0]}" - puts "# gfa2: #{ARGV[1]}" - puts "#" - puts - puts "require \"rgfa\"" - puts - puts "gfa = RGFA.from_file(\"#{ARGV[0]}\")" - puts -end - -gfa1 = RGFA.new -gfa1.turn_off_validations -gfa1.read_file(ARGV[0]) -gfa2 = RGFA.new -gfa2.turn_off_validations -gfa2.read_file(ARGV[1]) - -if rt.include?("-h") - h1 = gfa1.header - h2 = gfa2.header - if h1 == h2 - if out_identical - puts "# Header values are identical" - elsif out_script - puts "# Headers" - puts "# ... 
are identical" - puts - end - else - if out_script - puts "# Headers" - puts - end - (h1.tagnames - h2.tagnames).each do |k| - if out_script - puts "gfa.header.delete_field(#{k.inspect})" - else - v = h1.get(k) - if v.kind_of?(RGFA::FieldArray) - t = v.datatype - v.each do |elem| - elem = elem.to_gfa_field(datatype: t) - puts "<\t[headers/exclusive/multivalue/#{k}]\t#{elem}" - end - else - v = h1.field_to_s(k, tag: true) - puts "M\t[headers/exclusive]\t#{k.inspect}\t#{v}" - end - end - end - (h2.tagnames - h1.tagnames).each do |k| - v = h2.get(k) - if out_script - t = h2.get_datatype(k) - puts "gfa.header.set_datatype(#{k.inspect}, #{t.inspect})" - if v.kind_of?(RGFA::FieldArray) - t = v.datatype - v.each do |elem| - puts "gfa.header.add(#{k.inspect}, #{elem.inspect}, "+ - "#{t.inspect})" - end - else - puts "gfa.header.#{k}=#{v.inspect}" - end - else - if v.kind_of?(RGFA::FieldArray) - t = v.datatype - v.each do |elem| - elem = elem.to_gfa_field(datatype: t) - puts ">\t[headers/exclusive/multivalue/#{k}]\t#{elem}" - end - else - v = h2.field_to_s(k, tag: true) - puts ">\t[headers/exclusive]\t#{k.inspect}\t#{v}" - end - end - end - (h1.tagnames & h2.tagnames).each do |k| - v1 = h1.get(k) - v2 = h2.get(k) - v1a = v1.kind_of?(RGFA::FieldArray) ? v1.sort : [v1] - v2a = v2.kind_of?(RGFA::FieldArray) ? v2.sort : [v2] - t1 = v1.kind_of?(RGFA::FieldArray) ? v1.datatype : h1.get_datatype(k) - t2 = v2.kind_of?(RGFA::FieldArray) ? v2.datatype : h2.get_datatype(k) - m1 = v1.kind_of?(RGFA::FieldArray) ? "multivalue/" : "" - m2 = v2.kind_of?(RGFA::FieldArray) ? 
"multivalue/" : "" - if out_script - if t1 != t2 or v1a != v2a - puts "gfa.header.delete(#{k.inspect})" - v2a.each do |v2| - v2 = v2.to_gfa_field(datatype: t2) - puts "gfa.header.add(#{k.inspect}, #{v2.inspect}, "+ - "#{t2.inspect})" - end - end - else - if t1 != t2 - v1a.each do |v1| - v1 = v1.to_gfa_field(datatype: t1) - puts "<\t[headers/typediff/#{m1}#{k}#{}]\t#{v1}" - end - v2a.each do |v2| - v2 = v2.to_gfa_field(datatype: t2) - puts ">\t[headers/typediff/#{m2}#{k}]\t#{v2}" - end - else - (v1a-v2a).each do |v1| - v1 = v1.to_gfa_field(datatype: t1) - puts "<\t[headers/valuediff/#{m1}#{k}]\t#{v1}" - end - (v2a-v1a).each do |v2| - v2 = v2.to_gfa_field(datatype: t2) - puts ">\t[headers/valuediff/#{m2}#{k}]\t#{v2}" - end - end - end - end - if out_script - puts - end - end -end - -def diff_segments_or_paths(gfa1,gfa2,rt,out_script,out_identical) - rts = rt + "s" - rtsU = rts[0].upcase + rts[1..-1] - s1names = gfa1.send("#{rt}_names").sort - s2names = gfa2.send("#{rt}_names").sort - difffound = false - if out_script - puts "# #{rtsU}" - puts - end - (s1names - s2names).each do |sn| - difffound = true - segstr = gfa1.send(rt,sn).to_s - if out_script - puts "gfa.rm(#{sn.inspect})" - else - puts "<\t[#{rts}/exclusive]\t#{segstr}" - end - end - (s2names - s1names).each do |sn| - difffound = true - segstr = gfa2.send(rt,sn).to_s - if out_script - puts "gfa << #{segstr.inspect}" - else - puts ">\t[#{rts}/exclusive]\t#{segstr}" - end - end - (s1names & s2names).each do |sn| - s1 = gfa1.send(rt,sn) - s2 = gfa2.send(rt,sn) - s1.positional_fieldnames.each do |fn| - v1 = s1.field_to_s(fn) - v2 = s2.field_to_s(fn) - if v1 != v2 - difffound = true - if out_script - puts "gfa.#{rt}(#{sn.inspect}).#{fn}=#{v2.inspect}" - else - puts "<\t[#{rts}/posfield/valuediff/#{sn}]\t#{v1}" - puts ">\t[#{rts}/posfield/valuediff/#{sn}]\t#{v2}" - end - end - end - s1f = s1.tagnames - s2f = s2.tagnames - (s1f - s2f).each do |fn| - difffound = true - if out_script - puts 
"gfa.#{rt}(#{sn.inspect}).delete_field(#{fn.inspect})" - else - v = s1.field_to_s(fn, tag: true) - puts "<\t[#{rts}/tag/exclusive/#{sn}]\t#{v}" - end - end - (s2f - s1f).each do |fn| - difffound = true - if out_script - v = s2.get(fn) - t = s2.get_datatype(fn) - puts "gfa.#{rt}(#{sn.inspect}).set_datatype(#{fn.inspect},#{t})" - puts "gfa.#{rt}(#{sn.inspect}).#{fn}=#{v.inspect}" - else - v = s2.field_to_s(fn, tag: true) - puts ">\t[#{rts}/tag/exclusive/#{sn}]\t#{v}" - end - end - (s1f & s2f).each do |fn| - v1 = s1.field_to_s(fn, tag: true) - v2 = s2.field_to_s(fn, tag: true) - if v1 != v2 - difffound = true - if out_script - v = s2.get(fn) - t = s2.get_datatype(fn) - puts "gfa.#{rt}(#{sn.inspect}).set_datatype(#{fn.inspect},#{t})" - puts "gfa.#{rt}(#{sn.inspect}).#{fn}=#{v.inspect}" - else - puts "<\t[#{rts}/tag/valuediff/#{sn}]\t#{v1}" - puts ">\t[#{rts}/tag/valuediff/#{sn}]\t#{v2}" - end - end - end - end - if !difffound - if out_script - puts "# ... are identical" - elsif out_identical - puts "# #{rtsU} are identical" - end - end - puts if out_script -end - -if rt.include?("-s") - diff_segments_or_paths(gfa1,gfa2, "segment",out_script,out_identical) -end - -# TODO: diff of single fields -if rt.include?("-l") - difffound = false - s1names = gfa1.segment_names.sort - s2names = gfa2.segment_names.sort - if out_script - puts "# Links" - puts - end - difflinks1 = [] - (s1names - s2names).each do |sn| - difffound = true - difflinks1 += gfa1.segment(sn).dovetails - end - difflinks1.uniq.each do |l| - if !out_script - puts "<\t[links/exclusive_segments]\t#{l.to_s}" - end - end - difflinks2 = [] - (s2names - s1names).each do |sn| - difffound = true - difflinks2 += gfa2.segment(sn).dovetails - end - difflinks2.uniq.each do |l| - if out_script - puts "gfa << #{l.to_s.inspect}" - else - puts ">\t[links/exclusive_segments]\t#{l.to_s}" - end - end - difflinks1b = [] - difflinks2b = [] - (s1names & s2names).each do |sn| - l1 = gfa1.segment(sn).dovetails - l2 = 
gfa2.segment(sn).dovetails - d1 = l1 - l2 - d2 = l2 - l1 - if !d1.empty? - difffound = true - difflinks1b += d1 - end - if !d2.empty? - difffound = true - difflinks2b += d2 - end - end - (difflinks1b-difflinks1).uniq.each do |l| - if out_script - puts "gfa.rm(gfa.search_link(#{l.from.to_sym.inspect}, "+ - "#{l.from_orient.inspect}, "+ - "#{l.to.to_sym.inspect}, "+ - "#{l.to_orient.inspect}, "+ - "#{l.overlap.to_s.inspect}.to_cigar))" - else - puts "<\t[links/different]\t#{l.to_s}" - end - end - (difflinks2b-difflinks2).uniq.each do |l| - if out_script - puts "gfa << #{l.to_s.inspect}" - else - puts ">\t[links/different]\t#{l.to_s}" - end - end - if !difffound - if out_script - puts "# ... are identical" - elsif out_identical - puts "# Links are identical" - end - end - puts if out_script -end - -# TODO: this code is similar to -l; make generic and merge -if rt.include?("-c") - difffound = false - s1names = gfa1.segment_names.sort - s2names = gfa2.segment_names.sort - cexcl1 = [] - (s1names - s2names).each do |sn| - difffound = true - cexcl1 += gfa1.segment(sn).containments - end - cexcl1.uniq.each do |c| - if !out_script - puts "<\t[contaiments/exclusive_segments]\t#{c.to_s}" - end - end - cexcl2 = [] - (s2names - s1names).each do |sn| - difffound = true - cexcl2 += gfa2.segment(sn).containments - end - cexcl2.uniq.each do |c| - if out_script - puts "gfa << #{c.to_s.inspect}" - else - puts ">\t[contaiments/exclusive_segments]\t#{c.to_s}" - end - end - cdiff1 = [] - cdiff2 = [] - (s1names & s2names).each do |sn| - c1 = gfa1.segment(sn).containments - c2 = gfa2.segment(sn).containments - d1 = c1 - c2 - d2 = c2 - c1 - if !d1.empty? - difffound = true - cdiff1 += d1 - end - if !d2.empty? 
- difffound = true - cdiff2 += d2 - end - end - (cdiff1-cexcl1).uniq.each do |l| - if out_script - puts "gfa.segment(#{l.from.to_sym.inspect}).relations_to("+ - "#{l.to.to_sym.inspect}, :edges_to_contained).each(&:disconnect)" - else - puts "<\t[containments/different]\t#{l.to_s}" - end - end - (cdiff2-cexcl2).uniq.each do |l| - if out_script - puts "gfa << #{l.to_s.inspect}" - else - puts ">\t[containments/different]\t#{l.to_s}" - end - end - if !difffound - if out_script - puts "# ... are identical" - elsif out_identical - puts "# Containments are identical" - end - end - puts if out_script -end - -if rt.include?("-p") - diff_segments_or_paths(gfa1,gfa2,"path",out_script,out_identical) -end - -if out_script - puts - puts "# Output graph" - puts "puts gfa" -end +#!/usr/bin/env python3 +# GFAdiff will be converted into python PyGFAdiff +# the other scripts under bin do not need a convertion diff --git a/bin/gfapy-mergelinear b/bin/gfamergelinear similarity index 100% rename from bin/gfapy-mergelinear rename to bin/gfamergelinear diff --git a/bin/pygfadiff b/bin/pygfadiff deleted file mode 100755 index e39add2..0000000 --- a/bin/pygfadiff +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python3 -# GFAdiff will be converted into python PyGFAdiff -# the other scripts under bin do not need a convertion diff --git a/bin/rgfa-mergelinear b/bin/rgfa-mergelinear deleted file mode 100755 index 8a96576..0000000 --- a/bin/rgfa-mergelinear +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env ruby -require "rgfatools" - -redundant = ARGV.delete("-r") -if ARGV.size != 1 - STDERR.puts "Usage: #$0 [-r] " - STDERR.puts "-r: redundant output of junctions (default: false)" - exit 1 -end - -gfa = RGFA.new -#gfa.enable_progress_logging(part: 0.01) -gfa.vlevel = 0 -gfa.read_file(ARGV[0]) -if redundant - # remove isolated segments, as this mode is for comparison - # with readjoiner contigs, and isolated vertices are not output by readjoiner - gfa.connected_components.each do |cc| - if cc.size == 
1 - gfa.segment(cc[0]).disconnect - end - end -end -gfa.merge_linear_paths(redundant: redundant, - disable_tracking: true) -# merged_name: :short) -puts gfa - diff --git a/cheatsheet/rgfa-cheatsheet-1.3.1.pdf b/cheatsheet/rgfa-cheatsheet-1.3.1.pdf deleted file mode 100644 index d0a40db..0000000 Binary files a/cheatsheet/rgfa-cheatsheet-1.3.1.pdf and /dev/null differ diff --git a/cheatsheet/rgfa-cheatsheet.tex b/cheatsheet/rgfa-cheatsheet.tex deleted file mode 100644 index d1f299f..0000000 --- a/cheatsheet/rgfa-cheatsheet.tex +++ /dev/null @@ -1,176 +0,0 @@ -\documentclass[12pt]{scrartcl} - -\usepackage{comment} -\usepackage{fancyhdr} -\usepackage{lastpage} -\usepackage{catchfile} -\pagestyle{fancy} - -\CatchFileDef{\RGFAver}{version}{} - -\usepackage{array} -\fancyhf{} -\renewcommand{\headrulewidth}{0pt} -\rhead{\bfseries RGFA \RGFAver Cheatsheet (\thepage/\pageref{LastPage}) -\hspace{1.1cm}} -\lfoot{\tiny \ \ \ \ \ \ \ \ Copyright (c) 2016, Giorgio Gonnella, ZBH, University of -Hamburg, Germany. This document is under CC-BY-SA license.} - -\newcounter{cstablecounter} -\setlength{\footskip}{0in} -\usepackage[top=1in, bottom=0.25in, left=0.1in, right=0.1in]{geometry} - -\newcommand{\cstablestart}{ -\begin{center} -% \large -% \textbf{RGFA: Cheatsheet -% (\refstepcounter{cstablecounter}\arabic{cstablecounter}/2) -% }\\ -%\end{center} -%\vspace*{\fill} -%\begin{table}[h] -\centering -} -\setlength{\baselineskip}{0pt} -\setlength{\textfloatsep}{0pt plus 0pt minus 0pt} -\setlength{\intextsep}{0pt plus 0pt minus 0pt} -\setlength{\floatsep}{0pt plus 0pt minus 0pt} -\newcommand{\cstableend}{ -\end{center} - %\end{table} -%\vspace*{\fill} -%\newpage -} - -\begin{document} - -\cstablestart - -\begin{tabular}{|l|>{\ttfamily}l|} - \hline - Create graph & RGFA.new \\ - \ldots from GFA file & RGFA.from\_file("filename")\\ - \ldots from string & string.to\_rgfa \\ - \ldots from string array (e.g.) 
& \verb/["H\tVN:i:1.0",/ \\ - & \hspace{2.3mm}\verb/"S\tA\t*\tLN:i:1200"].to_rgfa/ \\ - \hline - Write GFA to file & gfa.to\_file(filename) \\ - Write GFA to standard output & puts gfa \\ - Create deep copy & gfa.clone \\ - Output statistics (normal/compact) & puts gfa.info; puts gfa.info(true) \\ - \hline - Turn off validations & gfa.turn\_off\_validations \\ - Validate line references & gfa.validate! \\ - Enable progress logging & gfa.enable\_progress\_logging \\ - \hline - Name of all segments & gfa.segment\_names \\ - Name of all paths & gfa.path\_names \\ - All segments, links, paths, etc & gfa.segments; gfa.links; gfa.paths; \ldots \\ - Iterate over segments, links, etc & gfa.segments.each \verb/{|s|...}/ \\ - \hline - Find segment & gfa.segment(segment\_name) \\ - \ldots exception if does not exist & gfa.segment!(segment\_name) \\ - \hline - Find path & gfa.path(path\_name) \textrm{(or: }path!\textrm{)} \\ - All paths through segment & gfa.paths\_with(segment\_name) \\ - \hline - Find link & gfa.link(\verb/[:S1,:E]/,\verb/[:S2,:B]/) \textrm{(or: }link!\textrm{)} \\ - (or, if multiple may exist) & gfa.links\_between(\verb/[:S1,:E]/,\verb/[:S2,:B]/) \\ - All links of segment end & gfa.links\_of(\verb/[:S1,:E]/) \\ - (also segment instead of name) & - gfa.links\_of(\verb/[segment!(:S1),:E]/) \\ - Target of all links & gfa.neighbours(\verb/[:S1,:E]/) \\ - \hline - Find containment & gfa.containment(container,~contained)\\ - & gfa.containment!(container,~contained)\\ - (or, if multiple may exist) & gfa.containments\_between(c\_ner,~c\_ned)\\ - All containments for a segment & gfa.containing(contained)\\ - & gfa.contained\_in(container)\\ - \hline - Add line (examples) & gfa << "H\verb/\t/VZ:i:1.0" \\ - & gfa << "S\verb/\t/a\verb/\t/*\verb/\t/LN:i:1200" \\ - Rename segment or path & gfa.rename(:old, :new) \\ - \hline - Segment coverage & s.coverage\\ - Segment coverage (more accurate) & s.coverage(unit\_length:~avreadlen)\\ - Segment K-mer coverage & 
s.coverage(count\_tag:~:KC)\\ - Segment length & s.length\\ - Other end of a link & link.other\_end([s1,:E])\\ - Other end of other end of link & - link.other\_end([s1,:E])\\ - & \hspace{2.8cm}.revert\_end\_type\\ - \hline - Read req.field/tag value & segment.from; segment.LN \\ - \ldots raise if tag not available & segment.LN! \\ - \ldots tag string & segment.field\_to\_s(:LN) \\ - \hline - Set/create custom tag (ab, Z type) & segment.ab = "value" \\ - \ldots of i or B/i type & s.ab = 12; \ \ \ s.ab = [1,2,3]\\ - \ldots of f or B/f type & s.ab = 12.0; \ s.ab = [1.2,2.3,3.0] \\ - \ldots of J type (hash/array) & s.ab = \{"a" => 12\}; s.ab = ["a","b",1] \\ - \hline - -\end{tabular} - -\cstableend - -\cstablestart - -\begin{tabular}{|l|>{\ttfamily}l|} - \hline - Delete segment (and its links, etc) & gfa.rm("a") \\ - Delete path & gfa.rm("path1") \\ - Delete link/containment & gfa.rm(gfa.link(\ldots)) \\ - Delete all headers & gfa.rm(:headers) \\ - Delete sequences (set all to \texttt{*}) & gfa.rm(:sequences) \\ - \hline - \textit{(rm with a method)} &\\ - Delete links of segment end & gfa.rm(\verb/:links_of,[:S1,"E]/) \\ - Delete link targets & gfa.rm(\verb/:neighbours,[:S1,"E]/) \\ - Delete paths of segment & gfa.rm(\verb/:paths_with,:S1/) \\ - Delete segments contained in s & gfa.rm(\verb/:contained_in,:s/) \\ - Delete s1-E links except to s2-B & gfa.delete\_other\_links(\verb/[s1,:E],[s2,:B]/)\\ - \hline - Access headers field & gfa.header.xx \\ - Add new header field & gfa.header.add(:xx, 12)\\ - \hline - Sum of read counts & \verb/gfa.segments.map(&:RC).inject(:+)/ \\ - Highest coverage & \verb/gfa.segments.map(&:coverage).max/ \\ - Delete low coverage segments & \verb/gfa.rm(gfa.segments.select {|s|/ \\ - & \hspace{2.7cm}\verb/s.coverage < mincov })/ \\ - Delete isolated segments & \verb/gfa.rm(gfa.segments.select {|s|/ \\ - & \hspace{1cm}\verb/gfa.connectivity(s) == [0,0] })/ \\ - \hline - Muliply segment & gfa.multiply("A", 4) \\ - Detect linear paths & 
gfa.linear\_paths \\ - Detect and merge linear paths & gfa.merge\_linear\_paths \\ - Compute connected components & gfa.connected\_components \\ - Component of a segment & gfa.segment\_connected\_component(s) \\ - Split components & gfa.split\_connected\_components \\ - Number of dead ends & gfa.n\_dead\_ends \\ - \hline - \textit{(require "rgfatools")} & \\ - Muliply segment, distribute links & gfa.multiply("A", 4) \\ - Compute copy numbers & gfa.compute\_copy\_numbers \\ - Apply copy numbers & gfa.apply\_copy\_numbers \\ - Orient invertible segments & gfa.randomly\_orient\_invertibles \\ - Enforce mandatory links & gfa.enforce\_mandatory\_links \\ - Remove p-bubbles & gfa.remove\_p\_bubbles \\ - Remove small components & gfa.remove\_small\_components(minlen) \\ - \hline - \textit{(Command line tools)} & \\ - Compare two GFA files & gfadiff 1.gfa 2.gfa \\ - \ldots only segments and links & gfadiff -s -l 1.gfa 2.gfa \\ - \ldots output as ruby script & gfadiff -script 1.gfa 2.gfa \\ - Merge linear paths in graph & rgfa-mergelinear 2.gfa > 3.gfa \\ - \hline - \textit{(Case studies CLI tools)} & \\ - Simulate de Bruijn graph & rgfa-simdebruijn 27 gnm.fas > 1.gfa \\ - \ldots and find CRISPRs candidates & rgfa-findcrisprs 1.gfa \\ - \hline -\end{tabular} - -\cstableend - -\end{document} - diff --git a/doc/Makefile b/doc/Makefile index 100ad1d..2979230 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line. 
SPHINXOPTS = -SPHINXBUILD = sphinx-build +SPHINXBUILD = sphinx-build-3.3 PAPER = BUILDDIR = _build diff --git a/gfapy_doc/apidoc/gfapy.alignment.rst b/doc/apidoc/gfapy.alignment.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.alignment.rst rename to doc/apidoc/gfapy.alignment.rst diff --git a/gfapy_doc/apidoc/gfapy.field.Validator.rst b/doc/apidoc/gfapy.field.Validator.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.field.Validator.rst rename to doc/apidoc/gfapy.field.Validator.rst diff --git a/gfapy_doc/apidoc/gfapy.field.rst b/doc/apidoc/gfapy.field.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.field.rst rename to doc/apidoc/gfapy.field.rst diff --git a/gfapy_doc/apidoc/gfapy.line.comment.rst b/doc/apidoc/gfapy.line.comment.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.comment.rst rename to doc/apidoc/gfapy.line.comment.rst diff --git a/gfapy_doc/apidoc/gfapy.line.common.rst b/doc/apidoc/gfapy.line.common.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.common.rst rename to doc/apidoc/gfapy.line.common.rst diff --git a/gfapy_doc/apidoc/gfapy.line.custom_record.rst b/doc/apidoc/gfapy.line.custom_record.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.custom_record.rst rename to doc/apidoc/gfapy.line.custom_record.rst diff --git a/gfapy_doc/apidoc/gfapy.line.edge.common.rst b/doc/apidoc/gfapy.line.edge.common.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.edge.common.rst rename to doc/apidoc/gfapy.line.edge.common.rst diff --git a/gfapy_doc/apidoc/gfapy.line.edge.containment.rst b/doc/apidoc/gfapy.line.edge.containment.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.edge.containment.rst rename to doc/apidoc/gfapy.line.edge.containment.rst diff --git a/gfapy_doc/apidoc/gfapy.line.edge.gfa1.rst b/doc/apidoc/gfapy.line.edge.gfa1.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.edge.gfa1.rst rename to 
doc/apidoc/gfapy.line.edge.gfa1.rst diff --git a/gfapy_doc/apidoc/gfapy.line.edge.gfa2.rst b/doc/apidoc/gfapy.line.edge.gfa2.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.edge.gfa2.rst rename to doc/apidoc/gfapy.line.edge.gfa2.rst diff --git a/gfapy_doc/apidoc/gfapy.line.edge.link.rst b/doc/apidoc/gfapy.line.edge.link.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.edge.link.rst rename to doc/apidoc/gfapy.line.edge.link.rst diff --git a/gfapy_doc/apidoc/gfapy.line.edge.rst b/doc/apidoc/gfapy.line.edge.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.edge.rst rename to doc/apidoc/gfapy.line.edge.rst diff --git a/gfapy_doc/apidoc/gfapy.line.fragment.rst b/doc/apidoc/gfapy.line.fragment.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.fragment.rst rename to doc/apidoc/gfapy.line.fragment.rst diff --git a/gfapy_doc/apidoc/gfapy.line.gap.rst b/doc/apidoc/gfapy.line.gap.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.gap.rst rename to doc/apidoc/gfapy.line.gap.rst diff --git a/gfapy_doc/apidoc/gfapy.line.group.gfa2.rst b/doc/apidoc/gfapy.line.group.gfa2.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.group.gfa2.rst rename to doc/apidoc/gfapy.line.group.gfa2.rst diff --git a/gfapy_doc/apidoc/gfapy.line.group.ordered.rst b/doc/apidoc/gfapy.line.group.ordered.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.group.ordered.rst rename to doc/apidoc/gfapy.line.group.ordered.rst diff --git a/gfapy_doc/apidoc/gfapy.line.group.path.rst b/doc/apidoc/gfapy.line.group.path.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.group.path.rst rename to doc/apidoc/gfapy.line.group.path.rst diff --git a/gfapy_doc/apidoc/gfapy.line.group.rst b/doc/apidoc/gfapy.line.group.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.group.rst rename to doc/apidoc/gfapy.line.group.rst diff --git a/gfapy_doc/apidoc/gfapy.line.group.unordered.rst 
b/doc/apidoc/gfapy.line.group.unordered.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.group.unordered.rst rename to doc/apidoc/gfapy.line.group.unordered.rst diff --git a/gfapy_doc/apidoc/gfapy.line.header.rst b/doc/apidoc/gfapy.line.header.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.header.rst rename to doc/apidoc/gfapy.line.header.rst diff --git a/gfapy_doc/apidoc/gfapy.line.rst b/doc/apidoc/gfapy.line.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.rst rename to doc/apidoc/gfapy.line.rst diff --git a/gfapy_doc/apidoc/gfapy.line.segment.rst b/doc/apidoc/gfapy.line.segment.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.line.segment.rst rename to doc/apidoc/gfapy.line.segment.rst diff --git a/gfapy_doc/apidoc/gfapy.rst b/doc/apidoc/gfapy.rst similarity index 100% rename from gfapy_doc/apidoc/gfapy.rst rename to doc/apidoc/gfapy.rst diff --git a/gfapy_doc/apidoc/modules.rst b/doc/apidoc/modules.rst similarity index 100% rename from gfapy_doc/apidoc/modules.rst rename to doc/apidoc/modules.rst diff --git a/doc/conf.py b/doc/conf.py index b096a70..0ef3b4f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -37,6 +37,7 @@ # Napoleon napoleon_numpy_docstring = True +napoleon_google_docstring = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/doc/index.rst b/doc/index.rst index a92faaa..d2f0ec0 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -6,16 +6,6 @@ Contents: .. toctree:: :maxdepth: 2 -.. automodule:: gfapy - :members: -.. autoclass:: gfapy.CIGAR - :members: -.. autoclass:: gfapy.Trace - :members: -.. autoclass:: gfapy.ByteArray - :members: -.. 
autoclass:: gfapy.Placeholder - :members: Indices and tables ================== diff --git a/gfapy_doc/Makefile b/gfapy_doc/Makefile deleted file mode 100644 index 2979230..0000000 --- a/gfapy_doc/Makefile +++ /dev/null @@ -1,225 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build-3.3 -PAPER = -BUILDDIR = _build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: help -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " epub3 to make an epub3" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " 
linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - @echo " dummy to check syntax errors of document sources" - -.PHONY: clean -clean: - rm -rf $(BUILDDIR)/* - -.PHONY: html -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: dirhtml -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -.PHONY: singlehtml -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -.PHONY: pickle -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -.PHONY: json -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -.PHONY: htmlhelp -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -.PHONY: qthelp -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/gfapy.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/gfapy.qhc" - -.PHONY: applehelp -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. 
You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -.PHONY: devhelp -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/gfapy" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/gfapy" - @echo "# devhelp" - -.PHONY: epub -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -.PHONY: epub3 -epub3: - $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 - @echo - @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." - -.PHONY: latex -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -.PHONY: latexpdf -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: latexpdfja -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: text -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -.PHONY: man -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
- -.PHONY: texinfo -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -.PHONY: info -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -.PHONY: gettext -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -.PHONY: changes -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -.PHONY: linkcheck -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: doctest -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -.PHONY: coverage -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -.PHONY: xml -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -.PHONY: pseudoxml -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." - -.PHONY: dummy -dummy: - $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy - @echo - @echo "Build finished. 
Dummy builder generates no files." diff --git a/gfapy_doc/conf.py b/gfapy_doc/conf.py deleted file mode 100644 index 0ef3b4f..0000000 --- a/gfapy_doc/conf.py +++ /dev/null @@ -1,346 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# gfapy documentation build configuration file, created by -# sphinx-quickstart on Sat Oct 22 18:27:28 2016. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -sys.path.insert(0, os.path.abspath('../')) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon' -] - -# Napoleon -napoleon_numpy_docstring = True -napoleon_google_docstring = True - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The encoding of source files. -# -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. 
-project = 'gfapy' -copyright = '2016, Giorgio Gonnella, Tim Weber' -author = 'Giorgio Gonnella, Tim Weber' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '1.0' -# The full version, including alpha/beta/rc tags. -release = '1.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# -# today = '' -# -# Else, today_fmt is used as the format for a strftime call. -# -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. 
-# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'alabaster' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. -# " v documentation" by default. -# -# html_title = 'gfapy v1.0' - -# A shorter title for the navigation bar. Default is the same as html_title. -# -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# -# html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. 
-# -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# -# html_additional_pages = {} - -# If false, no module index is generated. -# -# html_domain_indices = True - -# If false, no index is generated. -# -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -# -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. 
-htmlhelp_basename = 'gfapydoc' - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'gfapy.tex', 'gfapy Documentation', - 'Giorgio Gonnella, Tim Weber', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# -# latex_use_parts = False - -# If true, show page references after internal links. -# -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# -# latex_appendices = [] - -# It false, will not define \strong, \code, itleref, \crossref ... but only -# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added -# packages. -# -# latex_keep_old_macro_names = True - -# If false, no module index is generated. -# -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'gfapy', 'gfapy Documentation', - [author], 1) -] - -# If true, show URL addresses after external links. 
-# -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'gfapy', 'gfapy Documentation', - author, 'gfapy', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -# -# texinfo_appendices = [] - -# If false, no module index is generated. -# -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# -# texinfo_no_detailmenu = False diff --git a/gfapy_doc/index.rst b/gfapy_doc/index.rst deleted file mode 100644 index d2f0ec0..0000000 --- a/gfapy_doc/index.rst +++ /dev/null @@ -1,15 +0,0 @@ -Documentation for gfapy -======================= - -Contents: - -.. toctree:: - :maxdepth: 2 - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/gfapy_manual/alignments.md b/gfapy_manual/alignments.md deleted file mode 100644 index be423c1..0000000 --- a/gfapy_manual/alignments.md +++ /dev/null @@ -1,152 +0,0 @@ -## Alignments - -Some fields contain alignments and lists of alignments (L/C: overlap; P: -overlaps; E/F: alignment). If an alignment is not given, the placeholder symbol -```*``` is used instead. In GFA1 the alignments can be given as CIGAR strings, -in GFA2 also as Dazzler traces. - -Gfapy uses different classes (in module gfapy::Alignment) for representing the two -possible alignment styles (cigar strings and traces) and undefined alignments -(placeholders). - -### Creating an alignment - -An alignment instance is usually created from its GFA string representation -or from a list by using the ```gfapy.Alignment``` constructor. 
-If the argument is an alignment object it will be returned, -so that is always safe to call the method on a -variable which can contain a string or an alignment instance: - -```python -gfapy.Alignment("*") # => gfapy.AlignmentPlaceholder -gfapy.Alignment("10,10,10") # => gfapy.Trace -gfapy.Alignment([10,10,10]) # => gfapy.Trace -gfapy.Alignment("30M2I30M") # => gfapy.CIGAR -gfapy.Alignment(gfapy.Alignment("*")) -gfapy.Alignment(gfapy.Alignment("10,10")) -``` - -### Recognizing undefined alignments - -The ```gfapy.is_placeholder()``` method allows to understand if an alignment -field contains a defined value (cigar, trace) or not (placeholder). -The method works correctly with both alignment objects and their string -or list representation. - -```python -gfapy.is_placeholder(gfapy.Alignment("30M")) # => False -gfapy.is_placeholder(gfapy.Alignment("10,10")) # => False -gfapy.is_placeholder(gfapy.Alignment("*")) # => True -gfapy.is_placeholder("*") # => True -gfapy.is_placeholder("30M") # => False -gfapy.is_placeholder("10,10") # => True -gfapy.is_placeholder([]) # => True -gfapy.is_placeholder([10,10]) # => False -``` - -Note that, as a placeholder is False in boolean context, just a -```if not aligment``` will also work, if alignment is an alignment object, -but not if it is a string representation. - -### Reading and editing CIGARs - -CIGARs are represented by arrays of cigar operation objects. -Each cigar operation provides the properties ```length``` and -```code```. Length is the length of the CIGAR operation (int). -Code is one of the codes allowed by the GFA specification. - -```python -cigar = gfapy.Alignment("30M") -isinstance(cigar, list) # => True -operation = cigar[0] -type(operation) # => "gfapy.CIGAR.Operation" -operation.code # => "M" -operation.code = "D" -operation.length # => 30 -len(operation) # => 30 -str(operation) # => "30D" -``` - -The CIGAR object can be edited using the list methods. 
-If the array is emptied, its string representation will be ```*```. -```python -cigar = gfapy.Alignment("1I20M2D") -cigar[0].code = "M" -cigar.pop(1) -str(cigar) # => "1M2D" -cigar[:] = [] -str(Cigar) # => "*" -``` - -CIGARs consider one sequence as reference and another sequence -as query. The ```length_on_reference``` and ```length_on_query``` methods -compute the length of the alignment on the two sequences. -These methods are used by the library e.g. to convert GFA1 L lines to GFA2 -E lines (which is only possible if CIGARs are provided). - -```python -cigar = gfapy.Alignment("30M10D20M5I10M") -cigar.length_on_reference() # => 70 -cigar.length_on_query() # => 65 -``` - -#### Validation - -The ```validate``` method checks if all operations in a cigar use -valid codes and length values (which must be non-negative) -The codes can be M, I, D or P. For GFA1 the other codes are formally accepted -(no exception is raised), but their use is discouraged. -An error is raised in GFA2 on validation, if the other codes are used. - -```python -cigar = gfapy.Alignment("30M10D20M5I10M") -cigar.validate() # no exception raised -cigar[1].code = "L" -cigar.validate # raises an exception -cigar = gfapy.Alignment("30M10D20M5I10M") -cigar[1].code = "X" -cigar.validate(version="gfa1") # no exception raised -cigar.validate(version="gfa2") # exception raised -``` - -### Reading and editing traces - -Traces are arrays of non-negative integers. The values are interpreted -using a trace spacing value. If traces are used, a trace spacing value must be -defined in a TS integer tag, either in the header, or in the single lines -which contain traces. - -```python -gfa.header.TS # => the global TS value -gfa.line("x").TS # => an edge''s own TS tag -``` - -### Complement alignment - -CIGARs are dependent on which sequence is taken as reference and which is -taken as query. 
For each alignment, a complement CIGAR can be computed -(using the method ```complement```), which is the CIGAR obtained when the -two sequences are switched. This method is used by the library -e.g. to compare links, as they can be expressed in different ways, by -switching the two sequences. - -```python -cigar = gfapy.Alignment("2M1D3M") -str(cigar.complement()) # => "3M1I2M" -``` - -The current version of gfapy does not provide a way to compute the alignment in -gfapy, thus the trace information can be accessed and edited, but not used for -this purpose. Because of this there is currently no way in gfapy to compute a -complement trace (trace obtained when the sequences are switched). - -```python -trace = gfapy.Alignment("1,2,3") -str(trace.complement()) # => "*" -``` - -The complement of a placeholder is a placeholder: - -```python -str(gfapy.Alignment("*").complement()) # => "*" -``` diff --git a/gfapy_manual/chapters b/gfapy_manual/chapters deleted file mode 100644 index 2f6257c..0000000 --- a/gfapy_manual/chapters +++ /dev/null @@ -1,15 +0,0 @@ -introduction.md -rgfa.md -validation.md -positional_fields.md -placeholders.md -positions.md -alignments.md -tags.md -references.md -header.md -custom_records.md -comments.md -errors.md -graph_operations.md -extending_rgfa.md diff --git a/gfapy_manual/comments.md b/gfapy_manual/comments.md deleted file mode 100644 index 9d736cc..0000000 --- a/gfapy_manual/comments.md +++ /dev/null @@ -1,47 +0,0 @@ -## Comments - -GFA lines starting with a ```#``` symbol are considered comments. -In gfapy comments are represented by instances of ```gfapy.line.Comment```. -They have a similar interface to other line instances, -with some differences, e.g. they do not support tags. - -### Accessing the comments - -Adding a comment to a gfapy.Gfa instance is done similary to other lines, by using the -```add_line(line)``` method. The comments of a gfapy object can be accessed -using the ```comments``` method. 
This returns a list of comment line -instances. To remove a comment from the Gfa, you need to find the instance in -the list, and call ```disconnect()``` on it. - -```python -g.add_line("# this is a comment") -[str(c) for c in g.comments] # => ["# this is a comment"] -g.comments[0].disconnect() -g.comments # => [] -``` - -### Accessing the comment content - -The content of the comment line, excluding the initial +#+ and eventual -initial spacing characters, is included in the field +content+. - -The initial spacing characters can be read/changed using the +spacer+ -field. The default value is a single space. - -```python -g.add_line("# this is a comment") -c = g.comments[-1] -g.content # => "this is a comment" -g.spacer # => " " -``` - -Tags are not supported by comment lines. If the line contains tags, -these are nor parsed, but included in the +content+ field. -Trying to set tags values raises exceptions. - -```python -c = gfapy.Line.from_string("# this is not a tag\txx:i:1") -c.content # => "this is not a tag\txx:i:1" -c.xx # => None -c.xx = 1 # raises an exception -``` diff --git a/gfapy_manual/custom_records.md b/gfapy_manual/custom_records.md deleted file mode 100644 index 69d9c01..0000000 --- a/gfapy_manual/custom_records.md +++ /dev/null @@ -1,105 +0,0 @@ -## Custom records - -According to the GFA2 specification, each line which starts with -a non-standard record type shall be considered an user- or -program-specific record. - -Gfapy allows to retrieve custom records and access their data using a similar -interface to that for the predefined record types. It assumes that -custom records consist of tab-separated fields and that the first field -is the record type. - -Validation of custom records is very limited; therefore, if you work with custom -records, you may define your own validation method and call it when you read -or write custom record contents. 
- -### Retrieving, adding and deleting custom records - -The custom records of a Gfa instance can be retrieved using its -```custom_records``` property. This returns a list of all custom records, -regardless of the record type. - -To retrieve only the custom records of a given type use the method -```custom_records_of_type(record_type)```. - -```python -gfa.custom_records -gfa.custom_records_of_type("X") -``` - -Adding custom records to and removing them from a gfapy instance -is similar to any other line. So to delete a custom record, ```disconnect()``` -is called on the instance. To add a custom record line, the instance or its string representation -is added using ```add_line``` on the gfapy. - -```python -gfa.add_line("X\ta\tb") -gfa.custom_records("X")[-1].disconnect() -``` - -### Tags - -As gfapy cannot know how many positional fields are present when parsing custom -records, an heuristic approach is followed, to identify tags. -A field resembles a tag if it starts with ```tn:d:``` where ```tn``` is a valid -tag name and ```d``` a valid tag datatype (see Tags chapter). -The fields are parsed from the last to the first. As soon as a field is found -which does not resemble a tag, all remaining fields are considered positionals -(even if another field parsed later resembles a tag). - -```python -gfa.add_line("X\ta\tb\tcc:i:10\tdd:i:100") -x1 = gfa.custom_records("X")[-1] -x1.cc # => 10 -x1.dd # => 100 -gfa.add_line("X\ta\tb\tcc:i:10\tdd:i:100\te") -x2 = gfa.custom_records("X")[-1] -x1.cc # => None -x1.dd # => None -``` - -This parsing heuristics has some consequences on validations. Tags with an -invalid tag name (such as starting with a number, or with a wrong number of -letters), or an invalid tag datatype (wrong letter, or wrong number of letters) -are considered positional fields. The only validation available for custom -records tags is thus the validation of the content of the tag, which must -be valid according to the datatype. 
- -```python -gfa.add_line("X\ta\tb\tcc:i:10\tddd:i:100") -x = gfa.custom_records_of_type("X")[-1] -x.cc # => None -# (as ddd:i:100 is considered a positional field) -``` - -### Positional fields - -The positional fields in a custom record are called ```"field1", "field2", ...```. -The user can iterate over the positional field names using the array obtained -by calling ```positional_fieldnames``` on the line. - -Positional fields are allowed to contain any character (including non-printable -characters and spacing characters), except tabs and newlines (as they are -structural elements of the line). - -Due to the parsing heuristics mentioned in the Tags section above, invalid -tags are sometimes wrongly taken as positional fields. Therefore, -the user is responsible for validating the number of positional fields. - -```python -gfa.add_line("X\ta\tb\tcc:i:10\tdd:i:100") -x = gfa.custom_records_of_type("X")[-1] -len(x.positional_fieldnames) # => 2 -x.positional_fieldnames # => ["field1", "field2"] -``` - -### Extensions - -The support for custom fields is limited, as gfapy does not know which and -how many fields are there and how they shall be validated. -It is possible to create an extension of gfapy, which defines new record -types: this will allow to use these record types in a similar way -to the built-in types. However, extending the library requires slightly -more advanced programming than just using the predefined record types. -In the chapter Extending gfapy these extensions are discussed and an -example is made. diff --git a/gfapy_manual/errors.md b/gfapy_manual/errors.md deleted file mode 100644 index 3526607..0000000 --- a/gfapy_manual/errors.md +++ /dev/null @@ -1,32 +0,0 @@ -## Errors - -All exceptions raised in the library are subclasses of gfapy.Error. -This means that ```except gfapy.Error``` catches all library errors.
- -Different types of errors are defined and are summarized in the following table: - -| Error | Description | Examples | -|------------------|------------------------------------------------------|----------------------------------------------------------| -| Version | An unknown or wrong version is specified or implied | "GFA0"; or GFA1 in GFA2 context | -| Value | The value of an object is invalid | a negative position is used | -| Type | The wrong type has been used or specified | Z instead of i used for VN tag; Hash for an i tag | -| Format | The format of an object is wrong | a line does not contain the expected number of fields | -| NotUnique | Something should be unique but is not | duplicated tag name or line identifier | -| Inconsistency | Pieces of information collide with each other | length of sequence and LN tag do not match | -| Runtime | The user tried to do something which is not allowed | editing from/to field in connected links | -| Argument | Problem with the arguments of a method | wrong number of arguments in dynamically created method | -| Assertion | Something unexpected happened | there is a bug in the library | - -Some error types are generic (such as RuntimeError and ArgumentError), and their -definition may overlap that of more specific errors (such as ArgumentError, -which overlaps ValueError and TypeError). -The user should not rely on the type of error alone, but -rather take it as an indication. The error message tries to be informative -and for this reason often prints information on the internal state of the -relevant variables. - -Assertion errors are reserved for those situations where something is implied -by the programmer (e.g. a value is implied to be positive at a certain point -of the code). If the check fails, an assertion error is raised. -The user may report the problem, as this may indicate a bug (unless the user -did something he was not supposed to do, such as calling a private API method).
diff --git a/gfapy_manual/extending_rgfa.md b/gfapy_manual/extending_rgfa.md deleted file mode 100644 index 49875af..0000000 --- a/gfapy_manual/extending_rgfa.md +++ /dev/null @@ -1,336 +0,0 @@ -## Extending RGFA - -The RGFA library is designed to be easily extended, although its extensions -requires more knowledge of the Ruby languange, than what is necessary for -merely using the library. - -The GFA2 format can be extended by defining new line types. These are handled -using the custom records functionality, but the support is limited: e.g. -validation, parsing of the field content, references to other lines and access -to fields by name are not possible. All this is made possible by extensions. - -### An example of user-specific record types - -This chapter gives an example on how to extend the -library to define an user-specific record type and custom field datatypes. -As an example, we will define a record type for metagenomics applications -with code M. This will have the role to define taxon-specific subgraphs, -by putting segments in relation with a taxon. The taxa themselves -will be declared in lines with code T: - -Each T line will contain: -- tid: a taxon ID -- name: an organism name (text field) -- the tags may contain an URL tag, which will point to a website, - describing the organism (UL tag, string) - -Each M line will contain: -- mid: an optional assignment ID -- tid: a taxon ID -- sid: a reference to a segment -- score: an optional Phred-style integer score, which will define an error - probability of the assignment of the segment to a taxon - -Here is an example of GFA containing the new line types: -``` -S A 1000 * -T B12_c -M 1 taxon:123 A 40 xx:Z:cjaks536 -M 2 taxon:123 B * xx:Z:cga5r5cs -S B 1000 * -M * B12_c B 20 -T taxon:123 UL:http://www.taxon123.com -``` - -### Subclassing RGFA::Line - -Defining a new record type for RGFA requires to create a new subclass of -the RGFA::Line class. 
-Thereby some constants must be defined: - -- ```RECORD_TYPE``` must contain the record type as symbol. -- ```POSFIELDS``` is an array of symbols, indicating the sequence - of positional fields in the record -- ```PREDEFINED_TAGS``` contain an array of predefined optional - tag names. -- ```DATATYPE``` is an hash. Each key is a symbol, either contained in - POSFIELDS or in PREDEFINED_TAGS. The value is a datatype symbol: - see the RGFA::Field module for a list of possible datatypes. -- ```NAME_FIELD``` is the field which contains the name of the line, if any -- ```STORAGE_KEY``` is the field which shall be used as a key for storing - references of the line in RGFA; for custom subclasses, - set it to ```:name``` if the line has a name field, to ```nil``` otherwise -- ```FIELD_ALIAS``` ia an hash which contain aliases to field names; - it may be empty -- ```REFERENCE_FIELDS``` is a list of fields which contain references - (or arrays of references) to other lines. The references may contain - an orientation. -- ```BACKREFERENCE_RELATED_FIELDS``` is a list of fields which shall - not be changed in a connected line without potentially invaliding - backreferences to the line. In the predefined line types, these are - the fields containing match coordinates in GFA2 edges (as they change their - nature as internal, dovetails or containments) and the orientation and overlap - fields in GFA1 links. -- ```DEPENDENT_LINES``` and ```OTHER_REFERENCES``` are lists - of names of references collections, which will - contain backreferences to other line types (which refer the line type in their - fields). E.g. for a segment, the list contain the ```:fragments``` symbol, - indicating that a collection - shall be initialized, which will contain backreferences to the fragments - which reference the segment. - Disconnection is cascaded to lines in the collections named in - DEPENDENT_LINES but not to those named in OTHER_REFERENCES. 
- -For our example, we will define the subclasses for record types T and M. - -```ruby -class RGFA::Line::Taxon < RGFA::Line - - RECORD_TYPE = :T - POSFIELDS = [:tid, :desc] - PREDEFINED_TAGS = [:UL] - DATATYPE = { - :tid => :identifier_gfa2, - :desc => :Z, - :UL => :Z, - } - NAME_FIELD = :tid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:metagenomic_assignments] - OTHER_REFERENCES = [] - - apply_definitions - -end - -class RGFA::Line::MetagenomicAssignment < RGFA::Line - - RECORD_TYPE = :M - POSFIELDS = [:mid, :tid, :sid, :score] - PREDEFINED_TAGS = [] - DATATYPE = { - :mid => :optional_identifier_gfa2, - :tid => :identifier_gfa2, - :sid => :identifier_gfa2, - :score => :optional_integer, - } - NAME_FIELD = :mid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [:tid, :sid] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions - -end -``` - -### Enabling the references - -If reference fields have been defined (as in the previous example of M, where -tid is a reference to a taxon line and sid is a reference to a segment line), -a private ```initialize_references``` -method shall be provided, which is called when a line of the type is connected -to a RGFA instance. - -In particular, the method shall change all identifiers in the reference -fields into references to lines in the GFA (either existing lines or -virtual lines, which is the way RGFA handles forward-pointing references). - -If the referenced line is not yet available, but it may be defined by -the GFA at a later time, the method will create a virtual line. -In our example, we know that the reference is to a segment or a taxon line. -If we would not know that we would instantiate RGFA::Line::Unknown. 
- -When the field content itself is a reference, the content cannot be -changed directly (using set would raise an exception, as the line is -already connected when the initialize_references method is called). -Therefore, the private line method set_existing_field shall be used, -with ```set_reference: true```. If the reference field contains -an oriented line or an array instead, references can be edited directly. - -```ruby -class RGFA::Line::MetagenomicAssignment - - def initialize_references - s = @rgfa.segment(sid) - if s.nil? - s = RGFA::Line::Segment::GFA2.new([sid.to_s, "1", "*"], - virtual: true, version: :gfa2) - s.connect(@rgfa) - end - set_existing_field(:sid, s, set_reference: true) - s.add_reference(self, :metagenomic_assignments) - - t = @rgfa.line(tid) - if t.nil? - t = RGFA::Line::Taxon.new([tid.to_s, ""], - virtual: true, version: :gfa2) - t.connect(@rgfa) - end - set_existing_field(:tid, t, set_reference: true) - t.add_reference(self, :metagenomic_assignments) - end - private :initialize_references - -end -``` - -The method defines backreferences to the new line in the -segment and taxon instances, using :metagenomic_assignments as name for the collection -of backreferences in S or T lines to lines of type M. For taxa, this collection -has been defined in the class definition above. For segments, we will need to -add this collection to the segment definition and redefine the reference getter -methods. As lines of type M will be dependent on S lines -(i.e. they shall be deleted if the referred segment line is deleted), we will -add it to the DEPENDENT_LINES list. In case of no dependency, we would use the -OTHER_REFERENCES list instead. - -```ruby -class RGFA::Line::Segment::GFA2 - DEPENDENT_LINES << :metagenomic_assignments - define_reference_getters -end -``` - -### Recognizing the record type code - -When parsing lines starting with the code for the new record type, -we want RGFA to return an instance of the correct subclass of Line.
- -To obtain this, the ```subclass``` class method of ```RGFA::Line``` must -be extended to handle the new record_type symbol, for GFA2 or -unknown version records. It must return a class (the new subclass of RGFA::Line). -The new record symbols must also be added to the gfa2 specific -symbols list in ```RECORD_TYPE_VERSIONS[:specific][:gfa2]```. - -In our example the method ```subclass``` will be patched as follows: - -```ruby -class RGFA::Line - class << self - alias_method :orig_subclass, :subclass - def subclass(record_type, version: nil) - if version.nil? or version == :gfa2 - case record_type.to_sym - when :M then return RGFA::Line::MetagenomicAssignment - when :T then return RGFA::Line::Taxon - end - end - orig_subclass(record_type, version: version) - end - end - RECORD_TYPE_VERSIONS[:specific][:gfa2] << :M - RECORD_TYPE_VERSIONS[:specific][:gfa2] << :T -end -``` - -### Allowing to find records - -Both record types T and M define a name field. This allows to find records of -these types using the ```line()``` method of the ```RGFA``` class, as well as -allowing to replace virtual T lines created while parsing M lines, with real T -lines, when these are found. For this to work, the codes must be added to the -list ```RECORDS_WITH_NAME``` of the ```RGFA``` class: - -```ruby -RGFA::RECORDS_WITH_NAME << :T -RGFA::RECORDS_WITH_NAME << :M -``` - -### Defining a field datatype - -When new subclasses of line are created, it may be necessary or useful to -create new datatypes for its fields. For example, we used :identifier_gfa2 for -the tid field in the M and T records. However, we could make the field syntax -stricter, and require that the content of the field must be either a reference -to the NCBI taxonomy database or a custom identifier. In the first case, it -will need to be in the form ```taxon:N```, where ```N``` is a positive -integer.
In the second case, it will need to be a combination of letters, -numbers and underscores (thereby ```:``` will not be allowed). - -A module must be created, which handles the parsing and writing of fields with -the new datatype. -The module shall define six module functions -(see the API documentation of the RGFA::Field module for more detail). -Decode and unsafe_decode take a string as -argument and return an appropriate Ruby object. Encode and unsafe_encode take -a string representation or another ruby object and converts it into the correct -string representation. Validate_encoded validates the string representation. -Validate_decoded validates a non-string content of the field. The unsafe -version of the decode and encode methods may provide faster results and are -used if the parameters are guaranteed to be valid. The safe version must check -the validity of the provided data. - -```ruby -module RGFA::Field::TaxonID - - def validate_encoded(string) - if string !~ /^taxon:(\d+)$/ and string !~ /^[a-zA-Z0-9_]+$/ - raise RGFA::ValueError, "Invalid taxon ID: #{string}" - end - end - module_function :validate_encoded - - def unsafe_decode(string) - string.to_sym - end - module_function :unsafe_decode - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - module_function :decode - - def validate_decoded(object) - case object - when RGFA::Line::Taxon - validate_encoded(object.name.to_s) - when Symbol - validate_encoded(object.to_s) - else - raise RGFA::TypeError, - "Invalid type for taxon ID: #{object.inspect}" - end - end - module_function :validate_decoded - - def unsafe_encode(object) - object = object.name if object.kind_of?(RGFA::Line::Taxon) - object.to_s - end - module_function :unsafe_encode - - def encode(object) - validate_decoded(object) - unsafe_encode(object) - end - module_function :encode - -end -``` - -The new datatype must have a symbol which identifies it. 
The symbol must be -added to the ```GFA2_POSFIELD_DATATYPE``` list of the ```RGFA::Field``` module. -An entry must be added to the ```RGFA::Field::FIELD_MODULE``` -hash, where the symbol of the new datatype is the key and the value is the -module. - -```ruby -RGFA::Field::GFA2_POSFIELD_DATATYPE << :taxon_id -RGFA::Field::FIELD_MODULE[:taxon_id] = RGFA::Field::TaxonID -``` - -Now the new datatype can be put into use by changing the datatype for the tid -fields of the M and T lines: - -```ruby -RGFA::Line::Taxon::DATATYPE[:tid] = :taxon_id -RGFA::Line::MetagenomicAssignment::DATATYPE[:tid] = :taxon_id -``` diff --git a/gfapy_manual/gfapy-manual.pdf b/gfapy_manual/gfapy-manual.pdf deleted file mode 100644 index 21e607b..0000000 Binary files a/gfapy_manual/gfapy-manual.pdf and /dev/null differ diff --git a/gfapy_manual/graph_operations.md b/gfapy_manual/graph_operations.md deleted file mode 100644 index 9193c7c..0000000 --- a/gfapy_manual/graph_operations.md +++ /dev/null @@ -1,8 +0,0 @@ -## Graph operations - -Graph operations such as linear paths merging, -multiplication of segments and other are provided. -These operations are similar to those provided by the -RGFA library: -please refer to the RGFA paper (Gonnella and Kurtz, 2016) -for a description and to the API documentation. diff --git a/gfapy_manual/header.md b/gfapy_manual/header.md deleted file mode 100644 index 0362ffe..0000000 --- a/gfapy_manual/header.md +++ /dev/null @@ -1,126 +0,0 @@ -## The Header - -GFA files may contain one or multiple header lines (record type: "H"). These -lines may be present in any part of the file, not necessarily at the beginning. - -Although the header may consist of multiple lines, its content refers to the -whole file. Therefore in gfapy the header is accessed using a single line -instance (accessible by the ```header``` method). Header lines contain only -tags. If not header line is present in the Gfa, then the header line object -will be empty (i.e. contain no tags). 
- -Note that header lines cannot be connected to the gfapy as other lines -(i.e. calling ```connect``` on them raises an exception). Instead they -must be merged to the existing Gfa header, using -```add_line(line)``` on the gfa instance. - -```python -gfapy.Line.from_string("H\tnn:f:1.0").connect(gfa) # exception -gfa.add_line("H\tnn:f:1.0") # this works! -gfa.header.nn # => 1.0 -``` - -### Multiple definitions of the predefined header tags - -For the predefined tags (```VN``` and ```TS```), the presence of multiple -values in different lines is an error, unless the value is the same in each -instance (in which case the repeated definitions are ignored). - -```python -gfa.add_line("H\tVN:Z:1.0") -gfa.add_line("H\tVN:Z:1.0") # ignored -gfa.add_line("H\tVN:Z:2.0") # exception! -``` - -### Multiple definitions of custom header tags - -If the tags are present only once in the header in its entirety, the access to -the tags is the same as for any other line (see Tags chapter). - -However, the specification does not forbid custom tags to be defined with -different values in different header lines (which we name -"multi-definition tags"). This particular case is handled in the next -sections. - -### Reading multi-definitions tags - -Reading, validating and setting the datatype of multi-definition tags is -done using the same methods as for all other lines (see Tags chapter). -However, if a tag is defined multiple times on multiple H lines, reading -the tag will return a list of the values on the lines. This array is an -instance of the subclass ```gfapy.FieldArray``` of list. - -```python -gfa.add_line("H\txx:i:1") -gfa.add_line("H\txx:i:2") -gfa.add_line("H\txx:i:3") -gfa.header.xx # => gfapy.FieldArray("i", [1,2,3]) -``` - -### Setting tags - -There are two possibilities to set a tag for the header. The first is the -normal tag interface (using ```set``` or the tag name property). The second -is to use ```add```. The latter supports multi-definition tags, i.e. 
it -adds the value to the previous ones (if any), instead of overwriting them. - -```python -gfa.header.xx # => None -gfa.header.add("xx", 1) -gfa.header.xx # => 1 -gfa.header.add("xx", 2) -gfa.header.xx # => gfapy.FieldArray("i", [1,2]) -gfa.header.set("xx", 3) -gfa.header.xx # => 3 -``` - -### Modifying field array values - -Field arrays can be modified directly (e.g. adding new values or removing some -values). After modification, the user may check if the array values -remain compatible with the datatype of the tag using the ```validate_field``` -method. - -```python -gfa.header.xx # => gfapy.FieldArray("i", [1,2,3]) -gfa.header.validate_field("xx") # => True -gfa.header.xx.append("X") -gfa.header.validate_field("xx") # => False -``` - -If the field array is modified using array methods which return a list or data -of any other type, a field array must be constructed, setting its -datatype to the value returned by calling ```get_datatype(tagname)``` -on the header. - -```python -gfa.header.xx # => gfapy.FieldArray("i", [1,2,3]) -gfa.header.xx = gfapy.FieldArray(gfa.header.get_datatype("xx"), - map(lambda x: x+1, gfa.header.xx)) -gfa.header.xx # => gfapy.FieldArray("i", [2,3,4]) -``` - -### String representation of the header - -For consistency with other line types, the string representation of -the header is a single-line string, possibly not standard-compliant, -if it contains multiple instances of the tag. -(and when calling ```field_to_s(tag)``` for a tag present multiple -times, the output string will contain the instances of the tag, separated by -tabs). - -However, when the Gfa is output to file or string, the header is -split into multiple H lines with single tags, so that standard-compliant GFA -is output. The split header can be retrieved using the ```headers``` method -on the Gfa instance.
- -```python -gfa.header.field_to_s("xx") # => "xx:i:1\txx:i:2" -str(gfa.header) # => "H\tVN:Z:1.0\txx:i:1\txx:i:2" -[str(h) for h in gfa.headers] # => ["H\tVN:Z:1.0", "H\txx:i:1", "H\txx:i:2"] -str(gfa) # => """ - H VN:Z:1.0 - H xx:i:1 - H xx:i:2 - """ -``` diff --git a/gfapy_manual/introduction.md b/gfapy_manual/introduction.md deleted file mode 100644 index ed485f7..0000000 --- a/gfapy_manual/introduction.md +++ /dev/null @@ -1,20 +0,0 @@ -# gfapy - -gfapy is a python library for working with GFA files. It allows to parse, -validate, edit and write GFA files. - -This manual explains how to access the information in GFA files using the -library. It is completed by the more technical API library, which documents -each class, method and constant defined by the library. - -A test suite makes sure that the functionality described by this manual also -works as intented. However, if this is not the case, please report any bug -using the Github issues tracked (https://github.com/ggonnella/gfapy/issues). - -## GFA specifications - -The library is based on the official GFA specifications version -1 and 2, available at https://github.com/GFA-spec/GFA-spec. -See the Versions chapter for an overview of the differences of the two versions -and methods for the conversion from one version to the other. - diff --git a/gfapy_manual/placeholders.md b/gfapy_manual/placeholders.md deleted file mode 100644 index b7b47fb..0000000 --- a/gfapy_manual/placeholders.md +++ /dev/null @@ -1,38 +0,0 @@ -## Placeholders - -Some positional fields may contain an undefined value S: ```sequence```; -L/C: ```overlap```; P: ```overlaps```; E: ```eid```, ```alignment```; -F: ```alignment```; G: ```gid```, ```var```; U/O: ```pid```. -In GFA this value is represented by a ```*```. - -In gfapy instances of the class RGFA::Placeholder (and its subclasses) represent -the undefined value. 
- -### Distinguishing placeholders - -The method ```gfapy.is_placeholder()``` checks if a value is or would -be represented by a placeholder in GFA (such as an empty array, or -a string containing "*"). - -```python -gfapy.is_placeholder("*") # => True -gfapy.is_placeholder("**") # => False -gfapy.is_placeholder([]) # => True -gfapy.is_placeholder(gfapy.Placeholder()) # => True -``` - -Note that, as a placeholder is False in boolean context, just a -```if not placeholder``` will also work, if placeholder is a gfapy.Placeholder() -but not if it is a string representation. - -### Compatibility methods - -Some methods are defined for placeholders, which allow them to respond to the -same methods as defined values. This allows writing generic code. - -```python -placeholder.validate() # does nothing -len(placeholder) # => 0 -placeholder[1] # => gfapy.Placeholder() -placeholder + anything # => gfapy.Placeholder() -``` diff --git a/gfapy_manual/positional_fields.md b/gfapy_manual/positional_fields.md deleted file mode 100644 index 4fbdd84..0000000 --- a/gfapy_manual/positional_fields.md +++ /dev/null @@ -1,308 +0,0 @@ -## Positional fields - -Most lines in GFA have positional fields (Headers are an exception). -During parsing, if a line is encountered, which has too few or too many -positional fields, an exception will be thrown. -The correct number of positional fields is record type-specific. - -Positional fields are recognized by their position in the line. -Each positional field has an implicit field name and datatype associated -with it. - -### Field names - -The field names are derived from the specification. Lower case versions -of the field names are used and spaces are substituted with underscores. -In some cases, the field names were changed, as they represent keywords -in common programming languages (```from```, ```send```). - -The following tables show the field names used in gfapy, for each kind of line. -Headers have no positional fields.
Comments and custom lines follow particular -rules, see the respective chapters. - -#### GFA1 field names - -| Record Type | Field 1 | Field 2 | Field 3 | Field 4 | Field 5 | Field 6 | -|-------------|--------------------|---------------------|------------------|-----------------|---------------|---------------| -| Segment | ```name``` | ```sequence``` | | | | | -| Link | ```from_segment``` | ```from_orient``` | ```to_segment``` | ```to_orient``` | ```overlap``` | | -| Containment | ```from_segment``` | ```from_orient``` | ```to_segment``` | ```to_orient``` | ```pos``` | ```overlap``` | -| Path | ```path_name``` | ```segment_names``` | ```overlaps``` | | | | - -#### GFA2 field names - -| Record Type | Field 1 | Field 2 | Field 3 | Field 4 | Field 5 | Field 6 | Field 7 | Field 8 | -|-------------|-----------|----------------|----------------|-------------|-------------|-------------|-----------------|-----------------| -| Segment | ```sid``` | ```slen ``` | ```sequence``` | | | | | | -| Edge | ```eid``` | ```sid1 ``` | ```sid2 ``` | ```beg1 ``` | ```end1 ``` | ```beg2 ``` | ```end2 ``` | ```alignment``` | -| Fragment | ```sid``` | ```external``` | ```s_beg ``` | ```s_end``` | ```f_beg``` | ```f_end``` | ```alignment``` | | -| Gap | ```gid``` | ```sid1 ``` | ```d1 ``` | ```d2 ``` | ```sid2 ``` | ```disp ``` | ```var ``` | | -| Set | ```pid``` | ```items ``` | | | | | | | -| Path | ```pid``` | ```items ``` | | | | | | | - -### Datatypes - -The datatype of each positional field is described in the specification and -cannot be changed (differently from tags). Here is a short description of the -Python classes used to represent data for different datatypes. For some -complex cases, more details are found in the following chapters. - -#### Placeholders - -The positional fields in GFA can never be empty. However, there are some -fields with optional values. If a value is not specified, a placeholder -character is used instead (```*```). 
Such undefined values are represented -in gfapy by the gfapy.Placeholder class, which is described more in detail in the -Placeholders chapter. - -#### Arrays - -The ```items``` field in unordered and ordered groups -and the ```segment_names``` and ```overlaps``` fields in paths are -lists of objects and are represented by list instances. - -```python -type(set.items) # => "list" -type(gfa2_path.items) # => "list" -type(gfa1_path.segment_names) # => "list" -type(gfa1_path.overlaps) # => "list" -``` - -#### Orientations - -Orientations are represented by strings. The ```gfapy.invert``` method -applied to an orientation string returns the other orientation. -```python -gfapy.invert("+") # => "-" -gfapy.invert("-") # => "+" -``` - -#### Identifiers - -The identifier of the line itself (available for S, P, E, G, U, O lines) -can always be accessed in gfapy using the ```name``` alias and is represented -in gfapy by a Symbol. If it is optional (E, G, U, O lines) -and not specified, it is represented by a Placeholder instance. -The fragment identifier is also a Symbol. - -Identifiers which refer to other lines are also present in some line types -(L, C, E, G, U, O, F). These are never placeholders and in stand-alone lines -are represented by strings. In connected lines they are references to the Line -instances to which they refer to (see the References chapter). - -#### Oriented identifiers - -Oriented identifiers (e.g. ```segment_names``` in GFA1 paths) -are represented by elements of the class -```gfapy::OrientedLine```. The ```segment``` method of the oriented -segments returns the segment identifier (or segment reference in connected -path lines) and the ```orient``` method returns the orientation string. -The ```name``` method returns the string of the segment, even if this is -a reference to a segment. -A new oriented line can be created using the ```OL[line, orientation]``` method. - -Calling ```invert``` returns an oriented segment, with inverted orientation. 
-To set the two attributes the methods ```segment=``` and ```orient=``` -are available. - -Examples: -```python -p = "P\tP1\ta+,b-\t*".to_rgfa_line -p.segment_names # => [OrientedLine(:a,:+),OrientedLine(:b,:-)] -p[0].segment # => :a -p[0].name # => :a -p[0].orient # => :+ -p[0].invert # => OrientedLine(:a,:-) -p[0].orient = :- -p[0].segment = "S\tX\t*".to_rgfa_line -p[0] # => OrientedLine(gfapy::Line("S\tX\t*"), :-) -p[0].name # => :X -p[0] = OL[gfapy::Line("S\tY\t*"), :+] -``` - -#### Sequences - -Sequences (S field sequence) are represented by strings in gfapy. -Depending on the GFA version, the alphabet definition is more or less -restrictive. The definitions are correctly applied by the validation methods. - -The method ```rc``` is provided to compute the reverse complement of a nucleotidic -sequence. The extended IUPAC alphabet is understood by the method. Applied to -non nucleotidic sequences, the results will be meaningless: -```python -"gcat".rc # => "atgc" -"*".rc # => "*" (placeholder) -"yatc".rc # => "gatr" (wildcards) -"gCat".rc # => "atGc" (case remains) -"ctg".rc(rna: true) # => "cug" -``` - -#### Integers and positions - -The C lines ```pos``` field and the G lines ```disp``` and ```var``` fields -are represented by integers. The ```var``` field is optional, -and thus can be also a placeholder. Positions are 0-based coordinates. - -The position fields of GFA2 E lines (```beg1, beg2, end1, end2```) and -F lines (```s_beg, s_end, f_beg, f_end```) contain a dollar string as suffix -if the position is equal to the segment length. For more information, -see the Positions chapter. - -#### Alignments - -Alignments are always optional, ie they can be placeholders. If they are -specified they are CIGAR alignments or, only in GFA2, trace alignments. -For more details, see the Alignments chapter. 
- -#### GFA1 datatypes - -| Datatype | Record Type | Fields | -|--------------------------|-------------|---------------------------------| -| Identifier | Segment | ```name ``` | -| | Path | ```path_name ``` | -| | Link | ```from_segment, to_segment ``` | -| | Containment | ```from_segment, to_segment ``` | -| [OrientedIdentifier] | Path | ```segment_names ``` | -| Orientation | Link | ```from_orient, to_orient ``` | -| | Containment | ```from_orient, to_orient ``` | -| Sequence | Segment | ```sequence ``` | -| Alignment | Link | ```overlap ``` | -| | Containment | ```overlap ``` | -| [Alignment] | Path | ```overlaps ``` | -| Position | Containment | ```pos ``` | - -#### GFA2 datatypes - -| Datatype | Record Type | Fields | -|--------------------------|-------------|----------------------------------| -| Itentifier | Segment | ```sid ``` | -| | Fragment | ```sid ``` | -| OrientedIdentifier | Edge | ```sid1, sid2 ``` | -| | Gap | ```sid1, sid2 ``` | -| | Fragment | ```external ``` | -| OptionalIdentifier | Edge | ```eid ``` | -| | Gap | ```gid ``` | -| | U Group | ```oid ``` | -| | O Group | ```uid ``` | -| [Identifier] | U Group | ```items ``` | -| [OrientedIdentifier] | O Group | ```items ``` | -| Sequence | Segment | ```sequence ``` | -| Alignment | Edge | ```alignment ``` | -| | Fragment | ```alignment ``` | -| Position | Edge | ```beg1, end1, beg2, end2 ``` | -| | Fragment | ```s_beg, s_end, f_beg, f_end``` | -| Integer | Gap | ```disp, var ``` | - -### Reading and writing positional fields - -The ```positional_fieldnames``` method returns the list of the names -(as strings) of the positional fields of a line. -The positional fields can be read using a method on the gfapy line object, which -is called as the field name. Setting the value is done with an equal sign -version of the field name method (e.g. segment.slen = 120). In alternative, -the ```set(fieldname, value)``` and ```get(fieldname)``` methods can also be -used. 
- -```python -s_gfa1.positional_fieldnames # => ["name", "sequence"] -s_gfa1.name # => "segment1" -s_gfa1.get("name") # => "segment3" -s_gfa1.name = "segment2" -s_gfa1.name # => "segment2" -s_gfa1.set("name", "segment3") -s_gfa1.name = "segment3" - -``` - -When a field is read, the value is converted into an appropriate object. The -string representation of a field can be read using the -```field_to_s(fieldname)``` method. - -```python -link.from_segment # => gfapy.line.segment.GFA1("S\ts1\t*") -link.field_to_s(from_segment) # => ("s1") -``` - -When setting a non-string field, the user can specify the value of a tag -either as a Python non-string object, or as the string representation of the -value. - -```python -c.pos = 1 -c.pos = "1" -c.pos # => 1 -c.field_to_s("pos") # => "1" -``` - -Note that setting the value of reference and backreferences-related -fields is generally not allowed, when a line instance is connected to a -gfapy object (see the References chapter). - -```python -s = gfa.Line.from_string("L\ts1\t+\ts2\t-\t*") -s.from_segment = "s3" -gfa.add_line(s) -s.from_segment = "s4" # raises an exception -``` - -### Validation - -The content of all positional fields must be a correctly formatted -string according to the rules given in the GFA specifications (or a Python object -whose string representation is a correctly formatted string). - -Depending on the validation level, more or less checks are done automatically -(see the Validation chapter). Not regarding which validation level is selected, -the user can trigger a manual validation using the -```validate_field(fieldname)``` method for a single field, or using -```validate```, which does a full validation on the whole line, including all -positional fields. - -```python -line.validate_field("xx") -line.validate() -``` - -### Aliases - -For some fields, aliases are defined, which can be used in all contexts -where the original field name is used (i.e. 
as parameter of a method, and -the same setter and getter methods defined for the original field name are -also defined for each alias, see below). - -```python -gfa1_path.name == gfa1_path.path_name # True -edge.eid == edge.name # True -segment.sid == segment.name # True -containment.from_segment == containment.container # True - -s = gfapy.Line.from_string("S\t1\t*") -s.sid # => "1" -s.name = "a" -s.sid # => "a" -``` - -#### Name - -Different record types have an identifier field: -segments (name in GFA1, sid in GFA2), paths (path_name), edge (eid), -fragment (sid), gap (gid), groups (pid). - -All these fields are aliased to ```name```. This allows the user -for example to set the identifier of a line using the -```name=(value)``` method using the same syntax for different record -types (segments, edges, paths, fragments, gaps and groups). - -#### Version-specific field names - -For segments the GFA1 name and the GFA2 sid are equivalent -fields. For this reason an alias ```sid``` is defined for GFA1 segments -and ```name``` for GFA2 segments. - -#### Crypical field names - -The definition of from and to for containments is somewhat cryptical. -Therefore following aliases have been defined for containments: -container[_orient] for from[_|segment|orient]; contained[_orient] -for to[_segment|orient]. - diff --git a/gfapy_manual/positions.md b/gfapy_manual/positions.md deleted file mode 100644 index 33ab569..0000000 --- a/gfapy_manual/positions.md +++ /dev/null @@ -1,52 +0,0 @@ -## Position fields - -The only position field in GFA1 is the ```pos``` field in the -C lines. This represents the starting position of the contained segment -in the container segment and is 0-based. - -Some fields in GFA2 E lines (```beg1, beg2, end1, end2```) and -F lines (```s_beg, s_end, f_beg, f_end```) are positions. -According to the specification, they are 0-based and represent -virtual ticks before and after each string in the sequence. 
-Thus ranges are represented similarly to the Python range conventions: -e.g. a 1-character prefix of a sequence will have begin 0 and end 1. - -### GFA2 last position string - -The GFA2 positions must contain an additional string (```$```) appended to the -integer, if (and only if) they are the last position in the segment sequence. -These particular positions are represented in gfapy as instances of the class -```gfapy.LastPos```. - -To create a lastpos instance, the constructor can be used with -an integer, or the string representation (which must end with the dollar sign, -otherwise an integer is returned): -```python -str(gfapy.LastPos(12)) # => "12$" -gfapy.LastPos("12") # => 12 -str(gfapy.LastPos("12")) # => "12" -gfapy.LastPos("12$") # => gfapy.LastPos(12) -str(gfapy.LastPos("12$")) # => "12$" -``` - -Subtracting an integer from a lastpos returns a lastpos if 0 subtracted, -an integer otherwise. This allows to do some arithmetic on positions -without making them invalid. - -```python -gfapy.LastPos(12) - 0 # => gfapy.LastPos(12) -gfapy.LastPos(12) - 1 # => 11 -``` - -The functions ```gfapy.islastpos``` and ``isfirstpos``` -allow to determine if a position value is 0 (first), or the -last position, using the same syntax for lastpos and integer instances. - -```python -gfapy.isfirst(0) # True -gfapy.islast(0) # False -gfapy.isfirst(12) # False -gfapy.islast(12) # False -gfapy.islast(gfapy.LastPos("12")) # False -gfapy.islast(gfapy.LastPos("12$")) # True -``` diff --git a/gfapy_manual/references.md b/gfapy_manual/references.md deleted file mode 100644 index 8531886..0000000 --- a/gfapy_manual/references.md +++ /dev/null @@ -1,323 +0,0 @@ -## References - -Some fields in GFA lines contain identifiers or lists of identifiers -(sometimes followed by orientation strings), which reference -other lines of the GFA file. In gfapy it is possible to follow these -references and traverse the graph. 
- -### Connecting a line to a gfapy object - -In stand-alone line instances, the identifiers which reference -other lines are either strings containing the line name, pairs -of strings (name and orientation) in a gfapy.OrientedLine object, -or lists of lines names or gfapy.OrientedLine objects. - -Using the ```add_line(line)``` (alias: ```append(line)```) method of the -gfapy.Gfa object, or the equivalent ```connect(gfa)``` method of the gfapy.Line -instance, a line is added to a Gfa instance (this is done automatically when a -GFA file is parsed). All strings expressing references are then changed into -references to the corresponding line objects. The method ```is_connected()``` -allows to determine if a line is connected to an gfapy instance. The read-only -property ```gfa``` allows to find the gfapy.Gfa instance to which the line is -connected. - -```python -link.is_connected() # => False -link.gfa # => None -link.from_segment # => "A" -link.connect(gfa) # or gfa.add_line(link); or gfa.append(link) -link.is_connected() # => True -link.gfa # => gfapy.Gfa(...) -link.from_segment # => gfapy.Segment("S\tA\t*", ...) -``` - -### References for each record type - -The following tables describes the references contained in each record type. -The notation ```[]``` represent lists. - -#### GFA1 - -| Record type | Fields | Type of reference | -|-------------|----------------|-------------------------| -| Link | from, to | Segment | -| Containment | from, to | Segment | -| Path | segment_names, | [OrientedLine(Segment)] | -| | links (1) | [OrientedLine(Link)] | - -(1): paths contain information in the fields segment_names and overlaps, -which allow to find the identify from which they depend; these links can be -retrieved using ```links``` (which is not a field). 
- -#### GFA2 - -| Record type | Fields | Type of reference | -|-------------|---------------|----------------------------------| -| Edge | sid1, sid2 | Segment | -| Gap | sid1, sid2 | Segment | -| Fragment | sid | Segment | -| Set | items | [Edge/Set/Path/Segment] | -| Path | items | [OrientedLine(Edge/Set/Segment)] | - -### Backreferences for each record type - -When a line containing a reference to another line is connected to a gfapy -object, backreferences to it are created in the targeted line. - -For each backreference collection a read-only property exist, which is named -as the collection (e.g. ```dovetails_L``` for segments). Note that -the reference list returned by these arrays are read-only and editing -the references is done using other methods (see the section -"Editing reference fields" below). - -```python -segment.dovetails_L # => [gfapy.line.edge.Link(...), ...] -``` - -The following tables describe the backreferences collections for each record -type. - -#### GFA1 - -| Record type | Backreferences | Type | -|-------------|----------------------------- -| Segment | dovetails_L | L | -| | dovetails_R | L | -| | edges_to_contained | C | -| | edges_to_containers | C | -| | paths | P | -| Link | paths | P | - -#### GFA2 - -| Record type | Backreferences | Type | -|-------------|---------------------|------- -| Segment | dovetails_L | E | -| | dovetails_R | E | -| | edges_to_contained | E | -| | edges_to_containers | E | -| | internals | E | -| | gaps_L | G | -| | gaps_R | G | -| | fragments | F | -| | paths | O | -| | sets | U | -| Edge | paths | O | -| | sets | U | -| O Group | paths | O | -| | sets | U | -| U Group | sets | U | - -#### Segment backreference convenience methods - -For segments, additional methods are available which combine in different way -the backreferences information. 
-The ```dovetails_of_end(end)``` and ```gaps_of_end(end)``` methods take an -argument "L" or "R" and return the dovetails overlaps (or gaps) of the left or, -respectively, right end of the segment sequence are returned (equivalent to -```dovetails_L```/```dovetails_R``` and ```gaps_L```/```gaps_R```). - -The segment ```containments``` methods returns both containments -where the segment is the container or the contained segment. -The segment ```edges``` property is a list of all edges (dovetails, containments -and internals) with a reference to the segment. - -Other methods directly compute list of segments from the edges lists mentioned -above. The ```neighbours_L```, ```neighbours_R``` properties -and the ``neighbours(end)``` method computes the set of segment -instances which are connected by dovetails to the segment. -The segment ```containers``` and ```contained``` properties similarly -compute the set of segment instances which, respectively, contains -the segment, or are contained in the segment. - -```python -s.dovetails_of_end("L") # => [gfapy.line.edge.Link(...), ...] -s.dovetails_L == segment.dovetails_of_end("L") # => True -s.gaps_of_end("R") # => [] -s.edges # => [gfapy.line.edge.Link(...), ...] -s.neighbours_L # => [gfapy.line.segment.GFA1(...), ...] -s.containers # => [gfapy.line.segment.GFA1(...), ...] -``` - -### Multiline group definitions - -The GFA2 specification opens the possibility (experimental) to -define groups on multiple lines, by using the same ID -for each line defining the group. This is supported by gfapy. - -This means that if multiple ```gfapy.line.group.Ordered``` or -```gfapy.line.group.Unordered``` instances connected to a gfapy have the same -```gid```, they are merged into a single instance (technically the -last one getting added to the graph object). The items list are merged. - -The tags of multiple line defining a group shall not contradict each other -(i.e. 
either are the tag names on different lines defining the group all -different, or, if the same tag is present on different lines, the value and -datatype must be the same, in which case the multiple definition will be -ignored). - -```python -gfa.add_line("U\tu1\ts1 s2 s3") -[s.name for s in gfa.sets[-1].items] # => ["s1","s2","s3"] -gfa.add_line("U\tu1\t4 5") -[s.name for s in gfa.sets[-1].items] # => ["s1","s2","s3","s4","s5"] -``` - -### Induced set and captured path - -The item list in GFA2 sets and paths may not contain elements -which are implicitly involved. -For example a path may contain segments, without specifying the -edges connecting them, if there is only one such edge. Alternatively -a path may contain edges, without explitely indicating the segments. -Similarly a set may contain edges, but not the segments refered to -in them, or contain segments which are connected by edges, without -the edges themselves. -Furthermore groups may refer to other groups (set to sets or paths, -paths to paths only), which then indirectly contain references to -segments and edges. - -gfapy provides methods for the computation of the sets of segments -and edges which are implied by an ordered or unordered group. -Thereby all references to subgroups are resolved and implicit -elements are added, as described in the specification. -The computation can, therefore, only be applied to connected lines. -For unordered groups, this computation is provided by the method -```induced_set()```, which returns an array of segment and edge instances. -For ordered group, the computation is provided by the method -```captured_path()```, whcih returns a list of gfapy.OrientedLine instances, -alternating segment and edge instances (and starting and ending in -segments). - -The methods ```induced_segments_set()```, ```induced_edges_set()```, -```captured_segments()``` and ```captured_edges()``` return, respectively, -the list of only segments or edges, in ordered or unordered groups. 
- -```python -gfa.add_line("U\tu1\ts1 s2 s3") -u = gfa.sets[-1] -u.induced_edges_set # => [gfapy.line.edge.GFA2("E\te1\ts1+\ts2-...", ...)] -[l.name for l in u.induced_set ] # => ["s1", "s2", "s3", "e1"] -``` - -### Disconnecting a line from a gfapy object - -Lines can be disconnected using the ```rm(line)``` method of the -```gfapy.Gfa``` object or the ```disconnect()``` method of the -line instance. - -```python -line = gfa.segment("sA") -gfa.rm(line) -# or equivalent: -line.disconnect() -``` - -Disconnecting a line affects other lines as well. Lines which are dependent -on the disconnected line are disconnected as well. Any other reference to -disconnected lines is removed as well. In the disconnected line, references -to lines are transformed back to strings and backreferences are deleted. - -The following tables show which dependent lines are disconnected if they -refer to a line which is being disconnected. - -#### GFA1 - -| Record type | Dependent lines | -|-------------|--------------------------------| -| Segment | links (+ paths), containments | -| Link | paths | - -#### GFA2 - -| Record type | Dependent lines | -|-------------|-------------------------------------| -| Segment | edges, gaps, fragments, sets, paths | -| Edge | sets, paths | -| Sets | sets, paths | - -### Editing reference fields - -In connected line instances, it is not allowed to directly change the content -of fields containing references to other lines, as this would make the state of -the gfapy object invalid. - -Besides the fields containing references, some other fields are read-only in -connected lines. Changing some of the fields would require moving the -backreferences to other collections (position fields of edges and gaps, -```from_orient``` and ```to_orient``` of links). The overlaps field of connected links is -readonly as it may be necessary to identify the link in paths. - -#### Renaming an element - -The name field of a line (e.g. 
segment ```name```/```sid```) is not a reference and thus -can be edited also in connected lines. When the name of the line is changed, -no manual editing of references (e.g. from/to fields in links) is necessary, as -all lines which refer to the line will still refer to the same instance. -The references to the instance in the gfapy lines collections will be -automatically updated. Also, the new name will be correctly used when -converting to string, such as when the gfapy is written to a GFA file. - -Renaming a line to a name which already exists has the same effect of adding -a line with that name. That is, in most cases, ```gfapy.NotUniqueError``` is -raised. An exception are GFA2 sets and paths: in this case -the line will be appended to the existing line with the same name -(as described in "Multiline group definitions"). - -#### Adding and removing group elements - -Elements of GFA2 groups can be added and removed from both connected and -non-connected lines, using the following methods. - -To add an item to or remove an item from an unordered group, use the methods -```add_item(item)``` and ```rm_item(item)```, which take as argument either -a string (identifier) or a line instance. - -To append or prepend an item to an ordered group, use the methods -```append_item(item)``` and ```prepend_item(item)```. To remove the first or -the last item of an ordered group use the methods -```rm_first_item()``` and -```rm_last_item()```. - -#### Editing read-only fields of connected lines - -Editing the read-only information of edges, gaps, links, containments, -fragments and paths is more complicated. These lines shall be disconnected -before the edit and connected again to the gfapy object after it. Before -disconnecting a line, you should check if there are other lines dependent on it -(see tables above). If so, you will have to disconnect these lines first, -eventually update their fields and reconnect them at the end of the operation. 
- -### Virtual lines - -The order of the lines in GFA is not prescribed. Therefore, during parsing, -or constructing a gfapy in memory, it is possible that a line is referenced to, -before it is added to the gfapy instance. -Whenever this happens, gfapy creates a "virtual" line instance. - -Users do not have to handle with virtual lines, if they work with -complete and valid GFA files. - -Virtual lines are similar to normal line instances, with some limitations -(they contain only limited information and it is not allowed to add tags to -them). To check if a line is a virtual line, one can use the -```is_virtual()``` method of the line. - -As soon as the parser founds the real line corresponding to a previously -introduced virtual line, the virtual line is exchanged with the real line -and all references are corrected to point to the real line. - -```python -g = gfapy.Gfa() -g.add_line("S\t1\t*") -g.add_line("L\t\1\t+\t2\t+\t*") -l = g.dovetails[-1] -g.segment("1").is_virtual() # => False -g.segment("2").is_virtual() # => True -l.to_segment == g.segment("2") # => True -g.segment("2").dovetails = [l] # => True -g.add_line("S\t2\t*") -g.segment("2").is_virtual() # => False -l.to_segment == g.segment("2") # => True -g.segment("2").dovetails = [l] # => True -``` diff --git a/gfapy_manual/tags.md b/gfapy_manual/tags.md deleted file mode 100644 index bb3c930..0000000 --- a/gfapy_manual/tags.md +++ /dev/null @@ -1,310 +0,0 @@ -## Tags - -Each record in GFA can contain tags. Tags are fields which consist in a tag -name, a datatype and data. The format is ```NN:T:DATA``` where ``NN`` is a -two-letter tag name, ```T``` is an one-letter datatype string and ```DATA``` is -a string representing the data according to the specified datatype. Tag names -must be unique for each line, i.e. each line may only contain a tag once. 
- -```python -# Examples of GFA tags of different datatypes: -"aa:i:-12" -"bb:f:1.23" -"cc:Z:this is a string" -"dd:A:X" -"ee:B:c,12,3,2" -"ff:H:122FA0" -'gg:J:["A","B"]' -``` - -### Custom tags - -Some tags are explicitely defined in the specification (these are named -_predefined tags_ in gfapy), and the user or an application can define its own -custom tags. - -Custom tags are user or program specific and may of course collide with the -tags used by other users or programs. For this reasons, if you write scripts -which employ custom tags, you should always check that the values are -of the correct datatype and plausible. - -```python -if line.get_datatype("xx") != "i": - raise Exception("I expected the tag xx to contain an integer!") -myvalue = line.xx -if (myvalue > 120) or (myvalue % 2 == 1): - raise Exception("The value in the xx tag is not an even value <= 120") -# ... do something with myvalue -``` - -Also it is good practice to allow the user of the script to change the name of -the custom tags. For example, gfapy employs the +or+ custom tag to track -the original segment from which a segment in the final graph is derived. All -methods which read or write the +or+ tag allow to specify an alternative tag -name to use instead of +or+, for the case that this name collides with the -custom tag of another program. - -```python -# E.g. a method which does something with myvalue, usually stored in tag xx -# allows the user to specify an alternative name for the tag -def mymethod(line, mytag="xx"): - myvalue = line.get(mytag) - # ... -``` - -### Tag names in GFA1 - -According to the GFA1 specification, custom tags are lower case, while -predefined tags are upper case (in both cases the second character in the name -can be a number). There is a number of predefined tags in the specification, -different for each kind of line. 
- -``` -"VN:Z:1.0" # VN is upcase => predefined tag -"z5:Z:1.0" # z5 first char is downcase => custom tag - -# not forbidden, but not reccomended: -"zZ:Z:1.0" # => mixed case, first char downcase => custom tag -"Zz:Z:1.0" # => mixed case, first char upcase => custom tag -"vn:Z:1.0" # => same name as predefined tag, but downcase => custom tag -``` - -Besides the tags described in the specification, in GFA1 headers, the TS tag is -allowed, in order to simplify the translation of GFA2 files. - -### Tag names in GFA2 - -The GFA2 specification is currently not as strict regarding tags: anyone can -use both upper and lower case tags, and no tags are predefined except for VN -and TS. - -However, gfapy follows the same conventions as for GFA1: i.e. it allows the tags -specified as predefined tags in GFA1 to be used also in GFA2. No other upper -case tag is allowed in GFA2. - -### Datatypes - -The following table summarizes the datatypes available for tags: - -| Symbol | Datatype | Example | Python class | -|--------|---------------|-------------------------|--------------------| -| Z | string | This is a string | str | -| i | integer | -12 | int | -| f | float | 1.2E-5 | float | -| A | char | X | str | -| J | JSON | [1,{"k1":1,"k2":2},"a"] | list/dict | -| B | numeric array | f,1.2,13E-2,0 | gfapy.NumericArray | -| H | byte array | FFAA01 | gfapy.ByteArray | - -### Validation - -The tag name is validated according the the rules described above: except -for the upper case tags indicated in the GFA1 specification, and the TS header -tag, all other tags must contain at least one lower case letter. 
- -```python -"VN:i:1" # => in header: allowed, elsewhere: error -"TS:i:1" # => allowed in headers and GFA2 Edges -"KC:i:1" # => allowed in links, containments, GFA1/GFA2 segments -"xx:i:1" # => custom tag, always allowed -"xxx:i:1" # => error: name is too long -"x:i:1" # => error: name is too short -"11:i:1" # => error: at least one letter must be present -``` - -The datatype must be one of the datatypes specified above. For predefined -tags, gfapy also checks that the datatype given in the specification is used. - -``` -"xx:X:1" # => error: datatype X is unknown -"VN:i:1" # => error: VN must be of type Z -``` - -The data must be a correctly formatted string for the specified datatype or a -Python object whose string representation is a correctly formatted string. - -```python -# current value: xx:i:2 -line.xx = 1 # OK -line.xx = "1" # OK, value is set to 1 -line.xx = "A" # error -``` - -Depending on the validation level, more or less checks are done automatically -(see validation chapter). Per default - validation level (1) - validation -is performed only during parsing or accessing values the first time, therefore -the user must perform a manual validation if he changes values to something -which is not guaranteed to be correct. To trigger a manual validation, the -user can call the method ```validate_field(fieldname)``` to validate a -single tag, or ```validate()``` to validate the whole line, including all -tags. - -```python -line.xx = "A" -line.validate_field("xx") # validates xx -# or, to validate the whole line, including tags: -line.validate() -``` - -### Reading and writing tags - -Tags can be read using a property on the gfapy line object, which is called as -the tag (e.g. line.xx). A special version of the property prefixed by -```try_get_``` raises an error if the tag was not available (e.g. -```line.try_get_LN```), while the tag property (e.g. ```line.LN```) would -return ```None``` in this case. 
Setting the value is done assigning a value to -it the tag name method (e.g. ```line.TS = 120```). In alternative, the -```set(fieldname, value)```, ```get(fieldname)``` and ```try_get(fieldname)``` -methods can also be used. To remove a tag from a line, use the -```delete(fieldname)``` method, or set its value to ```None```. - -```python -# line is "H xx:i:12" -line.xx # => 1 -line.xy # => nil -line.try_get_xx # => 1 -line.try_get_xy # => error: xy is not defined -line.get("xx") # => 1 -line.try_get("xy") # => error, xy is not defined -line.xx = 2 # => value of xx is changed to 2 -line.xx = "a" # => error: not compatible with existing type (i) -line.xy = 2 # => xy is created and set to 2, type is auto-set to i -line.set("xy", 2) # => sets xy to 2 -line.delete("xy") # => tag is eliminated -line.xx = None # => tag is eliminated -``` - -The ```gfapy::Line#tagnames``` property is a list of the names (as -strings) of all defined tags for a line. - -```python -print("Line contains the following tags:") -for t in line.tagnames: - print(t) -if "VN" in line.tagnames: - # do something with line.VN value -``` - -When a tag is read, the value is converted into an appropriate object (see Python -classes in the datatype table above). When setting a value, the user can -specify the value of a tag either as a Python object, or as the string -representation of the value. - -```python -# line is: H xx:i:1 xy:Z:TEXT xz:J:["a","b"] -line.xx # => 1 (Integer) -line.xy # => "TEXT" (String) -line.xz # => ["a", "b"] (Array) -``` - -The string representation of a tag can be read using the -```field_to_s(fieldname)``` method. The default is to only output the content -of the field. By setting ``tag: true```, the entire tag is output (name, -datatype, content, separated by colons). An exception is raised if the field -does not exist. 
- -```python -# line is: H xx:i:1 -line.xx # => 1 -line.field_to_s("xx") # => "1" -line.field_to_s("xx", tag=True) # => "xx:i:1" -``` - -### Datatype of custom tags - -The datatype of an existing custom field (but not of predefined fields) can be -changed using the ```set_datatype(fieldname, datatype)``` method. The current -datatype specification can be read using ```get_datatype(fieldname)```. - -```python -# line is: H xx:i:1 -line.get_datatype("xx") # => "i" -line.set_datatype("xx", "Z") -``` - -If a new custom tag is specified, gfapy selects the correct datatype for it: i/f -for numeric values, J/B for arrays, J for hashes and Z for strings and strings. -If the user wants to specify a different datatype, he may do so by setting it -with ```set_datatype()``` (this can be done also before assigning a value, which -is necessary if full validation is active). - -```python -# line has not tags -line.xx = "1" # => "xx:Z:1" created -line.xx # => "1" -line.set_datatype("xy", "i") -line.xy = "1" # => "xy:i:1" created -line.xy # => 1 -``` - -### Arrays of numerical values - -```B``` and ```H``` tags represent array with particular constraints (e.g. they -can only contain numeric values, and in some cases the values must be in -predefined ranges). In order to represent them correctly and allow for -validation, Python classes have been defined for both kind of tags: -```gfapy.ByteArray``` for ```H``` and ```gfapy.NumericArray``` for ```B``` -fields. - -Both are subclasses of list. Object of the two classes can be created by -passing an existing list or the string representation to the class constructor. 
- -```python -# create a byte array instance -gfapy.ByteArray([12,3,14]) -gfapy.ByteArray("A012FF") -# create a numeric array instance -gfapy.NumericArray("c,12,3,14") -gfapy.NumericArray([12,3,14]) -``` - -Instances of the classes behave as normal lists, except that they provide a -#validate() method, which checks the constraints, and that their string -representation is the GFA string representation of the field value. - -```python -gfapy.ByteArray([12,1,"1x"]).validate() # error: 1x is not a valid value -str(gfapy.ByteArray([12,3,14])) # => "c,12,3,14" -``` - -For numeric values, the ```compute_subtype()``` method allows to compute the -subtype which will be used for the string representation. Unsigned subtypes -are used if all values are positive. The smallest possible subtype range is -selected. The subtype may change when the range of the elements changes. - -```python -gfapy.NumericValue([12,13,14]).compute_subtype() # => "C" -``` - -### Special cases: custom records, headers, comments and virtual lines. - -GFA2 allows custom records, introduced by record type strings other than the -predefined ones. gfapy uses a pragmatical approach for identifying tags in -custom records, and tries to interpret the rightmost fields as tags, until the -first field from the right raises an error; all remaining fields are treated as -positional fields. - -```python -"X a b c xx:i:12" # => xx is tag, a, b, c are positional fields -"Y a b xx:i:12 c" # => all positional fields, as c is not a valid tag -``` - -For easier access, the entire header of the GFA is summarized in a single line -instance. A class (```gfapy.FieldArray```) has been defined to handle the -special case when multiple H lines define the same tag (see "Header" chapter -for details). - -Comment lines are represented by a subclass of the same class (```gfapy.Line```) -as the records. However, they cannot contain tags: the entire line is taken as -content of the comment. 
See the "Comments" chapter for more information about -comments. - -```python -"# this is not a tag: xx:i:1" # => xx is not a tag, xx:i:1 is part of the comment -``` - -Virtual ```gfapy.Line``` instances (e.g. segment instances automatically created -because of not yet resolved references found in edges) cannot be modified by -the user, and tags cannot be specified for them. This includes all instances of -the ```gfapy::Line::Unknown``` class. See the "References" chapter for more information -about virtual lines. diff --git a/gfapy_manual/validation.md b/gfapy_manual/validation.md deleted file mode 100644 index d9e4e38..0000000 --- a/gfapy_manual/validation.md +++ /dev/null @@ -1,66 +0,0 @@ -## Validation - -Different validation levels are available. They represent different compromises -between speed and warrant of validity. The validation level can be specified -when the gfapy.Gfa object is created, using the ```vlevel``` parameter of -the constructor and of the ```gfapy.Gfa.from_file()``` method. -Four levels of validation are defined -(0 = no validation, 1 = validation by reading, 2 = validation by reading and -writing, 3 = continuous validation). The default validation level value is 1. - -### Manual validation - -Independently from the validation level choosen, the user can -always check the value of a field calling ```validate_field(fieldname)``` -on the line instance. If no exeption is raised, the field content -is valid. - -To check if the entire content of the line is valid, the user can call -```validate``` on the line instance. This will check all fields and perform -cross-field validations, such as comparing the length of the sequence of a GFA1 -segment, to the value of the LN tag (if present). - -It is also possible to validate the structure of the GFA, for example -to check if there are unresolved references to lines. To do this, -use the ```validate()``` method of the ```gfapy.Gfa``` class. 
- -### No validations - -If the validation is set to 0, gfapy will try to accept any input -and never raise an exception. This is not always possible, and in -some cases, an exception will still be raised, if the data is invalid. - -### Validation when reading - -If the validation level is set to 1 or higher, basic validations -will be performed, such as checking the number of positional fields, -the presence of duplicated tags, the tag datatype of predefined tags. -Additionally, all tags will be validated, either -during parsing or on first access. -Record-type cross-field validations will also be performed. - -In other words, a validation of 1 means that gfapy guarantees (as good as -it can) that the GFA content read from a file is valid, and will raise an -exception on accessing the data if not. - -The user is supposed to run ```validate_field(fieldname)``` when changing -a field content to something which can be potentially invalid, or -```validate()``` if potentially cross-field validations could fail. - -### Validation when writing - -Setting the level to 2 will perform all validations described above, -plus validate the fields content when their value is written to string. - -In other words, a validation of 2 means that gfapy guarantee (as good as -it can) that the GFA content read from a file and written to a file is valid -and will raise an exception on accessing the data or writing to file if not. - -### Continuous validation - -If the validation level is set to 3, all validations for lower levels -described above are run, plus a validation of fields contents each -time a setter method is used. - -A validation of 3 means that gfapy guarantees (as good as it can) -that the GFA content is always valid. diff --git a/gfapy_manual/versions.md b/gfapy_manual/versions.md deleted file mode 100644 index 5ec6ba0..0000000 --- a/gfapy_manual/versions.md +++ /dev/null @@ -1,139 +0,0 @@ -## GFA versions - -Two versions of GFA have been defined: GFA1 and GFA2. 
-The header lines and comments have the same syntax in both versions. Segment -lines have a different syntax, as they have an additional positional field in -GFA2. Edges lines are version-specific: L and C are found only in GFA1, -and E only in GFA2. Group lines are also version-specific: P in GFA1, O and U -in GFA2. F and G lines are GFA2-specific. Furthermore, GFA2 allows to create -user-specific record types, by using non-standard codes. - -### Version autodetection - -gfapy tries to autodetect the version of a GFA file from its syntax. The -version of a valid GFA can always be recognized, unless it contains only header -and comment lines, as any other line refer to segments, and segments are -version-specific. If a GFA contains only header and commments, the version -does not matter. - -The version is set as soon as a version-specific element is found. -Here is the list of such elements: -- segment lines (different number of positional fields in GFA1 and GFA2) -- version tag in header (```VN:Z:1.0``` or ```VN:Z:2.0```) -- E/G/F/O/U lines (GFA2 specific) -- custom record-type lines (GFA2 specific) - -If subsequent version-specific elements are found which contrast with the first -one, gfapy::VersionError is raised. - -P/C/L lines are technically not GFA1-specific, as they could be custom records -in GFA2. However, their use in GFA2 is not supported by gfapy and an exception -is thrown if these records are found in that version. Thus if these lines are -found, their processing is delayed until a version-specific signal is found. -If the version is GFA2, gfapy::VersionError is raised. - -### Setting and reading the version - -Besides relying on autodetection, it is possible to explicitely set the version -of the gfapy or line objects, if this is known. 
Methods which create Gfa instances -(the constructor and the ```from_file()``` method), as well as methods which -create gfapy lines (the constructor and the ```from_strig()``` method), -accept a ```version``` -parameter, which can be set to the strings ```"gfa1"``` or ```"gfa2"```. - -Instances of gfapy.Gfa and gfapy.Line have a ```version``` property -which contain ```"gfa1"```, ```"gfa2"``` or ```"unknown"```. - -### Line queue - -The version autodetection feature is achieved by deferring the processing -of version-specific lines (ie everything besides headers and comments) -which are found before the version can be detected as explained above. -These lines are put on a line queue. Once the version is clear, -the method ```process_line_queue()``` is called on the gfapy instance. - -This method can also be called by the user, if e.g. an example GFA is -created programmatically, where the version is unclear. For the reasons -explained above, this will generally not be the case, as such a GFA file -would only contain headers and comments. - -### Conversion of gfapy or RGFA::Line instances - -The conversion of GFA lines between GFA version is possible in some -cases. When possible, this is achieved by using the ```to_gfa1()``` -and ```to_gfa2()``` methods on the line instances. It is also possible -to directly output the line as a string in the other version -using the ```to_gfa1_s()``` and ```to_gfa2_s()``` methods. - -Some lines do not require conversion (headers - except changing -the value of the VN tag, comments). -The conversions of GFA2-specific information (gaps, fragments, sets, -custom records) is not possible. The other lines (segments, -edges/links/containments, paths) can be converted if they -fulfill some requirements described below. - -#### Segments - -GFA2 segments contain an additional field (slen: length of the sequence), -compared to GFA1. 
- -Conversion from GFA2 to GFA1 is possible, unless unsupported -characters are used in the sequence (which is usually not the case) or -the identifier is incompatible with GFA1 (i.e. it ends with -```+,``` or ```-,```). - -Conversion from GFA1 to GFA2 is possible, unless no sequence -and no LN tag are present. - -#### Edges - -GFA2 generalizes the links and containments into edge lines, which can -represent also alignments which are not representable in GFA1. -This goes at the costs of some simplicity, as GFA2 needs to indicate -the coordinates of the alignment, while GFA1 is purely topology based. - -Conversion from GFA1 to GFA2 requires CIGARs, so that the alignment coordinates -can be computed. This is only possible if the sequence lengths are available, -which is anyway required for converting segments. - -Conversion from GFA2 to GFA1 is possible if the edge represents -a dovetail overlap or an alignment. Also trace alignments are not supported -in GFA1, so the trace overlap will be set to ```*```. Edge identifiers are stored -in ```id:Z``` tags. - -#### Paths - -Conversion of paths from GFA1 to GFA2 is possible, if the links specified -in the path are the only between pairs of segments, or if the links contain -an ID optional tag. - -Conversion of paths from GFA2 to GFA1 is possible, if they contain -only segments and/or edges representing dovetail overlaps. Child paths -are also allowed, but only if they are also composed only of segments -and/or edges and/or child paths with the same limitations. 
- -#### Conversion from GFA1 to GFA2: requirements - -| Record type | Requirements | -|-------------|--------------------------------------------------| -| Comment | None | -| Header | None | -| Segment | Sequence or LN tag available | -| Link | CIGAR and segment lengths available | -| Containment | CIGAR and segment lengths available | -| Path | Links must have an id tag | - -#### Conversion from GFA2 to GFA1: requirements - -| Record type | Requirements | -|-------------|--------------------------------------------------| -| Comment | None | -| Header | None | -| Segment | Sequence alphabet compatible with GFA1 | -| | Identifier compatible with GFA1 name | -| Edge | Dovetail overlap or containment | -| Path | All edges are dovetails | -| Sets | Cannot be converted! | -| Gap | Cannot be converted! | -| Fragment | Cannot be converted! | -| Custom | Cannot be converted! | diff --git a/gfapy_tests/translate_test.rb b/gfapy_tests/translate_test.rb deleted file mode 100755 index f5a4d2f..0000000 --- a/gfapy_tests/translate_test.rb +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env ruby - -# -# This scripts converts VERY ROUGHLY the tests of RGFA in gfapy Tests -# - -filename = ARGV[0] -f = File.new(filename) -puts "import gfapy" -puts "import unittest" -puts -prev_empty = true -klass = nil -before_first = true -f.each do |line| - line.chomp! - line.gsub!(/^class Test(.*) < Test::Unit::TestCase/,'class Test\1(unittest.TestCase):') - line.gsub!("assert_equal","self.assertEqual") - line.gsub!(/assert_raises?\((.*)\) *(\{|do) *(.+?) *\}/, - 'with self.assertRaises(\1): \3') - line.gsub!(/assert_raises?\((.*)\) *(\{|do) *(.+?) 
*/, - 'with self.assertRaises(\1): \3') - line.gsub!(/assert_raises?/, "with self.assertRaises") - line.gsub!("assert_not_equal","self.assertNotEqual") - line.gsub!("assert_nil","self.assertIsNone") - line.gsub!("assert_not_nil","self.assertIsNotNone") - line.gsub!("assert(!","assert(not ") - line.gsub!(/assert_kind_of *\((.*, *.*)\)/,'assert(isinstance(\2,\1))') - line.gsub!("str=","s=") - line.gsub!("\Wstr\.","\Ws.") - line.gsub!(/([^ ]+)\.to_pos/,'gfapy.LastPos(\1)') - line.gsub!(/([^ ]+)\.to_lastpos/,'gfapy.LastPos(\1)') - line.gsub!(/assert_nothing_raised { *(.+?) *}/,'\1 # nothing raised') - line.gsub!('# nothing raised)', ') # nothing raised') - line.gsub!("assert_nothing_raised do","") - line.gsub!(/^( +)([^ ]+) *<< \(?([\w]+) *= *(.+)\)/, - '\1\3 = \4'+"\n"+'\1\2.append(\3)') - line.gsub!(/([^ ]+) << ([^ ]+)/,'\1.append(\2)') - line.gsub!(/("[^"]+").to_rgfa_line/,'gfapy.Line.from_string(\1)') - line.gsub!(/([^ ]+).to_rgfa_line/,'gfapy.Line.from_string(\1)') - line.gsub!(/("[^"]+").to_rgfa/,'gfapy.Gfa.from_string(\1)') - line.gsub!(/([^ ]+).to_rgfa/,'gfapy.Gfa.from_string(\1)') - line.gsub!(/([^ ]+).map\(\&:to_s\)/,'[str(x) for x in \1]') - line.gsub!(/([^ ]+).map\(\&:([^\)]+)\)/,'[x.\2() for x in \1]') - line.gsub!(/([^ ]+).each\(\&:([^\)]+)\)/,'for x in \1: x.\2()') - line.gsub!(/( *)([^ ].+)\.each \{ *\|(.*)\| *(.*) *\}/,'\1for \3 in \2: \4') - line.gsub!(/( *)([^ ].+)\.each do *\|(.*)\| *(.*) */,'\1for \3 in \2: \4') - line.gsub!(/#\{([^}]+)\}/,'"+"{}".format(\1)+"') - line.gsub!('+""',"") - line.gsub!('""+',"") - line.gsub!(/([^ ]+)\.to_s/,'str(\1)') - line.gsub!(/def test_([A-Za-z0-9_]+)/,'def test_\1(self):') - line.gsub!(/([^ ]+).size/,'len(\1)') - line.gsub!("RGFA.new","gfapy.Gfa()") - line.gsub!("Gfa()(","Gfa(") - line.gsub!(".new","") - line.gsub!(".to_sym","") - line.gsub!("RGFA::","gfapy.") - line.gsub!("Line::","line.") - line.gsub!("RGFA","gfapy.Gfa") - line.gsub!(/([A-Za-z_0-9]*)::/,'\1.') - line.gsub!(/([A-Za-z_0-9]*)::/,'\1.') - 
line.gsub!("nil","None") - line.gsub!(/(\w*) => /,'\1:') - line.gsub!("version: ","version=") - line.gsub!(/\((\w+): ?/,'(\1=') - line.gsub!("NoMethodError ","AttributeError") - line.gsub!("vlevel: ","vlevel=") - line.gsub!(")(vlevel",",vlevel") - line.gsub!(")(version",",version") - line.gsub!(/(\w+)!/,'try_get_\1()') - line.gsub!(/(\w+)\?/,'is_\1()') - line.gsub!("()(",'(') - line.gsub!("true","True") - line.gsub!("false","False") - line.gsub!("..-1]",":]") - line.gsub!("..-2]",":-1]") - line.gsub!("..0]",":1]") - line.gsub!("..1]",":2]") - line.gsub!("..2]",":3]") - line.gsub!(".first",'[0]') - line.gsub!(".last",'[-1]') - line.gsub!(".class",".__class__") - line.gsub!(/([^A-Za-z0-9_]):"?([A-Za-z0-9+\-=_]+)"?/,'\1"\2"') - line.gsub!(".dup",".copy()") - line.gsub!(".clone",".copy()") - line.gsub!(".disconnect",".disconnect()") - line.gsub!(/\.validate$/,".validate()") - line.gsub!(".validate *)",".validate())") - line.gsub!("Alignment.CIGAR","CIGAR") - line.gsub!("Alignment.Trace","Trace") - line.gsub!("Alignment.Placeholder","AlignmentPlaceholder") - line.gsub!("Edge.Link","edge.Link") - line.gsub!("Edge.Containment","edge.Containment") - line.gsub!("Edge.GFA2","edge.GFA2") - line.gsub!("Segment.GFA1","segment.GFA1") - line.gsub!("Segment.GFA2","segment.GFA2") - line.gsub!(/ *([^=]+)\.join\(([^\)]*)\)/,'\2.join(\1)') - line.gsub!(/OL\[([^,]*),([^\]]*)\]/,'gfapy.OrientedLine(\1,\2)') - line.gsub!(".is_virtual()",".virtual") - line.gsub!(".name()",".name") - line.gsub!(/([^ ]+).send\(([^,]+),(.+)\)/,'getattr(\1,\2)(\3)') - line.gsub!(/([^ ]+).send\((.+)\)/,'getattr(\1,\2)') - line.gsub!(/self.assertEqual\((.*).sort, (.*).sort\)/, - 'self.assertEqual(set(\1), set(\2))') - line.gsub!(/\.dovetails\(([^\)]+)\)/,'.dovetails_of_end(\1)') - line.gsub!(/\.gaps\(([^\)]+)\)/,'.gaps_of_end(\1)') - line.gsub!(/ +$/,'') - if line =~ /^class Test/ - line.gsub!(".","") - line.gsub!("unittestTestCase","unittest.TestCase") - line =~ /class (\w+)/ - klass = $1 - end - if line =~ 
/def test/ - before_first = false - end - if before_first - line.gsub!("@@","") - elsif klass - line.gsub!("@@","#{klass}.") - end - if line !~ /^ *end *$/ and line !~ /require/ and line !~ /Module/ and \ - (!prev_empty or !line.strip.empty?) - puts line - prev_empty = line.strip.empty? - end -end diff --git a/lib/rgfa.rb b/lib/rgfa.rb deleted file mode 100644 index aa82181..0000000 --- a/lib/rgfa.rb +++ /dev/null @@ -1,373 +0,0 @@ -# (c) 2016, Giorgio Gonnella, ZBH, Uni-Hamburg - -# Main class of the RGFA library. -# -# RGFA provides a representation of a GFA graph. It supports creating a graph -# from scratch, input and output from/to file or strings, as well as several -# operations on the graph. Using the RGFA class, it is possible to -# to create a RGFA object from scratch or from a GFA file, write the RGFA object -# to a GFA file, or output it as string. -# -# == Interacting with the graph -# -# Most interaction with the GFA involve interacting with -# its record, i.e. instances of a subclass of the {RGFA::Line} class: -# -# GFA-version independent line types: -# - {RGFA::Line::Header} -# - {RGFA::Line::Comment} -# -# GFA1 line types: -# - {RGFA::Line::Segment::GFA1} -# - {RGFA::Line::Edge::Link} -# - {RGFA::Line::Edge::Containment} -# - {RGFA::Line::Group::Path} -# -# GFA2 line types: -# - {RGFA::Line::Segment::GFA2} -# - {RGFA::Line::Edge::GFA2} -# - {RGFA::Line::Group::Ordered} -# - {RGFA::Line::Group::Unordered} -# - {RGFA::Line::Gap} -# - {RGFA::Line::Fragment} -# - {RGFA::Line::CustomRecord} -# - {RGFA::Line::Unknown} -# -# Basic graph operations, such as finding, editing, iterating over and removing -# lines belonging to a RGFA instance can be done using code in the {RGFA::Lines} -# modules: -# - {RGFA::Lines::Headers} -# - {RGFA::Lines::Collections} -# - {RGFA::Lines::Creators} -# - {RGFA::Lines::Destructors} -# - {RGFA::Lines::Finders} -# -# More complicated graph operations, constructed using the basic operations -# on lines and the graph, are 
defined by the modules in the -# {RGFA::GraphOperations} namespace: -# - {RGFA::GraphOperations::Topology} -# - {RGFA::GraphOperations::LinearPaths} -# - {RGFA::GraphOperations::Multiplication} -# - {RGFA::GraphOperations::RGL} -# Additional functionality is provided by {RGFATools}. -# -# == Examples -# -# @example Creating an empty RGFA object -# gfa = RGFA.new -# -# @example Parsing and writing GFA format -# gfa = RGFA.from_file(filename) # parse GFA file -# gfa.to_file(filename) # write to GFA file -# puts gfa # show GFA representation of RGFA object -# -# @example Basic statistics report -# puts gfa.info # print report -# puts gfa.info(short = true) # compact format, in one line -# -# @example Validation -# gfa = RGFA.from_file(filename, vlevel: 1) # default level is 1 -# gfa.vlevel = 3 # change validation level -# gfa.validate # run post-validations (e.g. check segment names in links) -# -class RGFA; end - -require_relative "./rgfa/alignment.rb" -require_relative "./rgfa/byte_array.rb" -require_relative "./rgfa/field_array.rb" -require_relative "./rgfa/field.rb" -require_relative "./rgfa/graph_operations.rb" -require_relative "./rgfa/line.rb" -require_relative "./rgfa/lines.rb" -require_relative "./rgfa/logger.rb" -require_relative "./rgfa/numeric_array.rb" -require_relative "./rgfa/placeholder.rb" -require_relative "./rgfa/oriented_line.rb" -require_relative "./rgfa/lastpos.rb" -require_relative "./rgfa/segment_end.rb" -require_relative "./rgfa/sequence.rb" -require_relative "./rgfa/symbol_invert.rb" - -class RGFA - - include RGFA::Lines - include RGFA::GraphOperations - include RGFA::LoggerSupport - - # @!attribute [rw] vlevel - # @return [Integer (0..3)] validation level - attr_accessor :vlevel - - # Recognized GFA specification versions - VERSIONS = [:gfa1, :gfa2] - - # @!attribute [r] version - # @return [RGFA::VERSIONS, nil] GFA specification version - attr_reader :version - - # @!macro vlevel - # @param vlevel [Integer] (defaults to: +1+) - # the 
validation level; see "Validation level" under - # {RGFA::Line#initialize}. - # @param version [RGFA::VERSIONS] GFA version, nil if unknown - def initialize(vlevel: 1, version: nil) - @vlevel = vlevel - @records = {} - @records[:H] = RGFA::Line::Header.new([], vlevel: @vlevel) - @records[:H].connect(self) - [:S, :P, :F, nil].each {|rt| @records[rt] = {}} - [:E, :U, :G, :O].each {|rt| @records[rt] = {nil => []}} - [:C, :L, :"#"].each {|rt| @records[rt] = []} - @segments_first_order = false - @progress = false - @default = {:count_tag => :RC, :unit_length => 1} - @extensions_enabled = false - @line_queue = [] - if version.nil? - @version = nil - @version_explanation = nil - @version_guess = :gfa2 - else - @version = version.to_sym - @version_explanation = "set during initialization" - @version_guess = @version - validate_version - end - end - - # Post-validation of the RGFA - # @return [void] - # @raise if validation fails - def validate - validate_segment_references - validate_path_links - return nil - end - - # Creates a string representation of RGFA - # @return [String] - def to_s - s = "" - each_line {|line| s << line.to_s; s << "\n"} - return s - end - - # Creates a string representation of RGFA using the GFA1 specification - # @return [String] to_s if version is :gfa1, otherwise the converted GFA - def to_gfa1_s - s = "" - each_line {|line| s << line.to_gfa1_s; s << "\n"} - return s - end - - # Creates a string representation of RGFA using the GFA2 specification - # @return [String] to_s if version is :gfa2, otherwise the converted GFA - def to_gfa2_s - s = "" - each_line {|line| s << line.to_gfa2_s; s << "\n"} - return s - end - - # Converts to GFA1 if possible - # @return [RGFA] self if version is :gfa1, otherwise the converted GFA - def to_gfa1 - to_gfa1_s.to_rgfa - end - - # Converts to GFA2 if possible - # @return [RGFA] self if version is :gfa1, otherwise the converted GFA - def to_gfa2 - to_gfa2_s.to_rgfa - end - - # Return the gfa itself - # @return 
[self] - def to_rgfa - self - end - - # Create a deep copy of the RGFA instance. - # - # The clone is created by writing the instance to string and - # reading the string content (during the writing and reading, - # no validation is performed; therefore malformed GFA may return - # a copy which is not exact). Instance variables other than the - # lines content are copied after that. - # - # @return [RGFA] a deep copy of the RGFA instance. - def clone - self_vlevel = @vlevel - @vlevel = 0 - cpy = to_s.to_rgfa(vlevel: 0) - @vlevel = self_vlevel - cpy.vlevel = @vlevel - cpy.enable_progress_logging if @progress - cpy.require_segments_first_order if @segments_first_order - cpy.instance_variable_set("@default", @default) - cpy.instance_variable_set("@extensions_enabled", @extensions_enabled) - return cpy - end - - # Populates a RGFA instance reading from file with specified +filename+ - # @param [String] filename - # @raise if file cannot be opened for reading - # @return [self] - def read_file(filename) - if @progress - linecount = `wc -l #{filename}`.strip.split(" ")[0].to_i - progress_log_init(:read_file, "lines", linecount, - "Parse file with #{linecount} lines") - end - File.foreach(filename) do |line| - self << line.chomp - progress_log(:read_file) if @progress - end - if !@line_queue.empty? 
- @version = @version_guess - process_line_queue - end - progress_log_end(:read_file) if @progress - validate if @vlevel >= 1 - self - end - - # Creates a RGFA instance parsing the file with specified +filename+ - # @param [String] filename - # @raise if file cannot be opened for reading - # @!macro vlevel - # @param version [RGFA::VERSIONS] GFA version, nil if unknown - # @return [RGFA] - def self.from_file(filename, vlevel: 1, version: nil) - gfa = RGFA.new(vlevel: vlevel, version: version) - gfa.read_file(filename) - return gfa - end - - # Write RGFA to file with specified +filename+; - # overwrites it if it exists - # @param [String] filename - # @raise if file cannot be opened for writing - # @return [void] - def to_file(filename) - File.open(filename, "w") {|f| each_line {|l| f.puts l}} - end - - # Compare two RGFA instances. - # @return [Boolean] are the lines of the two instances equivalent? - def ==(other) - lines == other.lines - end - - # @api private - module API_PRIVATE - - # Require that the links, containments and paths referring - # to a segment are added after the segment. Default: do not - # require any particular ordering. - # - # @return [void] - def require_segments_first_order - @segments_first_order = true - end - - attr_reader :segments_first_order - - end - include API_PRIVATE - - private - - # Compute segment length statistics - def lenstats - sln = segments.map(&:length!).sort - n = sln.size - tlen = sln.inject(:+) - n50 = nil - sum = 0 - sln.reverse.each do |l| - sum += l - if sum >= tlen/2 - n50 = l - break - end - end - q = [sln[0], sln[(n/4)-1], sln[(n/2)-1], sln[((n*3)/4)-1], sln[-1]] - return q, n50, tlen - end - - # Checks that L, C and P refer to existing S. - # @return [void] - # @raise [RGFA::NotFoundError] if validation fails - def validate_segment_references - @records[:S].values.each do |s| - if s.virtual? 
- raise RGFA::NotFoundError, "Segment #{s.name} does not exist\n"+ - "References to #{s.name} were found in the following lines:\n"+ - s.all_references.map(&:to_s).join("\n") - end - end - return nil - end - - # Checks that P are supported by links. - # @return [void] - # @raise if validation fails - def validate_path_links - @records[:P].values.each do |pt| - pt.links.each do |ol| - l = ol.line - if l.virtual? - raise RGFA::NotFoundError, "Edge::Link: #{l.to_s}\n"+ - "does not exist, but is required by the following paths:\n"+ - l.paths.map(&:to_s).join("\n") - end - end - end - return nil - end - - def validate_version - if !@version.nil? and !RGFA::VERSIONS.include?(@version) - raise RGFA::VersionError, - "GFA specification version #{@version} not supported" - end - end - -end - -# Ruby core String class, with additional methods. -class String - - # Converts a +String+ into a +RGFA+ instance. Each line of the string is added - # separately to the gfa. - # @param version [RGFA::VERSIONS] GFA version, nil if unknown - # @return [RGFA] - # @!macro vlevel - def to_rgfa(vlevel: 1, version: nil) - gfa = RGFA.new(vlevel: vlevel, version: version) - split("\n").each {|line| gfa << line} - gfa.process_line_queue - gfa.validate if vlevel >= 1 - return gfa - end - -end - -# Ruby core Array class, with additional methods. -class Array - - # Converts an +Array+ of strings or RGFA::Line instances - # into a +RGFA+ instance. - # @param version [RGFA::VERSIONS] GFA version, nil if unknown - # @return [RGFA] - # @api private? 
- # @!macro vlevel - def to_rgfa(vlevel: 1, version: nil) - gfa = RGFA.new(vlevel: vlevel, version: version) - each {|line| gfa << line} - gfa.process_line_queue - gfa.validate if vlevel >= 1 - return gfa - end - -end diff --git a/lib/rgfa/alignment.rb b/lib/rgfa/alignment.rb deleted file mode 100644 index 424b614..0000000 --- a/lib/rgfa/alignment.rb +++ /dev/null @@ -1,103 +0,0 @@ -RGFA::Alignment = Module.new - -require_relative "error" -require_relative "alignment/placeholder" -require_relative "alignment/cigar" -require_relative "alignment/trace" - -# @tested_in api_alignment -class String - # Parses an alignment field - # @param version [Symbol] if :gfa2, then CIGARs and placeholders - # are considered valid; if :gfa1, CIGARs (limited to MIDP), - # trace alignments and placeholders - # @return [RGFA::Alignment::CIGAR, RGFA::Alignment::Trace, - # RGFA::Alignment::Placeholder] - # @raise [RGFA::FormatError] if the content of the - # field cannot be parsed - # @param valid [Boolean] (defaults to: +false+) if +true+, - # the string is guaranteed to be valid - # @raise [RGFA::VersionError] if a wrong version is provided - def to_alignment(version: :gfa2, valid: false) - if ![:gfa1, :gfa2].include?(version) - raise RGFA::VersionError, "Version unknown: #{version}" - end - first = true - each_char do |char| - if first - if char =~ /\d/ - first = false - next - elsif placeholder? 
- return RGFA::Alignment::Placeholder.new - end - else - if char =~ /\d/ - next - elsif char == "," - if version == :gfa2 - t = self.to_trace - t.validate if !valid - return t - else - raise RGFA::FormatError, - "Trace alignments are not allowed in GFA1: #{self.inspect}" - end - elsif char =~ /[MIDP]/ or (char =~ /[=XSHN]/ and version == :gfa1) - return self.to_cigar(valid: valid, version: version) - end - end - break - end - raise RGFA::FormatError, - "Alignment field contains invalid data: #{self.inspect}" - end -end - -# @tested_in unit_alignment -class Array - - # @api private - module API_PRIVATE - - # Convert an array to an appropriate Alignment instance - # - # @param version [Symbol] if +:gfa2+, then CIGARs and placeholders - # are considered valid; if +:gfa1+, CIGARs (limited to MIDP), - # trace alignments and placeholders - # @param valid [Boolean] ignored, for compatibility - # - # @raise [RGFA::FormatError] if the content of the - # array cannot be interpreted as an alignment specification - # @raise [RGFA::VersionError] if a wrong version is provided - # - # @return [RGFA::Alignment::CIGAR, RGFA::Alignment::Trace, RGFA::Alignment::Placeholder] - # an empty array is converted to a placeholder, - # an array of CIGAR operations to a CIGAR instance, - # an array of Integers to a Trace instance - # - def to_alignment(version: :gfa1, valid: nil) - if ![:gfa1, :gfa2].include?(version) - raise RGFA::VersionError, "Version unknown: #{version}" - end - if self.empty? 
- return RGFA::Alignment::Placeholder.new - elsif self[0].kind_of?(Integer) - if version == :gfa2 - return RGFA::Alignment::Trace.new(self) - else - raise RGFA::VersionError, - "Trace alignments are not allowed in GFA1: #{self.inspect}" - end - elsif self[0].kind_of?(RGFA::Alignment::CIGAR::Operation) - return RGFA::Alignment::CIGAR.new(self) - else - raise RGFA::FormatError, - "Array does not represent a valid alignment: #{self.inspect}" - end - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/alignment/cigar.rb b/lib/rgfa/alignment/cigar.rb deleted file mode 100644 index d96cae7..0000000 --- a/lib/rgfa/alignment/cigar.rb +++ /dev/null @@ -1,309 +0,0 @@ -RGFA::Alignment ||= Module.new - -# Array of {RGFA::Alignment::CIGAR::Operation CIGAR operations}. -# Represents the contents of a CIGAR string. -# @tested_in api_alignment -class RGFA::Alignment::CIGAR < Array - - # Compute the CIGAR for the segments when these are switched. - # - # @example Computing the complement CIGAR - # - # "2M1D3M".to_alignment.complement.to_s - # # => "3M1I2M" - # - # # S1 + S2 + 2M1D3M - # # - # # S1+ ACGACTGTGA - # # S2+ CT-TGACGG - # # - # # S2- CCGTCA-AG - # # S1- TCACAGTCGT - # # - # # S2 - S1 - 3M1I2M - # - # @return [RGFA::Alignment::CIGAR] - def complement - RGFA::Alignment::CIGAR.new(clone.reverse.map do |op| - if op.code == :I or op.code == :S - op.code = :D - elsif op.code == :D or op.code == :N - op.code = :I - end - op - end) - end - - # String representation of the CIGAR - # @note no validation is performed, use #validate if required - # @return [String] CIGAR string - def to_s - placeholder? ? 
"*" : (map(&:to_s).join) - end - - # Validate the instance - # @param version [Symbol] (defaults to: +:gfa1+) if :gfa2, - # then only CIGAR codes M/I/D/P are allowed, if :gfa1 all CIGAR codes - # @raise [RGFA::ValueError] if a code is invalid or a length is negative - # @raise [RGFA::TypeError] if a length is not an Integer or - # the array contains anything which is not interpretable as a - # cigar operation - # @raise [RGFA::VersionError] if a wrong version is provided - # @return [void] - def validate(version: :gfa1) - if ![:gfa1, :gfa2].include?(version) - raise RGFA::VersionError, "Version unknown: #{version}" - end - any? do |op| - begin - op = op.to_cigar_operation - rescue - raise RGFA::TypeError, "Array contains elements which are "+ - "not CIGAR operations: #{self.inspect}" - end - op.validate(version: version) - end - end - - # @param valid [nil] ignored, for compatibility - # @param version [nil] ignored, for compatibility - # @return [RGFA::Alignment::CIGAR] self - def to_alignment(valid: nil, version: :nil) - self - end - - # Lenght of the aligned substring on the reference sequence - # (+from+ sequence for GFA1 links/containments; - # +sid1+ sequence for GFA2 edges) - # @return [Integer] length of the aligned substring on the - # reference sequence - def length_on_reference - l = 0 - each do |op| - if [:M, :"=", :X, :D, :N].include?(op.code) - l += op.len - end - end - return l - end - - # Lenght of the aligned substring on the query sequence - # (+to+ sequence for GFA1 links/containments; - # +sid2+ sequence for GFA2 edges) - # @return [Integer] length of the aligned substring on the - # query sequence - def length_on_query - l = 0 - each do |op| - if [:M, :"=", :X, :I, :S].include?(op.code) - l += op.len - end - end - return l - end - - # Create a deep copy - # @return [RGFA::Alignment::CIGAR] - def clone - RGFA::Alignment::CIGAR.new(map{|x|x.clone}) - end - - # @api private - # @tested_in unit_alignment - module API_PRIVATE - - # @return 
[RGFA::Alignment::CIGAR] self - # @param valid [nil] ignored, for compatibility - # @param version [nil] ignored, for compatibility - def to_cigar(valid: nil, version: :nil) - self - end - - module ClassMethods - - # Parse a CIGAR string into an array of CIGAR operations. - # - # Each operation is represented by a {RGFA::Alignment::CIGAR::Operation}, - # i.e. a tuple of operation length and operation code. - # - # The operation code is one of MIDP for GFA2 or MIDPNSHX= for GFA1. - # The additional operations allowed in GFA1 have an unclear meaning - # in the context of GFA and should be avoided. - # - # @param version [Symbol] (defaults to: +gfa1+) if +:gfa2+, - # then only CIGAR codes M/I/D/P are allowed, if +:gfa1+ all CIGAR codes - # @param valid [Boolean] (defaults to: +false+) if +true+, - # the string is guaranteed to be valid - # @raise [RGFA::FormatError] if the string is not a valid CIGAR string - # @raise [RGFA::VersionError] if a wrong version is provided - # @return [RGFA::Alignment::CIGAR] - def from_string(str, valid: false, version: :gfa1) - a = RGFA::Alignment::CIGAR.new - unless valid - case version - when :gfa1 - if str !~ /^([0-9]+[MIDPNSHX=])+$/ - raise RGFA::FormatError, - "The string #{str} does not represent a valid CIGAR string" - end - when :gfa2 - if str !~ /^([0-9]+[MIDP])+$/ - raise RGFA::FormatError, - "The string #{str} does not represent a valid GFA2 CIGAR string" - end - else - raise RGFA::VersionError, "Version unknown: #{version}" - end - end - str.scan(/[0-9]+[MIDPNSHX=]/).each do |op| - len = op[0..-2].to_i - code = op[-1..-1].to_sym - a << RGFA::Alignment::CIGAR::Operation.new(len, code) - end - return a - end - - end - - end - include API_PRIVATE - extend API_PRIVATE::ClassMethods - -end - -# An operation in a CIGAR string -# @tested_in api_alignment -class RGFA::Alignment::CIGAR::Operation - - # @!attribute [rw] len - # @return [Integer > 0] operation length - attr_accessor :len - - # @!attribute [rw] code - # @return 
[RGFA::Alignment::CIGAR::Operation::CODE] operation code - attr_accessor :code - - # CIGAR operation code - CODE_GFA1_ONLY = [:S, :H, :N, :X, :"="] - CODE_GFA1_GFA2 = [:M, :I, :D, :P] - CODE = CODE_GFA1_ONLY + CODE_GFA1_GFA2 - - # @param len [Integer] length of the operation - # @param code [RGFA::Alignment::CIGAR::Operation::CODE] code of the operation - def initialize(len, code) - @len = len - @code = code - end - - # The string representation of the operation - # @note no validation is performed, use #validate if required - # @return [String] - def to_s - "#{len}#{code}" - end - - # Compare two operations - # @return [Boolean] - def ==(other) - other.len == len and other.code == code - end - - # Validate the operation - # @param version [Symbol] (defaults to: +:gfa1+) if :gfa2, - # then only CIGAR codes M/I/D/P are allowed, if :gfa1 all CIGAR codes - # @raise [RGFA::ValueError] if the code is invalid or the length is negative - # @raise [RGFA::TypeError] if the length is not an Integer - # @raise [RGFA::VersionError] if a wrong version is provided - # @return [void] - def validate(version: :gfa1) - if ![:gfa1, :gfa2].include?(version) - raise RGFA::VersionError, "Version unknown: #{version}" - end - begin - len = Integer(@len) - rescue - raise RGFA::TypeError, "CIGAR operation: #{self.inspect}\n"+ - "CIGAR length cannot be casted to Integer (class: #{len.class})" - end - begin - code = @code.to_sym - rescue - raise RGFA::TypeError, "CIGAR operation: #{self.inspect}\n"+ - "CIGAR code cannot be casted to symbol (class: #{code.class})" - end - if len < 0 - raise RGFA::ValueError, - "Length of CIGAR operation #{self} is invalid (#{len})" - elsif RGFA::Alignment::CIGAR::Operation::CODE_GFA1_ONLY.include?(code) - if version == :gfa2 - raise RGFA::ValueError, "CIGAR operation: #{self.inspect}\n"+ - "CIGAR code is not supported in GFA2: #{code}" - end - elsif !RGFA::Alignment::CIGAR::Operation::CODE_GFA1_GFA2.include?(code) - raise RGFA::ValueError, "CIGAR operation: 
#{self.inspect}\n"+ - "Invalid CIGAR code found: #{code}" - end - end - - # @api private - # @tested_in unit_alignment - module API_PRIVATE - # @return [RGFA::Alignment::CIGAR::Operation] self - def to_cigar_operation - self - end - end - include API_PRIVATE - -end - -class Array - - # @api private - # @tested_in unit_alignment - module API_PRIVATE - # Create a {RGFA::Alignment::CIGAR} instance from the content of the array. - # @param valid [nil] ignored, for compatibility - # @param version [nil] ignored, for compatibility - # @return [RGFA::Alignment::CIGAR] - def to_cigar(valid: nil, version: nil) - RGFA::Alignment::CIGAR.new(self) - end - - # Create a {RGFA::Alignment::CIGAR::Operation} instance - # from the array content - # @return [RGFA::Alignment::CIGAR::Operation] - def to_cigar_operation - RGFA::Alignment::CIGAR::Operation.new(Integer(self[0]), self[1].to_sym) - end - end - include API_PRIVATE - -end - -class String - - # @api private - # @tested_in unit_alignment - module API_PRIVATE - # Parse CIGAR string - # @return [RGFA::Alignment::CIGAR,RGFA::Alignment::Placeholder] - # CIGAR or Placeholder (if +*+) - # @param valid [Boolean] (defaults to: +false+) if +true+, - # the string is guaranteed to be valid - # @param version [Symbol] (defaults to: +:gfa1+) if :gfa2, - # then only CIGAR codes M/I/D/P are allowed, if :gfa1 all CIGAR codes - # @raise [RGFA::ValueError] if the string is not a valid CIGAR string - # @raise [RGFA::VersionError] if a wrong version is provided - def to_cigar(valid: false, version: :gfa1) - if placeholder? 
- return RGFA::Alignment::Placeholder.new - else - return RGFA::Alignment::CIGAR.from_string(self, valid: valid, - version: version) - end - end - end - include API_PRIVATE - -end diff --git a/lib/rgfa/alignment/placeholder.rb b/lib/rgfa/alignment/placeholder.rb deleted file mode 100644 index e0fa1b5..0000000 --- a/lib/rgfa/alignment/placeholder.rb +++ /dev/null @@ -1,43 +0,0 @@ -require_relative "../placeholder" - -RGFA::Alignment ||= Module.new - -# Placeholder for alignments fields -class RGFA::Alignment::Placeholder < RGFA::Placeholder - - # For compatibility with CIGAR#complement. - # @return [self] - def complement - self - end - - # For compatibility with the +to_alignment+ method of other classes - # (CIGAR, Trace, String, Array). - # @param valid [nil] ignored, for compatibility - # @param version [nil] ignored, for compatibility - # @return [RGFA::Alignment::CIGAR] self - def to_alignment(valid: nil, version: :nil) - self - end - - # @api private - module API_PRIVATE - - # For compatibility with the +to_cigar+ method of other classes - # @return [RGFA::Alignment::Placeholder] self - # @param valid [nil] ignored, for compatibility - # @param version [nil] ignored, for compatibility - def to_cigar(valid: nil, version: :nil) - self - end - - # For compatibility with the +to_trace+ method of other classes - # @return [RGFA::Alignment::Placeholder] self - def to_trace - self - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/alignment/trace.rb b/lib/rgfa/alignment/trace.rb deleted file mode 100644 index 1ab76cc..0000000 --- a/lib/rgfa/alignment/trace.rb +++ /dev/null @@ -1,111 +0,0 @@ -RGFA::Alignment ||= Module.new - -# Array of trace points. -# -# A trace is a list of integers, each giving the number of characters -# in the second segment to align to the next TS characters in the first -# segment. -# -# TS is either the default spacing given in the header line TS tag, -# or the the spacing given in the TS tag on the line of the edge. 
-# -# Note: a complement operation such as for CIGARs cannot be defined -# for a trace, without computing the alignment. -# -# @tested_in api_alignment -class RGFA::Alignment::Trace < Array - - # Validate the numeric array - # - # @param ts [Integer,nil] (defaults to: +nil+) trace spacing; if an - # integer is specified, it will be checked that all values are < +ts+; if - # +nil+, then this check is skipped - # - # @raise [RGFA::TypeError] if the array contains non-integer values - # @raise [RGFA::ValueError] if the array contains values < 0 or > +ts+ - # - # @return [void] - # - def validate(ts: nil) - each do |e| - if !e.kind_of?(Integer) - raise RGFA::TypeError, - "Trace contains non-integer values (#{e} found)\n"+ - "Content: #{inspect}" - end - if e < 0 - raise RGFA::ValueError, - "Trace contains value < 0 (#{e} found)\n"+ - "Content: #{inspect}" - end - if !ts.nil? and e > ts - raise RGFA::ValueError, - "Trace contains value > TS (#{e} found, TS=#{ts})\n"+ - "Content: #{inspect}" - end - end - end - - def to_s - placeholder? ? 
"*" : (each(&:to_s).join(",")) - end - - # @param valid [nil] ignored, for compatibility - # @param version [nil] ignored, for compatibility - # @return [RGFA::Alignment::CIGAR] self - def to_alignment(valid: nil, version: :nil) - self - end - - def complement - return RGFA::Alignment::Placeholder.new - end - - # @api private - # @tested_in unit_alignment - module API_PRIVATE - - # @return [RGFA::Alignment::Trace] self - def to_trace - self - end - - module ClassMethods - - # @return [RGFA::Alignment::Trace] trace from trace string representation - # @raise [RGFA::FormatError] if after splitting by comma, some elements - # are not integers - def from_string(str) - begin - RGFA::Alignment::Trace.new(str.split(",").map{|i|Integer(i)}) - rescue - raise RGFA::FormatError, - "'#{str}' is not a valid string representing a trace" - end - end - - end - - end - include API_PRIVATE - extend API_PRIVATE::ClassMethods - -end - -class String - - # @api private - # @tested_in unit_alignment - module API_PRIVATE - - # Parse trace string - # @return [RGFA::Alignment::Trace] - # @raise [RGFA::FormatError] if the string is not a valid trace string - def to_trace - RGFA::Alignment::Trace.from_string(self) - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/byte_array.rb b/lib/rgfa/byte_array.rb deleted file mode 100644 index 5db7941..0000000 --- a/lib/rgfa/byte_array.rb +++ /dev/null @@ -1,93 +0,0 @@ -require_relative "error.rb" - -# -# Array of positive integers <= 255; -# representation of the data contained in a H field -# -# @tested_in api_tags -# -class RGFA::ByteArray < Array - - # Validates the byte array content - # @raise [RGFA::ValueError] if any value is not a - # positive integer <= 255 - # @return [void] - def validate - each do |x| - unless x.kind_of?(Integer) and (0..255).include?(x) - raise RGFA::ValueError, - "Value incompatible with byte array: #{x.inspect}\n"+ - "in array: #{self.inspect}" - end - end - return nil - end - - # Returns self - # @param 
valid [nil] ignored, for compatibility - # @return [RGFA::ByteArray] self - def to_byte_array(valid: nil) - self - end - - # GFA datatype H representation of the byte array - # @raise [RGFA::ValueError] if the - # array is not a valid byte array - # @param valid [Boolean] (defaults to: +false+) if +true+, - # the string is guaranteed to be valid - # @return [String] - def to_s(valid: false) - validate unless valid - map do |elem| - str = elem.to_s(16).upcase - elem < 16 ? "0#{str}" : str - end.join - end - - # @api private - # @tested_in internals_tag_datatype - module API_PRIVATE - # GFA tag datatype to use, if none is provided - # @return [RGFA::Field::TAG_DATATYPE] - def default_gfa_tag_datatype; :H; end - end - include API_PRIVATE - -end - -# Method to create a RGFA::ByteArray from an Array -# @tested_in api_tags -class Array - # Create a RGFA::ByteArray from an Array instance - # @param valid [nil] ignored, for compatibility - # @return [RGFA::ByteArray] the byte array - def to_byte_array(valid: nil) - RGFA::ByteArray.new(self) - end -end - -# Method to parse the string representation of a RGFA::ByteArray -# @tested_in api_tags -class String - # Convert a GFA string representation of a byte array to a byte array - # @return [RGFA::ByteArray] the byte array - # @param valid [Boolean] (defaults to: +false+) if +true+, - # the string is guaranteed to be valid - # @raise [RGFA::FormatError] if the string size is not > 0 - # and even - def to_byte_array(valid: false) - if !valid and ((size < 2) or (size % 2 == 1)) - raise RGFA::FormatError, - "Invalid byte array string #{self}; "+ - "each element must be represented by two letters [0-9A-F]" - end - scan(/..?/).map do |x| - begin - Integer(x,16) - rescue - raise RGFA::FormatError, - "Invalid element #{x} found in byte array string: #{self}" - end - end.to_byte_array - end -end diff --git a/lib/rgfa/error.rb b/lib/rgfa/error.rb deleted file mode 100644 index 0012802..0000000 --- a/lib/rgfa/error.rb +++ /dev/null @@ 
-1,45 +0,0 @@ -# Parent class for library-specific errors -class RGFA::Error < StandardError; end - -# unknown/wrong version of the specification -class RGFA::VersionError < RGFA::Error; end - -# the user tried to do something not allowed -class RGFA::RuntimeError < RGFA::Error; end - -# an object has the right type/form, but an invalid content -# e.g. number out-of-range; string/array too big/small; -# enum-like symbol not in allowed values list -class RGFA::ValueError < RGFA::Error; end - -# the format of an object is invalid -# e.g. a line contains too many/few fields; -# a tagname has the wrong format -class RGFA::FormatError < RGFA::Error; end - -# a wrong type has been used or specified; -# e.g. a field contains an array instead of an integer; -# an invalid record type or datatype is found by parsing -class RGFA::TypeError < RGFA::Error; end - -# the argument of a method has the wrong type -class RGFA::ArgumentError < RGFA::Error; end - -# an element which should have been unique is not unique -# e.g. a tag name is duplicated in a line; -# a duplicated record ID is found -class RGFA::NotUniqueError < RGFA::Error; end - -# contradictory information has been provided; -# e.g. GFA1 segment LN and sequence length differ; -# a GFA2-only record is added to a GFA1 file -class RGFA::InconsistencyError < RGFA::Error; end - -# an element which has been required is not found -# e.g. a tag! method has been used and the tag is not set; -# a record finder ! 
method does not find the record -class RGFA::NotFoundError < RGFA::Error; end - -# an assertion has failed; this should not happen -# an error of this kind indicates a bug -class RGFA::AssertionError < RGFA::Error; end diff --git a/lib/rgfa/field.rb b/lib/rgfa/field.rb deleted file mode 100644 index f5da8b7..0000000 --- a/lib/rgfa/field.rb +++ /dev/null @@ -1,395 +0,0 @@ -require "json" -require_relative "alignment" -require_relative "byte_array" -require_relative "error" -require_relative "field_array" -require_relative "line" -require_relative "numeric_array" - -RGFA::Field = Module.new - -require_relative "field/alignment_gfa1.rb" -require_relative "field/alignment_gfa2.rb" -require_relative "field/alignment_list_gfa1.rb" -require_relative "field/byte_array.rb" -require_relative "field/char.rb" -require_relative "field/comment.rb" -require_relative "field/custom_record_type.rb" -require_relative "field/float.rb" -require_relative "field/generic.rb" -require_relative "field/identifier_gfa2.rb" -require_relative "field/identifier_list_gfa2.rb" -require_relative "field/oriented_identifier_list_gfa1.rb" -require_relative "field/oriented_identifier_list_gfa2.rb" -require_relative "field/integer.rb" -require_relative "field/json.rb" -require_relative "field/numeric_array.rb" -require_relative "field/optional_identifier_gfa2.rb" -require_relative "field/optional_integer.rb" -require_relative "field/orientation.rb" -require_relative "field/oriented_identifier_gfa2.rb" -require_relative "field/path_name_gfa1.rb" -require_relative "field/position_gfa1.rb" -require_relative "field/position_gfa2.rb" -require_relative "field/segment_name_gfa1.rb" -require_relative "field/sequence_gfa1.rb" -require_relative "field/sequence_gfa2.rb" -require_relative "field/string.rb" - -# Decoding, validation and encoding of GFA fields. 
-# -# For each datatype a module under field/ exists, which defines -# the following methods as module functions: -# -# unsafe_decode => parses an ASSUMED VALID string representation to -# an appropriate Ruby object -# - faster or as fast as decode() -# - if the assumption is not met, sometimes it will -# raise an exception, sometimes it will return an -# invalid object -# -# decode => parses a string representation to an appropriate Ruby object -# - if the string is invalid, an exception is raised -# - the returned object is guaranteed to be valid -# -# validate_encoded => validates a string representation -# - raises RGFA::FormatError if invalid -# -# validate_decoded => validates a non-string field content -# - raises exception if its state is invalid -# -# unsafe_encode => encodes an ASSUMED VALID field to the string representation; -# - faster or as fast as encode() -# - if the assumption is not met, sometimes it will -# raise an exception, sometimes it will return an -# invalid string representation -# -# encode => encodes a field to its string representation; -# - raises an exception if the field content is invalid -# - the string representation is guaranteed to be valid; -# -# Everything in the RGFA::Field module is API private. The user will not call -# these methods directly, and use instead methods of RGFA::Line. -# Also: code in line.rb should not call the functions of the submodules -# defined in the field/* files directly, but rather call the functions of -# the submodules defined in this file, ie RGFA::Field::Parser, -# RGFA::Field::Validator, RGFA::Field::Writer. 
-# -# @api private -module RGFA::Field - - # Symbol representing a GFA1-specific datatype for positional fields - GFA1_POSFIELD_DATATYPE = [ - :alignment_gfa1, - :alignment_list_gfa1, - :oriented_identifier_list_gfa1, - :position_gfa1, - :segment_name_gfa1, - :sequence_gfa1, - :path_name_gfa1, - ] - - # Symbol representing a GFA2-specific datatype for positional fields - GFA2_POSFIELD_DATATYPE = [ - :alignment_gfa2, - :generic, - :identifier_gfa2, - :oriented_identifier_gfa2, - :identifier_list_gfa2, - :oriented_identifier_list_gfa2, - :optional_identifier_gfa2, - :position_gfa2, - :custom_record_type, - :sequence_gfa2, - :optional_integer, - ] - - # Symbol representing a datatype for positional fields common to GFA1 and GFA2 - GFAX_POSFIELD_DATATYPE = [:comment, :orientation] - - # Symbol representing a datatype for positional fields - POSFIELD_DATATYPE = GFA1_POSFIELD_DATATYPE + - GFA2_POSFIELD_DATATYPE + - GFAX_POSFIELD_DATATYPE - - # A symbol representing a datatype for tags - TAG_DATATYPE = [:A, :i, :f, :Z, :J, :H, :B] - - # A symbol representing a valid datatype - FIELD_DATATYPE = TAG_DATATYPE + POSFIELD_DATATYPE - - FIELD_MODULE = { - :alignment_gfa1 => RGFA::Field::AlignmentGFA1, - :alignment_gfa2 => RGFA::Field::AlignmentGFA2, - :alignment_list_gfa1 => RGFA::Field::AlignmentListGFA1, - :comment => RGFA::Field::Comment, - :custom_record_type => RGFA::Field::CustomRecordType, - :generic => RGFA::Field::Generic, - :identifier_gfa2 => RGFA::Field::IdentifierGFA2, - :identifier_list_gfa2 => RGFA::Field::IdentifierListGFA2, - :oriented_identifier_list_gfa1 => RGFA::Field::OrientedIdentifierListGFA1, - :oriented_identifier_list_gfa2 => RGFA::Field::OrientedIdentifierListGFA2, - :optional_identifier_gfa2 => RGFA::Field::OptionalIdentifierGFA2, - :oriented_identifier_gfa2 => RGFA::Field::OrientedIdentifierGFA2, - :optional_integer => RGFA::Field::OptionalInteger, - :orientation => RGFA::Field::Orientation, - :path_name_gfa1 => RGFA::Field::PathNameGFA1, - 
:position_gfa1 => RGFA::Field::PositionGFA1, - :position_gfa2 => RGFA::Field::PositionGFA2, - :segment_name_gfa1 => RGFA::Field::SegmentNameGFA1, - :sequence_gfa1 => RGFA::Field::SequenceGFA1, - :sequence_gfa2 => RGFA::Field::SequenceGFA2, - :H => RGFA::Field::ByteArray, - :A => RGFA::Field::Char, - :f => RGFA::Field::Float, - :i => RGFA::Field::Integer, - :J => RGFA::Field::JSON, - :B => RGFA::Field::NumericArray, - :Z => RGFA::Field::String, - } - - # Encoding of Ruby objects to GFA string representation - # @tested_in internals_field_writer - # @api private - module Writer - - # Encode a Ruby object into a GFA field. The ruby object can be - # either an encoded GFA field (in which case, at most it is validated, - # see +safe+, but not encoded) or an object of a class compatible - # with the specified datatype, if a datatype is specified (see +datatype+), - # e.g. Integer # for i fields. - # @param datatype [RGFA::Field::FIELD_DATATYPE] datatype to use. If no - # datatype is specified, any class will do and the default datatype - # will be chosen (see RGFA::DefaultDatatype module). 
- # @param fieldname [String] fieldname, for error messages (optional) - # @param safe [Boolean] (defaults to: +true+) if +true+, the safe - # version of the encode function is used, which guarantees that the - # resulting data is valid; if +false+, the unsafe version is used, - # which, for some datatypes, skips validations in order to be faster - # than the safe version - # @raise [RGFA::TypeError] if an unknown datatype is specified - # @raise [RGFA::ValueError] if the object value is invalid for the datatype - # @raise [RGFA::FormatError] if the object syntax is invalid for the - # datatype (eg for invalid encoded strings, if +safe+ is set) - # @raise [RGFA::TypeError] if the type of the object and the datatype - # are not compatible - def to_gfa_field(datatype: nil, safe: true, fieldname: nil) - datatype ||= default_gfa_tag_datatype - mod = RGFA::Field::FIELD_MODULE[datatype] - if mod.nil? - fieldnamemsg = fieldname ? "Field: #{fieldname}\n" : "" - contentmsg = "Content: #{self.inspect}\n" - raise RGFA::TypeError, - fieldnamemsg + - contentmsg + - "Datatype unknown: #{datatype.inspect}" - end - begin - if safe - mod.encode(self) - else - mod.unsafe_encode(self) - end - rescue => err - fieldnamemsg = fieldname ? "Field: #{fieldname}\n" : "" - contentmsg = "Content: #{self.inspect}\n" - datatypemsg = "Datatype: #{datatype}\n" - raise err.class, - fieldnamemsg + - datatypemsg + - contentmsg + - err.message - end - end - - # Representation of the data as a GFA tag +xx:d:content+, where +xx+ is - # the tag name and +d+ is the datatype. 
- # @param fieldname [Symbol] the tag name - # @param datatype [RGFA::Field::TAG_DATATYPE] (defaults to: the value - # returned by {#default_gfa_tag_datatype}) - def to_gfa_tag(fieldname, datatype: default_gfa_tag_datatype) - return "#{fieldname}:#{datatype}:"+ - "#{to_gfa_field(datatype: datatype, fieldname: fieldname)}" - end - end - - # Decoding of the GFA string representations into Ruby objects - # @tested_in internals_field_parser - # @api private - module Parser - - # Parse a GFA string representation and decodes it into a Ruby object - # @param datatype [RGFA::Field::FIELD_DATATYPE] the datatype to use - # @param safe [Boolean] (defaults to: +true+) if +true+ the safe - # version of the decode function for the datatype is used, which - # validates the content of the string; if +false+, the string is - # assumed to be valid and decoded into a value accordingly, which may - # result in invalid values (but may be faster than the safe decoding) - # @param fieldname [String] fieldname, for error messages (optional) - # @param line [#to_s] line content, for error messages (optional) - # @raise [RGFA::TypeError] if the specified datatype is unknown - # @raise [RGFA::FormatError] if the string syntax is not valid - # @raise [RGFA::ValueError] if the decoded value is not valid - def parse_gfa_field(datatype, - safe: true, - fieldname: nil, - line: nil) - mod = RGFA::Field::FIELD_MODULE[datatype] - if mod.nil? - begin - linemsg = line ? "Line content: #{line.to_s}\n" : "" - rescue - linemsg = "" - end - fieldnamemsg = fieldname ? "Field: #{fieldname}\n" : "" - contentmsg = "Content: #{self}\n" - raise RGFA::TypeError, - linemsg + - fieldnamemsg + - contentmsg + - "Datatype unknown: #{datatype.inspect}" - end - begin - if safe - mod.decode(self) - else - mod.unsafe_decode(self) - end - rescue => err - begin - linemsg = line ? "Line content: #{line.to_s}\n" : "" - rescue - linemsg = "" - end - fieldnamemsg = fieldname ? 
"Field: #{fieldname}\n" : "" - contentmsg = "Content: #{self}\n" - datatypemsg = "Datatype: #{datatype}\n" - raise err.class, - linemsg + - fieldnamemsg + - datatypemsg + - contentmsg + - err.message - end - end - - # Parses a GFA tag in the form +xx:d:content+ into its components. - # The +content+ is not decoded (see #parse_gfa_field). - # @raise [RGFA::FormatError] if the string does not represent - # a valid GFA tag - # @return [Array(Symbol, RGFA::Field::FIELD_DATATYPE, String)] - # the parsed content of the field - def parse_gfa_tag - if self =~ /^([A-Za-z][A-Za-z0-9]):([AifZJHB]):(.+)$/ - return $1.to_sym, $2.to_sym, $3 - else - raise RGFA::FormatError, - "Expected GFA tag, found: #{self.inspect}" - end - end - - end - - # Validates the content of a GFA field, which can be a GFA string - # representation or a Ruby object, according to the field datatype. - # @tested_in internals_field_validator - # @api private - module Validator - - # Validates a GFA string representation according to the field datatype. - # @!macro [new] validate_gfa_field - # @raise [RGFA::TypeError] if an unknown datatype is specified - # @param datatype [RGFA::Field::FIELD_DATATYPE] the datatype to use - # @param fieldname [String] fieldname, for error messages (optional) - # @raise [RGFA::FormatError] if the object type or content - # is not compatible to the provided datatype - # @return [void] - # @api private - module Encoded - def validate_gfa_field(datatype, fieldname=nil) - mod = RGFA::Field::FIELD_MODULE[datatype] - if mod.nil? - raise RGFA::TypeError, - "Datatype unknown: #{datatype.inspect}" - end - mod::validate_encoded(self) - end - end - - # Validates a non-string Ruby object field content - # according to the field datatype. - # @!macro validate_gfa_field - # @api private - module Decoded - def validate_gfa_field(datatype, fieldname=nil) - mod = RGFA::Field::FIELD_MODULE[datatype] - if mod.nil? 
- raise RGFA::TypeError, - "Datatype unknown: #{datatype.inspect}" - end - mod::validate_decoded(self) - end - end - - end - -end - -class Object - include RGFA::Field::Writer - include RGFA::Field::Validator::Decoded -end - -class String - include RGFA::Field::Parser - include RGFA::Field::Validator::Encoded -end - -# -# This module specifies default datatypes for GFA tags -# for the core classes. -# -# Custom classes shall define the default_gfa_tag_datatype -# function in their class definition and not here. -# -# @tested_in internals_tag_datatype -# @api private -module RGFA::DefaultDatatypes - - module Object - # @!macro [new] gfa_datatype - # GFA tag datatype to use, if none is provided - # @return [RGFA::Field::TAG_DATATYPE] - def default_gfa_tag_datatype; :Z; end - end - - module Fixnum - # @!macro gfa_datatype - def default_gfa_tag_datatype; :i; end - end - - module Float - # @!macro gfa_datatype - def default_gfa_tag_datatype; :f; end - end - - module Hash - # @!macro gfa_datatype - def default_gfa_tag_datatype; :J; end - end - - module Array - # @!macro gfa_datatype - def default_gfa_tag_datatype - (!empty? and - (all?{|i|i.kind_of?(Integer)} or all?{|i|i.kind_of?(Float)})) ? 
:B : :J - end - end - -end - -class Object; include RGFA::DefaultDatatypes::Object; end -class Fixnum; include RGFA::DefaultDatatypes::Fixnum; end -class Float; include RGFA::DefaultDatatypes::Float; end -class Hash; include RGFA::DefaultDatatypes::Hash; end -class Array; include RGFA::DefaultDatatypes::Array; end diff --git a/lib/rgfa/field/alignment_gfa1.rb b/lib/rgfa/field/alignment_gfa1.rb deleted file mode 100644 index da79baf..0000000 --- a/lib/rgfa/field/alignment_gfa1.rb +++ /dev/null @@ -1,61 +0,0 @@ -module RGFA::Field::AlignmentGFA1 - - def decode(string) - string.to_cigar(valid: false, version: :gfa1) - end - - def unsafe_decode(string) - string.to_cigar(valid: true, version: :gfa1) - end - - def validate_encoded(string) - if string !~ /^(\*|([0-9]+[MIDNSHPX=])+)$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA1 alignment\n"+ - "(it is not * and is not a CIGAR string (([0-9]+[MIDNSHPX=])+)" - end - end - - def validate_decoded(object) - case object - when RGFA::Alignment::CIGAR - object.validate - when RGFA::Alignment::Placeholder - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::Alignment::CIGAR,"+ - "RGFA::Alignment::Placeholder)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when RGFA::Alignment::CIGAR - object.validate - return object.to_s - when RGFA::Alignment::Placeholder - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, "+ - "RGFA::Alignment::CIGAR, RGFA::Alignment::Placeholder)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/alignment_gfa2.rb 
b/lib/rgfa/field/alignment_gfa2.rb deleted file mode 100644 index 71813d2..0000000 --- a/lib/rgfa/field/alignment_gfa2.rb +++ /dev/null @@ -1,47 +0,0 @@ -module RGFA::Field::AlignmentGFA2 - - def unsafe_decode(string) - string.to_alignment(version: :gfa2, valid: true) - end - - def decode(string) - string.to_alignment(version: :gfa2, valid: false) - end - - alias_method :validate_encoded, :decode - - def validate_decoded(alignment) - alignment.validate - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when RGFA::Alignment::CIGAR, RGFA::Alignment::Trace - object.validate - return object.to_s - when RGFA::Alignment::Placeholder - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: "+ - "String, RGFA::Alignment::CIGAR, RGFA::Alignment::Trace, "+ - "RGFA::Alignment::Placeholder)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/alignment_list_gfa1.rb b/lib/rgfa/field/alignment_list_gfa1.rb deleted file mode 100644 index 5baf176..0000000 --- a/lib/rgfa/field/alignment_list_gfa1.rb +++ /dev/null @@ -1,71 +0,0 @@ -module RGFA::Field::AlignmentListGFA1 - - def unsafe_decode(string) - string.split(",").map {|c| c.to_cigar(valid: true, version: :gfa1)} - end - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - - def validate_encoded(string) - if string !~ /^(\*|(([0-9]+[MIDNSHPX=])+))(,(\*|(([0-9]+[MIDNSHPX=])+)))*$/ - raise RGFA::FormatError, - "#{string.inspect} is not a comma separated list of * or CIGARs\n"+ - "(CIGAR strings must match ([0-9]+[MIDNSHPX=])+)" - end - end - - def validate_decoded(object) - case object - when RGFA::Placeholder - when Array - object.map do 
|elem| - elem.to_cigar(version: :gfa1) - end.each(&:validate) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array, RGFA::Placeholder)" - end - end - - def unsafe_encode(object) - case object - when RGFA::Placeholder - object.to_s - when Array - object.map{|cig|cig.to_cigar.to_s}.join(",") - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array, RGFA::Placeholder)" - end - end - - def encode(object) - case object - when RGFA::Placeholder - object.to_s - when Array - object.map do |cig| - cig = cig.to_cigar - cig.validate - cig.to_s - end.join(",") - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array, RGFA::Placeholder)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/byte_array.rb b/lib/rgfa/field/byte_array.rb deleted file mode 100644 index 317201e..0000000 --- a/lib/rgfa/field/byte_array.rb +++ /dev/null @@ -1,61 +0,0 @@ -module RGFA::Field::ByteArray - - def unsafe_decode(string) - string.to_byte_array(valid: true) - end - - def decode(string) - string.to_byte_array - end - - def validate_encoded(string) - if string !~ /^[0-9A-F]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid hex string\n"+ - "(it does not match the regular expression [0-9A-F]+)" - end - end - - def validate_decoded(byte_array) - byte_array.validate - end - - def unsafe_encode(object) - case object - when RGFA::ByteArray - object.to_s(valid: true) - when Array - object.to_byte_array.to_s(valid: true) - when String - object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::ByteArray, Array)" 
- end - end - - def encode(object) - case object - when RGFA::ByteArray - object.to_s - when Array - object.to_byte_array.to_s - when String - validate_encoded(object) - object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::ByteArray, Array)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/char.rb b/lib/rgfa/field/char.rb deleted file mode 100644 index 3a4b4bb..0000000 --- a/lib/rgfa/field/char.rb +++ /dev/null @@ -1,44 +0,0 @@ -module RGFA::Field::Char - - def decode(string) - validate_encoded(string) - string - end - - alias_method :unsafe_decode, :decode - - def validate_encoded(string) - if string !~ /^[!-~]$/ - raise RGFA::FormatError, - "#{string.inspect} is not a single printable character string" - end - end - - alias_method :validate_decoded, :validate_encoded - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - when Symbol - object = object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol)" - end - validate_encoded(object) - return object - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/comment.rb b/lib/rgfa/field/comment.rb deleted file mode 100644 index caf9ab3..0000000 --- a/lib/rgfa/field/comment.rb +++ /dev/null @@ -1,44 +0,0 @@ -module RGFA::Field::Comment - - def unsafe_decode(string) - string - end - - def decode(string) - validate_encoded(string) - string - end - - def validate_encoded(string) - if string.index("\n") - raise RGFA::FormatError - 
"#{string.inspect} is not a single-line string" - end - end - - alias_method :validate_decoded, :validate_encoded - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/custom_record_type.rb b/lib/rgfa/field/custom_record_type.rb deleted file mode 100644 index 2fe0a08..0000000 --- a/lib/rgfa/field/custom_record_type.rb +++ /dev/null @@ -1,49 +0,0 @@ -module RGFA::Field::CustomRecordType - - def unsafe_decode(string) - string.to_sym - end - - def decode(string) - validate_encoded(string) - string.to_sym - end - - def validate_encoded(string) - if string !~ /^[!-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid custom record type\n"+ - "(it contains spaces and/or non-printable characters)" - elsif [:E, :G, :F, :O, :U, :H, :"#", :S].include?(string.to_sym) - raise RGFA::FormatError, - "#{string.inspect} is not a valid custom record type\n"+ - "(it is a predefined GFA2 record type)" - end - end - - alias_method :validate_decoded, :validate_encoded - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String, Symbol - validate_encoded(object) - return object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/float.rb b/lib/rgfa/field/float.rb 
deleted file mode 100644 index 23f7d88..0000000 --- a/lib/rgfa/field/float.rb +++ /dev/null @@ -1,52 +0,0 @@ -module RGFA::Field::Float - - def decode(string) - Float(string) rescue raise RGFA::FormatError - end - - alias_method :unsafe_decode, :decode - - def validate_decoded(object) - case object - when Integer, Float - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Integer)" - end - end - - def validate_encoded(string) - if string !~ /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/ - raise RGFA::FormatError, - "#{string.inspect} does not represent a valid float\n"+ - "(it does not match [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Integer, Float - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Integer, Float)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/generic.rb b/lib/rgfa/field/generic.rb deleted file mode 100644 index abed09b..0000000 --- a/lib/rgfa/field/generic.rb +++ /dev/null @@ -1,45 +0,0 @@ -module RGFA::Field::Generic - - def unsafe_decode(string) - string - end - - def decode(string) - validate_encoded(string) - string - end - - def validate_encoded(string) - if string.index("\n") or string.index("\t") - raise RGFA::FormatError, - "#{string.inspect} is not a valid field content\n"+ - "(it contains newlines and/or tabs)" - end - end - - alias_method :validate_decoded, :validate_encoded - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - else - 
raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/identifier_gfa2.rb b/lib/rgfa/field/identifier_gfa2.rb deleted file mode 100644 index 1646413..0000000 --- a/lib/rgfa/field/identifier_gfa2.rb +++ /dev/null @@ -1,61 +0,0 @@ -module RGFA::Field::IdentifierGFA2 - - def unsafe_decode(string) - string.to_sym - end - - def decode(string) - validate_encoded(string) - string.to_sym - end - - def validate_encoded(string) - if string !~ /^[!-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA2 identifier\n"+ - "(it contains spaces or non-printable characters)" - end - end - - def validate_decoded(object) - case object - when RGFA::Line - validate_encoded(object.name) - when String, Symbol - validate_encoded(object) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol, RGFA::Line)" - end - end - - def unsafe_encode(object) - case object - when String - return object - when Symbol - return object.to_s - when RGFA::Line - return object.name.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol, RGFA::Line)" - end - end - - def encode(object) - string = unsafe_encode(object) - validate_encoded(string) - return string - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/identifier_list_gfa2.rb b/lib/rgfa/field/identifier_list_gfa2.rb deleted file mode 100644 index 06eea6d..0000000 --- 
a/lib/rgfa/field/identifier_list_gfa2.rb +++ /dev/null @@ -1,85 +0,0 @@ -module RGFA::Field::IdentifierListGFA2 - - def unsafe_decode(string) - string.split(" ").map(&:to_sym) - end - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - - def validate_encoded(string) - if string !~ /^[ !-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid list of GFA2 identifier\n"+ - "(it contains non-printable characters)" - end - end - - def validate_decoded(object) - case object - when Array - object.each do |elem| - elem = case elem - when String - elem - when Symbol - elem.to_s - when RGFA::Line - elem.name.to_s - else - raise RGFA::TypeError, - "the array contains an object of class #{elem.class}\n"+ - "(accepted classes: String, Symbol, RGFA::Line)" - end - if elem !~ /^[!-~]+$/ - raise RGFA::FormatError, - "the list contains an invalid GFA2 identifier (#{string.inspect})\n"+ - "(it contains spaces and/or non-printable characters)" - end - end - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array)" - end - end - - def unsafe_encode(object) - case object - when Array - object.map do |elem| - case elem - when String, Symbol - elem.to_s - when RGFA::Line - elem.name.to_s - else - raise RGFA::TypeError, - "the array contains an object of class #{elem.class}\n"+ - "(accepted classes: String, Symbol, RGFA::Line)" - end - end.join(" ") - when String - return object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array, String)" - end - end - - def encode(object) - validate_decoded(object) - return unsafe_encode(object) - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/integer.rb b/lib/rgfa/field/integer.rb deleted 
file mode 100644 index ba08ba1..0000000 --- a/lib/rgfa/field/integer.rb +++ /dev/null @@ -1,53 +0,0 @@ -module RGFA::Field::Integer - - def decode(string) - Integer(string) rescue raise RGFA::FormatError, - "the string does not represent a valid integer" - end - - alias_method :unsafe_decode, :decode - - def validate_decoded(object) - case object - when Integer - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Integer)" - end - end - - def validate_encoded(string) - if string !~ /^[-+]?[0-9]+$/ - raise RGFA::FormatError, - "#{string.inspect} does not represent a valid integer\n"+ - "(it does not match the regular expression [-+]?[0-9]+)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Integer - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Integer, String)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/json.rb b/lib/rgfa/field/json.rb deleted file mode 100644 index 1cda16a..0000000 --- a/lib/rgfa/field/json.rb +++ /dev/null @@ -1,77 +0,0 @@ -require "json" -module RGFA::Field::JSON - - def unsafe_decode(string) - JSON.parse(string) - end - - def decode(string) - validate_all_printable(string) - unsafe_decode(string) - end - - def validate_encoded(string) - # both regex and JSON parse are necessary, - # because string can be invalid JSON and - # JSON can contain forbidden chars (non-printable) - validate_all_printable(string) - begin - JSON.parse(string) - rescue => err - "#{string.inspect} is not a valid JSON string\n"+ - "JSON.parse raised a #{err.class} exception\n"+ - "error message: 
#{err.message}" - end - end - - def validate_decoded(object) - case object - when RGFA::FieldArray - object.validate - when Array, Hash - string = encode(object) - validate_all_printable(string) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Array, Hash)" - end - end - - def unsafe_encode(object) - object.to_json - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Array, Hash - string = object.to_json - validate_all_printable(string) - return string - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Array, Hash)" - end - end - - def validate_all_printable(string) - if string !~ /^[ !-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid JSON field\n"+ - "(it contains newlines, tabs and/or non-printable characters)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - module_function :validate_all_printable - -end diff --git a/lib/rgfa/field/numeric_array.rb b/lib/rgfa/field/numeric_array.rb deleted file mode 100644 index ef5de5f..0000000 --- a/lib/rgfa/field/numeric_array.rb +++ /dev/null @@ -1,62 +0,0 @@ -module RGFA::Field::NumericArray - - def unsafe_decode(string) - string.to_numeric_array(validate: false) - end - - def decode(string) - string.to_numeric_array - end - - def validate_encoded(string) - if string !~ /^(f(,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+|[CSI](,\+?[0-9]+)+|[csi](,[-+]?[0-9]+)+)$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid numeric array string\n"+ - "(it must be one of [fcsiCSI] followed by a comma-separated list of:"+ - " for f: floats; for csi: signed integers; for CSI: unsigned integers)" - end - end - - def validate_decoded(numeric_array) - 
numeric_array.validate - end - - def unsafe_encode(object) - case object - when RGFA::NumericArray - object.to_s - when Array - object.to_numeric_array.to_s - when String - object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::NumericArray, Array)" - end - end - - def encode(object) - case object - when RGFA::NumericArray - object.to_s - when Array - object.to_numeric_array.to_s - when String - validate_encoded(object) - object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::NumericArray, Array)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/optional_identifier_gfa2.rb b/lib/rgfa/field/optional_identifier_gfa2.rb deleted file mode 100644 index 1d088f0..0000000 --- a/lib/rgfa/field/optional_identifier_gfa2.rb +++ /dev/null @@ -1,82 +0,0 @@ -module RGFA::Field::OptionalIdentifierGFA2 - - def unsafe_decode(string) - if string.placeholder? - return RGFA::Placeholder.new - else - return string.to_sym - end - end - - def decode(string) - if string.placeholder? 
- return RGFA::Placeholder.new - else - validate_encoded(string) - return string.to_sym - end - end - - def validate_encoded(string) - if string !~ /^[!-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA2 optional identifier\n"+ - "(it contains spaces or non-printable characters)" - end - end - - def validate_decoded(object) - case object - when RGFA::Placeholder - when RGFA::Line - validate_encoded(object.name) - when String, Symbol - validate_encoded(object) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol, RGFA::Line, RGFA::Placeholder)" - end - end - - def unsafe_encode(object) - case object - when String - return object - when Symbol, RGFA::Placeholder - return object.to_s - when RGFA::Line - return object.name.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol, RGFA::Line, RGFA::Placeholder)" - end - end - - def encode(object) - case object - when RGFA::Placeholder - return object.to_s - when String - when Symbol - object = object.to_s - when RGFA::Line - object = object.name.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol, RGFA::Line, RGFA::Placeholder)" - end - validate_encoded(object) - return object - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/optional_integer.rb b/lib/rgfa/field/optional_integer.rb deleted file mode 100644 index 0a1bc01..0000000 --- a/lib/rgfa/field/optional_integer.rb +++ /dev/null @@ -1,58 +0,0 @@ -module RGFA::Field::OptionalInteger - - def decode(string) - if string.placeholder? 
- return RGFA::Placeholder.new - else - Integer(string) rescue raise RGFA::FormatError, - "the string does not represent a valid integer" - end - end - - alias_method :unsafe_decode, :decode - - def validate_decoded(object) - case object - when Integer, RGFA::Placeholder - # always valid - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Integer, RGFA::Placeholder)" - end - end - - def validate_encoded(string) - if string !~ /^(\*|[-+]?[0-9]+)$/ - raise RGFA::FormatError, - "#{string.inspect} does not represent a valid optional integer value\n"+ - "(it is not * and does not match the regular expression [-+]?[0-9]+)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Integer, RGFA::Placeholder - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Integer, RGFA::Placeholder, String)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/orientation.rb b/lib/rgfa/field/orientation.rb deleted file mode 100644 index 3c2d0bd..0000000 --- a/lib/rgfa/field/orientation.rb +++ /dev/null @@ -1,57 +0,0 @@ -module RGFA::Field::Orientation - - def unsafe_decode(string) - string.to_sym - end - - def decode(string) - s = string.to_sym - validate_decoded(s) - return s - end - - def validate_decoded(symbol) - if symbol != :+ and symbol != :- - raise RGFA::FormatError, - "#{symbol.inspect} is not a valid orientation\n"+ - "(it must be + or -)" - end - return symbol - end - - def validate_encoded(string) - if string != "+" and string != "-" - raise RGFA::FormatError, - "#{string.inspect} is not a valid orientation\n"+ - "(it must be + or 
-)" - end - return string - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Symbol - validate_decoded(object) - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/oriented_identifier_gfa2.rb b/lib/rgfa/field/oriented_identifier_gfa2.rb deleted file mode 100644 index 9ae9b07..0000000 --- a/lib/rgfa/field/oriented_identifier_gfa2.rb +++ /dev/null @@ -1,65 +0,0 @@ -module RGFA::Field::OrientedIdentifierGFA2 - - def unsafe_decode(string) - string.to_oriented_line - end - - def decode(string) - object = unsafe_decode(string) - validate_decoded(object) - return object - end - - def validate_encoded(string) - if string !~ /^[!-~]+[+-]$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA2 oriented identifier\n"+ - "(it contains spaces or non-printable characters,"+ - "or does not end with an orientation symbol)" - end - end - - def validate_decoded(object) - case object - when RGFA::OrientedLine - if object.name !~ /^[!-~]+$/ - raise RGFA::ValueError, - "#{object.inspect} is not a valid GFA2 oriented identifier\n"+ - "(segment name contains spaces or non-printable characters)" - elsif ![:+,:-].include?(object.orient) - raise RGFA::ValueError, - "#{object.inspect} is not a valid GFA2 oriented identifier\n"+ - "(orientation is invalid)" - end - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: RGFA::OrientedLine)" - end - end - - def unsafe_encode(object) - case object - when String, RGFA::OrientedLine - return object.to_s - else - raise 
RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::OrientedLine)" - end - end - - def encode(object) - object.kind_of?(String) ? - validate_encoded(object) : validate_decoded(object) - return object.to_s - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/oriented_identifier_list_gfa1.rb b/lib/rgfa/field/oriented_identifier_list_gfa1.rb deleted file mode 100644 index ccba997..0000000 --- a/lib/rgfa/field/oriented_identifier_list_gfa1.rb +++ /dev/null @@ -1,79 +0,0 @@ -module RGFA::Field::OrientedIdentifierListGFA1 - - def unsafe_decode(string) - string.split(",").map do |l| - OL[l[0..-2].to_sym, l[-1].to_sym] - end - end - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - - def validate_encoded(string) - if string !~ /^[!-)+-<>-~][!-~]*[+-](,[!-)+-<>-~][!-~]*[+-])+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid list of GFA1 segment names "+ - "and orientations\n"+ - "(the segment names must match [!-)+-<>-~][!-~]*;\n"+ - " the orientations must be + or -;\n"+ - " the list must be comma-separated "+ - "NameOrient,NameOrient[,NameOrient...])" - end - end - - def validate_decoded(array) - array.each do |elem| - if !elem.kind_of?(RGFA::OrientedLine) - raise RGFA::TypeError, - "an element of the list is not a RGFA::OrientedLine\n"+ - "Element class: #{elem.class}\n"+ - "Element: #{elem}\n"+ - "List: #{array}" - end - elem.validate - if elem.name !~ /^[!-)+-<>-~][!-~]*$/ - raise RGFA::FormatError, - "#{elem.name} is not a valid GFA1 segment name\n"+ - "(it does not match [!-)+-<>-~][!-~]*)" - end - end - end - - def unsafe_encode(object) - case object - when String - return object - when Array - return object.map{|os|os.to_s}.join(",") - else - raise RGFA::TypeError, - "the 
class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Array)" - end - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Array - validate_decoded(object) - return object.map{|os|os.to_s}.join(",") - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Array)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/oriented_identifier_list_gfa2.rb b/lib/rgfa/field/oriented_identifier_list_gfa2.rb deleted file mode 100644 index 13c1da6..0000000 --- a/lib/rgfa/field/oriented_identifier_list_gfa2.rb +++ /dev/null @@ -1,83 +0,0 @@ -module RGFA::Field::OrientedIdentifierListGFA2 - - def unsafe_decode(string) - string.split(" ").map do |item| - OL[item[0..-2].to_sym, item[-1].to_sym] - end - end - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - - def validate_encoded(string) - if string !~ /^[!-~]+[+-]( [!-~]+[+-])*$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid list of oriented GFA2 identifier\n"+ - "(it contains non-printable characters or invalid orientations)" - end - end - - def validate_decoded(object) - case object - when Array - object.each do |elem| - if !elem.kind_of?(RGFA::OrientedLine) - raise RGFA::TypeError, - "the array contains an object of class #{elem.class}\n"+ - "(accepted classes: RGFA::OrientedLine)" - end - if elem.name !~ /^[!-~]+$/ - raise RGFA::FormatError, - "the list contains an invalid GFA2 identifier (#{elem.name})\n"+ - "(it contains spaces and/or non-printable characters)" - end - if ![:+,:-].include?(elem.orient) - raise RGFA::ValueError, - "#{elem} is not a valid GFA2 oriented identifier\n"+ - "(orientation #{elem.orient} is invalid)" - end - end - 
else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array)" - end - end - - def unsafe_encode(object) - case object - when Array - object.map do |elem| - case elem - when RGFA::OrientedLine - elem.to_s - else - raise RGFA::TypeError, - "the array contains an object of class #{elem.class}\n"+ - "(accepted classes: RGFA::OrientedLine)" - end - end.join(" ") - when String - return object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Array, String)" - end - end - - def encode(object) - validate_decoded(object) - return unsafe_encode(object) - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/path_name_gfa1.rb b/lib/rgfa/field/path_name_gfa1.rb deleted file mode 100644 index b4d4c28..0000000 --- a/lib/rgfa/field/path_name_gfa1.rb +++ /dev/null @@ -1,57 +0,0 @@ -module RGFA::Field::PathNameGFA1 - - def unsafe_decode(string) - string.to_sym - end - - def decode(string) - validate_encoded(string) - string.to_sym - end - - def validate_encoded(string) - if string !~ /^[!-)+-<>-~][!-~]*$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA1 path name\n"+ - "(it does not match the regular expression [!-)+-<>-~][!-~]*" - end - end - - def validate_decoded(object) - case object - when String, Symbol - validate_encoded(object) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Symbol, String)" - end - end - - def unsafe_encode(object) - case object - when String - return object - when Symbol - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Symbol, String)" - end - end - - def 
encode(object) - string = unsafe_encode(object) - validate_encoded(string) - return string - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/position_gfa1.rb b/lib/rgfa/field/position_gfa1.rb deleted file mode 100644 index 9a43442..0000000 --- a/lib/rgfa/field/position_gfa1.rb +++ /dev/null @@ -1,58 +0,0 @@ -module RGFA::Field::PositionGFA1 - - def unsafe_decode(string) - begin - Integer(string) - rescue - raise RGFA::FormatError, - "#{string.inspect} does not represent a valid integer" - end - end - - def decode(string) - value = unsafe_decode(string) - validate_decoded(value) - return value - end - - def validate_decoded(integer) - if integer < 0 - raise RGFA::ValueError, - "#{integer} is not a positive integer" - end - end - - def validate_encoded(string) - if string =~ /^[0-9]+$/ - raise RGFA::FormatError, - "#{string.inspect} does not represent a valid unsigned integer" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - validate_encoded(object) - return object - when Integer - validate_decoded(object) - return object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Integer)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/position_gfa2.rb b/lib/rgfa/field/position_gfa2.rb deleted file mode 100644 index dbe93f0..0000000 --- a/lib/rgfa/field/position_gfa2.rb +++ /dev/null @@ -1,57 +0,0 @@ -module RGFA::Field::PositionGFA2 - - def unsafe_decode(string) - string.to_pos - end - - def decode(string) - position = unsafe_decode(string) - if position.value 
< 0 - raise RGFA::ValueError, - "#{position.value} is not a positive integer" - end - return position - end - - def validate_decoded(object) - case object - when Integer - if object < 0 - raise RGFA::ValueError, - "#{object} is not a positive integer" - end - when RGFA::LastPos - object.validate - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: RGFA::LastPos, Integer)" - end - end - - def validate_encoded(string) - if string =~ /^[0-9]+\$?$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA2 position\n"+ - "(it must be an unsigned integer eventually followed by a $)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - object.kind_of?(String) ? validate_encoded(object) - : validate_decoded(object) - object.to_s - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/segment_name_gfa1.rb b/lib/rgfa/field/segment_name_gfa1.rb deleted file mode 100644 index 05037c2..0000000 --- a/lib/rgfa/field/segment_name_gfa1.rb +++ /dev/null @@ -1,65 +0,0 @@ -module RGFA::Field::SegmentNameGFA1 - - def unsafe_decode(string) - string.to_sym - end - - def decode(string) - validate_encoded(string) - string.to_sym - end - - def validate_encoded(string) - if string !~ /^[!-)+-<>-~][!-~]*$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA1 segment name\n"+ - "(it does not match the regular expression [!-)+-<>-~][!-~]*" - elsif string =~ /[+-],/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA1 segment name\n"+ - "(it contains + or - followed by ,)" - end - end - - def validate_decoded(object) - case object - when RGFA::Line::Segment::GFA1 - validate_encoded(object.name) - when String, Symbol - validate_encoded(object) - else - raise RGFA::TypeError, - "the 
class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Symbol, String, RGFA::Line::Segment::GFA1)" - end - end - - def unsafe_encode(object) - case object - when String - return object - when Symbol - return object.to_s - when RGFA::Line::Segment::GFA1 - return object.name.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: Symbol, String, RGFA::Line::Segment::GFA1)" - end - end - - def encode(object) - string = unsafe_encode(object) - validate_encoded(string) - return string - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/sequence_gfa1.rb b/lib/rgfa/field/sequence_gfa1.rb deleted file mode 100644 index be7e8fc..0000000 --- a/lib/rgfa/field/sequence_gfa1.rb +++ /dev/null @@ -1,62 +0,0 @@ -module RGFA::Field::SequenceGFA1 - - def unsafe_decode(string) - if string.placeholder? 
- return RGFA::Placeholder.new - else - return string - end - end - - def decode(string) - object = unsafe_decode(string) - validate_decoded(object) - return object - end - - def validate_encoded(string) - if string !~ /^\*$|^[A-Za-z=.]+$/ - raise RGFA::FormatError, - "the string #{string.inspect} is not a valid GFA1 sequence\n"+ - "(it is not * and does not match the regular expression [A-Za-z=.]+" - end - end - - def validate_decoded(object) - case object - when RGFA::Placeholder - when String - validate_encoded(object) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::Placeholder)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when RGFA::Placeholder - return object.to_s - when String - validate_encoded(object) - return object - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::Placeholder)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/sequence_gfa2.rb b/lib/rgfa/field/sequence_gfa2.rb deleted file mode 100644 index bd25355..0000000 --- a/lib/rgfa/field/sequence_gfa2.rb +++ /dev/null @@ -1,62 +0,0 @@ -module RGFA::Field::SequenceGFA2 - - def unsafe_decode(string) - if string.placeholder? 
- return RGFA::Placeholder.new - else - return string - end - end - - def decode(string) - object = unsafe_decode(string) - validate_decoded(object) - return object - end - - def validate_encoded(string) - if string !~ /^[!-~]+$/ - raise RGFA::FormatError, - "the string #{string.inspect} is not a valid GFA2 sequence\n"+ - "(it contains spaces and/or non-printable characters)" - end - end - - def validate_decoded(object) - case object - when RGFA::Placeholder - when String - validate_encoded(object) - else - raise RGFA::TypeError, - "the class is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::Placeholder)" - end - end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when RGFA::Placeholder - return object.to_s - when String - validate_encoded(object) - return object - else - raise RGFA::TypeError, - "the class is incompatible with the datatype\n"+ - "(accepted classes: String, RGFA::Placeholder)" - end - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field/string.rb b/lib/rgfa/field/string.rb deleted file mode 100644 index 509a70d..0000000 --- a/lib/rgfa/field/string.rb +++ /dev/null @@ -1,58 +0,0 @@ -module RGFA::Field::String - - def decode(string) - validate_encoded(string) - string - end - - def unsafe_decode(string) - string - end - - def validate_encoded(string) - if string !~ /^[ !-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid string field\n"+ - "(it contains newlines/tabs and/or non-printable characters)" - end - end - - alias_method :validate_decoded, :validate_encoded - - def validate(object) - case object - when String, Symbol - validate_encoded(object) - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol)" - end - 
end - - def unsafe_encode(object) - object.to_s - end - - def encode(object) - case object - when String - when Symbol - object = object.to_s - else - raise RGFA::TypeError, - "the class #{object.class} is incompatible with the datatype\n"+ - "(accepted classes: String, Symbol)" - end - validate_encoded(object) - return object - end - - module_function :decode - module_function :unsafe_decode - module_function :validate_encoded - module_function :validate_decoded - module_function :unsafe_encode - module_function :encode - -end diff --git a/lib/rgfa/field_array.rb b/lib/rgfa/field_array.rb deleted file mode 100644 index f817dc8..0000000 --- a/lib/rgfa/field_array.rb +++ /dev/null @@ -1,106 +0,0 @@ -# Array representing multiple values of the same tag in different header lines -# @tested_in api_header -class RGFA::FieldArray < Array - attr_reader :datatype - - # @param datatype [RGFA::Field::TAG_DATATYPE] the datatype to use - def initialize(datatype, data = []) - @datatype = datatype - super(data) - end - - # Run the datatype-specific validation on each element of the array - # @param fieldname [Symbol] fieldname to use for error messages - # @return [void] - def validate(fieldname=nil) - validate_gfa_field(nil, fieldname) - end - - # @api private - # @tested_in unit_field_array - module API_PRIVATE - - # Run a datatype-specific validation on each element of the array, - # using the specified datatype - # @param datatype [nil, RGFA::Field::TAG_DATATYPE] datatype to use for the - # validation; use +nil+ to use the stored datatype (self.datatype) - # @param fieldname [Symbol] fieldname to use for error messages - # @return [void] - def validate_gfa_field(datatype, fieldname=nil) - datatype ||= @datatype - each {|elem| elem.validate_gfa_field(datatype, fieldname)} - end - - # Default GFA tag datatype - # @return [RGFA::Field::TAG_DATATYPE] - def default_gfa_tag_datatype - @datatype - end - - # String representation of the field array - # @param datatype 
[RGFA::Field::TAG_DATATYPE] - # (defaults to: +self.datatype+) datatype of the data - # @param fieldname [Symbol] - # (defaults to +nil+) fieldname to use for error messages - # @return [String] tab-separated string representations of the elements - def to_gfa_field(datatype: @datatype, fieldname: nil) - map do |x| - x.to_gfa_field(datatype: datatype, fieldname: fieldname) - end.join("\t") - end - - # String representation of the field array as GFA tags - # @param datatype [RGFA::Field::TAG_DATATYPE] - # (defaults to: +self.datatype+) datatype of the data - # @param fieldname [Symbol] name of the tag - # @return [String] tab-separated GFA tag representations of the elements - def to_gfa_tag(fieldname, datatype: @datatype) - map{|x| x.to_gfa_tag(fieldname, datatype: datatype)}.join("\t") - end - - # Add a value to the array and validate - # @raise [RGFA::InconsistencyError] if the type of the new value does not - # correspond to the type of existing values - # @param value [Object] the value to add - # @param datatype [RGFA::Field::TAG_DATATYPE, nil] the datatype to use; - # if not +nil+, it will be checked that the specified datatype is the - # same as for previous elements of the field array (no further validation - # will be performed); - # if +nil+, the value will be validated, according to the datatype - # specified on field array creation - # @param fieldname [Symbol] the field name to use for error messages - def vpush(value, datatype=nil, fieldname=nil) - if datatype.nil? 
- value.validate_gfa_field(@datatype, fieldname) - elsif datatype != @datatype - raise RGFA::InconsistencyError, - "Datatype mismatch error for field #{fieldname}:\n"+ - "value: #{value}\n"+ - "existing datatype: #{@datatype};\n"+ - "new datatype: #{datatype}" - end - self << value - end - - end - include API_PRIVATE - -end - -class Array - - # Create a {RGFA::FieldArray} from an array - # @param datatype [RGFA::Field::TAG_DATATYPE] the datatype to use - # @return [RGFA::FieldArray] - # @tested_in api_array - def to_rgfa_field_array(datatype=nil) - if kind_of?(RGFA::FieldArray) - self - elsif datatype.nil? - raise RGFA::ArgumentError, "No datatype specified" - else - RGFA::FieldArray.new(datatype, self) - end - end - -end diff --git a/lib/rgfa/graph_operations.rb b/lib/rgfa/graph_operations.rb deleted file mode 100644 index 68ff161..0000000 --- a/lib/rgfa/graph_operations.rb +++ /dev/null @@ -1,19 +0,0 @@ -# -# Complicated operations on the graph, such as identification and merging of -# linear paths, multiplication of segments, analysis of the connectivity of -# the graph are defined in submodules of this module and included in the RGFA -# class. 
-# -RGFA::GraphOperations = Module.new - -require_relative "graph_operations/topology.rb" -require_relative "graph_operations/linear_paths.rb" -require_relative "graph_operations/multiplication.rb" -require_relative "graph_operations/rgl.rb" - -module RGFA::GraphOperations - include RGFA::GraphOperations::Topology - include RGFA::GraphOperations::LinearPaths - include RGFA::GraphOperations::Multiplication - include RGFA::GraphOperations::RGL -end diff --git a/lib/rgfa/graph_operations/linear_paths.rb b/lib/rgfa/graph_operations/linear_paths.rb deleted file mode 100644 index 67bf100..0000000 --- a/lib/rgfa/graph_operations/linear_paths.rb +++ /dev/null @@ -1,332 +0,0 @@ -require_relative "../segment_end" -require_relative "./redundant_linear_paths" - -# -# Methods for the RGFA class, which allow to find and merge linear paths. -# -# @tested_in api_linear_paths -# -module RGFA::GraphOperations::LinearPaths - - require "set" - - # - # Find a path without branches. - # - # The path must include +s+ and exclude all segments in the +exclude+ set. Any - # segment used in the returned path will be added to the +exclude+ set. - # - # @param s [String,Symbol,RGFA::Line::Segment::GFA1,RGFA::Line::Segment::GFA2] - # a segment name or instance - # @param exclude [Set] a set of segment names to exclude from the path - # @return [Array] - # - def linear_path(s, exclude = Set.new) - cs = segment(s).connectivity - s = s.to_sym - segpath = RGFA::SegmentEndsPath.new() - [:L, :R].each_with_index do |et, i| - if cs[i] == 1 - exclude << s - segpath.pop - segpath += traverse_linear_path(RGFA::SegmentEnd.new(s, et), exclude) - end - end - return segpath - end - - # Find all unbranched paths in the graph. 
- # - # @param redundant [Boolean] - # if true, junctions are added to paths ends, also if the same end of the - # junction node has also additional links - # - # @return [Array>] the booleans are added - # if the +redundant+ parameter is set, as first and last element - # (they are set if the first / last segment are junctions shall - # be duplicated when merging paths) - def linear_paths(redundant = false) - exclude = Set.new - junction_exclude = Set.new if redundant - retval = [] - segnames = segment_names - progress_log_init(:linear_paths, "segments", segnames.size, - "Detect linear paths (#{segnames.size} segments)") if @progress - segnames.each do |sn| - progress_log(:linear_paths) if @progress - next if exclude.include?(sn) - lp = linear_path(sn, exclude) - if !redundant - retval << lp if lp.size > 1 - else - if lp.empty? - # add paths from junction to junction neighbours - retval += junction_junction_paths(sn, junction_exclude) - else - # extend linear path to junction neighbours - extend_linear_path_to_junctions(lp) - retval << lp - end - end - end - progress_log_end(:linear_paths) - return retval - end - - # Merge a linear path, i.e. a path of segments without extra-branches - # @!macro [new] merge_lim - # Limitations: all containments und paths involving merged segments are - # deleted. 
- # - # @param segpath [Array] a linear path, such as that - # retrieved by {#linear_path} - # @!macro [new] merge_options - # @param options [Hash] optional keyword arguments - # @option options [String, :short, nil] :merged_name (nil) - # if nil, the merged_name is automatically computed; if :short, - # a name is computed starting with "merged1" and calling next until - # an available name is founf; if String, the name to use - # @option options [Boolean] :cut_counts (false) - # if true, total count in merged segment m, composed of segments - # s of set S is multiplied by the factor Sum(|s in S|)/|m| - # @option options [Boolean] :redundant (false) - # if true, junctions are output multiple times, merging them with - # all paths which start or end in them; default: they are - # merged only with the path on the side which has one link only, - # if any, and left as separate nodes otherwise - # @option options [symbol] :jntag (:jn) - # specify temporary optional tag used for storing information about - # junctions used by the merged_linear_paths method when deleting - # them; it should be a tag which is otherwise not present in the segments - # - # @return [RGFA] self - # @see #merge_linear_paths - def merge_linear_path(segpath, **options) - return if segpath.size < 2 - if options[:redundant] and ((segpath[0] == true) or (segpath[0] == false)) - first_redundant = segpath.shift - last_redundant = segpath.pop - else - first_redundant, last_redundant = false - end - segpath.map!{|se|se.to_segment_end} - merged, first_reversed, last_reversed = - create_merged_segment(segpath, options) - self << merged - if first_redundant - link_duplicated_first(merged, - segment(segpath.first.to_segment_end.segment), - first_reversed, options[:jntag]) - else - link_merged(merged.name, segpath.first.to_segment_end.invert, - first_reversed) - end - if last_redundant - link_duplicated_last(merged, segment(segpath.last.segment), - last_reversed, options[:jntag]) - else - 
link_merged(merged.name, segpath.last, last_reversed) - end - idx1 = first_redundant ? 1 : 0 - idx2 = last_redundant ? -2 : -1 - segpath[idx1..idx2].each do |sn_et| - segment(sn_et.segment).disconnect - progress_log(:merge_linear_paths, 0.05) if @progress - end - self - end - - # Merge all linear paths in the graph, i.e. - # paths of segments without extra-branches - # @!macro merge_lim - # @!macro merge_options - # - # @return [RGFA] self - def merge_linear_paths(**options) - paths = linear_paths(options[:redundant]) - psize = paths.flatten.size / 2 - progress_log_init(:merge_linear_paths, "segments", psize, - "Merge #{paths.size} linear paths (#{psize} segments)") if @progress - paths.each do |path| - merge_linear_path(path, **options) - end - progress_log_end(:merge_linear_paths) - remove_junctions(options[:jntag]) if options[:redundant] - self - end - - private - - # Traverse the links, starting from the segment end +segment_end+. - # - # If any segment is found during traversal whose name is included in +exclude+ - # the traversing is interrupted. The +exclude+ set is updated, so that - # circular paths are avoided. The starting segment is not added to the set. - # - def traverse_linear_path(segment_end, exclude) - list = RGFA::SegmentEndsPath.new() - current = segment_end.to_segment_end - current.segment = segment(current.segment) - loop do - after = current.segment.dovetails_of_end(current.end_type) - before = current.segment.dovetails_of_end(current.end_type.invert) - if (before.size == 1 and after.size == 1) or list.empty? - list << [current.name, current.end_type] - exclude << current.name - l = after.first - current = l.other_end(current).invert - break if exclude.include?(current.name) - elsif before.size == 1 - list << [current.name, current.end_type] - exclude << current.name - break - else - break - end - end - return segment_end.end_type == :L ? 
list.reverse : list - end - - def sum_of_counts(segpath, multfactor = 1) - retval = {} - segs = segpath.map {|sn_et|segment!(sn_et.segment)} - [:KC, :RC, :FC].each do |count_tag| - segs.each do |s| - if s.tagnames.include?(count_tag) - retval[count_tag] ||= 0 - retval[count_tag] += s.get(count_tag) - end - end - if retval[count_tag] - retval[count_tag] = (retval[count_tag] * multfactor).to_i - end - end - return retval - end - - def reverse_segment_name(name, separator) - name.to_s.split(separator).map do |part| - openp = part[0] == "(" - part = part[1..-1] if openp - closep = part[-1] == ")" - part = part[0..-2] if closep - part = (part[-1] == "^") ? part[0..-2] : part+"^" - part += ")" if openp - part = "(#{part}" if closep - part - end.reverse.join(separator) - end - - def reverse_pos_array(pos_array, lastpos) - return nil if pos_array.nil? or lastpos.nil? - pos_array.map {|pos| lastpos - pos + 1}.reverse - end - - def add_segment_to_merged(merged, segment, reversed, cut, init, options) - s = (reversed ? segment.sequence.rc[cut..-1] : segment.sequence[cut..-1]) - if init - merged.sequence = s - merged.name = (options[:merged_name].nil? ? - segment.name : options[:merged_name]) - merged.LN = segment.LN - else - (segment.sequence.placeholder?) ? - (merged.sequence = RGFA::Placeholder.new) : - (merged.sequence += s) - if options[:merged_name].nil? - merged.name = "#{merged.name}_#{segment.name}" - end - if merged.LN - segment.LN ? 
merged.LN += (segment.LN - cut) - : merged.LN = nil - end - end - end - - def create_merged_segment(segpath, options) - merged = segment!(segpath.first.segment).clone - merged.set(options[:jntag] || :jn, nil) - total_cut = 0 - a = segpath.first - first_reversed = (a.end_type == :L) - last_reversed = nil - if options[:merged_name] == :short - forbidden = (segment_names + path_names) #TODO: update to GFA2 - options[:merged_name] = "merged1" - while forbidden.include?(options[:merged_name]) - options[:merged_name] = options[:merged_name].next - end - end - add_segment_to_merged(merged, segment(a.segment), first_reversed, 0, true, - options) - progress_log(:merge_linear_paths, 0.95) if @progress - (segpath.size-1).times do |i| - b = segpath[i+1].to_segment_end.invert - ls = segment(a.segment).end_relations(a.end_type, b, :dovetails) - if ls.size != 1 - msg = "A single link was expected between #{a} and #{b}, "+ - "#{ls.size} were found" - msg << ":\n#{l.map(&:to_s).join("\n")}" if ls.size > 0 - raise RGFA::ValueError, msg - end - l = ls[0] - if l.overlap == [] - cut = 0 - elsif l.overlap.all?{|op|[:M, :"="].include?(op.code)} - cut = l.overlap.map(&:len).inject(:+) - else - raise RGFA::ValueError, - "Merging is only allowed if all operations are M/=" - end - total_cut += cut - last_reversed = (b.end_type == :R) - add_segment_to_merged(merged, segment(b.segment), last_reversed, cut, - false, options) - a = b.to_segment_end.invert - if @progress - progress_log(:merge_linear_paths, 0.95) - end - end - if !merged.sequence.placeholder? and @version == :gfa1 - if merged.LN.nil? - merged.LN = merged.sequence.length - elsif @vlevel >= 1 and merged.LN != merged.sequence.length - raise RGFA::InconsistencyError, - "Computed sequence length #{merged.sequence.length} "+ - "and computed LN #{merged.LN} differ" - end - end - if merged.LN.nil? - [:KC, :RC, :FC].each {|count_tag| merged.set(count_tag, nil)} - else - sum_of_counts(segpath, (options[:cut_counts] ? 
- merged.LN.to_f / (total_cut+merged.LN) : 1)). - each do |count_tag, count| - merged.set(count_tag, count) - end - end - return merged, first_reversed, last_reversed - end - - def link_merged(merged_name, segment_end, reversed) - segment(segment_end.segment).dovetails_of_end( - segment_end.end_type).each do |l| - l2 = l.clone - if l2.to == segment_end.segment - l2.to = merged_name - if reversed - l2.to_orient = l2.to_orient.invert - end - else - l2.from = merged_name - if reversed - l2.from_orient = l2.from_orient.invert - end - end - self << l2 - end - end - - include RGFA::GraphOperations::RedundantLinearPaths - -end diff --git a/lib/rgfa/graph_operations/multiplication.rb b/lib/rgfa/graph_operations/multiplication.rb deleted file mode 100644 index 0c5cfb0..0000000 --- a/lib/rgfa/graph_operations/multiplication.rb +++ /dev/null @@ -1,155 +0,0 @@ -require_relative "../error.rb" - -# -# Method for the RGFA class, which allow to split a segment into -# multiple copies. -# -# @tested_in api_multiplication -# -module RGFA::GraphOperations::Multiplication - - # Create multiple copies of a segment. - # - # == Automatic computation of the copy names - # - # - Can be overridden, by providing an array of copy names. - # - First, it is checked if the name of the original segment ends with a - # relevant - # string, i.e. a lower case letter (for +:lowcase+), an upper case letter - # (for +:upcase+), a digit (for +:number+), or the string +"_copy"+ - # plus one or more optional digits (for +:copy+). - # - If so, it is assumed, it was already a copy, and it is not - # altered. - # - If not, then +a+ (for +:lowcase+), +A+ (for +:upcase+), +1+ (for - # +:number+), +_copy+ (for +:copy+) is appended to the string. - # - Then, in all - # cases, next (*) is called on the string, until a valid, non-existant name - # is found for each of the segment copies - # - (*) = except for +:copy+, where - # for the first copy no digit is present, but for the following is, - # i.e. 
the segment names will be +:copy+, +:copy2+, +:copy3+, etc. - # - # @param [Integer] factor multiplication factor; if 0, delete the segment; - # if 1; do nothing; if > 1; number of copies to create - # @param segment [String, RGFA::Line::Segment] segment name or instance - # @param [:lowcase, :upcase, :number, :copy, Array] copy_names - # (Defaults to: +:lowcase+) - # Array of names for the copies of the segment, - # or a symbol, which defines a system to compute the names from the name of - # the original segment. See "automatic computation of the copy names". - # @param [Boolean] conserve_components (Defaults to: +true+) - # If factor == 0 (i.e. deletion), delete segment only if - # {#cut_segment?}(segment) is +false+. - # - # @return [RGFA] self - def multiply(segment, factor, copy_names: :asterisk, - conserve_components: true) - segment_name = segment.kind_of?(RGFA::Line) ? segment.name : segment - if factor < 2 - return self if factor == 1 - return self if cut_segment?(segment_name) and conserve_components - return rm(segment_name) - end - s = segment!(segment_name) - divide_segment_and_connection_counts(s, factor) - copy_names = compute_copy_names(copy_names, segment_name, factor) - copy_names.each {|cn| clone_segment_and_connections(s, cn)} - return self - end - - private - - def compute_copy_names(copy_names, segment_name, factor) - return nil if factor < 2 - accepted = [:lowcase, :upcase, :number, :copy, :asterisk] - if copy_names.kind_of?(Array) - return copy_names - elsif !accepted.include?(copy_names) - raise RGFA::ArgumentError, - "copy_names shall be an array of names or one of: "+ - accepted.inspect - end - retval = [] - next_name = segment_name.to_s - case copy_names - when :asterisk - if next_name =~ /^(.)\*\d+$/ - next_name = next_name.next - else - next_name += "*2" - end - when :lowcase - if next_name =~ /^.*[a-z]$/ - next_name = next_name.next - else - next_name += "b" - end - when :upcase - if next_name =~ /^.*[A-Z]$/ - next_name = 
next_name.next - else - next_name += "B" - end - when :number - if next_name =~ /^.*[0-9]$/ - next_name = next_name.next - else - next_name += "2" - end - when :copy - if next_name =~ /^.*_copy(\d*)$/ - next_name += "1" if $1 == "" - next_name = next_name.next - copy_names = :number - else - next_name += "_copy" - end - end - while retval.size < (factor-1) - while retval.include?(next_name) or - line(next_name.to_sym) - if copy_names == :copy - next_name += "1" - copy_names = :number - end - next_name = next_name.next - end - retval << next_name - end - return retval - end - - def divide_counts(gfa_line, factor) - [:KC, :RC, :FC].each do |count_tag| - if gfa_line.tagnames.include?(count_tag) - value = (gfa_line.get(count_tag).to_f / factor) - gfa_line.set(count_tag, value.to_i) - end - end - end - - def divide_segment_and_connection_counts(segment, factor) - divide_counts(segment, factor) - processed_circulars = Set.new - (segment.dovetails + segment.containments).each do |l| - # circular link counts shall be divided only ones - if !l.circular? or !processed_circular.include?(l) - divide_counts(l, factor) - processed_circulars << l if l.circular? - end - end - end - - def clone_segment_and_connections(segment, clone_name) - cpy = segment.clone - cpy.name = clone_name - cpy.connect(self) - (segment.dovetails + segment.containments).each do |l| - lc = l.clone - lc.from = clone_name if lc.from == segment.name - lc.to = clone_name if lc.to == segment.name - lc.connect(self) - end - end - -end diff --git a/lib/rgfa/graph_operations/redundant_linear_paths.rb b/lib/rgfa/graph_operations/redundant_linear_paths.rb deleted file mode 100644 index eafbbdf..0000000 --- a/lib/rgfa/graph_operations/redundant_linear_paths.rb +++ /dev/null @@ -1,148 +0,0 @@ -# -# Methods for the RGFA class, which allow to add redundant junction -# sequences to merged linear paths. 
-# -# @tested_in XXX -# -module RGFA::GraphOperations::RedundantLinearPaths - - private - - def junction_junction_paths(sn, exclude) - retval = [] - exclude << sn - s = segment(sn) - s.dovetails_of_end(:L).each do |dL| - eL = dL.other_end([s, :L]) - next if exclude.include?(eL.name) or - eL.segment.dovetails_of_end(eL.end_type).size == 1 - retval << [true, eL, [s, :R], true] - end - s.dovetails_of_end(:R).each do |dR| - eR = dR.other_end([s, :R]) - next if exclude.include?(eR.name) or - eR.segment.dovetails_of_end(eR.end_type).size == 1 - retval << [true, [s, :R], eR.invert, true] - end - return retval - end - - def extend_linear_path_to_junctions(segpath) - segpath[0] = segpath[0].to_segment_end - segfirst = segment(segpath[0].segment) - segfirst_d = segfirst.dovetails_of_end(segpath[0].end_type.invert) - redundant_first = (segfirst_d.size > 0) - if segfirst_d.size == 1 - segpath.unshift(segfirst_d[0].other_end(segpath[0].invert)) - end - segpath.unshift(redundant_first) - segpath[-1] = segpath[-1].to_segment_end - seglast = segment(segpath[-1].segment) - seglast_d = seglast.dovetails_of_end(segpath[-1].end_type) - redundant_last = (seglast_d.size > 0) - if seglast_d.size == 1 - segpath << seglast_d[0].other_end(segpath[-1]).invert - end - segpath << redundant_last - end - - def link_duplicated_first(merged, first, reversed, jntag) - # annotate junction - jntag ||= :jn - if !first.get(jntag) - first.set(jntag, {"L" => [], "R" => []}) - end - if reversed - first.get(jntag)["L"] << [merged.name, "-"] - else - first.get(jntag)["R"] << [merged.name, "+"] - end - # create temporary link - len = first.sequence.size - if version == :gfa1 - self << RGFA::Line::Edge::Link.new([first.name.to_s, - reversed ? "-" : "+", - merged.name.to_s,"+", - "#{len}M", "co:Z:temporary"]) - elsif version == :gfa2 - self << RGFA::Line::Edge::GFA2.new(["*",first.name.to_s+ - (reversed ? '-' : '+'), - "#{merged.name}+", - # note: s1 coords are on purpose fake - reversed ? 
"0" : "#{len-1}", - reversed ? "1" : "#{len}$", - "0", len.to_s, - "#{len}M", "co:Z:temporary"]) - else - raise RGFA::AssertionError - end - end - - def link_duplicated_last(merged, last, reversed, jntag) - # annotate junction - jntag ||= :jn - if !last.get(jntag) - last.set(jntag, {"L" => [], "R" => []}) - end - if reversed - last.get(jntag)["R"] << [merged.name, "-"] - else - last.get(jntag)["L"] << [merged.name, "+"] - end - # create temporary link - len = last.sequence.size - if version == :gfa1 - self << RGFA::Line::Edge::Link.new([merged.name.to_s, "+", - last.name.to_s, - reversed ? "-" : "+", - "#{len}M", "co:Z:temporary"]) - elsif version == :gfa2 - mlen = merged.sequence.size - self << RGFA::Line::Edge::GFA2.new(["*", "#{merged.name}+", - last.name.to_s+ - (reversed ? '-' : '+'), - (mlen - len).to_s, "#{mlen}$", - # note: s2 coords are on purpose fake - reversed ? "#{len - 1}" : "0", - reversed ? "#{len}$" : "1", - "#{len}M", "co:Z:temporary"]) - else - raise RGFA::AssertionError - end - end - - def remove_junctions(jntag) - jntag ||= :jn - segments.each do |s| - jndata = s.get(jntag) - if jndata - len = s.sequence.size - jndata["L"].each do |m1, dir1| - jndata["R"].each do |m2, dir2| - if version == :gfa1 - self << RGFA::Line::Edge::Link.new([m1.to_s, dir1.to_s, - m2.to_s, dir2.to_s, - "#{len}M"]) - elsif version == :gfa2 - m1len = segment(m1).sequence.size - m2len = segment(m2).sequence.size - r1 = dir1.to_sym == :- - r2 = dir2.to_sym == :- - self << RGFA::Line::Edge::GFA2.new(["*", "#{m1}#{dir1}", - "#{m2}#{dir2}", - r1 ? "0" : "#{m1len - len}", - r1 ? "#{len}" : "#{m1len}$", - r2 ? "#{m2len - len}" : "0", - r2 ? 
"#{m2len}$" : "#{len}", - "#{len}M"]) - else - raise RGFA::AssertionError - end - end - end - s.disconnect - end - end - end - -end diff --git a/lib/rgfa/graph_operations/rgl.rb b/lib/rgfa/graph_operations/rgl.rb deleted file mode 100644 index 4823ac0..0000000 --- a/lib/rgfa/graph_operations/rgl.rb +++ /dev/null @@ -1,187 +0,0 @@ -begin - require "rgl/adjacency" - require "rgl/implicit" - require_relative "error" - - # - # Conversion to RGL graphs - # - # @tested_in XXX - # - module RGFA::GraphOperations::RGL - - # Creates an RGL graph. - # - # @param oriented [Boolean] (defaults to: +true+) may the graph - # contain links of segments in different orientation? - # @return [RGL::ImplicitGraph] an rgl implicit directed graph - def to_rgl(oriented: true) - if oriented - to_rgl_oriented - else - to_rgl_unoriented - end - end - - # Creates an RGL graph, including links orientations. - # - # @return [RGL::ImplicitGraph] an rgl implicit directed graph; - # where vertices are RGFA::OrientedLine instances - def to_rgl_oriented - RGL::ImplicitGraph.new do |g| - g.vertex_iterator do |block| - self.each_segment do |segment| - [:+, :-].each do |orient| - block.call(OL[segment, orient]) - end - end - end - g.adjacent_iterator do |oriented_segment, block| - s = segment(oriented_segment.line) - s.dovetails.each do |l| - if l.from == s and l.from_orient == oriented_segment.orient - os = OL[segment(l.to), l.to_orient] - block.call(os) - end - if l.to == s and l.to_orient == oriented_segment.orient.invert - os = OL[segment(l.from), l.from_orient] - block.call(os.invert) - end - end - end - g.directed = true - end - end - - # Creates an RGL graph, assuming that all links orientations - # are "+". 
- # - # @raise [RGFA::ValueError] if the graph contains any link where - # from_orient or to_orient is :- - # @return [RGL::ImplicitGraph] an rgl implicit directed graph; - # where vertices are RGFA::Segment::GFA1 objects - def to_rgl_unoriented - RGL::ImplicitGraph.new do |g| - g.vertex_iterator {|block| self.each_segment {|s| block.call(s)}} - g.adjacent_iterator do |s, bl| - s = segment(s) - s.dovetails.each do |l| - if l.from_orient == :- or to_orient == :- - raise RGFA::ValueError, - "Graph contains links with segments in reverse orientations" - end - bl.call(segment(l.to)) if (l.from == s) - end - end - g.directed = true - end - end - - def self.included(base) - base.extend(ClassMethods) - end - - module ClassMethods - - # @param g [RGL::ImplicitGraph, RGL::DirectedAdjacencyGraph] an RGL graph. - # - # @!macro[new] from_rgl - # Accepted vertex formats: - # - # - RGFA::OrientedLine, - # where the line element is a segment specifier (see below) - # - segment specifier alone: the orientation is assumed to be :+ - # - # The segment specifier can be: - # - RGFA::Segment::GFA1 instance - # - String, segment representation (e.g. "S\tsegment\t*") - # - String, valid segment name (e.g. "segment") - # - # @raise [RGFA::FormatError] if the graph cannot be - # converted - # - # @return [RGFA] a new RGFA instance - def from_rgl(g) - gfa = RGFA.new - if not (g.respond_to?(:each_vertex) and - g.respond_to?(:each_edge)) - raise RGFA::TypeError, - "#{g} is not a valid RGL graph" - end - if not g.directed? 
- raise RGFA::FormatError, - "#{g} is not a directed graph" - end - g.each_vertex {|v| add_segment_if_new(gfa, v)} - g.each_edge do |s, t| - gfa << RGFA::Line::Edge::Link.new(segment_name_and_orient(s) + - segment_name_and_orient(t) + - ["*"]) - end - gfa - end - - private - - def add_segment_if_new(gfa, v) - # GFA::GraphVertex - v = v.segment if v.respond_to?(:segment) - # RGFA::OrientedLine - v = v.line if v.respond_to?(:line) - if v.kind_of?(Symbol) - # segment name as symbol - return if gfa.segment_names.include?(v) - v = RGFA::Line::Segment::GFA1.new([v.to_s, "*"]) - elsif v.kind_of?(String) - a = v.split("\t") - if a[0] == "S" - # string representation of segment - return if gfa.segment_names.include?(a[1].to_sym) - v = RGFA::Line::Segment::GFA1.new(a[1..-1]) - else - # segment name as string - return if gfa.segment_names.include?(v.to_sym) - v = RGFA::Line::Segment::GFA1.new([v, "*"]) - end - end - return if gfa.segment_names.include?(v.name) - gfa << v - end - - def segment_name_and_orient(s) - # default orientation - o = s.respond_to?(:orient) ? s.orient.to_s : "+" - # RGFA::Line::Segment (also embedded in RGFA::OrientedLine) - if s.respond_to?(:name) - s = s.name.to_s - elsif s.respond_to?(:segment) - # GFA::GraphVertex - s = s.segment.to_s - elsif s.respond_to?(:split) - a = s.split("\t") - s = a[1] if a[0] == "S" - else - s = s.to_s - end - return s, o - end - - end - - end - - module RGL::Graph - - # @!macro from_rgl - def to_rgfa - RGFA.from_rgl(self) - end - - end - -rescue LoadError - - module RGFA::GraphOperations::RGL - end - -end diff --git a/lib/rgfa/graph_operations/topology.rb b/lib/rgfa/graph_operations/topology.rb deleted file mode 100644 index da79431..0000000 --- a/lib/rgfa/graph_operations/topology.rb +++ /dev/null @@ -1,257 +0,0 @@ -# -# Methods which analyse the topology of the graph. 
-# -# @tested_in XXX -# -module RGFA::GraphOperations::Topology - - require "set" - - # Does the removal of the link alone divide a component - # of the graph into two? - # @return [Boolean] - # @param link [RGFA::Line::Edge::Link] a link - def cut_link?(link) - return false if link.circular? - return true if link.from.dovetails_of_end(link.from_end.end_type.invert).size == 0 - return true if link.to.dovetails_of_end(link.to_end.end_type.invert).size == 0 - c = {} - [:from, :to].each do |et| - c[et] = Set.new - visited = Set.new - segend = link.send(:"#{et}_end") - visited << segend.name - visited << link.other_end(segend).name - traverse_component(segend, c[et], visited) - end - return c[:from] != c[:to] - end - - # Does the removal of the segment and its links divide a - # component of the graph into two? - # @param segment - # [Symbol, String, RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # a segment name or instance - # @return [Boolean] - def cut_segment?(segment) - segment_name = segment.kind_of?(RGFA::Line) ? 
segment.name : segment - segment = segment!(segment) - return false if [[0,0],[0,1],[1,0]].include?(segment.connectivity) - start_points = [] - [:L, :R].each do |et| - start_points += segment.dovetails_of_end(et).map do |l| - l.other_end([segment_name, et]).invert - end - end - cc = [] - start_points.uniq.each do |start_point| - cc << Set.new - visited = Set.new - visited << segment_name - traverse_component(start_point, cc.last, visited) - end - return cc.any?{|c|c != cc[0]} - end - - # Find the connected component of the graph in which a segment is included - # @return [Array] - # array of segment names - # @param segment - # [Symbol, String, RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # a segment name or instance - # @param visited [Set] a set of segments to ignore during graph - # traversal; all segments in the found component will be added to it - def segment_connected_component(segment, visited = Set.new) - sn = segment.kind_of?(RGFA::Line) ? segment.name : segment - visited << sn - c = [sn] - [:L, :R].each {|e| traverse_component([sn, e], c, visited)} - return c - end - - # Find the connected components of the graph - # @return [Array>] - # array of components, each an array of segment names - def connected_components - components = [] - visited = Set.new - segment_names.each do |sn| - next if visited.include?(sn) - components << segment_connected_component(sn, visited) - end - return components - end - - # Split connected components of the graph into single-component RGFAs - # @return [Array] - def split_connected_components - retval = [] - ccs = connected_components - ccs.each do |cc| - gfa2 = self.clone - gfa2.rm(gfa2.segment_names - cc) - retval << gfa2 - end - return retval - end - - # Counts the dead ends. - # - # Dead ends are here defined as segment ends without dovetail edges. 
- # - # @return [Integer] number of dead ends in the graph - def n_dead_ends - segments.inject(0) do |n,s| - [:L, :R].each {|e| n+= 1 if s.dovetails_of_end(e).empty?} - n - end - end - - # Number of dovetail edges in the graph - # - # @return [Integer] number of links (GFA1) or E-lines representing a - # dovetail alignment (GFA2) - def n_dovetails - segments.inject(0) do |n,s| - [:L, :R].each {|e| n += s.dovetails_of_end(e).size} - n - end - return n / 2 - end - - # Number of internal edges in the graph - # - # @return [Integer] number of edges representing non-dovetail/non-containment - # alignments - def n_internals - segments.inject(0) do |n,s| - n += s.internals.size - n - end - return n / 2 - end - - # Number of containments in the graph - # - # @return [Integer] number of containments (GFA1) or E-lines - # representing containments (GFA2) - def n_containments - segments.inject(0) do |n,s| - n += s.edges_to_contained.size - n += s.edges_to_containers.size - n - end - return n / 2 - end - - # Output basic statistics about the graph's sequence and topology - # information. - # - # @param [boolean] short compact output as a single text line - # - # Compact output has the following keys: - # - +ns+: number of segments - # - +nd+: number of dovetail overlaps - # - +cc+: number of connected components - # - +de+: number of dead ends - # - +tl+: total length of segment sequences - # - +50+: N50 segment sequence length - # - # Normal output outputs a table with the same information, plus some - # additional one: the length of the largest component, as well as the shortest - # and largest and 1st/2nd/3rd quartiles of segment sequence length. - # - # @return [String] sequence and topology information collected from the graph. 
- # - def info(short = false) - q, n50, tlen = lenstats - nde = n_dead_ends() - ndv = n_dovetails_of_end() - cc = connected_components() - retval = [] - if short - retval << "ns=#{segments.size}" - retval << "nd=#{ndv}" - retval << "cc=#{cc.size}" - retval << "de=#{nde}" - retval << "tl=#{tlen}" - retval << "50=#{n50}" - spacer = "\t" - else - nin = n_internals() - ncn = n_containments() - pde = "%.2f%%" % ((nde.to_f*100) / (segments.size*2)) - ndv_s = "%.2f%%" % ((ndv.to_f) / (segments.size)) - nin_s = "%.2f%%" % ((nin.to_f) / (segments.size)) - ncn_s = "%.2f%%" % ((ncn.to_f) / (segments.size)) - gap_s = "%.2f%%" % ((gaps.size.to_f) / (segments.size)) - frg_s = "%.2f%%" % ((fragments.size.to_f) / (segments.size)) - retval << "== Specification version" - retval << "GFA version: #{version}" - retval << "" - retval << "== Header" - retval << "Version tag in header: #{header.VN ? header.VN : 'n.a.'}" - retval << "TS tag in header: #{header.TS ? header.TS : 'n.a.'}" - retval << "Number of tags in header: #{headers.size}" - retval << "Duplicated tags in header: #{header.n_duptags}" - retval << "" - retval << "== Graph elements" - retval << "Segment count: #{segments.size}" - retval << "Edges count: #{ndv + nin + ncn}" - retval << "- dovetails: #{ndv}" - retval << "- containments: #{ncn}" - retval << "- other (internal): #{nin}" - retval << "Gaps count: #{gaps.size}" - retval << "Fragments count: #{fragments.size}" - retval << "" - retval << "== Groups" - retval << "Paths count: #{paths.size}" - retval << "Sets count: #{sets.size}" - retval << "" - retval << "== Other GFA lines" - retval << "Comment lines: #{comments.size}" - retval << "Custom-type records: #{custom_records.size}" - retval << "" - retval << "== Segments connectivity" - retval << "Dovetails/segment: #{ndv_s}" - retval << "Segment dead ends (no dov.): #{nde}" - retval << "Segment ends, % dead: #{pde}" - retval << "Internal edges/segment: #{nin_s}" - retval << "Containments/segment: #{ncn_s}" - retval << 
"Gaps/segment: #{gap_s}" - retval << "Fragments/segment: #{frg_s}" - retval << "" - retval << "== Graph components (dovetails connections)" - retval << "Connected components: #{cc.size}" - cc.map!{|c|c.map{|sn|segment!(sn).length!}.inject(:+)} - retval << "Largest component (bp): #{cc.last}" - retval << "" - retval << "== Segments sequence statistics" - retval << "Total segments length (bp): #{tlen}" - retval << "N50 (bp): #{n50}" - retval << "Shortest segment (bp): #{q[0]}" - retval << "Lower quartile segment (bp): #{q[1]}" - retval << "Median segment (bp): #{q[2]}" - retval << "Upper quartile segment (bp): #{q[3]}" - retval << "Longest segment (bp): #{q[4]}" - spacer = "\n" - end - return retval.join(spacer) - end - - private - - def traverse_component(segment_end, c, visited) - segment_end = segment_end.to_segment_end - s = segment(segment_end.segment) - s.dovetails_of_end(segment_end.end_type).each do |l| - oe = l.other_end(segment_end) - sn = oe.name - next if visited.include?(sn) - visited << sn - c << sn - [:L, :R].each {|e| traverse_component([sn, e], c, visited)} - end - end - -end diff --git a/lib/rgfa/lastpos.rb b/lib/rgfa/lastpos.rb deleted file mode 100644 index c36f9bd..0000000 --- a/lib/rgfa/lastpos.rb +++ /dev/null @@ -1,136 +0,0 @@ -# A positive integer, with the additional marking (represented -# by a postfix $), which denotes the last position of a sequence. -# -# @tested_in api_positions -# -class RGFA::LastPos - - # Create a new LastPos. 
- # @see Integer#to_lastpos - # @see String#to_pos - def initialize(value) - @value = value - end - - attr_accessor :value - alias_method :to_i, :value - - # Validate a LastPos instance - # @return nil - def validate - if !value.kind_of?(Integer) - raise RGFA::TypeError, - "LastPos value shall be an integer, #{value.class} found" - elsif value < 0 - raise RGFA::ValueError, - "LastPos value shall be >= 0, #{value} found" - end - end - - # String representation, value with a dollar suffix - # @return [String] - def to_s - "#@value$" - end - - # A lastpos is equal to a lastpos or integer with the same value - # @return [Boolean] - # @param other [Integer,RGFA::LastPos] the value to compare. - def ==(other) - @value == other.value - end - - # Redefines respond_to? to consider methods delegated to the value. - def respond_to?(m, include_all=false) - super || @value.respond_to?(m, include_all) - end - - # Returns true - # @return [true] - def last? - true - end - - # Compatibility with Integer#first? - # @return [false] - def first? - false - end - - # Subtract other from the lastpos - # @return [Integer,RGFA::LastPos] a lastpos if other is 0, otherwise an - # integer - def -(other) - other == 0 ? self.clone : self.value - other.to_i - end - - private - - # Delegate methods to the value - def method_missing(meth, *args, &block) - @value.send meth, *args, &block - end - -end - -class String - - # Parse the string representation of a GFA2 position field - # @return [Integer,RGFA::LastPos] if the string ends with a dollar, - # then RGFA::LastPos, otherwise an integer. - # @param valid [Boolean] defaults to: +false+ is the string - # guaranteed to be a valid position value? 
- def to_pos(valid: false) - if self[-1] == "$" - last = true - s = self[0..-2] - else - last = false - s = self - end - begin - value = Integer(s) - rescue - raise RGFA::FormatError, - "Wrong value for position: #{self}" - end - if !valid and value < 0 - raise RGFA::ValueError, - "Negative position value (#{self})" - end - return last ? RGFA::LastPos.new(value) : value - end - -end - -class Integer - - # Compatibility with RGFA::LastPos#value - # @return [self] - def value - self - end - - # Compatibility with RGFA::LastPos#value - # @return [false] - def last? - false - end - - # Return true if zero - # @return [Boolean] - def first? - self == 0 - end - - # Convert to a RGFA::LastPos instance - # @return [RGFA::LastPos] - def to_lastpos(valid: false) - if !valid and self < 0 - raise RGFA::ValueError, - "Negative position value (#{self})" - end - RGFA::LastPos.new(self) - end - -end diff --git a/lib/rgfa/line.rb b/lib/rgfa/line.rb deleted file mode 100644 index 4170f41..0000000 --- a/lib/rgfa/line.rb +++ /dev/null @@ -1,58 +0,0 @@ -require "set" - -# -# Generic representation of a record of a RGFA file. -# -# @!macro [new] rgfa_line -# @note -# This class is usually not meant to be directly initialized by the user; -# initialize instead one of its child classes, which define the concrete -# different record types. 
-# -class RGFA::Line; end - -# submodules of RGFA::Line::Common define methods which are included -# in line or in its subclasses -RGFA::Line::Common = Module.new -require_relative "line/common/init" -require_relative "line/common/dynamic_fields" -require_relative "line/common/writer" -require_relative "line/common/version_conversion" -require_relative "line/common/field_datatype" -require_relative "line/common/field_data" -require_relative "line/common/equivalence" -require_relative "line/common/cloning" -require_relative "line/common/connection" -require_relative "line/common/virtual_to_real" -require_relative "line/common/update_references" -require_relative "line/common/disconnection" -require_relative "line/common/validate" - -class RGFA::Line - include RGFA::Line::Common::Init - include RGFA::Line::Common::DynamicFields - include RGFA::Line::Common::Writer - include RGFA::Line::Common::VersionConversion - include RGFA::Line::Common::FieldDatatype - include RGFA::Line::Common::FieldData - include RGFA::Line::Common::Equivalence - include RGFA::Line::Common::Cloning - include RGFA::Line::Common::Connection - include RGFA::Line::Common::VirtualToReal - include RGFA::Line::Common::UpdateReferences - include RGFA::Line::Common::Disconnection - include RGFA::Line::Common::Validate -end - -# -# Require the child classes -# -require_relative "line/header.rb" -require_relative "line/segment.rb" -require_relative "line/comment.rb" -require_relative "line/custom_record.rb" -require_relative "line/gap.rb" -require_relative "line/fragment.rb" -require_relative "line/edge.rb" -require_relative "line/group.rb" -require_relative "line/unknown.rb" diff --git a/lib/rgfa/line/comment.rb b/lib/rgfa/line/comment.rb deleted file mode 100644 index ac124af..0000000 --- a/lib/rgfa/line/comment.rb +++ /dev/null @@ -1,103 +0,0 @@ -# A comment line of a RGFA file -# -# The content of the comment line, excluding the initial +#+ and eventual -# initial spacing characters, is included in 
the field +content+. -# -# The initial spacing characters can be read/changed using the +spacer+ -# field. The default value is a single space. -# -# Tags are not supported by comment lines. If the line contains tags, -# these are nor parsed, but included in the +content+ field. -# Trying to set or get tag values raises exceptions. -# -# @example Direct instantiation -# l = RGFA::Line::Comment.new(["hallo"]) -# l.to_s # => "# hallo" -# # second field is the spacer (default: single space) -# l = RGFA::Line::Comment.new(["hallo", "\t"]) -# l.to_s # => "#\thallo" -# -# @example Validation -# # Content shall not contain newlines: -# RGFA::Line::Comment.new(["hallo\nhallo"]) # raises RGFA::FormatError -# # Spacer shall not contain newlines: -# RGFA::Line::Comment.new(["hallo", "\n"]) # raises RGFA::FormatError -# # Validations can be turned off: -# RGFA::Line::Comment.new(["hallo", "\n"], vlevel: 0) # nothing raised -# # No validations on content setting by default -# l = RGFA::Line::Comment.new(["hallo"]) -# l.content = "hallo\n" # nothing raised -# l.to_s # raises RGFA::FormatError -# # Validations on content setting can be turned on -# l = RGFA::Line::Comment.new(["hallo"], vlevel: 3) -# l.content = "hallo\n" # raises RGFA::FormatError -# -# @example Non-spacing characters in spacer -# l = RGFA::Line::Comment.new(["hallo", ": "]) -# # non-spacing chars will not be recognized -# # when converting the string representation back to a Line object -# l.to_s.to_rgfa_line.content # => ": hallo" -# # however, it works when converting back from array representation -# l.to_a.to_rgfa_line.content # => "hallo" -# # or if the spacer does not contain non-spacing chars -# l.spacer = " " -# l.to_s.to_rgfa_line.content # => "hallo" -# -# @example From string -# l = "# hallo".to_rgfa_line -# l.content # => "hallo" -# l.spacer # => " " -# # initializing from string, only spacing characters are recognized as spacer -# l = "#: hallo".to_rgfa_line -# l.content # => ": hallo" -# l.spacer 
# => "" -# -# @example To string -# l = "# hallo".to_rgfa_line -# l.to_s # => "# hallo" -# l.spacer = "" -# l.to_s # => "#hallo" -# l = "# hallo".to_rgfa_line(vlevel: 2) -# l.spacer = "\n" # raises RGFA::FormatError as validation >= 2 -# # XXX check validation levels here -# -# @example Comment lines have no tags -# RGFA::Line::Comment.new(["hallo", " ", "zz:Z:hallo"]) -# # => raises RGFA::ValueError -# l = "# hallo zz:Z:hallo".to_rgfa_line -# l.content # => "hallo zz:Z:hallo" -# l.zz # => raises NoMethodError -# l.zz = 1 # raises NoMethodError -# l.set(:zz, 1) # raises RGFA::RuntimeError -# l.get(:zz) # returns nil -# -# @tested_in api_comments -class RGFA::Line::Comment < RGFA::Line - - RECORD_TYPE = :"#" - POSFIELDS = [:content, :spacer] - PREDEFINED_TAGS = [] - DATATYPE = { - :content => :comment, - :spacer => :comment, - } - NAME_FIELD = nil - STORAGE_KEY = nil - FIELD_ALIAS = {} - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions -end - -require_relative "comment/init.rb" -require_relative "comment/tags.rb" -require_relative "comment/writer.rb" - -class RGFA::Line::Comment - include RGFA::Line::Comment::Init - include RGFA::Line::Comment::Tags - include RGFA::Line::Comment::Writer -end diff --git a/lib/rgfa/line/comment/init.rb b/lib/rgfa/line/comment/init.rb deleted file mode 100644 index 3e0a856..0000000 --- a/lib/rgfa/line/comment/init.rb +++ /dev/null @@ -1,21 +0,0 @@ -# Allows to create comments by using a default single-space spacer -# or by adding a spacer as second string in the array passed to -# RGFA::Line::Comment.new -module RGFA::Line::Comment::Init - - private - - def initialize_positional_fields(strings) - init_field_value(:content, :comment, strings[0], errmsginfo: strings) - sp = strings.size > 1 ? 
strings[1] : " " - init_field_value(:spacer, :comment, sp, errmsginfo: strings) - end - - def initialize_tags(strings) - if strings.size > 2 - raise RGFA::ValueError, - "Comment lines do not support tags" - end - end - -end diff --git a/lib/rgfa/line/comment/tags.rb b/lib/rgfa/line/comment/tags.rb deleted file mode 100644 index 7f53a89..0000000 --- a/lib/rgfa/line/comment/tags.rb +++ /dev/null @@ -1,26 +0,0 @@ -# Comment lines do not support tags. -# @tested_in api_comments -module RGFA::Line::Comment::Tags - - # Set the value of the comment or sp field - # - # @param fieldname [:comment, :sp] the name of the field to set - # @raise [RGFA::RuntimeError] if +fieldname+ is not one of the above - # @return [Object] +value+ - def set(fieldname, value) - if [:comment, :sp].include?(fieldname.to_sym) - super - else - raise RGFA::RuntimeError, - "Tags of comment lines cannot be set" - end - end - - private - - def method_missing(m, *args, &block) - raise NoMethodError, - "undefined method `#{m}' for #{self.inspect}" - end - -end diff --git a/lib/rgfa/line/comment/writer.rb b/lib/rgfa/line/comment/writer.rb deleted file mode 100644 index 82654ea..0000000 --- a/lib/rgfa/line/comment/writer.rb +++ /dev/null @@ -1,23 +0,0 @@ -# Output the spacer before the content -# @tested_in api_comments -module RGFA::Line::Comment::Writer - - # @return [String] a string representation of self - def to_s - "##{spacer}#{content}" - end - - alias_method :to_gfa1_s, :to_s - alias_method :to_gfa2_s, :to_s - - # @api private - module API_PRIVATE - - def to_a - ["#", content, spacer] - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/line/common/cloning.rb b/lib/rgfa/line/common/cloning.rb deleted file mode 100644 index c8eeac5..0000000 --- a/lib/rgfa/line/common/cloning.rb +++ /dev/null @@ -1,35 +0,0 @@ -module RGFA::Line::Common::Cloning - - # Clone a RGFA::Line instance. 
- # - # The copy will be disconnected, ie do not belong to the RGFA and do not - # contain cross-references to other lines. This allows to edit the line (eg. - # changing the unique ID) before adding it. To achieve this, all reference - # fields are copied in their string representation. All other fields are - # copied as they are, and a deep copy is done for arrays, strings and JSON - # fields. - # - # @tested_in unit_line_cloning - # - # @return [RGFA::Line] deep disconnected copy of the line instance - def clone - data_cpy = {} - @data.each_pair do |k, v| - if self.class::REFERENCE_FIELDS.include?(k) - data_cpy[k] = field_to_s(k).clone - elsif field_datatype(k) == :J - data_cpy[k] = JSON.parse(v.to_json) - elsif v.kind_of?(Array) or v.kind_of?(String) - data_cpy[k] = v.clone - else - data_cpy[k] = v - end - end - cpy = self.class.new(data_cpy, vlevel: @vlevel, virtual: @virtual, - version: @version) - cpy.instance_variable_set("@datatype", @datatype.clone) - # @refs and @rgfa are not set, so that the cpy is disconnected - return cpy - end - -end diff --git a/lib/rgfa/line/common/connection.rb b/lib/rgfa/line/common/connection.rb deleted file mode 100644 index ad2efe1..0000000 --- a/lib/rgfa/line/common/connection.rb +++ /dev/null @@ -1,77 +0,0 @@ -# -# In a connected line, some of the fields are converted into references or -# array of references to other lines. Furthermore instance variables are -# populated with backreferences to the line (e.g. connection of a segment are -# stored as references in segment arrays), to allow graph traversal. -# -# @tested_in unit_line_connection -# -module RGFA::Line::Common::Connection - - # @return [Boolean] is the line connected to other lines of a RGFA instance? - def connected? - !@rgfa.nil? - end - - attr_reader :rgfa - - # Connect the line to a RGFA instance - # @param rgfa [RGFA] the RGFA instance - # @return [void] - def connect(rgfa) - if connected? 
- raise RGFA::RuntimeError, - "Line #{self} is already connected to a RGFA instance" - end - previous = rgfa.search_duplicate(self) - if !previous.nil? - if previous.virtual? - return substitute_virtual_line(previous) - else - return process_not_unique(previous) - end - else - @rgfa = rgfa - initialize_references - @rgfa.register_line(self) - return nil - end - end - - # @api private - module API_PRIVATE - - def add_reference(line, key, append: true) - @refs ||= {} - @refs[key] ||= [] - @refs[key].send(append ? :push : :unshift, line) - end - - end - include API_PRIVATE - - protected - - def refs - @refs ||= {} - end - - private - - # @note SUBCLASSES with reference fields shall - # overwrite this method to connect their reference - # fields - def initialize_references - end - - # @note SUBCLASSES may overwrite this method - # if some kind of non unique lines shall be - # tolerated or handled differently (eg complement links) - def process_not_unique(previous) - raise RGFA::NotUniqueError, - "Line: #{self.to_s}\n"+ - "Line or ID not unique\n"+ - "Matching previous line: #{previous.to_s}" - end - -end diff --git a/lib/rgfa/line/common/disconnection.rb b/lib/rgfa/line/common/disconnection.rb deleted file mode 100644 index 84fed5a..0000000 --- a/lib/rgfa/line/common/disconnection.rb +++ /dev/null @@ -1,144 +0,0 @@ -# @tested_in unit_line_connection -module RGFA::Line::Common::Disconnection - - # Remove the line from the RGFA instance it belongs to, if any. - # - # The Line instance itself will still exist, but all references from it to - # other lines are deleted, as well as references to it from other lines. - # Mandatory references are turned into their non-reference representations - # (e.g. segments references in the sid fields of E lines - # or in the from/to lines of L/C lines are changed into symbols). - # - # @return [void] - def disconnect - if !connected? 
- raise RGFA::RuntimeError, - "Line #{self} is not connected to a RGFA instance" - end - remove_field_backreferences - remove_field_references - disconnect_dependent_lines - remove_nonfield_backreferences - remove_nonfield_references - @rgfa.unregister_line(self) - @rgfa = nil - end - - # @api private - module API_PRIVATE - - def delete_reference(line, key) - return if !@refs or !@refs[key] - idx = @refs[key].index {|x| x.equal?(line)} - return if idx.nil? - @refs[key] = - ((idx == 0 ? [] : @refs[key][0..idx-1]) + @refs[key][idx+1..-1]) - end - - def delete_first_reference(key) - return if !@refs or !@refs[key] - @refs[key].shift - end - - def delete_last_reference(key) - return if !@refs or !@refs[key] - @refs[key].pop - end - - end - include API_PRIVATE - - private - - # @note currently this method supports fields which are: references, - # oriented lines and arrays of references of oriented lines; - # if SUBCLASSES have reference fields which contain references - # in a different fashion, the method must be updated or overwritten - # in the subclass - def remove_field_references - self.class::REFERENCE_FIELDS.each do |k| - ref = get(k) - case ref - when RGFA::Line - set_existing_field(k, ref.to_sym, set_reference: true) - when RGFA::OrientedLine - ref.line = ref.name - when Array - ref.map! 
do |elem| - if elem.kind_of?(RGFA::Line) - elem = elem.to_sym - elsif elem.kind_of?(RGFA::OrientedLine) - elem.line = elem.name - end - elem - end - end - end - end - - # note: consider implementing the following method: - # def each_reference_in(field, &block) - # case field - # when RGFA::Line - # yield field - # when RGFA::OrientedLine - # yield field.line - # when Array - # field.dup.each {|elem| each_reference_in(elem, &block)} - # end - # end - - def remove_backreference(ref, k) - case ref - when RGFA::Line - ref.update_references(self, nil, k) - when RGFA::OrientedLine - ref.line.update_references(self, nil, k) - when Array - ref.dup.each {|elem| remove_backreference(elem, k)} - end - end - - def disconnect_dependent_line(ref) - case ref - when RGFA::Line - ref.disconnect - when RGFA::OrientedLine - ref.line.disconnect - when Array - ref.dup.each {|elem| disconnect_dependent_line(elem)} - end - end - - # @note currently this method supports fields which are: references, - # oriented lines and arrays of references of oriented lines; - # if SUBCLASSES have reference fields which contain references - # in a different fashion, the method must be updated or overwritten - # in the subclass - def remove_field_backreferences - self.class::REFERENCE_FIELDS.each do |k| - remove_backreference(get(k), k) - end - end - - def disconnect_dependent_lines - self.class::DEPENDENT_LINES.each do |k| - refs.fetch(k, []).dup.each do |ref| - disconnect_dependent_line(ref) - end - end - end - - def remove_nonfield_backreferences - self.class::OTHER_REFERENCES.each do |k| - refs.fetch(k, []).dup.each do |ref| - remove_backreference(ref, k) - end - end - end - - def remove_nonfield_references - @refs = {} - end - -end diff --git a/lib/rgfa/line/common/dynamic_fields.rb b/lib/rgfa/line/common/dynamic_fields.rb deleted file mode 100644 index f2586f0..0000000 --- a/lib/rgfa/line/common/dynamic_fields.rb +++ /dev/null @@ -1,198 +0,0 @@ -# Create methods for access to non-existing but 
valid tag names. -# @tested_in unit_line_dynamic_fields -module RGFA::Line::Common::DynamicFields - - # Methods are dynamically created for non-existing but valid tag names. - # - # Methods for predefined tags and positional fields - # are created dynamically for each subclass; methods for existing tags - # are created on instance initialization. - # - # --- - # - (Object) (parse=true) - # The parsed content of a field. See also #get. - # - # Parameters: - # - # Returns: - # - (String, Hash, Array, Integer, Float) the parsed content of the field - # - (nil) if the field does not exist, but is a valid tag field name - # - # --- - # - (Object) !(parse=true) - # The parsed content of a field, raising an exception if not available. - # See also #get!. - # - # Returns: - # - (String, Hash, Array, Integer, Float) the parsed content of the field - # - # Raises: - # - (RGFA::NotFoundError) if the field does not exist - # - # --- - # - # - (self) =(value) - # Sets the value of a positional field or tag, - # or creates a new tag if the fieldname is - # non-existing but a valid tag name. See also #set, #set_datatype. - # - # Parameters: - # - +*value*+ (String|Hash|Array|Integer|Float) value to set - # - # --- - # - def method_missing(m, *args, &block) - super if virtual? - field_name, operation, state = split_method_name(m) - if ((operation == :get or operation == :get!) and args.size > 1) or - (operation == :set and args.size != 1) - raise RGFA::ArgumentError, "Wrong number of arguments \n"+ - "(method: #{m}; args.size = #{args.size})" - end - case state - when :invalid - super - when :existing - case operation - when :get - if args[0] == false - field_to_s(field_name) - else - get(field_name) - end - when :get! - if args[0] == false - field_to_s!(field_name) - else - get!(field_name) - end - when :set - set_existing_field(field_name, args[0]) - return nil - end - when :valid - case operation - when :get - return nil - when :get! 
- raise RGFA::NotFoundError, - "No value defined for tag #{field_name}" - when :set - set(field_name, args[0]) - return nil - end - end - end - - # Redefines respond_to? to correctly handle dynamical methods. - # @see #method_missing - def respond_to?(m, include_all=false) - super || (split_method_name(m)[2] != :invalid) - end - - private - - def split_method_name(m) - if @data.has_key?(m) - return m, :get, :existing - else - case m[-1] - when "!" - var = :get! - m = m[0..-2].to_sym - when "=" - var = :set - m = m[0..-2].to_sym - else - var = :get - end - if @data.has_key?(m) - state = :existing - elsif self.class::PREDEFINED_TAGS.include?(m) or - valid_custom_tagname?(m) - state = :valid - else - state = :invalid - end - return m, var, state - end - end - - # - # Define field methods for a single field - # - def define_field_methods(fieldname) - define_singleton_method(fieldname) do - get(fieldname) - end - define_singleton_method :"#{fieldname}!" do - get!(fieldname) - end - define_singleton_method :"#{fieldname}=" do |value| - set_existing_field(fieldname, value) - end - end - - def self.included(base) - base.extend(ClassMethods) - base.instance_eval do - private_class_method :apply_definitions - private_class_method :define_field_accessors - private_class_method :define_field_aliases - private_class_method :define_reference_getters - end - end - - module ClassMethods - - def apply_definitions - define_field_accessors - define_field_aliases - define_reference_getters - end - - def define_field_accessors - (self::POSFIELDS + - self::PREDEFINED_TAGS).each do |fieldname| - define_method(fieldname) do - get(fieldname) - end - define_method :"#{fieldname}!" do - get!(fieldname) - end - define_method :"#{fieldname}=" do |value| - set_existing_field(fieldname, value) - end - end - end - - def define_field_aliases - if !self::NAME_FIELD.nil? 
and !self::POSFIELDS.include?(:name) - self::FIELD_ALIAS[:name] = self::NAME_FIELD - end - self::FIELD_ALIAS.each do |k,v| - alias_method :"#{k}", :"#{v}" - alias_method :"#{k}!", :"#{v}!" - alias_method :"#{k}=", :"#{v}=" - end - end - - def define_reference_getters - (self::DEPENDENT_LINES + self::OTHER_REFERENCES).each do |k| - if !method_defined?(k) - define_method(k) do - @refs ||= {} - @refs.fetch(k, []).clone.freeze - end - end - end - if !method_defined?(:all_references) - define_method :all_references do - refs.values.flatten - end - end - end - - end - -end diff --git a/lib/rgfa/line/common/equivalence.rb b/lib/rgfa/line/common/equivalence.rb deleted file mode 100644 index 84d912b..0000000 --- a/lib/rgfa/line/common/equivalence.rb +++ /dev/null @@ -1,193 +0,0 @@ -# @tested_in unit_line_equivalence -module RGFA::Line::Common::Equivalence - - # Equivalence check - # @param other [RGFA::Line, Symbol] - # @return [Boolean] if other is a symbol, is it the same as - # the result of applying to_sym to the line?; if other is a line, - # does it have the same record type, positional fields and tags (value - # and datatype) as the line? - # @see RGFA::Line::Edge::Link#== - def ==(other) - return self.to_sym == other.to_sym if other.kind_of?(Symbol) - return false if !other.kind_of?(RGFA::Line) - return false if (other.record_type != self.record_type) - return false if other.data.keys.sort != data.keys.sort - other.data.each do |k, v| - if @data[k] != other.data[k] - if field_to_s(k) != other.field_to_s(k) - return false - end - end - end - return true - end - - # Returns always false, as a line is not a placeholder (for compatibility - # with other objects which can be representable as placeholders, such as - # symbols, strings, arrays). - # @return [false] - def placeholder? - false - end - - # Computes the differences between the line and another line. 
- # @return [Array>] information about the differences; - # an empty array if no difference found - def diff(other) - if self.record_type != other.record_type - return [:incompatible, :record_type, self.record_type, other.record_type] - end - if self.class != other.class - raise RGFA::AssertionError if self.version == other.version - return [:incompatible, :version, self.version, other.version] - end - differences = [] - positional_fieldnames.each do |fieldname| - value1 = field_to_s(fieldname) - value2 = other.field_to_s(fieldname) - if value1 != value2 - differences << [:different, :positional_field, - fieldname, value1, value2] - end - end - (self.tagnames - other.tagnames).each do |tagname| - differences << [:exclusive, :<, :tag, - tagname, get_datatype(tagname), get(tagname)] - end - (other.tagnames - self.tagnames).each do |tagname| - differences << [:exclusive, :>, :tag, - tagname, other.get_datatype(tagname), other.get(tagname)] - end - (self.tagnames & other.tagnames).each do |tagname| - tag1 = field_to_s(tagname, tag: true) - tag2 = other.field_to_s(tagname, tag: true) - if tag1 != tag2 - differences << [:different, :tag, tagname, - get_datatype(tagname), field_to_s(tagname), - other.get_datatype(tagname), other.field_to_s(tagname)] - end - end - return differences - end - - # Computes a RGFA Ruby script for converting line into other - # @return [String] - def diffscript(other, selfvar) - diffinfo = diff(other) - outscript = [] - diffinfo.each do |diffitem| - if diffitem[0] == :incompatible - if diffitem[1] == :record_type - raise RGFA::RuntimeError, - "Cannot compute conversion script: different record type\n"+ - "Line: #{self}\n"+ - "Other: #{other}\n"+ - "#{diffitem[2]} != #{diffitem[3]}" - elsif diffitem[1] == :version - raise RGFA::RuntimeError, - "Cannot compute conversion script: different GFA version\n"+ - "Line: #{self}\n"+ - "Other: #{other}\n"+ - "#{diffitem[2]} != #{diffitem[3]}" - end - elsif diffitem[0] == :different - if diffitem[1] == 
:positional_field - outscript << - "#{selfvar}.set(:#{diffitem[2]},'#{diffitem[4].gsub("'","\'")}')" - elsif diffitem[1] == :tag - if diffitem[3] != diffitem[5] - outscript << - "#{selfvar}.set_datatype(:#{diffitem[2]},:#{diffitem[5]})" - end - if diffitem[4] != diffitem[6] - outscript << - "#{selfvar}.set(:#{diffitem[2]},'#{diffitem[6].gsub("'","\'")}')" - end - end - elsif diffitem[0] == :exclusive - if diffitem[1] == :> - if diffitem[2] == :tag - outscript << - "#{selfvar}.set_datatype(:#{diffitem[3]},:#{diffitem[4]})" - outscript << - "#{selfvar}.set(:#{diffitem[3]},'#{diffitem[5].gsub("'","\'")}')" - end - elsif diffitem[1] == :< - if diffitem[2] == :tag - outscript << - "#{selfvar}.delete(:#{diffitem[3]})" - end - end - end - end - return outscript.join("\n") - end - - # @api private - module API_PRIVATE - - # Compares the field values line instance to an hash of - # fieldnames => values. - # - # Each field in the line must have a value equal to that indicated - # in the hash, except those indicated in the +ignore_fields+ array - # and those containing placeholder values. The values can be decoded - # (e.g. 1, an Integer) or encoded (e.g. "1", a String). - # - # @param hash [Hash(Symbol=>Object)] an hash value of fieldnames => values - # @param ignore_fields [Array] list of hash keys to skip in the comparison - # - # @return [Boolean] - def field_values?(hash, ignore_fields = []) - if hash[:record_type] and !ignore_fields.include?(:record_type) - return false if record_type != hash[:record_type] - end - ((hash.keys - ignore_fields) - [:record_type]).each do |fieldname| - value = get(fieldname) - return false if value.nil? - next if value.placeholder? - if value != hash[fieldname] - return false if field_to_s(fieldname) != hash[fieldname] - end - end - return true - end - - # Compares the fields of the line to those of a reference line. - # - # The record type, positional fields and tags are compared, except - # those listed in the +ignore_fields+ list. 
Fields containing placeholder - # values in any of the two lines are ignored. - # - # @param refline [RGFA::Line] a reference line instance for the comparison - # @param ignore_fields [Array] list of fields (record_type, positional - # fields and/or tags) to skip in the comparison; the special value +:name+ - # will exclude the name field from the comparison - # - # @return [Boolean] - def eql_fields?(refline, ignore_fields = []) - dealias_fieldnames!(ignore_fields) - unless ignore_fields.include?(:record_type) - return false if self.record_type != refline.record_type - end - fieldnames = (refline.positional_fieldnames + refline.tagnames) - fieldnames -= ignore_fields - if ignore_fields.include?(:name) - fieldnames.delete(refline.class::NAME_FIELD) - end - fieldnames.each do |fieldname| - refvalue = refline.get(fieldname) - next if refvalue.placeholder? - value = get(fieldname) - return false if value.nil? - next if value.placeholder? - return false if value != refvalue - end - return true - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/line/common/field_data.rb b/lib/rgfa/line/common/field_data.rb deleted file mode 100644 index 14c3546..0000000 --- a/lib/rgfa/line/common/field_data.rb +++ /dev/null @@ -1,163 +0,0 @@ -# -# Access to the data contained in fields. -# -# @tested_in api_tags, api_positionals, unit_line -# -module RGFA::Line::Common::FieldData - - # @return [Symbol] record type code - def record_type - self.class::RECORD_TYPE - end - - # @return [Array] name of the positional fields - # @note these names are not always the field names - # in the specification, - # as these may be implemented as aliases to cope with - # different names for the same content in GFA1 vs GFA2 - # @api private - def positional_fieldnames - if @version.nil? 
- raise RGFA::VersionError, "Version is not set" - end - self.class::POSFIELDS - end - - # @return [Array] name of the defined tags - def tagnames - (@data.keys - positional_fieldnames) - end - - # Set the value of a field. - # - # If a datatype for a new custom tag is not set, - # the default for the value assigned to the field will be used - # (e.g. J for Hashes, i for Integer, etc). - # - # @param fieldname [Symbol] the name of the field to set - # (positional field, predefined tag (uppercase) or custom tag (lowercase)) - # @raise [RGFA::FormatError] if +fieldname+ is not a - # valid predefined or custom tag name - # @return [Object] +value+ - def set(fieldname, value) - if @data.has_key?(fieldname) or predefined_tag?(fieldname) - return set_existing_field(fieldname, value) - elsif self.class::FIELD_ALIAS.has_key?(fieldname) - return set(self.class::FIELD_ALIAS[fieldname], value) - elsif virtual? - raise RGFA::RuntimeError, "Virtual lines do not have tags" - elsif (@vlevel == 0) or valid_custom_tagname?(fieldname) - define_field_methods(fieldname) - if !@datatype[fieldname].nil? - return set_existing_field(fieldname, value) - elsif !value.nil? - @datatype[fieldname] = value.default_gfa_tag_datatype - return @data[fieldname] = value - end - else - raise RGFA::FormatError, - "#{fieldname} is not an existing or predefined field or a "+ - "valid custom tag" - end - end - - # Get the value of a field - # @param fieldname [Symbol] name of the field - # @return [Object,nil] value of the field - # or +nil+ if field is not defined - def get(fieldname) - v = @data[fieldname] - if v.kind_of?(String) - t = field_datatype(fieldname) - if t != :Z and t != :seq - # value was not parsed or was set to a string by the user - return (@data[fieldname] = v.parse_gfa_field(t, - safe: @vlevel >= 1, - fieldname: fieldname, - line: @data)) - else - v.validate_gfa_field(t, fieldname) if (@vlevel >= 3) - end - elsif !v.nil? 
- if (@vlevel >= 3) - t = field_datatype(fieldname) - v.validate_gfa_field(t, fieldname) - end - else - dealiased_fieldname = self.class::FIELD_ALIAS[fieldname] - return get(dealiased_fieldname) if !dealiased_fieldname.nil? - end - return v - end - - # Value of a field, raising an exception if it is not defined - # @param fieldname [Symbol] name of the field - # @raise [RGFA::NotFoundError] if field is not defined - # @return [Object,nil] value of the field - def get!(fieldname) - v = get(fieldname) - raise RGFA::NotFoundError, - "No value defined for tag #{fieldname}" if v.nil? - return v - end - - # Remove a tag from the line, if it exists; do nothing if it does not - # @param tagname [Symbol] the tag name of the tag to remove - # @return [Object, nil] the deleted value or nil, if the field was not defined - def delete(tagname) - if tagnames.include?(tagname) - @datatype.delete(tagname) - return @data.delete(tagname) - else - return nil - end - end - - protected - - def data - @data - end - - private - - def set_existing_field(fieldname, value, set_reference: false) - renaming_connected = false - if @rgfa - if !set_reference and - (self.class::REFERENCE_FIELDS.include?(fieldname) or - self.class::BACKREFERENCE_RELATED_FIELDS.include?(fieldname)) - raise RGFA::Line::RuntimeError, - "The value of field '#{fieldname}' cannot be changed"+ - "as the line belongs to a RGFA instance" - end - if (fieldname == self.class::STORAGE_KEY) or - (self.class::STORAGE_KEY == :name and - fieldname == self.class::NAME_FIELD) - renaming_connected = true - @rgfa.unregister_line(self) - end - end - if value.nil? 
- @data.delete(fieldname) - else - if @vlevel >= 3 - field_or_default_datatype(fieldname, value) - value.validate_gfa_field(field_datatype(fieldname), fieldname) - end - @data[fieldname] = value - end - if renaming_connected - @rgfa.register_line(self) - end - end - - def dealias_fieldname(fieldname) - self.class::FIELD_ALIAS.fetch(fieldname, fieldname) - end - - def dealias_fieldnames!(fieldnames) - fieldnames.map!{|fieldname| dealias_fieldname(fieldname)} - end - -end diff --git a/lib/rgfa/line/common/field_datatype.rb b/lib/rgfa/line/common/field_datatype.rb deleted file mode 100644 index 73e7edd..0000000 --- a/lib/rgfa/line/common/field_datatype.rb +++ /dev/null @@ -1,68 +0,0 @@ -# -# Methods to set and get the datatype of fields. -# -# @tested_in api_tags -# -module RGFA::Line::Common::FieldDatatype - - # Returns a symbol, which specifies the datatype of a field - # - # @param fieldname [Symbol] the tag name of the field - # @return [RGFA::Field::FIELD_DATATYPE] the datatype symbol - def get_datatype(fieldname) - fieldname = self.class::FIELD_ALIAS.fetch(fieldname, fieldname) - field_or_default_datatype(fieldname, @data[fieldname]) - end - - # Set the datatype of a tag. - # - # If an existing tag datatype is changed, its content may become - # invalid (call #validate_field if necessary). 
- # - # @param fieldname [Symbol] the field name (it is not required that - # the field exists already) - # @param datatype [RGFA::Field::FIELD_DATATYPE] the datatype - # @raise [RGFA::ArgumentError] if +datatype+ is not - # a valid datatype for tags - # @return [RGFA::Field::FIELD_DATATYPE] the datatype - def set_datatype(fieldname, datatype) - if predefined_tag?(fieldname) - if get_datatype(fieldname) != datatype - raise RGFA::RuntimeError, - "Cannot set the datatype of #{fieldname} to #{datatype}\n"+ - "The datatype of a predefined tag cannot be changed" - return - end - elsif !valid_custom_tagname?(fieldname) and @vlevel > 0 - raise RGFA::FormatError, - "#{fieldname} is not a valid custom tag name" - end - unless RGFA::Field::TAG_DATATYPE.include?(datatype) - raise RGFA::ArgumentError, "Unknown datatype: #{datatype}" - end - @datatype[fieldname] = datatype - end - - protected - - def datatype - @datatype - end - - private - - def field_datatype(fieldname) - @datatype.fetch(fieldname, self.class::DATATYPE[fieldname]) - end - - def field_or_default_datatype(fieldname, value) - t = field_datatype(fieldname) - if t.nil? - return nil if value.nil? - t = value.default_gfa_tag_datatype - @datatype[fieldname] = t - end - return t - end - -end diff --git a/lib/rgfa/line/common/init.rb b/lib/rgfa/line/common/init.rb deleted file mode 100644 index 6cdc836..0000000 --- a/lib/rgfa/line/common/init.rb +++ /dev/null @@ -1,346 +0,0 @@ -# -# Initialization of line instances. -# @tested_in unit_line -# -module RGFA::Line::Common::Init - - # List of allowed record_type values - RECORD_TYPES = [ :H, :S, :L, :C, :P, :"#", :G, :F, :E, :O, :U, nil ] - - # List of data types which are parsed only on access; - # all other are parsed when read. 
- DELAYED_PARSING_DATATYPES = [ - :alignment_gfa1, - :alignment_gfa2, - :alignment_list_gfa1, - :oriented_segments, - :H, - :J, - :B, - ] - - # Dependency of record type from version - # - specific => only for a specific version - # - generic => same syntax for all versions - # - different => different syntax in different versions - RECORD_TYPE_VERSIONS = - { - :specific => - {:gfa1 => [:C, :L, :P], - :gfa2 => [:E, :G, :F, :O, :U, nil]}, - :generic => [:H, :"#"], - :different => [:S] - } - - # @param data [Array] the content of the line; if - # an array of strings, this is interpreted as the splitted content - # of a GFA file line; note: an hash - # is also allowed, but this is for internal usage and shall be considered - # private - # @param vlevel [Integer] see paragraph Validation - # @param virtual [Boolean] (default: +false+) - # mark the line as virtual, i.e. not yet found in the GFA file; - # e.g. a link is allowed to refer to a segment which is not - # yet created; in this case a segment marked as virtual is created, - # which is replaced by a non-virtual segment, when the segment - # line is later found - # @param version [RGFA::VERSIONS, nil] GFA version, nil if unknown - # - # Constants defined by subclasses - # - # Subclasses of RGFA::Line _must_ define the following constants: - # - RECORD_TYPE [RGFA::Line::RECORD_TYPES] - # - POSFIELDS [Array] positional fields - # - FIELD_ALIAS [Hash{Symbol=>Symbol}] alternative names for positional - # fields - # - PREDEFINED_TAGS [Array] predefined tags - # - DATATYPE [Hash{Symbol=>Symbol}]: - # datatypes for the positional fields and the tags - # - # @raise [RGFA::FormatError] - # if too less positional fields are specified - # @raise [RGFA::FormatError] - # if a non-predefined tag uses upcase letters - # @raise [RGFA::NotUniqueError] - # if a tag name is used more than once - # @raise [RGFA::TypeError] - # if the type of a predefined tag does not - # respect the specified type. 
- # - # @return [RGFA::Line] - # - # Validation levels - # - # - 0: no validation (validate manually if needed) - # - 1: (default) validation when parsing/accessing for the first time a field - # - 2: validation when parsing/accessing for the first time as well as - # when converting a field to string - # - 3: validation on each field access - # - def initialize(data, vlevel: 1, virtual: false, version: nil) - unless self.class.const_defined?(:"RECORD_TYPE") - raise RGFA::RuntimeError, "This class shall not be directly instantiated" - end - @vlevel = vlevel - @virtual = virtual - @datatype = {} - @data = {} - @rgfa = nil - @version = version - if data.kind_of?(Hash) - @data.merge!(data) - else - # normal initialization, data is an array of strings - if @version.nil? - process_unknown_version(data) - else - validate_version - initialize_positional_fields(data) - initialize_tags(data) - end - validate_record_type_specific_info if @vlevel >= 1 - if @version.nil? - raise "RECORD_TYPE_VERSIONS has no value for #{record_type}" - end - end - end - - # @return self - # @param vlevel [Boolean] ignored (compatibility reasons) - # @param version [Boolean] ignored (compatibility reasons) - def to_rgfa_line(vlevel: nil, version: nil) - self - end - - private - - def process_unknown_version(data) - rt = self.class::RECORD_TYPE - if RECORD_TYPE_VERSIONS[:generic].include?(rt) - @version = :generic - initialize_positional_fields(data) - initialize_tags(data) - return - end - RECORD_TYPE_VERSIONS[:specific].each do |k, v| - if v.include?(rt) - @version = k - initialize_positional_fields(data) - initialize_tags(data) - return - end - end - if RECORD_TYPE_VERSIONS[:different].include?(rt) - raise RGFA::RuntimeError, - "GFA version not specified\n"+ - "Records of type #{rt} have different syntax according to the version" - end - end - - def validate_version - rt = self.class::RECORD_TYPE - if !RGFA::VERSIONS.include?(@version) - raise RGFA::VersionError, - "GFA specification version 
unknown (#{version})" - else - RECORD_TYPE_VERSIONS[:specific].each do |k, v| - if v.include?(rt) - if version != k - raise RGFA::VersionError, - "Records of type #{record_type} are incompatible "+ - "with version #{@version}" - end - return - end - end - end - end - - def n_positional_fields - self.class::POSFIELDS.size - end - - def init_field_value(n ,t, s, errmsginfo: nil) - if @vlevel >= 1 - s = s.parse_gfa_field(t, safe: true, fieldname: n, - line: errmsginfo) - elsif !DELAYED_PARSING_DATATYPES.include?(t) - s = s.parse_gfa_field(t, safe: @vlevel >= 1, fieldname: n, - line: errmsginfo) - end - @data[n] = s - end - - def initialize_positional_fields(strings) - if @version.nil? - raise RGFA::AssertionError, - "Bug found, please report\n"+ - "strings: #{strings.inspect}" - end - if (@vlevel >= 1) and (strings.size < n_positional_fields) - raise RGFA::FormatError, - "#{n_positional_fields} positional fields expected, "+ - "#{strings.size}) found\n#{strings.inspect}" - end - n_positional_fields.times do |i| - n = self.class::POSFIELDS[i] - init_field_value(n, self.class::DATATYPE[n], strings[i], - errmsginfo: strings) - end - end - - def initialize_tags(strings) - n_positional_fields.upto(strings.size-1) do |i| - initialize_tag(*strings[i].parse_gfa_tag, errmsginfo: strings) - end - end - - def initialize_tag(n, t, s, errmsginfo: nil) - if @vlevel > 0 - if @data.has_key?(n) - raise RGFA::NotUniqueError, - "Tag #{n} found multiple times" - elsif predefined_tag?(n) - validate_predefined_tag_type(n, t) - else - validate_custom_tagname(n) - @datatype[n] = t - end - else - (@datatype[n] = t) if !field_datatype(n) - end - init_field_value(n, t, s, errmsginfo: errmsginfo) - end - - def self.included(base) - base.extend(ClassMethods) - base.class_eval do - private_class_method :subclass_GFA1 - private_class_method :subclass_GFA2 - private_class_method :subclass_unknown_version - end - end - - module ClassMethods - - # Select a subclass based on the record type - # @param 
version [RGFA::VERSIONS, nil] GFA version, nil if unknown - # @raise [RGFA::TypeError] if the record_type is not valid - # @raise [RGFA::VersionError] if the version is unknown - # @return [Class] a subclass of RGFA::Line - def subclass(record_type, version: nil) - case version - when :gfa1 - subclass_GFA1(record_type) - when :gfa2 - subclass_GFA2(record_type) - when nil - subclass_unknown_version(record_type) - else - raise RGFA::VersionError, - "GFA specification version unknown (#{version})" - end - end - - def subclass_GFA1(record_type) - if record_type.nil? - raise RGFA::VersionError, - "RGFA uses virtual records of unknown type for GFA2 only" - end - case record_type.to_sym - when :H then RGFA::Line::Header - when :S then RGFA::Line::Segment::GFA1 - when :"#" then RGFA::Line::Comment - when :L then RGFA::Line::Edge::Link - when :C then RGFA::Line::Edge::Containment - when :P then RGFA::Line::Group::Path - else raise RGFA::VersionError, - "Custom record types are not supported in GFA1: '#{record_type}'" - end - end - - def subclass_GFA2(record_type) - case record_type.to_sym - when :H then RGFA::Line::Header - when :S then RGFA::Line::Segment::GFA2 - when :"#" then RGFA::Line::Comment - when :E then RGFA::Line::Edge::GFA2 - when :F then RGFA::Line::Fragment - when :G then RGFA::Line::Gap - when :O then RGFA::Line::Group::Ordered - when :U then RGFA::Line::Group::Unordered - else RGFA::Line::CustomRecord - end - end - - def subclass_unknown_version(record_type) - case record_type.to_sym - when :H then RGFA::Line::Header - when :S then RGFA::Line::Segment::Factory - when :"#" then RGFA::Line::Comment - when :L then RGFA::Line::Edge::Link - when :C then RGFA::Line::Edge::Containment - when :P then RGFA::Line::Group::Path - when :E then RGFA::Line::Edge::GFA2 - when :F then RGFA::Line::Fragment - when :G then RGFA::Line::Gap - when :O then RGFA::Line::Group::Ordered - when :U then RGFA::Line::Group::Unordered - else RGFA::Line::CustomRecord - end - end - - end - 
-end - -# Extensions to the String core class. -# -class String - - # Parses a line of a RGFA file and creates an object of the correct - # record type child class of {RGFA::Line} - # @return [subclass of RGFA::Line] - # @raise [RGFA::Error] if the fields do not comply to the RGFA specification - # @param vlevel [Integer] (defaults to: 1) - # see RGFA::Line#initialize - # @param version [RGFA::VERSIONS, nil] GFA version, nil if unknown - def to_rgfa_line(vlevel: 1, version: nil) - if self[0] == "#" - self =~ /^#(\s*)(.*)$/ - return RGFA::Line::Comment.new([$2, $1], - vlevel: vlevel, - version: version) - else - split(RGFA::Line::SEPARATOR).to_rgfa_line(vlevel: vlevel, - version: version) - end - end - -end - -# Extensions to the Array core class. -# -class Array - - # Parses an array containing the fields of a RGFA file line and creates an - # object of the correct record type child class of {RGFA::Line} - # @note - # This method modifies the content of the array; if you still - # need the array, you must create a copy before calling it - # @return [subclass of RGFA::Line] - # @raise [RGFA::Error] if the fields do not comply to the RGFA specification - # @param vlevel [Integer] (defaults to: 1) - # see RGFA::Line#initialize - # @param version [RGFA::VERSIONS, nil] GFA version, nil if unknown - # @api private - def to_rgfa_line(vlevel: 1, version: nil) - sk = RGFA::Line.subclass(self[0], version: version) - if sk == RGFA::Line::CustomRecord - sk.new(self, vlevel: vlevel, version: version) - else - sk.new(self[1..-1], vlevel: vlevel, version: version) - end - end - -end diff --git a/lib/rgfa/line/common/update_references.rb b/lib/rgfa/line/common/update_references.rb deleted file mode 100644 index 67d3d1c..0000000 --- a/lib/rgfa/line/common/update_references.rb +++ /dev/null @@ -1,121 +0,0 @@ -# Update of references caused by a virtual line becoming real. 
-# -# @tested_in unit_line_connection -# -module RGFA::Line::Common::UpdateReferences - - # @api private - module API_PRIVATE - - # When a line is found, which substitutes a virtual line, this method - # is called on each line which had references to the virtual line. - # - # @note SUBCLASSES which can be referenced by virtual lines - # may implement a specialize #backreferences_keys method to - # support this mechanism (the default will work in all cases - # of the current specification, but is not optimized for record type) - # - # @param oldref [RGFA::Line] - # @param newref [RGFA::Line] - # @param key_in_ref [Array] key of the reference in the - # line _referencing_ this line (note: not _in this line_) - # @return [void] - # - def update_references(oldref, newref, key_in_ref) - keys = backreference_keys(oldref, key_in_ref) - update_field_references(oldref, newref, - self.class::REFERENCE_FIELDS & keys) - if instance_variable_defined?(:@refs) - # note: keeping the two types of nonfield references separate helps - # in subclasses where only one must be redefined - update_dependent_line_references(oldref, newref, - self.class::DEPENDENT_LINES & @refs.keys & keys) - update_other_references(oldref, newref, - self.class::OTHER_REFERENCES & @refs.keys & keys) - end - end - - end - include API_PRIVATE - - private - - # Return a list of fields and/or @ref keys, which indicates - # where a reference "ref" _may_ be stored (in order to be able - # to locate it and update it). - # - # The default is: all reference fields, dependent line references - # and other references. 
- # - # @note SUBCLASSES may overwrite this method if they - # can be referenced by virtual lines, by providing more - # specific results, depending on the ref and key_in_ref; - # this can make the update faster - # @return [Array] fieldnames and/or @refs keys - def backreference_keys(ref, key_in_ref) - self.class::REFERENCE_FIELDS + - self.class::DEPENDENT_LINES + - self.class::OTHER_REFERENCES - end - - # @note this methods supports fields which contain references, - # oriented lines or array of references or oriented lines; - # if SUBCLASSES contain fields which reference to line in a - # different fashion, the method must be updated or overwritten - # by the subclass - def update_reference_in_field(field, oldref, newref) - value = get(field) - case value - when RGFA::Line - if value.equal?(oldref) - set_existing_field(field, newref, set_reference: true) - end - when RGFA::OrientedLine - if value.line.equal?(oldref) - value.line = newref - end - when Array - update_reference_in_array(value, oldref, newref) - end - end - - def update_reference_in_array(array, oldref, newref) - array.map! do |elem| - case elem - when RGFA::Line - elem = newref if elem.equal?(oldref) - when RGFA::OrientedLine - if elem.line.equal?(oldref) - if oldref.respond_to?(:complement?) - elem.orient = elem.orient.invert if oldref.complement?(newref) - end - elem.line = newref - end - end - elem - end.compact! - end - - def update_field_references(oldref, newref, possible_fieldnames) - possible_fieldnames.each do |fn| - update_reference_in_field(fn, oldref, newref ? newref : oldref.to_sym) - end - end - - def update_nonfield_references(oldref, newref, possible_keys) - possible_keys.each do |key| - array = @refs[key] - update_reference_in_array(array, oldref, newref) if !array.nil? 
- end - end - - def update_dependent_line_references(oldref, newref, possible_keys) - update_nonfield_references(oldref, newref, possible_keys) - end - - # @note SUBCLASSES may redefine this method - def update_other_references(oldref, newref, possible_keys) - update_nonfield_references(oldref, newref, possible_keys) - end - -end diff --git a/lib/rgfa/line/common/validate.rb b/lib/rgfa/line/common/validate.rb deleted file mode 100644 index 37237ec..0000000 --- a/lib/rgfa/line/common/validate.rb +++ /dev/null @@ -1,72 +0,0 @@ -# -# Methods for the validation of single fields and of the entire line -# -# @tested_in api_positionals, api_tags -# -module RGFA::Line::Common::Validate - - # Raises an error if the content of the field does not correspond to - # the field type - # - # @param fieldname [Symbol] the tag name of the field to validate - # @raise [RGFA::FormatError] if the content of the field is - # not valid, according to its required type - # @return [void] - def validate_field(fieldname) - fieldname = self.class::FIELD_ALIAS.fetch(fieldname, fieldname) - v = @data[fieldname] - t = field_or_default_datatype(fieldname, v) - v.validate_gfa_field(t, fieldname) - return nil - end - - # Validate the RGFA::Line instance - # @raise [RGFA::FormatError] if any field content is not valid - # @return [void] - def validate - fieldnames = positional_fieldnames + tagnames - validate_tagnames_and_types if @vlevel == 0 # otherwise validated at init - fieldnames.each {|fieldname| validate_field(fieldname) } - validate_record_type_specific_info - end - - private - - def validate_tagnames_and_types - tagnames.each do |n| - if predefined_tag?(n) - validate_predefined_tag_type(n, field_datatype(n)) - elsif not valid_custom_tagname?(n) - raise RGFA::FormatError, - "Custom tags must be lower case; found: #{n}" - end - end - end - - def validate_predefined_tag_type(tagname, datatype) - unless datatype == self.class::DATATYPE[tagname] - raise RGFA::TypeError, - "Tag #{tagname} must 
be of type "+ - "#{self.class::DATATYPE[tagname]}, #{datatype} found" - end - end - - def validate_custom_tagname(tagname) - if not valid_custom_tagname?(tagname) - raise RGFA::FormatError, - "Custom tags must be lower case; found: #{tagname}" - end - end - - def valid_custom_tagname?(tagname) - /^[a-z][a-z0-9]$/ =~ tagname - end - - def validate_record_type_specific_info - end - - def predefined_tag?(tagname) - self.class::PREDEFINED_TAGS.include?(tagname) - end - -end diff --git a/lib/rgfa/line/common/version_conversion.rb b/lib/rgfa/line/common/version_conversion.rb deleted file mode 100644 index ac030bd..0000000 --- a/lib/rgfa/line/common/version_conversion.rb +++ /dev/null @@ -1,46 +0,0 @@ -# -# Version attribute and support of the conversion of GFA1 lines to GFA2 and -# vice-versa. -# -# @tested_in api_version, api_version_conversion -# -module RGFA::Line::Common::VersionConversion - - # @!attribute [r] version - # @return [RGFA::VERSIONS, nil] GFA specification version - attr_reader :version - - [:gfa1, :gfa2].each do |shall_version| - # @note RGFA::Line subclasses do not usually redefine this method, but - # the corresponding versioned to_a method - # @return [String] a string representation of self - define_method :"to_#{shall_version}_s" do - send(:"to_#{shall_version}_a").join(RGFA::Line::SEPARATOR) - end - - # @return [RGFA::Line] convertion to the selected version - define_method :"to_#{shall_version}" do - v = (shall_version == :gfa1) ? 
:gfa1 : :gfa2 - if (v == version) - return self - else - send(:"to_#{shall_version}_a").to_rgfa_line(version: v, vlevel: @vlevel) - end - end - end - - # @api private - module API_PRIVATE - [:gfa1, :gfa2].each do |shall_version| - - # @note RGFA::Line subclasses can redefine this method to convert - # between versions - # @return [Array] an array of string representations of the fields - define_method :"to_#{shall_version}_a" do - send(:to_a) - end - - end - end - include API_PRIVATE -end diff --git a/lib/rgfa/line/common/virtual_to_real.rb b/lib/rgfa/line/common/virtual_to_real.rb deleted file mode 100644 index 2c0509c..0000000 --- a/lib/rgfa/line/common/virtual_to_real.rb +++ /dev/null @@ -1,87 +0,0 @@ -# Support of virtual lines and the creation of real lines which substitute -# previously introduced virtual lines. -# -# @tested_in api_references_virtual -module RGFA::Line::Common::VirtualToReal - - # Is the line virtual? - # - # Is this RGFA::Line a virtual line representation - # (i.e. a placeholder for an expected but not encountered yet line)? - # - # @return [Boolean] - # - # @api private - def virtual? 
- @virtual - end - - private - - def substitute_virtual_line(previous) - @rgfa = previous.rgfa - import_references(previous) - @rgfa.unregister_line(previous) - @rgfa.register_line(self) - return nil - end - - # This is called when a virtual line (previous) is - # substituted by a real line - def import_references(previous) - if !previous.kind_of?(RGFA::Line::Unknown) - import_field_references(previous) - update_field_backreferences(previous) - else - initialize_references - end - import_nonfield_references(previous) - update_nonfield_backreferences(previous) - end - - def import_field_references(previous) - (self.class::REFERENCE_FIELDS + - self.class::BACKREFERENCE_RELATED_FIELDS).each do |k| - ref = previous.get(k) - set_existing_field(k, ref, set_reference: true) - end - end - - def update_backreference_in(ref, previous, k) - case ref - when RGFA::Line - ref.update_references(previous, self, k) - when RGFA::OrientedLine - ref.line.update_references(previous, self, k) - when Array - ref.each do |item| - update_backreference_in(item, previous, k) - end - end - end - - # @note currently this method supports fields which are: references, - # oriented lines and arrays of references of oriented lines; - # if SUBCLASSES have reference fields which contain references - # in a different fashion, the method must be updated or overwritten - # in the subclass - def update_field_backreferences(previous) - self.class::REFERENCE_FIELDS.each do |k| - ref = get(k) - update_backreference_in(ref, previous, k) - end - end - - def import_nonfield_references(previous) - @refs = previous.refs - end - - def update_nonfield_backreferences(previous) - @refs.each do |k, v| - v.each do |ref| - update_backreference_in(ref, previous, k) - end - end - end - -end diff --git a/lib/rgfa/line/common/writer.rb b/lib/rgfa/line/common/writer.rb deleted file mode 100644 index 00ff4c2..0000000 --- a/lib/rgfa/line/common/writer.rb +++ /dev/null @@ -1,93 +0,0 @@ -# -# Methods to write a single field or 
the entire line to string. -# -# @tested_in unit_line, api_tags, api_positionals -# -module RGFA::Line::Common::Writer - - # Separator in the string representation of RGFA lines - SEPARATOR = "\t" - - # @return [String] a string representation of self - def to_s - to_a.join(RGFA::Line::SEPARATOR) - end - - # @!macro [new] field_to_s - # Compute the string representation of a field. - # - # @param fieldname [Symbol] the tag name of the field - # @param tag [Boolean] (defaults to: +false+) - # return the tagname:datatype:value representation - # - # @raise [RGFA::NotFoundError] if field is not defined - # @return [String] the string representation - def field_to_s(fieldname, tag: false) - fieldname = self.class::FIELD_ALIAS.fetch(fieldname, fieldname) - field = @data[fieldname] - raise RGFA::NotFoundError, - "No value defined for tag #{fieldname}" if field.nil? - t = field_or_default_datatype(fieldname, field) - if !field.kind_of?(String) - field = field.to_gfa_field(datatype: t, fieldname: fieldname) - end - field.validate_gfa_field(t, fieldname) if @vlevel >= 2 - return tag ? field.to_gfa_tag(fieldname, datatype: t) : field - end - - # Return a description of the internal state of the instance. - # Wraps the superclass inspect method, in order to provide a more - # compact description, due to the references and backreferences - # contained in line instances. - # @return [String] - def inspect - if instance_variable_defined?(:@refs) and !@refs.nil? - local_refs = @refs - @refs = {} - local_refs.each do |k, v| - @refs[k] ||= [] - v.each {|l| @refs[k] << l.to_s.gsub("\t"," ")} - end - end - if !@rgfa.nil? 
- local_rgfa = @rgfa - @rgfa = "" - end - retval = super - @refs = local_refs if local_refs - @rgfa = local_rgfa if local_rgfa - retval - end - - # @api private - module API_PRIVATE - - # @return [Array] an array of string representations of the fields - def to_a - a = [record_type.to_s] - positional_fieldnames.each {|fn| a << field_to_s(fn, tag: false)} - tagnames.each {|fn| a << field_to_s(fn, tag: true)} - if virtual? - a << "co:Z:RGFA_virtual_line" - end - return a - end - - end - include API_PRIVATE - - private - - # Returns the tags as an array of [fieldname, datatype, value] - # triples. - # @api private - # @return [Array<[Symbol, Symbol, Object]>] - def tags - retval = [] - tagnames.each do |of| - retval << [of, get_datatype(of), get(of)] - end - return retval - end - -end diff --git a/lib/rgfa/line/custom_record.rb b/lib/rgfa/line/custom_record.rb deleted file mode 100644 index 88cc03f..0000000 --- a/lib/rgfa/line/custom_record.rb +++ /dev/null @@ -1,37 +0,0 @@ -# A custom line of a GFA2 file -# "Any line that does not begin with a recognized code can be ignored. -# This will allow users to have additional descriptor lines specific to their -# special processes." 
-# -# Parsing of custom lines is handled as follows: -# - divide content by tabs -# - from the back, fields are parsed using parse_gfa_tag; -# until an exception is thrown, they are all considered tags -# - from the first exception to the first field, they are all considered -# positional fields with name field0, field1, etc -# -# @tested_in api_custom_records -class RGFA::Line::CustomRecord < RGFA::Line - - RECORD_TYPE = nil - POSFIELDS = [:record_type] - FIELD_ALIAS = {} - PREDEFINED_TAGS = [] - NAME_FIELD = nil - STORAGE_KEY = nil - DATATYPE = { - :record_type => :custom_record_type, - } - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions -end - -require_relative "custom_record/init.rb" - -class RGFA::Line::CustomRecord - include RGFA::Line::CustomRecord::Init -end diff --git a/lib/rgfa/line/custom_record/init.rb b/lib/rgfa/line/custom_record/init.rb deleted file mode 100644 index f5465b2..0000000 --- a/lib/rgfa/line/custom_record/init.rb +++ /dev/null @@ -1,51 +0,0 @@ -# Allow to parse as tags all fields which are valid GFA tags on the right end -# of the line -# @tested_in api_custom_records -module RGFA::Line::CustomRecord::Init - - # List of names of positional fields (:field1, :field2, ...) 
- # @return [Array] - def positional_fieldnames - @positional_fieldnames - end - - # List of tag names - # @return [Array] - def tagnames - (@data.keys - @positional_fieldnames - [:record_type]) - end - - private - - def initialize_positional_fields(strings) - # delayed, see #delayed_inizialize_positional_fields - end - - def initialize_tags(strings) - first_tag = strings.size - (strings.size-1).downto(1) do |i| - initialize_tag(*strings[i].parse_gfa_tag) rescue break - first_tag = i - end - delayed_initialize_positional_fields(strings, first_tag) - end - - def delayed_initialize_positional_fields(strings, n_positional_fields) - @positional_fieldnames = [] - if ["P", "C", "L"].include?(strings[0]) - raise RGFA::VersionError, - "GFA-like line (P,C,L) found in GFA2\n"+ - "Line: #{strings.join(' ')}\n"+ - "Custom lines with record_type P, C and L are not supported by RGFA." - end - init_field_value(:record_type, :custom_record_type, strings[0], - errmsginfo: strings) - 1.upto(n_positional_fields-1) do |i| - n = :"field#{i}" - init_field_value(n, :generic, strings[i], errmsginfo: strings) - @positional_fieldnames << n - @datatype[n] = :generic - end - end - -end diff --git a/lib/rgfa/line/edge.rb b/lib/rgfa/line/edge.rb deleted file mode 100644 index 9a82956..0000000 --- a/lib/rgfa/line/edge.rb +++ /dev/null @@ -1,7 +0,0 @@ -# An edge line is an E line of a GFA2 file -# or a L/C line of a GFA1 file -class RGFA::Line::Edge < RGFA::Line -end -require_relative "edge/gfa2.rb" -require_relative "edge/link.rb" -require_relative "edge/containment.rb" diff --git a/lib/rgfa/line/edge/common/alignment_type.rb b/lib/rgfa/line/edge/common/alignment_type.rb deleted file mode 100644 index 71b5513..0000000 --- a/lib/rgfa/line/edge/common/alignment_type.rb +++ /dev/null @@ -1,26 +0,0 @@ -RGFA::Line::Edge::Common ||= Module.new - -# -# Requirements: +alignment_type+ -# -module RGFA::Line::Edge::Common::AlignmentType - - # @return [Boolean] does the line represent an internal - # 
overlap (GFA2 edge, not representable in GFA1)? - def internal? - alignment_type == :I - end - - # @return [Boolean] does the line represent a containment - # (GFA1 containment or GFA2 edge equivalent to a GFA1 containment)? - def containment? - alignment_type == :C - end - - # @return [Boolean] does the line represent a dovetail overlap? - # (GFA1 link or GFA2 edge equivalent to a GFA1 link)? - def dovetail? - alignment_type == :L - end - -end diff --git a/lib/rgfa/line/edge/common/from_to.rb b/lib/rgfa/line/edge/common/from_to.rb deleted file mode 100644 index 3a55c3e..0000000 --- a/lib/rgfa/line/edge/common/from_to.rb +++ /dev/null @@ -1,72 +0,0 @@ -RGFA::Line::Edge::Common ||= Module.new - -# Methods regarding the ends (from/to) of a GFA1 link/containment -# -# Requirements: +from+, +from_orient+, +to+, +to_orient+. -module RGFA::Line::Edge::Common::FromTo - - # @return [Boolean] is the from and to segments are equal - def circular? - from.to_sym == to.to_sym - end - - # @return [Boolean] is the from and to segments are equal - def circular_same_end? - from_end == to_end - end - - # @note the result is meaningful only for links and dovetails edges - # @return [RGFA::SegmentEnd] the segment end represented by the - # from/from_orient fields - def from_end - [from, from_orient == :+ ? :R : :L].to_segment_end - end - - # @note the result is meaningful only for links and dovetails edges - # @return [RGFA::SegmentEnd] the segment end represented by the - # to/to_orient fields - def to_end - [to, to_orient == :+ ? 
:L : :R].to_segment_end - end - - # Signature of the segment ends, for debugging - # @api private - def segment_ends_s - [from_end.to_s, to_end.to_s].join("---") - end - - # The from segment name, in both cases where from is a segment name (Symbol) - # or a segment (RGFA::Line::Segment::GFA1) - # @return [Symbol] - def from_name - from.to_sym - end - - # The to segment name, in both cases where to is a segment name (Symbol) - # or a segment (RGFA::Line::Segment::GFA1) - # @return [Symbol] - def to_name - to.to_sym - end - - # @note the result is meaningful only for links and dovetails edges - # @param segment_end [RGFA::SegmentEnd] one of the two segment ends - # of the line - # @return [RGFA::SegmentEnd] the other segment end - # @raise [RGFA::ArgumentError] if segment_end is not a valid segment end - # representation - # @raise [RuntimeError] if segment_end is not a segment end of the line - def other_end(segment_end) - segment_end = segment_end.to_segment_end - if (from_end == segment_end) - return to_end - elsif (to_end == segment_end) - return from_end - else - raise RGFA::ArgumentError, - "Segment end '#{segment_end.inspect}' not found\n"+ - "(from=#{from_end.inspect};to=#{to_end.inspect})" - end - end - -end diff --git a/lib/rgfa/line/edge/containment.rb b/lib/rgfa/line/edge/containment.rb deleted file mode 100644 index 9ba2f5b..0000000 --- a/lib/rgfa/line/edge/containment.rb +++ /dev/null @@ -1,56 +0,0 @@ -# A containment line of a RGFA file -# @tested_in api_positionals, api_references_edges_gfa1 -class RGFA::Line::Edge::Containment < RGFA::Line::Edge - - RECORD_TYPE = :C - POSFIELDS = [:from_segment, :from_orient, :to_segment, :to_orient, :pos, :overlap] - FIELD_ALIAS = {:container => :from_segment, - :contained => :to_segment, - :from => :from_segment, - :to => :to_segment, - :container_orient => :from_orient, - :contained_orient => :to_orient} - PREDEFINED_TAGS = [:MQ, :NM] - DATATYPE = { - :from_segment => :segment_name_gfa1, - :from_orient => 
:orientation, - :to_segment => :segment_name_gfa1, - :to_orient => :orientation, - :pos => :position_gfa1, - :overlap => :alignment_gfa1, - :MQ => :i, - :NM => :i, - } - NAME_FIELD = nil - STORAGE_KEY = nil - REFERENCE_FIELDS = [:from_segment, :to_segment] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions -end - -require_relative "common/from_to" -require_relative "common/alignment_type" -require_relative "gfa1/to_gfa2" -require_relative "gfa1/references" -require_relative "gfa1/oriented_segments" -require_relative "gfa1/alignment_type" -require_relative "gfa1/other" -require_relative "containment/canonical" -require_relative "containment/pos" -require_relative "containment/to_gfa2" - -class RGFA::Line::Edge::Containment - include RGFA::Line::Edge::Common::FromTo - include RGFA::Line::Edge::Common::AlignmentType - include RGFA::Line::Edge::GFA1::ToGFA2 - include RGFA::Line::Edge::GFA1::References - include RGFA::Line::Edge::GFA1::OrientedSegments - include RGFA::Line::Edge::GFA1::AlignmentType - include RGFA::Line::Edge::GFA1::Other - include RGFA::Line::Edge::Containment::Canonical - include RGFA::Line::Edge::Containment::Pos - include RGFA::Line::Edge::Containment::ToGFA2 -end diff --git a/lib/rgfa/line/edge/containment/canonical.rb b/lib/rgfa/line/edge/containment/canonical.rb deleted file mode 100644 index e64655a..0000000 --- a/lib/rgfa/line/edge/containment/canonical.rb +++ /dev/null @@ -1,35 +0,0 @@ -module RGFA::Line::Edge::Containment::Canonical - - # Returns true if the containment is canonical, false otherwise - # - # == Definition of canonical containment - # - # A containment is canonical if the from orientation is + - # - # === Details - # - # Each containment has an equivalent complement containment. - # Consider a containment of B (length:8) in A (length:100) at position 9 of A - # with a cigar 1M1I2M3D4M (i.e. rpos = 19). 
- # - # A+ B+ 1M1I2M3D4M 9 == A- B- 4M3D2M1I1M 80 - # A+ B- 1M1I2M3D4M 9 == A- B+ 4M3D2M1I1M 80 - # A- B+ 1M1I2M3D4M 9 == A+ B- 4M3D2M1I1M 80 - # A- B- 1M1I2M3D4M 9 == A+ B+ 4M3D2M1I1M 80 - # - # Pos in the complement is equal to the length of A minus the right pos - # of B before reversing. - # - # We require here that A != B as A == B makes no sense for containments. - # Thus it is always possible to express the containment using a positive - # from orientation. - # - # For this reason the canon is simply defined as + from orientation. - # - # @return [Boolean] - # - def canonical? - from_orient == :+ - end - -end diff --git a/lib/rgfa/line/edge/containment/pos.rb b/lib/rgfa/line/edge/containment/pos.rb deleted file mode 100644 index 23f989d..0000000 --- a/lib/rgfa/line/edge/containment/pos.rb +++ /dev/null @@ -1,11 +0,0 @@ -module RGFA::Line::Edge::Containment::Pos - - # Computes the rightmost coordinate of the contained sequence in the container - # @return [Integer] 0-based right coordinate of contained in container - # @raise [RGFA::ValueError] if the overlap is not a CIGAR string - def rpos - raise RGFA::ValueError if overlap.kind_of?(RGFA::Placeholder) - pos + overlap.length_on_reference - end - -end diff --git a/lib/rgfa/line/edge/containment/to_gfa2.rb b/lib/rgfa/line/edge/containment/to_gfa2.rb deleted file mode 100644 index a3684cd..0000000 --- a/lib/rgfa/line/edge/containment/to_gfa2.rb +++ /dev/null @@ -1,23 +0,0 @@ -module RGFA::Line::Edge::Containment::ToGFA2 - - # GFA2 positions of the alignment on the +from+ segment - # @!macro [new] coords - # @return [(Integer|Lastpos,Integer|Lastpos)] begin and end - # @raise [RGFA::RuntimeError] if the segment length cannot be determined, - # because the segment line is unknown - # @raise [RGFA::ValueError] if the segment length is not specified - # in the segment line - def from_coords - check_overlap - rpos = pos + overlap.length_on_reference - rpos = rpos.to_lastpos if rpos == lastpos_of(:from) - return 
[pos, rpos] - end - - # GFA2 positions of the alignment on the +to+ segment - # @!macro coords - def to_coords - return [0, lastpos_of(:to)] - end - -end diff --git a/lib/rgfa/line/edge/gfa1/alignment_type.rb b/lib/rgfa/line/edge/gfa1/alignment_type.rb deleted file mode 100644 index 52f51d3..0000000 --- a/lib/rgfa/line/edge/gfa1/alignment_type.rb +++ /dev/null @@ -1,9 +0,0 @@ -RGFA::Line::Edge::GFA1 ||= Module.new - -module RGFA::Line::Edge::GFA1::AlignmentType - - def alignment_type - return record_type - end - -end diff --git a/lib/rgfa/line/edge/gfa1/oriented_segments.rb b/lib/rgfa/line/edge/gfa1/oriented_segments.rb deleted file mode 100644 index 2852d52..0000000 --- a/lib/rgfa/line/edge/gfa1/oriented_segments.rb +++ /dev/null @@ -1,17 +0,0 @@ -RGFA::Line::Edge::GFA1 ||= Module.new - -module RGFA::Line::Edge::GFA1::OrientedSegments - - # @return [RGFA::OrientedLine] the oriented segment represented by the - # from/from_orient fields - def oriented_from - OL[from, from_orient] - end - - # @return [RGFA::OrientedLine] the oriented segment represented by the - # to/to_orient fields - def oriented_to - OL[to, to_orient] - end - -end diff --git a/lib/rgfa/line/edge/gfa1/other.rb b/lib/rgfa/line/edge/gfa1/other.rb deleted file mode 100644 index 041d218..0000000 --- a/lib/rgfa/line/edge/gfa1/other.rb +++ /dev/null @@ -1,39 +0,0 @@ -RGFA::Line::Edge::GFA1 ||= Module.new - -module RGFA::Line::Edge::GFA1::Other - - # @param oriented_segment [RGFA::OrientedLine] - # one of the two oriented segments of the line - # @return [RGFA::OrientedLine] the other oriented segment - # @raise [RGFA::NotFoundError] if segment_end is not a segment end of the line - def other_oriented_segment(oriented_segment) - if (oriented_from == oriented_segment) - return oriented_to - elsif (oriented_to == oriented_segment) - return oriented_from - else - raise RGFA::NotFoundError, - "Oriented segment '#{oriented_segment.inspect}' not found\n"+ - "Line: #{self}" - end - end - - # The other segment 
of a connection line - # @param segment [RGFA::Line::Segment::GFA1, Symbol] segment name or instance - # @raise [RGFA::NotFoundError] - # if segment is not involved in the connection - # @return [Symbol] the name or instance of the other segment of the connection - # if circular, then +segment+ - def other(segment) - segment_name = segment.to_sym - if segment_name == from.to_sym - to - elsif segment_name == to.to_sym - from - else - raise RGFA::NotFoundError, - "Line #{self} does not involve segment #{segment_name}" - end - end - -end diff --git a/lib/rgfa/line/edge/gfa1/references.rb b/lib/rgfa/line/edge/gfa1/references.rb deleted file mode 100644 index c0b8cc8..0000000 --- a/lib/rgfa/line/edge/gfa1/references.rb +++ /dev/null @@ -1,48 +0,0 @@ -RGFA::Line::Edge::GFA1 ||= Module.new - -module RGFA::Line::Edge::GFA1::References - - private - - def initialize_references - [:from, :to].each do |dir| - s = @rgfa.segment(get(:"#{dir}_segment")) - if s.nil? - raise RGFA::NotFoundError if @rgfa.segments_first_order - s = RGFA::Line::Segment::GFA1.new({:name => get(dir), - :sequence => "*"}, - version: :gfa1, - virtual: true) - s.connect(@rgfa) - end - set_existing_field(:"#{dir}_segment", s, set_reference: true) - if self.record_type == :L - et = send(:"#{dir}_end").end_type - key = :"dovetails_#{et}" - else - key = (dir == :from) ? 
- :edges_to_contained : - :edges_to_containers - end - s.add_reference(self, key) - end - end - - def import_field_references(previous) - [:from_segment, :to_segment].each do |dir| - set_existing_field(dir, @rgfa.segment(get(dir)), set_reference: true) - end - end - - def backreference_keys(ref, key_in_ref) - case ref.record_type - when :P - [:paths] - when :S - [:from_segment, :to_segment] - else - [] - end - end - -end diff --git a/lib/rgfa/line/edge/gfa1/to_gfa2.rb b/lib/rgfa/line/edge/gfa1/to_gfa2.rb deleted file mode 100644 index 6e9aa40..0000000 --- a/lib/rgfa/line/edge/gfa1/to_gfa2.rb +++ /dev/null @@ -1,84 +0,0 @@ -RGFA::Line::Edge::GFA1 ||= Module.new - -# Methods for the access of / conversion from a GFA1 link/containment -# as / to a GFA2 edge -# -# Requirements: +from+, +from_orient+, +to+, +to_orient+, -# +from_coords+, +to_coords+. -module RGFA::Line::Edge::GFA1::ToGFA2 - - def eid - i = get(:id) - if i.nil? - return RGFA::Placeholder.new - #i = "#{from_name}#{from_orient} #{to_name}#{to_orient} #{overlap}" - end - return i - end - alias_method :to_sym, :eid - - def sid1 - oriented_from - end - - def sid2 - oriented_to - end - - def beg1 - from_cords[0] - end - - def end1 - from_cords[1] - end - - def beg2 - to_coords[1] - end - - def end2 - to_cords[1] - end - - def alignment - overlap - end - - def to_gfa2_a - a = ["E"] - i = get(:id) - a << (i ? i.to_s : "*") - a << sid1.to_s - a << sid2.to_s - a += from_coords.map(&:to_s) - a += to_coords.map(&:to_s) - a << field_to_s(:overlap) - (tagnames-[:id]).each {|fn| a << field_to_s(fn, tag: true)} - return a - end - - private - - def lastpos_of(field) - if !send(field).kind_of?(RGFA::Line) - raise RGFA::RuntimeError, - "Line #{self} is not embedded in a RGFA object" - end - l = send(field).length - if l.nil? 
- raise RGFA::ValueError, - "Length of segment #{to.name} unknown" - end - l.to_lastpos - end - - def check_overlap - if overlap.kind_of?(RGFA::Placeholder) - raise RGFA::ValueError, - "Link: #{self.to_s}\n"+ - "Missing overlap, cannot compute overlap coordinates" - end - end - -end diff --git a/lib/rgfa/line/edge/gfa2.rb b/lib/rgfa/line/edge/gfa2.rb deleted file mode 100644 index 5c895a0..0000000 --- a/lib/rgfa/line/edge/gfa2.rb +++ /dev/null @@ -1,46 +0,0 @@ -# An edge line of a GFA2 file -# @tested_in api_positionals, api_references_edges_gfa2 -class RGFA::Line::Edge::GFA2 < RGFA::Line::Edge - - RECORD_TYPE = :E - POSFIELDS = [:eid, :sid1, :sid2, :beg1, :end1, :beg2, :end2, :alignment] - PREDEFINED_TAGS = [] - DATATYPE = { - :eid => :optional_identifier_gfa2, - :sid1 => :oriented_identifier_gfa2, - :sid2 => :oriented_identifier_gfa2, - :beg1 => :position_gfa2, - :end1 => :position_gfa2, - :beg2 => :position_gfa2, - :end2 => :position_gfa2, - :alignment => :alignment_gfa2, - } - NAME_FIELD = :eid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [:sid1, :sid2] - BACKREFERENCE_RELATED_FIELDS = [:beg1, :end1, :beg2, :end2] - DEPENDENT_LINES = [:paths, :sets] - OTHER_REFERENCES = [] - - apply_definitions - - alias_method :to_sym, :eid - -end - -require_relative "common/from_to" -require_relative "common/alignment_type" -require_relative "gfa2/to_gfa1" -require_relative "gfa2/alignment_type" -require_relative "gfa2/references" -require_relative "gfa2/other" - -class RGFA::Line::Edge::GFA2 - include RGFA::Line::Edge::Common::FromTo - include RGFA::Line::Edge::Common::AlignmentType - include RGFA::Line::Edge::GFA2::AlignmentType - include RGFA::Line::Edge::GFA2::ToGFA1 - include RGFA::Line::Edge::GFA2::References - include RGFA::Line::Edge::GFA2::Other -end diff --git a/lib/rgfa/line/edge/gfa2/alignment_type.rb b/lib/rgfa/line/edge/gfa2/alignment_type.rb deleted file mode 100644 index 990352c..0000000 --- a/lib/rgfa/line/edge/gfa2/alignment_type.rb +++ 
/dev/null @@ -1,80 +0,0 @@ -module RGFA::Line::Edge::GFA2::AlignmentType - - # @return [:C, :L, :I] +:C+ if containment, +:L+ if link, - # +:I+ (internal) if any other local alignment - def alignment_type - st1 = substring_type(beg1, end1)[0] - st2 = substring_type(beg2, end2)[0] - alignment_type_for_substring_types(st1, st2) - end - - private - - # @param st1 [substring_type] value for sid1 - # @param st2 [substring_type] value for sid2 - # @return [:C, :L, :I] +:C+ if containment, +:L+ if link, - # +:I+ (internal) if any other local alignment - def alignment_type_for_substring_types(st1, st2) - if st1 == :whole or st2 == :whole - return :C - elsif sid1.orient == sid2.orient - if (st1 == :pfx and st2 == :sfx) or (st1 == :sfx and st2 == :pfx) - return :L - else - return :I - end - else - if (st1 == :pfx and st2 == :pfx) or (st1 == :sfx and st2 == :sfx) - return :L - else - return :I - end - end - end - - # Analyze the begin and end position and determine if the substring is - # the whole string, or a (possibly empty) other substring, ie a prefix, - # a suffix, or an internal alignment - # @param begpos [RGFA::LastPos,Integer] - # begin position of the substring on a segment - # @param endpos [RGFA::LastPos,Integer] - # end position of the substring on a segment - # @return [Array] The first value is the - # substring type, which a symbol (one of: +:pfx+, +:sfx+, +:whole+, - # +:internal+). Thereby, with pfx or sfx is meant a prefix or suffix which - # is not the complete string. With internal is meant a substring which - # starts after the first position and ends before the last position. The - # second value is a boolean, +true+ if the substring is empty, +false+ - # otherwise. - def substring_type(begpos, endpos) - if begpos.value > endpos.value - raise RGFA::ValueError, - "Line: #{self.to_s}\n"+ - "begin > end: #{begpos.value}$ > #{endpos.value}" - end - if begpos.first? - if endpos.first? - return :pfx, true - elsif endpos.last? 
- return :whole, false - else - return :pfx, false - end - elsif begpos.last? - if !endpos.last? - raise RGFA::FormatError, - "Line: #{self.to_s}\n"+ - "Wrong use of $ marker\n"+ - "#{endpos.value} >= #{begpos.value}$" - end - return :sfx, true - else - if endpos.last? - return :sfx, false - else - return :internal, begpos.value == endpos.value - end - end - end - -end diff --git a/lib/rgfa/line/edge/gfa2/other.rb b/lib/rgfa/line/edge/gfa2/other.rb deleted file mode 100644 index 0468423..0000000 --- a/lib/rgfa/line/edge/gfa2/other.rb +++ /dev/null @@ -1,40 +0,0 @@ -RGFA::Line::Edge::GFA2 ||= Module.new - -module RGFA::Line::Edge::GFA2::Other - - # @param oriented_segment [RGFA::OrientedLine] - # one of the two oriented segments of the line - # @return [RGFA::OrientedLine] the other oriented segment - # @raise [RGFA::NotFoundError] if segment_end is not a segment end of the line - def other_oriented_segment(oriented_segment) - if (sid1 == oriented_segment) - return sid2 - elsif (sid2 == oriented_segment) - return sid1 - else - raise RGFA::NotFoundError, - "Oriented segment '#{oriented_segment}' not found\n"+ - "Line: #{self}" - end - end - - # The other segment of a connection line - # @param segment [RGFA::Line::Segment::GFA2, Symbol] segment name or instance - # @raise [RGFA::NotFoundError] - # if segment is not involved in the connection - # @return [RGFA::Line::Segment::GFA2, Symbol] the instance or symbol - # of the other segment of the connection - # (which is the +segment+ itself, when the connection is circular) - def other(segment) - segment_name = segment.to_sym - if segment_name == sid1.name - sid2.line - elsif segment_name == sid2.name - sid1.line - else - raise RGFA::NotFoundError, - "Line #{self} does not involve segment #{segment_name}" - end - end - -end diff --git a/lib/rgfa/line/edge/gfa2/references.rb b/lib/rgfa/line/edge/gfa2/references.rb deleted file mode 100644 index dd09f42..0000000 --- a/lib/rgfa/line/edge/gfa2/references.rb +++ /dev/null @@ 
-1,78 +0,0 @@ -module RGFA::Line::Edge::GFA2::References - - private - - def initialize_references - st1 = substring_type(beg1, end1)[0] - st2 = substring_type(beg2, end2)[0] - [1,2].each do |snum| - sid = :"sid#{snum}" - orient = get(sid).orient - s = @rgfa.segment(get(sid).line) - if s.nil? - raise RGFA::NotFoundError if @rgfa.segments_first_order - s = RGFA::Line::Segment::GFA2.new({:sid => get(sid).line, - :slen => 1, - :sequence => "*"}, - version: :gfa2, - virtual: true) - s.connect(@rgfa) - end - set_existing_field(sid, OL[s, orient], set_reference: true) - s.add_reference(self, refkey_for_s(snum, st1, st2)) - end - end - - def refkey_for_s(snum, st1, st2) - if st1 == :whole - if st2 == :whole - return snum == 1 ? :edges_to_contained : :edges_to_containers - else - return snum == 1 ? :edges_to_containers : :edges_to_contained - end - elsif st2 == :whole - return snum == 2 ? :edges_to_containers : :edges_to_contained - elsif sid1.orient == sid2.orient - if (st1 == :pfx and st2 == :sfx) - return snum == 1 ? :dovetails_L : :dovetails_R - elsif (st1 == :sfx and st2 == :pfx) - return snum == 1 ? 
:dovetails_R : :dovetails_L - else - return :internals - end - else - if (st1 == :pfx and st2 == :pfx) - return :dovetails_L - elsif (st1 == :sfx and st2 == :sfx) - return :dovetails_R - else - return :internals - end - end - end - - def import_field_references(previous) - [:sid1, :sid2].each do |sid| - set_existing_field(sid, OL[@rgfa.segment(get(sid).line), - get(sid).orient], - set_reference: true) - end - end - - def backreference_keys(ref, key_in_ref) - case ref.record_type - when :U - [:sets] - when :O - [:paths] - when :S - [:sid1, :sid2] - else - raise RGFA::AssertionError, - "Bug found, please report\n"+ - "ref: #{ref}\n"+ - "key_in_ref: #{key_in_ref}" - end - end - -end diff --git a/lib/rgfa/line/edge/gfa2/to_gfa1.rb b/lib/rgfa/line/edge/gfa2/to_gfa1.rb deleted file mode 100644 index 0c36340..0000000 --- a/lib/rgfa/line/edge/gfa2/to_gfa1.rb +++ /dev/null @@ -1,179 +0,0 @@ -module RGFA::Line::Edge::GFA2::ToGFA1 - - # @return [Array] an array of fields of the equivalent line - # in GFA1, if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def to_gfa1_a - at = alignment_type() - if at == :internal - raise RGFA::ValueError, - "Internal overlap, cannot convert to GFA1\n#{self}" - end - a = [at] - (sid1_from? ? [:sid1, :sid2] : [:sid2, :sid1]).each do |sid| - ol = get(sid) - a << ol.name.to_s - a << ol.orient.to_s - end - if at == :C - a << pos.to_s - end - a << overlap.to_s - if !eid.placeholder? - a << eid.to_gfa_tag(:id, datatype: :Z) - end - tagnames.each {|fn| a << field_to_s(fn, tag: true)} - return a - end - - # @return [RGFA::Alignment::Placeholder, RGFA::Alignment::CIGAR] - # value of the GFA1 +overlap+ field, - # if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def overlap - check_not_internal(:overlap) - sid1_from? ? 
alignment : alignment.complement - end - - # @return [RGFA::OrientedLine] value of the GFA1 +from+ and +from_orient+ - # fields, if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def oriented_from - sid1_from? ? sid1 : sid2 - end - - # @return [RGFA::OrientedLine] value of the GFA1 +to+ and +to_orient+ - # fields, if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def oriented_to - sid1_from? ? sid2 : sid1 - end - - # @return [Symbol, RGFA::Line::Segment::GFA2] value of the GFA1 +from+ field, - # if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def from - oriented_from.line - end - alias_method :from_segment, :from - - # Set the line of the field which will be returned by calling from - # @param value [Symbol, RGFA::Line::Segment::GFA2] - # @return [nil] - def from=(value) - oriented_from.line = value - end - alias_method :from_segment=, :from= - - # @return [:+, :-] value of the GFA1 +from_orient+ field, - # if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def from_orient - oriented_from.orient - end - - # Set the orientation of the field which will be returned by calling from - # @param value [:+,:-] - # @return [nil] - def from_orient=(value) - oriented_from.orient = value - end - - # @return [Symbol, RGFA::Line::Segment::GFA2] value of the GFA1 +to+ field, - # if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - def to - oriented_to.line - end - alias_method :to_segment, :to - - # Set the line of the field which will be returned by calling to - # @param value [Symbol, RGFA::Line::Segment::GFA2] - # @return [nil] - def to=(value) - oriented_to.line = value - end - alias_method :to_segment=, :from= - - # @return [:+, :-] value of the GFA1 +to_orient+ field, - # if the edge is a link or containment - # @raise [RGFA::ValueError] if the edge is internal - 
def to_orient - oriented_to.orient - end - - # Set the orientation of the field which will be returned by calling to - # @param value [:+,:-] - # @return [nil] - def to_orient=(value) - oriented_to.orient = value - end - - # @return [Integer] value of the GFA1 +pos+ field, - # if the edge is a containment - # @raise [RGFA::ValueError] if the edge is not - # a containment - def pos - case alignment_type - when :I - raise RGFA::ValueError, "Line: #{self.to_s}\n"+ - "Internal alignment, pos is not defined" - when :L - raise RGFA::ValueError, "Line: #{self.to_s}\n"+ - "Dovetail alignment, pos is not defined" - when :C - if beg1.first? - (beg2.first? and end2.last?) ? beg1 : beg2 - else - beg1 - end - end - end - - private - - def check_not_internal(fn) - if internal? - raise RGFA::ValueError, - "Line: #{self.to_s}\n"+ - "Internal alignment, #{fn} is not defined" - end - end - - # Role of a segment in an overlap, given coordinates and orientation. - # @returns [Symbol] :pfx, :sfx, :contained, :other - def segment_role(begpos, endpos, orient) - if begpos.first? - if endpos.last? - return :contained - else - return orient == :+ ? :pfx : :sfx - end - else - if endpos.last? - return orient == :+ ? :sfx : :pfx - else - return :other - end - end - end - - # @return [Boolean] does the sid1 correspond to from in GFA1? - def sid1_from? 
- sr1 = segment_role(beg1, end1, sid1.orient) - sr2 = segment_role(beg2, end2, sid2.orient) - if sr2 == :contained - return true - elsif sr1 == :contained - return false - elsif sr1 == :sfx and sr2 == :pfx - return true - elsif sr2 == :sfx and sr1 == :pfx - return false - else - raise RGFA::ValueError, "Internal overlap, from undefined\n#{self}\n"+ - "Roles: segment1 is #{sr1}; segment2 is #{sr2}" - end - end - -end diff --git a/lib/rgfa/line/edge/link.rb b/lib/rgfa/line/edge/link.rb deleted file mode 100644 index d930060..0000000 --- a/lib/rgfa/line/edge/link.rb +++ /dev/null @@ -1,59 +0,0 @@ -# A link connects two segments, or a segment to itself. -# @tested_in api_positionals, api_references_edges_gfa1 -class RGFA::Line::Edge::Link < RGFA::Line::Edge - - RECORD_TYPE = :L - POSFIELDS = [:from_segment, :from_orient, :to_segment, :to_orient, :overlap] - PREDEFINED_TAGS = [:MQ, :NM, :RC, :FC, :KC] - FIELD_ALIAS = {:from => :from_segment, :to => :to_segment} - DATATYPE = { - :from_segment => :segment_name_gfa1, - :from_orient => :orientation, - :to_segment => :segment_name_gfa1, - :to_orient => :orientation, - :overlap => :alignment_gfa1, - :MQ => :i, - :NM => :i, - :RC => :i, - :FC => :i, - :KC => :i, - } - NAME_FIELD = nil - STORAGE_KEY = nil - REFERENCE_FIELDS = [:from_segment, :to_segment] - BACKREFERENCE_RELATED_FIELDS = [:to_orient, :from_orient, :overlap] - DEPENDENT_LINES = [:paths] - OTHER_REFERENCES = [] - - apply_definitions - -end - -require_relative "common/alignment_type" -require_relative "common/from_to" -require_relative "gfa1/to_gfa2" -require_relative "gfa1/references" -require_relative "gfa1/oriented_segments" -require_relative "gfa1/alignment_type" -require_relative "gfa1/other" -require_relative "link/canonical" -require_relative "link/complement" -require_relative "link/equivalence" -require_relative "link/references" -require_relative "link/to_gfa2" - -class RGFA::Line::Edge::Link - include RGFA::Line::Edge::Common::FromTo - include 
RGFA::Line::Edge::Common::AlignmentType - include RGFA::Line::Edge::GFA1::ToGFA2 - include RGFA::Line::Edge::GFA1::References - include RGFA::Line::Edge::GFA1::OrientedSegments - include RGFA::Line::Edge::GFA1::AlignmentType - include RGFA::Line::Edge::GFA1::Other - include RGFA::Line::Edge::Link::Canonical - include RGFA::Line::Edge::Link::Complement - include RGFA::Line::Edge::Link::Equivalence - include RGFA::Line::Edge::Link::References - include RGFA::Line::Edge::Link::ToGFA2 -end - diff --git a/lib/rgfa/line/edge/link/canonical.rb b/lib/rgfa/line/edge/link/canonical.rb deleted file mode 100644 index 38798e7..0000000 --- a/lib/rgfa/line/edge/link/canonical.rb +++ /dev/null @@ -1,49 +0,0 @@ -module RGFA::Line::Edge::Link::Canonical - - # Returns true if the link is canonical, false otherwise - # - # == Definition of canonical link - # - # A link if canonical if: - # - from != to and from < to (lexicographically); or - # - from == to and at least one of from_orient or to_orient is + - # - # === Details - # - # In the special case in which from == to (== s) we have the - # following equivalences: - # - # s + s + == s - s - - # s - s - == s + s + (same as previous case) - # s + s - == s + s - (equivalent to itself) - # s - s + == s - s + (equivalent to itself) - # - # Considering the values on the left, the first one can be taken as - # canonical, the second not, because it can be transformed in the first - # one; the other two values are canonical, as they are only equivalent - # to themselves. - # - # @return [Boolean] - # - def canonical? - if from_name < to_name - return true - elsif from_name > to_name - return false - else - return [from_orient, to_orient].include?(:+) - end - end - - # Returns the unchanged link if the link is canonical, - # otherwise complements the link and returns it. - # - # @note The path references are not corrected by this method; therefore - # the method shall be used before the link is embedded in a graph. 
- # - # @return [RGFA::Line::Edge::Link] self - def canonicize! - complement! if !canonical? - end - -end diff --git a/lib/rgfa/line/edge/link/complement.rb b/lib/rgfa/line/edge/link/complement.rb deleted file mode 100644 index 305a407..0000000 --- a/lib/rgfa/line/edge/link/complement.rb +++ /dev/null @@ -1,47 +0,0 @@ -module RGFA::Line::Edge::Link::Complement - - # Creates the equivalent link with from/to inverted. - # - # The CIGAR operations (order/type) are inverted as well. - # Tags are left unchanged. - # - # @note The path references are not copied to the complement link. - # - # @note This method shall be overridden if custom tags - # are defined, which have a ``complementation'' operation which determines - # their value in the equivalent complement link. - # - # @return [RGFA::Line::Edge::Link] the inverted link. - def complement - l = self.clone - l.from = to - l.from_orient = to_orient.invert - l.to = from - l.to_orient = from_orient.invert - l.overlap = overlap.complement - l - end - - # Complements the link inplace. - # The tags are left unchanged. - # - # @note The path references are not complemented by this method; therefore - # the method shall be used before the link is embedded in a graph. - # - # @note This method shall be overridden if custom tags - # are defined, which have a ``complementation'' operation which determines - # their value in the complement link. - # - # @return [RGFA::Line::Edge::Link] self - def complement! 
- tmp = self.from - self.from = self.to - self.to = tmp - tmp = self.from_orient - self.from_orient = self.to_orient.invert - self.to_orient = tmp.invert - self.overlap = self.overlap.complement - return self - end - -end diff --git a/lib/rgfa/line/edge/link/equivalence.rb b/lib/rgfa/line/edge/link/equivalence.rb deleted file mode 100644 index 3138bb7..0000000 --- a/lib/rgfa/line/edge/link/equivalence.rb +++ /dev/null @@ -1,148 +0,0 @@ -module RGFA::Line::Edge::Link::Equivalence - - # Computes an hash for including a link in an Hash tables, - # so that the hash of a link and its complement is the same. - # Thereby, tags are not considered. - # @see #eql? - def hash - from_end.to_s.hash + to_end.to_s.hash + - overlap.to_s.hash + overlap.complement.to_s.hash - end - - # Compares two links and determine their equivalence. - # Thereby, tags are not considered. - # - # @note Inverting the strand of both links and reversing - # the CIGAR operations (order/type), one obtains an - # equivalent complement link. - # - # @param other [RGFA::Line::Edge::Link] a link - # @return [Boolean] are self and other equivalent? - # @see #== - # @see #same? - # @see #complement? - def eql?(other) - return (same?(other) or complement?(other)) - end - - # Compares the tags of two links. - # - # @note This method shall be overridden if custom tags - # are defined, which have a ``complementation'' operation which determines - # their value in the equivalent but complement link. - # - # @param other [RGFA::Line::Edge::Link] a link - # @return [Boolean] are self and other equivalent? - # @see #== - def eql_tags?(other) - (self.tagnames.sort == other.tagnames.sort) and - tagnames.each {|fn| self.get(fn) == other.get(fn)} - end - - # Compares two links and determine their equivalence. - # Tags must have the same content. - # - # @note Inverting the strand of both links and reversing - # the CIGAR operations (order/type), one obtains an equivalent - # link. 
- # - # @param other [RGFA::Line::Edge::Link] a link - # @return [Boolean] are self and other equivalent? - # @see #eql? - # @see #eql_tags? - #def ==(other) - # eql?(other) and eql_tags?(other) - #end - - # Compares two links and determine their equivalence. - # Thereby, tags are not considered. - # - # @param other [RGFA::Line::Edge::Link] a link - # @return [Boolean] are self and other equivalent? - # @see #eql? - # @see #complement? - # @see #== - def same?(other) - (from_end == other.from_end and - to_end == other.to_end and - overlap == other.overlap) - end - - # Compares the link to the complement of another link - # and determine their equivalence. - # Thereby, tags are not considered. - # - # @param other [RGFA::Line::Edge::Link] the other link - # @return [Boolean] are self and the complement of other equivalent? - # @see #eql? - # @see #same? - # @see #== - def complement?(other) - (from_end == other.to_end and - to_end == other.from_end and - overlap == other.overlap.complement) - end - - # Compares a link and optionally the complement link, - # with two oriented_segments and optionally an overlap. - # @param [RGFA::OrientedLine] other_oriented_from - # @param [RGFA::OrientedLine] other_oriented_to - # @param allow_complement [Boolean] - # shall the complement link also be considered? - # @param [RGFA::Alignment::CIGAR] other_overlap compared only if not empty - # @return [Boolean] does the link or, if +allow_complement+, - # the complement link go from the first - # oriented segment to the second with an overlap equal to the provided one - # (if not empty)? 
- def compatible?(other_oriented_from, other_oriented_to, other_overlap = [], - allow_complement = true) - other_overlap = other_overlap.to_alignment(version: :gfa1, valid: true) - if compatible_direct?(other_oriented_from, other_oriented_to, other_overlap) - return true - elsif allow_complement - return compatible_complement?(other_oriented_from, - other_oriented_to, - other_overlap) - else - return false - end - end - - # Compares a link with two oriented segments and optionally an overlap. - # @param [RGFA::OrientedLine] other_oriented_from - # @param [RGFA::OrientedLine] other_oriented_to - # @param [RGFA::Alignment::CIGAR] other_overlap compared only if not empty - # @return [Boolean] does the link go from the first - # oriented segment to the second with an overlap equal to the provided one - # (if not empty)? - def compatible_direct?(other_oriented_from, other_oriented_to, - other_overlap = []) - (oriented_from == other_oriented_from and - oriented_to == other_oriented_to) and - (overlap.empty? or other_overlap.empty? or (overlap == other_overlap)) - end - - # Compares the complement link with two oriented segments and optionally an - # overlap. - # @param [RGFA::OrientedLine] other_oriented_from - # @param [RGFA::OrientedLine] other_oriented_to - # @param [RGFA::Alignment::CIGAR] other_overlap compared only if not empty - # @return [Boolean] does the complement link go from the first - # oriented segment to the second with an overlap equal to the provided one - # (if not empty)? - def compatible_complement?(other_oriented_from, other_oriented_to, - other_overlap = []) - (oriented_to == other_oriented_from.invert and - oriented_from == other_oriented_to.invert) and - (overlap.empty? or other_overlap.empty? 
or - (overlap == other_overlap.complement)) - end - - private - - def complement_ends?(other) - (from_end == other.to_end and - to_end == other.from_end) - end - -end diff --git a/lib/rgfa/line/edge/link/references.rb b/lib/rgfa/line/edge/link/references.rb deleted file mode 100644 index 742dde4..0000000 --- a/lib/rgfa/line/edge/link/references.rb +++ /dev/null @@ -1,13 +0,0 @@ -module RGFA::Line::Edge::Link::References - - private - - def process_not_unique(previous) - if complement?(previous) - # do nothing - else - super - end - end - -end diff --git a/lib/rgfa/line/edge/link/to_gfa2.rb b/lib/rgfa/line/edge/link/to_gfa2.rb deleted file mode 100644 index 0d4fed0..0000000 --- a/lib/rgfa/line/edge/link/to_gfa2.rb +++ /dev/null @@ -1,33 +0,0 @@ -module RGFA::Line::Edge::Link::ToGFA2 - - # GFA2 positions of the alignment on the +from+ segment - # @!macro [new] coords - # @return [(Integer|Lastpos,Integer|Lastpos)] begin and end - # @raise [RGFA::ValueError] if the overlap is not specified - # @raise [RGFA::RuntimeError] if the segment length cannot be determined, - # because the segment line is unknown - # @raise [RGFA::ValueError] if the segment length is not specified - # in the segment line - def from_coords - check_overlap - if from_orient == :+ - from_l = lastpos_of(:from) - return [from_l - overlap.length_on_reference, from_l] - else - return [0, overlap.length_on_reference] - end - end - - # GFA2 positions of the alignment on the +to+ segment - # @!macro coords - def to_coords - check_overlap - if to_orient == :+ - return [0, overlap.length_on_query] - else - to_l = lastpos_of(:to) - return [to_l - overlap.length_on_query, to_l] - end - end - -end diff --git a/lib/rgfa/line/fragment.rb b/lib/rgfa/line/fragment.rb deleted file mode 100644 index 241ce7a..0000000 --- a/lib/rgfa/line/fragment.rb +++ /dev/null @@ -1,33 +0,0 @@ -# A fragment line of a GFA2 file -# @tested_in api_references_f_g_lines, api_positionals -class RGFA::Line::Fragment < RGFA::Line - - 
RECORD_TYPE = :F - POSFIELDS = [:sid, :external, :s_beg, :s_end, :f_beg, :f_end, :alignment] - FIELD_ALIAS = {} - PREDEFINED_TAGS = [] - NAME_FIELD = nil - STORAGE_KEY = :external - DATATYPE = { - :sid => :identifier_gfa2, - :external => :oriented_identifier_gfa2, - :s_beg => :position_gfa2, - :s_end => :position_gfa2, - :f_beg => :position_gfa2, - :f_end => :position_gfa2, - :alignment => :alignment_gfa2 - } - REFERENCE_FIELDS = [:sid] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions - -end - -require_relative "fragment/references.rb" - -class RGFA::Line::Fragment - include RGFA::Line::Fragment::References -end diff --git a/lib/rgfa/line/fragment/references.rb b/lib/rgfa/line/fragment/references.rb deleted file mode 100644 index fae20a6..0000000 --- a/lib/rgfa/line/fragment/references.rb +++ /dev/null @@ -1,23 +0,0 @@ -# Sets the reference to the segment in fragments, when they are connected -# to a RGFA instance; creates a virtual segment, if the segment has not -# been found yet -module RGFA::Line::Fragment::References - - private - - def initialize_references - s = @rgfa.segment(get(:sid)) - if s.nil? 
- raise RGFA::NotFoundError if @rgfa.segments_first_order - s = RGFA::Line::Segment::GFA2.new({:sid => get(:sid), - :slen => 1, - :sequence => "*"}, - version: :gfa2, - virtual: true) - s.connect(@rgfa) - end - set_existing_field(:sid, s, set_reference: true) - s.add_reference(self, :fragments) - end - -end diff --git a/lib/rgfa/line/gap.rb b/lib/rgfa/line/gap.rb deleted file mode 100644 index 7bc3682..0000000 --- a/lib/rgfa/line/gap.rb +++ /dev/null @@ -1,33 +0,0 @@ -# A gap line of a GFA2 file -# @tested_in api_references_f_g_lines, api_positionals -class RGFA::Line::Gap < RGFA::Line - - RECORD_TYPE = :G - POSFIELDS = [:gid, :sid1, :sid2, :disp, :var] - FIELD_ALIAS = { :name => :gid } - PREDEFINED_TAGS = [] - NAME_FIELD = :gid - STORAGE_KEY = :name - DATATYPE = { - :gid => :optional_identifier_gfa2, - :sid1 => :oriented_identifier_gfa2, - :sid2 => :oriented_identifier_gfa2, - :disp => :i, - :var => :optional_integer - } - REFERENCE_FIELDS = [:sid1, :sid2] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions - - alias_method :to_sym, :gid - -end - -require_relative "gap/references.rb" - -class RGFA::Line::Gap - include RGFA::Line::Gap::References -end diff --git a/lib/rgfa/line/gap/references.rb b/lib/rgfa/line/gap/references.rb deleted file mode 100644 index 140d6e2..0000000 --- a/lib/rgfa/line/gap/references.rb +++ /dev/null @@ -1,47 +0,0 @@ -# Sets the reference to the segments in gaps, when they are connected -# to a RGFA instance; creates virtual segments, if the segments have not -# been found yet. -# -# Computes the key to the reference to the gap in segments (gaps_R or gaps_L) -# depending on the orientations. -module RGFA::Line::Gap::References - - private - - def initialize_references - [1,2].each do |snum| - sid = :"sid#{snum}" - orient = get(sid).orient - linesymbol = get(sid).line - s = @rgfa.segment(linesymbol) - if s.nil? 
- raise RGFA::NotFoundError if @rgfa.segments_first_order - s = RGFA::Line::Segment::GFA2.new({:sid => linesymbol, - :slen => 1, - :sequence => "*"}, - version: :gfa2, - virtual: true) - s.connect(@rgfa) - end - set_existing_field(sid, OL[s,orient], set_reference: true) - s.add_reference(self, refkey_for_s(snum)) - end - end - - def refkey_for_s(snum) - case [sid1.orient, sid2.orient] - when [:+, :+] - return (snum == 1) ? :gaps_R : :gaps_L - when [:+, :-] - return :gaps_R - when [:-, :+] - return :gaps_L - when [:-, :-] - return (snum == 1) ? :gaps_L : :gaps_R - else - raise RGFA::AssertionError, "Bug found, please report\n"+ - "snum: #{snum}" - end - end - -end diff --git a/lib/rgfa/line/group.rb b/lib/rgfa/line/group.rb deleted file mode 100644 index 1869421..0000000 --- a/lib/rgfa/line/group.rb +++ /dev/null @@ -1,6 +0,0 @@ -# A group is a U O or P line -class RGFA::Line::Group < RGFA::Line -end -require_relative "group/unordered.rb" -require_relative "group/ordered.rb" -require_relative "group/path.rb" diff --git a/lib/rgfa/line/group/gfa2/references.rb b/lib/rgfa/line/group/gfa2/references.rb deleted file mode 100644 index 05f3ccf..0000000 --- a/lib/rgfa/line/group/gfa2/references.rb +++ /dev/null @@ -1,63 +0,0 @@ -RGFA::Line::Group::GFA2 ||= Module.new - -module RGFA::Line::Group::GFA2::References - - private - - def prepare_and_check_ref(ref) - ref = line_for_ref_symbol(ref) if ref.kind_of?(Symbol) - check_ref_class(ref) - check_ref_connection(ref) - check_ref_not_self(ref) - return ref - end - - def check_ref_class(item) - if ![RGFA::Line::Edge::GFA2, - RGFA::Line::Segment::GFA2, - RGFA::Line::Gap, - RGFA::Line::Group::Ordered, - self::class].include?(item.class) - raise RGFA::ArgumentError, - "Line: #{self}\n"+ - "Cannot add items of class #{item.class}\n"+ - "Only GFA2 edges, segments, gaps, groups[*] "+ - "can be added\n(* = unordered groups to unordered groups only)." 
- end - end - - def check_ref_connection(item) - if line.rgfa != self.rgfa - raise RGFA::ArgumentError, - "Line: #{self}\n"+ - "Item: #{item.inspect}"+ - "The item added to the group must be connected\n"+ - "to the same RGFA object as the group" - end - end - - def check_ref_not_self(item) - if (line == self) - raise RGFA::RuntimeError, - "Line: #{self}\n"+ - "Item is the line itself\n"+ - "A group is not allowed to refer to itself" - end - end - - def line_for_ref_symbol(ref) - line = @rgfa.line(ref) - if line.nil? - if @rgfa.segments_first_order - raise RGFA::NotFoundError, "Group: #{self}\n"+ - "requires a non-existing ref with ID #{ref}" - end - line = RGFA::Line::Unknown.new({:name => ref}, virtual: true, - version: :gfa2) - @rgfa << line - end - line.add_reference(self, (record_type == :O) ? :paths : :sets) - return line - end - -end diff --git a/lib/rgfa/line/group/gfa2/same_id.rb b/lib/rgfa/line/group/gfa2/same_id.rb deleted file mode 100644 index aad9fe9..0000000 --- a/lib/rgfa/line/group/gfa2/same_id.rb +++ /dev/null @@ -1,36 +0,0 @@ -RGFA::Line::Group::GFA2 ||= Module.new - -module RGFA::Line::Group::GFA2::SameID - - private - - def process_not_unique(previous) - @rgfa = previous.rgfa - initialize_references - cur_items = get(:items) - substitute_virtual_line(previous) - set_existing_field(:items, get(:items) + cur_items, set_reference: true) - import_tags_of_previous_group_definition(previous) - return nil - end - - def import_tags_of_previous_group_definition(previous) - previous.tagnames.each do |tag| - prv = previous.get(tag) - cur = get(tag) - if cur - if cur != prv - raise RGFA::NotUniqueError, - "Same tag defined differently in "+ - "multiple group lines with same ID\n"+ - "Previous tag definition: #{prv}\n"+ - "New tag definition: #{cur}\n"+ - "Group ID: #{name}" - end - else - set(tag,prv) - end - end - end - -end diff --git a/lib/rgfa/line/group/ordered.rb b/lib/rgfa/line/group/ordered.rb deleted file mode 100644 index d680601..0000000 --- 
a/lib/rgfa/line/group/ordered.rb +++ /dev/null @@ -1,37 +0,0 @@ -# An ordered group line of a GFA2 file -class RGFA::Line::Group::Ordered < RGFA::Line::Group - - RECORD_TYPE = :O - POSFIELDS = [:pid, :items] - PREDEFINED_TAGS = [] - FIELD_ALIAS = {} - DATATYPE = { - :pid => :optional_identifier_gfa2, - :items => :oriented_identifier_list_gfa2, - } - NAME_FIELD = :pid - STORAGE_KEY = :name - REFERENCE_FIELDS = [:items] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:paths, :sets] - OTHER_REFERENCES = [] - - apply_definitions - - alias_method :to_sym, :pid - -end - -require_relative "gfa2/references" -require_relative "gfa2/same_id" -require_relative "ordered/references" -require_relative "ordered/captured_path" -require_relative "ordered/to_gfa1" - -class RGFA::Line::Group::Ordered - include RGFA::Line::Group::GFA2::References - include RGFA::Line::Group::GFA2::SameID - include RGFA::Line::Group::Ordered::References - include RGFA::Line::Group::Ordered::CapturedPath - include RGFA::Line::Group::Ordered::ToGFA1 -end diff --git a/lib/rgfa/line/group/ordered/captured_path.rb b/lib/rgfa/line/group/ordered/captured_path.rb deleted file mode 100644 index 5254850..0000000 --- a/lib/rgfa/line/group/ordered/captured_path.rb +++ /dev/null @@ -1,250 +0,0 @@ -require "set" - -module RGFA::Line::Group::Ordered::CapturedPath - - def captured_segments - captured_path.select{|x|x.line.kind_of?(RGFA::Line::Segment::GFA2)} - end - - def captured_edges - captured_path.select{|x|x.line.kind_of?(RGFA::Line::Edge::GFA2)} - end - - def captured_path - if !connected? 
- raise RGFA::RuntimeError, - "Captured path cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - compute_captured_path[0] - end - - protected - - def compute_captured_path - path, prev_edge = [], false - items.each do |item| - path, prev_edge = push_item_on_se_path(path, prev_edge, item) - end - return path, prev_edge - end - - private - - def push_item_on_se_path(path, prev_edge, item) - case item.line - when Symbol - raise RGFA::RuntimeError, - "Captured path cannot be computed; a reference has not been resolved\n"+ - "Line: #{self}\n"+ - "Unresolved reference: #{item.line} (Symbol found)" - when RGFA::Line::Segment::GFA2 - if !item.line.connected? - raise RGFA::RuntimeError, - "Captured path cannot be computed; item is not connected\n"+ - "Line: #{self}\n"+ - "Item: #{item.line}" - end - push_segment_on_se_path(path, prev_edge, item) - prev_edge = false - when RGFA::Line::Edge::GFA2 - if !item.line.connected? - raise RGFA::RuntimeError, - "Captured path cannot be computed; item is not connected\n"+ - "Line: #{self}\n"+ - "Item: #{item.line}" - end - if path.empty? - push_first_edge_on_se_path(path, items) - else - push_nonfirst_edge_on_se_path(path, item) - end - prev_edge = true - when RGFA::Line::Group::Ordered - if !item.line.connected? - raise RGFA::RuntimeError, - "Captured path cannot be computed; item is not connected\n"+ - "Line: #{self}\n"+ - "Item: #{item.line}" - end - subpath, prev_edge_subpath = item.line.compute_captured_path - raise RGFA::AssertionError if subpath.empty? 
- if item.orient == :+ - subpath.each do |subpath_item| - path, prev_edge = - push_item_on_se_path(path, prev_edge, subpath_item) - end - else - subpath.reverse.each do |subpath_item| - path, prev_edge = - push_item_on_se_path(path, prev_edge, subpath_item.invert) - end - end - prev_edge = prev_edge_subpath - when RGFA::Line::Unknown - raise RGFA::RuntimeError, - "Captured path cannot be computed; a reference has not been resolved\n"+ - "Line: #{self}\n"+ - "Unresolved reference: #{item.name} (Virtual unknown line)" - else - raise RGFA::TypeError, - "Line: #{self}\t"+ - "Cannot compute captured path:\t"+ - "Error: items of type #{item.line.class} are not supported\t"+ - "Unsupported item: #{item}" - end - return path, prev_edge - end - - def push_first_edge_on_se_path(path, items) - oriented_edge = items[0] - oss = [oriented_edge.line.sid1, oriented_edge.line.sid2] - oss.map!{|x|x.invert} if oriented_edge.orient == :"-" - if items.size > 1 - nextitem = items[1] - case nextitem.line - when RGFA::Line::Segment::GFA2 - oss.reverse! if nextitem == oss[0] - # if oss does not include nextitem an error will be raised - # in the next iteration, so does not need to be handled here - when RGFA::Line::Edge::GFA2 - oss_of_next = [nextitem.line.sid1, nextitem.line.sid2] - oss_of_next.map!{|x|x.invert} if nextitem.orient == :"-" - oss.reverse! if oss_of_next.include?(oss[0]) - # if oss_of_next have no element in common with oss an error will be - # raised in the next iteration, so does not need to be handled here - when RGFA::Line::Group::Ordered - subpath = item.line.captured_path - return if subpath.empty? # does not need to be further handled here - if item.orient == :+ - firstsubpathsegment = supath[0] - else - firstsubpathsegment = supath[-1].invert - end - oss.reverse! 
if firstsubpathsegment == oss[0] - # if oss does not include in firstsubpathsegment - # error will be raised in next iteration, ie not handled here - else - # don't need to handle here other cases, as they will be handled - # in the next iteration of push_item_on_se_path - end - end - path << oss[0] - path << oriented_edge - path << oss[1] - end - - def push_nonfirst_edge_on_se_path(path, oriented_edge) - prev_os = path[-1] - path << oriented_edge - possible_prev = [oriented_edge.line.sid1, oriented_edge.line.sid2] - possible_prev.map!{|os|os.invert} if oriented_edge.orient == :"-" - if prev_os == possible_prev[0] - path << possible_prev[1] - elsif prev_os == possible_prev[1] - path << possible_prev[0] - else - raise RGFA::NotFoundError, - "Path is not valid, elements are not contiguous\n"+ - "Line: #{self}\n"+ - "Previous elements:\n"+ - path.map{|e|" #{e} (#{e.line})\n"}.join+ - "Current element:\n"+ - " #{oriented_edge} (#{oriented_edge.line})" - end - end - - def push_segment_on_se_path(path, prev_edge, oriented_segment) - if !path.empty? 
- case path[-1].line - when RGFA::Line::Segment::GFA2 - if prev_edge - check_s_is_as_expected(path, oriented_segment) - return # do not add segment, as it is already there - else - path << find_edge_from_path_to_segment(path, oriented_segment) - end - when RGFA::Line::Edge::GFA2 - check_s_to_e_contiguity(path, oriented_segment) - else - raise RGFA::AssertionError - end - end - path << oriented_segment - end - - def check_s_is_as_expected(path, oriented_segment) - if path[-1] != oriented_segment - raise RGFA::InconsistencyError, - "Path is not valid\n"+ - "Line: #{self}\n"+ - "Previous elements:\n"+ - path[0..-2].map{|e|" #{e} (#{e.line})\n"}+ - "Expected element:\n"+ - " #{path[-1]} (#{path[-1].line})\n" - "Current element:\n"+ - " #{segment} (#{segment.line})\n" - end - end - - def check_s_to_e_contiguity(path, oriented_segment) - # check that segment is an extremity of path[-1] - # and that the other extremity is path[-2] - if !(path[-1].sid1 == segment and path[-1].sid2 == path[-2]) and - !(path[-1].sid1 == path[-2] and path[-1].sid2 == segment) - raise RGFA::InconsistencyError, - "Path is not valid\n"+ - "Line: #{self}\n"+ - "Previous elements:\n"+ - path.map{|e|" #{e} (#{e.line})\n"}.join+ - "Current element:\n"+ - " #{oriented_segment} (#{oriented_segment.line})\n" - end - end - - def find_edge_from_path_to_segment(path, oriented_segment) - edges = [] - oriented_segment.line.edges.each do |edge| - if (edge.sid1 == oriented_segment and edge.sid2 == path[-1]) or - (edge.sid1 == path[-1] and edge.sid2 == oriented_segment) - edges << OL[edge, :+] - elsif (edge.sid1 == oriented_segment.invert and - edge.sid2 == path[-1].invert) or - (edge.sid1 == path[-1].invert and - edge.sid2 == oriented_segment.invert) - edges << OL[edge, :-] - end - end - if edges.size == 0 - raise RGFA::NotFoundError, - "Path is not valid, segments are not contiguous\n"+ - "Line: #{self}\n"+ - "Previous elements:\n"+ - path.map{|e|" #{e} (#{e.line})\n"}.join+ - "Current element:\n"+ - " 
#{oriented_segment} (#{oriented_segment.line})\n" - elsif edges.size > 1 - raise RGFA::NotUniqueError, - "Path is not unique\n"+ - "Line: #{self}\n"+ - "Previous elements:\n"+ - path.map{|e|" #{e} (#{e.line})\n"}.join+ - "Current element:\n"+ - " #{oriented_segment} (#{oriented_segment.line})\n"+ - "Possible edges\n"+ - edges.map{|e|" #{e} (#{e.line})\n"}.join - end - return edges[0] - end - - def check_captured_path_elem_connected(item) - if !item.connected? - raise RGFA::RuntimeError, - "Cannot compute induced set\n"+ - "Non-connected element found\n"+ - "Item: #{item}\nLine: #{self}" - end - end - -end diff --git a/lib/rgfa/line/group/ordered/references.rb b/lib/rgfa/line/group/ordered/references.rb deleted file mode 100644 index 44057b2..0000000 --- a/lib/rgfa/line/group/ordered/references.rb +++ /dev/null @@ -1,79 +0,0 @@ -module RGFA::Line::Group::Ordered::References - - # Add an item to the group as last item - # @param item [RGFA::Line, Symbol] - # GFA2 edge, segment, gap or group line to add - # @return [void] - def append_item(item) - if !connected? - add_item_to_unconnected_group(item, true) - else - add_item_to_connected_group(item, true) - compute_induced_set # check contiguity - end - end - - # Add an item to the group as first item - # @param item [RGFA::Line, Symbol] - # GFA2 edge, segment, gap or group line to add - # @return [void] - def prepend_item(item) - if !connected? - add_item_to_unconnected_group(item, false) - else - add_item_to_connected_group(item, false) - compute_induced_set # check contiguity - end - end - - # Remove the first item from the group - # @param item [Symbol, RGFA::Line] - # GFA2 edge, segment, gap or group line to remove - # @return [void] - def rm_first_item - if !connected? 
- items = items[1..-1] - else - items[0].update_reference(self, :paths) - self.delete_reference(items[0], :items) - compute_induced_set # check contiguity - end - return nil - end - - # Remove the last item from the group - # @param item [Symbol, RGFA::Line] - # GFA2 edge, segment, gap or group line to remove - # @return [void] - def rm_last_item - if !connected? - items = items[0..-2] - else - items[-1].update_reference(self, :paths) - self.delete_reference(items[-1], :items) - compute_induced_set # check contiguity - end - return nil - end - - private - - def add_item_to_unconnected_group(item, append = true) - item.line = item.name if item.line.kind_of?(RGFA::Line) - items.send(append ? :push : :unshift, item) - return nil - end - - def add_item_to_connected_group(item, append = true) - item.line = prepare_and_check_ref(item.line) - self.add_reference(item, :items, append: append) - return nil - end - - def initialize_references - items.size.times do |i| - items[i].line = line_for_ref_symbol(items[i].line) - end - end - -end diff --git a/lib/rgfa/line/group/ordered/to_gfa1.rb b/lib/rgfa/line/group/ordered/to_gfa1.rb deleted file mode 100644 index fbbef3f..0000000 --- a/lib/rgfa/line/group/ordered/to_gfa1.rb +++ /dev/null @@ -1,27 +0,0 @@ -module RGFA::Line::Group::Ordered::ToGFA1 - - def to_gfa1_a - a = ["P"] - if name.placeholder? 
- raise RGFA::ValueError, - "Conversion to GFA1 failed\n"+ - "The path name is a placeholder\t"+ - "Line: #{self}" - end - a << name.to_s - segment_names = [] - captured_segments.each do |oline| - oline.name.validate_gfa_field(:segment_name_gfa1) - segment_names << oline.to_s - end - a << segment_names.join(",") - overlaps = [] - captured_edges.each do |oline| - oline.line.overlap.validate_gfa_field(:alignment_gfa1) - overlaps << oline.line.overlap.to_s - end - a << overlaps.join(",") - return a - end - -end diff --git a/lib/rgfa/line/group/path.rb b/lib/rgfa/line/group/path.rb deleted file mode 100644 index 57de8ac..0000000 --- a/lib/rgfa/line/group/path.rb +++ /dev/null @@ -1,39 +0,0 @@ -# A path line of a GFA1 file -# -class RGFA::Line::Group::Path < RGFA::Line::Group - - RECORD_TYPE = :P - POSFIELDS = [:path_name, :segment_names, :overlaps] - PREDEFINED_TAGS = [] - FIELD_ALIAS = {} - DATATYPE = { - :path_name => :path_name_gfa1, - :segment_names => :oriented_identifier_list_gfa1, - :overlaps => :alignment_list_gfa1, - } - NAME_FIELD = :path_name - STORAGE_KEY = :name - REFERENCE_FIELDS = [:segment_names, :overlaps] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [:links] - - apply_definitions - - alias_method :to_sym, :path_name - -end - -require_relative "path/topology" -require_relative "path/references" -require_relative "path/validation" -require_relative "path/captured_path" -require_relative "path/to_gfa2" - -class RGFA::Line::Group::Path - include RGFA::Line::Group::Path::Topology - include RGFA::Line::Group::Path::References - include RGFA::Line::Group::Path::Validation - include RGFA::Line::Group::Path::CapturedPath - include RGFA::Line::Group::Path::ToGFA2 -end diff --git a/lib/rgfa/line/group/path/captured_path.rb b/lib/rgfa/line/group/path/captured_path.rb deleted file mode 100644 index 0458e5c..0000000 --- a/lib/rgfa/line/group/path/captured_path.rb +++ /dev/null @@ -1,43 +0,0 @@ -module 
RGFA::Line::Group::Path::CapturedPath - - def captured_edges - if !connected? - raise RGFA::RuntimeError, - "Captured path cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - links - end - - def captured_segments - if !connected? - raise RGFA::RuntimeError, - "Captured path cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - segment_names - end - - def captured_path - if !connected? - raise RGFA::RuntimeError, - "Captured path cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - retval = [] - (segment_names.size-1).times do |i| - retval << segment_names[i] - retval << links[i] - end - retval << segment_names[-1] - if segment_names.size == links.size - retval << links[-1] - retval << segment_names[0] - end - return retval - end - -end diff --git a/lib/rgfa/line/group/path/references.rb b/lib/rgfa/line/group/path/references.rb deleted file mode 100644 index 9efc884..0000000 --- a/lib/rgfa/line/group/path/references.rb +++ /dev/null @@ -1,89 +0,0 @@ -module RGFA::Line::Group::Path::References - - private - - # Computes the list of links which are required to support - # the path - # @return - # [Array<[RGFA::OrientedLine, RGFA::OrientedLine, RGFA::Alignment::CIGAR]>] - # an array, which elements are 3-tuples (from oriented segment, - # to oriented segment, cigar) - # @api private - def compute_required_links - has_undef_overlaps = undef_overlaps? - retval = [] - is_circular = self.circular? - segment_names.size.times do |i| - j = i+1 - if j == self.segment_names.size - is_circular ? j = 0 : break - end - cigar = has_undef_overlaps ? - RGFA::Alignment::Placeholder.new : self.overlaps[i] - retval << [self.segment_names[i], self.segment_names[j], cigar] - end - retval - end - - # Are the overlaps a single "*"? 
This is a compact representation of - # a linear path where all CIGARs are "*" - # @return [Boolean] - # @api private - def undef_overlaps? - overlaps.size == 1 and overlaps[0].empty? - end - - def initialize_references - initialize_links - initialize_segments - end - - def initialize_links - refs[:links] = [] - compute_required_links.each do |from,to,cigar| - l = nil - orient = :+ - if @rgfa.segment(from.line) and @rgfa.segment(to.line) - l = @rgfa.search_link(from, to, cigar) - if !l.nil? and l.compatible_complement?(from, to, cigar) - orient = :- - end - end - if l.nil? - if @rgfa.segments_first_order - raise RGFA::NotFoundError, "Path: #{self}\n"+ - "requires a non-existing link:\n"+ - "from=#{from} to=#{to} cigar=#{cigar}" - end - l = RGFA::Line::Edge::Link.new({:from_segment => from.line, - :from_orient => from.orient, - :to_segment => to.line, - :to_orient => to.orient, - :overlap => cigar}, - virtual: true, - version: :gfa1) - l.connect(@rgfa) - end - @refs[:links] << OL[l,orient] - l.add_reference(self, :paths) - end - end - - def initialize_segments - segment_names.each do |sn_with_o| - s = @rgfa.segment(sn_with_o.line) - sn_with_o.line = s - s.add_reference(self, :paths) - end - end - - def backreference_keys(ref, key_in_ref) - case ref.record_type - when :L - [:links] - when :S - [:segment_names] - end - end - -end diff --git a/lib/rgfa/line/group/path/to_gfa2.rb b/lib/rgfa/line/group/path/to_gfa2.rb deleted file mode 100644 index 8e016d8..0000000 --- a/lib/rgfa/line/group/path/to_gfa2.rb +++ /dev/null @@ -1,25 +0,0 @@ -module RGFA::Line::Group::Path::ToGFA2 - - def to_gfa2_a - items = [] - captured_path.each do |oline| - case oline.line - when RGFA::Line::Segment::GFA1 - items << oline.to_s - when RGFA::Line::Edge::Link - eid = oline.line.eid - if eid.placeholder? 
- raise RGFA::ValueError, - "Links has no identifier\n"+ - "Path conversion to GFA2 failed" - end - items << eid + oline.orient.to_s - end - end - a = ["O"] - a << field_to_s(:path_name) - a << items.join(" ") - return a - end - -end diff --git a/lib/rgfa/line/group/path/topology.rb b/lib/rgfa/line/group/path/topology.rb deleted file mode 100644 index 7cd10fc..0000000 --- a/lib/rgfa/line/group/path/topology.rb +++ /dev/null @@ -1,17 +0,0 @@ -module RGFA::Line::Group::Path::Topology - - # Is the path circular? In this case the number of CIGARs must be - # equal to the number of segments. - # @return [Boolean] - def circular? - self.overlaps.size == self.segment_names.size - end - - # Is the path linear? This is the case when the number of CIGARs - # is equal to the number of segments minus 1, or the CIGARs are - # represented by a single "*". - def linear? - !circular? - end - -end diff --git a/lib/rgfa/line/group/path/validation.rb b/lib/rgfa/line/group/path/validation.rb deleted file mode 100644 index 1817540..0000000 --- a/lib/rgfa/line/group/path/validation.rb +++ /dev/null @@ -1,27 +0,0 @@ -module RGFA::Line::Group::Path::Validation - - private - - def validate_lists_size - n_overlaps = self.overlaps.size - n_segments = self.segment_names.size - if n_overlaps == n_segments - 1 - # case 1: linear path - return true - elsif n_overlaps == 1 and self.overlaps[0].empty? 
- # case 2: linear path, single "*" to represent overlaps which are all "*" - return true - elsif n_overlaps == n_segments - # case 3: circular path - else - raise RGFA::InconsistencyError, - "Path has #{n_segments} oriented segments, "+ - "but #{n_overlaps} overlaps" - end - end - - def validate_record_type_specific_info - validate_lists_size - end - -end diff --git a/lib/rgfa/line/group/unordered.rb b/lib/rgfa/line/group/unordered.rb deleted file mode 100644 index e119848..0000000 --- a/lib/rgfa/line/group/unordered.rb +++ /dev/null @@ -1,35 +0,0 @@ -# An unordered group line of a GFA2 file -class RGFA::Line::Group::Unordered < RGFA::Line::Group - - RECORD_TYPE = :U - POSFIELDS = [:pid, :items] - PREDEFINED_TAGS = [] - FIELD_ALIAS = {} - DATATYPE = { - :pid => :optional_identifier_gfa2, - :items => :identifier_list_gfa2, - } - NAME_FIELD = :pid - STORAGE_KEY = :name - REFERENCE_FIELDS = [:items] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:sets] - OTHER_REFERENCES = [] - - apply_definitions - - alias_method :to_sym, :pid - -end - -require_relative "gfa2/references" -require_relative "gfa2/same_id" -require_relative "unordered/references" -require_relative "unordered/induced_set" - -class RGFA::Line::Group::Unordered - include RGFA::Line::Group::GFA2::References - include RGFA::Line::Group::GFA2::SameID - include RGFA::Line::Group::Unordered::References - include RGFA::Line::Group::Unordered::InducedSet -end diff --git a/lib/rgfa/line/group/unordered/induced_set.rb b/lib/rgfa/line/group/unordered/induced_set.rb deleted file mode 100644 index d27b327..0000000 --- a/lib/rgfa/line/group/unordered/induced_set.rb +++ /dev/null @@ -1,102 +0,0 @@ -require "set" - -module RGFA::Line::Group::Unordered::InducedSet - - def induced_set - if !connected? 
- raise RGFA::RuntimeError, - "Induced set cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - iss = induced_segments_set - ise = compute_induced_edges_set(iss) - (iss + ise).freeze - end - - def induced_edges_set - if !connected? - raise RGFA::RuntimeError, - "Induced set cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - compute_induced_edges_set(induced_segments_set).freeze - end - - def induced_segments_set - if !connected? - raise RGFA::RuntimeError, - "Induced set cannot be computed\n"+ - "Line is not connected to a RGFA instance\n"+ - "Line: #{self}" - end - segments_set = Set.new - items.each do |item| - case item - when Symbol - raise RGFA::RuntimeError, - "Induced set cannot be computed; a reference has not been resolved\n"+ - "Line: #{self}\n"+ - "Unresolved reference: #{item.line} (Symbol found)" - when RGFA::Line::Segment::GFA2 - check_induced_set_elem_connected(item) - segments_set << item - when RGFA::Line::Edge::GFA2 - check_induced_set_elem_connected(item) - [item.sid1.line, item.sid2.line].each do |sl| - check_induced_set_elem_connected(sl) - segments_set << sl - end - when RGFA::Line::Group::Ordered - check_induced_set_elem_connected(item) - subset = item.captured_segments - raise RGFA::AssertionError if subset.empty? - subset.each do |elem| - segments_set << elem.line - end - when RGFA::Line::Group::Unordered - check_induced_set_elem_connected(item) - subset = item.induced_segments_set - raise RGFA::AssertionError if subset.empty? 
- subset.each do |elem| - segments_set << elem - end - when RGFA::Line::Unknown - raise RGFA::RuntimeError, - "Induced set cannot be computed; a reference has not been resolved\n"+ - "Line: #{self}\n"+ - "Unresolved reference: #{item.name} (Virtual unknown line)" - else - raise RGFA::TypeError, - "Line: #{self}\t"+ - "Cannot compute induced set:\t"+ - "Error: items of type #{item.class} are not supported\t"+ - "Unsupported item: #{item}" - end - end - return segments_set.to_a.freeze - end - - private - - def check_induced_set_elem_connected(item) - if !item.connected? - raise RGFA::RuntimeError, - "Cannot compute induced set\n"+ - "Non-connected element found\n"+ - "Item: #{item}\nLine: #{self}" - end - end - - def compute_induced_edges_set(segments_set) - edges_set = Set.new - segments_set.each do |item| - item.edges.each do |edge| - edges_set << edge if segments_set.include?(edge.other(item)) - end - end - return edges_set.to_a - end - -end diff --git a/lib/rgfa/line/group/unordered/references.rb b/lib/rgfa/line/group/unordered/references.rb deleted file mode 100644 index 5b83267..0000000 --- a/lib/rgfa/line/group/unordered/references.rb +++ /dev/null @@ -1,71 +0,0 @@ -module RGFA::Line::Group::Unordered::References - - # Add an item to the group - # @param item [RGFA::Line, Symbol] - # GFA2 edge, segment, gap or group line to add - # @return [void] - def add_item(item) - if !connected? - add_item_to_unconnected_group(item) - else - add_item_to_connected_group(item) - end - end - - # Remove an item from the group - # @param item [Symbol, RGFA::Line] - # GFA2 edge, segment, gap or group line to remove - # @return [void] - def rm_item(item) - if !connected? 
- rm_item_from_unconnected_group(item) - else - rm_item_from_connected_group(item) - end - end - - private - - def rm_item_from_unconnected_group(item) - item = item.name if item.kind_of?(RGFA::Line) - check_item_included(item) - self.items.delete(item) - return nil - end - - def rm_item_from_connected_group(item) - item = @rgfa.line(item) - check_item_included(item) - line.delete_reference(self, :sets) - self.delete_reference(line, :items) - return nil - end - - def check_item_included(item) - if !items.include?(item) - raise RGFA::NotFoundError, - "Line: #{self}\n"+ - "Item: #{item.inspect}"+ - "Items of the line do not include the item" - end - end - - def add_item_to_unconnected_group(item, append = true) - item = item.name if item.kind_of?(RGFA::Line) - items.send(append ? :push : :unshift, item) - return nil - end - - def add_item_to_connected_group(item, append = true) - self.add_reference(prepare_and_check_ref(item), - :items, append: append) - return nil - end - - def initialize_references - items.size.times do |i| - items[i] = line_for_ref_symbol(items[i]) - end - end - -end diff --git a/lib/rgfa/line/header.rb b/lib/rgfa/line/header.rb deleted file mode 100644 index ee3bab9..0000000 --- a/lib/rgfa/line/header.rb +++ /dev/null @@ -1,35 +0,0 @@ -# A header line of a RGFA file -# -# @tested_in api_header -class RGFA::Line::Header < RGFA::Line - - RECORD_TYPE = :H - POSFIELDS = [] - PREDEFINED_TAGS = [:VN, :TS] - FIELD_ALIAS = {} - DATATYPE = { - :VN => :Z, - :TS => :i - } - REFERENCE_FIELDS = [] - NAME_FIELD = nil - STORAGE_KEY = :merge - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions - -end - -require_relative "header/version_conversion.rb" -require_relative "header/multiline.rb" -require_relative "header/connection.rb" -require_relative "header/field_data.rb" - -class RGFA::Line::Header - include RGFA::Line::Header::VersionConversion - include RGFA::Line::Header::Multiline - include 
RGFA::Line::Header::Connection - include RGFA::Line::Header::FieldData -end diff --git a/lib/rgfa/line/header/connection.rb b/lib/rgfa/line/header/connection.rb deleted file mode 100644 index d34bc76..0000000 --- a/lib/rgfa/line/header/connection.rb +++ /dev/null @@ -1,29 +0,0 @@ -# Disallow connection by user to RGFA lines -# @tested_in api_header -module RGFA::Line::Header::Connection - - # @api private - module API_PRIVATE - - # Connect is not allowed for header lines except for the single header - # instance created during initialization of the RGFA - # - # @raise [RGFA::RuntimeError] always, except during RGFA initialization - # - # @return [nil] - def connect(rgfa) - unless rgfa.header.eql?(self) - raise RGFA::RuntimeError, - "RGFA::Line::Header instances cannot be connected\n"+ - "Use RGFA#add_line(this_line) to add the information\n"+ - "contained in this header line to the header of a RGFA instance." - else - @rgfa = rgfa - end - return nil - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/line/header/field_data.rb b/lib/rgfa/line/header/field_data.rb deleted file mode 100644 index 96fecbf..0000000 --- a/lib/rgfa/line/header/field_data.rb +++ /dev/null @@ -1,17 +0,0 @@ -# Disallow editing the VN tag in connected header lines -# @tested_in api_header -module RGFA::Line::Header::FieldData - - private - - def set_existing_field(fieldname, value, set_reference: false) - if fieldname == :VN and !get(:VN).nil? and self.connected? - raise RGFA::RuntimeError, - "The value of the header tag VN cannot be edited\n"+ - "For version conversion use to_gfa1 or to_gfa2" - else - super - end - end - -end diff --git a/lib/rgfa/line/header/multiline.rb b/lib/rgfa/line/header/multiline.rb deleted file mode 100644 index e5d631c..0000000 --- a/lib/rgfa/line/header/multiline.rb +++ /dev/null @@ -1,160 +0,0 @@ -# -# Support for headers defined in multiple lines of the GFA file. 
The lines may -# also contain the same tag defined multiple times in different lines, with -# multiple values. -# -# @tested_in api_header, unit_header -module RGFA::Line::Header::Multiline - - # Tags which are allowed to be defined with a single value in the header - # (if multiple header line contain them, they must contain the same value) - SINGLE_DEFINITION_TAGS = [:VN, :TS] - - # Set a header tag value. - # - # If the tag +tagname+ does not exist yet, it is set to +value+. If it exists - # and is defined a single time (i.e. it is not a field array): if the tagname - # is in the {SINGLE_DEFINITION_TAGS} list, if value is the same as before, it - # is ignored, if different, an error is raised; for other tags: a field array - # is created, containing the previous value and +value+. If the previous value - # it is a field array (i.e. the tag was already defined multiple times), - # +value+ is added to it. - # - # @param tagname [Symbol] a valid GFA custom or predefined tag name - # @param value [Object] the value to which the tag shall be set; if the tag - # was already defined (or if a datatype parameter is specified), - # the value must be compatible with the datatype - # @param datatype [RGFA::Field::TAG_DATATYPE, nil] the GFA tag datatype to - # use; if none is specified, it is determined by the previous values for - # the field (if any) or by the class of value (otherwise) - # - # @raise [RGFA::FormatError] if the tagname is invalid - # @raise [RGFA::TypeError, RGFA::FormatError] - # if the value is not compatible with the specified datatype - # @raise [RGFA::InconsistencyError] - # if the value is not compatible with the datatype of the previously - # defined value; or a tag in the {SINGLE_DEFINITION_TAGS} list is defined - # multiple times with different values. - # - # @return [self] - def add(tagname, value, datatype=nil) - tagname = tagname.to_sym - prev = get(tagname) - if prev.nil? 
- set_datatype(tagname, datatype) if datatype - set(tagname, value) - return self - elsif !prev.kind_of?(RGFA::FieldArray) - if SINGLE_DEFINITION_TAGS.include?(tagname) - if field_to_s(tagname) == value.to_gfa_field(fieldname: tagname) - return self - else - raise RGFA::InconsistencyError, - "Inconsistent values for header tag #{tagname} found\n"+ - "Previous definition: #{prev}\n"+ - "Current definition: #{value}" - end - end - prev = RGFA::FieldArray.new(get_datatype(tagname), [prev]) - set_existing_field(tagname, prev) - end - if @vlevel > 1 - prev.vpush(value, datatype, tagname) - else - prev << value - end - return self - end - - # Compute the string representation of a header field. - # - # @param fieldname [Symbol] the tag name of the field - # @param tag [Boolean] (defaults to: +false+) - # return the tagname:datatype:value representation - # - # @raise [RGFA::NotFoundError] if field is not defined - # - # @return [String] the string representation - def field_to_s(fieldname, tag: false) - prev = get(fieldname) - if prev.kind_of?(RGFA::FieldArray) - prev.validate_gfa_field(nil, fieldname) if @vlevel >= 2 - return tag ? prev.to_gfa_tag(fieldname) - : prev.to_gfa_field(fieldname: fieldname) - else - super - end - end - - # @api private - # @tested_in unit_header - module API_PRIVATE - - # Count the tags which are represented by FieldArray (i.e. with multiple - # definitions). - # - # @return [Integer (>= 0)] - def n_duptags - n = 0 - tagnames.each do |tn| - n += 1 if get(tn).kind_of?(RGFA::FieldArray) - end - return n - end - - # Split the header line into single-tag lines. - # - # If a tag is a FieldArray, this is splitted into multiple fields with the - # same fieldname (note that it leads to an invalid GFA, if all instances - # are output in the same GFA line). 
- # - # @return [Array] - def split - tags.map do |tagname, datatype, value| - h = RGFA::Line::Header.new([], vlevel: @vlevel) - h.set_datatype(tagname, datatype) - h.set(tagname, value) - h - end - end - - # Merge an additional {RGFA::Line::Header} line into this header line. - # - # @param gfa_line [RGFA::Line::Header] the header line to merge - # - # @return [self] - def merge(gfa_line) - gfa_line.tagnames.each do |of| - add(of, gfa_line.get(of), gfa_line.get_datatype(of)) - end - self - end - - end - include API_PRIVATE - - private - - # Array of tags data. - # - # Returns the tags as an array of [fieldname, datatype, value] arrays. If a - # field is a FieldArray, this is splitted into multiple fields with the same - # fieldname. - # - # @return [Array<(Symbol, Symbol, Object)>] - def tags - retval = [] - tagnames.each do |of| - value = get(of) - if value.kind_of?(RGFA::FieldArray) - value.each do |elem| - retval << [of, value.datatype, elem] - end - else - retval << [of, get_datatype(of), value] - end - end - return retval - end - -end diff --git a/lib/rgfa/line/header/version_conversion.rb b/lib/rgfa/line/header/version_conversion.rb deleted file mode 100644 index 0149470..0000000 --- a/lib/rgfa/line/header/version_conversion.rb +++ /dev/null @@ -1,30 +0,0 @@ -module RGFA::Line::Header::VersionConversion - - # @api private - # @tested_in unit_header - module API_PRIVATE - - # Return the string representation of the tags, changing the value - # of the VN tag to 2.0, if this is present - # @return [Array] array of strings representing the tags - def to_gfa2_a - a = ["H"] - (a << "VN:Z:2.0") if self.VN - (tagnames-[:VN]).each {|fn| a << field_to_s(fn, tag: true)} - return a - end - - # Return the string representation of the tags, changing the value - # of the VN tag to 1.0, if this is present - # @return [Array] array of strings representing the tags - def to_gfa1_a - a = ["H"] - (a << "VN:Z:1.0") if self.VN - (tagnames-[:VN]).each {|fn| a << field_to_s(fn, 
tag: true)} - return a - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/line/segment.rb b/lib/rgfa/line/segment.rb deleted file mode 100644 index 17fd725..0000000 --- a/lib/rgfa/line/segment.rb +++ /dev/null @@ -1,11 +0,0 @@ -# -# Parent class for classes representing S lines -# RGFA::Line::Segment::GFA1 and -# RGFA::Line::Segment::GFA2 -# -class RGFA::Line::Segment < RGFA::Line -end - -require_relative "segment/gfa1.rb" -require_relative "segment/gfa2.rb" -require_relative "segment/factory.rb" diff --git a/lib/rgfa/line/segment/coverage.rb b/lib/rgfa/line/segment/coverage.rb deleted file mode 100644 index 73a6ada..0000000 --- a/lib/rgfa/line/segment/coverage.rb +++ /dev/null @@ -1,38 +0,0 @@ -module RGFA::Line::Segment::Coverage - - # @!macro [new] coverage - # The coverage computed from a count_tag. - # If unit_length is provided then: count/(length-unit_length+1), - # otherwise: count/length. - # The latter is a good approximation if length >>> unit_length. - # @param [Symbol] count_tag (defaults to +:RC+) - # integer tag storing the count, usually :KC, :RC or :FC - # @param [Integer] unit_length the (average) length of a read (for - # :RC), fragment (for :FC) or k-mer (for :KC) - # @return [Integer] coverage, if count_tag and length are defined - # @return [nil] otherwise - # @see #coverage! - def coverage(count_tag: :RC, unit_length: 1) - if tagnames.include?(count_tag) and self.length - return (self.get(count_tag).to_f)/(self.length-unit_length+1) - else - return nil - end - end - - # @see #coverage - # @!macro coverage - # @raise [RGFA::NotFoundError] if segment does not have count_tag - # @!macro length_needed - def coverage!(count_tag: :RC, unit_length: 1) - c = coverage(count_tag: count_tag, unit_length: unit_length) - if c.nil? - self.length! 
- raise RGFA::NotFoundError, - "Tag #{count_tag} undefined for segment #{name}" - else - return c - end - end - -end diff --git a/lib/rgfa/line/segment/factory.rb b/lib/rgfa/line/segment/factory.rb deleted file mode 100644 index 496e985..0000000 --- a/lib/rgfa/line/segment/factory.rb +++ /dev/null @@ -1,38 +0,0 @@ -# -# Factory of instances of the classes -# RGFA::Line::Segment::GFA1 and -# RGFA::Line::Segment::GFA2 -# -class RGFA::Line::Segment::Factory < RGFA::Line::Segment - - def self.new(data, vlevel: 1, virtual: false, version: nil) - if version == :gfa1 - return RGFA::Line::Segment::GFA1.new(data, - vlevel: vlevel, virtual: virtual, version: version) - elsif version == :gfa2 - return RGFA::Line::Segment::GFA2.new(data, - vlevel: vlevel, virtual: virtual, version: version) - elsif version.nil? - begin - return RGFA::Line::Segment::GFA1.new(data, - vlevel: vlevel, virtual: virtual, version: :gfa1) - rescue => err_gfa1 - begin - return RGFA::Line::Segment::GFA2.new(data, - vlevel: vlevel, virtual: virtual, version: :gfa2) - rescue => err_gfa2 - raise RGFA::FormatError, - "The segment line has an invalid format for both GFA1 and GFA2\n"+ - "GFA1 Error: #{err_gfa1.class}\n"+ - "#{err_gfa1.message}\n"+ - "GFA2 Error: #{err_gfa2.class}\n"+ - "#{err_gfa2.message}\n" - end - end - else - raise RGFA::VersionError, - "GFA specification version unknown (#{version})" - end - end - -end diff --git a/lib/rgfa/line/segment/gfa1.rb b/lib/rgfa/line/segment/gfa1.rb deleted file mode 100644 index aca80c5..0000000 --- a/lib/rgfa/line/segment/gfa1.rb +++ /dev/null @@ -1,45 +0,0 @@ -# A segment line of a RGFA file -class RGFA::Line::Segment::GFA1 < RGFA::Line::Segment - - RECORD_TYPE = :S - POSFIELDS = [:name, :sequence] - PREDEFINED_TAGS = [:LN, :RC, :FC, :KC, :SH, :UR] - DATATYPE = { - :name => :segment_name_gfa1, - :sequence => :sequence_gfa1, - :LN => :i, - :RC => :i, - :FC => :i, - :KC => :i, - :SH => :H, - :UR => :Z, - } - NAME_FIELD = :name - STORAGE_KEY = :name - 
FIELD_ALIAS = { :sid => :name } - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:dovetails_L, :dovetails_R, - :edges_to_contained, :edges_to_containers] - gfa2_compatibility = [:gaps_L, :gaps_R, :fragments, :internals, :sets] - OTHER_REFERENCES = [:paths] + gfa2_compatibility - - apply_definitions - - alias_method :to_sym, :name - -end - -require_relative "gfa1_to_gfa2" -require_relative "length_gfa1" -require_relative "coverage" -require_relative "references" -require_relative "writer_wo_sequence" - -class RGFA::Line::Segment::GFA1 - include RGFA::Line::Segment::GFA1ToGFA2 - include RGFA::Line::Segment::LengthGFA1 - include RGFA::Line::Segment::Coverage - include RGFA::Line::Segment::References - include RGFA::Line::Segment::WriterWoSequence -end diff --git a/lib/rgfa/line/segment/gfa1_to_gfa2.rb b/lib/rgfa/line/segment/gfa1_to_gfa2.rb deleted file mode 100644 index ca34fa0..0000000 --- a/lib/rgfa/line/segment/gfa1_to_gfa2.rb +++ /dev/null @@ -1,11 +0,0 @@ -module RGFA::Line::Segment::GFA1ToGFA2 - - # @return [Array] an array of GFA2 field strings - def to_gfa2_a - a = ["S", field_to_s(:name, tag: false), length!.to_s, - field_to_s(:sequence, tag: false)] - (tagnames-[:LN]).each {|fn| a << field_to_s(fn, tag: true)} - return a - end - -end diff --git a/lib/rgfa/line/segment/gfa2.rb b/lib/rgfa/line/segment/gfa2.rb deleted file mode 100644 index 254dec0..0000000 --- a/lib/rgfa/line/segment/gfa2.rb +++ /dev/null @@ -1,43 +0,0 @@ -# A segment line of a RGFA file -class RGFA::Line::Segment::GFA2 < RGFA::Line - - RECORD_TYPE = :S - POSFIELDS = [:sid, :slen, :sequence] - PREDEFINED_TAGS = [:RC, :FC, :KC, :SH, :UR] - DATATYPE = { - :sid => :identifier_gfa2, - :slen => :i, - :sequence => :sequence_gfa2, - :RC => :i, - :FC => :i, - :KC => :i, - :SH => :H, - :UR => :Z, - } - NAME_FIELD = :sid - STORAGE_KEY = :name - FIELD_ALIAS = { :length => :slen, :LN => :slen } - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = 
[:dovetails_L, :dovetails_R, :gaps_L, :gaps_R, - :edges_to_contained, :edges_to_containers, - :fragments, :internals, :paths, :sets] - OTHER_REFERENCES = [] - - apply_definitions - - alias_method :to_sym, :sid - -end - -require_relative "gfa2_to_gfa1" -require_relative "coverage" -require_relative "references" -require_relative "writer_wo_sequence" - -class RGFA::Line::Segment::GFA2 - include RGFA::Line::Segment::GFA2ToGFA1 - include RGFA::Line::Segment::Coverage - include RGFA::Line::Segment::References - include RGFA::Line::Segment::WriterWoSequence -end diff --git a/lib/rgfa/line/segment/gfa2_to_gfa1.rb b/lib/rgfa/line/segment/gfa2_to_gfa1.rb deleted file mode 100644 index 0514c84..0000000 --- a/lib/rgfa/line/segment/gfa2_to_gfa1.rb +++ /dev/null @@ -1,12 +0,0 @@ -module RGFA::Line::Segment::GFA2ToGFA1 - - # @return [Array] an array of GFA1 field strings - def to_gfa1_a - a = ["S", field_to_s(:name, tag: false), - field_to_s(:sequence, tag: false)] - a << slen.to_gfa_tag(:LN, datatype: :i) - tagnames.each {|fn| a << field_to_s(fn, tag: true)} - return a - end - -end diff --git a/lib/rgfa/line/segment/length_gfa1.rb b/lib/rgfa/line/segment/length_gfa1.rb deleted file mode 100644 index e112c27..0000000 --- a/lib/rgfa/line/segment/length_gfa1.rb +++ /dev/null @@ -1,49 +0,0 @@ -module RGFA::Line::Segment::LengthGFA1 - - # @!macro [new] length - # @return [Integer] value of LN tag, if segment has LN tag - # @return [Integer] sequence length if no LN and sequence not "*" - # @return [nil] if sequence is "*" - # @see #length! - def length - if self.LN - self.LN - elsif !sequence.placeholder? and !sequence.kind_of?(RGFA::Placeholder) - sequence.length - else - nil - end - end - - # @!macro length - # @!macro [new] length_needed - # @raise [RGFA::NotFoundError] if not an LN tag and - # the sequence is "*" - # @see #length - def length! - l = self.length() - raise RGFA::NotFoundError, - "No length information available" if l.nil? 
- return l - end - - # @raise [RGFA::InconsistencyError] - # if sequence length and LN tag are not consistent. - def validate_length - if !sequence.placeholder? and tagnames.include?(:LN) - if self.LN != sequence.length - raise RGFA::InconsistencyError, - "Length in LN tag (#{self.LN}) "+ - "is different from length of sequence field (#{sequence.length})" - end - end - end - - private - - def validate_record_type_specific_info - validate_length - end - -end - diff --git a/lib/rgfa/line/segment/references.rb b/lib/rgfa/line/segment/references.rb deleted file mode 100644 index ad324ed..0000000 --- a/lib/rgfa/line/segment/references.rb +++ /dev/null @@ -1,185 +0,0 @@ -module RGFA::Line::Segment::References - - # References to the graph lines which involve the segment as dovetail overlap - # @return [Array] an array of lines; the lines themselves - # can be modified, but the array is frozen - def dovetails - dovetails_L + dovetails_R - end - - # References to the graph lines which involve the segment as dovetail overlap - # @param extremity [:L,:R] left of right extremity of the segment - # @return [Array] an array of lines; the lines themselves - # can be modified, but the array is frozen - def dovetails_of_end(extremity) - send(:"dovetails_#{extremity}") - end - - # References to the graph lines which involve the segment as dovetail overlap - def gaps - gaps_L + gaps_R - end - - # References to the graph lines which involve the segment as dovetail overlap - # @param extremity [:L,:R] left of right extremity of the segment - def gaps_of_end(extremity) - send(:"gaps_#{extremity}") - end - - # References to graph edges (C lines for GFA1, E for GFA2) which involve the - # segment in a containment relationship. - def containments - edges_to_contained + edges_to_containers - end - - # Computes the connectivity of a segment from its number of dovetail overlaps. - # - # @return [Array] - # conn. symbols respectively of the :L and :R ends of +segment+. 
- # - # Connectivity symbol: (+conn_symbol+) - # - Let _n_ be the number of links to an end (+:L+ or +:R+) of a segment. - # Then the connectivity symbol is +:M+ if n > 1, otherwise _n_. - # - def connectivity - if !connected? - raise RGFA::ArgumentError, - "Cannot compute the connectivity of #{self}\n"+ - "Segment is not connected to a RGFA instance" - end - connectivity_symbols(dovetails_L.size, dovetails_R.size) - end - - # List of dovetail-neighbours of a segment - # @return [Array] segments connected to the current - # segment by dovetail overlap relationships (L lines for GFA1, - # dovetail-representing E lines for GFA2) - def neighbours - dovetails.map{|l|l.other(self)}.uniq - end - - # List of dovetail-neighbours of a segment - # @return [Array] segments connected to the current - # segment by dovetail overlap relationships (L lines for GFA1, - # dovetail-representing E lines for GFA2) - # @param extremity [:L,:R] left of right extremity of the segment - def neighbours_of_end(extremity) - dovetails_of_end(extremity).map{|l|l.other(self)}.uniq - end - - # List of segments which contain the segment - # @return [Array] segments connected to the current - # segment by containment relationships (C lines for GFA1, - # containment-representing E lines for GFA2), where the current segment is - # the contained segment - def containers - edges_to_containers.map(&:from).uniq - end - - # List of segments which are contained in the segment - # @return [Array] segments connected to the current - # segment by containment relationships (C lines for GFA1, - # containment-representing E lines for GFA2), where the current segment is - # the container segment - def contained - edges_to_contained.map(&:to).uniq - end - - # List of edges which refer to the segment - # @return [Array] - def edges - dovetails + containments + internals - end - - # List of edges or gaps which connect to the specified segment - # @param segment [Symbol, RGFA::Segment] a segment name or instance - # 
@param collection [Symbol] (defaults to: +edges+) which edges or gaps - # shall be considered; a method of segment which returns an array of - # instances which respond to the method +other+ (e.g. edges, gaps, - # containments, edges_to_contained; dovetails_L; gaps_L; etc) - # @return [Array] - def relations_to(segment, collection = :edges) - case segment - when Symbol - relations_to_symbol(segment, collection) - when RGFA::Line - relations_to_line(segment, collection) - end - end - - # List of edges or gaps which connect to the specified oriented segment - # @param orientation [:+,:-] orientation of self - # @param oriented_segment [RGFA::OrientedLine] an oriented line - # @param collection [Symbol] (defaults to: +edges+) which edges or gaps - # shall be considered; a method of segment which returns an array of - # instances which respond to the method +other_oriented_segment+ - # (e.g. edges, gaps, containments, edges_to_contained; dovetails_L; - # gaps_L; etc) - # @return [Array] - def oriented_relations(orientation, oriented_segment, collection = :edges) - send(collection).select do |e| - (e.other_oriented_segment(OL[self, orientation]) rescue nil) == - oriented_segment.to_oriented_line - end - end - - # List of edges or gaps which connect to the specified segment end - # @param orientation [:+,:-] orientation of self - # @param oriented_segment [RGFA::OrientedLine] an oriented segment - # @param collection [Symbol] (defaults to: +edges+) which edges or gaps - # shall be considered; a method of segment which returns an array of - # instances which respond to the method +other_end+ - # (e.g. 
dovetails, gaps) - # @return [Array] - def end_relations(extremity, segment_end, collection = :edges) - send(collection).select do |e| - (e.other_end([self, extremity].to_segment_end) rescue nil) == - segment_end.to_segment_end - end - end - - private - - def relations_to_symbol(segment_symbol, collection) - send(collection).select do |e| - e.other(self).name == segment_symbol - end - end - - def relations_to_line(segment, collection) - send(collection).select do |e| - e.other(self).eql?(segment) - end - end - - def connectivity_symbols(n,m) - [connectivity_symbol(n), connectivity_symbol(m)] - end - - def connectivity_symbol(n) - n > 1 ? :M : n - end - - def backreference_keys(ref, key_in_ref) - case ref.record_type - when :E - [:dovetails_L, :dovetails_R, :internals, - :edges_to_containers, :edges_to_contained] - when :L - [:dovetails_L, :dovetails_R] - when :C - (key_in_ref == :from_segment) ? [:edges_to_contained] : [:edges_to_containers] - when :G - [:gaps_L, :gaps_R] - when :F - [:fragments] - when :P, :O - [:paths] - when :U - [:sets] - else - [] - end - end - -end diff --git a/lib/rgfa/line/segment/writer_wo_sequence.rb b/lib/rgfa/line/segment/writer_wo_sequence.rb deleted file mode 100644 index a8768aa..0000000 --- a/lib/rgfa/line/segment/writer_wo_sequence.rb +++ /dev/null @@ -1,17 +0,0 @@ -module RGFA::Line::Segment::WriterWoSequence - - # @return string representation of the segment - # @param [Boolean] without_sequence if +true+, output "*" instead of sequence - def to_s(without_sequence: false) - if !without_sequence - return super() - else - saved = self.sequence - self.sequence = "*" - retval = super() - self.sequence = saved - return retval - end - end - -end diff --git a/lib/rgfa/line/unknown.rb b/lib/rgfa/line/unknown.rb deleted file mode 100644 index 2c47599..0000000 --- a/lib/rgfa/line/unknown.rb +++ /dev/null @@ -1,31 +0,0 @@ -# A GFA2 line which was referred to only by G or O lines -# and has not been found yet (ie is always virtual) -# 
@tested_in api_positionals, unit_unknown, api_references -class RGFA::Line::Unknown < RGFA::Line - - RECORD_TYPE = nil - POSFIELDS = [:name] - FIELD_ALIAS = { } - PREDEFINED_TAGS = [] - DATATYPE = { - :name => :identifier_gfa2, - } - REFERENCE_FIELDS = [] - NAME_FIELD = :name - STORAGE_KEY = :name - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:sets, :paths] - OTHER_REFERENCES = [] - - apply_definitions - - alias_method :to_sym, :name -end - -require_relative "unknown/writer.rb" -require_relative "unknown/virtual.rb" - -class RGFA::Line::Unknown - include RGFA::Line::Unknown::Writer - include RGFA::Line::Unknown::Virtual -end diff --git a/lib/rgfa/line/unknown/virtual.rb b/lib/rgfa/line/unknown/virtual.rb deleted file mode 100644 index 0ce3dc4..0000000 --- a/lib/rgfa/line/unknown/virtual.rb +++ /dev/null @@ -1,11 +0,0 @@ -# An unknown line is always virtual -# @tested_in unit_unknown -module RGFA::Line::Unknown::Virtual - - # An unknown line is always virtual - # @return [true] - def virtual? - true - end - -end diff --git a/lib/rgfa/line/unknown/writer.rb b/lib/rgfa/line/unknown/writer.rb deleted file mode 100644 index a1a2818..0000000 --- a/lib/rgfa/line/unknown/writer.rb +++ /dev/null @@ -1,11 +0,0 @@ -# Special string representation for virtual unknown lines -# @tested_in unit_unknown -module RGFA::Line::Unknown::Writer - - # A string representation of the unknown line - # @return [String] - def to_s - "?record_type?\t#{name}\tco:Z:line_created_by_RGFA" - end - -end diff --git a/lib/rgfa/lines.rb b/lib/rgfa/lines.rb deleted file mode 100644 index 79a7d4a..0000000 --- a/lib/rgfa/lines.rb +++ /dev/null @@ -1,58 +0,0 @@ -require_relative "error" - -RGFA::Lines = Module.new - -require_relative "lines/headers" -require_relative "lines/collections" -require_relative "lines/creators" -require_relative "lines/destructors" -require_relative "lines/finders" - -# -# Methods for the RGFA class, which allow to handle lines of multiple types. 
-# -module RGFA::Lines - - include RGFA::Lines::Headers - include RGFA::Lines::Collections - include RGFA::Lines::Creators - include RGFA::Lines::Destructors - include RGFA::Lines::Finders - - # @api private - GFA1Specific = [ - RGFA::Line::Edge::Link, - RGFA::Line::Edge::Containment, - RGFA::Line::Group::Path, - RGFA::Line::Segment::GFA1, - ] - - # @api private - GFA2Specific = [ - RGFA::Line::CustomRecord, - RGFA::Line::Fragment, - RGFA::Line::Gap, - RGFA::Line::Edge::GFA2, - RGFA::Line::Segment::GFA2, - RGFA::Line::Group::Unordered, - RGFA::Line::Group::Ordered, - RGFA::Line::Unknown, - ] - - private - - def api_private_check_gfa_line(gfa_line, callermeth) - if !gfa_line.kind_of?(RGFA::Line) - raise RGFA::TypeError, - "Note: ##{callermeth} is API private, do not call it directly\n"+ - "Error: line class is #{gfa_line.class} and not RGFA::Line" - elsif !gfa_line.rgfa.equal?(self) - raise RGFA::RuntimeError, - "Note: ##{callermeth} is API private, do not call it directly\n"+ - "Error: line.rgfa is "+ - "#{gfa_line.rgfa.class}:#{gfa_line.rgfa.object_id} and not "+ - "RGFA:#{self.object_id}" - end - end - -end diff --git a/lib/rgfa/lines/collections.rb b/lib/rgfa/lines/collections.rb deleted file mode 100644 index 4c93803..0000000 --- a/lib/rgfa/lines/collections.rb +++ /dev/null @@ -1,206 +0,0 @@ -# -# Methods for the RGFA class, which allow to add lines. 
-# -# @tested_in api_lines_collections -# -module RGFA::Lines::Collections - - # Names of the collections and record type of lines which - # do not have a name field - COLLECTIONS_NO_NAME = { - :comments => :"#", - :gfa1_containments => :C, - :gfa1_links => :L, - } - - # Names of the collections and record type of lines which - # have a mandatory name field - COLLECTIONS_MANDATORY_NAME = { - :segments => :S, - :gfa1_paths => :P - } - - # Names of the collections and record type of lines which - # have an optional name field - COLLECTIONS_OPTIONAL_NAME = { - :gfa2_edges => :E, - :gaps => :G, - :sets => :U, - :gfa2_paths => :O - } - - # @!method comments - # All comment lines of the RGFA - # @return [Array] - COLLECTIONS_NO_NAME.each do |k, v| - define_method(k){@records[v]} - end - - # @!method sets - # All unordered group lines of the RGFA - # @return [Array] - # @!method set_names - # List all names of sets in the RGFA - # @return [Array] - # @!method gaps - # All gap lines of the RGFA - # @return [Array] - # @!method gap_names - # List all names of gaps in the RGFA - # @return [Array] - COLLECTIONS_OPTIONAL_NAME.each do |k, v| - define_method(k) {@records[v].values.flatten} - define_method(:"#{k[0..-2]}_names") {@records[v].keys - [nil]} - end - - # @!method segments - # All segment lines of the RGFA - # @return [Array] - # @!method segment_names - # List all names of segments in the RGFA - # @return [Array] - COLLECTIONS_MANDATORY_NAME.each do |k, v| - define_method(k) {@records[v].values} - define_method(:"#{k[0..-2]}_names") {@records[v].keys} - end - - # All edge lines of the RGFA - # @return [Array] - def edges - if version == :gfa1 - gfa1_links + gfa1_containments - elsif version == :gfa2 - gfa2_edges - else - gfa1_links + gfa1_containments + gfa2_edges - end - end - - # All dovetail lines of the RGFA - # (GFA1 links, GFA2 dovetail edges) - # @return [Array] - def dovetails - if version == :gfa1 - gfa1_links - elsif version == :gfa2 - gfa2_edges.select {|e| 
e.dovetail?} - else - gfa1_links + gfa2_edges.select {|e| e.dovetail?} - end - end - - # All containment lines of the RGFA - # (GFA1 containments, GFA2 containment edges) - # @return [Array] - def containments - if version == :gfa1 - gfa1_containments - elsif version == :gfa2 - gfa2_edges.select {|e| e.containment?} - else - gfa1_containments + gfa2_edges.select {|e| e.containment?} - end - end - - # List all names of edges in the RGFA - # @return [Array] - def edge_names - gfa2_edge_names - end - - # All path or ordered set lines of the RGFA - # @return [Array] - def paths - gfa1_paths + gfa2_paths - end - - # List all names of path lines in the RGFA - # @return [Array] - def path_names - gfa1_path_names + gfa2_path_names - end - - # All fragments of the RGFA instance - # @return [Array] - def fragments - @records[:F].values.flatten - end - - # All names of external sequences mentioned in fragments - # @return [Array] - def external_names - @records[:F].keys - end - - # All names of lines - # (segments and paths in GFA1/GFA2; edges, gaps, sets in GFA2) - # @return [Array] - def names - segment_names + edge_names + gap_names + path_names + set_names - end - - # Record types only allowed in GFA1 (in RGFA) - GFA1_ONLY_KEYS = [:L, :C, :P] - - # Record types allowed in GFA2 (except custom lines); - # nil is a placeholder for virtual lines of unknown type - NONCUSTOM_GFA2_KEYS = [:H, :"#", :F, :S, :E, :G, :U, :O, nil] - - # All record type keys of custom records of the RGFA - # @return [Array] - def custom_record_keys - keys = (@records.keys-[:H]).select {|k|!@records[k].empty?} - case @version - when :gfa1 - [] - when :gfa2 - keys - NONCUSTOM_GFA2_KEYS - else - keys - NONCUSTOM_GFA2_KEYS - GFA1_ONLY_KEYS - end - end - - # All custom records of the RGFA instance - # @return [Array] - def custom_records(record_type=nil) - if record_type - return [].freeze if !custom_record_keys.include?(record_type) - collection = @records[record_type] - case collection - when nil - 
return [].freeze - when Array - return collection.clone.freeze - when Hash - return collection.values.freeze - end - else - cr = [] - custom_record_keys.each do |k| - collection = @records[k] - case collection - when Array - cr += collection - when Hash - cr += collection.values - end - end - cr.freeze - end - end - - # All lines of the RGFA instance - # @return [Array] - def lines - comments + headers + segments + edges + - paths + sets + gaps + fragments + - custom_records - end - - # Iterate over each line of the RGFA instance - # @yield [Array] - def each_line(&block) - lines.each(&block) - end - -end diff --git a/lib/rgfa/lines/creators.rb b/lib/rgfa/lines/creators.rb deleted file mode 100644 index 0dd7581..0000000 --- a/lib/rgfa/lines/creators.rb +++ /dev/null @@ -1,205 +0,0 @@ -# -# Methods for the RGFA class, which allow to add lines. -# -module RGFA::Lines::Creators - - # Add a line to a RGFA - # - # @overload <<(gfa_line_string) - # @param [String] gfa_line_string representation of a RGFA line - # @overload <<(gfa_line) - # @param [RGFA::Line] gfa_line instance of a subclass of RGFA::Line - # @raise [RGFA::NotUniqueError] if multiple segment or path lines - # with the same name are added - # @raise [RGFA::ArgumentError] if the argument is not a RGFA::Line or String - # @return [RGFA] self - # - # @tested_in api_lines_creators - def add_line(gfa_line) - case version - when :gfa1 - add_line_GFA1(gfa_line) - when :gfa2 - add_line_GFA2(gfa_line) - when nil - add_line_unknown_version(gfa_line) - else - raise # This point should never be reached - end - return self - end - alias_method :<<, :add_line - - # Process the lines in the line queue in which lines of type P,L,C are put - # during parsing while waiting for an additional signal which allows to - # identify the version as GFA1. 
- # - # The user usually does not need to call this method, unless a RGFA is created - # from scratch in memory and the user wants to do something on an incomplete - # or invalid GFA (as any valid GFA will contain segments, it will call this - # method automatically). - # - # @tested_in api_lines_version - # - # @return [void] - def process_line_queue - if @version.nil? - @version = @version_guess - end - @line_queue.size.times {self << @line_queue.shift} - end - - # @api private - module API_PRIVATE - - # Register a line in the RGFA, i.e. add a reference to the - # appropriate reference collection in the @records hash. - # - # @tested_in unit_rgfa_lines - # - # @return [void] - def register_line(gfa_line) - api_private_check_gfa_line(gfa_line, "register_line") - storage_key = gfa_line.class::STORAGE_KEY - case storage_key - when :merge - @records[gfa_line.record_type].merge(gfa_line) - when :name - @records[gfa_line.record_type] ||= {} - if gfa_line.name.empty? - @records[gfa_line.record_type][nil] ||= [] - @records[gfa_line.record_type][nil] << gfa_line - else - @records[gfa_line.record_type][gfa_line.name] = gfa_line - end - when :external - @records[gfa_line.record_type][gfa_line.external.line] ||= [] - @records[gfa_line.record_type][gfa_line.external.line] << gfa_line - when nil - @records[gfa_line.record_type] ||= [] - @records[gfa_line.record_type] << gfa_line - end - end - - end - include API_PRIVATE - - private - - def add_line_unknown_version(gfa_line) - if gfa_line.kind_of?(String) - rt = gfa_line[0].to_sym - elsif gfa_line.kind_of?(RGFA::Line) - rt = gfa_line.record_type - else - raise RGFA::ArgumentError, - "Only strings and RGFA::Line instances can be added" - end - case rt - when :"#" - gfa_line.to_rgfa_line(vlevel: @vlevel).connect(self) - when :H - gfa_line = gfa_line.to_rgfa_line(vlevel: @vlevel) - header.merge(gfa_line) - if gfa_line.VN - @version = case gfa_line.VN - when "1.0" then :gfa1 - when "2.0" then :gfa2 - else gfa_line.VN.to_sym - end - 
@version_explanation = "specified in header VN tag" - validate_version if @vlevel > 0 - @line_queue.size.times {self << @line_queue.shift} - end - when :S - gfa_line = gfa_line.to_rgfa_line(vlevel: @vlevel) - @version = gfa_line.version - @version_explanation = "implied by: syntax of S #{gfa_line.name} line" - process_line_queue - gfa_line.connect(self) - when :E, :F, :G, :U, :O - @version = :gfa2 - @version_explanation = "implied by: presence of a #{rt} line" - gfa_line = gfa_line.to_rgfa_line(vlevel: @vlevel, version: @version) - process_line_queue - gfa_line.connect(self) - when :L, :C, :P - @version_guess = :gfa1 - @line_queue << gfa_line - else - @line_queue << gfa_line - end - end - - def add_line_GFA1(gfa_line) - if gfa_line.kind_of?(String) - if gfa_line[0] == "S" - gfa_line = gfa_line.to_rgfa_line(vlevel: @vlevel) - else - gfa_line = gfa_line.to_rgfa_line(version: :gfa1, vlevel: @vlevel) - end - elsif RGFA::Lines::GFA2Specific.include?(gfa_line.class) - raise RGFA::VersionError, - "Version: 1.0 (#{@version_explanation})\n"+ - "Cannot add instance of incompatible line type "+ - "(#{gfa_line.class})" - end - case gfa_line.record_type - when :H - if @vlevel > 0 and gfa_line.VN and gfa_line.VN.to_sym != :"1.0" - raise RGFA::VersionError, - "Header line specified wrong version (#{gfa_line.VN})\n"+ - "Line: #{gfa_line}\n"+ - "File version: 1.0 (#{@version_explanation})" - end - header.merge(gfa_line) - when :S - if gfa_line.version == :gfa2 - raise RGFA::VersionError, - "Version: 1.0 (#{@version_explanation})\n"+ - "GFA2 segment found: #{gfa_line}" - end - gfa_line.connect(self) - when :L, :P, :C, :"#" - gfa_line.connect(self) - else - raise RGFA::TypeError, - "Invalid record type #{rt}" # should be unreachable - end - end - - def add_line_GFA2(gfa_line) - if gfa_line.kind_of?(String) - if gfa_line[0] == "S" - gfa_line = gfa_line.to_rgfa_line(vlevel: @vlevel) - else - gfa_line = gfa_line.to_rgfa_line(version: :gfa2, vlevel: @vlevel) - end - elsif 
RGFA::Lines::GFA1Specific.include?(gfa_line.class) - raise RGFA::VersionError, - "Version: 2.0 (#{@version_explanation})\n"+ - "Cannot add instance of incompatible line type "+ - "(#{gfa_line.class})" - end - case gfa_line.record_type - when :H - if @vlevel > 0 and gfa_line.VN and gfa_line.VN.to_sym != :"2.0" - raise RGFA::VersionError, - "Header line specified wrong version (#{gfa_line.VN})\n"+ - "Line: #{gfa_line}\n"+ - "File version: 2.0 (#{@version_explanation})" - end - header.merge(gfa_line) - when :S - if gfa_line.version == :gfa1 - raise RGFA::VersionError, - "Version: 2.0 (#{@version_explanation})\n"+ - "GFA1 segment found: #{gfa_line}" - end - gfa_line.connect(self) - else - gfa_line.connect(self) - end - end - -end diff --git a/lib/rgfa/lines/destructors.rb b/lib/rgfa/lines/destructors.rb deleted file mode 100644 index 5511f9b..0000000 --- a/lib/rgfa/lines/destructors.rb +++ /dev/null @@ -1,85 +0,0 @@ -# -# Methods for the RGFA class, which allow to remove lines. -# -module RGFA::Lines::Destructors - - # Delete a line from the RGFA graph - # - # @param line [RGFA::Line, Symbol] a line instance or identifier - # - # @tested_in api_lines_destructors - # - # @return [RGFA] self - def rm(gfa_line, *args) - line!(gfa_line).disconnect - return self - end - - # Remove all links of a segment end end except that to the other specified - # segment end. - # - # @param segment_end [RGFA::SegmentEnd] the segment end - # @param other_end [RGFA::SegmentEnd] the other segment end - # @param conserve_components [Boolean] (defaults to: +false+) - # Do not remove links if removing them breaks the graph into unconnected - # components. 
- # - # @tested_in XXX - # - # @return [RGFA] self - def delete_other_links(segment_end, other_end, conserve_components: false) - segment_end = segment_end.to_segment_end - other_end = other_end.to_segment_end - s = segment!(segment_end.segment) - s.dovetails_of_end(segment_end.end_type).each do |l| - if l.other_end(segment_end) != other_end - if !conserve_components or !cut_link?(l) - l.disconnect - end - end - end - end - - # @api private - module API_PRIVATE - - # Remove a line from the @records collection in which it was registered - # @tested_in test_unit_rgfa_lines - # @return [void] - def unregister_line(gfa_line) - api_private_check_gfa_line(gfa_line, "unregister_line") - if gfa_line.record_type == :H - raise RGFA::AssertionError, "Bug found, please report\n"+ - "gfa_line: #{gfa_line}" - end - collection = @records[gfa_line.record_type] - key = gfa_line - delete_if_empty = nil - if collection.kind_of?(Hash) - storage_key = gfa_line.class::STORAGE_KEY - case storage_key - when :name - if !gfa_line.name.empty? - key = gfa_line.name - else - collection = collection[nil] - end - when :external - collection = collection[gfa_line.external.name] - delete_if_empty = gfa_line.external.name - end - end - if collection.kind_of?(Array) - collection.delete_if {|line| line.object_id == key.object_id} - else - collection.delete(key) - end - if delete_if_empty and collection.empty? - @records[gfa_line.record_type].delete(delete_if_empty) - end - end - - end - include API_PRIVATE - -end diff --git a/lib/rgfa/lines/finders.rb b/lib/rgfa/lines/finders.rb deleted file mode 100644 index 03b2d5c..0000000 --- a/lib/rgfa/lines/finders.rb +++ /dev/null @@ -1,185 +0,0 @@ -# -# Methods for the RGFA class, which allow to add lines. -# -# @tested_in api_lines_finders -# -module RGFA::Lines::Finders - - # @!macro [new] segment - # Searches the segment with name equal to +segment_name+. 
- # @param s - # [Symbol, String, RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # segment name or instance - # @return [RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # if a segment is found - # @return [nil] if no such segment exists in the RGFA instance - # - def segment(s) - return s if s.kind_of?(RGFA::Line) - @records[:S][s.to_sym] - end - - # @!macro segment - # @raise [RGFA::NotFoundError] if no such segment exists - def segment!(s) - seg = segment(s) - if seg.nil? - raise RGFA::NotFoundError, "No segment has name #{s}"+ - "#{segment_names.size < 10 ? - "\nSegment names: "+segment_names.inspect : ''}" - end - seg - end - - # Record types whose references are stored in the RGFA instance - # in hashes, where the key is a name field - RECORDS_WITH_NAME = [:E, :S, :P, :U, :G, :O, nil] - - # Find the line with the given l - # @param l [Symbol, RGFA::Line, RGFA::Placeholder] the line to search - # @return [RGFA::Line, nil] if +l+ is a line, then it is returned; - # otherwise, the line is search with name +l+; if such a line does - # not exist +nil+ is returned - def line(l) - return nil if l.placeholder? - case l - when RGFA::Line - return l - when Symbol - return line_by_name(l) - when String - return line_by_name(l.to_sym) - else - return nil - end - end - - # Find the line with the given name, and raise an exception if it does not - # exist - # @param l [Symbol, RGFA::Line, RGFA::Placeholder] the line to search - # @raise [RGFA::ValueError] if +l+ is a placeholder - # @raise [RGFA::NotFoundError] if no line with the given name +l+ exists - # @return [RGFA::Line, nil] if +l+ is a line, then it is returned; - # otherwise, return the line with name +l+ - def line!(l) - gfa_line = line(l) - if gfa_line.nil? - if l.placeholder? 
- raise RGFA::ValueError, - "Cannot search a line with l '*'" - else - raise RGFA::NotFoundError, - "No line found with ID '#{l}'" - end - end - return gfa_line - end - - # Returns all the fragments where the ```external``` ID is the specified ID. - # @return [Array] - def fragments_for_external(id) - @records[:F].fetch(id.to_sym, []) - end - - # @overload select(hash) - # Searches a line based on the value of some of its fields - # @param hash [Hash] an hash field name => field content; if the key :name - # exists, the name field of the line type is searched first, then the - # other fields are compared; placeholder values are ignored. - # @return [Array] a list of lines whose fields are compatible - # with the values given in +hash+ - # @overload select(line) - # Searches a line based on a given line instance - # @param line [RGFA::Line] a line - # @return [Array] a list of lines with the same values in - # the mandatory fields and specified tags, as +line+; additional tags - # may be present; fields whose value is a placeholder are not compared - def select(hash_or_line) - is_hash = hash_or_line.kind_of?(Hash) - name = is_hash ? hash_or_line[:name] : hash_or_line.get(:name) - if !name.nil? and !name.placeholder? - collection = [line_by_name(name)] - else - record_type = is_hash ? hash_or_line[:record_type] : - hash_or_line.record_type - collection = collection_for_select(record_type) - end - method = is_hash ? :field_values? : :eql_fields? - collection.select do |line| - line.send(method, hash_or_line, [:record_type, :name]) - end - end - - # @api private - # @tested_in unit_lines_finders - module API_PRIVATE - - # Search a possible duplicate of the gfa_line. 
- # @param gfa_line [RGFA::Line] - # @return [RGFA::Line, nil] - def search_duplicate(gfa_line) - case gfa_line.record_type - when :L - return search_link(gfa_line.oriented_from, - gfa_line.oriented_to, gfa_line.alignment) - when *RECORDS_WITH_NAME - return line(gfa_line.name) - else - return nil - end - end - - # Search the link from a segment S1 in a given orientation - # to another segment S2 in a given, or the equivalent - # link from S2 to S1 with inverted orientations. - # - # @param [RGFA::OrientedLine] oriented_segment1 a segment with orientation - # @param [RGFA::OrientedLine] oriented_segment2 a segment with orientation - # @param [RGFA::Alignment::CIGAR] cigar - # @return [RGFA::Line::Edge::Link] the first link found - # @return [nil] if no link is found. - def search_link(oriented_segment1, oriented_segment2, cigar) - s = segment(oriented_segment1.line) - return nil if s.nil? - s.dovetails.each do |l| - return l if l.kind_of?(RGFA::Line::Edge::Link) and - l.compatible?(oriented_segment1, oriented_segment2, cigar, true) - end - return nil - end - - end - include API_PRIVATE - - private - - # Searches a line by its name - # @param name [Symbol] - # @return [RGFA::Line, nil] - def line_by_name(name) - RECORDS_WITH_NAME.each do |rt| - next if !@records[rt] - found = @records[rt][name] - return found if !found.nil? - end - return nil - end - - # Computes a collection of lines compatible with the given - # record_type. - # - # @param record_type [Symbol] The record type. 
- def collection_for_select(record_type) - case record_type - when nil - return lines - when :S, :P - collection = @records[record_type].values - when :E, :G, :U, :O, :F - collection = @records[record_type].values.flatten - else - collection = @records[record_type] - end - end - -end diff --git a/lib/rgfa/lines/headers.rb b/lib/rgfa/lines/headers.rb deleted file mode 100644 index 91a6b1a..0000000 --- a/lib/rgfa/lines/headers.rb +++ /dev/null @@ -1,57 +0,0 @@ -require_relative "../field_array" - -# Methods for accessing the GFA header information. -# -# The GFA header is accessed using {#header RGFA#header}, -# which returns a {RGFA::Line::Header} object. -# -# @example Accessing the header information -# rgfa.header.VN # => “1.0” -# rgfa.header.co = “This the header comment” -# rgfa.header.ni = 100 -# rgfa.header.field_to_s(:ni) # => “ni:i:100” -# -# == Multiple header lines defining the same tag -# -# The specification does not explicitely forbid to have the same tag on -# different lines. To represent this case, a "field array" -# ({RGFA::FieldArray RGFA::FieldArray}) is used, which is an array of -# instances of a tag, from different lines of the header. 
-# -# @example Header with tags repeated on different lines (see {RGFA::FieldArray}) -# rgfa.header.ni # => RGFA::FieldArray<[100,200] @datatype: :i> -# rgfa.header.ni[0] # 100 -# rgfa.header.ni << 200 # “200” is also OK -# rgfa.header.ni.map!{|i|i-10} -# rgfa.header.ni = RGFA::FieldArray.new(:i, [100,200,300]) -# -# @example Adding instances of a tag (will go on different header lines) -# rgfa.header.add(:xx, 100) # => 100 # single i tag, if .xx did not exist yet -# rgfa.header.add(:xx, 100) # => RGFA::FieldArray<[100,100] @datatype: :i> -# rgfa.header.add(:xx, 100) # => RGFA::FieldArray<[100,100,100] @datatype :i> -# -# @tested_in api_header -# -module RGFA::Lines::Headers - - # @return [RGFA::Line::Header] an header line representing the entire header - # information; if multiple header line were present, and they contain the - # same tag, the tag value is represented by a {RGFA::FieldArray} - def header - @records[:H] - end - - # Header information in single-tag-lines. - # - # Returns an array of RGFA::Line::Header - # objects, each containing a single field of the header. - # @!macro readonly - # @note Read-only! The returned array containes copies of the original - # values, i.e.\ changes in the lines will not affect the RGFA object; to - # update the values in the RGFA use the #header method. - # @return [Array] - def headers - @records[:H].split - end - -end diff --git a/lib/rgfa/logger.rb b/lib/rgfa/logger.rb deleted file mode 100644 index ff4097e..0000000 --- a/lib/rgfa/logger.rb +++ /dev/null @@ -1,194 +0,0 @@ -# -# This class allows to output a message to the log file or STDERR and -# to keep track of the progress of a method which takes long time to complete. 
-# -# @api private -# -# @tested_in NOT_TESTED -# -class RGFA::Logger - - # Information about the progress of a computation - ProgressData = Struct.new(:counter, :units, :partsize, - :lastpart, :total, :starttime, - :strlen) - - # Create a Logger instance - # - # @param channel [#puts] - # where to output (default: STDERR) - # @param prefix [String] - # output prefix (default: "#") - # @param verbose_level [Integer] - # 0: no logging; >0: the higher, the more logging - # @return [RGFA::Logger] - def initialize(verbose_level: 1, channel: STDERR, prefix: "#") - @progress = false - if !verbose_level.kind_of?(Integer) - raise RGFA::ArgumentError, "verbose_level must be an Integer" - end - if !channel.respond_to?(:puts) - raise RGFA::TypeError, "channel must provide a puts method" - end - @channel = channel - @pfx = prefix - @verbose_level = verbose_level - @data = {} - end - - # Output a message - # - # @param msg [String] message to output - # @param min_verbose_level [Integer] - # @return [void] - def log(msg, min_verbose_level=1) - @channel.puts "#@pfx #{msg}" if @verbose_level >= min_verbose_level - return nil - end - - # Enable output from the Logger instance - # - # @param part [Float] - # - part = 0 => output at every call of {RGFA::Logger.progress_log} - # - 0 < part < 1 => output once per part of the total progress - # (e.g. 
0.001 = log every 0.1% progress) - # - part = 1 => output only total elapsed time - # @return [void] - def enable_progress(part: 0.1) - if part < 0 or part > 1 - raise RGFA::ArgumentError, "part must be in range [0..1]" - end - @progress = true - @part = part - @channel.puts "#@pfx Progress logging enabled" if @verbose_level > 0 - return nil - end - - # Disable progress logging - # @return [void] - def disable_progress - @progress = false - @channel.puts "#@pfx Progress logging disabled" if @verbose_level > 0 - return nil - end - - # @!macro progress_init - # Initialize progress logging for a computation - # @param symbol [Symbol] a symbol assigned to the computation - # @param units [String] a string with the name of the units, in plural - # @param total [Integer] total number of units - # @param initmsg [String] an optional message to output at the beginning - # @return [void] - def progress_init(symbol, units, total, initmsg = nil) - return nil if !@progress or total == 0 - str = "#@pfx 0.0% #{units} processed" - @data[symbol] = ProgressData.new(0, units, (@part*total).to_i, 1, total, - Time.now, str.size) - @channel.puts "#@pfx #{initmsg}" if initmsg - @channel.print str if @part != 1 - return nil - end - - # @!macro [new] progress_log - # Updates progress logging for a computation - # @!macro [new] prlog - # @param symbol [Symbol] the symbol assigned to the computation at - # init time - # @param keyargs [Hash] additional units to display, with their current - # value (e.g. segments_processed: 10000) - # @param progress [Integer] how many units were processed - # @return [void] - def progress_log(symbol, progress=1, **keyargs) - return nil if !@progress or @part == 1 - data = @data[symbol] - return nil if data.nil? 
- data.counter += progress - if data.counter == data.total - progress_end(symbol) - elsif data.partsize == 0 or - (data.counter / data.partsize).to_i > data.lastpart - return nil if data.partsize == 0 and @part > 0 - # this means total is very small - data.lastpart = data.counter / data.partsize if data.partsize > 0 - done = data.counter.to_f / data.total - t = Time.now - data.starttime - eta = (t / done) - t - tstr= ("Elapsed: %02dh %02dmin %02ds" % [t/3600, t/60%60, t%60]) - etastr = ("ETA: %02dh %02dmin %02ds" % [eta/3600, eta/60%60, eta%60]) - donestr = "%.1f" % (done*100) - keystr = "" - keyargs.each {|k,v| keystr << "; #{k}: #{v}"} - str = "#@pfx #{donestr}% #{data.units} processed "+ - "[#{tstr}; #{etastr}#{keystr}]" - if str.size > data.strlen - data.strlen = str.size - spacediff = "" - else - spacediff = " "*(data.strlen-str.size) - end - @channel.print "\r#{str}#{spacediff}" - @channel.flush - end - return nil - end - - # @!macro [new] progress_end - # Completes progress logging for a computation - # @!macro prlog - # @return [void] - def progress_end(symbol, **keyargs) - return if !@progress - data = @data[symbol] - return if data.nil? - t = Time.now - data.starttime - tstr= ("Elapsed time: %02dh %02dmin %02ds" % [t/3600, t/60%60, t%60]) - quantity = @part == 1 ? 
data.total.to_s : "100.0%" - keystr = "" - keyargs.each {|k,v| keystr << "; #{k}: #{v}"} - str = "#@pfx #{quantity} #{data.units} processed [#{tstr}#{keystr}]" - spacediff = " "*([data.strlen - str.size,0].max) - @channel.print "\r" if @part != 1 - @channel.puts "#{str}#{spacediff}" - @channel.flush - @data.delete(symbol) - return nil - end - -end - -# Progress logging related-methods for RGFA class -module RGFA::LoggerSupport - - # Activate logging of progress - # @return [RGFA] self - def enable_progress_logging(part: 0.1, channel: STDERR) - @progress = RGFA::Logger.new(channel: channel) - @progress.enable_progress(part: part) - return self - end - - # @!macro progress_init - # @return [RGFA] self - # @api private - def progress_log_init(symbol, units, total, initmsg = nil) - @progress.progress_init(symbol, units, total, initmsg) if @progress - return self - end - - # @!macro progress_log - # @return [RGFA] self - # @api private - def progress_log(symbol, progress=1, **keyargs) - @progress.progress_log(symbol, progress) if @progress - return self - end - - # @!macro progress_end - # @return [RGFA] self - # @api private - def progress_log_end(symbol, **keyargs) - @progress.progress_end(symbol) if @progress - return self - end - -end diff --git a/lib/rgfa/numeric_array.rb b/lib/rgfa/numeric_array.rb deleted file mode 100644 index c47b6c5..0000000 --- a/lib/rgfa/numeric_array.rb +++ /dev/null @@ -1,221 +0,0 @@ -require_relative "error" - -# -# A numeric array representable using the data type B of the GFA specification -# -# @tested_in api_tags -# -class RGFA::NumericArray < Array - - # Subtypes for signed integers, from the smallest to the largest - SIGNED_INT_SUBTYPE = %W[c s i] - - # Subtypes for unsigned integers, from the smallest to the largest - UNSIGNED_INT_SUBTYPE = SIGNED_INT_SUBTYPE.map{|st|st.upcase} - - # Subtypes for integers - INT_SUBTYPE = UNSIGNED_INT_SUBTYPE + SIGNED_INT_SUBTYPE - - # Subtypes for floats - FLOAT_SUBTYPE = ["f"] - - # Subtypes - 
SUBTYPE = INT_SUBTYPE + FLOAT_SUBTYPE - - # Number of bits of unsigned integer subtypes - SUBTYPE_BITS = {"c" => 8, "s" => 16, "i" => 32} - - # Range for integer subtypes - SUBTYPE_RANGE = Hash[ - INT_SUBTYPE.map do |subtype| - [ - subtype, - if subtype == subtype.upcase - 0..((2**SUBTYPE_BITS[subtype.downcase])-1) - else - (-(2**(SUBTYPE_BITS[subtype]-1)))..((2**(SUBTYPE_BITS[subtype]-1))-1) - end - ] - end - ] - - # Computes the subtype of the array from its content. - # - # If all elements are float, then the computed subtype is "f". - # If all elements are integer, the smallest possible numeric subtype - # is computed; thereby, - # if all elements are non-negative, an unsigned subtype is selected, - # otherwise a signed subtype. - # In all other cases an exception is raised. - # - # @raise [RGFA::ValueError] if the array is not a valid numeric - # array - # @return [RGFA::NumericArray::SUBTYPE] - def compute_subtype - if all? {|f|f.kind_of?(Float)} - return "f" - else - e_max = nil - e_min = nil - each do |e| - if !e.kind_of?(Integer) - raise RGFA::ValueError, - "NumericArray does not contain homogenous numeric values\n"+ - "Content: #{inspect}" - end - e_max = e if e_max.nil? or e > e_max - e_min = e if e_min.nil? 
or e < e_min - end - return RGFA::NumericArray.integer_type(e_min..e_max) - end - end - - # Validate the numeric array - # - # @raise [RGFA::ValueError] if the array is not valid - alias_method :validate, :compute_subtype - - # Return self - # @param valid [nil] ignored, for compatibility - # @return [RGFA::NumericArray] - def to_numeric_array(valid: nil) - self - end - - # GFA datatype B representation of the numeric array - # @raise [RGFA::ValueError] if the array - # if not a valid numeric array - # @return [String] - def to_s - subtype = compute_subtype - "#{subtype},#{join(",")}" - end - - # @api private - module API_PRIVATE - - # GFA tag datatype to use, if none is provided - # @return [RGFA::Field::TAG_DATATYPE] - # @tested_in internals_tag_datatype - def default_gfa_tag_datatype; :B; end - - module ClassMethods - # Computes the subtype for integers in a given range. - # - # If all elements are non-negative, an unsigned subtype is selected, - # otherwise a signed subtype. - # - # @param range [Range] the integer range - # - # @raise [RGFA::ValueError] if the integer range is outside - # all subtype ranges - # - # @return [RGFA::NumericArray::INT_SUBTYPE] subtype code - # @tested_in unit_numeric_array - def integer_type(range) - if range.min < 0 - SIGNED_INT_SUBTYPE.each do |st| - st_range = RGFA::NumericArray::SUBTYPE_RANGE[st] - if st_range.include?(range.min) and st_range.include?(range.max) - return st - end - end - else - UNSIGNED_INT_SUBTYPE.each do |st| - return st if range.max <= RGFA::NumericArray::SUBTYPE_RANGE[st].max - end - end - raise RGFA::ValueError, - "NumericArray: values are outside of all integer subtype ranges\n"+ - "Content: #{inspect}" - end - end - - end - include API_PRIVATE - extend API_PRIVATE::ClassMethods - -end - -# -# Method to create a numeric array from an array -# -class Array - # Create a numeric array from an Array instance - # - # @param valid [Boolean] (default: +false+) - # if +false+, validate the range of the numeric 
values, according - # to the array subtype; if +true+ the string is guaranteed to be valid - # - # @raise [RGFA::ValueError] if any value is not compatible with the subtype - # @raise [RGFA::TypeError] if the subtype code is invalid - # - # @return [RGFA::NumericArray] the numeric array - # - # @tested_in api_tags - def to_numeric_array(valid: false) - na = RGFA::NumericArray.new(self) - na.validate if !valid - na - end -end - -# -# Method to create a numeric array from a string -# -class String - - # Create a numeric array from a string - # - # @param valid [Boolean] (default: +false+) - # if +false+, validate the range of the numeric values, according - # to the array subtype; if +true+ the string is guaranteed to be valid - # - # @raise [RGFA::ValueError] if any value is not compatible with the subtype - # @raise [RGFA::TypeError] if the subtype code is invalid - # - # @return [RGFA::NumericArray] the numeric array - # - # @tested_in api_tags - # - def to_numeric_array(valid: false) - unless valid - if empty? - raise RGFA::FormatError, "Numeric array string shall not be empty" - end - if self[-1] == "," - raise RGFA::FormatError, "Numeric array string ends with comma\n"+ - "String: #{self}" - end - end - elems = split(",") - subtype = elems.shift - if !RGFA::NumericArray::SUBTYPE.include?(subtype) - raise RGFA::TypeError, "Subtype #{subtype} unknown" - end - if subtype != "f" - range = RGFA::NumericArray::SUBTYPE_RANGE[subtype] - end - elems.map! 
do |e| - begin - if subtype != "f" - e = Integer(e) - if not valid and not range.include?(e) - raise "NumericArray: "+ - "value is outside of subtype #{subtype} range\n"+ - "Value: #{e}\n"+ - "Range: #{range.inspect}\n"+ - "Content: #{inspect}" - end - e - else - Float(e) - end - rescue => msg - raise RGFA::ValueError, msg - end - end - elems.to_numeric_array(valid: true) - end - -end diff --git a/lib/rgfa/oriented_line.rb b/lib/rgfa/oriented_line.rb deleted file mode 100644 index 522873c..0000000 --- a/lib/rgfa/oriented_line.rb +++ /dev/null @@ -1,191 +0,0 @@ -require_relative "error" - -# A line or line identifier plus an orientation. -# -# @tested_in unit_oriented_line -class RGFA::OrientedLine - - # Create an OrientedLine instance - # @param line [Symbol, RGFA::Line] either a line identifier (Symbol) - # or a reference to a line (RGFA::Line subclass instance) - # @param orient [:+,:-] a symbol defining the orientation - def initialize(line, orient) - @line = line - @orient = orient.to_sym - @editable = true - end - - # Get the line - # @return [RGFA::Line, Symbol] the line identifier or reference - def line - @line - end - - # Get the orientation symbol - # @return [:+,:-] the orientation symbol - def orient - @orient - end - - # Set the line - # @param line [Symbol, RGFA::Line] set the line instance - # @raise [RGFA::RuntimeError] if the line instance is not editable - # @return [Symbol, RGFA::Line] the line - def line=(line) - if @editable - @line = line - else - raise RGFA::RuntimeError, - "RGFA::OrientedLine instance cannot be edited (#{self})" - end - end - - # Set the orientation - # @param orient [:+,:-] set the orientation symbol - # @raise [RGFA::RuntimeError] if the line instance is not editable - # @return [:+,:-] the orientation - def orient=(orient) - if @editable - @orient = orient - else - raise RGFA::RuntimeError, - "RGFA::OrientedLine instance cannot be edited (#{self})" - end - return @orient - end - - # @return [Symbol] the line name - def 
name - @line.to_sym - end - - # Validate the instance - # @raise [RGFA::ValueError] if the orientation symbol is not +:++ or +:-+ - # @raise [RGFA::TypeError] if the line is not a string, symbol - # or reference to a line (RGFA::Line) - # @raise [RGFA::FormatError] if the line is a string or symbol and - # it contains spacing or non-printable characters - # @return [void] - def validate - validate_line - validate_orient - return nil - end - - # Create an oriented line instance, with the inverted orientation - # @return [RGFA::OrientedLine] same line, inverted orientation - def invert - RGFA::OrientedLine.new(@line, @orient.invert) - end - - # Compute the string representation of the oriented line - # @return [String] line name and orientation - def to_s - "#{name}#{orient}" - end - - # Compare the segment names and orientations of two instances - # - # @param [RGFA::OrientedLine,Array] other the other instance - # @return [Boolean] - def ==(other) - case other - when RGFA::OrientedLine - when Array - other = other.to_oriented_line - when String, Symbol - other = other.to_s.to_oriented_line - else - return false - end - (self.name == other.name) and - (self.orient == other.orient) - end - - # Return self (for compatibility with to_oriented_line of other classes) - # @return [RGFA::OrientedLine] self - def to_oriented_line - self - end - - # @api private - module API_PRIVATE - - def block - @editable = false - end - - def unblock - @editable = true - end - end - - include API_PRIVATE - - private - - # Delegate methods to the line - def method_missing(meth, *args, &block) - @line.send meth, *args, &block - end - - def validate_orient - if ![:+,:-].include?(@orient) - raise RGFA::ValueError, - "Invalid orientation (#{@orient})" - end - end - - def validate_line - case @line - when RGFA::Line - string = @line.name - when Symbol, String - string = @line - else - raise RGFA::TypeError, - "Invalid class (#{@line.class}) for line reference (#{@line})" - end - if string !~ 
/^[!-~]+$/ - raise RGFA::FormatError, - "#{string.inspect} is not a valid GFA identifier\n"+ - "(it contains spaces or non-printable characters)" - end - end - -end - -class String - # Create an oriented line instance from a string, which contains - # a GFA identifier followed by + or - - # @return [RGFA::OrientedLine] - # @tested_in unit_oriented_line - def to_oriented_line - RGFA::OrientedLine.new(self[0..-2].to_sym, self[-1].to_sym) - end -end - -class Array - # Create an oriented line instance from an array, which contains - # a GFA identifier as first element, and a :+ or :- as second - # @return [RGFA::OrientedLine] - # @tested_in unit_oriented_line - def to_oriented_line - RGFA::OrientedLine.new(self[0], self[1].to_sym) - end -end - -# Shortcut to create new objects -# using a OL[] syntax -module OL - # Create an oriented line instance - # (shortcut for RGFA::OrientedLine.new) - # @param line [RGFA::Line,Symbol] the line - # @param orient [:+,:-] the orientation symbol - # @return [RGFA::OrientedLine] - def [](line,orient) - RGFA::OrientedLine.new(line,orient) - end - module_function :[] -end -Kernel.extend(OL) diff --git a/lib/rgfa/placeholder.rb b/lib/rgfa/placeholder.rb deleted file mode 100644 index e289705..0000000 --- a/lib/rgfa/placeholder.rb +++ /dev/null @@ -1,85 +0,0 @@ -# A placeholder is used in mandatory fields when a value is not specified. -# Its string representation is an asterisk (+*+). -# @tested_in api_placeholder -class RGFA::Placeholder - # @return [String] string representation (+*+) - def to_s - "*" - end - - # A placeholder is always empty - # return [true] - def empty? - true - end - - # A placeholder is always valid - # return [void] - def validate - end - - # For compatibility with String#rc (RGFA::Sequence module) - # @return [self] - def rc - self - end - - # Length/size of a placeholder is always 0 - # @return [self] - def length - 0 - end - - # @return [true] - def placeholder? 
- true - end - - alias_method :size, :length - - # Any cut of the placeholder returns the placeholder itself - # @param anything [Object] ignored - # @return [self] - def [](*anything) - self - end - - # Adding the placeholder to anything returns the placeholder itself - # @param anything [Object] ignored - # @return [self] - def +(*anything) - self - end - - def ==(other) - other.placeholder? - end - - alias :eql? :== - alias :=== :== - -end - -class String - def placeholder? - self == "*" - end -end - -class Symbol - def placeholder? - self == :"*" - end -end - -class Array - def placeholder? - empty? - end -end - -class Numeric - def placeholder? - false - end -end diff --git a/lib/rgfa/segment_end.rb b/lib/rgfa/segment_end.rb deleted file mode 100644 index d595780..0000000 --- a/lib/rgfa/segment_end.rb +++ /dev/null @@ -1,132 +0,0 @@ -require_relative "error" - -# A segment or segment name and an end symbol (:L or :R) -# -# @tested_in unit_segment_end -class RGFA::SegmentEnd < Array - - def initialize(segment, end_type) - self[0]=segment - self[1]=end_type - end - - # Check that the elements of the array are compatible with the definition. 
- # - # @!macro [new] segment_info_validation_errors - # @raise [RGFA::ValueError] if size is not 2 - # @raise [RGFA::ValueError] if second element - # is not a valid info - # @return [void] - def validate - if ![:L, :R].include?(end_type) - raise RGFA::ValueError, - "Invalid end type (#{end_type.inspect})" - end - return nil - end - - # @return [Symbol, RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # the segment instance or name - def segment - self[0] - end - - # Set the segment - # @param value - # [Symbol, RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # the segment instance or name - # @return [Symbol, RGFA::Line::Segment::GFA1, RGFA::Line::Segment::GFA2] - # +value+ - def segment=(value) - self[0]=value - end - - # @return [Symbol] the end type - def end_type - self[1] - end - - # Set the end type - # @param value [Symbol] the end type - # @return [Symbol] +value+ - def end_type=(value) - self[1]=(value) - end - - # @return [Symbol] the segment name - def name - segment.to_sym - end - - # @return [RGFA::SegmentInfo] same segment, inverted end type - def invert - self.class.new(self.segment, self.end_type.invert) - end - - # @return [String] name of the segment and end type - def to_s - "#{name}#{end_type}" - end - - # @return [Symbol] name of the segment and end type - def to_sym - to_s.to_sym - end - - # Compare the segment names and end types of two instances - # - # @param [RGFA::SegmentInfo] other the other instance - # @return [Boolean] - def ==(other) - to_s == other.to_segment_end.to_s - end - - # Compare the segment names and end types of two instances - # - # @param [RGFA::SegmentEnd] other the other instance - # @return [Boolean] - def <=>(other) - to_s <=> other.to_segment_end.to_s - end - - def to_segment_end - self - end - - (Array.instance_methods - Object.instance_methods).each do |method| - private method - end - - def to_a - [segment, end_type] - end - public :to_a - -end - -class Array - - # Create and validate a segment 
end from an array - # @!macro segment_info_validation_errors - # @return [RGFA::SegmentEnd] - def to_segment_end - if self.size != 2 - raise RGFA::ValueError, - "Wrong n of elements, 2 expected (#{inspect})" - end - se = RGFA::SegmentEnd.new(*map {|e| e.kind_of?(String) ? e.to_sym : e}) - se.validate - return se - end - -end - -# An array containing {RGFA::SegmentEnd} elements, which defines a path -# in the graph -class RGFA::SegmentEndsPath < Array - # Create a reverse direction path - # @return [RGFA::SegmentEndsPath] - def reverse - super.map {|segment_end| segment_end.to_segment_end.invert} - end -end diff --git a/lib/rgfa/sequence.rb b/lib/rgfa/sequence.rb deleted file mode 100644 index 5178a7e..0000000 --- a/lib/rgfa/sequence.rb +++ /dev/null @@ -1,67 +0,0 @@ -require_relative "error" - -# -# Extensions of the String class to handle nucleotidic sequences -# -# @tested_in api_positionals -module RGFA::Sequence - - # Computes the reverse complement of a nucleotidic sequence - # - # @return [String] reverse complement, without newlines and spaces - # @return [String] "*" if string is "*" - # - # @param valid [Boolean] (defaults to: +false+) - # if true, anything non-sequence is complemented to itself - # @param rna [Boolean] (defaults to: +false+) - # if true, any A and a is complemented into u and U - # - # @raise [RGFA::ValueError] if chars are found for which - # no Watson-Crick complement is defined (and not +valid+) - # - # @example - # "ACTG".rc # => "CAGT" - # "acGT".rc # => "ACgt" - # @example Undefined sequence is represented by "*": - # "*".rc # => "*" - # @example Extended IUPAC Alphabet: - # "ARBN".rc # => "NVYT" - # @example Usage with RNA sequences: - # "ACG".rc(rna: true) # => "CGU" - def rc(valid: false, rna: false) - return self if self.placeholder? - retval = each_char.map do |c| - wcc = WCC.fetch(c, valid ? c : nil) - if wcc.nil? 
- raise RGFA::ValueError, - "#{self}: no Watson-Crick complement for #{c}" - end - wcc - end.reverse.join - retval.tr!("tT","uU") if rna - retval - end - - # Watson-Crick Complements - WCC = {"a"=>"t","t"=>"a","A"=>"T","T"=>"A", - "c"=>"g","g"=>"c","C"=>"G","G"=>"C", - "b"=>"v","B"=>"V","v"=>"b","V"=>"B", - "h"=>"d","H"=>"D","d"=>"h","D"=>"H", - "R"=>"Y","Y"=>"R","r"=>"y","y"=>"r", - "K"=>"M","M"=>"K","k"=>"m","m"=>"k", - "S"=>"S","s"=>"s","w"=>"w","W"=>"W", - "n"=>"n","N"=>"N","u"=>"a","U"=>"A", - "-"=>"-","."=>".","="=>"=", - " "=>"","\n"=>""} - - # Parse a string as sequence. - # @return [RGFA::Placeholder, self] returns self if the string content - # is other than "*", otherwise a RGFA::Placeholder object - def to_sequence - self.placeholder? ? RGFA::Placeholder.new : self - end -end - -class String - include RGFA::Sequence -end diff --git a/lib/rgfa/symbol_invert.rb b/lib/rgfa/symbol_invert.rb deleted file mode 100644 index e8193b1..0000000 --- a/lib/rgfa/symbol_invert.rb +++ /dev/null @@ -1,27 +0,0 @@ -require_relative "error" - -# Define the inverted symbol for some symbols which represent boolean -# conditions, i.e. orientation symbols (:+/:-) and segment ends (:L,:R) -# @tested_in unit_symbol_invert -module RGFA::SymbolInvert - - # Invert a symbol describing an orientation or a segment end - # @raise [RGFA::ValueError] if the symbol has no defined inverted symbol - # @return [Symbol] - def invert - case self - when :+ then :- - when :- then :+ - when :L then :R - when :R then :L - else - raise RGFA::ValueError, - "The symbol #{self.inspect} has no inverse." - end - end - -end - -class Symbol - include RGFA::SymbolInvert -end diff --git a/lib/rgfatools.rb b/lib/rgfatools.rb deleted file mode 100644 index 332a72a..0000000 --- a/lib/rgfatools.rb +++ /dev/null @@ -1,102 +0,0 @@ -require "rgfa" - -# -# Module defining additional methods for the RGFA class. -# -# RGFATools is an extension to the RGFA library, which allow to perform further -# operations. 
Thereby additional conventions are required, with respect to the -# GFA specification, which are compatible with it. -# -# The methods defined here allow, e.g., to randomly orient a segment which has -# the same connections on both sides, to compute copy numbers and multiply or -# delete segments according to them, to distribute the links of copies after -# multipling a segment, or to eliminate edges in the graph which are -# incompatible with an hamiltonian path. -# -# Custom optional fields are defined, such as "cn" for the copy number of a -# segment, "or" for the original segment(s) of a duplicated or merged segment, -# "mp" for the starting position of original segments in a merged segment, "rp" -# for the position of possible inversions due to arbitrary orientation of some -# segments by the program. -# -# Furthermore a convention for the naming of the segments is introduced, which -# gives a special meaning to the characters "_^()". -# -# @developer -# In the main file is only the method redefinition infrastructure -# (private methods). The public methods are in the included modules. 
-# -module RGFATools -end - -require_relative "rgfatools/artifacts" -require_relative "rgfatools/copy_number" -require_relative "rgfatools/invertible_segments" -require_relative "rgfatools/multiplication" -require_relative "rgfatools/superfluous_links" -require_relative "rgfatools/linear_paths" -require_relative "rgfatools/p_bubbles" - -module RGFATools - - include RGFATools::Artifacts - include RGFATools::CopyNumber - include RGFATools::InvertibleSegments - include RGFATools::Multiplication - include RGFATools::SuperfluousLinks - include RGFATools::LinearPaths - include RGFATools::PBubbles - - private - - def self.included(klass) - included_modules.each do |included_module| - if included_module.const_defined?("Redefined") - self.redefine_methods(included_module::Redefined, klass) - end - if included_module.const_defined?("ClassMethods") - klass.extend(included_module::ClassMethods) - end - end - end - - def self.redefine_methods(redefined_methods, klass) - klass.class_eval do - redefined_methods.each do |redefined_method| - was_private = klass.private_instance_methods.include?(redefined_method) - public redefined_method - alias_method :"#{redefined_method}_without_rgfatools", redefined_method - alias_method redefined_method, :"#{redefined_method}_with_rgfatools" - if was_private - private redefined_method, - :"#{redefined_method}_without_rgfatools", - :"#{redefined_method}_with_rgfatools" - end - end - end - end - - ProgramName = "RGFATools" - - def add_program_name_to_header - set_header_field(:pn, RGFATools::ProgramName) - end - -end - -class RGFA - include RGFATools - - # Enable {RGFATools} extensions of RGFA methods - # @return [void] - def enable_extensions - @extensions_enabled = true - end - - # Disable {RGFATools} extensions of RGFA methods - # @return [void] - def disable_extensions - @extensions_enabled = false - end - -end diff --git a/lib/rgfatools/artifacts.rb b/lib/rgfatools/artifacts.rb deleted file mode 100644 index a6150a2..0000000 --- 
a/lib/rgfatools/artifacts.rb +++ /dev/null @@ -1,30 +0,0 @@ -# -# Methods which edit the graph components without traversal -# -module RGFATools::Artifacts - - # Remove connected components whose sum of lengths of the segments - # is under a specified value. - # @param minlen [Integer] the minimum length - # @return [RGFA] self - def remove_small_components(minlen) - connected_components.select {|cc| - cc.map{|sn|segment(sn).length}.reduce(:+) < minlen }. - each{|cc| cc.each {|sn|rm(sn)}} - self - end - - # Remove end segments, whose sequence length is under a specified value. - # @param minlen [Integer] the minimum length - # @return [RGFA] self - def remove_dead_ends(minlen) - segments.each do |s| - c = s.connectivity - rm(s) if s.length < minlen and - (c[0] == 0 or c[1] == 0) and - !cut_segment?(s) - end - self - end - -end diff --git a/lib/rgfatools/copy_number.rb b/lib/rgfatools/copy_number.rb deleted file mode 100644 index 778b0a6..0000000 --- a/lib/rgfatools/copy_number.rb +++ /dev/null @@ -1,124 +0,0 @@ -# -# Methods which edit the graph components without traversal -# -module RGFATools::CopyNumber - - # Sets the count tag to use as default by coverage computations - # (defaults to: +:RC+). - # - # @return [RGFA] self - # @param tag [Symbol] the tag to use - def set_default_count_tag(tag) - @default[:count_tag] = tag - return self - end - - # Sets the unit length (k-mer size, average read lenght or average fragment - # length) to use for coverage computation - # (defaults to: 1). - # - # @param unit_length [Integer] the unit length to use - # @return [RGFA] self - def set_count_unit_length(unit_length) - @default[:unit_length] = unit_length - return self - end - - # Delete segments which have a coverage under a specified value. 
- # - # @param mincov [Integer] the minimum coverage - # @!macro [new] count_tag - # @param count_tag [Symbol] (defaults to: +:RC+ or the value set by - # {#set_default_count_tag}) the count tag to use for coverage - # computation - # @!macro [new] unit_length - # @param unit_length [Integer] (defaults to: 1 or the value set by - # {#set_count_unit_length}) the unit length to use for coverage - # computation - # - # @return [RGFA] self - def delete_low_coverage_segments(mincov, - count_tag: @default[:count_tag], - unit_length: @default[:unit_length]) - segments.each do |s| - cov = s.coverage(count_tag: count_tag, - unit_length: unit_length) - s.disconnect if cov < mincov - end - self - end - - # @param mincov [Integer] (defaults to: 1/4 of +single_copy_coverage+) - # the minimum coverage, cn for segments under this value is set to 0 - # @param single_copy_coverage [Integer] - # the coverage that shall be considered to be single copy - # @param cn_tag [Symbol] (defaults to: +:cn+) - # the tag to use for storing the copy number - # @!macro count_tag - # @!macro unit_length - # @return [RGFA] self - def compute_copy_numbers(single_copy_coverage, - mincov: single_copy_coverage * 0.25, - count_tag: @default[:count_tag], - cn_tag: :cn, - unit_length: @default[:unit_length]) - segments.each do |s| - cov = s.coverage!(count_tag: count_tag, unit_length: unit_length).to_f - if cov < mincov - cn = 0 - elsif cov < single_copy_coverage - cn = 1 - else - cn = (cov / single_copy_coverage).round - end - s.set(cn_tag, cn) - end - self - end - - # Applies the computed copy number to a segment - # @!macro [new] apply_copy_number - # @return [RGFA] self - # @param [:lowcase, :upcase, :number, :copy] copy_names_suffix - # (Defaults to: +:lowcase+) - # Symbol representing a system to compute the names from the name of - # the original segment. See "Automatic computation of the copy names" - # in #multiply_extended. 
- # @param count_tag [Symbol] tag to use for storing the copy number - # (default: cn) - # @param distribute [Symbol] distribution policy, see #multiply_extended - # @param origin_tag [Symbol] tag to use for storing the origin (default: or) - # @param conserve_components [Boolean] when factor is 0, - # do not remove segments if doing so increases the number of components - # in the graph (default: true) - # @param segment [RGFA::Line::Segment, Symbol] segment or segment name - def apply_copy_number(segment, count_tag: :cn, - distribute: :auto, - copy_names_suffix: :asterisk, origin_tag: :or, - conserve_components: true) - s, sn = segment_and_segment_name(segment) - factor = s.get!(count_tag) - multiply_extended(sn, factor, - distribute: distribute, - copy_names: copy_names_suffix, - conserve_components: conserve_components, - origin_tag: origin_tag) - self - end - - # Applies the computed copy number to all segments - # @!macro apply_copy_number - def apply_copy_numbers(count_tag: :cn, distribute: :auto, - copy_names_suffix: :asterisk, origin_tag: :or, - conserve_components: true) - segments.sort_by{|s|s.get!(count_tag)}.each do |s| - multiply_extended(s.name, s.get(count_tag), - distribute: distribute, - copy_names: copy_names_suffix, - conserve_components: conserve_components, - origin_tag: origin_tag) - end - self - end - -end diff --git a/lib/rgfatools/invertible_segments.rb b/lib/rgfatools/invertible_segments.rb deleted file mode 100644 index 6639481..0000000 --- a/lib/rgfatools/invertible_segments.rb +++ /dev/null @@ -1,109 +0,0 @@ -# -# Methods which edit the graph components without traversal -# -module RGFATools::InvertibleSegments - - # Selects a random orientation for all invertible segments - # @return [RGFA] self - def randomly_orient_invertibles - segment_names.each do |sn| - if segment_same_links_both_ends?(sn) - randomly_orient_proven_invertible_segment(sn) - end - end - self - end - - # Selects a random orientation for an invertible segment - # 
@return [RGFA] self - # @!macro segment_param - def randomly_orient_invertible(segment) - segment_name = segment.kind_of?(RGFA::Line) ? segment.name : segment - if !segment_same_links_both_ends?(segment_name) - raise "Only segments with links to the same or equivalent segments "+ - "at both ends can be randomly oriented" - end - randomly_orient_proven_invertible_segment(segment_name) - self - end - - private - - def randomly_orient_proven_invertible_segment(segment_name) - parts = partitioned_links_of([segment_name, :R]) - if parts.size == 2 - tokeep1_other_end = parts[0][0].other_end([segment_name, :R]) - tokeep2_other_end = parts[1][0].other_end([segment_name, :R]) - elsif parts.size == 1 and parts[0].size == 2 - tokeep1_other_end = parts[0][0].other_end([segment_name, :R]) - tokeep2_other_end = parts[0][1].other_end([segment_name, :R]) - else - return - end - return if tokeep1_other_end.segment.dovetails_of_end( - tokeep1_other_end.end_type).size < 2 - return if tokeep2_other_end.segment.dovetails_of_end( - tokeep2_other_end.end_type).size < 2 - delete_other_links([segment_name, :R], tokeep1_other_end) - delete_other_links([segment_name, :L], tokeep2_other_end) - annotate_random_orientation(segment_name) - end - - def link_targets_for_cmp(segment_end) - segment_end.segment.dovetails_of_end( - segment_end.end_type).map do |l| - l.other_end(segment_end).join - end - end - - def segment_same_links_both_ends?(segment_name) - e_links = link_targets_for_cmp([segment_name, :R]) - b_links = link_targets_for_cmp([segment_name, :L]) - return e_links == b_links - end - - def segment_signature(segment_end) - s = segment!(segment_end[0]) - link_targets_for_cmp(segment_end).join(",")+"\t"+ - link_targets_for_cmp(segment_end.invert_end_type).join(",")+"\t"+ - [:or].map do |field| - s.send(field) - end.join("\t") - end - - def partitioned_links_of(segment_end) - segment_end.segment.dovetails_of_end(segment_end.end_type).group_by do |l| - other_end = l.other_end(segment_end) - 
sig = segment_signature(other_end) - sig - end.map {|sig, par| par} - end - - def annotate_random_orientation(segment_name) - segment = segment!(segment_name) - n = segment.name.to_s.split("_") - pairs = 0 - pos = [1, segment.LN] - if segment.or - o = segment.or.to_s.split(",") - if o.size > 2 - while o.last == o.first + "^" or o.last + "^" == o.first - pairs += 1 - o.pop - o.shift - end - end - if segment.mp - pos = [segment.mp[pairs*2], segment.mp[-1-pairs*2]] - end - end - rn = segment.rn - rn ||= [] - rn += pos - segment.rn = rn - n[pairs] = "(" + n[pairs] - n[-1-pairs] = n[-1-pairs] + ")" - rename(segment.name, n.join("_")) - end - -end diff --git a/lib/rgfatools/linear_paths.rb b/lib/rgfatools/linear_paths.rb deleted file mode 100644 index bec5e0b..0000000 --- a/lib/rgfatools/linear_paths.rb +++ /dev/null @@ -1,141 +0,0 @@ -# -# Methods for the RGFA class, which involve a traversal of the graph following -# links -# -module RGFATools::LinearPaths - - # @!method merge_linear_path(segpath, **options) - # Merge a linear path, i.e. a path of segments without extra-branches. - # @!macro [new] merge_more - # Extends the RGFA method, with additional functionality: - # - +name+: the name of the merged segment is set to the name of the - # single segments joined by underscore (+_+). If a name already - # contained an underscore, it is splitted before merging. Whenever a - # segment is reversed complemented, its name (or the name of all its - # components) is suffixed with a +^+; if the last letter was already - # +^+, it is removed; if it contained +_+ the name is splitted, the - # elements reversed and joined back using +_+; round parentheses are - # removed from the name before processing and added back after it. 
- # - +:or+: keeps track of the origin of the merged segment; the - # origin tag is set to an array of :or or name - # (if no :or available) tags of the segment which have been merged; - # the character +^+ is assigned the same meaning as in +name+ - # - +:rn+: tag used to store possible inversion positions and - # it is updated by this method; i.e. it is passed from the single - # segments to the merged segment, and the coordinates updated - # - +:mp+: tag used to store the position of the - # single segments in the merged segment; it is created or updated by - # this method - # Note that the extensions to the original method will only be run - # if either #enable_extensions has been called on RGFA object - # or the enable_tracking parameter is set.. - # After calling #enable_extensions, you may still obtain the original - # behaviour by setting the disable_tracking parameter. - # @!macro merge_more - # - # @!macro [new] merge_lim - # - # Limitations: all containments und paths involving merged segments are - # deleted. - # @!macro merge_lim - # - # @param segpath [Array] a linear path, such as that - # retrieved by #linear_path (see RGFA API documentation) - # @!macro [new] merge_options - # @param options [Hash] optional keyword arguments - # @option options [String, :short, nil] :merged_name (nil) - # if nil, the merged_name is automatically computed; if :short, - # a name is computed starting with "merged1" and calling next until - # an available name is founf; if String, the name to use - # @option options [Boolean] :cut_counts (false) - # if true, total count in merged segment m, composed of segments - # s of set S is multiplied by the factor Sum(|s in S|)/|m| - # @option options [Boolean] :enable_tracking (false) - # if true, the extended method with RGFATools is called, - # no matter if RGFA#enable_extensions was called. 
- # @option options [Boolean] :disable_tracking (false) - # if true, the original method of RGFA without RGFATools is called, - # no matter if RGFA#enable_extensions was called. - # @!macro merge_options - # - # @return [RGFA] self - # @see #merge_linear_paths - - # @!method merge_linear_paths(**options) - # Merge all linear paths in the graph, i.e. - # paths of segments without extra-branches - # @!macro merge_more - # @!macro merge_lim - # @!macro merge_options - # - # @return [RGFA] self - - private - - Redefined = [:add_segment_to_merged] - - def reverse_segment_name(name, separator) - name.to_s.split(separator).map do |part| - openp = part[0] == "(" - part = part[1..-1] if openp - closep = part[-1] == ")" - part = part[0..-2] if closep - part = (part[-1] == "^") ? part[0..-2] : part+"^" - part += ")" if openp - part = "(#{part}" if closep - part - end.reverse.join(separator) - end - - def reverse_pos_array(pos_array, lastpos) - return nil if pos_array.nil? or lastpos.nil? - pos_array.map {|pos| lastpos - pos + 1}.reverse - end - - def add_segment_to_merged_with_rgfatools(merged, segment, reversed, cut, init, - options) - if !options[:enable_tracking] and - (options[:disable_tracking] or !@extensions_enabled) - return add_segment_to_merged_without_rgfatools(merged, segment, reversed, - cut, init, options) - end - s = (reversed ? segment.sequence.rc[cut..-1] : segment.sequence[cut..-1]) - n = (reversed ? reverse_segment_name(segment.name, "_") : segment.name.to_s) - rn = (reversed ? reverse_pos_array(segment.rn, segment.LN) : segment.rn) - mp = (reversed ? reverse_pos_array(segment.mp, segment.LN) : segment.mp) - mp = [1, segment.LN] if mp.nil? and segment.LN - if segment.or.nil? - o = n - else - o = (reversed ? reverse_segment_name(segment.or, ",") : segment.or) - end - if init - merged.sequence = s - merged.name = options[:merged_name].nil? ? 
n : options[:merged_name] - merged.LN = segment.LN - merged.rn = rn - merged.or = o - merged.mp = mp - else - (segment.sequence.placeholder?) ? - (merged.sequence = "*") : - (merged.sequence += s) - merged.name = "#{merged.name}_#{n}" if options[:merged_name].nil? - if merged.LN - if rn - rn = rn.map {|pos| pos - cut + merged.LN} - merged.rn = merged.rn.nil? ? rn : merged.rn + rn - end - if mp and merged.mp - merged.mp += mp.map {|pos| pos - cut + merged.LN} - end - segment.LN ? merged.LN += (segment.LN - cut) - : merged.LN = nil - else - merged.mp = nil - end - merged.or = merged.or.nil? ? o : "#{merged.or},#{o}" - end - end - -end diff --git a/lib/rgfatools/multiplication.rb b/lib/rgfatools/multiplication.rb deleted file mode 100644 index 4b3e8a3..0000000 --- a/lib/rgfatools/multiplication.rb +++ /dev/null @@ -1,195 +0,0 @@ -# -# Methods which edit the graph components without traversal -# -module RGFATools::Multiplication - - # Allowed values for the links_distribution_policy option - LINKS_DISTRIBUTION_POLICY = [:off, :auto, :equal, :L, :R] - - # @overload multiply(segment, factor, copy_names: :lowcase, distribute: :auto, conserve_components: true, origin_tag: :or) - # Create multiple copies of a segment. - # - # Complements the multiply method of gfatools with additional functionality. - # These extensions are used only after #enable_extensions is called on the - # RGFA object. After that, you may still call the original method - # using #multiply_without_rgfatools. - # - # For more information on the additional functionality, see - # #multiply_extended. 
- # - # @return [RGFA] self - def multiply_with_rgfatools(segment, factor, - copy_names: :asterisk, - distribute: :auto, - conserve_components: true, - origin_tag: :or) - if !@extensions_enabled - return multiply_without_rgfatools(segment, factor, - copy_names: copy_names, - conserve_components: conserve_components) - else - multiply_extended(segment, factor, - copy_names: copy_names, - distribute: distribute, - conserve_components: conserve_components, - origin_tag: origin_tag) - end - end - - # Create multiple copies of a segment. - # - # Complements the multiply method of gfatools with additional functionality. - # To always run the additional functionality when multiply is called, - # use RGFA#enable_extensions. - # - # @!macro [new] copynames_text - # - # Automatic computation of the copy names: - # - # - First, itis checked if the name of the original segment ends with a - # relevant - # string, i.e. a lower case letter (for +:lowcase+), an upper case letter - # (for +:upcase+), a digit (for +:number+), or the string +"_copy"+ - # plus one or more optional digits (for +:copy+). - # - If so, it is assumed, it was already a copy, and it is not - # altered. - # - If not, then +a+ (for +:lowcase+), +A+ (for +:upcase+), +1+ (for - # +:number+), +_copy+ (for +:copy+) is appended to the string. - # - Then, in all - # cases, next (*) is called on the string, until a valid, non-existant - # name is found for each of the segment copies - # - (*) = except for +:copy+, where - # for the first copy no digit is present, but for the following is, - # i.e. the segment names will be +:copy+, +:copy2+, +:copy3+, etc. - # - Can be overridden, by providing an array of copy names. - # - # @!macro [new] ldp_text - # - # Links distribution policy - # - # Depending on the value of the option +distribute+, an end - # is eventually selected for distribution of the links. 
- # - # - +:off+: no distribution performed - # - +:L/:R+: links of the specified end are distributed - # - +:equal+: select an end for which the number of links is equal to - # +factor+, if any; if both, then the E end is selected - # - +:auto+: automatically select E or B, trying to maximize the number of - # links which can be deleted - # - # @param [Integer] factor multiplication factor; if 0, delete the segment; - # if 1; do nothing; if > 1; number of copies to create - # @!macro [new] segment_param - # @param segment [String, RGFA::Line::Segment] segment name or instance - # @param [:lowcase, :upcase, :number, :copy, Array] copy_names - # (Defaults to: +:lowcase+) - # Array of names for the copies of the segment, - # or a symbol, which defines a system to compute the names from the name of - # the original segment. See "Automatic computation of the copy names". - # @!macro [new] conserve_components - # @param [Boolean] conserve_components (Defaults to: +true+) - # If factor == 0 (i.e. deletion), delete segment only if - # #cut_segment?(segment) is +false+ (see RGFA API). - # @!macro [new] ldp_param - # @param distribute - # [RGFATools::Multiplication::LINKS_DISTRIBUTION_POLICY] - # (Defaults to: +:auto+) - # Determines if and for which end of the segment, links are distributed - # among the copies. See "Links distribution policy". - # @!macro [new] origin_tag - # @param origin_tag [Symbol] (Defaults to: +:or+) - # Name of the custom tag to use for storing origin information. 
- # - # @return [RGFA] self - def multiply_extended(segment, factor, - copy_names: :asterisk, - distribute: :auto, - conserve_components: true, - origin_tag: :or) - s, sn = segment_and_segment_name(segment) - s.set(origin_tag, sn) if !s.get(origin_tag) - copy_names = compute_copy_names(copy_names, sn, factor) - multiply_without_rgfatools(sn, factor, - copy_names: copy_names, - conserve_components: conserve_components) - distribute_links(distribute, sn, copy_names, factor) - return self - end - - private - - Redefined = [:multiply] - - def select_distribute_end(links_distribution_policy, segment_name, factor) - accepted = RGFATools::Multiplication::LINKS_DISTRIBUTION_POLICY - if !accepted.include?(links_distribution_policy) - raise "Unknown links distribution policy #{links_distribution_policy}, "+ - "accepted values are: "+ - accepted.inspect - end - return nil if links_distribution_policy == :off - if [:L, :R].include?(links_distribution_policy) - return links_distribution_policy - end - s = segment(segment_name) - esize = s.dovetails_of_end(:R).size - bsize = s.dovetails_of_end(:L).size - auto_select_distribute_end(factor, bsize, esize, - links_distribution_policy == :equal) - end - - # (keep separate for testing) - # @tested_in unit_multiplication - def auto_select_distribute_end(factor, bsize, esize, equal_only) - if esize == factor - return :R - elsif bsize == factor - return :L - elsif equal_only - return nil - elsif esize < 2 - return (bsize < 2) ? nil : :L - elsif bsize < 2 - return :R - elsif esize < factor - return ((bsize <= esize) ? :R : - ((bsize < factor) ? :L : :R)) - elsif bsize < factor - return :L - else - return ((bsize <= esize) ? :L : :R) - end - end - - def distribute_links(links_distribution_policy, segment_name, - copy_names, factor) - return if factor < 2 - end_type = select_distribute_end(links_distribution_policy, - segment_name, factor) - return nil if end_type.nil? 
- et_links = segment(segment_name).dovetails_of_end(end_type) - diff = [et_links.size - factor, 0].max - links_signatures = et_links.map do |l| - l.other_end([segment_name, end_type]).to_s - end - ([segment_name]+copy_names).each_with_index do |sn, i| - segment(sn).dovetails_of_end(end_type).each do |l| - l_sig = l.other_end([sn, end_type]).to_s - to_save = links_signatures[i..i+diff].to_a - l.disconnect unless to_save.include?(l_sig) - end - end - end - - def segment_and_segment_name(segment_or_segment_name) - if segment_or_segment_name.kind_of?(RGFA::Line) - s = segment_or_segment_name - sn = segment_or_segment_name.name - else - sn = segment_or_segment_name.to_sym - s = segment(sn) - end - return s, sn - end - -end diff --git a/lib/rgfatools/p_bubbles.rb b/lib/rgfatools/p_bubbles.rb deleted file mode 100644 index 013ca64..0000000 --- a/lib/rgfatools/p_bubbles.rb +++ /dev/null @@ -1,67 +0,0 @@ -# -# Methods for the RGFA class, which involve a traversal of the graph following -# links -# -module RGFATools::PBubbles - - require "set" - - # Removes all p-bubbles in the graph - # @return [RGFA] self - def remove_p_bubbles - visited = Set.new - segments.each do |s| - sn = s.name - next if visited.include?(sn) - if s.connectivity == [1,1] - s1 = s.neighbours_of_end(:L)[0] - s2 = s.neighbours_of_end(:R)[0] - n1 = s1.neighbours.sort - n2 = s2.neighbours.sort - n1.each {|se| visited << se[0].name} - if n1 == n2.map{|se| se.invert_end_type} - remove_proven_p_bubble(s1, s2, n1) - end - end - end - return self - end - - # Removes a p-bubble between segment_end1 and segment_end2 - # @param [RGFA::SegmentEnd] segment_end1 a segment end - # @param [RGFA::SegmentEnd] segment_end2 another segment end - # @!macro [new] count_tag - # @param count_tag [Symbol] (defaults to: +:RC+ or the value set by - # {#set_default_count_tag}) the count tag to use for coverage - # computation - # @!macro [new] unit_length - # @param unit_length [Integer] (defaults to: 1 or the value set by - # 
{#set_count_unit_length}) the unit length to use for coverage - # computation - # @return [RGFA] self - # - def remove_p_bubble(segment_end1, segment_end2, - count_tag: @default[:count_tag], - unit_length: @default[:unit_length]) - n1 = neighbours_of_end(segment_end1).sort - n2 = neighbours_of_end(segment_end2).sort - raise if n1 != n2.map{|se| se.invert_end_type} - raise if n1.any? {|se| se[0].connectivity != [1,1]} - remove_proven_p_bubble(segment_end1, segment_end2, n1, - count_tag: count_tag, - unit_length: unit_length) - return self - end - - private - - def remove_proven_p_bubble(segment_end1, segment_end2, alternatives, - count_tag: @default[:count_tag], - unit_length: @default[:unit_length]) - coverages = alternatives.map{|s|segment!(s[0]).coverage( - count_tag: count_tag, unit_length: unit_length)} - alternatives.delete_at(coverages.index(coverages.max)) - alternatives.each {|s| segment(s[0]).disconnect} - end - -end diff --git a/lib/rgfatools/superfluous_links.rb b/lib/rgfatools/superfluous_links.rb deleted file mode 100644 index ed9b4d4..0000000 --- a/lib/rgfatools/superfluous_links.rb +++ /dev/null @@ -1,65 +0,0 @@ -# -# Methods which edit the graph components without traversal -# -module RGFATools::SuperfluousLinks - - # Remove superfluous links in the presence of mandatory links - # for a single segment - # @return [RGFA] self - # @!macro segment_param - # @!macro [new] conserve_components_links - # @param [Boolean] conserve_components (Defaults to: +true+) - # delete links only if #cut_link?(link) is +false+ (see RGFA API). 
- def enforce_segment_mandatory_links(segment, conserve_components: true) - s, sn = segment_and_segment_name(segment) - se = {} - l = {} - [:L, :R].each do |et| - se[et] = [sn, et] - l[et] = segment(s).dovetails_of_end(et) - end - if l[:L].size == 1 and l[:R].size == 1 - oe = {} - [:L, :R].each {|et| oe[et] = l[et][0].other_end(se[et])} - return if oe[:L] == oe[:R] - [:L, :R].each {|et| delete_other_links(oe[et], se[et], - conserve_components: conserve_components)} - else - i = (l[:L].size == 1) ? 0 : ((l[:R].size == 1) ? 1 : nil) - return if i.nil? - et = [:L, :R][i] - oe = l[et][0].other_end(se[et]) - delete_other_links(oe, se[et], conserve_components: conserve_components) - end - self - end - - # Remove superfluous links in the presence of mandatory links - # in the entire graph - # @!macro conserve_components_links - # @return [RGFA] self - def enforce_all_mandatory_links(conserve_components: true) - segment_names.each {|sn| enforce_segment_mandatory_links(sn, - conserve_components: conserve_components)} - self - end - - # Remove links of segment to itself - # @!macro segment_param - # @return [RGFA] self - def remove_self_link(segment) - segment = segment!(segment) if !segment.kind_of?(RGFA::Line) - segment.dovetails.each do |e| - e.disconnect! if e.from == e.to - end - self - end - - # Remove all links of segments to themselves - # @return [RGFA] self - def remove_self_links - segment_names.each {|sn| remove_self_link(sn)} - self - end - -end diff --git a/manual/TODO b/manual/TODO deleted file mode 100644 index 992a90e..0000000 --- a/manual/TODO +++ /dev/null @@ -1 +0,0 @@ -work on graph_operations.md diff --git a/manual/alignments.md b/manual/alignments.md index 058247d..be423c1 100644 --- a/manual/alignments.md +++ b/manual/alignments.md @@ -5,76 +5,77 @@ overlaps; E/F: alignment). If an alignment is not given, the placeholder symbol ```*``` is used instead. In GFA1 the alignments can be given as CIGAR strings, in GFA2 also as Dazzler traces. 
-RGFA uses different classes (in module RGFA::Alignment) for representing the two +Gfapy uses different classes (in module gfapy::Alignment) for representing the two possible alignment styles (cigar strings and traces) and undefined alignments (placeholders). ### Creating an alignment An alignment instance is usually created from its GFA string representation -by using the ```String#to_alignment``` method: -```ruby -"*".to_alignment # => RGFA::Alignment::Placeholder -"10,10,10".to_alignment # => RGFA::Alignment::Trace -"30M2I30M".to_alignment # => RGFA::Alignment::CIGAR -``` - -The alignment classes also provide a ```to_alignment``` -method (returning self), so that is always safe to call the method on a +or from a list by using the ```gfapy.Alignment``` constructor. +If the argument is an alignment object it will be returned, +so that it is always safe to call the constructor on a variable which can contain a string or an alignment instance: -```ruby -RGFA::Alignment::Placeholder.new.to_alignment -RGFA::Alignment::Trace.new([12,13,0]).to_alignment + +```python +gfapy.Alignment("*") # => gfapy.AlignmentPlaceholder +gfapy.Alignment("10,10,10") # => gfapy.Trace +gfapy.Alignment([10,10,10]) # => gfapy.Trace +gfapy.Alignment("30M2I30M") # => gfapy.CIGAR +gfapy.Alignment(gfapy.Alignment("*")) +gfapy.Alignment(gfapy.Alignment("10,10")) ``` ### Recognizing undefined alignments -The ```placeholder?``` method is available for strings and -alignment instances and is the correct way to understand if an alignment +The ```gfapy.is_placeholder()``` method allows to understand if an alignment field contains a defined value (cigar, trace) or not (placeholder). - -```ruby -"30M".to_alignment.placeholder? # => false -"10,10,10".to_alignment.placeholder? # => false -"*".to_alignment.placeholder? # => true -"*".placeholder? # => true -RGFA::Alignment::CIGAR.new([]).placeholder? # => true -RGFA::Alignment::Trace.new([]).placeholder? # => true -RGFA::Alignment::Placeholder.new.placeholder?
# => true +The method works correctly with both alignment objects and their string +or list representation. + +```python +gfapy.is_placeholder(gfapy.Alignment("30M")) # => False +gfapy.is_placeholder(gfapy.Alignment("10,10")) # => False +gfapy.is_placeholder(gfapy.Alignment("*")) # => True +gfapy.is_placeholder("*") # => True +gfapy.is_placeholder("30M") # => False +gfapy.is_placeholder("10,10") # => False +gfapy.is_placeholder([]) # => True +gfapy.is_placeholder([10,10]) # => False ``` +Note that, as a placeholder is False in boolean context, just a +```if not alignment``` will also work, if alignment is an alignment object, +but not if it is a string representation. + ### Reading and editing CIGARs CIGARs are represented by arrays of cigar operation objects. -Each cigar operation provides the methods ```len```/```len=``` and -```code```/```code=```. Len is the length of the operation (Integer). +Each cigar operation provides the properties ```length``` and +```code```. Length is the length of the CIGAR operation (int). +Code is one of the codes allowed by the GFA specification. -```ruby -cigar = "30M".to_alignment -cigar.kind_of?(Array) # => true +```python +cigar = gfapy.Alignment("30M") +isinstance(cigar, list) # => True operation = cigar[0] -operation.class # => RGFA::Alignment::CIGAR::Operation -operation.code # => :M -operation.len # => 30 -operation.to_s # => "30M" -operation.code = :D -operation.len = 10 -operation.to_s # => "10D" +type(operation) # => "gfapy.CIGAR.Operation" +operation.code # => "M" +operation.code = "D" +operation.length # => 30 +len(operation) # => 30 +str(operation) # => "30D" ``` -CIGAR values can be edited using the methods ```len=``` and ```code=``` -of the single operations or editing the array itself (which allows e.g. -to add or remove operations). If the array is emptied, its -string representation will be ```*```.
- -```ruby -cigar = "30M".to_alignment -cigar << RGFA::Alignment::CIGAR::Operation.new(12, :D) -cigar.to_s # "30M12D" -cigar.delete(cigar[1]) -cigar.to_s # "30M" -cigar.delete(cigar[0]) -cigar.to_s # "*" +The CIGAR object can be edited using the list methods. +If the array is emptied, its string representation will be ```*```. +```python +cigar = gfapy.Alignment("1I20M2D") +cigar[0].code = "M" +cigar.pop(1) +str(cigar) # => "1M2D" +cigar[:] = [] +str(cigar) # => "*" ``` CIGARs consider one sequence as reference and another sequence @@ -83,10 +84,10 @@ compute the length of the alignment on the two sequences. These methods are used by the library e.g. to convert GFA1 L lines to GFA2 E lines (which is only possible if CIGARs are provided). -```ruby -cigar = "30M10D20M5I10M".to_alignment -cigar.length_on_reference # => 70 -cigar.length_on_query # => 65 +```python +cigar = gfapy.Alignment("30M10D20M5I10M") +cigar.length_on_reference() # => 70 +cigar.length_on_query() # => 65 ``` #### Validation @@ -97,17 +98,15 @@ The codes can be M, I, D or P. For GFA1 the other codes are formally accepted (no exception is raised), but their use is discouraged. An error is raised in GFA2 on validation, if the other codes are used. -```ruby -cigar = "30M10D20M5I10M".to_alignment -cigar.validate # no exception raised -cigar = "-30M".to_alignment -cigar.validate # raises an exception -cigar = "30X".to_alignment -cigar.validate # raises an exception -cigar = "10=".to_alignment(version: :gfa1) -cigar.validate # no exception raised -cigar = "10=".to_alignment(version: :gfa2) +```python +cigar = gfapy.Alignment("30M10D20M5I10M") +cigar.validate() # no exception raised +cigar[1].code = "L" cigar.validate # raises an exception +cigar = gfapy.Alignment("30M10D20M5I10M") +cigar[1].code = "X" +cigar.validate(version="gfa1") # no exception raised +cigar.validate(version="gfa2") # exception raised ``` ### Reading and editing traces @@ -117,9 +116,9 @@ using a trace spacing value.
If traces are used, a trace spacing value must be defined in a TS integer tag, either in the header, or in the single lines which contain traces. -```ruby +```python gfa.header.TS # => the global TS value -gfa.edges(:x).TS # => an edge''s own TS tag +gfa.line("x").TS # => an edge''s own TS tag ``` ### Complement alignment @@ -131,23 +130,23 @@ two sequences are switched. This method is used by the library e.g. to compare links, as they can be expressed in different ways, by switching the two sequences. -```ruby -cigar = "2M1D3M".to_alignment -cigar.complement.to_s # => "3M1I2M" +```python +cigar = gfapy.Alignment("2M1D3M") +str(cigar.complement()) # => "3M1I2M" ``` -The current version of RGFA does not provide a way to compute the alignment in -RGFA, thus the trace information can be accessed and edited, but not used for -this purpose. Because of this there is currently no way in RGFA to compute a +The current version of gfapy does not provide a way to compute the alignment in +gfapy, thus the trace information can be accessed and edited, but not used for +this purpose. Because of this there is currently no way in gfapy to compute a complement trace (trace obtained when the sequences are switched). 
-```ruby -trace = "1,2,3".to_alignment -trace.complement.to_s # => "*" +```python +trace = gfapy.Alignment("1,2,3") +str(trace.complement()) # => "*" ``` The complement of a placeholder is a placeholder: -```ruby -"*".to_alignment.complement.to_s # => "*" +```python +str(gfapy.Alignment("*").complement()) # => "*" ``` diff --git a/manual/chapters b/manual/chapters index 2f6257c..76cf9f7 100644 --- a/manual/chapters +++ b/manual/chapters @@ -1,5 +1,5 @@ introduction.md -rgfa.md +gfa.md validation.md positional_fields.md placeholders.md @@ -12,4 +12,3 @@ custom_records.md comments.md errors.md graph_operations.md -extending_rgfa.md diff --git a/manual/comments.md b/manual/comments.md index cb45d3f..9d736cc 100644 --- a/manual/comments.md +++ b/manual/comments.md @@ -1,24 +1,22 @@ ## Comments GFA lines starting with a ```#``` symbol are considered comments. -In RGFA comments are represented by instances of RGFA::Line::Comment. -They have a similar interface to other line instances (see below), +In gfapy comments are represented by instances of ```gfapy.line.Comment```. +They have a similar interface to other line instances, with some differences, e.g. they do not support tags. -### Comments in RGFA objects +### Accessing the comments -Adding a comment to a RGFA object is done similary to other lines, by using the -```RGFA#<<(line)``` method. The comments of a RGFA object can be accessed -using the ```comments``` method. This returns an array of comment line -instances. To remove a comment from the RGFA, first find the instance (using -the #comments array), then call ```disconnect``` on the line instance or -``rm(line)``` on the RGFA object (passing the instance as parameter). +Adding a comment to a gfapy.Gfa instance is done similarly to other lines, by using the +```add_line(line)``` method. The comments of a gfapy object can be accessed +using the ```comments``` property. This returns a list of comment line +instances.
To remove a comment from the Gfa, you need to find the instance in +the list, and call ```disconnect()``` on it. -Examples: -```ruby -g << "# this is a comment" -g.comments.map(&:to_s) # => ["# this is a comment"] -g.comments[0].disconnect # or g.rm(g.comments[0]) +```python +g.add_line("# this is a comment") +[str(c) for c in g.comments] # => ["# this is a comment"] +g.comments[0].disconnect() g.comments # => [] ``` @@ -30,18 +28,20 @@ initial spacing characters, is included in the field +content+. The initial spacing characters can be read/changed using the +spacer+ field. The default value is a single space. +```python +g.add_line("# this is a comment") +c = g.comments[-1] +c.content # => "this is a comment" +c.spacer # => " " +``` + Tags are not supported by comment lines. If the line contains tags, these are nor parsed, but included in the +content+ field. -Trying to set or get tag values raises exceptions. +Trying to set tag values raises exceptions. -### Summary of comments-related API methods - -```ruby -RGFA#<<(comment_line) -RGFA#comments -RGFA::Line::Comment#disconnect -RGFA#rm(comment_line) -RGFA::Line::Comment#content/content= -RGFA::Line::Comment#spacer/spacer= +```python +c = gfapy.Line.from_string("# this is not a tag\txx:i:1") +c.content # => "this is not a tag\txx:i:1" +c.xx # => None +c.xx = 1 # raises an exception ``` - diff --git a/manual/custom_records.md b/manual/custom_records.md index 50ef899..69d9c01 100644 --- a/manual/custom_records.md +++ b/manual/custom_records.md @@ -4,7 +4,7 @@ According to the GFA2 specification, each line which starts with a non-standard record type shall be considered an user- or program-specific record. -RGFA allows to retrieve custom records and access their data using a similar +Gfapy allows to retrieve custom records and access their data using a similar interface to that for the predefined record types.
It assumes that custom records consist of tab-separated fields and that the first field is the record type. @@ -15,29 +15,49 @@ or write custom record contents. ### Retrieving, adding and deleting custom records -The custom records contained in a RGFA object can be retrieved using its -```custom_records``` method. If no argument is provided, all custom -records are returned. If a record type symbol is provided (e.g. -```g.custom_records(:X)```), records of that type will be returned. +The custom records of a Gfa instance can be retrieved using its +```custom_records``` property. This returns a list of all custom records, +regardless of the record type. -Adding custom records to and removing them from a RGFA instance -is similar to any other line. So to delete a custom record, ```disconnect``` -is called on the instance, or ```rm(custom_record_line)``` on the RGFA object. -To add a custom record line, the instance or its string representation -is added using ```<<``` on the RGFA, e.g. ```g << "X\ta\tb"```. +To retrieve only the custom records of a given type use the method +```custom_records_of_type(record_type)```. + +```python +gfa.custom_records +gfa.custom_records_of_type("X") +``` + +Adding custom records to and removing them from a Gfa instance +is similar to any other line. So to delete a custom record, ```disconnect()``` +is called on the instance. To add a custom record line, the instance or its string representation +is added using ```add_line``` on the Gfa instance. + +```python +gfa.add_line("X\ta\tb") +gfa.custom_records_of_type("X")[-1].disconnect() +``` ### Tags -As RGFA cannot know how many positional fields are present when parsing custom +As gfapy cannot know how many positional fields are present when parsing custom records, an heuristic approach is followed, to identify tags. - A field resembles a tag if it starts with ```tn:d:``` where ```tn``` is a valid tag name and ```d``` a valid tag datatype (see Tags chapter).
- The fields are parsed from the last to the first. As soon as a field is found which does not resemble a tag, all remaining fields are considered positionals (even if another field parsed later resembles a tag). +```python +gfa.add_line("X\ta\tb\tcc:i:10\tdd:i:100") +x1 = gfa.custom_records_of_type("X")[-1] +x1.cc # => 10 +x1.dd # => 100 +gfa.add_line("X\ta\tb\tcc:i:10\tdd:i:100\te") +x2 = gfa.custom_records_of_type("X")[-1] +x2.cc # => None +x2.dd # => None +``` + This parsing heuristics has some consequences on validations. Tags with an invalid tag name (such as starting with a number, or with a wrong number of letters), or an invalid tag datatype (wrong letter, or wrong number of letters) @@ -45,9 +65,16 @@ are considered positional fields. The only validation available for custom records tags is thus the validation of the content of the tag, which must be valid according to the datatype. +```python +gfa.add_line("X\ta\tb\tcc:i:10\tddd:i:100") +x = gfa.custom_records_of_type("X")[-1] +x.cc # => None +# (as ddd:i:100 is considered a positional field) +``` + ### Positional fields -The positional fields in a custom record are called ```:field1, :field2, ...```. +The positional fields in a custom record are called ```"field1", "field2", ...```. The user can iterate over the positional field names using the array obtained by calling ```positional_fieldnames``` on the line. @@ -57,27 +84,22 @@ structural elements of the line). Due to the parsing heuristics mentioned in the Tags section above, invalid tags are sometimes wrongly taken as positional fields. Therefore, -the user shall validate the number of positional fields -(```line.positional_fieldnames.size```). +the user is responsible for validating the number of positional fields.
+ +```python +gfa.add_line("X\ta\tb\tcc:i:10\tdd:i:100") +x = gfa.custom_records_of_type("X")[-1] +len(x.positional_fieldnames) # => 2 +x.positional_fieldnames # => ["field1", "field2"] +``` ### Extensions -The support for custom fields is limited, as RGFA does not know which and +The support for custom fields is limited, as gfapy does not know which and how many fields are there and how shall they be validated. -It is possible to create an extension of RGFA, which defines new record +It is possible to create an extension of gfapy, which defines new record types: this will allow to use these record types in a similar way to the built-in types. However, extending the library requires sligthly more advanced programming than just using the predefined record types. -In the chapter Extending RGFA these extensions are discussed and an +In the chapter Extending gfapy these extensions are discussed and an example is made. - -### Summary of custom-record related API methods -``` -RGFA#custom_records -RGFA#custom_records(record_type) -RGFA#rm(custom_record_line) -RGFA#<<(custom_record_string) -RGFA::Line::CustomRecord#disconnect -RGFA::Line::CustomRecord#positional_fieldnames -RGFA::Line::CustomRecord#field1/field2/... -``` diff --git a/manual/errors.md b/manual/errors.md index 13c9ac0..3526607 100644 --- a/manual/errors.md +++ b/manual/errors.md @@ -1,7 +1,7 @@ ## Errors -All exception raised in the library are subclasses of RGFA::Error. -This means that ```rescue RGFA::Error``` catches all library errors. +All exceptions raised in the library are subclasses of gfapy.Error. +This means that ```except gfapy.Error``` catches all library errors. Different types of errors are defined and are summarized in the following table: @@ -30,4 +30,3 @@ by the programmer (e.g. a value is implied to be positive at a certain point of the code). It the checks fails, an assertion error is raised.
The user may report the problem, as this may indicate a bug (unless the user did something he was not supposed to do, such as calling an API private method). - diff --git a/manual/extending_rgfa.md b/manual/extending_rgfa.md deleted file mode 100644 index 49875af..0000000 --- a/manual/extending_rgfa.md +++ /dev/null @@ -1,336 +0,0 @@ -## Extending RGFA - -The RGFA library is designed to be easily extended, although its extensions -requires more knowledge of the Ruby languange, than what is necessary for -merely using the library. - -The GFA2 format can be extended by defining new line types. These are handled -using the custom records functionality, but the support is limited: e.g. -validation, parsing of the field content, references to other lines and access -to fields by name are not possible. All this is made possible by extensions. - -### An example of user-specific record types - -This chapter gives an example on how to extend the -library to define an user-specific record type and custom field datatypes. -As an example, we will define a record type for metagenomics applications -with code M. This will have the role to define taxon-specific subgraphs, -by putting segments in relation with a taxon. 
The taxa themselves -will be declared in lines with code T: - -Each T line will contain: -- tid: a taxon ID -- name: an organism name (text field) -- the tags may contain an URL tag, which will point to a website, - describing the organism (UL tag, string) - -Each M line will contain: -- mid: an optional assignment ID -- tid: a taxon ID -- sid: a reference to a segment -- score: an optional Phred-style integer score, which will define an error - probability of the assignment of the segment to a taxon - -Here is an example of GFA containing the new line types: -``` -S A 1000 * -T B12_c -M 1 taxon:123 A 40 xx:Z:cjaks536 -M 2 taxon:123 B * xx:Z:cga5r5cs -S B 1000 * -M * B12_c B 20 -T taxon:123 UL:http://www.taxon123.com -``` - -### Subclassing RGFA::Line - -Defining a new record type for RGFA requires to create a new subclass of -the RGFA::Line class. -Thereby some constants must be defined: - -- ```RECORD_TYPE``` must contain the record type as symbol. -- ```POSFIELDS``` is an array of symbols, indicating the sequence - of positional fields in the record -- ```PREDEFINED_TAGS``` contain an array of predefined optional - tag names. -- ```DATATYPE``` is an hash. Each key is a symbol, either contained in - POSFIELDS or in PREDEFINED_TAGS. The value is a datatype symbol: - see the RGFA::Field module for a list of possible datatypes. -- ```NAME_FIELD``` is the field which contains the name of the line, if any -- ```STORAGE_KEY``` is the field which shall be used as a key for storing - references of the line in RGFA; for custom subclasses, - set it to ```:name``` if the line has a name field, to ```nil``` otherwise -- ```FIELD_ALIAS``` ia an hash which contain aliases to field names; - it may be empty -- ```REFERENCE_FIELDS``` is a list of fields which contain references - (or arrays of references) to other lines. The references may contain - an orientation. 
-- ```BACKREFERENCE_RELATED_FIELDS``` is a list of fields which shall - not be changed in a connected line without potentially invaliding - backreferences to the line. In the predefined line types, these are - the fields containing match coordinates in GFA2 edges (as they change their - nature as internal, dovetails or containments) and the orientation and overlap - fields in GFA1 links. -- ```DEPENDENT_LINES``` and ```OTHER_REFERENCES``` are lists - of names of references collections, which will - contain backreferences to other line types (which refer the line type in their - fields). E.g. for a segment, the list contain the ```:fragments``` symbol, - indicating that a collection - shall be initialized, which will contain backreferences to the fragments - which reference the segment. - Disconnection is cascaded to lines in the collections named in - DEPENDENT_LINES but not to those named in OTHER_REFERENCES. - -For our example, we will define the subclasses for record types T and M. - -```ruby -class RGFA::Line::Taxon < RGFA::Line - - RECORD_TYPE = :T - POSFIELDS = [:tid, :desc] - PREDEFINED_TAGS = [:UL] - DATATYPE = { - :tid => :identifier_gfa2, - :desc => :Z, - :UL => :Z, - } - NAME_FIELD = :tid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:metagenomic_assignments] - OTHER_REFERENCES = [] - - apply_definitions - -end - -class RGFA::Line::MetagenomicAssignment < RGFA::Line - - RECORD_TYPE = :M - POSFIELDS = [:mid, :tid, :sid, :score] - PREDEFINED_TAGS = [] - DATATYPE = { - :mid => :optional_identifier_gfa2, - :tid => :identifier_gfa2, - :sid => :identifier_gfa2, - :score => :optional_integer, - } - NAME_FIELD = :mid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [:tid, :sid] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions - -end -``` - -### Enabling the references - -If reference fields have been defined (as in the 
previous example of M, where -tid is a reference to a taxon line and sid is a reference to a segment line), -a private ```initialize_references``` -method shall be provided, which is called when a line of the type is connected -to a RGFA instance. - -In particular, the method shall change all identifiers in the reference -fields into references to lines in the GFA (either existing lines or -virtual lines, which is the way RGFA handles forward-pointing references). - -If the referenced line is not yet available, but it may be defined by -the GFA at a later time, the method will create a virtual line. -In our example, we know that the reference is to a segment or a taxon line. -If we would not know that we would instantiate RGFA::Line::Unknown. - -When the field content itself is a reference, the content cannot be -changed directly (using set would raise an exception, as the line is -already connected when the initialize_referneces method is called). -Therefore, the private line method set_existing_field shall be used, -with ```set_reference: true```. If the reference field contains -an oriented line or an array instead, references can be edited directly. - -```ruby -class RGFA::Line::MetagenomicAssignment - - def initialize_references - s = @rgfa.segment(sid) - if s.nil? - s = RGFA::Line::Segment::GFA2.new([sid.to_s, "1", "*"], - virtual: true, version: :gfa2) - s.connect(@rgfa) - end - set_existing_field(:sid, s, set_reference: true) - s.add_reference(self, :metagenomic_assignments) - - t = @rgfa.line(tid) - if t.nil? 
- t = RGFA::Line::Taxon.new([tid.to_s, ""], - virtual: true, version: :gfa2) - t.connect(@rgfa) - end - set_existing_field(:tid, t, set_reference: true) - t.add_reference(self, :metagenomic_assignments) - end - private :initialize_references - -end -``` - -The method defined backreferences to the new line in the -segment and taxon instances, using :metagenomic_assignments as name for the collection -of backreferences in S or T lines to lines of type M. For taxa, this collection -has been defined in the class definition above. For segments, we will need to -add this collection to the segment definition and redefine the reference getters -methods. As lines of type M will be dependent on S lines -(ie they shall be deleted if the referred segment line is deleted), we will -add it to the DEPENDENT_LINES list. In case of no dependency, we would use the -OTHER_REFERENCES list instead. - -```ruby -class RGFA::Line::Segment::GFA2 - DEPENDENT_LINES << :metagenomic_assignments - define_reference_getters -end -``` - -### Recognizing the record type code - -When parsing lines starting with the code for the new record type, -we want RGFA to return an instance of the correct subclass of Line. - -To obtain this, the ```subclass``` class Method of ```RGFA::Line``` must -be extended to handle the new record_type symbol, for GFA2 or -unknown version records. It must return a class (the new subclass of RGFA::Line). -The new record symbols must also be added to the gfa2 specific -symbols list in ```RECORD_TYPE_VERSIONS[:specific][:gfa2]```. - -In our example the method ```subclass``` will be patched as follows: - -```ruby -class RGFA::Line - class << self - alias_method :orig_subclass, :subclass - def subclass_GFA2(record_type, version: nil) - if version.nil? 
or version == :gfa2 - case record_type.to_sym - when :M then return RGFA::Line::MetagenomicAssignment - when :T then return RGFA::Line::Taxon - end - end - orig_subclass(record_type, version: version) - end - end - RECORD_TYPE_VERSIONS[:specific][:gfa2] << :M - RECORD_TYPE_VERSIONS[:specific][:gfa2] << :T -end -``` - -### Allowing to find records - -Both record types T and M define a name field. This allows to find record of -the types using the ```line()``` method of the ```RGFA``` class, as well as -allowing to replace virtual T lines created while parsing M lines, with real T -lines, when these are found. For this to work, the codes must be added to the -list ```RECORDS_WITH_NAME``` of the ```RGFA``` class: - -```ruby -RGFA::RECORDS_WITH_NAME << :T -RGFA::RECORDS_WITH_NAME << :M -``` - -### Defining a field datatype - -When new subclasses of line are created, it may be necessary or useful to -create new datatypes for its fields. For example, we used :identifier_gfa2 for -the tid field in the M and T records. However, we could made the field syntax -stricter, and require that the content of the field must be either a reference -to the NCBI taxonomy database or a custom identifier. In the first case, it -will need to be in the form ```taxon:```, where `````` is a positive -integer. In the second case, it will need to be a combination of letters, -numbers and underscores (thereby ```:``` will not be allowed). - -A module must be created, which handles the parsing and writing of fields with -the new datatype. -The module shall define six module functions -(see the API documentation of the RGFA::Field module for more detail). -Decode and unsafe_decode take a string as -argument and return an appropriate Ruby object. Encode and unsafe_encode take -a string representation or another ruby object and converts it into the correct -string representation. Validate_encoded validates the string representation. -Validate_decoded validates a non-string content of the field. 
The unsafe -version of the decode and encode methods may provide faster results and are -used if the parameters are guaranteed to be valid. The safe version must check -the validity of the provided data. - -```ruby -module RGFA::Field::TaxonID - - def validate_encoded(string) - if string !~ /^taxon:(\d+)$/ and string !~ /^[a-zA-Z0-9_]+$/ - raise RGFA::ValueError, "Invalid taxon ID: #{string}" - end - end - module_function :validate_encoded - - def unsafe_decode(string) - string.to_sym - end - module_function :unsafe_decode - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - module_function :decode - - def validate_decoded(object) - case object - when RGFA::Line::Taxon - validate_encoded(object.name.to_s) - when Symbol - validate_encoded(object.to_s) - else - raise RGFA::TypeError, - "Invalid type for taxon ID: #{object.inspect}" - end - end - module_function :validate_decoded - - def unsafe_encode(object) - object = object.name if object.kind_of?(RGFA::Line::Taxon) - object.to_s - end - module_function :unsafe_encode - - def encode(object) - validate_decoded(object) - unsafe_encode(object) - end - module_function :encode - -end -``` - -The new datatype must have a symbol which identifies it. The symbol must be -added to the ```GFA2_POSFIELD_DATATYPE``` list of the ```RGFA::Field``` module. -An entry must be added to the ```RGFA::Field::FIELD_MODULE``` -hash, where the symbol of the new datatype is the key and the value is the -module. 
- -```ruby -RGFA::Field::GFA2_POSFIELD_DATATYPE << :taxon_id -RGFA::Field::FIELD_MODULE[:taxon_id] = RGFA::Field::TaxonID -``` - -Now the new datatype can be put into use by changing the datatype for the tid -fields of the M and T lines: - -```ruby -RGFA::Line::Taxon::DATATYPE[:tid] = :taxon_id -RGFA::Line::MetagenomicAssignment::DATATYPE[:tid] = :taxon_id -``` diff --git a/gfapy_manual/gfa.md b/manual/gfa.md similarity index 100% rename from gfapy_manual/gfa.md rename to manual/gfa.md diff --git a/manual/gfapy-manual.pdf b/manual/gfapy-manual.pdf new file mode 100644 index 0000000..7025a66 Binary files /dev/null and b/manual/gfapy-manual.pdf differ diff --git a/manual/graph_operations.md b/manual/graph_operations.md index 4271fb8..9193c7c 100644 --- a/manual/graph_operations.md +++ b/manual/graph_operations.md @@ -1,6 +1,8 @@ ## Graph operations -Some graph operations are provided by the RGFA library. -These are described in the RGFA1 paper and in the API documentation. -Note that some operations are completed by additional features, -which are available when RGFATools is used. +Graph operations such as linear paths merging, +multiplication of segments and other are provided. +These operations are similar to those provided by the +RGFA library: +please refer to the RGFA paper (Gonnella and Kurtz, 2016) +for a description and to the API documentation. diff --git a/manual/header.md b/manual/header.md index e323c8a..0362ffe 100644 --- a/manual/header.md +++ b/manual/header.md @@ -1,17 +1,24 @@ ## The Header -GFA files may contain one or multiple header lines (record type: H). These +GFA files may contain one or multiple header lines (record type: "H"). These lines may be present in any part of the file, not necessarily at the beginning. Although the header may consist of multiple lines, its content refers to the -whole file. Therefore in RGFA the header is accessed using a single line +whole file. 
Therefore in gfapy the header is accessed using a single line +instance (accessible by the ```header``` method). Header lines contain only -tags. If not header line is present in the GFA, then the header line object +tags. If no header line is present in the Gfa, then the header line object will be empty (i.e. contain no tags). -Header lines cannot be connected to the RGFA as other lines (i.e. calling -```connect``` on them raises an exception). Instead they are merged to the -existing header, when the ```add_line(line)``` method is called on the RGFA. +Note that header lines cannot be connected to the Gfa instance like other lines +(i.e. calling ```connect``` on them raises an exception). Instead they +must be merged to the existing Gfa header, using +```add_line(line)``` on the gfa instance. + +```python +gfapy.Line.from_string("H\tnn:f:1.0").connect(gfa) # exception +gfa.add_line("H\tnn:f:1.0") # this works! +gfa.header.nn # => 1.0 +``` ### Multiple definitions of the predefined header tags @@ -19,12 +26,10 @@ For the predefined tags (```VN``` and ```TS```), the presence of multiple values in different lines is an error, unless the value is the same in each instance (in which case the repeated definitions are ignored). -``` -H VN:Z:1.0 -# other lines -# ... -# the following raises an exception: -H VN:Z:2.0 +```python +gfa.add_line("H\tVN:Z:1.0") +gfa.add_line("H\tVN:Z:1.0") # ignored +gfa.add_line("H\tVN:Z:2.0") # exception! ``` ### Multiple definitions of custom header tags @@ -42,78 +47,80 @@ sections. Reading, validating and setting the datatype of multi-definition tags is done using the same methods as for all other lines (see Tags chapter). However, if a tag is defined multiple times on multiple H lines, reading -the tag will return an array of the values on the lines. This array is an -instance of the subclass ```RGFA::FieldArray``` of Array.
- -``` -H xx:i:1 -H xx:i:2 -H xx:i:3 -# => gfa.header.xx value is RGFA::FieldArray[1,2,3] +the tag will return a list of the values on the lines. This array is an +instance of the subclass ```gfapy.FieldArray``` of list. + +```python +gfa.add_line("H\txx:i:1") +gfa.add_line("H\txx:i:2") +gfa.add_line("H\txx:i:3") +gfa.header.xx # => gfapy.FieldArray("i", [1,2,3]) ``` -### Setting a tag - -Calling set, if a tag was already defined, overwrites its value. -For this reason, another method is defined, for supporting multi-definition -tags: ```add```. When ```add(tagname, value)``` is called on the RGFA header, -if the tag does not exist, add will be a synonymous of set and simply create -it. If it exists, it creates a field array (if a single value was present) -or adds the new value to the existing field array (if multiple values were -present). - -```ruby -# header.xx is not set -gfa.header.add(:xx, 1) -# header.xx is 1 -gfa.header.add(:xx, 2) -# header.xx is a field array [1,2] +### Setting tags + +There are two possibilities to set a tag for the header. The first is the +normal tag interface (using ```set``` or the tag name property). The second +is to use ```add```. The latter supports multi-definition tags, i.e. it +adds the value to the previous ones (if any), instead of overwriting them. + +```python +gfa.header.xx # => None +gfa.header.add("xx", 1) +gfa.header.xx # => 1 +gfa.header.add("xx", 2) +gfa.header.xx # => gfapy.FieldArray("i", [1,2]) +gfa.header.set("xx", 3) +gfa.header.xx # => 3 ``` ### Modifying field array values Field arrays can be modified directly (e.g. adding new values or removing some -values). However, if this is done, some additional work is sometimes needed. - -First, if values are added to the array, or its values -are modified, the user is responsible to check that the array values -remain compatible with the datatype of the tag (which can be checked -by calling ```validate_field(tagname)``` on the header). 
- -```ruby -gfa.header.xx # => RGFAFieldArray[1,2,3] -gfa.header.xx << 4 -gfa.header.xx << 5 -gfa.validate_field(:xx) +values). After modification, the user may check if the array values +remain compatible with the datatype of the tag using the ```validate_field``` +method. + +```python +gfa.header.xx # => gfapy.FieldArray([1,2,3]) +gfa.header.validate_field("xx") # => True +gfa.header.xx.append("X") +gfa.header.validate_field("xx") # => False ``` -Second, if the field array is modified using array methods (such as ```map```) -which return an Array class instance, this must be transformed back into a field -array calling ```to_rgfa_field_array(datatype)``` method; thereby datatype -can be set to the value returned by calling ```get_datatype(tagname)``` +If the field array is modified using array methods which return a list or data +of any other type, a field array must be constructed, setting its +datatype to the value returned by calling ```get_datatype(tagname)``` on the header. -```ruby -gfa.header.map = gfa.header.map {|elem| elem + 1}. - to_rgfa_field_array(gfa.header.get_datatype(:xx)) +```python +gfa.header.xx # => gfapy.FieldArray([1,2,3]) +gfa.header.xx = gfapy.FieldArray(gfa.header.get_datatype("xx"), + map(lambda x: x+1, gfa.header.xx)) +gfa.header.xx # => gfapy.FieldArray([2,3,4]) ``` ### String representation of the header -Note that when converting the header line to string, a single-line string is -returned, eventually with multiple instances of the tag (in which case it is -not standard-compliant). Similarly when calling #field_to_s on a field array -tag, the output string will contain the instances of the tag, separated by -tabs. However, when the RGFA is output to file or string, the header is +For consistency with other line types, the string representation of +the header is a single-line string, possibly non standard-compliant, +if it contains multiple instances of the tag.
+(and when calling ```field_to_s(tag)``` for a tag present multiple +times, the output string will contain the instances of the tag, separated by +tabs). + +However, when the Gfa is output to file or string, the header is splitted into multiple H lines with single tags, so that standard-compliant GFA -is output. These can be retrieved using the ```headers``` method on the RGFA: - -```ruby -gfa.header.to_s # H VN:Z:1.0 xx:i:1 xx:i:2 (compact, but invalid GFA) -gfa.header.field_to_s(:xx) # => xx:i:1 xx:i:2 -gfa.headers # => [] of three Header instances, with a single tag each -gfa.to_s # => (valid GFA) - # H VN:Z:1.0 - # H xx:i:1 - # H xx:i:2 +is output. The split header can be retrieved using the ```headers``` method +on the Gfa instance. + +```python +gfa.header.field_to_s("xx") # => "xx:i:1\txx:i:2" +str(gfa.header) # => "H\tVN:Z:1.0\txx:i:1\txx:i:2" +[str(h) for h in gfa.headers] # => ["H\tVN:Z:1.0", "H\txx:i:1", "H\txx:i:2"] +str(gfa) # => """ + H VN:Z:1.0 + H xx:i:1 + H xx:i:2 + """ ``` diff --git a/manual/introduction.md b/manual/introduction.md index 5b64056..ed485f7 100644 --- a/manual/introduction.md +++ b/manual/introduction.md @@ -1,6 +1,6 @@ -# RGFA +# gfapy -RGFA is a Ruby library for working with GFA files. It allows to parse, +gfapy is a Python library for working with GFA files. It allows to parse, validate, edit and write GFA files. This manual explains how to access the information in GFA files using the @@ -9,7 +9,7 @@ each class, method and constant defined by the library. A test suite makes sure that the functionality described by this manual also works as intented. However, if this is not the case, please report any bug -using the Github issues tracked (https://github.com/ggonnella/rgfa/issues). +using the Github issues tracker (https://github.com/ggonnella/gfapy/issues).
## GFA specifications diff --git a/manual/list_of_api_methods b/manual/list_of_api_methods deleted file mode 100644 index 99baf69..0000000 --- a/manual/list_of_api_methods +++ /dev/null @@ -1,93 +0,0 @@ -# rgfa.rb -RGFA#vlevel -RGFA#vlevel=(level) -RGFA#version -RGFA.new(validate:, version:) -RGFA#validation -RGFA#to_s -RGFA#to_gfa1_s -RGFA#to_gfa2_s -RGFA#to_gfa1 -RGFA#to_gfa2 -RGFA#to_rgfa -RGFA#clone -RGFA#read_file(filename) -RGFA.from_file(filename, validate:, version:) -RGFA.to_file(filename) -RGFA#info -RGFA#n_dead_ends -RGFA#== -String#to_rgfa(validate:, version:) -Array#to_rgfa(validate:, version:) -# lib/rgfa/alignment* -String#to_alignment(version:, valid:) -RGFA::Alignment::CIGAR#complement -RGFA::Alignment::CIGAR#to_s -RGFA::Alignment::CIGAR#validate(version:) -RGFA::Alignment::CIGAR#to_alignment(valid:, version:) -RGFA::Alignment::CIGAR#clone -RGFA::Alignment::CIGAR#length_on_reference -RGFA::Alignment::CIGAR#length_on_query -RGFA::Alignment::CIGAR::Operation#len -RGFA::Alignment::CIGAR::Operation#code -RGFA::Alignment::CIGAR::Operation.new(len, code) -RGFA::Alignment::CIGAR::Operation#to_s -RGFA::Alignment::CIGAR::Operation#== -RGFA::Alignment::CIGAR::Operation#validate(version:) -RGFA::Alignment::Placeholder#complement -RGFA::Alignment::Placeholder#to_alignment(valid:, version:) -RGFA::Alignment::Trace#validate(ts:) -RGFA::Alignment::Trace#to_s -RGFA::Alignment::Trace#to_alignment(valid:, version:) -RGFA::Alignment::Trace#complement -# lib/rgfa/field_array.rb -RGFA::FieldArray.new -RGFA::FieldArray#validate -### Summary of headers-related API methods -RGFA#header -RGFA::Line::Header#add -RGFA::FieldArray#(array methods) -Array#to_rgfa_field_array(datatype) -### Summary of tags-related API methods -RGFA::Line#tn/tn!/tn= # tn = tag name -RGFA::Line#get/get!/set -RGFA::Line#delete -RGFA::Line#get_datatype/set_datatype -RGFA::Line#validate_field/validate -String#to_byte_array/to_numeric_array -Array#to_byte_array/to_numeric_array 
-RGFA::NumericArray/RGFA::ByteArray#to_s -RGFA::NumericArray/RGFA::ByteArray#validate -RGFA::NumericArray#compute_subtype -### rgfa.md -```ruby -RGFA.new -RGFA.from_file(filename) -RGFA#to_s -RGFA#to_file(filename) -RGFA#segments -RGFA#paths -RGFA#edges -RGFA#links -RGFA#containments -RGFA#groups -RGFA#fragments -RGFA#comments -RGFA#custom_lines -RGFA#custom_lines(key) -RGFA#header -RGFA#line(name) -RGFA#names -RGFA#segment_names -RGFA#edges_names -RGFA#gap_names -RGFA#path_names -RGFA#set_names -RGFA#external_names -RGFA#fragments_for_external(id) -RGFA#add_line(line) -RGFA#<<(line) -String#to_rgfa_line -RGFA#rm(line) -RGFA::Line#disconnect -``` diff --git a/manual/placeholders.md b/manual/placeholders.md index b4a4745..b7b47fb 100644 --- a/manual/placeholders.md +++ b/manual/placeholders.md @@ -1,38 +1,38 @@ ## Placeholders -Some positional fields may contain an undefined value S: sequence; L/C: -overlap; P: overlaps; E: eid, alignment; F: alignment; G: gid, var; U/O: pid. +Some positional fields may contain an undefined value S: ```sequence```; +L/C: ```overlap```; P: ```overlaps```; E: ```eid```, ```alignment```; +F: ```alignment```; G: ```gid```, ```var```; U/O: ```pid```. In GFA this value is represented by a ```*```. -In RGFA instances of the class RGFA::Placeholder (and its subclasses) represent +In gfapy instances of the class gfapy.Placeholder (and its subclasses) represent the undefined value. ### Distinguishing placeholders -The method #placeholder? is defined for placeholders and all classes whose -instances can be used as a value for fields where a placeholder is allowed. It -allows to check if a value is a placeholder instance or an equivalent value -(such as an empty array, or the string representation of the placeholder). +The method ```gfapy.is_placeholder()``` checks if a value is or would +be represented by a placeholder in GFA (such as an empty array, or +a string containing "*").
+ +```python +gfapy.is_placeholder("*") # => True +gfapy.is_placeholder("**") # => False +gfapy.is_placeholder([]) # => True +gfapy.is_placeholder(gfapy.Placeholder()) # => True +``` + +Note that, as a placeholder is False in boolean context, just a +```if not placeholder``` will also work, if placeholder is a gfapy.Placeholder() +but not if it is a string representation. ### Compatibility methods Some methods are defined for placeholders, which allow them to respond to the -same methods as defined values. For example, for all placeholders, #empty? -returns true; #validate does nothing; #length returns 0; #[] returns self; #+ -returns self. Thus in many cases the code can be written in a generic way, -without explicitely handling the different cases where a value is a placeholder -or not. - -### Summary of API methods related to placeholders - -```ruby -RGFA::Placeholder#to_s -RGFA::Placeholder#placeholder? -String/Symbol/Array/Integer#placeholder? -RGFA::Placeholder#empty? -RGFA::Placeholder#validate -RGFA::Placeholder#length -RGFA::Placeholder#[] -RGFA::Placeholder#+ -``` +same methods as defined values. This allows writing generic code. +```python +placeholder.validate() # does nothing +len(placeholder) # => 0 +placeholder[1] # => gfapy.Placeholder() +placeholder + anything # => gfapy.Placeholder() +``` diff --git a/manual/positional_fields.md b/manual/positional_fields.md index 6e3795f..4fbdd84 100644 --- a/manual/positional_fields.md +++ b/manual/positional_fields.md @@ -13,19 +13,21 @@ with it. The field names are derived from the specification. Lower case versions of the field names are used and spaces are subsituted with underscores. +In some cases, the field names were changed, as they represent keywords +in common programming languages (```from```, ```send```). -The following tables shows the field names used in RGFA, for each kind of line. +The following tables show the field names used in gfapy, for each kind of line. Headers have no positional fields.
Comments and custom lines follow particular rules, see the respective chapters. #### GFA1 field names -| Record Type | Field 1 | Field 2 | Field 3 | Field 4 | Field 5 | Field 6 | -|-------------|-----------------|---------------------|----------------|-----------------|---------------|---------------| -| Segment | ```name``` | ```sequence``` | | | | | -| Link | ```from``` | ```from_orient``` | ```to``` | ```to_orient``` | ```overlap``` | | -| Containment | ```from``` | ```from_orient``` | ```to``` | ```to_orient``` | ```pos``` | ```overlap``` | -| Path | ```path_name``` | ```segment_names``` | ```overlaps``` | | | | +| Record Type | Field 1 | Field 2 | Field 3 | Field 4 | Field 5 | Field 6 | +|-------------|--------------------|---------------------|------------------|-----------------|---------------|---------------| +| Segment | ```name``` | ```sequence``` | | | | | +| Link | ```from_segment``` | ```from_orient``` | ```to_segment``` | ```to_orient``` | ```overlap``` | | +| Containment | ```from_segment``` | ```from_orient``` | ```to_segment``` | ```to_orient``` | ```pos``` | ```overlap``` | +| Path | ```path_name``` | ```segment_names``` | ```overlaps``` | | | | #### GFA2 field names @@ -35,14 +37,14 @@ rules, see the respective chapters. | Edge | ```eid``` | ```sid1 ``` | ```sid2 ``` | ```beg1 ``` | ```end1 ``` | ```beg2 ``` | ```end2 ``` | ```alignment``` | | Fragment | ```sid``` | ```external``` | ```s_beg ``` | ```s_end``` | ```f_beg``` | ```f_end``` | ```alignment``` | | | Gap | ```gid``` | ```sid1 ``` | ```d1 ``` | ```d2 ``` | ```sid2 ``` | ```disp ``` | ```var ``` | | -| U\ Group | ```pid``` | ```items ``` | | | | | | | -| O\ Group | ```pid``` | ```items ``` | | | | | | | +| Set | ```pid``` | ```items ``` | | | | | | | +| Path | ```pid``` | ```items ``` | | | | | | | ### Datatypes The datatype of each positional field is described in the specification and cannot be changed (differently from tags). 
Here is a short description of the -Ruby classes used to represent data for different datatypes. For some +Python classes used to represent data for different datatypes. For some complex cases, more details are found in the following chapters. #### Placeholders @@ -50,44 +52,52 @@ complex cases, more details are found in the following chapters. The positional fields in GFA can never be empty. However, there are some fields with optional values. If a value is not specified, a placeholder character is used instead (```*```). Such undefined values are represented -in RGFA by the Placeholder class, which is described more in detail in the +in gfapy by the gfapy.Placeholder class, which is described more in detail in the Placeholders chapter. #### Arrays The ```items``` field in unordered and ordered groups and the ```segment_names``` and ```overlaps``` fields in paths are -lists of objects and are represented by Array instances. +lists of objects and are represented by list instances. + +```python +type(set.items) # => list +type(gfa2_path.items) # => list +type(gfa1_path.segment_names) # => list +type(gfa1_path.overlaps) # => list +``` #### Orientations -Orientations are represented by symbols. Applying the ```invert``` method -on an orientation symbol returns the other orientation, e.g. -```ruby -:+.invert # => :- +Orientations are represented by strings. The ```gfapy.invert``` method +applied to an orientation string returns the other orientation. +```python +gfapy.invert("+") # => "-" +gfapy.invert("-") # => "+" ``` #### Identifiers The identifier of the line itself (available for S, P, E, G, U, O lines) -can always be accessed in RGFA using the ```name``` alias and is represented -in RGFA by a Symbol. If it is optional (E, G, U, O lines) +can always be accessed in gfapy using the ```name``` alias and is represented +in gfapy by a string. If it is optional (E, G, U, O lines) and not specified, it is represented by a Placeholder instance.
The fragment identifier is also a Symbol. Identifiers which refer to other lines are also present in some line types (L, C, E, G, U, O, F). These are never placeholders and in stand-alone lines -are represented by symbols. In connected lines they are references to the Line +are represented by strings. In connected lines they are references to the Line instances to which they refer to (see the References chapter). #### Oriented identifiers Oriented identifiers (e.g. ```segment_names``` in GFA1 paths) are represented by elements of the class -```RGFA::OrientedLine```. The ```segment``` method of the oriented +```gfapy::OrientedLine```. The ```segment``` method of the oriented segments returns the segment identifier (or segment reference in connected -path lines) and the ```orient``` method returns the orientation symbol. -The ```name``` method returns the symbol of the segment, even if this is +path lines) and the ```orient``` method returns the orientation string. +The ```name``` method returns the string of the segment, even if this is a reference to a segment. A new oriented line can be created using the ```OL[line, orientation]``` method. @@ -96,7 +106,7 @@ To set the two attributes the methods ```segment=``` and ```orient=``` are available. Examples: -```ruby +```python p = "P\tP1\ta+,b-\t*".to_rgfa_line p.segment_names # => [OrientedLine(:a,:+),OrientedLine(:b,:-)] p[0].segment # => :a @@ -105,21 +115,21 @@ p[0].orient # => :+ p[0].invert # => OrientedLine(:a,:-) p[0].orient = :- p[0].segment = "S\tX\t*".to_rgfa_line -p[0] # => OrientedLine(RGFA::Line("S\tX\t*"), :-) +p[0] # => OrientedLine(gfapy::Line("S\tX\t*"), :-) p[0].name # => :X -p[0] = OL[RGFA::Line("S\tY\t*"), :+] +p[0] = OL[gfapy::Line("S\tY\t*"), :+] ``` #### Sequences -Sequences (S field sequence) are represented by strings in RGFA. +Sequences (S field sequence) are represented by strings in gfapy. Depending on the GFA version, the alphabet definition is more or less restrictive. 
The definitions are correctly applied by the validation methods. The method ```rc``` is provided to compute the reverse complement of a nucleotidic sequence. The extended IUPAC alphabet is understood by the method. Applied to non nucleotidic sequences, the results will be meaningless: -```ruby +```python "gcat".rc # => "atgc" "*".rc # => "*" (placeholder) "yatc".rc # => "gatr" (wildcards) @@ -134,7 +144,7 @@ are represented by integers. The ```var``` field is optional, and thus can be also a placeholder. Positions are 0-based coordinates. The position fields of GFA2 E lines (```beg1, beg2, end1, end2```) and -F lines (```s_beg, s_end, f_beg, f_end```) contain a dollar symbol as suffix +F lines (```s_beg, s_end, f_beg, f_end```) contain a dollar string as suffix if the position is equal to the segment length. For more information, see the Positions chapter. @@ -146,20 +156,20 @@ For more details, see the Alignments chapter. #### GFA1 datatypes -| Datatype | Record Type | Fields | -|--------------------------|-------------|------------------------------| -| Identifier | Segment | ```name ``` | -| | Path | ```path_name ``` | -| | Link | ```from, to ``` | -| | Containment | ```from, to ``` | -| [OrientedIdentifier] | Path | ```segment_names ``` | -| Orientation | Link | ```from_orient, to_orient``` | -| | Containment | ```from_orient, to_orient``` | -| Sequence | Segment | ```sequence ``` | -| Alignment | Link | ```overlap ``` | -| | Containment | ```overlap ``` | -| [Alignment] | Path | ```overlaps ``` | -| Position | Containment | ```pos ``` | +| Datatype | Record Type | Fields | +|--------------------------|-------------|---------------------------------| +| Identifier | Segment | ```name ``` | +| | Path | ```path_name ``` | +| | Link | ```from_segment, to_segment ``` | +| | Containment | ```from_segment, to_segment ``` | +| [OrientedIdentifier] | Path | ```segment_names ``` | +| Orientation | Link | ```from_orient, to_orient ``` | +| | Containment | ```from_orient, 
to_orient ``` | +| Sequence | Segment | ```sequence ``` | +| Alignment | Link | ```overlap ``` | +| | Containment | ```overlap ``` | +| [Alignment] | Path | ```overlaps ``` | +| Position | Containment | ```pos ``` | #### GFA2 datatypes @@ -185,30 +195,60 @@ For more details, see the Alignments chapter. ### Reading and writing positional fields -The ```RGFA::Line#positional_fieldnames``` method returns the list of the names -(as symbols) of the positional fields of a line. - -The positional fields can be read using a method on the RGFA line object, which +The ```positional_fieldnames``` method returns the list of the names +(as strings) of the positional fields of a line. +The positional fields can be read using a method on the gfapy line object, which is called as the field name. Setting the value is done with an equal sign version of the field name method (e.g. segment.slen = 120). In alternative, the ```set(fieldname, value)``` and ```get(fieldname)``` methods can also be used. +```python +s_gfa1.positional_fieldnames # => ["name", "sequence"] +s_gfa1.name # => "segment1" +s_gfa1.get("name") # => "segment1" +s_gfa1.name = "segment2" +s_gfa1.name # => "segment2" +s_gfa1.set("name", "segment3") +s_gfa1.name # => "segment3" + +``` + When a field is read, the value is converted into an appropriate object. The string representation of a field can be read using the ```field_to_s(fieldname)``` method. -When setting a value, the user can specify the value of a tag either as a Ruby -object, or as the string representation of the value. +```python +link.from_segment # => gfapy.line.segment.GFA1("S\ts1\t*") +link.field_to_s("from_segment") # => "s1" +``` + +When setting a non-string field, the user can specify the value of a tag +either as a Python non-string object, or as the string representation of the +value.
+ +```python +c.pos = 1 +c.pos = "1" +c.pos # => 1 +c.field_to_s("pos") # => "1" +``` Note that setting the value of reference and backreferences-related fields is generally not allowed, when a line instance is connected to a -RGFA object (see the References chapter). +gfapy object (see the References chapter). + +```python +s = gfapy.Line.from_string("L\ts1\t+\ts2\t-\t*") +s.from_segment = "s3" +gfa.add_line(s) +s.from_segment = "s4" # raises an exception +``` ### Validation The content of all positional fields must be a correctly formatted -string according to the rules given in the GFA specifications (or a Ruby object +string according to the rules given in the GFA specifications (or a Python object whose string representation is a correctly formatted string). Depending on the validation level, more or less checks are done automatically @@ -218,6 +258,11 @@ the user can trigger a manual validation using the ```validate```, which does a full validation on the whole line, including all positional fields. +```python +line.validate_field("xx") +line.validate() +``` + ### Aliases For some fields, aliases are defined, which can be used in all contexts @@ -225,6 +270,18 @@ where the original field name is used (i.e. as parameter of a method, and the same setter and getter methods defined for the original field name are also defined for each alias, see below). +```python +gfa1_path.name == gfa1_path.path_name # True +edge.eid == edge.name # True +segment.sid == segment.name # True +containment.from_segment == containment.container # True + +s = gfapy.Line.from_string("S\t1\t*") +s.sid # => "1" +s.name = "a" +s.sid # => "a" +``` + #### Name Different record types have an identifier field: @@ -246,15 +303,6 @@ and ```name``` for GFA2 segments. The definition of from and to for containments is somewhat cryptical.
Therefore following aliases have been defined for containments: -container[_orient] for from[_orient]; contained[_orient] for to[_orient] - -### Summary of positional fields-related API methods - -```ruby -RGFA::Line#/= -RGFA::Line#get/set -RGFA::Line#validate_field/validate -Symbol#invert -String#rc -``` +container[_orient] for from[_segment|_orient]; contained[_orient] +for to[_segment|_orient]. diff --git a/manual/positions.md b/manual/positions.md index 81dc1e9..33ab569 100644 --- a/manual/positions.md +++ b/manual/positions.md @@ -7,51 +7,46 @@ in the container segment and is 0-based. Some fields in GFA2 E lines (```beg1, beg2, end1, end2```) and F lines (```s_beg, s_end, f_beg, f_end```) are positions. According to the specification, they are 0-based and represent -virtual ticks before and after each symbol in the sequence. +virtual ticks before and after each symbol in the sequence. Thus ranges are represented similarly to the Python range conventions: e.g. a 1-character prefix of a sequence will have begin 0 and end 1. -### GFA2 last position symbol +### GFA2 last position symbol -The GFA2 positions must contain an additional symbol (```$```) appended to the +The GFA2 positions must contain an additional symbol (```$```) appended to the integer, if (and only if) they are the last position in the segment sequence. -These particular positions are represented in RGFA as instances of the class -RGFA::LastPos. - -To create a lastpos instance, ```to_lastpos``` can be called on -an integer, or ```to_pos``` can be called on the string representation: -```ruby -12.to_lastpos # => RGFA::LastPos with value 12 -"12".to_pos # => 12 -"12$".to_pos # => RGFA::LastPos with value 12 +These particular positions are represented in gfapy as instances of the class +```gfapy.LastPos```.
+ +To create a lastpos instance, the constructor can be used with +an integer, or the string representation (which must end with the dollar sign, +otherwise an integer is returned): +```python +str(gfapy.LastPos(12)) # => "12$" +gfapy.LastPos("12") # => 12 +str(gfapy.LastPos("12")) # => "12" +gfapy.LastPos("12$") # => gfapy.LastPos(12) +str(gfapy.LastPos("12$")) # => "12$" ``` Subtracting an integer from a lastpos returns a lastpos if 0 subtracted, an integer otherwise. This allows to do some arithmetic on positions without making them invalid. -```ruby -12.to_lastpos - 0 # => RGFA::LastPos(value: 12) -12.to_lastpos - 1 # 11 -``` -The methods first? and last? allow to determine if a position value -is 0 (first?), or if it is a last position (last?), using the -same syntax fo lastpos and integer instances. -```ruby -0.first? # true -0.last? # false -12.first? # false -12.last? # false -"12".to_pos.first? # false -"12$".to_pos.last? # true -``` -### Summary of position-related API methods - -```ruby -String#to_pos -Integer#to_lastpos -Integer/RGFA::LastPos#first? -Integer/RGFA::LastPos#last? -RGFA::LastPos.- +```python +gfapy.LastPos(12) - 0 # => gfapy.LastPos(12) +gfapy.LastPos(12) - 1 # => 11 ``` +The functions ```gfapy.islastpos``` and ```gfapy.isfirstpos``` +allow to determine if a position value is 0 (first), or the +last position, using the same syntax for lastpos and integer instances. + +```python +gfapy.isfirstpos(0) # True +gfapy.islastpos(0) # False +gfapy.isfirstpos(12) # False +gfapy.islastpos(12) # False +gfapy.islastpos(gfapy.LastPos("12")) # False +gfapy.islastpos(gfapy.LastPos("12$")) # True +``` diff --git a/manual/references.md b/manual/references.md index 71dd96f..8531886 100644 --- a/manual/references.md +++ b/manual/references.md @@ -1,30 +1,40 @@ ## References Some fields in GFA lines contain identifiers or lists of identifiers -(sometimes followed by orientation symbols), which reference -other lines of the GFA file.
+(sometimes followed by orientation symbols), which reference +other lines of the GFA file. In gfapy it is possible to follow these +references and traverse the graph. -### Connecting a line to a RGFA object +### Connecting a line to a gfapy object In stand-alone line instances, the identifiers which reference -other lines are symbols (or, if they are oriented identifiers, -then instances of RGFA::OrientedLine containing a symbol). -Lists of identifiers are represented by arrays of symbols and oriented -segment instances. - -When a line is connected to a RGFA object (adding the line using -```RGFA#<<(line)``` or calling ```RGFA::Line#connect(rgfa)```), -the symbols in the fields (and in arrays and oriented line instances) -are changed into references to the corresponding lines in the RGFA object. - -The method ```RGFA::Line#connected?``` allows to determine if -a line is connected to an RGFA instance. The method ```RGFA::Line#rgfa``` -returns the RGFA instance to which the line is connected. +other lines are either strings containing the line name, pairs +of strings (name and orientation) in a gfapy.OrientedLine object, +or lists of line names or gfapy.OrientedLine objects. + +Using the ```add_line(line)``` (alias: ```append(line)```) method of the +gfapy.Gfa object, or the equivalent ```connect(gfa)``` method of the gfapy.Line +instance, a line is added to a Gfa instance (this is done automatically when a +GFA file is parsed). All strings expressing references are then changed into +references to the corresponding line objects. The method ```is_connected()``` +allows to determine if a line is connected to a gfapy.Gfa instance. The read-only +property ```gfa``` allows to find the gfapy.Gfa instance to which the line is +connected. + +```python +link.is_connected() # => False +link.gfa # => None +link.from_segment # => "A" +link.connect(gfa) # or gfa.add_line(link); or gfa.append(link) +link.is_connected() # => True +link.gfa # => gfapy.Gfa(...)
+link.from_segment # => gfapy.Segment("S\tA\t*", ...) +``` ### References for each record type -The following tables list the references for each record type. -```[]``` represent arrays. +The following tables describe the references contained in each record type. +The notation ```[]``` represents lists. #### GFA1 @@ -41,27 +51,31 @@ retrieved using ```links``` (which is not a field). #### GFA2 -| Record type | Fields | Type of reference | -|-------------|---------------|--------------------------------------| -| Edge | sid1, sid2 | Segment | -| Gap | sid1, sid2 | Segment | -| Fragment | sid | Segment | -| U Group | items | [Edge/O-Group/U-Group/Segment] | -| O Group | items | [OrientedLine(Edge/O-Group/Segment)] | +| Record type | Fields | Type of reference | +|-------------|---------------|----------------------------------| +| Edge | sid1, sid2 | Segment | +| Gap | sid1, sid2 | Segment | +| Fragment | sid | Segment | +| Set | items | [Edge/Set/Path/Segment] | +| Path | items | [OrientedLine(Edge/Path/Segment)] | ### Backreferences for each record type -When a line containing a reference to another line is connected to a RGFA +When a line containing a reference to another line is connected to a gfapy object, backreferences to it are created in the targeted line. -For each backreference collection a getter method exist, which is named -as the collection (e.g. ```RGFA::Line::Segment#dovetails_L```). -The methods return frozen arrays (as changing the content of -the array directly would invalid other related references in the graph object). -To change the reference which generated the backreference, see the section -"Editing reference fields" below. +For each backreference collection a read-only property exists, which is named +as the collection (e.g. ```dovetails_L``` for segments). Note that +the reference lists returned by these properties are read-only and editing +the references is done using other methods (see the section +"Editing reference fields" below).
+ +```python +segment.dovetails_L # => [gfapy.line.edge.Link(...), ...] +``` -The following tables list the backreferences collections for each record type. +The following tables describe the backreferences collections for each record +type. #### GFA1 @@ -94,44 +108,60 @@ The following tables list the backreferences collections for each record type. | | sets | U | | U Group | sets | U | -#### Backreference convenience methods +#### Segment backreference convenience methods -In some cases, additional methods are available which combine in different way +For segments, additional methods are available which combine in different way the backreferences information. - -The segment ```dovetails``` and ```gaps``` -methods take an optional argument. Without argument all dovetail overlaps -(references to links or dovetail edges) or gaps are returned. If :L or :R is -provided as argument, the dovetails overlaps (or gaps) of the left or, +The ```dovetails_of_end(end)``` and ```gaps_of_end(end)``` methods take an +argument "L" or "R" and return the dovetails overlaps (or gaps) of the left or, respectively, right end of the segment sequence are returned (equivalent to -dovetails_L/dovetails_R and gaps_L/gaps_R). +```dovetails_L```/```dovetails_R``` and ```gaps_L```/```gaps_R```). + The segment ```containments``` methods returns both containments where the segment is the container or the contained segment. -The segment ```edges``` method returns all edges (dovetails, containments +The segment ```edges``` property is a list of all edges (dovetails, containments and internals) with a reference to the segment. -Other methods -directly compute list of segments from the edges lists mentioned above. -In particular, -the segment ```neighbours``` method computes the set of segment +Other methods directly compute list of segments from the edges lists mentioned +above. 
The ```neighbours_L```, ```neighbours_R``` properties +and the ```neighbours(end)``` method compute the set of segment instances which are connected by dovetails to the segment. -The segment ```containers``` and ```contained``` methods similarly +The segment ```containers``` and ```contained``` properties similarly compute the set of segment instances which, respectively, contains the segment, or are contained in the segment. -### Multiline group definitions +```python +s.dovetails_of_end("L") # => [gfapy.line.edge.Link(...), ...] +s.dovetails_L == s.dovetails_of_end("L") # => True +s.gaps_of_end("R") # => [] +s.edges # => [gfapy.line.edge.Link(...), ...] +s.neighbours_L # => [gfapy.line.segment.GFA1(...), ...] +s.containers # => [gfapy.line.segment.GFA1(...), ...] +``` -Groups can be defined on multiple lines, by using the same ID -for each line defining the group. If multiple RGFA::Line::Group -instances with the same ID are connected to the RGFA, the final -RGFA will only contain the last instance: all previous one are -disconnected and their items list prepended to the last instance. -All tags will be copied to the last instance added. +### Multiline group definitions -The tags of multiple line defining a group -may not contradict each other. Either are the tag names on different -lines defining the group all different, or, if the same tag is present -on different lines, the value and datatype must be the same. +The GFA2 specification opens the possibility (experimental) to +define groups on multiple lines, by using the same ID +for each line defining the group. This is supported by gfapy. + +This means that if multiple ```gfapy.line.group.Ordered``` or +```gfapy.line.group.Unordered``` instances connected to a gfapy.Gfa object have the same +```gid```, they are merged into a single instance (technically the +last one getting added to the graph object). The item lists are merged. + +The tags of multiple lines defining a group shall not contradict each other +(i.e.
either the tag names on different lines defining the group are all +different, or, if the same tag is present on different lines, the value and +datatype must be the same, in which case the multiple definition will be +ignored). + +```python +gfa.add_line("U\tu1\ts1 s2 s3") +[s.name for s in gfa.sets[-1].items] # => ["s1","s2","s3"] +gfa.add_line("U\tu1\ts4 s5") +[s.name for s in gfa.sets[-1].items] # => ["s1","s2","s3","s4","s5"] +``` ### Induced set and captured path @@ -147,31 +177,46 @@ Furthermore groups may refer to other groups (set to sets or paths, paths to paths only), which then indirectly contain references to segments and edges. -RGFA provides methods for the computation of the sets of segments +gfapy provides methods for the computation of the sets of segments and edges which are implied by an ordered or unordered group. Thereby all references to subgroups are resolved and implicit elements are added, as described in the specification. The computation can, therefore, only be applied to connected lines. For unordered groups, this computation is provided by the method -```induced_set```, which returns an array of segment and edge instances. +```induced_set()```, which returns a list of segment and edge instances. For ordered group, the computation is provided by the method -```captured_path```, whcih returns a list of RGFA::OrientedLine instances, +```captured_path()```, which returns a list of gfapy.OrientedLine instances, alternating segment and edge instances (and starting and ending in segments). -The methods ```induced_segments_set```, ```induced_edges_set```, -```captured_segments``` and ```captured_edges``` return, respectively, +The methods ```induced_segments_set()```, ```induced_edges_set()```, +```captured_segments()``` and ```captured_edges()``` return, respectively, the list of only segments or edges, in ordered or unordered groups.
-### Disconnecting a line from a RGFA object +```python +gfa.add_line("U\tu1\ts1 s2 s3") +u = gfa.sets[-1] +u.induced_edges_set() # => [gfapy.line.edge.GFA2("E\te1\ts1+\ts2-...", ...)] +[l.name for l in u.induced_set()] # => ["s1", "s2", "s3", "e1"] +``` -Lines can be disconnected using ```RGFA#rm(line)``` or -```RGFA::Line#disconnect```. +### Disconnecting a line from a gfapy object + +Lines can be disconnected using the ```rm(line)``` method of the +```gfapy.Gfa``` object or the ```disconnect()``` method of the +line instance. + +```python +line = gfa.segment("sA") +gfa.rm(line) +# or equivalent: +line.disconnect() +``` Disconnecting a line affects other lines as well. Lines which are dependent on the disconnected line are disconnected as well. Any other reference to disconnected lines is removed as well. In the disconnected line, references -to lines are transformed back to symbols and backreferences are deleted. +to lines are transformed back to strings and backreferences are deleted. The following tables show which dependent lines are disconnected if they refer to a line which is being disconnected. @@ -185,38 +230,39 @@ refer to a line which is being disconnected. #### GFA2 -| Record type | Dependent lines | -|-------------|--------------------------------------------| -| Segment | edges, gaps, fragments, u-groups, o-groups | -| Edge | u-groups, o-groups | -| U-Group | groups | +| Record type | Dependent lines | +|-------------|-------------------------------------| +| Segment | edges, gaps, fragments, sets, paths | +| Edge | sets, paths | +| Set | sets, paths | ### Editing reference fields In connected line instances, it is not allowed to directly change the content of fields containing references to other lines, as this would make the state of -the RGFA object invalid. +the gfapy object invalid. Besides the fields containing references, some other fields are read-only in connected lines.
Changing some of the fields would require moving the backreferences to other collections (position fields of edges and gaps, -from_orient and to_orient of links). The overlaps field of connected links is +```from_orient``` and ```to_orient``` of links). The overlaps field of connected links is readonly as it may be necessary to identify the link in paths. #### Renaming an element -The name field of a line (e.g. segment name/sid) is not a reference and thus +The name field of a line (e.g. segment ```name```/```sid```) is not a reference and thus can be edited also in connected lines. When the name of the line is changed, no manual editing of references (e.g. from/to fields in links) is necessary, as all lines which refer to the line will still refer to the same instance. -The references to the instance in the RGFA lines collections will be +The references to the instance in the gfapy lines collections will be automatically updated. Also, the new name will be correctly used when -converting to string, such as when the RGFA is written to a GFA file. +converting to string, such as when the gfapy is written to a GFA file. Renaming a line to a name which already exists has the same effect of adding -a line with that name. That is, in most cases, ```RGFA::NotUniqueError``` is -raised. An exception are GFA2 groups: in this case -the line will be appended to the existing line with the same name. +a line with that name. That is, in most cases, ```gfapy.NotUniqueError``` is +raised. An exception are GFA2 sets and paths: in this case +the line will be appended to the existing line with the same name +(as described in "Multiline group definitions"). #### Adding and removing group elements @@ -225,19 +271,19 @@ non-connected lines, using the following methods. To add an item to or remove an item from an unordered group, use the methods ```add_item(item)``` and ```rm_item(item)```, which take as argument either -a symbol (identifier) or a line instance. 
+a string (identifier) or a line instance. To append or prepend an item to an ordered group, use the methods ```append_item(item)``` and ```prepend_item(item)```. To remove the first or the last item of an ordered group use the methods -```rm_first_item``` and -```rm_last_item```. +```rm_first_item()``` and +```rm_last_item()```. #### Editing read-only fields of connected lines Editing the read-only information of edges, gaps, links, containments, fragments and paths is more complicated. These lines shall be disconnected -before the edit and connected again to the RGFA object after it. Before +before the edit and connected again to the gfapy object after it. Before disconnecting a line, you should check if there are other lines dependent on it (see tables above). If so, you will have to disconnect these lines first, eventually update their fields and reconnect them at the end of the operation. @@ -245,9 +291,9 @@ eventually update their fields and reconnect them at the end of the operation. ### Virtual lines The order of the lines in GFA is not prescribed. Therefore, during parsing, -or constructing a RGFA in memory, it is possible that a line is referenced to, -before it is added to the RGFA instance. -Whenever this happens, RGFA creates a "virtual" line instance. +or constructing a gfapy in memory, it is possible that a line is referenced to, +before it is added to the gfapy instance. +Whenever this happens, gfapy creates a "virtual" line instance. Users do not have to handle with virtual lines, if they work with complete and valid GFA files. @@ -255,54 +301,23 @@ complete and valid GFA files. Virtual lines are similar to normal line instances, with some limitations (they contain only limited information and it is not allowed to add tags to them). To check if a line is a virtual line, one can use the -```RGFA::Line#virtual?``` method. +```is_virtual()``` method of the line. 
As soon as the parser founds the real line corresponding to a previously introduced virtual line, the virtual line is exchanged with the real line and all references are corrected to point to the real line. -### Summary of references-related API methods - -```ruby -RGFA#<<(line)/rm(line) -RGFA::Line#connect(rgfa) -RGFA::Line#disconnect -RGFA::Line#connected? -RGFA::Line#rgfa -RGFA::Line#virtual? -RGFA::Line::Segment::GFA1/GFA2#dovetails(_L|_R) -RGFA::Line::Segment::GFA1/GFA2#dovetails -RGFA::Line::Segment::GFA1/GFA2#neighbours -RGFA::Line::Segment::GFA1/GFA2#contain(ed|ers) -RGFA::Line::Segment::GFA1/GFA2#edges_to_contain(ed|ers) -RGFA::Line::Segment::GFA1/GFA2#containments -RGFA::Line::Segment::GFA1/GFA2#internals -RGFA::Line::Segment::GFA1/GFA2#edges -RGFA::Line::Segment::GFA2#gaps(_L|_R) -RGFA::Line::Segment::GFA2#gaps -RGFA::Line::Segment::GFA2#fragments -RGFA::Line::Segment::GFA1/GFA2#paths -RGFA::Line::Segment::GFA2#sets -RGFA::Line::Fragment#sid -RGFA::Line::Edge::Containment/Link#from/to -RGFA::Line::Gap/Edge::GFA2#sid1/sid2 -RGFA::Line::Gap/Edge::GFA2#sets/paths -RGFA::Line::Group::Path#segment_names -RGFA::Line::Group::Path#links -RGFA::Line::Group::Unordered#items -RGFA::Line::Group::Unordered#paths -RGFA::Line::Group::Unordered#add_item(item) -RGFA::Line::Group::Unordered#rm_item(item) -RGFA::Line::Group::Ordered#items -RGFA::Line::Group::Ordered#paths -RGFA::Line::Group::Ordered#append_item(item) -RGFA::Line::Group::Ordered#prepend_item(item) -RGFA::Line::Group::Ordered#rm_first_item -RGFA::Line::Group::Ordered#rm_last_item -RGFA::Line::Group::Ordered#captured_paths -RGFA::Line::Group::Ordered#captured_segments -RGFA::Line::Group::Ordered#captured_edges -RGFA::Line::Group::Unordered#induced_set -RGFA::Line::Group::Unordered#induced_segments_set -RGFA::Line::Group::Unordered#induced_edges_set +```python +g = gfapy.Gfa() +g.add_line("S\t1\t*") +g.add_line("L\t1\t+\t2\t+\t*") +l = g.dovetails[-1] +g.segment("1").is_virtual() # => False
+g.segment("2").is_virtual() # => True +l.to_segment == g.segment("2") # => True +g.segment("2").dovetails == [l] # => True +g.add_line("S\t2\t*") +g.segment("2").is_virtual() # => False +l.to_segment == g.segment("2") # => True +g.segment("2").dovetails == [l] # => True ``` diff --git a/manual/rgfa.md b/manual/rgfa.md deleted file mode 100644 index 7402314..0000000 --- a/manual/rgfa.md +++ /dev/null @@ -1,133 +0,0 @@ -## The RGFA object - -The main class of the library is RGFA. An object of the class RGFA represents -the content of a GFA file. - -A RGFA instance can be created directly (using the ```RGFA.new``` -method, or the method ```RGFA.from_file(filename)``` can be used to parse a -GFA file and create a RGFA instance from it. - -The ```to_s``` method converts the RGFA instance into its textual -representation. Writing all information to a GFA file can be done directly -using the ```to_file(filename)``` method. - -### Retrieving the lines - -For many line times, iterating between all lines of the type can be done -using a method which is named after the record type, in plural -(```segments```, ```paths```, ```edges```, ```links```, ```containments```, -```groups```, ```fragments```, ```comments```, ```custom_lines```). -The access to the header is done using a single line, which is retrieved using -the ```header``` method. - -Some lines use identifiers: segments, gaps, edges, paths and sets. Given an -identifier, the line can be retrieved using the ```line(id)``` -method. Note that identifier are represented in RGFA by Ruby symbols. -The list of all identifier can be retrieved using the ```names``` method; -for the identifiers of a single line type, use ```segment_names```, - ```edges_names```, ```gap_names```, ```path_names``` and ```set_names```. -The identifiers of external sequences in fragments are not part of the -same namespace and can be retrieved using the ```external_names``` method.
- -### Segments - -Segment lines are available in both GFA1 and GFA2. They -they represent the pieces of molecules, whose relations to other -segments are coded by other line types. - -In GFA1 a segment contains a segment name and a sequence (and, eventually, -optional tags). In GFA2 the syntax is slightly different, -as the segment contain an additional segment length field, which -represent an eventually approximate length, which can be taken as a -drawing indication for segments in graphical programs. - -### Relationships between segments - -Segments are put in relation to each other by edges lines (E lines in GFA2, -L and C Lines in GFA1), as well as gaps. RGFA allows to convert edges -lines from one spefication version to the other (subject to limitations, -see the Versions chapter). Gap lines cannot be converted, as no GFA1 -specification exist for them. - -### Relationships to external sequences - -Fragments represent relationships of segments to external sequences, -i.e. sequences which are not represented in the GFA file itself. -The typical application is to put contigs in relationship with the -reads from which they are constructed. - -The set of IDs of the external sequences may overlap the IDs of the -GFA file itself (ie. the namespaces are separated). The list of -external IDs referenced to by fragment lines can be retrieved -using the ```external_names``` method of RGFA instances. - -To find all fragments which refer to an external ID, -the ```fragments_for_external(ID)``` method is used. As an external sequence -can refer to different segments in different F lines, the result is always -an array of F lines. - -Conversely, to find all fragments for a particular segment, you may use the -```fragments``` method on the segment instance (see the References chapter). - -### Groups - -Groups are lines which combine different other lines in an ordered (paths) -or unordered (sets) way. RGFA supports both GFA1 paths and GFA2 paths and sets. 
-Paths have a different syntax in the two specification versions. -Methods are provided to edit the group components also without disconnecting -the line instance (see the References chapter). - -### Other line types - -The header contain metadata in a single or multiple lines. For ease of access -to the header information, all its tags are summarized in a single line -instance. See the Header chapter for more information. -All lines which start by the symbol ```#``` are comments; they are -handled in the Comments chapter. -Custom lines are lines of GFA2 files which start with a non-standard -record type. RGFA provides a limited support for accessing the information -in custom lines. - -### Adding new lines - -New lines can be added to a GFA file using the ```add_line(line)``` method -or its alias ```<<(line)```. The argument may be a string describing a line -with valid GFA syntax, or an instance of the class ```RGFA::Line``` - -if a string is added, a line instance is created and then added. -A line instance can be created manually before adding it, using -the ```to_rgfa_line``` string method. - -### Editing the lines - -Accessing the information stored in the fields of a line instance -is described in the ```Positional fields``` and ```Tags``` -chapters. - -Once a line instance has been added to a RGFA, either directly, or using its -string representation, the line is said to be _connected_ to the RGFA. -Reading the information in fields is always allowed, while changing the content -of some fields (fields which refer to other lines) is only possible for -instances which are not connected. - -In some cases, methods are provided -to modify the content of reference fields of connected line -(see the References chapter). - -### Removing lines - -Removing a line can be done using the ```rm(line)``` method. The argument -can be a line instance or a symbol (in which case the line is searched -using the ```line(name)``` method, then eliminated). 
-A line instance can also be disconnected using the ```disconnect``` method -on it. Disconnecting a line may trigger other operations, such as the -disconnection of other lines (see the References chapter). - -### Renaming lines - -Lines with an identifier can be renamed. This is done simply by editing the -corresponding field (such as segment_name). This field is not a reference -to another line and can be freely edited also in line instances connected -to a RGFA. All references to the line from other lines will still be up to -date, as they will refer to the same instance (whose name has been changed) -and their string representation will use the new name. - diff --git a/manual/tags.md b/manual/tags.md index f802c5f..bb3c930 100644 --- a/manual/tags.md +++ b/manual/tags.md @@ -2,25 +2,25 @@ Each record in GFA can contain tags. Tags are fields which consist in a tag name, a datatype and data. The format is ```NN:T:DATA``` where ``NN`` is a -two-letter tag name, ```T``` is an one-letter datatype symbol and ```DATA``` is +two-letter tag name, ```T``` is an one-letter datatype string and ```DATA``` is a string representing the data according to the specified datatype. Tag names must be unique for each line, i.e. each line may only contain a tag once. -``` +```python # Examples of GFA tags of different datatypes: -aa:i:-12 -bb:f:1.23 -cc:Z:this is a string -dd:A:X -ee:B:c,12,3,2 -ff:H:122FA0 -gg:J:["A","B"] +"aa:i:-12" +"bb:f:1.23" +"cc:Z:this is a string" +"dd:A:X" +"ee:B:c,12,3,2" +"ff:H:122FA0" +'gg:J:["A","B"]' ``` ### Custom tags Some tags are explicitely defined in the specification (these are named -_predefined tags_ in RGFA), and the user or an application can define its own +_predefined tags_ in gfapy), and the user or an application can define its own custom tags. Custom tags are user or program specific and may of course collide with the @@ -28,32 +28,28 @@ tags used by other users or programs. 
For this reasons, if you write scripts which employ custom tags, you should always check that the values are of the correct datatype and plausible. -```ruby -if line.get_datatype(:xx) != :i - raise "I expected the tag xx to contain an integer!" -end +```python +if line.get_datatype("xx") != "i": + raise Exception("I expected the tag xx to contain an integer!") myvalue = line.xx -if (myvalue > 120) or (myvalue % 2 == 1) - raise "The value in the xx tag is not an even value <= 120" -end +if (myvalue > 120) or (myvalue % 2 == 1): + raise Exception("The value in the xx tag is not an even value <= 120") # ... do something with myvalue ``` Also it is good practice to allow the user of the script to change the name of -the custom tags. For example, RGFATools employs the +or+ custom tag to track +the custom tags. For example, gfapy employs the +or+ custom tag to track the original segment from which a segment in the final graph is derived. All methods which read or write the +or+ tag allow to specify an alternative tag name to use instead of +or+, for the case that this name collides with the custom tag of another program. -```ruby +```python # E.g. a method which does something with myvalue, usually stored in tag xx # allows the user to specify an alternative name for the tag -# @param mytag [Symbol] (defaults to: +:xx+) tag where value is stored -def mymethod(line, mytag: :xx) - myvalue = line.xx - # .... do something with myvalue -end +def mymethod(line, mytag="xx"): + myvalue = line.get(mytag) + # ... ``` ### Tag names in GFA1 @@ -64,13 +60,13 @@ can be a number). There is a number of predefined tags in the specification, different for each kind of line. 
``` -VN:Z:1.0 # VN is upcase => predefined tag -z5:Z:1.0 # z5 first char is downcase => custom tag +"VN:Z:1.0" # VN is upcase => predefined tag +"z5:Z:1.0" # z5 first char is downcase => custom tag # not forbidden, but not reccomended: -zZ:Z:1.0 # => mixed case, first char downcase => custom tag -Zz:Z:1.0 # => mixed case, first char upcase => custom tag -vn:Z:1.0 # => same name as predefined tag, but downcase => custom tag +"zZ:Z:1.0" # => mixed case, first char downcase => custom tag +"Zz:Z:1.0" # => mixed case, first char upcase => custom tag +"vn:Z:1.0" # => same name as predefined tag, but downcase => custom tag ``` Besides the tags described in the specification, in GFA1 headers, the TS tag is @@ -82,7 +78,7 @@ The GFA2 specification is currently not as strict regarding tags: anyone can use both upper and lower case tags, and no tags are predefined except for VN and TS. -However, RGFA follows the same conventions as for GFA1: i.e. it allows the tags +However, gfapy follows the same conventions as for GFA1: i.e. it allows the tags specified as predefined tags in GFA1 to be used also in GFA2. No other upper case tag is allowed in GFA2. @@ -90,15 +86,15 @@ case tag is allowed in GFA2. 
The following table summarizes the datatypes available for tags: -| Symbol | Datatype | Example | Ruby class | +| Symbol | Datatype | Example | Python class | |--------|---------------|-------------------------|--------------------| -| Z | string | This is a string | String | -| i | integer | -12 | Fixnum | -| f | float | 1.2E-5 | Float | -| A | char | X | String | -| J | JSON | [1,{"k1":1,"k2":2},"a"] | Array/Hash | -| B | numeric array | f,1.2,13E-2,0 | RGFA::NumericArray | -| H | byte array | FFAA01 | RGFA::ByteArray | +| Z | string | This is a string | str | +| i | integer | -12 | int | +| f | float | 1.2E-5 | float | +| A | char | X | str | +| J | JSON | [1,{"k1":1,"k2":2},"a"] | list/dict | +| B | numeric array | f,1.2,13E-2,0 | gfapy.NumericArray | +| H | byte array | FFAA01 | gfapy.ByteArray | ### Validation @@ -106,28 +102,28 @@ The tag name is validated according the the rules described above: except for the upper case tags indicated in the GFA1 specification, and the TS header tag, all other tags must contain at least one lower case letter. -``` -VN # => in header: allowed, elsewhere: error -TS # => allowed in headers and GFA2 Edges -KC # => allowed in links, containments, GFA1/GFA2 segments -xx # => custom tag, always allowed -xxx # => error: name is too long -x # => error: name is too short -11 # => error: at least one letter must be present +```python +"VN:i:1" # => in header: allowed, elsewhere: error +"TS:i:1" # => allowed in headers and GFA2 Edges +"KC:i:1" # => allowed in links, containments, GFA1/GFA2 segments +"xx:i:1" # => custom tag, always allowed +"xxx:i:1" # => error: name is too long +"x:i:1" # => error: name is too short +"11:i:1" # => error: at least one letter must be present ``` The datatype must be one of the datatypes specified above. For predefined -tags, RGFA also checks that the datatype given in the specification is used. +tags, gfapy also checks that the datatype given in the specification is used. 
``` -xx:X # => error: datatype X is unknown -VN:i # => error: VN must be of type Z +"xx:X:1" # => error: datatype X is unknown +"VN:i:1" # => error: VN must be of type Z ``` The data must be a correctly formatted string for the specified datatype or a -Ruby object whose string representation is a correctly formatted string. +Python object whose string representation is a correctly formatted string. -```ruby +```python # current value: xx:i:2 line.xx = 1 # OK line.xx = "1" # OK, value is set to 1 @@ -140,65 +136,61 @@ is performed only during parsing or accessing values the first time, therefore the user must perform a manual validation if he changes values to something which is not guaranteed to be correct. To trigger a manual validation, the user can call the method ```validate_field(fieldname)``` to validate a -single tag, or ```validate``` to validate the whole line, including all +single tag, or ```validate()``` to validate the whole line, including all tags. -```ruby +```python line.xx = "A" -line.validate_field(:xx) # validates xx +line.validate_field("xx") # validates xx # or, to validate the whole line, including tags: -line.validate +line.validate() ``` ### Reading and writing tags -Tags can be read using a method on the RGFA line object, which is called as the -tag (e.g. line.xx). A banged version of the method raises an error if the -tag was not available (e.g. line.LN!), which the normal method returns -```nil``` in this case. Setting the value is done with an equal sign version of -the tag name method (e.g. line.TS = 120). In alternative, the -```set(fieldname, value)```, ```get(fieldname)``` and ```get!(fieldname)``` +Tags can be read using a property on the gfapy line object, which is called as +the tag (e.g. line.xx). A special version of the property prefixed by +```try_get_``` raises an error if the tag was not available (e.g. +```line.try_get_LN```), while the tag property (e.g. ```line.LN```) would +return ```None``` in this case. 
Setting the value is done by assigning a value to +the tag property (e.g. ```line.TS = 120```). In alternative, the +```set(fieldname, value)```, ```get(fieldname)``` and ```try_get(fieldname)``` methods can also be used. To remove a tag from a line, use the -```delete(fieldname)``` method, or set its value to ```nil```. +```delete(fieldname)``` method, or set its value to ```None```. -```ruby +```python # line is "H xx:i:12" line.xx # => 1 line.xy # => nil -line.xx! # => 1 -line.xy! # => error: xy is not defined -line.get(:xx) # => 1 -line.get!(:xy) # => error, xy is not defined -line.xx = 2 # => value of xx is changed to 2 -line.xx = "a" # => error: not compatible with existing type (i) -line.xy = 2 # => xy is created and set to 2, type is auto-set to i -line.set(:xy, 2) # => sets xy to 2 -line.delete(:xy) # => tag is eliminated -line.xx = nil # => tag is eliminated +line.try_get_xx # => 1 +line.try_get_xy # => error: xy is not defined +line.get("xx") # => 1 +line.try_get("xy") # => error, xy is not defined +line.xx = 2 # => value of xx is changed to 2 +line.xx = "a" # => error: not compatible with existing type (i) +line.xy = 2 # => xy is created and set to 2, type is auto-set to i +line.set("xy", 2) # => sets xy to 2 +line.delete("xy") # => tag is eliminated +line.xx = None # => tag is eliminated ``` -The ```RGFA::Line#tagnames``` method, returns the list of the names (as -symbols) of all defined tags for a line. Alternatively, to test if a line -contains a tag, it is possible to use the not-banged get method (e.g. line.VN), -as this returns nil if the tag is not defined, and a non-nil value if the tag -is defined. - -```ruby -puts "Line contains the following tags:" -line.tagnames.each do |tagname| - puts tagname -end -if line.VN +The ```gfapy.Line.tagnames``` property is a list of the names (as +strings) of all defined tags for a line.
+ +```python +print("Line contains the following tags:") +for t in line.tagnames: + print(t) +if "VN" in line.tagnames: # do something with line.VN value -end ``` -When a tag is read, the value is converted into an appropriate object (see Ruby +When a tag is read, the value is converted into an appropriate object (see Python classes in the datatype table above). When setting a value, the user can -specify the value of a tag either as a Ruby object, or as the string +specify the value of a tag either as a Python object, or as the string representation of the value. -```ruby +```python # line is: H xx:i:1 xy:Z:TEXT xz:J:["a","b"] line.xx # => 1 (Integer) line.xy # => "TEXT" (String) @@ -211,39 +203,38 @@ of the field. By setting ``tag: true```, the entire tag is output (name, datatype, content, separated by colons). An exception is raised if the field does not exist. -```ruby +```python # line is: H xx:i:1 -line.xx # => 1 (Integer) -line.field_to_s(:xx) # => "1" (String) -line.field_to_s(:xx, tag: true) # => "xx:i:1" +line.xx # => 1 +line.field_to_s("xx") # => "1" +line.field_to_s("xx", tag=True) # => "xx:i:1" ``` ### Datatype of custom tags The datatype of an existing custom field (but not of predefined fields) can be changed using the ```set_datatype(fieldname, datatype)``` method. The current -datatype specification can be read using ```get_datatype(fieldname)```. Thereby -the fieldname and datatype arguments are Ruby symbols. +datatype specification can be read using ```get_datatype(fieldname)```. -```ruby +```python # line is: H xx:i:1 -line.get_datatype(:xx) # => :i -line.set_datatype(:xx, :Z) +line.get_datatype("xx") # => "i" +line.set_datatype("xx", "Z") ``` -If a new custom tag is specified, RGFA selects the correct datatype for it: i/f -for numeric values, J/B for arrays, J for hashes and Z for strings and symbols. 
+If a new custom tag is specified, gfapy selects the correct datatype for it: i/f +for numeric values, J/B for lists, J for dicts and Z for strings. If the user wants to specify a different datatype, he may do so by setting it -with ```set_datatype``` (this can be done also before assigning a value, which +with ```set_datatype()``` (this can be done also before assigning a value, which is necessary if full validation is active). -```ruby +```python # line has not tags line.xx = "1" # => "xx:Z:1" created -line.xx # => "1" -line.set_datatype(:xy, :i) +line.xx # => "1" +line.set_datatype("xy", "i") line.xy = "1" # => "xy:i:1" created -line.xy # => 1 +line.xy # => 1 ``` ### Arrays of numerical values @@ -251,70 +242,69 @@ line.xy # => 1 ```B``` and ```H``` tags represent array with particular constraints (e.g. they can only contain numeric values, and in some cases the values must be in predefined ranges). In order to represent them correctly and allow for -validation, Ruby classes have been defined for both kind of tags: -```RGFA::ByteArray``` for ```H``` and ```RGFA::NumericArray``` for ```B``` +validation, Python classes have been defined for both kind of tags: +```gfapy.ByteArray``` for ```H``` and ```gfapy.NumericArray``` for ```B``` fields. -Both are subclasses of Array. Object of the two classes can be created by -converting the string representation (using ```to_byte_array``` and -```to_numeric_array```). The same two methods can be applied also to existing -Array instances containing numerical values. +Both are subclasses of list. Object of the two classes can be created by +passing an existing list or the string representation to the class constructor.
-```ruby +```python # create a byte array instance -[12,3,14].to_byte_array -"A012FF".to_byte_array +gfapy.ByteArray([12,3,14]) +gfapy.ByteArray("A012FF") # create a numeric array instance -"c,12,3,14".to_numeric_array -[12,3,14].to_numeric_array +gfapy.NumericArray("c,12,3,14") +gfapy.NumericArray([12,3,14]) ``` -Instances of the classes behave as normal arrays, except that they provide a -#validate method, which checks the constraints, and that their #to_s method -computes the GFA string representation of the field value. +Instances of the classes behave as normal lists, except that they provide a +#validate() method, which checks the constraints, and that their string +representation is the GFA string representation of the field value. -```ruby -[12,1,"1x"].to_byte_array.validate # => error: 1x is not a valid value -[12,3,14].to_numeric_array.to_s # => "c,12,3,14" +```python +gfapy.ByteArray([12,1,"1x"]).validate() # error: 1x is not a valid value +str(gfapy.NumericArray([12,3,14])) # => "c,12,3,14" ``` -For numeric values, the ```compute_subtype``` method allows to compute the +For numeric values, the ```compute_subtype()``` method allows to compute the subtype which will be used for the string representation. Unsigned subtypes are used if all values are positive. The smallest possible subtype range is selected. The subtype may change when the range of the elements changes. -```ruby -[12,13,14].to_numeric_value.compute_subtype # => "C" +```python +gfapy.NumericArray([12,13,14]).compute_subtype() # => "C" ``` ### Special cases: custom records, headers, comments and virtual lines. -GFA2 allows custom records, introduced by record type symbols other than the -predefined ones. RGFA uses a pragmatical approach for identifying tags in +GFA2 allows custom records, introduced by record type strings other than the +predefined ones.
gfapy uses a pragmatical approach for identifying tags in custom records, and tries to interpret the rightmost fields as tags, until the first field from the right raises an error; all remaining fields are treated as positional fields. -``` -X a b c xx:i:12 # => xx is tag, a, b, c are positional fields -Y a b xx:i:12 c # => all positional fields, as c is not a valid tag +```python +"X a b c xx:i:12" # => xx is tag, a, b, c are positional fields +"Y a b xx:i:12 c" # => all positional fields, as c is not a valid tag ``` For easier access, the entire header of the GFA is summarized in a single line -instance. Different GFA header lines can contain the same tag (this was a -discussed topic, it is not forbidden by the current specifications, but this -may change). A class (```RGFA::FieldArray```) has been defined to handle this -special case (see Header chapter for details). +instance. A class (```gfapy.FieldArray```) has been defined to handle the +special case when multiple H lines define the same tag (see "Header" chapter +for details). -Comment lines are represented by a subclass of the same class (```RGFA::Line```) +Comment lines are represented by a subclass of the same class (```gfapy.Line```) as the records. However, they cannot contain tags: the entire line is taken as -content of the comment. +content of the comment. See the "Comments" chapter for more information about +comments. -``` -# this is not a tag: xx:i:1 # => not a tag, xx:i:1 is part of the string content +```python +"# this is not a tag: xx:i:1" # => xx is not a tag, xx:i:1 is part of the comment ``` -Virtual ```RGFA::Line``` instances (e.g. Segment instances automatically created +Virtual ```gfapy.Line``` instances (e.g. segment instances automatically created because of not yet resolved references found in edges) cannot be modified by the user, and tags cannot be specified for them. This includes all instances of -the ```RGFA::Line::Unknown``` class. +the ```gfapy::Line::Unknown``` class. 
See the "References" chapter for more information +about virtual lines. diff --git a/manual/validation.md b/manual/validation.md index 4c4b341..d9e4e38 100644 --- a/manual/validation.md +++ b/manual/validation.md @@ -2,10 +2,11 @@ Different validation levels are available. They represent different compromises between speed and warrant of validity. The validation level can be specified -when the RGFA object is created, using the ```vlevel``` parameter of -```RGFA.new``` and ```RGFA.from_file```. Four levels of validation are defined +when the gfapy.Gfa object is created, using the ```vlevel``` parameter of +the constructor and of the ```gfapy.Gfa.from_file()``` method. +Four levels of validation are defined (0 = no validation, 1 = validation by reading, 2 = validation by reading and -writing, 3 = continuous validation). The default validation level value is 1. +writing, 3 = continuous validation). The default validation level value is 1. ### Manual validation @@ -21,11 +22,11 @@ segment, to the value of the LN tag (if present). It is also possible to validate the structure of the GFA, for example to check if there are unresolved references to lines. To do this, -use the ```validate``` method of the ```RGFA``` class. +use the ```validate()``` method of the ```gfapy.Gfa``` class. ### No validations -If the validation is set to 0, RGFA will try to accept any input +If the validation is set to 0, gfapy will try to accept any input and never raise an exception. This is not always possible, and in some cases, an exception will still be raised, if the data is invalid. @@ -38,20 +39,20 @@ Additionally, all tags will be validated, either during parsing or on first access. Record-type cross-field validations will also be performed. 
-In other words, a validation of 1 means that RGFA guarantees (as good as +In other words, a validation of 1 means that gfapy guarantees (as good as it can) that the GFA content read from a file is valid, and will raise an exception on accessing the data if not. The user is supposed to run ```validate_field(fieldname)``` when changing a field content to something which can be potentially invalid, or -```validate``` if potentially cross-field validations could fail. +```validate()``` if potentially cross-field validations could fail. ### Validation when writing Setting the level to 2 will perform all validations described above, plus validate the fields content when their value is written to string. -In other words, a validation of 2 means that RGFA guarantee (as good as +In other words, a validation of 2 means that gfapy guarantees (as good as it can) that the GFA content read from a file and written to a file is valid and will raise an exception on accessing the data or writing to file if not. @@ -61,13 +62,5 @@ If the validation level is set to 3, all validations for lower levels described above are run, plus a validation of fields contents each time a setter method is used. -A validation of 3 means that RGFA guarantees (as good as it can) +A validation of 3 means that gfapy guarantees (as good as it can) that the GFA content is always valid. - -### Summary of validation related methods - -```ruby -RGFA#validate -RGFA::Line#validate -RGFA::Line#validate_field(fieldname) -``` diff --git a/manual/versions.md b/manual/versions.md index 7e21fc7..5ec6ba0 100644 --- a/manual/versions.md +++ b/manual/versions.md @@ -10,7 +10,7 @@ user-specific record types, by using non-standard codes. ### Version autodetection -RGFA tries to autodetect the version of a GFA file from its syntax. The +gfapy tries to autodetect the version of a GFA file from its syntax.
The version of a valid GFA can always be recognized, unless it contains only header and comment lines, as any other line refer to segments, and segments are version-specific. If a GFA contains only header and commments, the version @@ -19,29 +19,30 @@ does not matter. The version is set as soon as a version-specific element is found. Here is the list of such elements: - segment lines (different number of positional fields in GFA1 and GFA2) -- version tag in header (VN:Z:1.0 or VN:Z:2.0) +- version tag in header (```VN:Z:1.0``` or ```VN:Z:2.0```) - E/G/F/O/U lines (GFA2 specific) - custom record-type lines (GFA2 specific) If subsequent version-specific elements are found which contrast with the first -one, RGFA::VersionError is raised. +one, gfapy.VersionError is raised. P/C/L lines are technically not GFA1-specific, as they could be custom records -in GFA2. However, their use in GFA2 is not supported by RGFA and an exception +in GFA2. However, their use in GFA2 is not supported by gfapy and an exception is thrown if these records are found in that version. Thus if these lines are found, their processing is delayed until a version-specific signal is found. -If the version is GFA2, RGFA::VersionError is raised. +If the version is GFA2, gfapy.VersionError is raised. ### Setting and reading the version Besides relying on autodetection, it is possible to explicitely set the version -of the RGFA or line objects, if this is known. Methods which create RGFA, i.e. -```new``` and ```from_file```, as well as methods which create RGFA lines, i.e. -```new``` and the string method ```to_rgfa_line```, all accept a version -parameter, which can be set to the symbols ```:gfa1``` or ```:gfa2```. +of the gfapy or line objects, if this is known.
Methods which create Gfa instances +(the constructor and the ```from_file()``` method), as well as methods which +create gfapy lines (the constructor and the ```from_string()``` method), +accept a ```version``` +parameter, which can be set to the strings ```"gfa1"``` or ```"gfa2"```. -Both the RGFA and the RGFA Line instances respond to the method -```version``` which returns one of: ```:gfa1```, ```:gfa2``` or ```:unknown```. +Instances of gfapy.Gfa and gfapy.Line have a ```version``` property +which contains ```"gfa1"```, ```"gfa2"``` or ```"unknown"```. ### Line queue @@ -49,20 +50,20 @@ The version autodetection feature is achieved by deferring the processing of version-specific lines (ie everything besides headers and comments) which are found before the version can be detected as explained above. These lines are put on a line queue. Once the version is clear, -the method ```process_line_queue``` is called on the RGFA instance. +the method ```process_line_queue()``` is called on the gfapy instance. This method can also be called by the user, if e.g. an example GFA is created programmatically, where the version is unclear. For the reasons explained above, this will generally not be the case, as such a GFA file would only contain headers and comments. -### Conversion of RGFA or RGFA::Line instances +### Conversion of gfapy.Gfa or gfapy.Line instances The conversion of GFA lines between GFA version is possible in some -cases. When possible, this is achieved by using the ```to_gfa1``` -and ```to_gfa2``` methods on the line instances. It is also possible +cases. When possible, this is achieved by using the ```to_gfa1()``` +and ```to_gfa2()``` methods on the line instances. It is also possible to directly output the line as a string in the other version -using the ```to_gfa1_s``` and ```to_gfa2_s``` methods. +using the ```to_gfa1_s()``` and ```to_gfa2_s()``` methods. Some lines do not require conversion (headers - except changing the value of the VN tag, comments).
@@ -78,8 +79,8 @@ compared to GFA1. Conversion from GFA2 to GFA1 is possible, unless unsupported characters are used in the sequence (which is usually not the case) or -the identifier is incompatible with GFA1 (ie it ends with + or - followed -by a comma). +the identifier is incompatible with GFA1 (i.e. it ends with +```+,``` or ```-,```). Conversion from GFA1 to GFA2 is possible, unless no sequence and no LN tag are present. @@ -97,8 +98,8 @@ which is anyway required for converting segments. Conversion from GFA2 to GFA1 is possible if the edge represents a dovetail overlap or an alignment. Also trace alignments are not supported -in GFA1, so the overlap will be set to *. Edge identifiers are stored -in id:Z: tags. +in GFA1, so the trace overlap will be set to ```*```. Edge identifiers are stored +in ```id:Z``` tags. #### Paths @@ -136,19 +137,3 @@ and/or edges and/or child paths with the same limitations. | Gap | Cannot be converted! | | Fragment | Cannot be converted! | | Custom | Cannot be converted! | - -## Summary of API methods related to GFA versions - -```ruby -RGFA.new(version:x) -RGFA.from_file(version:x) -RGFA::Line.new(version:x) -String.to_rgfa_line(version:x) -RGFA#version -RGFA#process_line_queue -RGFA::Line#version -RGFA::Line#to_gfa1 -RGFA::Line#to_gfa2 -RGFA::Line#to_gfa1_s -RGFA::Line#to_gfa2_s -``` diff --git a/pdfdoc/cover.css b/pdfdoc/cover.css deleted file mode 100644 index 52e0725..0000000 --- a/pdfdoc/cover.css +++ /dev/null @@ -1,4 +0,0 @@ -html, body{margin: 0; padding: 0; text-align: center} -.page{box-sizing: border-box; height: 100%; width: 100%; border: 1px solid transparent; page-break-after: always;} -.page-middle{height: 100%; width: 100%; display: table;} -.page-middle-inner{height: 100%; width: 100%; display: table-cell; vertical-align: middle;} diff --git a/pdfdoc/cover.html.erb b/pdfdoc/cover.html.erb deleted file mode 100644 index fe53daa..0000000 --- a/pdfdoc/cover.html.erb +++ /dev/null @@ -1,14 +0,0 @@ - - - -
-
-
-

Giorgio Gonnella

-

RGFA library - API documentation

-

Version <%=$rgfaversion%>

-
-
-
- - diff --git a/pdfdoc/print.css b/pdfdoc/print.css deleted file mode 100644 index 2307667..0000000 --- a/pdfdoc/print.css +++ /dev/null @@ -1,2 +0,0 @@ -h1 {page-break-before: always;} -h2 {page-break-after: avoid;} diff --git a/pdfdoc/rgfa-api-1.3.1.pdf b/pdfdoc/rgfa-api-1.3.1.pdf deleted file mode 100644 index c76fb6c..0000000 Binary files a/pdfdoc/rgfa-api-1.3.1.pdf and /dev/null differ diff --git a/rgfa.gemspec b/rgfa.gemspec deleted file mode 100644 index d3d747f..0000000 --- a/rgfa.gemspec +++ /dev/null @@ -1,176 +0,0 @@ -Gem::Specification.new do |s| - s.name = 'rgfa' - s.version = '2.0rc1' - s.date = '2016-12-07' - s.summary = 'Parse, edit and write GFA-format graphs in Ruby' - s.description = <<-EOF - The Graphical Fragment Assembly (GFA) is a proposed format which allow - to describe the product of sequence assembly. - This gem implements the proposed specifications for the GFA format - described under https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md - as close as possible. - The library allows to create an RGFA object from a file in the GFA format - or from scratch, to enumerate the graph elements (segments, links, - containments, paths and header lines), to traverse the graph (by - traversing all links outgoing from or incoming to a segment), to search for - elements (e.g. which links connect two segments) and to manipulate the - graph (e.g. to eliminate a link or a segment or to duplicate a segment - distributing the read counts evenly on the copies). 
- EOF - s.author = 'Giorgio Gonnella' - s.email = 'gonnella@zbh.uni-hamburg.de' - s.files = %w[ -lib/rgfatools.rb -lib/rgfatools/artifacts.rb -lib/rgfatools/p_bubbles.rb -lib/rgfatools/multiplication.rb -lib/rgfatools/copy_number.rb -lib/rgfatools/superfluous_links.rb -lib/rgfatools/linear_paths.rb -lib/rgfatools/invertible_segments.rb -lib/rgfa.rb -lib/rgfa/lastpos.rb -lib/rgfa/field.rb -lib/rgfa/logger.rb -lib/rgfa/graph_operations/topology.rb -lib/rgfa/graph_operations/multiplication.rb -lib/rgfa/graph_operations/linear_paths.rb -lib/rgfa/graph_operations/redundant_linear_paths.rb -lib/rgfa/graph_operations/rgl.rb -lib/rgfa/error.rb -lib/rgfa/segment_end.rb -lib/rgfa/field/position_gfa1.rb -lib/rgfa/field/alignment_gfa1.rb -lib/rgfa/field/path_name_gfa1.rb -lib/rgfa/field/orientation.rb -lib/rgfa/field/string.rb -lib/rgfa/field/segment_name_gfa1.rb -lib/rgfa/field/float.rb -lib/rgfa/field/json.rb -lib/rgfa/field/identifier_gfa2.rb -lib/rgfa/field/position_gfa2.rb -lib/rgfa/field/integer.rb -lib/rgfa/field/char.rb -lib/rgfa/field/sequence_gfa2.rb -lib/rgfa/field/generic.rb -lib/rgfa/field/alignment_gfa2.rb -lib/rgfa/field/alignment_list_gfa1.rb -lib/rgfa/field/comment.rb -lib/rgfa/field/numeric_array.rb -lib/rgfa/field/optional_identifier_gfa2.rb -lib/rgfa/field/identifier_list_gfa2.rb -lib/rgfa/field/oriented_identifier_gfa2.rb -lib/rgfa/field/byte_array.rb -lib/rgfa/field/sequence_gfa1.rb -lib/rgfa/field/oriented_identifier_list_gfa1.rb -lib/rgfa/field/custom_record_type.rb -lib/rgfa/field/oriented_identifier_list_gfa2.rb -lib/rgfa/field/optional_integer.rb -lib/rgfa/symbol_invert.rb -lib/rgfa/alignment/trace.rb -lib/rgfa/alignment/cigar.rb -lib/rgfa/alignment/placeholder.rb -lib/rgfa/oriented_line.rb -lib/rgfa/line/fragment/references.rb -lib/rgfa/line/header.rb -lib/rgfa/line/fragment.rb -lib/rgfa/line/gap.rb -lib/rgfa/line/group/ordered.rb -lib/rgfa/line/group/unordered.rb -lib/rgfa/line/group/unordered/references.rb 
-lib/rgfa/line/group/unordered/induced_set.rb -lib/rgfa/line/group/gfa2/references.rb -lib/rgfa/line/group/gfa2/same_id.rb -lib/rgfa/line/group/path/captured_path.rb -lib/rgfa/line/group/path/topology.rb -lib/rgfa/line/group/path/to_gfa2.rb -lib/rgfa/line/group/path/references.rb -lib/rgfa/line/group/path/validation.rb -lib/rgfa/line/group/ordered/captured_path.rb -lib/rgfa/line/group/ordered/references.rb -lib/rgfa/line/group/ordered/to_gfa1.rb -lib/rgfa/line/group/path.rb -lib/rgfa/line/group.rb -lib/rgfa/line/edge/containment/to_gfa2.rb -lib/rgfa/line/edge/containment/canonical.rb -lib/rgfa/line/edge/containment/pos.rb -lib/rgfa/line/edge/gfa2.rb -lib/rgfa/line/edge/containment.rb -lib/rgfa/line/edge/gfa1/to_gfa2.rb -lib/rgfa/line/edge/gfa1/references.rb -lib/rgfa/line/edge/gfa1/alignment_type.rb -lib/rgfa/line/edge/gfa1/other.rb -lib/rgfa/line/edge/gfa1/oriented_segments.rb -lib/rgfa/line/edge/link.rb -lib/rgfa/line/edge/link/equivalence.rb -lib/rgfa/line/edge/link/to_gfa2.rb -lib/rgfa/line/edge/link/references.rb -lib/rgfa/line/edge/link/canonical.rb -lib/rgfa/line/edge/link/complement.rb -lib/rgfa/line/edge/gfa2/references.rb -lib/rgfa/line/edge/gfa2/to_gfa1.rb -lib/rgfa/line/edge/gfa2/alignment_type.rb -lib/rgfa/line/edge/gfa2/other.rb -lib/rgfa/line/edge/common/from_to.rb -lib/rgfa/line/edge/common/alignment_type.rb -lib/rgfa/line/gap/references.rb -lib/rgfa/line/custom_record/init.rb -lib/rgfa/line/custom_record.rb -lib/rgfa/line/edge.rb -lib/rgfa/line/segment/factory.rb -lib/rgfa/line/segment/writer_wo_sequence.rb -lib/rgfa/line/segment/gfa2.rb -lib/rgfa/line/segment/gfa2_to_gfa1.rb -lib/rgfa/line/segment/length_gfa1.rb -lib/rgfa/line/segment/gfa1.rb -lib/rgfa/line/segment/references.rb -lib/rgfa/line/segment/gfa1_to_gfa2.rb -lib/rgfa/line/segment/coverage.rb -lib/rgfa/line/comment.rb -lib/rgfa/line/segment.rb -lib/rgfa/line/unknown.rb -lib/rgfa/line/common/version_conversion.rb -lib/rgfa/line/common/virtual_to_real.rb -lib/rgfa/line/common/connection.rb 
-lib/rgfa/line/common/equivalence.rb -lib/rgfa/line/common/cloning.rb -lib/rgfa/line/common/field_data.rb -lib/rgfa/line/common/dynamic_fields.rb -lib/rgfa/line/common/init.rb -lib/rgfa/line/common/field_datatype.rb -lib/rgfa/line/common/disconnection.rb -lib/rgfa/line/common/update_references.rb -lib/rgfa/line/common/writer.rb -lib/rgfa/line/common/validate.rb -lib/rgfa/line/header/field_data.rb -lib/rgfa/line/header/version_conversion.rb -lib/rgfa/line/header/connection.rb -lib/rgfa/line/header/multiline.rb -lib/rgfa/line/comment/tags.rb -lib/rgfa/line/comment/init.rb -lib/rgfa/line/comment/writer.rb -lib/rgfa/line/unknown/writer.rb -lib/rgfa/line/unknown/virtual.rb -lib/rgfa/sequence.rb -lib/rgfa/numeric_array.rb -lib/rgfa/byte_array.rb -lib/rgfa/lines.rb -lib/rgfa/alignment.rb -lib/rgfa/placeholder.rb -lib/rgfa/line.rb -lib/rgfa/lines/finders.rb -lib/rgfa/lines/destructors.rb -lib/rgfa/lines/creators.rb -lib/rgfa/lines/headers.rb -lib/rgfa/lines/collections.rb -lib/rgfa/field_array.rb -lib/rgfa/graph_operations.rb -bin/rgfa-mergelinear -bin/rgfa-findcrisprs -bin/gfadiff -bin/rgfa-simdebruijn - ] - s.homepage = 'http://github.com/ggonnella/rgfa' - s.license = 'CC-BY-SA' - s.required_ruby_version = '>= 2.0' -end diff --git a/setup.py b/setup.py index 7938984..a503c5f 100644 --- a/setup.py +++ b/setup.py @@ -1,20 +1,34 @@ -from setuptools import setup +from setuptools import setup, find_packages def readme(): with open('README.rst') as f: return f.read() setup(name='gfapy', - version='1.3', - description='Python library for accessing the GFA format', + version='1.0.0rc1', + description='Library for handling data in the GFA1 and GFA2 formats', long_description=readme(), - url='https://github.com/ggonnella/rgfa', + url='https://github.com/ggonnella/gfapy', keywords="bioinformatics genomics sequences GFA assembly graphs", - author='Giorgio Gonnella, Tim Weber', + author='Giorgio Gonnella and others (see CONTRIBUTORS)', author_email='gonnella@zbh.uni-hamburg.de', - 
license='MIT', - packages=['gfapy'], - scripts=['bin/pygfadiff'], + license='ISC', + # see https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'Intended Audience :: End Users/Desktop', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: ISC License (ISCL)', + 'Operating System :: OS Independent', + 'Operating System :: Unix', + 'Programming Language :: Python :: 3 :: Only', + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'Topic :: Software Development :: Libraries', + ], + packages=find_packages(), + scripts=['bin/gfadiff'], zip_safe=False, test_suite="nose.collector", include_package_data=True, diff --git a/test/README b/test/README deleted file mode 100644 index d5c7f4b..0000000 --- a/test/README +++ /dev/null @@ -1,7 +0,0 @@ -test_api # => test of API public methods -test_internals # => test of some non-public methods -test_unit # => unit tests for some modules - -Not all modules have unit tests, as in some cases everything is tested -by test_api, in some other cases by test_api plus test_internals. -If this is not the case, then test_unit are present. 
diff --git a/test/disable_extension.rb b/test/disable_extension.rb deleted file mode 100644 index 3a74fcf..0000000 --- a/test/disable_extension.rb +++ /dev/null @@ -1,23 +0,0 @@ -require_relative "../lib/rgfa.rb" -require_relative "./extension" - -class RGFA::Line::Segment::GFA2 - DEPENDENT_LINES.delete(:metagenomic_assignments) - undef :metagenomic_asssignments -end - -class RGFA::Line - class << self - undef :subclass - def subclass(record_type, version: nil) - orig_subclass(record_type, version: version) - end - end - RECORD_TYPE_VERSIONS[:specific][:gfa2].delete(:M) - RECORD_TYPE_VERSIONS[:specific][:gfa2].delete(:T) -end - -RGFA::RECORDS_WITH_NAME.delete(:M) -RGFA::RECORDS_WITH_NAME.delete(:T) - -RGFA::Field::GFA2_POSFIELD_DATATYPE.delete(:taxon_id) diff --git a/test/extension.rb b/test/extension.rb deleted file mode 100644 index 2028cd1..0000000 --- a/test/extension.rb +++ /dev/null @@ -1,148 +0,0 @@ -require_relative "../lib/rgfa.rb" - -class RGFA::Line::Taxon < RGFA::Line - - RECORD_TYPE = :T - POSFIELDS = [:tid, :desc] - PREDEFINED_TAGS = [:UL] - DATATYPE = { - :tid => :identifier_gfa2, - :desc => :Z, - :UL => :Z, - } - NAME_FIELD = :tid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [:metagenomic_assignments] - OTHER_REFERENCES = [] - - apply_definitions - -end - -class RGFA::Line::MetagenomicAssignment < RGFA::Line - - RECORD_TYPE = :M - POSFIELDS = [:mid, :tid, :sid, :score] - PREDEFINED_TAGS = [] - DATATYPE = { - :mid => :optional_identifier_gfa2, - :tid => :identifier_gfa2, - :sid => :identifier_gfa2, - :score => :optional_integer, - } - NAME_FIELD = :mid - STORAGE_KEY = :name - FIELD_ALIAS = {} - REFERENCE_FIELDS = [:tid, :sid] - BACKREFERENCE_RELATED_FIELDS = [] - DEPENDENT_LINES = [] - OTHER_REFERENCES = [] - - apply_definitions - -end - -class RGFA::Line::MetagenomicAssignment - - def initialize_references - s = @rgfa.segment(sid) - if s.nil? 
- s = RGFA::Line::Segment::GFA2.new([sid.to_s, "1", "*"], - virtual: true, version: :gfa2) - s.connect(@rgfa) - end - set_existing_field(:sid, s, set_reference: true) - s.add_reference(self, :metagenomic_assignments) - - t = @rgfa.line(tid) - if t.nil? - t = RGFA::Line::Taxon.new([tid.to_s, "*"], - virtual: true, version: :gfa2) - t.connect(@rgfa) - end - set_existing_field(:tid, t, set_reference: true) - t.add_reference(self, :metagenomic_assignments) - end - private :initialize_references - -end - -class RGFA::Line::Segment::GFA2 - DEPENDENT_LINES << :metagenomic_assignments - define_reference_getters -end - -class RGFA::Line - class << self - alias_method :orig_subclass, :subclass - def subclass(record_type, version: nil) - if version.nil? or version == :gfa2 - case record_type.to_sym - when :M then return RGFA::Line::MetagenomicAssignment - when :T then return RGFA::Line::Taxon - end - end - orig_subclass(record_type, version: version) - end - end - RECORD_TYPE_VERSIONS[:specific][:gfa2] << :M - RECORD_TYPE_VERSIONS[:specific][:gfa2] << :T -end - -RGFA::RECORDS_WITH_NAME << :T -RGFA::RECORDS_WITH_NAME << :M - -module RGFA::Field::TaxonID - - def validate_encoded(string) - if string !~ /^taxon:(\d+)$/ and string !~ /^[a-zA-Z0-9_]+$/ - raise RGFA::ValueError, "Invalid taxon ID: #{string}" - end - end - module_function :validate_encoded - - def unsafe_decode(string) - string.to_sym - end - module_function :unsafe_decode - - def decode(string) - validate_encoded(string) - unsafe_decode(string) - end - module_function :decode - - def validate_decoded(object) - case object - when RGFA::Line::Taxon - validate_encoded(object.name.to_s) - when Symbol - validate_encoded(object.to_s) - else - raise RGFA::TypeError, - "Invalid type for taxon ID: #{object.inspect}" - end - end - module_function :validate_decoded - - def unsafe_encode(object) - object = object.name if object.kind_of?(RGFA::Line::Taxon) - object.to_s - end - module_function :unsafe_encode - - def 
encode(object) - validate_decoded(object) - unsafe_encode(object) - end - module_function :encode - -end - -RGFA::Field::GFA2_POSFIELD_DATATYPE << :taxon_id -RGFA::Field::FIELD_MODULE[:taxon_id] = RGFA::Field::TaxonID -RGFA::Line::Taxon::DATATYPE[:tid] = :taxon_id -RGFA::Line::MetagenomicAssignment::DATATYPE[:tid] = :taxon_id diff --git a/test/test_api_alignment.rb b/test/test_api_alignment.rb deleted file mode 100644 index 602aa47..0000000 --- a/test/test_api_alignment.rb +++ /dev/null @@ -1,205 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Alignment < Test::Unit::TestCase - - @@cigar_1 = RGFA::Alignment::CIGAR.new([ - RGFA::Alignment::CIGAR::Operation.new(12,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:D), - RGFA::Alignment::CIGAR::Operation.new(2,:I), - RGFA::Alignment::CIGAR::Operation.new(0,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:P)]) - @@cigar_1_s = "12M1D2I0M1P" - - @@cigar_gfa1_1_s = "1S2M3I4=5X6D7P8N9H" - @@cigar_gfa1_1_c_s = "9H8I7P6I5X4=3D2M1D" - @@cigar_gfa1_1_rlen = 2+4+5+6+8 - @@cigar_gfa1_1_qlen = 1+2+3+4+5 - - @@cigar_gfa2_1_s = "1M2I3D4P" - @@cigar_gfa2_1_c_s = "4P3I2D1M" - @@cigar_gfa2_1_rlen = 1+3 - @@cigar_gfa2_1_qlen = 1+2 - - @@trace_1 = RGFA::Alignment::Trace.new([12,12,0]) - @@trace_1_s = "12,12,0" - - @@cigar_invalid_value_1 = RGFA::Alignment::CIGAR.new([ - RGFA::Alignment::CIGAR::Operation.new(-12,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:D), - RGFA::Alignment::CIGAR::Operation.new(2,:I)]) - @@cigar_invalid_value_2 = RGFA::Alignment::CIGAR.new([ - RGFA::Alignment::CIGAR::Operation.new(12,:Y), - RGFA::Alignment::CIGAR::Operation.new(1,:D), - RGFA::Alignment::CIGAR::Operation.new(2,:I)]) - @@cigar_invalid_type_1 = RGFA::Alignment::CIGAR.new([ - :x, - RGFA::Alignment::CIGAR::Operation.new(1,:D), - RGFA::Alignment::CIGAR::Operation.new(2,:I)]) - - @@trace_invalid_value_1 = - RGFA::Alignment::Trace.new([-2,1,12]) - @@trace_invalid_type_1 = - 
RGFA::Alignment::Trace.new([12.0,1,12]) - - @@cigar_empty = RGFA::Alignment::CIGAR.new([]) - @@trace_empty = RGFA::Alignment::Trace.new([]) - @@placeholder = RGFA::Alignment::Placeholder.new - @@placeholder_s = "*" - - @@string_invalid = [ - "-12M1D2I", "12Y1D2I", "x1D2I", - "-2,1,12", "12.0,1,12", "*x", - ] - - @@cigar_op_1 = RGFA::Alignment::CIGAR::Operation.new(1,:D) - @@cigar_op_1_s = "1D" - @@cigar_op_1_len = 1 - @@cigar_op_1_code = :D - @@cigar_op_2 = RGFA::Alignment::CIGAR::Operation.new(2,:I) - @@cigar_op_2_s = "2I" - @@cigar_op_2_len = 2 - @@cigar_op_2_code = :I - - def test_to_s - assert_equal(@@cigar_1_s, @@cigar_1_s.to_s) - assert_equal(@@cigar_1_s, @@cigar_1.to_s) - assert_equal(@@trace_1_s, @@trace_1.to_s) - assert_equal(@@placeholder_s, @@placeholder.to_s) - assert_equal(@@placeholder_s, @@cigar_empty.to_s) - assert_equal(@@placeholder_s, @@trace_empty.to_s) - end - - def test_cigar_clone - cigar1_clone = @@cigar_1.clone - assert_equal(@@cigar_1_s, cigar1_clone.to_s) - cigar1_clone[0].code = "=" - # copy is deep, only the clone has changed: - assert_not_equal(@@cigar_1_s, cigar1_clone.to_s) - assert_equal(@@cigar_1_s, @@cigar_1.to_s) - end - - def test_to_alignment - assert_equal(@@cigar_1, @@cigar_1_s.to_alignment) - assert_equal(@@trace_1, @@trace_1_s.to_alignment) - assert_equal(@@placeholder, @@placeholder_s.to_alignment) - [@@cigar_1, @@trace_1, @@cigar_empty, - @@trace_empty, @@placeholder].each do |alignment| - assert_equal(alignment, alignment.to_alignment) - end - @@string_invalid.each do |string| - assert_raises(RGFA::FormatError) { string.to_alignment } - end - end - - def test_decode_encode_invariant - [@@trace_1_s, @@cigar_1_s, @@placeholder_s].each do |string| - assert_equal(string, string.to_alignment.to_s) - end - end - - def test_is_placeholder - [@@cigar_empty, @@trace_empty, @@placeholder, @@placeholder_s].each do |a| - assert(a.placeholder?) 
- end - [@@cigar_1, @@cigar_1_s, @@trace_1, @@trace_1_s].each do |a| - assert(!a.placeholder?) - end - end - - def test_validate - assert_nothing_raised { @@trace_1.validate } - assert_nothing_raised { @@trace_empty.validate } - assert_nothing_raised { @@cigar_1.validate } - assert_nothing_raised { @@cigar_empty.validate } - assert_nothing_raised { @@placeholder.validate } - assert_raises(RGFA::ValueError) { @@trace_invalid_value_1.validate } - assert_raises(RGFA::ValueError) { @@cigar_invalid_value_1.validate } - assert_raises(RGFA::ValueError) { @@cigar_invalid_value_2.validate } - assert_raises(RGFA::TypeError) { @@trace_invalid_type_1.validate } - assert_raises(RGFA::TypeError) { @@cigar_invalid_type_1.validate } - end - - def test_version_specific_validate - assert_nothing_raised { @@cigar_gfa1_1_s. - to_alignment(version: :gfa1, valid: false)} - assert_raises(RGFA::FormatError) { @@cigar_gfa1_1_s. - to_alignment(version: :gfa2, valid: false)} - assert_nothing_raised { @@cigar_gfa2_1_s. - to_alignment(version: :gfa1, valid: false)} - assert_nothing_raised { @@cigar_gfa2_1_s. - to_alignment(version: :gfa2, valid: false)} - end - - def test_array_methods - [@@cigar_empty, @@trace_empty].each {|a| assert(a.empty?) } - [@@cigar_1, @@trace_1].each {|a| assert(!a.empty?) 
} - assert_equal(RGFA::Alignment::CIGAR::Operation.new(1,:D), @@cigar_1[1]) - assert_equal(12, @@trace_1[1]) - end - - def test_cigar_operation_methods - assert_equal(@@cigar_op_1_len, @@cigar_op_1.len) - assert_equal(@@cigar_op_1_code, @@cigar_op_1.code) - assert_equal(@@cigar_op_1_s, @@cigar_op_1.to_s) - @@cigar_op_1.len = @@cigar_op_2_len - @@cigar_op_1.code = @@cigar_op_2_code - assert_equal(@@cigar_op_2, @@cigar_op_1) - assert_equal(@@cigar_op_2_len, @@cigar_op_1.len) - assert_equal(@@cigar_op_2_code, @@cigar_op_1.code) - assert_equal(@@cigar_op_2_s, @@cigar_op_2.to_s) - end - - def test_cigar_operation_validation - assert_nothing_raised { @@cigar_op_1.validate } - assert_nothing_raised { @@cigar_op_1.validate(version: :gfa2) } - assert_nothing_raised { @@cigar_op_2.validate } - assert_nothing_raised { @@cigar_op_2.validate(version: :gfa2) } - assert_raise(RGFA::VersionError) { @@cigar_op_1.validate(version: :gfaX) } - stringlen = RGFA::Alignment::CIGAR::Operation.new("1", :M) - assert_nothing_raised { stringlen.validate } - stringcode = RGFA::Alignment::CIGAR::Operation.new(1, "M") - assert_nothing_raised { stringcode.validate } - malformed1 = RGFA::Alignment::CIGAR::Operation.new([1], :M) - assert_raise(RGFA::TypeError) { malformed1.validate } - malformed2 = RGFA::Alignment::CIGAR::Operation.new(-1, :M) - assert_raise(RGFA::ValueError) { malformed2.validate } - malformed3 = RGFA::Alignment::CIGAR::Operation.new(1, :L) - assert_raise(RGFA::ValueError) { malformed3.validate } - gfa1only = RGFA::Alignment::CIGAR::Operation.new(1, :X) - assert_nothing_raised { gfa1only.validate } - assert_raise(RGFA::ValueError) { gfa1only.validate(version: :gfa2) } - end - - def test_cigar_complement - assert_equal(@@cigar_gfa1_1_c_s, - @@cigar_gfa1_1_s.to_alignment(version: :gfa1).complement.to_s) - assert_equal(@@cigar_gfa2_1_c_s, - @@cigar_gfa2_1_s.to_alignment.complement.to_s) - end - - def test_cigar_length_on - assert_equal(@@cigar_gfa1_1_rlen, - 
@@cigar_gfa1_1_s.to_alignment(version: :gfa1). - length_on_reference) - assert_equal(@@cigar_gfa1_1_qlen, - @@cigar_gfa1_1_s.to_alignment(version: :gfa1). - length_on_query) - assert_equal(@@cigar_gfa1_1_qlen, - @@cigar_gfa1_1_c_s.to_alignment(version: :gfa1). - length_on_reference) - assert_equal(@@cigar_gfa1_1_rlen, - @@cigar_gfa1_1_c_s.to_alignment(version: :gfa1). - length_on_query) - assert_equal(@@cigar_gfa2_1_rlen, - @@cigar_gfa2_1_s.to_alignment.length_on_reference) - assert_equal(@@cigar_gfa2_1_qlen, - @@cigar_gfa2_1_s.to_alignment.length_on_query) - assert_equal(@@cigar_gfa2_1_qlen, - @@cigar_gfa2_1_c_s.to_alignment.length_on_reference) - assert_equal(@@cigar_gfa2_1_rlen, - @@cigar_gfa2_1_c_s.to_alignment.length_on_query) - end - -end diff --git a/test/test_api_comments.rb b/test/test_api_comments.rb deleted file mode 100644 index 809e044..0000000 --- a/test/test_api_comments.rb +++ /dev/null @@ -1,130 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Comments < Test::Unit::TestCase - - def test_initialize - l = RGFA::Line::Comment.new(["hallo"]) - assert_equal("# hallo", l.to_s) - l = RGFA::Line::Comment.new(["hallo", "\t"]) - assert_equal("#\thallo", l.to_s) - end - - def test_fields - l = RGFA::Line::Comment.new(["hallo"]) - assert_equal("hallo", l.content) - assert_equal(" ", l.spacer) - l.content = "hello" - assert_equal("hello", l.content) - assert_equal("# hello", l.to_s) - l.spacer = " " - assert_equal("hello", l.content) - assert_equal("# hello", l.to_s) - end - - def test_validation - assert_raises(RGFA::FormatError) {RGFA::Line::Comment.new(["hallo\nhallo"])} - assert_raises(RGFA::FormatError) {RGFA::Line::Comment.new(["hallo", "\n"])} - assert_nothing_raised { - RGFA::Line::Comment.new(["hallo", "\n"], vlevel: 0) } - l = RGFA::Line::Comment.new(["hallo"]) - assert_nothing_raised {l.content = "hallo\n"} - assert_raises(RGFA::FormatError) { l.to_s } - l.content = "hallo" - 
assert_nothing_raised {l.to_s} - assert_nothing_raised {l.spacer = "\n"} - assert_raises(RGFA::FormatError) { l.to_s } - l = RGFA::Line::Comment.new(["hallo"], vlevel: 3) - assert_raises(RGFA::FormatError) { l.content = "hallo\n" } - assert_raises(RGFA::FormatError) { l.spacer = "\n" } - end - - def test_from_string - str = "# this is a comment" - l = str.to_rgfa_line - assert_equal(RGFA::Line::Comment, l.class) - assert_equal(str[2..-1], l.content) - assert_equal(" ", l.spacer) - str = "#this is another comment" - l = str.to_rgfa_line - assert_equal(RGFA::Line::Comment, l.class) - assert_equal(str[1..-1], l.content) - assert_equal("", l.spacer) - str = "#\t and this too" - l = str.to_rgfa_line - assert_equal(RGFA::Line::Comment, l.class) - assert_equal(str[3..-1], l.content) - assert_equal(str[1..2], l.spacer) - str = "#: and this too" - l = str.to_rgfa_line - assert_equal(RGFA::Line::Comment, l.class) - assert_equal(str[1..-1], l.content) - assert_equal("", l.spacer) - end - - def test_to_s - str = "# this is a comment" - l = str.to_rgfa_line - assert_equal(str, l.to_s) - str = "#this is another\tcomment" - l = str.to_rgfa_line - assert_equal(str, l.to_s) - str = "#this is another\tcomment" - l = str.to_rgfa_line - l.spacer = " " - assert_equal("# "+str[1..-1], l.to_s) - end - - def test_tags - assert_raises(RGFA::ValueError) { - RGFA::Line::Comment.new(["hallo", " ", "zz:Z:hallo"]) } - l = "# hallo zz:Z:hallo".to_rgfa_line - assert_equal("hallo zz:Z:hallo", l.content) - assert_raises(NoMethodError) { l.zz } - assert_raises(NoMethodError) { l.zz = 1 } - assert_raises(RGFA::RuntimeError) { l.set(:zz, 1) } - assert_nil(l.get(:zz)) - end - - def test_to_gfa1 - str = "# this is a comment" - l = str.to_rgfa_line(version: :gfa2) - assert_equal(RGFA::Line::Comment, l.class) - assert_equal(:gfa2, l.version) - assert_equal(str, l.to_s) - assert_equal(:gfa2, l.to_gfa2.version) - assert_equal(str, l.to_gfa2.to_s) - assert_equal(:gfa1, l.to_gfa1.version) - assert_equal(str, 
l.to_gfa1.to_s) - end - - def test_to_gfa2 - str = "# this is a comment" - l = str.to_rgfa_line(version: :gfa1) - assert_equal(RGFA::Line::Comment, l.class) - assert_equal(:gfa1, l.version) - assert_equal(str, l.to_s) - assert_equal(:gfa1, l.to_gfa1.version) - assert_equal(str, l.to_gfa1.to_s) - assert_equal(:gfa2, l.to_gfa2.version) - assert_equal(str, l.to_gfa2.to_s) - end - - def test_rgfa_comments - gfa = RGFA.new - c1 = "#this is a comment" - c2 = "# this is also a comment" - c3 = "#and \tthis too!" - assert_nothing_raised { gfa << c1 } - assert_nothing_raised { gfa << c2 } - assert_nothing_raised { gfa << c3 } - assert_equal([c1,c2,c3], gfa.comments.map(&:to_s)) - assert_equal(c1, gfa.comments[0].to_s) - gfa.rm(gfa.comments[0]) - assert_equal([c2,c3], gfa.comments.map(&:to_s)) - gfa.comments[0].disconnect - assert_equal([c3], gfa.comments.map(&:to_s)) - end - -end diff --git a/test/test_api_custom_records.rb b/test/test_api_custom_records.rb deleted file mode 100644 index 4e75c23..0000000 --- a/test/test_api_custom_records.rb +++ /dev/null @@ -1,61 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::CustomRecords < Test::Unit::TestCase - - def test_from_string - str1 = "X\tthis is a\tcustom line" - l1 = str1.to_rgfa_line - assert_equal(RGFA::Line::CustomRecord, l1.class) - assert_equal(:X, l1.record_type) - assert_equal("this is a", l1.field1) - assert_equal("custom line", l1.field2) - end - - def test_from_string_with_tags - str2 = "XX\txx:i:2\txxxxxx\txx:i:1" - l2 = str2.to_rgfa_line - assert_equal(RGFA::Line::CustomRecord, l2.class) - assert_equal(:XX, l2.record_type) - assert_equal("xx:i:2", l2.field1) - assert_equal("xxxxxx", l2.field2) - assert_raise(NoMethodError){l2.field3} - assert_equal(1, l2.xx) - l2.xx = 3 - assert_equal(3, l2.xx) - l2.field1 = "blabla" - assert_equal("blabla", l2.field1) - end - - def test_to_s - str1 = "X\tthis is a\tcustom line" - assert_equal(str1, str1.to_rgfa_line.to_s) - 
str2 = "XX\txx:i:2\txxxxxx\txx:i:1" - assert_equal(str2, str2.to_rgfa_line.to_s) - end - - def test_add_custom_records - gfa = RGFA.new(version: :gfa2) - x1 = "X\tthis is a custom record" - assert_nothing_raised { gfa << x1 } - assert_equal([:X], gfa.custom_record_keys) - assert_equal([x1], gfa.custom_records(:X).map(&:to_s)) - end - - def test_delete_custom_records - gfa = RGFA.new(version: :gfa2) - c = "X\tThis is a custom_record" - gfa << c - assert_equal([c], gfa.custom_records(:X).map(&:to_s)) - gfa.custom_records(:X).each(&:disconnect) - assert_equal([], gfa.custom_records(:X)) - end - - def test_custom_records - x = ["X\tVN:Z:1.0", "Y\ttesttesttest"] - assert_equal(x[0..0], x.to_rgfa.custom_records(:X).map(&:to_s)) - assert_equal(x[1..1], x.to_rgfa.custom_records(:Y).map(&:to_s)) - end - -end diff --git a/test/test_api_extensions.rb b/test/test_api_extensions.rb deleted file mode 100644 index eebd80e..0000000 --- a/test/test_api_extensions.rb +++ /dev/null @@ -1,36 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Extensions < Test::Unit::TestCase - - require_relative "./extension" - - def test_extensions - g = RGFA.new(version: :gfa2) - RGFA::Line::MetagenomicAssignment.new(["*","N12","C","20"]) - g << (sA = "S\tA\t1000\t*".to_rgfa_line) - g << (tB12 = "T\tB12_c\tB12 common strain".to_rgfa_line) - g << (m1 = "M\t1\ttaxon:123\tA\t40\txx:Z:cjaks536".to_rgfa_line) - g << (m2 = "M\t2\ttaxon:123\tB\t*\txx:Z:cga5r5cs".to_rgfa_line) - g << (sB = "S\tB\t1000\t*".to_rgfa_line) - g << (mx = "M\t*\tB12_c\tB\t20".to_rgfa_line) - g << (t123 = "T\ttaxon:123\tSpecies 123\tUL:Z:http://www.taxon123.com". - to_rgfa_line) - assert_equal(RGFA::Line::MetagenomicAssignment, m1.class) - assert_equal(RGFA::Line::Taxon, tB12.class) - assert_equal(:"1", m1.mid) - assert(mx.mid.placeholder?) 
- assert_equal(t123, m1.tid) - assert_equal(sA, m1.sid) - assert_equal("cjaks536", m1.xx) - assert_equal([m2,mx], sB.metagenomic_assignments) - assert_equal([m1,m2], t123.metagenomic_assignments) - assert_equal(:"taxon:123", t123.tid) - assert_equal("Species 123", t123.desc) - assert_equal("http://www.taxon123.com", t123.UL) - end - - #require_relative "./disable_extension" - -end diff --git a/test/test_api_gfa1_lines.rb b/test/test_api_gfa1_lines.rb deleted file mode 100644 index 6019e02..0000000 --- a/test/test_api_gfa1_lines.rb +++ /dev/null @@ -1,214 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new - -class TestAPI::GFA1Lines < Test::Unit::TestCase - - def test_C - fields=["C","1","+","2","-","12","12M","MQ:i:1232","NM:i:3","ab:Z:abcd"] - str=fields.join("\t") - assert_nothing_raised { str.to_rgfa_line } - assert_equal(RGFA::Line::Edge::Containment, str.to_rgfa_line.class) - assert_equal(fields[0].to_sym, str.to_rgfa_line.record_type) - assert_equal(fields[1].to_sym, str.to_rgfa_line.from) - assert_equal(fields[2].to_sym, str.to_rgfa_line.from_orient) - assert_equal(fields[3].to_sym, str.to_rgfa_line.to) - assert_equal(fields[4].to_sym, str.to_rgfa_line.to_orient) - assert_equal(12, str.to_rgfa_line.pos) - assert_equal([RGFA::Alignment::CIGAR::Operation.new(12,:M)], str.to_rgfa_line.overlap) - assert_equal(1232, str.to_rgfa_line.MQ) - assert_equal(3, str.to_rgfa_line.NM) - assert_equal("abcd", str.to_rgfa_line.ab) - assert_raises(RGFA::FormatError) { (str+"\tH1").to_rgfa_line } - assert_raises(RGFA::FormatError) { "C\tH".to_rgfa_line } - assert_raises(RGFA::FormatError) do - f=fields.dup; f[2]="x"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[4]="x"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[5]="x"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[6]="x"; 
f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::TypeError) do - f=fields.dup; f[7]="MQ:Z:1232"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::TypeError) do - f=fields.dup; f[8]="NM:Z:1232"; f.join("\t").to_rgfa_line(vlevel: 1) - end - end - - def test_L - fields=["L","1","+","2","-","12M","RC:i:1232","NM:i:3","ab:Z:abcd", - "FC:i:2321","KC:i:1212","MQ:i:40"] - str=fields.join("\t") - assert_nothing_raised { str.to_rgfa_line } - assert_equal(RGFA::Line::Edge::Link, str.to_rgfa_line.class) - assert_equal(fields[0].to_sym, str.to_rgfa_line.record_type) - assert_equal(fields[1].to_sym, str.to_rgfa_line.from) - assert_equal(fields[2].to_sym, str.to_rgfa_line.from_orient) - assert_equal(fields[3].to_sym, str.to_rgfa_line.to) - assert_equal(fields[4].to_sym, str.to_rgfa_line.to_orient) - assert_equal([RGFA::Alignment::CIGAR::Operation.new(12,:M)], - str.to_rgfa_line.overlap) - assert_equal(1232, str.to_rgfa_line.RC) - assert_equal(3, str.to_rgfa_line.NM) - assert_equal(2321, str.to_rgfa_line.FC) - assert_equal(1212, str.to_rgfa_line.KC) - assert_equal(40, str.to_rgfa_line.MQ) - assert_equal("abcd", str.to_rgfa_line.ab) - assert_raises(RGFA::FormatError) { (str+"\tH1").to_rgfa_line } - assert_raises(RGFA::FormatError) { "L\tH".to_rgfa_line } - assert_raises(RGFA::FormatError) do - f=fields.dup; f[2]="x"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[4]="x"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[5]="x"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::TypeError) do - f=fields.dup; f[6]="RC:Z:1232"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::TypeError) do - f=fields.dup; f[7]="NM:Z:1232"; f.join("\t").to_rgfa_line(vlevel: 1) - end - end - - def test_L_coords - g = RGFA.new(version: :gfa1) - g << "S\t1\t*\tLN:i:100" - g << "L\t1\t+\t2\t-\t1M2D10M1I" - assert_equal([87,100], 
g.dovetails[0].from_coords) - assert_raises(RGFA::ValueError) {g.dovetails[0].to_coords} - g << "S\t2\t*\tLN:i:100" - assert_equal([88,100], g.dovetails[0].to_coords) - g << "L\t3\t-\t4\t+\t10M2P3D1M" - assert_equal([0,14], g.dovetails[1].from_coords) - assert_equal([0,11], g.dovetails[1].to_coords) - end - - def test_L_other - l = "L\t1\t+\t2\t-\t*".to_rgfa_line - assert_equal(:"2", l.other(:"1")) - assert_equal(:"1", l.other(:"2")) - assert_raise(RGFA::NotFoundError){l.other(:"0")} - end - - def test_L_circular - l = "L\t1\t+\t2\t-\t*".to_rgfa_line - assert_equal(false, l.circular?) - l = "L\t1\t+\t1\t-\t*".to_rgfa_line - assert_equal(true, l.circular?) - end - - def test_S - fields=["S","1","ACGTCACANNN","RC:i:1232","LN:i:11","ab:Z:abcd", - "FC:i:2321","KC:i:1212"] - str=fields.join("\t") - assert_nothing_raised { str.to_rgfa_line } - assert_equal(RGFA::Line::Segment::GFA1, str.to_rgfa_line.class) - assert_equal(fields[0].to_sym, str.to_rgfa_line.record_type) - assert_equal(fields[1].to_sym, str.to_rgfa_line.name) - assert_equal(fields[2], str.to_rgfa_line.sequence) - assert_equal(1232, str.to_rgfa_line.RC) - assert_equal(11, str.to_rgfa_line.LN) - assert_equal(2321, str.to_rgfa_line.FC) - assert_equal(1212, str.to_rgfa_line.KC) - assert_equal("abcd", str.to_rgfa_line.ab) - assert_raises(RGFA::FormatError) { (str+"\tH1").to_rgfa_line } - assert_raises(RGFA::FormatError) { "S\tH".to_rgfa_line } - assert_raises(RGFA::FormatError) do - f=fields.dup; f[2]="!@#?"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::TypeError) do - f=fields.dup; f[3]="RC:Z:1232"; f.join("\t").to_rgfa_line(version: :gfa1) - end - f=["S","2","ACGTCACANNN","LN:i:3"] - assert_raises(RGFA::InconsistencyError) do - f.join("\t").to_rgfa_line(vlevel: 1, version: :gfa1) - end - f=["S","2","ACGTCACANNN","LN:i:11"] - assert_nothing_raised { f.join("\t").to_rgfa_line } - f=["S","2","*","LN:i:3"] - assert_nothing_raised { f.join("\t").to_rgfa_line } - end - - def 
test_forbidden_segment_names - assert_nothing_raised { "S\tA+B\t*".to_rgfa_line } - assert_nothing_raised { "S\tA-B\t*".to_rgfa_line } - assert_nothing_raised { "S\tA,B\t*".to_rgfa_line } - assert_raises(RGFA::FormatError) do - "S\tA+,B\t*".to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - "S\tA-,B\t*".to_rgfa_line(vlevel: 1) - end - end - - def test_coverage - l = "S\t0\t*\tRC:i:600\tLN:i:100".to_rgfa_line - assert_equal(6, l.coverage) - assert_equal(6, l.coverage!) - l = "S\t0\t*\tRC:i:600".to_rgfa_line - assert_equal(nil, l.coverage) - assert_raises(RGFA::NotFoundError) {l.coverage!} - l = "S\t0\t*\tLN:i:100".to_rgfa_line - assert_equal(nil, l.coverage) - assert_raises(RGFA::NotFoundError) {l.coverage!} - l = "S\t0\t*\tFC:i:600\tLN:i:100".to_rgfa_line - assert_equal(nil, l.coverage) - assert_raises(RGFA::NotFoundError) {l.coverage!} - assert_equal(6, l.coverage(count_tag: :FC)) - assert_equal(6, l.coverage!(count_tag: :FC)) - end - - def test_P - fields=["P","4","1+,2-,3+","9M2I3D1M,12M","ab:Z:abcd"] - str=fields.join("\t") - assert_nothing_raised { str.to_rgfa_line } - assert_equal(RGFA::Line::Group::Path, str.to_rgfa_line.class) - assert_equal(fields[0].to_sym, str.to_rgfa_line.record_type) - assert_equal(fields[1].to_sym, str.to_rgfa_line.path_name) - assert_equal([OL[:"1",:"+"],OL[:"2",:"-"],OL[:"3",:"+"]], - str.to_rgfa_line.segment_names) - assert_equal([[RGFA::Alignment::CIGAR::Operation.new(9,:M), - RGFA::Alignment::CIGAR::Operation.new(2,:I), - RGFA::Alignment::CIGAR::Operation.new(3,:D), - RGFA::Alignment::CIGAR::Operation.new(1,:M)], - [RGFA::Alignment::CIGAR::Operation.new(12,:M)]], - str.to_rgfa_line.overlaps) - assert_equal("abcd", str.to_rgfa_line.ab) - assert_raises(RGFA::FormatError) { (str+"\tH1").to_rgfa_line } - assert_raises(RGFA::FormatError) { "P\tH".to_rgfa_line } - assert_raises(RGFA::FormatError) do - f=fields.dup; f[2]="1,2,3"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::InconsistencyError) do - 
f=fields.dup; f[2]="1+,2+"; f[3]="9M,12M,3M"; - f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_nothing_raised do - f=fields.dup; f[3]="*,*"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_nothing_raised do - f=fields.dup; f[3]="9M2I3D1M,12M,12M"; f.join("\t"). - to_rgfa_line(vlevel: 3) - end - assert_nothing_raised do - f=fields.dup; f[3]="*"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[3]="12,12"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::FormatError) do - f=fields.dup; f[3]="12M|12M"; f.join("\t").to_rgfa_line(vlevel: 1) - end - end - -end diff --git a/test/test_api_gfa2_lines.rb b/test/test_api_gfa2_lines.rb deleted file mode 100644 index 529b43e..0000000 --- a/test/test_api_gfa2_lines.rb +++ /dev/null @@ -1,57 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new - -class TestAPI::GFA2Lines < Test::Unit::TestCase - - def test_S - fields=["S","1","ACGTCACANNN","RC:i:1232","LN:i:11","ab:Z:abcd", - "FC:i:2321","KC:i:1212"] - str=fields.join("\t") - assert_nothing_raised { str.to_rgfa_line } - assert_equal(RGFA::Line::Segment::GFA1, str.to_rgfa_line.class) - assert_equal(fields[0].to_sym, str.to_rgfa_line.record_type) - assert_equal(fields[1].to_sym, str.to_rgfa_line.name) - assert_equal(fields[2], str.to_rgfa_line.sequence) - assert_equal(1232, str.to_rgfa_line.RC) - assert_equal(11, str.to_rgfa_line.LN) - assert_equal(2321, str.to_rgfa_line.FC) - assert_equal(1212, str.to_rgfa_line.KC) - assert_equal("abcd", str.to_rgfa_line.ab) - assert_raises(RGFA::FormatError) { (str+"\tH1").to_rgfa_line } - assert_raises(RGFA::FormatError) { "S\tH".to_rgfa_line } - assert_raises(RGFA::FormatError) do - f=fields.dup; f[2]="!@#?"; f.join("\t").to_rgfa_line(vlevel: 1) - end - assert_raises(RGFA::TypeError) do - f=fields.dup; f[3]="RC:Z:1232"; f.join("\t").to_rgfa_line(version: :gfa1) - end - f=["S","2","ACGTCACANNN","LN:i:3"] - 
assert_raises(RGFA::InconsistencyError) do - f.join("\t").to_rgfa_line(vlevel: 1, version: :gfa1) - end - f=["S","2","ACGTCACANNN","LN:i:11"] - assert_nothing_raised { f.join("\t").to_rgfa_line } - f=["S","2","*","LN:i:3"] - assert_nothing_raised { f.join("\t").to_rgfa_line } - end - - def test_coverage - l = "S\t0\t*\tRC:i:600\tLN:i:100".to_rgfa_line - assert_equal(6, l.coverage) - assert_equal(6, l.coverage!) - l = "S\t0\t*\tRC:i:600".to_rgfa_line - assert_equal(nil, l.coverage) - assert_raises(RGFA::NotFoundError) {l.coverage!} - l = "S\t0\t*\tLN:i:100".to_rgfa_line - assert_equal(nil, l.coverage) - assert_raises(RGFA::NotFoundError) {l.coverage!} - l = "S\t0\t*\tFC:i:600\tLN:i:100".to_rgfa_line - assert_equal(nil, l.coverage) - assert_raises(RGFA::NotFoundError) {l.coverage!} - assert_equal(6, l.coverage(count_tag: :FC)) - assert_equal(6, l.coverage!(count_tag: :FC)) - end - -end diff --git a/test/test_api_header.rb b/test/test_api_header.rb deleted file mode 100644 index 481e231..0000000 --- a/test/test_api_header.rb +++ /dev/null @@ -1,77 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Header < Test::Unit::TestCase - - def test_rgfa_header - g = RGFA.new - assert_equal(RGFA::Line::Header, g.header.class) - assert_equal([], g.header.tagnames) - g << "H\txx:i:1".to_rgfa_line - assert_equal([:xx], g.header.tagnames) - end - - def test_rgfa_header_line_connect - g = RGFA.new - line = "H\txx:i:1".to_rgfa_line - assert_raise(RGFA::RuntimeError) { line.connect(g) } - assert_nothing_raised { g.add_line(line) } - end - - def test_header_version_editing - standalone = "H\txx:i:1\tVN:Z:1.0".to_rgfa_line - assert_nothing_raised {standalone.VN = "2.0"} - g = RGFA.new - g << "H\txx:i:1\tVN:Z:1.0" - assert_nothing_raised { g.header.xx = 2} - assert_raise(RGFA::RuntimeError) { - g.header.VN = "2.0" } - end - - def test_error_inconsistent_definitions - g = RGFA.new - g << "H\txx:i:1" - assert_nothing_raised {g << 
"H\txx:i:2" } - g << "H\tTS:i:120" - assert_nothing_raised {g << "H\tTS:i:120" } - assert_raise(RGFA::InconsistencyError) { g << "H\tTS:i:122" } - end - - def test_rgfa_multiple_def_tags - g = RGFA.new - 4.times do |i| - g << "H\txx:i:#{i}".to_rgfa_line - end - assert_equal([:xx], g.header.tagnames) - assert_equal([0,1,2,3], g.header.xx) - assert_equal([0,1,2,3], g.header.get(:xx)) - assert_equal(:i, g.header.get_datatype(:xx)) - assert_nothing_raised { g.header.validate_field(:xx) } - [0,2,3].each {|i| g.header.xx.delete(i)} - g.header.xx = (g.header.xx += [4]) - assert_raise(RGFA::TypeError) { g.header.validate_field(:xx) } - g.header.xx = g.header.xx.to_rgfa_field_array(:i) - assert_nothing_raised { g.header.validate_field(:xx) } - assert_equal([1,4], g.header.get(:xx)) - assert_equal("1\t4", g.header.field_to_s(:xx)) - assert_equal("xx:i:1\txx:i:4", g.header.field_to_s(:xx, tag: true)) - assert_equal(["H\txx:i:1","H\txx:i:4"], g.headers.map(&:to_s)) - g.header.add(:xx, 12) - g.header.add(:yy, 13) - assert_equal([1,4,12], g.header.xx) - assert_equal(13, g.header.yy) - end - - def test_rgfa_single_def_tags - g = RGFA.new - g << "H\txx:i:1".to_rgfa_line - assert_equal([:xx], g.header.tagnames) - assert_equal(1, g.header.xx) - g.header.set(:xx, 12) - assert_equal(12, g.header.xx) - g.header.delete(:xx) - assert_equal(nil, g.header.xx) - end - -end diff --git a/test/test_api_linear_paths.rb b/test/test_api_linear_paths.rb deleted file mode 100644 index 285f0aa..0000000 --- a/test/test_api_linear_paths.rb +++ /dev/null @@ -1,58 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI||=Module.new - -class TestAPI::LinearPaths < Test::Unit::TestCase - - def test_linear_path_merging - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/linear_merging.1.#{sfx}") - assert_raises(RGFA::ValueError) do - gfa.merge_linear_path([["0", :R],["1", :R],["2", :L],["3", :R]]) - end - gfa = RGFA.from_file("testdata/linear_merging.2.#{sfx}") - 
assert_nothing_raised do - gfa.merge_linear_path([["0", :R],["1", :R],["2", :L],["3", :R]]) - end - assert_raises(RGFA::NotFoundError) {gfa.segment!("0")} - assert_raises(RGFA::NotFoundError) {gfa.segment!("1")} - assert_raises(RGFA::NotFoundError) {gfa.segment!("2")} - assert_raises(RGFA::NotFoundError) {gfa.segment!("3")} - assert_nothing_raised {gfa.segment!("0_1_2_3")} - assert_equal([], gfa.dovetails) - assert_equal("ACGACGACGTCGA", gfa.segment("0_1_2_3").sequence) - end - end - - def test_linear_path_merge_all - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/linear_merging.3.#{sfx}") - gfa.merge_linear_paths - assert_nothing_raised { gfa.merge_linear_paths } - assert_equal([:"0_1_2_3"], gfa.segment_names) - assert_equal(1, gfa.segments.size) - assert_equal([], gfa.dovetails) - gfa = RGFA.from_file("testdata/linear_merging.4.#{sfx}") - assert_nothing_raised { gfa.merge_linear_paths } - assert_equal(3, gfa.segments.size) - assert_equal([:"0",:"3",:"1_2"], gfa.segments.map(&:name)) - gfa = RGFA.from_file("testdata/linear_merging.5.#{sfx}") - assert_nothing_raised { gfa.merge_linear_paths } - assert_equal(3, gfa.segments.size) - assert_equal([:"0", :"1", :"2_3"], gfa.segments.map(&:name)) - end - end - - def test_linear_path_merge_example1 - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/example1.#{sfx}") - assert_equal([%w[18 19 1], - %w[11 9 12], - %w[22 16 20 21 23]], - gfa.linear_paths.map{|sp|sp.map{|sn_et| - sn_et.to_segment_end.name.to_s}}) - end - end - -end diff --git a/test/test_api_lines_collections.rb b/test/test_api_lines_collections.rb deleted file mode 100644 index 735ed5f..0000000 --- a/test/test_api_lines_collections.rb +++ /dev/null @@ -1,100 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -TestAPI::Lines ||= Module.new - -class TestAPI::Lines::Collections < Test::Unit::TestCase - - def test_gfa1_collections - gfa = RGFA.from_file("testdata/all_line_types.gfa1.gfa") - # 
comments - assert_equal(1, gfa.comments.size) - assert(gfa.comments[0].content =~ /collections/) - # containments - assert_equal(2, gfa.containments.size) - assert_equal(["2_to_6", "1_to_5"], gfa.containments.map(&:id)) - # dovetails - assert_equal(4, gfa.dovetails.size) - assert_equal(["1_to_2", "1_to_3", "11_to_12", "11_to_13"], - gfa.dovetails.map(&:id)) - # edges - assert_equal(6, gfa.edges.size) - assert_equal(["1_to_2", "1_to_3", "11_to_12", - "11_to_13", "2_to_6", "1_to_5"], - gfa.edges.map(&:id)) - # segments - assert_equal([:"1", :"3", :"5", :"13", :"11", :"12", :"4", :"6", :"2"], - gfa.segments.map(&:name)) - # segment_names - assert_equal([:"1", :"3", :"5", :"13", :"11", :"12", :"4", :"6", :"2"], - gfa.segment_names) - # paths - assert_equal([:"14", :"15"], gfa.paths.map(&:name)) - # path_names - assert_equal([:"14", :"15"], gfa.path_names) - # names - assert_equal(gfa.segment_names + gfa.path_names, - gfa.names) - # lines - assert_equal(gfa.comments + gfa.headers + gfa.segments + gfa.edges + - gfa.paths, gfa.lines) - end - - def test_gfa2_collections - gfa = RGFA.from_file("testdata/all_line_types.gfa2.gfa") - # comments - assert_equal(3, gfa.comments.size) - assert(gfa.comments[0].content =~ /collections/) - # edges - assert_equal([:"1_to_2", :"2_to_6", :"1_to_3", - :"11_to_12", :"11_to_13", :"1_to_5"], - gfa.edges.map(&:name)) - # edge_names - assert_equal([:"1_to_2", :"2_to_6", :"1_to_3", - :"11_to_12", :"11_to_13", :"1_to_5"], - gfa.edge_names) - # dovetails - assert_equal([:"1_to_2", :"1_to_3", :"11_to_12", :"11_to_13"], - gfa.dovetails.map(&:name)) - # containments - assert_equal([:"2_to_6", :"1_to_5"], - gfa.containments.map(&:name)) - # gaps - assert_equal([:"1_to_11", :"2_to_12"], gfa.gaps.map(&:name)) - # gap_names - assert_equal([:"1_to_11", :"2_to_12"], gfa.gap_names) - # sets - assert_equal([:"16", :"16sub"], gfa.sets.map(&:name)) - # set_names - assert_equal([:"16", :"16sub"], gfa.set_names) - # paths - assert_equal([:"14", :"15"], 
gfa.paths.map(&:name)) - # path_names - assert_equal([:"14", :"15"], gfa.path_names) - # segments - assert_equal([:"1", :"3", :"5", :"13", :"11", :"12", :"4", :"6", :"2"], - gfa.segments.map(&:name)) - # segment_names - assert_equal([:"1", :"3", :"5", :"13", :"11", :"12", :"4", :"6", :"2"], - gfa.segment_names) - # fragments - assert_equal(["read1_in_2", "read2_in_2"], gfa.fragments.map(&:id)) - # external_names - assert_equal([:"read1", :"read2"], gfa.external_names) - # custom_record_keys - assert_equal([:X, :Y], gfa.custom_record_keys) - # custom_records - assert_equal(3, gfa.custom_records.size) - assert_equal([:X, :X, :Y], gfa.custom_records.map(&:record_type)) - # custom_records(:X) - assert_equal([:X, :X], gfa.custom_records(:X).map(&:record_type)) - # names - assert_equal(gfa.segment_names + gfa.edge_names + gfa.gap_names + - gfa.path_names + gfa.set_names, gfa.names) - # lines - assert_equal(gfa.comments + gfa.headers + gfa.segments + gfa.edges + - gfa.paths + gfa.sets + gfa.gaps + gfa.fragments + - gfa.custom_records, gfa.lines) - end -end diff --git a/test/test_api_lines_creators.rb b/test/test_api_lines_creators.rb deleted file mode 100644 index e4772e2..0000000 --- a/test/test_api_lines_creators.rb +++ /dev/null @@ -1,144 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -TestAPI::Lines ||= Module.new -class TestAPI::Lines::Creators < Test::Unit::TestCase - - # XXX missing GFA2 - - def test_add_headers - gfa = RGFA.new - h = "H\tVN:Z:1.0" - assert_nothing_raised { gfa << h } - assert_equal([h], gfa.headers.map(&:to_s)) - end - - def test_add_segments - gfa = RGFA.new - s1 = "S\t1\t*".to_rgfa_line - s2 = "S\t2\t*".to_rgfa_line - assert_nothing_raised { gfa << s1 } - assert_nothing_raised { gfa << s2 } - assert_equal([s1, s2], gfa.segments) - assert_equal([:"1", :"2"], gfa.segment_names) - assert_equal(s1, gfa.segment("1")) - assert_equal(nil, gfa.segment("0")) - assert_nothing_raised { gfa.segment!("1") } - 
assert_raises(RGFA::NotFoundError) { gfa.segment!("0") } - assert_raises(RGFA::NotUniqueError) { gfa << s2.clone } - end - - def test_add_links - s1 = "S\t1\t*" - s2 = "S\t2\t*" - l1 = "L\t1\t+\t2\t+\t12M".to_rgfa_line - l2 = "L\t1\t+\t3\t+\t12M" - gfa = RGFA.new - gfa << s1 - gfa << s2 - assert_nothing_raised { gfa << l1 } - assert_equal([l1], gfa.dovetails) - assert_equal([l1], gfa.segment(:"1").end_relations(:R, ["2", :L])) - assert_equal([l1], gfa.segment(:"2").end_relations(:L, ["1", :R])) - assert_equal([], gfa.segment(:"2").end_relations(:R, ["1", :L])) - assert_nothing_raised { gfa << l2 } - end - - def test_add_containments - s1 = "S\t1\t*" - s2 = "S\t2\t*" - c1 = "C\t1\t+\t2\t+\t12\t12M".to_rgfa_line - c2 = "C\t1\t+\t3\t+\t12\t12M" - gfa = RGFA.new - gfa << s1 - gfa << s2 - assert_nothing_raised { gfa << c1 } - assert_equal([c1], gfa.containments) - assert_equal([c1], - gfa.segment(:"1").relations_to(:"2", :edges_to_contained)) - assert_equal([], - gfa.segment(:"2").relations_to(:"1", :edges_to_contained)) - assert_nothing_raised { gfa << c2 } - end - - def test_add_paths - s1 = "S\t1\t*" - s2 = "S\t2\t*" - p1 = "P\t4\t1+,2+\t122M".to_rgfa_line - p2 = "P\t1\t1+,2+\t122M" - p3 = "P\t5\t1+,2+,3+\t122M,120M" - gfa = RGFA.new - gfa << s1 - gfa << s2 - assert_nothing_raised { gfa << p1 } - assert_equal([p1], gfa.paths) - assert_equal([:"4"], gfa.path_names) - assert_equal(p1, gfa.line(:"4")) - assert_equal(nil, gfa.line(:"5")) - assert_raises(RGFA::NotUniqueError) { gfa << p2 } - assert_nothing_raised { gfa << p3 } - end - - def test_segments_first_order - s1 = "S\t1\t*" - s2 = "S\t2\t*" - l1 = "L\t1\t+\t2\t+\t122M" - l2 = "L\t1\t+\t3\t+\t122M" - c1 = "C\t1\t+\t2\t+\t12\t12M" - c2 = "C\t1\t+\t3\t+\t12\t12M" - p1 = "P\t4\t1+,2+\t122M" - p2 = "P\t1\t1+,2+\t122M" - p3 = "P\t5\t1+,3+\t122M" - gfa = RGFA.new - gfa.require_segments_first_order - gfa << s1 - gfa << s2 - assert_nothing_raised { gfa << l1 } - assert_raises(RGFA::NotFoundError) { gfa << l2 } - 
assert_nothing_raised { gfa << c1 } - assert_raises(RGFA::NotFoundError) { gfa << c2 } - assert_nothing_raised { gfa << p1 } - assert_raises(RGFA::NotUniqueError) { gfa << p2 } - assert_raises(RGFA::NotFoundError) { gfa << p3 } - end - - def test_header_add - gfa = RGFA.new - gfa << "H\tVN:Z:1.0" - gfa << "H\taa:i:12\tab:Z:test1" - gfa << "H\tac:Z:test2" - gfa.header.add(:aa, 15) - assert_equal( - [ - "H\tVN:Z:1.0", - "H\taa:i:12", - "H\taa:i:15", - "H\tab:Z:test1", - "H\tac:Z:test2", - ], - gfa.headers.map(&:to_s).sort) - gfa.header.add(:aa, 16) - assert_equal( - [ - "H\tVN:Z:1.0", - "H\taa:i:12", - "H\taa:i:15", - "H\taa:i:16", - "H\tab:Z:test1", - "H\tac:Z:test2", - ], - gfa.headers.map(&:to_s).sort) - gfa.header.delete(:aa) - gfa.header.aa = 26 - assert_equal( - [ - "H\tVN:Z:1.0", - "H\taa:i:26", - "H\tab:Z:test1", - "H\tac:Z:test2", - ], - gfa.headers.map(&:to_s).sort) - end - -end diff --git a/test/test_api_lines_destructors.rb b/test/test_api_lines_destructors.rb deleted file mode 100644 index 1ed5737..0000000 --- a/test/test_api_lines_destructors.rb +++ /dev/null @@ -1,78 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -TestAPI::Lines ||= Module.new -class TestAPI::Lines::Destructors < Test::Unit::TestCase - - def test_delete_links - gfa = RGFA.new - s = ["S\t0\t*", "S\t1\t*", "S\t2\t*"] - l = "L\t1\t+\t2\t+\t12M" - c = "C\t1\t+\t0\t+\t12\t12M" - (s + [l,c]).each {|line| gfa << line } - assert_equal([l], gfa.dovetails.map(&:to_s)) - assert_equal([l], - gfa.segment(:"1").end_relations(:R, [:"2", :L]).map(&:to_s)) - gfa.segment(:"1").oriented_relations(:+, OL[:"2", :+]).map(&:disconnect) - assert_equal([], gfa.dovetails) - assert_equal([], gfa.segment(:"1").end_relations(:R, [:"2", :L])) - assert_equal([c], gfa.containments.map(&:to_s)) - assert_equal(c, - gfa.segment(:"1").relations_to(gfa.segment(:"0"), - :edges_to_contained)[0].to_s) - gfa << l - assert_not_equal([], gfa.dovetails) - 
gfa.segment(:"1").oriented_relations(:+, OL[:"2", :+]).map(&:disconnect) - assert_equal([], gfa.dovetails) - end - - def test_delete_containments - gfa = RGFA.new - s = ["S\t0\t*", "S\t1\t*", "S\t2\t*"] - l = "L\t1\t+\t2\t+\t12M" - c = "C\t1\t+\t0\t+\t12\t12M" - (s + [l,c]).each {|line| gfa << line } - gfa.segment(:"1").relations_to(gfa.segment(:"0"), :edges_to_contained). - each(&:disconnect) - assert_equal([], gfa.containments) - assert_equal(nil, gfa.segment(:"1").relations_to(:"0", - :edges_to_contained)[0]) - gfa << c - assert_not_equal([], gfa.containments) - assert_equal(c, gfa.segment(:"1").relations_to(:"0", - :edges_to_contained)[0].to_s) - gfa.segment(:"1").relations_to(gfa.segment(:"0"), :edges_to_contained). - each(&:disconnect) - assert_equal([], gfa.containments) - end - - def test_delete_segment - gfa = RGFA.new - gfa << "H\tVN:Z:1.0" - s = ["S\t0\t*", "S\t1\t*", "S\t2\t*"] - l = "L\t1\t+\t2\t+\t12M" - c = "C\t1\t+\t0\t+\t12\t12M" - p = "P\t4\t2+,0-\t12M" - (s + [l,c,p]).each {|line| gfa << line } - assert_equal(s, gfa.segments.map(&:to_s)) - assert_equal([:"0", :"1", :"2"], gfa.segment_names) - assert_equal([l], gfa.dovetails.select{|n|!n.virtual?}.map(&:to_s)) - assert_equal([c], gfa.containments.map(&:to_s)) - assert_equal([p], gfa.paths.map(&:to_s)) - assert_equal([:"4"], gfa.path_names) - gfa.segment("0").disconnect - assert_equal([s[1],s[2]], gfa.segments.map(&:to_s)) - assert_equal([:"1", :"2"], gfa.segment_names) - assert_equal([l], gfa.dovetails.select{|n|!n.virtual?}.map(&:to_s)) - assert_equal([], gfa.containments.map(&:to_s)) - assert_equal([], gfa.paths.map(&:to_s)) - assert_equal([], gfa.path_names) - gfa.segment("1").disconnect - assert_equal([s[2]], gfa.segments.map(&:to_s)) - assert_equal([], gfa.dovetails) - gfa.rm(:"2") - assert_equal([], gfa.segments) - end - -end diff --git a/test/test_api_lines_finders.rb b/test/test_api_lines_finders.rb deleted file mode 100644 index 3c387ab..0000000 --- a/test/test_api_lines_finders.rb +++ 
/dev/null @@ -1,178 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -TestAPI::Lines ||= Module.new -class TestAPI::Lines::Finders < Test::Unit::TestCase - - @@l_gfa1 = ["S\t1\t*", - "S\t2\t*", - "S\t3\t*", - "S\t4\tCGAT", - "L\t1\t+\t2\t+\t*", - "L\t1\t-\t3\t+\t*", - "C\t1\t-\t4\t-\t1\t*", - "P\tp1\t1+,2+\t*"].map(&:to_rgfa_line) - @@l_gfa2 = ["S\t1\t100\t*", - "S\t2\t110\t*", - "E\te1\t1+\t2-\t0\t100$\t10\t110$\t*", - "G\tg1\t1-\t2-\t1000\t*", - "O\to1\t1+ 2-", - "U\tu1\t1 e1", - "F\t1\tread1-\t0\t10\t102\t122\t*", - "F\t1\tread1-\t30\t100$\t180\t255\t*", - "F\t2\tread1-\t40\t50\t52\t64\t*", - "X\tx1\txx:Z:A", - "X\tx2", - "G\t*\t1+\t2+\t2000\t*"].map(&:to_rgfa_line) - @@gfa1 = @@l_gfa1.to_rgfa - @@gfa2 = @@l_gfa2.to_rgfa - - - def test_segment_gfa1 - # existing name as argument - assert_equal(@@l_gfa1[0],@@gfa1.segment(:"1")) - assert_equal(@@l_gfa1[0],@@gfa1.segment!(:"1")) - # not existing name as argument - assert_equal(nil,@@gfa1.segment(:"0")) - assert_raises(RGFA::NotFoundError) {@@gfa1.segment!(:"0")} - # line as argument - assert_equal(@@l_gfa1[0],@@gfa1.segment(@@l_gfa1[0])) - assert_equal(@@l_gfa1[0],@@gfa1.segment!(@@l_gfa1[0])) - # connection to rgfa is not checked if argument is line - assert_equal(@@l_gfa2[0],@@gfa1.segment(@@l_gfa2[0])) - assert_equal(@@l_gfa2[0],@@gfa1.segment!(@@l_gfa2[0])) - end - - def test_segment_gfa2 - # existing name as argument - assert_equal(@@l_gfa2[0],@@gfa2.segment(:"1")) - assert_equal(@@l_gfa2[0],@@gfa2.segment!(:"1")) - # not existing name as argument - assert_equal(nil,@@gfa2.segment(:"0")) - assert_raises(RGFA::NotFoundError) {@@gfa2.segment!(:"0")} - # line as argument - assert_equal(@@l_gfa2[0],@@gfa2.segment(@@l_gfa2[0])) - assert_equal(@@l_gfa2[0],@@gfa2.segment!(@@l_gfa2[0])) - # connection to rgfa is not checked if argument is line - assert_equal(@@l_gfa1[0],@@gfa2.segment(@@l_gfa1[0])) - assert_equal(@@l_gfa1[0],@@gfa2.segment!(@@l_gfa1[0])) - end - - def 
test_line_gfa1 - # segment name as argument - assert_equal(@@l_gfa1[0],@@gfa1.line(:"1")) - assert_equal(@@l_gfa1[0],@@gfa1.line!(:"1")) - # path name as argument - assert_equal(@@l_gfa1[7],@@gfa1.line(:"p1")) - assert_equal(@@l_gfa1[7],@@gfa1.line!(:"p1")) - # not existing name as argument - assert_equal(nil,@@gfa1.line(:"0")) - assert_raises(RGFA::NotFoundError) {@@gfa1.line!(:"0")} - # line as argument - assert_equal(@@l_gfa1[0],@@gfa1.line(@@l_gfa1[0])) - assert_equal(@@l_gfa1[0],@@gfa1.line!(@@l_gfa1[0])) - # connection to rgfa is not checked if argument is line - assert_equal(@@l_gfa2[0],@@gfa1.line(@@l_gfa2[0])) - assert_equal(@@l_gfa2[0],@@gfa1.line!(@@l_gfa2[0])) - end - - def test_line_gfa2 - # segment name as argument - assert_equal(@@l_gfa2[0],@@gfa2.line(:"1")) - assert_equal(@@l_gfa2[0],@@gfa2.line!(:"1")) - # edge name as argument - assert_equal(@@l_gfa2[2],@@gfa2.line(:"e1")) - assert_equal(@@l_gfa2[2],@@gfa2.line!(:"e1")) - # gap name as argument - assert_equal(@@l_gfa2[3],@@gfa2.line(:"g1")) - assert_equal(@@l_gfa2[3],@@gfa2.line!(:"g1")) - # path name as argument - assert_equal(@@l_gfa2[4],@@gfa2.line(:"o1")) - assert_equal(@@l_gfa2[4],@@gfa2.line!(:"o1")) - # set name as argument - assert_equal(@@l_gfa2[5],@@gfa2.line(:"u1")) - assert_equal(@@l_gfa2[5],@@gfa2.line!(:"u1")) - # not existing name as argument - assert_equal(nil,@@gfa2.line(:"0")) - assert_raises(RGFA::NotFoundError) {@@gfa2.line!(:"0")} - # line as argument - assert_equal(@@l_gfa2[0],@@gfa2.line(@@l_gfa2[0])) - assert_equal(@@l_gfa2[0],@@gfa2.line!(@@l_gfa2[0])) - # connection to rgfa is not checked if argument is line - assert_equal(@@l_gfa1[0],@@gfa2.line(@@l_gfa1[0])) - assert_equal(@@l_gfa1[0],@@gfa2.line!(@@l_gfa1[0])) - end - - def test_fragments_for_external - assert_equal(@@l_gfa2[6..8], @@gfa2.fragments_for_external(:"read1")) - assert_equal([], @@gfa2.fragments_for_external(:"read2")) - end - - def test_select_by_hash_gfa1 - # search segments - 
assert_equal(@@l_gfa1[0..3], @@gfa1.select({:record_type => :S, - :sequence => "CGAT"})) - assert_equal(@@l_gfa1[0..0], @@gfa1.select({:record_type => :S, - :name => :"1"})) - # search links - assert_equal(@@l_gfa1[4..4], @@gfa1.select({:record_type => :L, - :from => :"1", - :from_orient => :+})) - # search containments - assert_equal(@@l_gfa1[6..6], @@gfa1.select({:record_type => :C, - :from => :"1", - :pos => 1})) - # search paths - assert_equal(@@l_gfa1[7..7], @@gfa1.select({:record_type => :P, - :segment_names => "1+,2+"})) - # no record type specified - assert_equal(@@l_gfa1[0..0], @@gfa1.select({:name => :"1"})) - assert_equal(@@l_gfa1[4..6], @@gfa1.select({:from => :"1"})) - # reference as value - assert_equal(@@l_gfa1[4..6], @@gfa1.select({:from => @@l_gfa1[0]})) - # placeholder is equal to any value - assert_equal(@@l_gfa1[0..2], @@gfa1.select({:sequence => "ACC"})) - end - - def test_select_by_line_gfa1 - @@l_gfa1.size.times do |i| - assert_equal(@@l_gfa1[i..i], @@gfa1.select(@@l_gfa1[i])) - end - end - - def test_select_by_hash_gfa2 - # search segments - assert_equal(@@l_gfa2[0..1], @@gfa2.select({:record_type => :S, - :sequence => "CGAT"})) - assert_equal(@@l_gfa2[1..1], @@gfa2.select({:record_type => :S, - :slen => 110})) - # search edges - assert_equal(@@l_gfa2[2..2], @@gfa2.select({:record_type => :E, - :sid1 => OL[:"1",:+]})) - # search gaps - assert_equal(@@l_gfa2[3..3], @@gfa2.select({:record_type => :G, - :sid1 => OL[:"1",:-]})) - assert_equal(@@l_gfa2[11..11], @@gfa2.select({:record_type => :G, - :disp => 2000})) - # search paths - assert_equal(@@l_gfa2[4..4], @@gfa2.select({:record_type => :O, - :items => "1+ 2-"})) - # search sets - assert_equal(@@l_gfa2[5..5], @@gfa2.select({:record_type => :U, - :name => :"u1"})) - # search fragments - assert_equal(@@l_gfa2[6..8], @@gfa2.select({:record_type => :F, - :external => "read1-"})) - # search custom records - assert_equal(@@l_gfa2[9..9], @@gfa2.select({:record_type => :X, - :xx => "A"})) - end - - 
def test_select_by_line_gfa2 - @@l_gfa2.size.times do |i| - assert_equal(@@l_gfa2[i..i], @@gfa2.select(@@l_gfa2[i])) - end - end - -end diff --git a/test/test_api_multiplication.rb b/test/test_api_multiplication.rb deleted file mode 100644 index df5d12b..0000000 --- a/test/test_api_multiplication.rb +++ /dev/null @@ -1,256 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Multiplication < Test::Unit::TestCase - - def test_multiply_segment - gfa = RGFA.new - gfa << "H\tVN:Z:1.0" - s = ["S\t0\t*\tRC:i:600", - "S\t1\t*\tRC:i:6000", - "S\t2\t*\tRC:i:60000"] - l = "L\t1\t+\t2\t+\t12M" - c = "C\t1\t+\t0\t+\t12\t12M" - p = "P\t3\t2+,0-\t12M" - (s + [l,c,p]).each {|line| gfa << line } - assert_equal(s, gfa.segments.map(&:to_s)) - assert_equal([l], gfa.dovetails.select{|n|!n.virtual?}.map(&:to_s)) - assert_equal([c], gfa.containments.map(&:to_s)) - assert_equal([l], - gfa.segment(:"1").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_equal([c], gfa.segment(:"1").relations_to(:"0").map(&:to_s)) - assert_equal(6000, gfa.segment(:"1").RC) - gfa.multiply(:"1", 2) - assert_equal([l], - gfa.segment(:"1").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_equal([c], gfa.segment(:"1").relations_to(:"0").map(&:to_s)) - assert_not_equal([], - gfa.segment(:"1*2").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_not_equal([], gfa.segment(:"1*2").relations_to(:"0")) - assert_equal(3000, gfa.segment(:"1").RC) - assert_equal(3000, gfa.segment(:"1*2").RC) - gfa.multiply(:"1*2", 3 , copy_names:["6","7"]) - assert_equal([l], - gfa.segment(:"1").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_not_equal([], - gfa.segment(:"1*2").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_not_equal([], - gfa.segment(:"6").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_not_equal([], - gfa.segment(:"7").end_relations(:R, [:"2", :L]).map(&:to_s)) - assert_not_equal([], gfa.segment(:"1*2").relations_to(:"0")) - assert_not_equal([], 
gfa.segment(:"6").relations_to(:"0")) - assert_not_equal([], gfa.segment(:"7").relations_to(:"0")) - assert_equal(3000, gfa.segment(:"1").RC) - assert_equal(1000, gfa.segment(:"1*2").RC) - assert_equal(1000, gfa.segment(:"6").RC) - assert_equal(1000, gfa.segment(:"7").RC) - end - - def test_multiply_segment_copy_names - gfa = ["H\tVN:Z:1.0", - "S\t1\t*\tRC:i:600", - "S\t1b\t*\tRC:i:6000", - "S\t2\t*\tRC:i:60000", - "S\t3\t*\tRC:i:60000"].to_rgfa - gfa.multiply(:"2", 2, copy_names: :upcase) - assert_nothing_raised {gfa.segment!("2B")} - gfa.multiply(:"2", 2, copy_names: :upcase) - assert_nothing_raised {gfa.segment!("2C")} - gfa.multiply(:"2", 2, copy_names: :copy) - assert_nothing_raised {gfa.segment!("2_copy")} - gfa.multiply(:"2", 2, copy_names: :copy) - assert_nothing_raised {gfa.segment!("2_copy2")} - gfa.multiply(:"2", 2, copy_names: :copy) - assert_nothing_raised {gfa.segment!("2_copy3")} - gfa.multiply(:"2_copy", 2, copy_names: :copy) - assert_nothing_raised {gfa.segment!("2_copy4")} - gfa.multiply(:"2_copy4", 2, copy_names: :copy) - assert_nothing_raised {gfa.segment!("2_copy5")} - gfa.multiply(:"2", 2, copy_names: :number) - assert_nothing_raised {gfa.segment!("4")} - gfa.multiply(:"1b", 2) - assert_nothing_raised {gfa.segment!("1b*2")} - gfa.multiply(:"1b", 2, copy_names: :number) - assert_nothing_raised {gfa.segment!("1b2")} - gfa.multiply(:"1b", 2, copy_names: :copy) - assert_nothing_raised {gfa.segment!("1b_copy")} - gfa.multiply(:"1b_copy", 2, copy_names: :lowcase) - assert_nothing_raised {gfa.segment!("1b_copz")} - gfa.multiply(:"1b_copy", 2, copy_names: :upcase) - assert_nothing_raised {gfa.segment!("1b_copyB")} - end - - def test_links_distribution_l1_m2 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l1.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l1.m2.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - 
g1.multiply_extended(:"1", 2) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_enable_extensions - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l1.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l1.m2.#{sfx}") - g1.enable_extensions - g2.enable_extensions - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply(:"1", 2) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_links_distribution_l2_m2 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m2.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_no_links_distribution_l2_m2 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m2.no_ld.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2, distribute: :off) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_links_distribution_l2_m3 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m3.#{sfx}") - 
assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 3) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_no_links_distribution_l2_m3 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m3.no_ld.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 3, distribute: :off) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_links_distribution_l3_m2 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l3.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l3.m2.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_no_links_distribution_l3_m2 - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l3.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l3.m2.no_ld.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2, distribute: :off) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_muliply_without_rgfatools - ["gfa", "gfa2"].each do |sfx| - 
g1 = RGFA.from_file("testdata/links_distri.l3.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l3.m2.no_ld.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply(:"1", 2) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_distribution_policy_equal_with_equal - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m2.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2, distribute: :equal) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_distribution_policy_equal_with_not_equal - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l3.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l3.m2.no_ld.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2, distribute: :equal) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_distribution_policy_B - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m2.no_ld.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2, distribute: :L) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - 
assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - - def test_distribution_policy_E - ["gfa", "gfa2"].each do |sfx| - g1 = RGFA.from_file("testdata/links_distri.l2.#{sfx}") - g2 = RGFA.from_file("testdata/links_distri.l2.m2.#{sfx}") - assert_not_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_not_equal(g2.dovetails.map(&:to_s).sort, - g1.dovetails.map(&:to_s).sort) - g1.multiply_extended(:"1", 2, distribute: :R) - assert_equal(g2.segment_names.sort,g1.segment_names.sort) - assert_equal(g2.dovetails.map(&:to_s).sort, g1.dovetails.map(&:to_s).sort) - end - end - -end diff --git a/test/test_api_placeholders.rb b/test/test_api_placeholders.rb deleted file mode 100644 index 866a0cc..0000000 --- a/test/test_api_placeholders.rb +++ /dev/null @@ -1,45 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Placeholders < Test::Unit::TestCase - - @@p = RGFA::Placeholder.new - - def test_to_s - assert_equal("*", @@p.to_s) - end - - def test_subclasses - assert_equal(@@p, RGFA::Alignment::Placeholder.new) - end - - def test_is_placeholder - assert(@@p.placeholder?) - assert("*".placeholder?) - assert(!"1".placeholder?) - assert(:*.placeholder?) - assert(!:**.placeholder?) - assert(!1.placeholder?) - assert(!1.0.placeholder?) - assert([].placeholder?) - assert(![:x].placeholder?) - assert(RGFA::Alignment::Placeholder.new.placeholder?) - end - - def test_compatibility_methods - # array/string - assert(@@p.empty?) - assert(@@p[1].placeholder?) - assert(@@p[0..-1].placeholder?) - assert_equal(0, @@p.length) - assert_equal(0, @@p.size) - # sequence - assert(@@p.rc.placeholder?) - # integer - assert((@@p + 1).placeholder?) 
- # validation - assert_nothing_raised {@@p.validate} - end - -end diff --git a/test/test_api_positionals.rb b/test/test_api_positionals.rb deleted file mode 100644 index adf73d1..0000000 --- a/test/test_api_positionals.rb +++ /dev/null @@ -1,262 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Positionals < Test::Unit::TestCase - - @@s = { - :S1 => "S\t1\t*", - :L => "L\t1\t+\t2\t+\t*", - :C => "C\t1\t+\t2\t+\t10\t*", - :P => "P\tx\t1+,2+\t*", - :S2 => "S\t2\t100\t*", - :E => "E\t*\t1+\t2+\t10\t20\t30\t40\t*", - :F => "F\t1\t5+\t11\t21\t31\t41\t*", - :G => "G\t*\t1+\t2+\t1000\t1", - :U => "U\t*\t1 2 3", - :O => "O\t*\t1+ 2+ 3+", - } - @@f = Hash[@@s.map{|k,v|[k,v.split("\t")]}] - @@l = Hash[@@s.map{|k,v|[k,v.to_rgfa_line]}] - - @@fieldnames = { - :S1 => [:name, :sequence], - :L => [:from_segment, :from_orient, :to_segment, :to_orient, :overlap], - :C => [:from_segment, :from_orient, :to_segment, :to_orient, :pos, :overlap], - :P => [:path_name, :segment_names, :overlaps], - :S2 => [:sid, :slen, :sequence], - :E => [:eid, :sid1, :sid2, :beg1, :end1, :beg2, :end2, :alignment], - :F => [:sid, :external, :s_beg, :s_end, :f_beg, :f_end, :alignment], - :G => [:gid, :sid1, :sid2, :disp, :var], - :U => [:pid, :items], - :O => [:pid, :items], - } - - # alternative values to set tests - @@v1 = { - :S1 => {:name => :sx, :sequence => "accg"}, - :L => {:from_segment => :a1, :from_orient => :-, :to_segment => :a2, :to_orient => :-, - :overlap => "12M".to_alignment}, - :C => {:from_segment => :cx, :from_orient => :-, :to_segment => :cy, :to_orient => :-, - :pos => 123, :overlap => "120M".to_alignment}, - :P => {:path_name => :px, :segment_names => [OL[:x,:+], OL[:y,:-]], - :overlaps => ["10M".to_alignment]}, - :S2 => {:sid => :s2s, :slen => 999, :sequence => "gggg"}, - :E => {:eid => :e2e, :sid1 => OL[:s2s,:-], - :sid2 => OL[:t2t,:-], - :beg1 => 0, :end1 => "100$".to_pos, - :beg2 => 10, :end2 => "110$".to_pos, - :alignment 
=> "10M1I10M1D80M".to_alignment}, - :F => {:sid => :s2s, :external => OL[:ex2ex,:-], - :s_beg => 0, :s_end => "100$".to_pos, - :f_beg => 10, :f_end => "110$".to_pos, - :alignment => "10M1I10M1D80M".to_alignment}, - :G => {:gid => :g2g, :sid1 => OL[:s2s,:+], :sid2 => OL[:t2t,:-], - :disp => 2000, :var => 100}, - :O => {:pid => :O100, :items => [OL[:x1,:+], - OL[:x2,:+], - OL[:x3,:-]]}, - :U => {:pid => :U100, :items => [:x1, :x2, :x3]}, - } - @@v2 = { - :S1 => {:name => :xs, :sequence => "aggc"}, - :L => {:from_segment => :a5, :from_orient => :+, :to_segment => :a7, :to_orient => :+, - :overlap => "9M3I3M".to_alignment}, - :C => {:from_segment => :cp, :from_orient => :+, :to_segment => :cl, :to_orient => :+, - :pos => 213, :overlap => "110M4D10M".to_alignment}, - :P => {:path_name => :pu, :segment_names => [OL[:k,:-], - OL[:l,:+]], :overlaps => ["11M".to_alignment]}, - :S2 => {:sid => :s4s, :slen => 1999, :sequence => "aaaa"}, - :E => {:eid => :e4e, :sid1 => OL[:s4s,:+], - :sid2 => OL[:t4t,:+], - :beg1 => 10, :end1 => "110$".to_pos, - :beg2 => 0, :end2 => "100$".to_pos, - :alignment => "10M1I20M1D80M".to_alignment}, - :F => {:sid => :s4s, :external => OL[:ex4ex, :+], - :s_beg => 10, :s_end => "110$".to_pos, - :f_beg => 0, :f_end => "100$".to_pos, - :alignment => "10M1I20M1D80M".to_alignment}, - :G => {:gid => :g4g, :sid1 => OL[:s4s,:-], :sid2 => OL[:t4t,:+], - :disp => 3000, :var => 200}, - :O => {:pid => :O200, :items => [OL[:x7,:-], - OL[:x6,:+], - OL[:x3,:+]]}, - :U => {:pid => :U200, :items => [:x6, :x7, :x4]}, - } - @@aliases = { - :S1 => {:name => :sid}, :P => {:path_name => :name}, - :S2 => {:sid => :name}, :E => {:eid => :name}, :G => {:gid => :name}, - :U => {:pid => :name}, :O => {:pid => :name}, - :L => {:from_segment => :from, :to_segment => :to}, - :C => {:from_segment => :container, :from_orient => :container_orient, - :to_segment => :contained, :to_orient => :contained_orient} - } - - def test_number_of_positionals - @@f.each do |rt, fields| - 
assert_nothing_raised { fields.to_rgfa_line } - too_less = fields.clone; too_less.pop - assert_raise(RGFA::FormatError) { too_less.to_rgfa_line } - too_many = fields.clone; too_many << "*" - assert_raise(RGFA::FormatError) { too_many.to_rgfa_line } - end - end - - def test_positional_fieldnames - @@l.each do |rt, line| - assert_equal(@@fieldnames[rt], line.positional_fieldnames) - end - end - - def test_field_getters_and_setters - @@fieldnames.each do |rt, fn_list| - fn_list.each_with_index do |fn, i| - i+=1 # skip record_type - # field_to_s() - assert_equal(@@f[rt][i], @@l[rt].field_to_s(fn)) - # validate_field/validate - assert_nothing_raised { @@l[rt].validate_field(fn) } - assert_nothing_raised { @@l[rt].validate } - # fieldname() == get(fieldname) - assert_equal(@@l[rt].send(fn), @@l[rt].get(fn)) - # fieldname=() and fieldname() - l = @@l[rt].clone - l.send("#{fn}=", @@v1[rt][fn]) - assert_equal(@@v1[rt][fn], l.send(fn)) - # set() and get() - l.set(fn, @@v2[rt][fn]) - assert_equal(@@v2[rt][fn], l.get(fn)) - end - end - end - - def test_aliases - @@aliases.each do |rt, aliasmap| - aliasmap.each do |orig, al| - # get(orig) == get(alias) - assert_equal(@@l[rt].send(orig), @@l[rt].send(al)) - assert_equal(@@l[rt].get(orig), @@l[rt].get(al)) - # validate_field/validate - assert_nothing_raised { @@l[rt].validate_field(al) } - assert_nothing_raised { @@l[rt].validate } - # field_to_s(orig) == field_to_s(alias) - assert_equal(@@l[rt].field_to_s(orig), @@l[rt].field_to_s(al)) - # set(al, value) + get(orig) - l = @@l[rt].clone - assert_not_equal(@@v1[rt][orig], l.send(orig)) - l.set(al, @@v1[rt][orig]) - assert_equal(@@v1[rt][orig], l.send(orig)) - # alias=value + orig() - assert_not_equal(@@v2[rt][orig], l.send(orig)) - l.send(:"#{al}=", @@v2[rt][orig]) - assert_equal(@@v2[rt][orig], l.send(orig)) - # set(orig, value) + get(alias) - assert_not_equal(@@v1[rt][orig], l.send(al)) - l.set(orig, @@v1[rt][orig]) - assert_equal(@@v1[rt][orig], l.send(al)) - # orig=value + 
alias() - assert_not_equal(@@v2[rt][orig], l.send(al)) - l.send(:"#{orig}=", @@v2[rt][orig]) - assert_equal(@@v2[rt][orig], l.send(al)) - end - end - end - - def test_array_fields - assert_kind_of(Array, @@l[:P].segment_names) - assert_kind_of(RGFA::OrientedLine, @@l[:P].segment_names.first) - assert_kind_of(Array, @@l[:P].overlaps) - assert_kind_of(RGFA::Alignment::Placeholder, @@l[:P].overlaps.first) - assert_kind_of(Array, @@l[:O].items) - assert_kind_of(RGFA::OrientedLine, @@l[:O].items.first) - assert_kind_of(Array, @@l[:U].items) - assert_kind_of(Symbol, @@l[:U].items.first) - end - - def test_orientation - # orientation is symbol - assert_equal(:+, @@l[:L].from_orient) - assert_equal(:+, @@l[:L].to_orient) - # invert - assert_equal(:-, @@l[:L].to_orient.invert) - assert_equal(:+, :-.invert) - assert_equal(:-, :+.invert) - # string representation - assert_equal("+", @@l[:L].field_to_s(:from_orient)) - # invert does not work with string representation - assert_raise(NoMethodError) {"+".invert} - # assigning the string representation - l = @@l[:L].clone - l.from_orient = "+" - assert_equal(:+, l.from_orient) - assert_equal(:-, l.from_orient.invert) - # non :+/:- symbols is an error - assert_raises(RGFA::FormatError) {l.from_orient = :x; l.validate} - # only :+/:- and their string representations are accepted - assert_raises(RGFA::FormatError) {l.from_orient = "x"; l.validate} - assert_raises(RGFA::FormatError) {l.from_orient = 1; l.validate} - end - - def test_oriented_segment - os = @@l[:P].segment_names.first - # getter methods - assert_equal(:"1", os.line) - assert_equal(:+, os.orient) - # invert - assert_equal(:"1", os.invert.line) - assert_equal(:-, os.invert.orient) - assert_equal(:-, os.orient.invert) - # setter methods - os.line = :"one" - os.orient = :- - assert_equal(:"one", os.line) - assert_equal(:-, os.orient) - # name - assert_equal(:"one", os.name) - os.line = @@l[:S1] - assert_equal(@@l[:S1], os.line) - assert_equal(@@l[:S1].name, os.name) - end 
- - def test_sequence - # placeholder - assert(@@l[:S1].sequence.placeholder?) - assert(@@l[:S2].sequence.placeholder?) - s = @@l[:S1].clone - s.sequence = "ACCT" - assert(!s.sequence.placeholder?) - # sequence is string - assert_equal("ACCT", s.sequence) - # rc - assert_equal("AGGT", s.sequence.rc) - # GFA2 allowed alphabet is larger than GFA1 - assert_nothing_raised { s.validate } - s.sequence = ";;;{}" - assert_raises(RGFA::FormatError) { s.validate } - s = @@l[:S2].clone - s.sequence = ";;;{}" - assert_nothing_raised { s.validate } - # to_sequence - assert_kind_of(RGFA::Placeholder,"*".to_sequence) - assert_kind_of(String,"ACG".to_sequence) - end - - def test_sequence_rc - assert_equal("gcatcgatcgt","acgatcgatgc".rc) - # case - assert_equal("gCaTCgatcgt","acgatcGAtGc".rc) - # wildcards - assert_equal("gcatcnatcgt","acgatngatgc".rc) - assert_equal("gcatcYatcgt","acgatRgatgc".rc) - # RNA - assert_equal("gcaucgaucgu","acgaucgaugc".rc(rna: true)) - assert_equal("===.",".===".rc) - # valid - assert_raises(RGFA::ValueError){"acgatXgatgc".rc} - assert_nothing_raised{"acgatXgatgc".rc(valid: true)} - # placeholder - assert_equal("*","*".rc) - assert_raises(RGFA::ValueError){"**".rc} - end - -end diff --git a/test/test_api_positions.rb b/test/test_api_positions.rb deleted file mode 100644 index a433b6c..0000000 --- a/test/test_api_positions.rb +++ /dev/null @@ -1,67 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Positions < Test::Unit::TestCase - - def test_positions - # from string and integer - pos1 = 12.to_lastpos; pos2 = "12$".to_pos - assert_equal(pos1, pos2) - assert_kind_of(RGFA::LastPos, pos1) - assert_kind_of(RGFA::LastPos, pos2) - # value - assert_equal(12, pos1.value) - assert_equal(12, pos2.value) - assert_equal(12, 12.value) - # to_pos on string without dollar - assert_equal(12, "12".to_pos) - assert_kind_of(Integer, "12".to_pos) - # to pos: wrong format - assert_raise (RGFA::FormatError) { 
"12=".to_pos } - # 0$ is allowed, although unclear if useful - assert("0$".to_pos.last?) - # comparison with integer and string - assert_equal(RGFA::LastPos.new(10), 10) - assert_equal(10, RGFA::LastPos.new(10)) - # to_s - assert_equal("12$", pos1.to_s) - # to_i - assert_equal(12, pos1.to_i) - end - - def test_positions_negative - # negative values - assert_raise (RGFA::ValueError) { "-1".to_pos } - assert_raise (RGFA::ValueError) { "-1$".to_pos } - # negative values, valid: true - assert_equal(-1, "-1".to_pos(valid: true)) - assert_kind_of(Integer, "-1".to_pos(valid: true)) - assert_equal(RGFA::LastPos.new(-1), "-1$".to_pos(valid: true)) - assert_equal(RGFA::LastPos.new(-1), -1.to_lastpos(valid: true)) - # validate - assert_raise (RGFA::ValueError) {"-1$".to_pos(valid: true).validate} - assert_raise (RGFA::ValueError) {-1.to_lastpos(valid: true).validate} - end - - def test_positions_first_last - # first? and last? - assert(!"0".to_pos.last?) - assert(!"12".to_pos.last?) - assert("12$".to_pos.last?) - assert("0".to_pos.first?) - assert(!"12".to_pos.first?) - assert(!"12$".to_pos.first?) - end - - def test_positions_subtract - a = "13$".to_pos - a1 = a - 0 - a2 = a - 1 - assert_equal(13, a1) - assert_equal(12, a2) - assert(a1.last?) - assert(!a2.last?) 
- end - -end diff --git a/test/test_api_references_edges_gfa1.rb b/test/test_api_references_edges_gfa1.rb deleted file mode 100644 index c0a66ac..0000000 --- a/test/test_api_references_edges_gfa1.rb +++ /dev/null @@ -1,125 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::ReferencesEdgesGFA1 < Test::Unit::TestCase - - def test_links_references - g = RGFA.new - lab = "L\ta\t+\tb\t+\t*".to_rgfa_line - assert_equal(:a, lab.from) - assert_equal(:b, lab.to) - g << lab - g << (sa = "S\ta\t*".to_rgfa_line) - g << (sb = "S\tb\t*".to_rgfa_line) - assert_equal(sa, lab.from) - assert_equal(sb, lab.to) - lab.disconnect - assert_equal(:a, lab.from) - assert_equal(:b, lab.to) - # disconnection of segment cascades on links - g << lab - assert(lab.connected?) - assert_equal(sa, lab.from) - sa.disconnect - assert(!lab.connected?) - assert_equal(:a, lab.from) - end - - def test_links_backreferences - g = RGFA.new - g << (sa = "S\ta\t*".to_rgfa_line) - # links - s = {}; l = {} - [:b, :c, :d, :e, :f, :g, :h, :i].each do |name| - g << (s[name] = "S\t#{name}\t*".to_rgfa_line) - end - ["a+b+", "a+c-", "a-d+", "a-e-", - "f+a+", "g+a-", "h-a+", "i-a-"].each do |name| - g << (l[name] = name.chars.unshift("L").push("*").join("\t").to_rgfa_line) - end - # dovetails_[LR]() - assert_equal([l["a+b+"], l["a+c-"], - l["g+a-"], l["i-a-"]], sa.dovetails_R) - assert_equal([l["a-d+"], l["a-e-"], - l["f+a+"], l["h-a+"]], sa.dovetails_L) - # dovetails_of_end() - assert_equal(sa.dovetails_R, sa.dovetails_of_end(:R)) - assert_equal(sa.dovetails_L, sa.dovetails_of_end(:L)) - assert_equal(sa.dovetails_L + sa.dovetails_R, sa.dovetails) - # neighbours - assert_equal([:b, :c, :d, :e, :f, :g, :h, :i].sort, - sa.neighbours.map(&:name).sort) - # gfa2 specific collections are empty in gfa1 - assert_equal([], sa.gaps) - assert_equal([], sa.fragments) - assert_equal([], sa.internals) - # upon disconnection - sa.disconnect - assert_equal([], sa.dovetails_R) - 
assert_equal([], sa.dovetails_R) - assert_equal([], sa.dovetails_of_end(:L)) - assert_equal([], sa.dovetails_of_end(:R)) - assert_equal([], sa.dovetails) - assert_equal([], sa.neighbours) - end - - def test_containments_references - g = RGFA.new - cab = "C\ta\t+\tb\t+\t10\t*".to_rgfa_line - assert_equal(:a, cab.from) - assert_equal(:b, cab.to) - g << (sa = "S\ta\t*".to_rgfa_line) - g << (sb = "S\tb\t*".to_rgfa_line) - g << cab - assert_equal(sa, cab.from) - assert_equal(sb, cab.to) - cab.disconnect - assert_equal(:a, cab.from) - assert_equal(:b, cab.to) - # disconnection of segment cascades on containments - g << cab - assert(cab.connected?) - assert_equal(sa, cab.from) - sa.disconnect - assert(!cab.connected?) - assert_equal(:a, cab.from) - end - - def test_containments_backreferences - g = RGFA.new - g << (sa = "S\ta\t*".to_rgfa_line) - # containments: - s = {}; c = {} - [:b, :c, :d, :e, :f, :g, :h, :i].each do |name| - g << (s[name] = "S\t#{name}\t*".to_rgfa_line) - end - ["a+b+", "a+c-", "a-d+", "a-e-", - "f+a+", "g+a-", "h-a+", "i-a-"].each do |name| - g << (c[name] = (["C"]+name.chars+["10","*"]).join("\t").to_rgfa_line) - end - # edges to contained/containers - assert_equal([c["a+b+"], c["a+c-"], - c["a-d+"], c["a-e-"]], sa.edges_to_contained) - assert_equal([c["f+a+"], c["g+a-"], - c["h-a+"], c["i-a-"]], sa.edges_to_containers) - # containments - assert_equal(sa.edges_to_contained + sa.edges_to_containers, - sa.containments) - # contained/containers - assert_equal([s[:b], s[:c], s[:d], s[:e]], sa.contained) - assert_equal([s[:f], s[:g], s[:h], s[:i]], sa.containers) - # gfa2 specific collections are empty in gfa1 - assert_equal([], sa.gaps) - assert_equal([], sa.fragments) - assert_equal([], sa.internals) - # upon disconnection - sa.disconnect - assert_equal([], sa.edges_to_contained) - assert_equal([], sa.edges_to_containers) - assert_equal([], sa.containments) - assert_equal([], sa.contained) - assert_equal([], sa.containers) - end - -end diff --git 
a/test/test_api_references_edges_gfa2.rb b/test/test_api_references_edges_gfa2.rb deleted file mode 100644 index a350dd9..0000000 --- a/test/test_api_references_edges_gfa2.rb +++ /dev/null @@ -1,220 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::ReferencesEdges < Test::Unit::TestCase - - def test_edges_references - g = RGFA.new - lab = "E\t*\ta+\tb+\t0\t10\t90\t100$\t*".to_rgfa_line - assert_equal(OL[:a,:+], lab.sid1) - assert_equal(OL[:b,:+], lab.sid2) - g << (sa = "S\ta\t100\t*".to_rgfa_line) - g << (sb = "S\tb\t100\t*".to_rgfa_line) - g << lab - assert_equal(sa, lab.sid1.line) - assert_equal(sb, lab.sid2.line) - lab.disconnect - assert_equal(:a, lab.sid1.line) - assert_equal(:b, lab.sid2.line) - # disconnection of segment cascades on edges - g << lab - assert(lab.connected?) - assert_equal(sa, lab.sid1.line) - sa.disconnect - assert(!lab.connected?) - assert_equal(:a, lab.sid1.line) - end - - def test_edges_backreferences - g = RGFA.new - g << (sa = "S\ta\t100\t*".to_rgfa_line) - s = {} - {"0"=>0,"1"=>30,"2"=>70,"$"=>"100$".to_pos}.each do |sbeg1, beg1| - {"0"=>0,"1"=>30,"2"=>70,"$"=>"100$".to_pos}.each do |send1, end1| - next if beg1 > end1 - {"0"=>0,"1"=>30,"2"=>70,"$"=>"100$".to_pos}.each do |sbeg2, beg2| - {"0"=>0,"1"=>30,"2"=>70,"$"=>"100$".to_pos}.each do |send2, end2| - next if beg2 > end2 - [:+,:-].each do |or1| - [:+,:-].each do |or2| - eid = "<#{or1}#{or2}#{sbeg1}#{send1}#{sbeg2}#{send2}" - other = "s#{eid}" - g << ["E",eid,"a#{or1}","#{other}#{or2}", - beg1,end1,beg2,end2,"*"].join("\t") - g << (s[other] = "S\t#{other}\t100\t*".to_rgfa_line) - eid = ">#{or1}#{or2}#{sbeg1}#{send1}#{sbeg2}#{send2}" - other = "s#{eid}" - g << ["E",eid,"#{other}#{or1}","a#{or2}", - beg1,end1,beg2,end2,"*"].join("\t") - g << (s[other] = "S\t#{other}\t100\t*".to_rgfa_line) - end - end - end - end - end - end - exp_sa_d_L = [] - exp_sa_d_R = [] - exp_sa_e_cr = [] - exp_sa_e_cd = [] - exp_sa_i = [] - # a from 0 to 
non-$, other from non-0 to $; - # same orientation; => d_L - # opposite orientations; => internals - ["0","1","2"].each do |e_a| - ["1","2","$"].each do |b_other| - ["++","--"].each do |ors| - exp_sa_d_L << "<#{ors}0#{e_a}#{b_other}$".to_sym - exp_sa_d_L << ">#{ors}#{b_other}$0#{e_a}".to_sym - end - ["+-","-+"].each do |ors| - exp_sa_i << "<#{ors}0#{e_a}#{b_other}$".to_sym - exp_sa_i << ">#{ors}#{b_other}$0#{e_a}".to_sym - end - end - end - # one from non-0 to non-$, other non-0 to non-$; => internals - ["11","12","22"].each do |pos_one| - ["11","12","22"].each do |pos_other| - ["++","--","+-","-+"].each do |ors| - ["<",">"].each do |d| - exp_sa_i << "#{d}#{ors}#{pos_one}#{pos_other}".to_sym - end - end - end - end - # one from non-0 to non-$, other 0 to non-$; => internals - ["11","12","22"].each do |pos_one| - ["00","01","02"].each do |pos_other| - ["++","--","+-","-+"].each do |ors| - ["<",">"].each do |d| - exp_sa_i << "#{d}#{ors}#{pos_one}#{pos_other}".to_sym - exp_sa_i << "#{d}#{ors}#{pos_other}#{pos_one}".to_sym - end - end - end - end - # one from non-0 to non-$, other non-0 to $; => internals - ["11","12","22"].each do |pos_one| - ["1$","2$","$$"].each do |pos_other| - ["++","--","+-","-+"].each do |ors| - ["<",">"].each do |d| - exp_sa_i << "#{d}#{ors}#{pos_one}#{pos_other}".to_sym - exp_sa_i << "#{d}#{ors}#{pos_other}#{pos_one}".to_sym - end - end - end - end - # other from 0 to non-$, a from non-0 to $ - # same orientation; => d_R - # opposite orientations; => internals - ["0","1","2"].each do |e_other| - ["1","2","$"].each do |b_a| - ["++","--"].each do |ors| - exp_sa_d_R << "<#{ors}#{b_a}$0#{e_other}".to_sym - exp_sa_d_R << ">#{ors}0#{e_other}#{b_a}$".to_sym - end - ["+-","-+"].each do |ors| - exp_sa_i << "<#{ors}#{b_a}$0#{e_other}".to_sym - exp_sa_i << ">#{ors}0#{e_other}#{b_a}$".to_sym - end - end - end - # both from 0 to non-$, - # opposite orientations; => d_L - # same orientation; => internals - ["0","1","2"].each do |e1| - ["0","1","2"].each do 
|e2| - pos = "0#{e1}0#{e2}" - ["+-","-+"].each do |ors| - ["<",">"].each do |d| - exp_sa_d_L << "#{d}#{ors}#{pos}".to_sym - end - end - ["++","--"].each do |ors| - ["<",">"].each do |d| - exp_sa_i << "#{d}#{ors}#{pos}".to_sym - end - end - end - end - # both from non-0 to $, - # opposite orientations; => d_R - # same orientation; => internals - ["1","2","$"].each do |e1| - ["1","2","$"].each do |e2| - pos = "#{e1}$#{e2}$" - ["+-","-+"].each do |ors| - ["<",">"].each do |d| - exp_sa_d_R << "#{d}#{ors}#{pos}".to_sym - end - end - ["++","--"].each do |ors| - ["<",">"].each do |d| - exp_sa_i << "#{d}#{ors}#{pos}".to_sym - end - end - end - end - # a whole; other non-whole => edges_to_containers - ["00","01","02","11","12","1$","22","2$","$$"].each do |pos_other| - ["++","--","+-","-+"].each do |ors| - exp_sa_e_cr << "<#{ors}0$#{pos_other}".to_sym - exp_sa_e_cr << ">#{ors}#{pos_other}0$".to_sym - end - end - # a not-whole; other whole => edges_to_contained - ["00","01","02","11","12","1$","22","2$","$$"].each do |pos_a| - ["++","--","+-","-+"].each do |ors| - exp_sa_e_cd << "<#{ors}#{pos_a}0$".to_sym - exp_sa_e_cd << ">#{ors}0$#{pos_a}".to_sym - end - end - # a sid1; both whole => edges_to_contained - ["++","--","+-","-+"].each do |ors| - exp_sa_e_cd << "<#{ors}0$0$".to_sym - end - # a sid2; both whole => edges_to_containers - ["++","--","+-","-+"].each do |ors| - exp_sa_e_cr << ">#{ors}0$0$".to_sym - end - # dovetails_[LR] - assert_equal(exp_sa_d_L.sort,sa.dovetails_L.map(&:name).sort) - assert_equal(exp_sa_d_R.sort,sa.dovetails_R.map(&:name).sort) - # dovetails_of_end() - assert_equal(sa.dovetails_L,sa.dovetails_of_end(:L)) - assert_equal(sa.dovetails_R,sa.dovetails_of_end(:R)) - assert_equal((sa.dovetails_L + sa.dovetails_R),sa.dovetails) - # neighbours - assert_equal((exp_sa_d_L+exp_sa_d_R).map{|eid|:"s#{eid}"}.sort, - sa.neighbours.map(&:name).sort) - # edges_to_containers/contained - assert_equal(exp_sa_e_cr.sort,sa.edges_to_containers.map(&:name).sort) - 
assert_equal(exp_sa_e_cd.sort,sa.edges_to_contained.map(&:name).sort) - # containments - assert_equal((exp_sa_e_cr+exp_sa_e_cd).sort, - sa.containments.map(&:name).sort) - # contained/containers - assert_equal(exp_sa_e_cr.map{|eid|:"s#{eid}"}.sort, - sa.containers.map(&:name).sort) - assert_equal(exp_sa_e_cd.map{|eid|:"s#{eid}"}.sort, - sa.contained.map(&:name).sort) - # internals - assert_equal(exp_sa_i.sort, sa.internals.map(&:name).sort) - # upon disconnection - sa.disconnect - assert_equal([], sa.dovetails_L) - assert_equal([], sa.dovetails_R) - assert_equal([], sa.dovetails_of_end(:L)) - assert_equal([], sa.dovetails_of_end(:R)) - assert_equal([], sa.neighbours) - assert_equal([], sa.edges_to_containers) - assert_equal([], sa.edges_to_contained) - assert_equal([], sa.containments) - assert_equal([], sa.contained) - assert_equal([], sa.containers) - assert_equal([], sa.internals) - end - -end diff --git a/test/test_api_references_edit.rb b/test/test_api_references_edit.rb deleted file mode 100644 index 21610e3..0000000 --- a/test/test_api_references_edit.rb +++ /dev/null @@ -1,17 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::ReferencesEdit < Test::Unit::TestCase - - # XXX - def test_reference_fields_editing - # for each kind of line - # for each reference field (hard code here) - # test that editing is allowed when line is not connected - # test that editing is blocked when line is connected - # test disconnection-editing-reconnection - # test other editing possibilities if any (see manual) - end - -end diff --git a/test/test_api_references_f_g_lines.rb b/test/test_api_references_f_g_lines.rb deleted file mode 100644 index 5766215..0000000 --- a/test/test_api_references_f_g_lines.rb +++ /dev/null @@ -1,96 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::ReferencesFGLines < Test::Unit::TestCase - - def test_fragments_references - g = RGFA.new - f 
= "F\ta\tf+\t0\t200\t281\t502$\t*".to_rgfa_line - assert_equal(:a, f.sid) - assert_equal(OL[:f,:+], f.external) - g << (sa = "S\ta\t100\t*".to_rgfa_line) - g << f - assert_equal(sa, f.sid) - f.disconnect - assert_equal(:a, f.sid) - # disconnection of segment cascades on fragments - g << f - assert(f.connected?) - assert_equal(sa, f.sid) - sa.disconnect - assert(!f.connected?) - assert_equal(:a, f.sid) - end - - def test_fragments_backreferences - g = RGFA.new - f1 = "F\ta\tf+\t0\t200\t281\t502$\t*".to_rgfa_line - f2 = "F\ta\tf+\t240\t440$\t0\t210\t*".to_rgfa_line - g << (sa = "S\ta\t100\t*".to_rgfa_line) - g << f1 - g << f2 - assert_equal([f1,f2], sa.fragments) - # disconnection effects - f1.disconnect - assert_equal([f2], sa.fragments) - sa.disconnect - assert_equal([], sa.fragments) - end - - def test_gap_references - g = RGFA.new - gap = "G\t*\ta+\tb+\t90\t*".to_rgfa_line - assert_equal(OL[:a,:+], gap.sid1) - assert_equal(OL[:b,:+], gap.sid2) - g << (sa = "S\ta\t100\t*".to_rgfa_line) - g << (sb = "S\tb\t100\t*".to_rgfa_line) - g << gap - assert_equal(sa, gap.sid1.line) - assert_equal(sb, gap.sid2.line) - gap.disconnect - assert_equal(:a, gap.sid1.line) - assert_equal(:b, gap.sid2.line) - # disconnection of segment cascades on gaps - g << gap - assert(gap.connected?) - assert_equal(sa, gap.sid1.line) - sa.disconnect - assert(!gap.connected?) 
- assert_equal(:a, gap.sid1.line) - end - - def test_gaps_backreferences - g = RGFA.new - g << (sa = "S\ta\t100\t*".to_rgfa_line) - # gaps - s = {}; gap = {} - [:b, :c, :d, :e, :f, :g, :h, :i].each do |name| - g << (s[name] = "S\t#{name}\t100\t*".to_rgfa_line) - end - ["a+b+", "a+c-", "a-d+", "a-e-", - "f+a+", "g+a-", "h-a+", "i-a-"].each do |name| - g << (gap[name] = - ["G","*",name[0..1],name[2..3],200,"*"].join("\t").to_rgfa_line) - end - # gaps_[LR]() - assert_equal([gap["a-d+"], gap["a-e-"], gap["f+a+"], gap["h-a+"]], - sa.gaps_L) - assert_equal([gap["a+b+"], gap["a+c-"], gap["g+a-"], gap["i-a-"]], - sa.gaps_R) - # gaps_of_end() - assert_equal(sa.gaps_L, sa.gaps_of_end(:L)) - assert_equal(sa.gaps_R, sa.gaps_of_end(:R)) - assert_equal(sa.gaps_L + sa.gaps_R, sa.gaps) - # disconnection effects - gap["a-d+"].disconnect - assert_equal([gap["a-e-"], gap["f+a+"], gap["h-a+"]], sa.gaps_L) - sa.disconnect - assert_equal([], sa.gaps_L) - assert_equal([], sa.gaps_R) - assert_equal([], sa.gaps_of_end(:L)) - assert_equal([], sa.gaps_of_end(:R)) - assert_equal([], sa.gaps) - end - -end diff --git a/test/test_api_references_groups.rb b/test/test_api_references_groups.rb deleted file mode 100644 index cd919df..0000000 --- a/test/test_api_references_groups.rb +++ /dev/null @@ -1,247 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::ReferencesGroups < Test::Unit::TestCase - - def test_paths_references - g = RGFA.new - s = {}; l = {} - [:a, :b, :c, :d, :e, :f].each do |name| - g << (s[name] = "S\t#{name}\t*".to_rgfa_line) - end - path = "P\tp1\tf+,a+,b+,c-,e+\t*".to_rgfa_line - assert_equal([OL[:f,:+], OL[:a,:+], OL[:b,:+], OL[:c,:-], - OL[:e,:+]], path.segment_names) - assert_equal([], path.links) - # connection - g << path - # add links - ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"].each do |name| - g << (l[name] = name.chars.unshift("L").push("*").join("\t").to_rgfa_line) - end - # segment_names - assert_equal([OL[s[:f],:+], 
OL[s[:a],:+], OL[s[:b],:+], OL[s[:c],:-], - OL[s[:e],:+]], path.segment_names) - # links - assert_equal([OL[l["a-f-"],:-], OL[l["a+b+"],:+], OL[l["b+c-"],:+], - OL[l["e-c+"],:-]], path.links) - # path disconnection - path.disconnect - assert_equal([OL[:f,:+], OL[:a,:+], OL[:b,:+], OL[:c,:-], OL[:e,:+]], - path.segment_names) - assert_equal([], path.links) - g << path - # links disconnection cascades on paths: - assert(path.connected?) - l["a-f-"].disconnect - assert(!path.connected?) - assert_equal([OL[:f,:+], OL[:a,:+], OL[:b,:+], OL[:c,:-], OL[:e,:+]], - path.segment_names) - g << path - g << l["a-f-"] - # segment disconnection cascades on links and then paths: - assert(path.connected?) - s[:a].disconnect - assert(!path.connected?) - assert_equal([OL[:f,:+], OL[:a,:+], OL[:b,:+], OL[:c,:-], OL[:e,:+]], - path.segment_names) - assert_equal([], path.links) - end - - def test_paths_backreferences - g = RGFA.new - s = {}; l = {} - [:a, :b, :c, :d, :e, :f].each do |name| - g << (s[name] = "S\t#{name}\t*".to_rgfa_line) - end - g << (path = "P\tp1\tf+,a+,b+,c-,e+\t*".to_rgfa_line) - [:a, :b, :c, :e, :f].each do |sname| - assert_equal([path], s[sname].paths) - end - assert_equal([], s[:d].paths) - ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"].each do |name| - g << (l[name] = name.chars.unshift("L").push("*").join("\t").to_rgfa_line) - end - ["a+b+", "b+c-", "e-c+", "a-f-"].each do |lname| - assert_equal([path], l[lname].paths) - end - assert_equal([], l["c-d+"].paths) - # disconnection effects - path.disconnect - ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"].each do |lname| - assert_equal([], l[lname].paths) - end - [:a, :b, :c, :d, :e, :f].each do |sname| - assert_equal([], s[sname].paths) - end - # reconnection - path.connect(g) - [:a, :b, :c, :e, :f].each do |sname| - assert_equal([path], s[sname].paths) - end - assert_equal([], s[:d].paths) - ["a+b+", "b+c-", "e-c+", "a-f-"].each do |lname| - assert_equal([path], l[lname].paths) - end - assert_equal([], l["c-d+"].paths) - end 
- - def test_gfa2_paths_references - g = RGFA.new - s = {} - [:a, :b, :c, :d, :e, :f].each do |name| - g << (s[name] = "S\t#{name}\t1000\t*".to_rgfa_line) - end - path1_part1 = "O\tp1\tp2- b+".to_rgfa_line - path1_part2 = "O\tp1\tc- e-c+-".to_rgfa_line - path1 = path1_part2 - path2 = "O\tp2\tf+ a+".to_rgfa_line - assert_equal([OL[:p2,:-], OL[:b,:+]], path1_part1.items) - assert_equal([OL[:c,:-], OL[:"e-c+",:-]], path1_part2.items) - assert_equal([OL[:f,:+], OL[:a,:+]], path2.items) - assert_raise(RGFA::RuntimeError){path1.captured_path} - assert_raise(RGFA::RuntimeError){path2.captured_path} - # connection - g << path1_part1 - g << path1_part2 - g << path2 - # edges - e = {} - ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-", "f-b+"].each do |name| - coord1 = name[1] == "+" ? "900\t1000$" : "0\t100" - coord2 = name[3] == "+" ? "0\t100" : "900\t1000$" - g << (e[name] = ("E\t#{name}\t#{name[0..1]}\t#{name[2..3]}\t"+ - "#{coord1}\t#{coord2}\t100M").to_rgfa_line) - end - # items - assert_equal([OL[path2,:-], OL[s[:b],:+], OL[s[:c],:-], OL[e["e-c+"],:-]], - path1.items) - assert_equal([OL[s[:f],:+], OL[s[:a],:+]], path2.items) - # induced set - assert_equal([OL[s[:f],:+], OL[e["a-f-"],:-], OL[s[:a],:+]], - path2.captured_path) - assert_equal([OL[s[:a],:-], OL[e["a-f-"],:+], OL[s[:f],:-], - OL[e["f-b+"],:+], OL[s[:b],:+], OL[e["b+c-"],:+], - OL[s[:c],:-], OL[e["e-c+"],:-], OL[s[:e],:+]], - path1.captured_path) - # backreferences - [path2, s[:b], s[:c], e["e-c+"]].each do |line| - assert_equal([path1], line.paths) - end - [s[:f], s[:a]].each do |line| - assert_equal([path2], line.paths) - end - # group disconnection - path1.disconnect - assert_equal([OL[:p2,:-], OL[:b,:+], OL[:c,:-], OL[:"e-c+",:-]], - path1.items) - assert_raise(RGFA::RuntimeError){path1.captured_path} - assert_equal([OL[s[:f],:+], OL[s[:a],:+]], path2.items) - [path2, s[:b], s[:c], e["e-c+"]].each do |line| - assert_equal([], line.paths) - end - # group reconnection - g << path1 - assert_equal([OL[path2,:-], 
OL[s[:b],:+], OL[s[:c],:-], OL[e["e-c+"],:-]], - path1.items) - assert_equal([OL[s[:f],:+], OL[s[:a],:+]], path2.items) - [path2, s[:b], s[:c], e["e-c+"]].each do |line| - assert_equal([path1], line.paths) - end - # item disconnection cascades on group - assert(path1.connected?) - assert(path2.connected?) - e["e-c+"].disconnect - assert(!path1.connected?) - assert(path2.connected?) - g << e["e-c+"] - g << path1 - # two-level disconnection cascade - assert(path1.connected?) - assert(path2.connected?) - s[:f].disconnect - assert(!path2.connected?) - assert(!path1.connected?) - end - - def test_sets_references - g = RGFA.new - s = {} - set1 = "U\tset1\tb set2 c e-c+".to_rgfa_line - set2 = "U\tset2\tg c-d+ path1".to_rgfa_line - path1 = "O\tpath1\tf+ a+".to_rgfa_line - assert_equal([:b, :set2, :c, :"e-c+"], set1.items) - assert_equal([:g, :"c-d+", :path1], set2.items) - # induced set of non-connected cannot be computed - assert_raise(RGFA::RuntimeError){set1.induced_set} - assert_raise(RGFA::RuntimeError){set2.induced_set} - # connection - g << set1 - g << set2 - # induced set cannot be computed, as long as not all references are solved - assert_raise(RGFA::RuntimeError) {set1.induced_set} - # connect items - g << path1 - [:a, :b, :c, :d, :e, :f, :g].each do |name| - g << (s[name] = "S\t#{name}\t1000\t*".to_rgfa_line) - end - e = {} - ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"].each do |name| - coord1 = name[1] == "+" ? "900\t1000$" : "0\t100" - coord2 = name[3] == "+" ? 
"0\t100" : "900\t1000$" - g << (e[name] = ("E\t#{name}\t#{name[0..1]}\t#{name[2..3]}\t"+ - "#{coord1}\t#{coord2}\t100M").to_rgfa_line) - end - # items - assert_equal([s[:b], set2, s[:c], e["e-c+"]], set1.items) - assert_equal([s[:g], e["c-d+"], path1], set2.items) - # induced set - assert_equal([OL[s[:f],:+], OL[s[:a],:+]], - path1.captured_segments) - assert_equal([s[:g], s[:c], s[:d], s[:f], s[:a]], - set2.induced_segments_set) - assert_equal([s[:b], s[:g], s[:c], s[:d], s[:f], s[:a], s[:e]], - set1.induced_segments_set) - assert_equal([e["c-d+"], e["a-f-"]], - set2.induced_edges_set) - assert_equal([e["a+b+"],e["b+c-"],e["c-d+"],e["e-c+"],e["a-f-"]], - set1.induced_edges_set) - assert_equal(set1.induced_segments_set + set1.induced_edges_set, - set1.induced_set) - # backreferences - [s[:b], set2, s[:c], e["e-c+"]].each do |line| - assert_equal([set1], line.sets) - end - [s[:g], e["c-d+"], path1].each do |line| - assert_equal([set2], line.sets) - end - # group disconnection - set1.disconnect - assert_equal([:b, :set2, :c, :"e-c+"], set1.items) - [s[:b], set2, s[:c], e["e-c+"]].each do |line| - assert_equal([], line.sets) - end - # group reconnection - g << set1 - assert_equal([s[:b], set2, s[:c], e["e-c+"]], set1.items) - [s[:b], set2, s[:c], e["e-c+"]].each do |line| - assert_equal([set1], line.sets) - end - # item disconnection cascades on group - assert(set1.connected?) - e["e-c+"].disconnect - assert(!set1.connected?) - g << e["e-c+"] - g << set1 - # multilevel disconnection cascade - assert(path1.connected?) - assert(set2.connected?) - assert(set1.connected?) - s[:f].disconnect - assert(!path1.connected?) - assert(!set2.connected?) - assert(!set1.connected?) 
- end - -end diff --git a/test/test_api_references_virtual.rb b/test/test_api_references_virtual.rb deleted file mode 100644 index 17920a2..0000000 --- a/test/test_api_references_virtual.rb +++ /dev/null @@ -1,127 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestAPI ||= Module.new -class TestAPI::ReferencesVirtual < Test::Unit::TestCase - - def test_edges_gaps_create_virtual_segments - data = [ - [:gfa1, {:lines => ["L\ta\t+\tb\t-\t*", "C\ta\t-\tb\t+\t100\t*"], - :m1 => :oriented_from, :m2 => :oriented_to, - :sA => "S\ta\t*", :sB => "S\tb\t*", - :collection => :edges}], - [:gfa2, {:lines => ["E\t*\ta+\tb-\t0\t100\t900\t1000$\t*"], - :m1 => :sid1, :m2 => :sid2, - :sA => "S\ta\t1000\t*", :sB => "S\tb\t1000\t*", - :collection => :edges}], - [:gfa2, {:lines => ["G\t*\ta+\tb-\t1000\t100"], - :m1 => :sid1, :m2 => :sid2, - :sA => "S\ta\t1000\t*", :sB => "S\tb\t1000\t*", - :collection => :gaps}] - ] - data.each do |v,values| - values[:lines].each do |linestr| - g = RGFA.new(version: v) - g << (line = linestr.to_rgfa_line) - assert_equal([:a, :b], g.segments.map(&:name)) - g.segments.each {|s| assert(s.virtual?)} - g << (sA = values[:sA].to_rgfa_line) - assert_equal([:a, :b].sort, g.segments.map(&:name).sort) - assert(!g.segment(:a).virtual?) - assert(g.segment(:b).virtual?) - assert_equal(sA, line.send(values[:m1]).line) - assert_equal(sA, g.segment(:a)) - assert_equal([line], sA.send(values[:collection])) - g << (sB = values[:sB].to_rgfa_line) - assert_equal([:a, :b].sort, g.segments.map(&:name).sort) - assert(!g.segment(:b).virtual?) - assert_equal(sB, line.send(values[:m2]).line) - assert_equal(sB, g.segment(:b)) - assert_equal([line], sB.send(values[:collection])) - end - end - end - - def test_fragments_create_virtual_segments - g = RGFA.new(version: :gfa2) - g << (fr = "F\ta\tread10-\t0\t10\t990\t1000$\t*".to_rgfa_line) - assert_equal([:a], g.segments.map(&:name)) - assert(g.segment(:a).virtual?) 
- g << (sA = "S\ta\t1000\t*".to_rgfa_line) - assert_equal([:a], g.segments.map(&:name)) - assert(!g.segment(:a).virtual?) - assert_equal(sA, fr.sid) - assert_equal(sA, g.segment(:a)) - assert_equal([fr], sA.fragments) - end - - def test_paths_create_virtual_links - g = RGFA.new(version: :gfa1) - path = "P\tp1\tb+,ccc-,e+\t10M1I2M,15M".to_rgfa_line - g << path - path.segment_names.each {|i| assert(i.line.virtual?)} - assert_equal([:b, :ccc, :e], g.segments.map(&:name)) - g << (sB = "S\tb\t*".to_rgfa_line) - assert(!path.segment_names[0].line.virtual?) - assert_equal(sB, path.segment_names[0].line) - assert_equal([path], sB.paths) - path.links.each {|i| assert(i.line.virtual?)} - g << (l = "L\tccc\t+\tb\t-\t2M1D10M".to_rgfa_line) - assert(!path.links[0].line.virtual?) - assert_equal(l, path.links[0].line) - assert_equal([path], l.paths) - g << (l = "L\tccc\t-\te\t+\t15M".to_rgfa_line) - assert(!path.links[1].line.virtual?) - assert_equal(l, path.links[1].line) - assert_equal([path], l.paths) - end - - def test_ordered_groups_create_virtual_unknown_records - g = RGFA.new(version: :gfa2) - path = "O\tp1\tchildpath- b+ c- edge-".to_rgfa_line - g << path - path.items.each do |i| - assert(i.line.virtual?) - assert_equal(nil, i.line.record_type) - end - g << (childpath = "O\tchildpath\tf+ a+".to_rgfa_line) - assert(!path.items[0].line.virtual?) - assert_equal(childpath, path.items[0].line) - assert_equal([path], childpath.paths) - g << (sB = "S\tb\t1000\t*".to_rgfa_line) - assert(!path.items[1].line.virtual?) - assert_equal(sB, path.items[1].line) - assert_equal([path], sB.paths) - g << (edge = "E\tedge\te-\tc+\t0\t100\t900\t1000$\t*".to_rgfa_line) - assert(!path.items[-1].line.virtual?) - assert_equal(edge, path.items[-1].line) - assert_equal([path], edge.paths) - end - - def test_unordered_groups_create_virtual_unknown_records - g = RGFA.new(version: :gfa2) - set = "U\tset\tchildpath b childset edge".to_rgfa_line - g << set - set.items.each do |i| - assert(i.virtual?) 
- assert_equal(nil, i.record_type) - end - g << (childpath = "O\tchildpath\tf+ a+".to_rgfa_line) - assert(!set.items[0].virtual?) - assert_equal(childpath, set.items[0]) - assert_equal([set], childpath.sets) - g << (sB = "S\tb\t1000\t*".to_rgfa_line) - assert(!set.items[1].virtual?) - assert_equal(sB, set.items[1]) - assert_equal([set], sB.sets) - g << (childset = "U\tchildset\tg edge2".to_rgfa_line) - assert(!set.items[2].virtual?) - assert_equal(childset, set.items[2]) - assert_equal([set], childset.sets) - g << (edge = "E\tedge\te-\tc+\t0\t100\t900\t1000$\t*".to_rgfa_line) - assert(!set.items[3].virtual?) - assert_equal(edge, set.items[3]) - assert_equal([set], edge.sets) - end - -end diff --git a/test/test_api_rename_lines.rb b/test/test_api_rename_lines.rb deleted file mode 100644 index 81f5392..0000000 --- a/test/test_api_rename_lines.rb +++ /dev/null @@ -1,26 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::RenameLines < Test::Unit::TestCase - - def test_rename - gfa = ["S\t0\t*", "S\t1\t*", "S\t2\t*", "L\t0\t+\t2\t-\t12M", - "C\t1\t+\t0\t+\t12\t12M", "P\t4\t2+,0-\t12M"].to_rgfa - gfa.segment("0").name = "X" - assert_raises(RGFA::NotFoundError){gfa.segment!("0")} - assert_equal([:"X", :"1", :"2"].sort, gfa.segment_names.sort) - assert_equal("L\tX\t+\t2\t-\t12M", gfa.dovetails[0].to_s) - assert_equal("C\t1\t+\tX\t+\t12\t12M", gfa.containments[0].to_s) - assert_equal("P\t4\t2+,X-\t12M", gfa.paths[0].to_s) - assert_raises(RGFA::NotFoundError){gfa.segment!("0").dovetails_of_end(:R)} - assert_equal("L\tX\t+\t2\t-\t12M", gfa.segment("X").dovetails_of_end(:R)[0].to_s) - assert_equal("C\t1\t+\tX\t+\t12\t12M", - gfa.segment!("1").edges_to_contained[0].to_s) - assert_raises(RGFA::NotFoundError){gfa.segment!("0").containers} - assert_equal("C\t1\t+\tX\t+\t12\t12M", - gfa.segment!("X").edges_to_containers[0].to_s) - assert_equal("P\t4\t2+,X-\t12M", gfa.segment!("X").paths[0].to_s) - end - -end diff --git 
a/test/test_api_rgfa_basics.rb b/test/test_api_rgfa_basics.rb deleted file mode 100644 index 491fe9c..0000000 --- a/test/test_api_rgfa_basics.rb +++ /dev/null @@ -1,114 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -require "tempfile" - -TestAPI ||= Module.new -TestAPI::RGFA ||= Module.new - -class TestAPI::RGFA::Basics < Test::Unit::TestCase - - def test_initialize - assert_nothing_raised { RGFA.new } - gfa = RGFA.new - assert_equal(RGFA, gfa.class) - end - - def test_version_empty - gfa = RGFA.new - assert_equal(nil, gfa.version) - gfa = RGFA.new(version: :gfa1) - assert_equal(:gfa1, gfa.version) - gfa = RGFA.new(version: :gfa2) - assert_equal(:gfa2, gfa.version) - assert_raises(RGFA::VersionError) { RGFA.new(version: :"0.0") } - end - - def test_segment_names - gfa = RGFA.new - assert_equal([], gfa.segment_names) - gfa << "S\t1\t*" - gfa << "S\t2\t*" - assert_equal([:"1", :"2"], gfa.segment_names) - gfa.segment("1").disconnect - assert_equal([:"2"], gfa.segment_names) - end - - def test_path_names - gfa = RGFA.new(version: :gfa1) - assert_equal([], gfa.path_names) - gfa << "P\t3\t1+,4-\t*" - assert_equal([:"3"], gfa.path_names) - gfa.paths[0].disconnect - assert_equal([], gfa.path_names) - end - - def test_validate - gfa = RGFA.new(version: :gfa1) - gfa << "S\t1\t*" - assert_nothing_raised { gfa.validate } - gfa << "L\t1\t+\t2\t-\t*" - assert_raise(RGFA::NotFoundError) { gfa.validate } - gfa << "S\t2\t*" - assert_nothing_raised { gfa.validate } - gfa << "P\t3\t1+,4-\t*" - assert_raise(RGFA::NotFoundError) { gfa.validate } - gfa << "S\t4\t*" - assert_raise(RGFA::NotFoundError) { gfa.validate } - gfa << "L\t4\t+\t1\t-\t*" - assert_nothing_raised { gfa.validate } - end - - def test_to_s - lines = ["H\tVN:Z:1.0","S\t1\t*","S\t2\t*","S\t3\t*", - "L\t1\t+\t2\t-\t*","C\t1\t+\t3\t-\t12\t*","P\t4\t1+,2-\t*"] - gfa = RGFA.new - lines.each {|l| gfa << l} - assert_equal(lines.join("\n")+"\n", gfa.to_s) - end - - def test_to_rgfa - gfa = RGFA.new - gfa2 = 
gfa.to_rgfa - assert(gfa2) - assert_equal(RGFA, gfa2.class) - end - - def test_from_file - filename = "testdata/example1.gfa" - gfa = RGFA.from_file(filename) - assert(gfa) - assert_equal(IO.read(filename), gfa.to_s) - end - - def test_to_file - filename = "testdata/example1.gfa" - gfa = RGFA.from_file(filename) - tmp = Tempfile.new("example1") - gfa.to_file(tmp.path) - tmp.rewind - assert_equal(IO.read(filename), IO.read(tmp)) - end - - def test_string_to_rgfa - lines = ["H\tVN:Z:1.0","S\t1\t*","S\t2\t*","S\t3\t*", - "L\t1\t+\t2\t-\t*","C\t1\t+\t3\t-\t12\t*","P\t4\t1+,2-\t*"] - gfa1 = RGFA.new - lines.each {|l| gfa1 << l} - gfa2 = lines.join("\n").to_rgfa - assert(gfa2) - assert_equal(RGFA, gfa2.class) - assert_equal(gfa1.to_s, gfa2.to_s) - end - - def test_array_to_rgfa - lines = ["H\tVN:Z:1.0","S\t1\t*","S\t2\t*","S\t3\t*", - "L\t1\t+\t2\t-\t*","C\t1\t+\t3\t-\t12\t*","P\t4\t1+,2-\t*"] - gfa1 = RGFA.new - lines.each {|l| gfa1 << l} - gfa2 = lines.to_rgfa - assert(gfa2) - assert_equal(RGFA, gfa2.class) - assert_equal(gfa1.to_s, gfa2.to_s) - end - -end diff --git a/test/test_api_tags.rb b/test/test_api_tags.rb deleted file mode 100644 index e7079bf..0000000 --- a/test/test_api_tags.rb +++ /dev/null @@ -1,445 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Tags < Test::Unit::TestCase - - def test_predefined_tags - # correct type: - assert_nothing_raised do - RGFA::Line::Header.new(["VN:Z:1"], vlevel: 3) - end - # custom tags with the same letters as predefined tags but lower case - assert_nothing_raised do - RGFA::Line::Header.new(["vn:i:1"], vlevel: 3) - end - # wrong type - assert_nothing_raised do - RGFA::Line::Header.new(["VN:i:1"], vlevel: 0) - end - [1,2,3].each do |level| - assert_raise(RGFA::TypeError) do - RGFA::Line::Header.new(["VN:i:1"], vlevel: level) - end - end - end - - def test_custom_tags - [:gfa1, :gfa2].each do |version| - # upper case - assert_nothing_raised do - 
RGFA::Line::Header.new(["ZZ:Z:1"], version: version, vlevel: 0) - end - assert_nothing_raised do - "H\tZZ:Z:1".to_rgfa_line(version: version, vlevel: 0) - end - assert_nothing_raised do - "H\tZZ:Z:1".to_rgfa(version: version, vlevel: 0) - end - [1,2,3].each do |level| - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["ZZ:Z:1"], version: version, vlevel: level) - end - assert_raise(RGFA::FormatError) do - "H\tZZ:Z:1".to_rgfa_line(version: version, vlevel: level) - end - assert_raise(RGFA::FormatError) do - "H\tZZ:Z:1".to_rgfa(version: version, vlevel: level) - end - end - # lower case - [0,1,2,3].each do |level| - assert_nothing_raised do - RGFA::Line::Header.new(["zz:Z:1"], version: version, vlevel: 0) - "H\tzz:Z:1".to_rgfa_line(version: version, vlevel: 0) - "H\tzz:Z:1".to_rgfa(version: version, vlevel: 0) - end - end - end - end - - def test_wrong_tag_format - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["VN i:1"]) - end - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["vna:i:1"]) - end - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["VN:ZZ:1"]) - end - # the content can include :, so four : are e.g. 
not an error - assert_equal("1:1:1", RGFA::Line::Header.new(["VN:Z:1:1:1"]).VN) - end - - def test_wrong_tag_data - # validation level 0 - # - some wrong data passes through - assert_nothing_raised { - RGFA::Line::Header.new(["zz:B:i,1,1,A"], vlevel: 0) } - assert_nothing_raised { - RGFA::Line::Header.new(["zz:Z:i,\t1,1,A"], vlevel: 0) } - # - some errors are catched - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["zz:i:1A"], vlevel: 0) - end - # level > 0, wrong data is catched - [1,2,3].each do |level| - assert_raise(RGFA::ValueError) do - RGFA::Line::Header.new(["zz:B:i,1,1,A"], vlevel: level) - end - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["zz:i:1A"], vlevel: level) - end - end - end - - def test_duplicate_tag - [:gfa1, :gfa2].each do |version| - assert_nothing_raised do - RGFA::Line::Header.new(["zz:i:1", "VN:Z:1", "zz:i:2"], - version: version, vlevel: 0) - end - assert_nothing_raised do - "H\tzz:i:1\tVN:Z:0\tzz:i:2".to_rgfa_line(version: version, - vlevel: 0) - end - assert_nothing_raised do - "H\tzz:i:1\tVN:Z:0\tzz:i:2".to_rgfa(version: version, - vlevel: 0) - end - [1,2,3].each do |level| - assert_raise(RGFA::NotUniqueError) do - RGFA::Line::Header.new(["zz:i:1", "VN:Z:0", "zz:i:2"], - version: version, - vlevel: level) - end - assert_raise(RGFA::NotUniqueError) do - "H\tzz:i:1\tVN:Z:0\tzz:i:2".to_rgfa_line(version: version, - vlevel: level) - end - assert_raise(RGFA::NotUniqueError) do - "H\tzz:i:1\tVN:Z:#{version}\tzz:i:2".to_rgfa(version: version, - vlevel: level) - end - end - end - end - - def test_validate_field - l = RGFA::Line::Header.new(["zz:i:1", "VN:Z:1.0"], - version: :gfa1, vlevel: 0) - l.zz = "x" - assert_raise(RGFA::FormatError) { l.validate_field(:zz) } - l.set_datatype(:zz, :Z) - assert_nothing_raised { l.validate_field(:zz) } - end - - def test_validate - # wrong tag value - l = RGFA::Line::Header.new(["zz:i:1", "VN:Z:1.0"], - version: :gfa1, vlevel: 0) - l.zz = "x" - assert_raise(RGFA::FormatError) { 
l.validate } - # wrong predefined tag name - l = RGFA::Line::Header.new(["zz:i:1", "VZ:Z:1.0"], - version: :gfa1, vlevel: 0) - assert_raise(RGFA::FormatError) { l.validate } - # wrong predefined tag datatype - l = RGFA::Line::Header.new(["zz:i:1", "VN:i:1"], - version: :gfa1, vlevel: 0) - assert_raise(RGFA::FormatError) { l.validate } - end - - # test tags for get/set tests: - # - KC -> predefined, set - # - RC -> predefined, not set; - # - XX -> custom, invalid (upper case) - # - xx -> custom set - # - zz -> custom not set - - def test_get_tag_content - [:gfa1, :gfa2].each do |version| - [0,1,2,3].each do |level| - l = RGFA::Line::Segment::Factory.new(["12","*","xx:f:1.3","KC:i:10"], - vlevel: level) - # tagnames - assert_equal([:xx, :KC], l.tagnames) - # test presence of tag - assert(l.KC) - assert(!l.RC) - assert_raise(NoMethodError) { l.XX } - assert(l.xx) - assert(!l.zz) - # get tag content, fieldname methods - assert_equal(10, l.KC) - assert_equal(nil, l.RC) - assert_raise(NoMethodError) { l.XX } - assert_equal(1.3, l.xx) - assert_equal(nil, l.zz) - # get tag content, get() - assert_equal(10, l.get(:KC)) - assert_equal(nil, l.get(:RC)) - assert_equal(nil, l.get(:XX)) - assert_equal(1.3, l.get(:xx)) - assert_equal(nil, l.get(:zz)) - # banged version, fieldname methods - assert_equal(10, l.KC!) - assert_raise(RGFA::NotFoundError) { l.RC! } - assert_raise(NoMethodError) { l.XX! } - assert_equal(1.3, l.xx!) - assert_raise(RGFA::NotFoundError) { l.zz! 
} - # banged version, get() - assert_equal(10, l.get!(:KC)) - assert_raise(RGFA::NotFoundError) { l.get!(:RC) } - assert_raise(RGFA::NotFoundError) { l.get!(:XX) } - assert_equal(1.3, l.get!(:xx)) - assert_raise(RGFA::NotFoundError) { l.get!(:zz) } - # get tag datatype - assert_equal(:i, l.get_datatype(:KC)) - assert_equal(:i, l.get_datatype(:RC)) - assert_equal(nil, l.get_datatype(:XX)) - assert_equal(:f, l.get_datatype(:xx)) - assert_equal(nil, l.get_datatype(:zz)) - # as string: content only - assert_equal("10", l.field_to_s(:KC)) - assert_raise(RGFA::NotFoundError) { l.field_to_s(:RC) } - assert_raise(RGFA::NotFoundError) { l.field_to_s(:XX) } - assert_equal("1.3", l.field_to_s(:xx)) - assert_raise(RGFA::NotFoundError) { l.field_to_s(:zz) } - # as string: complete - assert_equal("KC:i:10", l.field_to_s(:KC, tag: true)) - assert_equal("xx:f:1.3", l.field_to_s(:xx, tag: true)) - # respond_to? normal version - assert(l.respond_to?(:KC)) - assert(l.respond_to?(:RC)) - assert(!l.respond_to?(:XX)) - assert(l.respond_to?(:xx)) - assert(l.respond_to?(:zz)) - # respond_to? 
banged version - assert(l.respond_to?(:KC!)) - assert(l.respond_to?(:RC!)) - assert(!l.respond_to?(:XX!)) - assert(l.respond_to?(:xx!)) - assert(l.respond_to?(:zz!)) - end - end - end - - def test_set_tag_content - [:gfa1, :gfa2].each do |version| - [0,3,4,5].each do |level| - l = RGFA::Line::Segment::Factory.new(["12","*","xx:f:13","KC:i:10"], - vlevel: level) - # set tag content, fieldname methods - assert_nothing_raised { l.KC = 12 }; assert_equal(12, l.KC) - assert_nothing_raised { l.RC = 12 }; assert_equal(12, l.RC) - assert_nothing_raised { l.xx = 1.2 }; assert_equal(1.2, l.xx) - assert_nothing_raised { l.zz = 1.2 }; assert_equal(1.2, l.zz) - # set tag content, set() - assert_nothing_raised { l.set(:KC, 14) }; assert_equal(14, l.KC) - assert_nothing_raised { l.set(:RC, 14) }; assert_equal(14, l.RC) - assert_nothing_raised { l.set(:xx, 1.4) }; assert_equal(1.4, l.xx) - assert_nothing_raised { l.set(:zz, 1.4) }; assert_equal(1.4, l.zz) - # respond to? - assert(l.respond_to?(:KC=)) - assert(l.respond_to?(:RC=)) - assert(!l.respond_to?(:XX=)) - assert(l.respond_to?(:xx=)) - assert(l.respond_to?(:zz=)) - # set datatype for predefined field - assert_raise(RGFA::RuntimeError) { l.set_datatype(:KC, :Z) } - assert_raise(RGFA::RuntimeError) { l.set_datatype(:RC, :Z) } - # set datatype for non-existing custom tag - assert_nothing_raised { l.set_datatype(:zz, :i) } - if level == 0 - assert_nothing_raised { l.set_datatype(:XX, :Z) } - elsif level >= 1 - assert_raise(RGFA::FormatError) { l.set_datatype(:XX, :Z) } - end - # change datatype for existing custom tag - assert_nothing_raised { l.xx = 1.1 } - assert_nothing_raised { l.xx = "1.1" } - if level == 2 - assert_nothing_raised { l.xx = "1A" } - assert_raise(RGFA::FormatError) { l.to_s } - elsif level == 3 - assert_raise(RGFA::FormatError) { l.xx = "1A" } - end - assert_nothing_raised { l.set_datatype(:xx, :Z); l.xx = "1A" } - # unknown datatype - assert_raise(RGFA::ArgumentError) { l.set_datatype(:xx, :P) } - end - end 
- end - - def test_delete_tag - [:gfa1, :gfa2].each do |version| - [0,3,4,5].each do |level| - l = RGFA::Line::Segment::Factory.new(["12","*","xx:f:13","KC:i:10"], - vlevel: level) - # delete method - assert_nothing_raised { l.delete(:KC) } - assert_equal(nil, l.KC) - assert_equal([:xx], l.tagnames) - assert_nothing_raised { l.delete(:RC) } - assert_nothing_raised { l.delete(:XX) } - assert_nothing_raised { l.delete(:xx) } - assert_equal([], l.tagnames) - assert_nothing_raised { l.delete(:zz) } - l = RGFA::Line::Segment::Factory.new(["12","*","xx:f:13","KC:i:10"], - vlevel: level) - # set to nil - assert_nothing_raised { l.set(:KC,nil) } - assert_equal(nil, l.KC) - assert_equal([:xx], l.tagnames) - assert_nothing_raised { l.set(:RC,nil) } - if level == 0 - assert_nothing_raised { l.set(:XX,nil) } - else - assert_raises(RGFA::FormatError) { l.set(:XX,nil) } - end - assert_nothing_raised { l.set(:xx,nil) } - assert_equal([], l.tagnames) - assert_nothing_raised { l.set(:zz,nil) } - end - end - end - - def test_datatype_to_ruby_objects - l = RGFA::Line::Header.new(["a1:A:1", "z1:Z:hallo", - "b1:B:c,12,12", "b2:B:f,1E-2,3.0,3", - "h1:H:00A1", - "j1:J:[12,\"a\"]", "j2:J:{\"a\":1,\"b\":[2,3]}", - "f1:f:-1.23E-04", "i1:i:-123"]) - assert_equal(String, l.a1.class) - assert_equal(String, l.z1.class) - assert_equal(RGFA::NumericArray, l.b1.class) - assert_equal(RGFA::NumericArray, l.b2.class) - assert_equal(RGFA::ByteArray, l.h1.class) - assert_equal(Array, l.j1.class) - assert_equal(Hash, l.j2.class) - assert_equal(Fixnum, l.i1.class) - assert_equal(Float, l.f1.class) - end - - def test_ruby_object_to_datatype - l = RGFA::Line::Header.new([]) - # String - assert_nothing_raised { l.zz="1" } - assert_equal("1", l.zz) - assert_equal(:"Z", l.get_datatype(:zz)) - assert_equal("1", l.field_to_s(:zz)) - assert_equal("1", l.to_s.to_rgfa_line.zz) - # Integer - assert_nothing_raised { l.ii=1 } - assert_equal(1, l.ii) - assert_equal(:"i", l.get_datatype(:ii)) - assert_equal("1", 
l.field_to_s(:ii)) - assert_equal(1, l.to_s.to_rgfa_line.ii) - # Float - assert_nothing_raised { l.ff=1.0 } - assert_equal(1.0, l.ff) - assert_equal(:"f", l.get_datatype(:ff)) - assert_equal("1.0", l.field_to_s(:ff)) - assert_equal(1.0, l.to_s.to_rgfa_line.ff) - # Array: all floats - assert_nothing_raised { l.af=[1.0,1.0] } - assert_equal([1.0,1.0], l.af) - assert_equal(:"B", l.get_datatype(:af)) - assert_equal("f,1.0,1.0", l.field_to_s(:af)) - assert_equal([1.0,1.0].to_byte_array, l.to_s.to_rgfa_line.af) - # Array: all integers - assert_nothing_raised { l.ai=[1,1] } - assert_equal([1,1], l.ai) - assert_equal(:"B", l.get_datatype(:ai)) - assert_equal("C,1,1", l.field_to_s(:ai)) - assert_equal([1,1].to_byte_array, l.to_s.to_rgfa_line.ai) - # Array: anything else - assert_nothing_raised { l.aa=[1,1.0,:X] } - assert_equal([1,1.0,:X], l.aa) - assert_equal(:"J", l.get_datatype(:aa)) - assert_equal('[1,1.0,"X"]', l.field_to_s(:aa)) - assert_equal([1,1.0,"X"], l.to_s.to_rgfa_line.aa) - # Hash - assert_nothing_raised { l.hh={:a => 1.0, :b => 1} } - assert_equal({:a=>1.0,:b=>1}, l.hh) - assert_equal(:"J", l.get_datatype(:hh)) - assert_equal('{"a":1.0,"b":1}', l.field_to_s(:hh)) - assert_equal({"a"=>1.0,"b"=>1}, l.to_s.to_rgfa_line.hh) - # RGFA::ByteArray - assert_nothing_raised { l.ba=[0,255].to_byte_array } - assert_equal([0,255].to_byte_array, l.ba) - assert_equal(:H, l.get_datatype(:ba)) - assert_equal('00FF', l.field_to_s(:ba)) - assert_equal([0,255].to_byte_array, l.to_s.to_rgfa_line.ba) - end - - def test_byte_arrays - # creation: new, from array, from string - a,b,c=nil - assert_nothing_raised { a = RGFA::ByteArray.new([1,2,3,4,5]) } - assert_nothing_raised { b = [1,2,3,4,5].to_byte_array } - assert_equal(a, b) - assert_nothing_raised { c = "12ACF4AA601C1F".to_byte_array } - assert_equal([18, 172, 244, 170, 96, 28, 31].to_byte_array, c) - # validation - assert_nothing_raised { a.validate } - assert_nothing_raised { a = RGFA::ByteArray.new([1,2,3,4,356]) } - 
assert_raises(RGFA::ValueError) { a.validate } - assert_raises(RGFA::FormatError) { a = "12ACF4AA601C1".to_byte_array } - assert_raises(RGFA::FormatError) { a = "".to_byte_array } - assert_raises(RGFA::FormatError) { a = "12ACG4AA601C1F".to_byte_array } - # to string - a = [18, 172, 244, 170, 96, 28, 31].to_byte_array - assert_equal("12ACF4AA601C1F", a.to_s) - a[2] = 280 - assert_raises(RGFA::ValueError) { a.to_s } - end - - def test_numeric_arrays - # creation: new, from array, from string - a,b,c=nil - assert_nothing_raised { a = RGFA::NumericArray.new([1,2,3,4,5]) } - assert_nothing_raised { b = [1,2,3,4,5].to_numeric_array } - assert_equal(a, b) - assert_nothing_raised { c = "i,1,2,3,4,5".to_numeric_array } - assert_equal([1, 2, 3, 4, 5].to_numeric_array, c) - # validation - assert_nothing_raised { a.validate } - assert_nothing_raised { RGFA::NumericArray.new([1,2,3,4,356]).validate } - assert_raises(RGFA::ValueError) { - RGFA::NumericArray.new([1,2.0,3,4,356]).validate } - assert_raises(RGFA::ValueError) { - RGFA::NumericArray.new([1.0,2.0,3,4,356]).validate } - assert_raises(RGFA::ValueError) { - RGFA::NumericArray.new([1,:x,3,4,356]).validate } - assert_raises(RGFA::ValueError) { a = "i,1,X,2".to_numeric_array } - assert_raises(RGFA::FormatError) { a = "".to_numeric_array } - assert_raises(RGFA::FormatError) { a = "i,1,2,".to_numeric_array } - assert_raises(RGFA::TypeError) { a = "x,1,2".to_numeric_array } - # to string - a = [18, 72, 244, 70, 96, 38, 31].to_numeric_array - assert_equal("C", a.compute_subtype) - assert_equal("C,18,72,244,70,96,38,31", a.to_s) - a[2] = -2 - assert_equal("c", a.compute_subtype) - assert_equal("c,18,72,-2,70,96,38,31", a.to_s) - a[2] = 280 - assert_equal("S", a.compute_subtype) - assert_equal("S,18,72,280,70,96,38,31", a.to_s) - a[2] = -280 - assert_equal("s", a.compute_subtype) - assert_equal("s,18,72,-280,70,96,38,31", a.to_s) - a[2] = 280000 - assert_equal("I", a.compute_subtype) - assert_equal("I,18,72,280000,70,96,38,31", 
a.to_s) - a[2] = -280000 - assert_equal("i", a.compute_subtype) - assert_equal("i,18,72,-280000,70,96,38,31", a.to_s) - a.map! {|x|x.to_f} - assert_equal("f", a.compute_subtype) - assert_equal("f,18.0,72.0,-280000.0,70.0,96.0,38.0,31.0", a.to_s) - end - -end diff --git a/test/test_api_version.rb b/test/test_api_version.rb deleted file mode 100644 index 5d23489..0000000 --- a/test/test_api_version.rb +++ /dev/null @@ -1,277 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::Version < Test::Unit::TestCase - - def test_init_without_version_by_init - gfa = RGFA.new() - assert_equal(nil, gfa.version) - end - - def test_init_GFA1 - gfa = RGFA.new(version: :gfa1) - assert_equal(:gfa1, gfa.version) - end - - def test_init_GFA2 - gfa = RGFA.new(version: :gfa2) - assert_equal(:gfa2, gfa.version) - end - - def test_init_invalid_version - assert_raises(RGFA::VersionError) { RGFA.new(version: :"x.x") } - end - - def test_GFA1_header - hother = "H\taa:A:a\tff:f:1.1" - hv1 = "H\tzz:Z:test\tVN:Z:1.0\tii:i:11" - gfa = RGFA.new() - gfa << hother - assert_equal(nil, gfa.version) - gfa << hv1 - assert_equal(:gfa1, gfa.version) - end - - def test_GFA2_header - hother = "H\taa:A:a\tff:f:1.1" - hv2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11" - gfa = RGFA.new() - gfa << hother - assert_equal(nil, gfa.version) - gfa << hv2 - assert_equal(:gfa2, gfa.version) - end - - def test_unknown_version_in_header - hother = "H\taa:A:a\tff:f:1.1" - hvx = "H\tzz:Z:test\tVN:Z:x.x\tii:i:11" - gfa = RGFA.new() - gfa << hother - assert_equal(nil, gfa.version) - assert_raises(RGFA::VersionError) { gfa << hvx } - end - - def test_wrong_version_in_header - hother = "H\taa:A:a\tff:f:1.1" - hv2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11" - gfa = RGFA.new(version: :gfa1) - gfa << hother - assert_equal(:gfa1, gfa.version) - assert_raises(RGFA::VersionError) { gfa << hv2 } - end - - def test_conflicting_versions_in_header - hother = "H\taa:A:a\tff:f:1.1" - hv1 = 
"H\tzz:Z:test\tVN:Z:1.0\tii:i:11" - hv2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11" - gfa = RGFA.new() - gfa << hother - gfa << hv1 - assert_raises(RGFA::VersionError) { gfa << hv2 } - end - - def test_version_by_segment_GFA1_syntax - sv1 = "S\tA\t*" - gfa = RGFA.new() - gfa << sv1 - assert_equal(:gfa1, gfa.version) - end - - def test_version_by_segment_GFA2_syntax - sv2 = "S\tB\t100\t*" - gfa = RGFA.new() - gfa << sv2 - assert_equal(:gfa2, gfa.version) - end - - def test_GFA2_segment_in_GFA1 - sv1 = "S\tA\t*" - sv2 = "S\tB\t100\t*" - gfa = RGFA.new() - gfa << sv1 - assert_raises(RGFA::VersionError) { gfa << sv2 } - end - - def test_GFA1_segment_in_GFA2 - sv1 = "S\tA\t*" - sv2 = "S\tB\t100\t*" - gfa = RGFA.new() - gfa << sv2 - assert_raises(RGFA::VersionError) { gfa << sv1 } - end - - def test_version_by_GFA2_specific_line_E - e = "E\t*\tA+\tB+\t0\t10\t20\t30\t*" - gfa = RGFA.new() - gfa << e - assert_equal(:gfa2, gfa.version) - end - - def test_version_by_GFA2_specific_line_G - g = "G\t*\tA+\tB-\t1000\t*" - gfa = RGFA.new() - gfa << g - assert_equal(:gfa2, gfa.version) - end - - def test_version_by_GFA2_specific_line_F - f = "F\tX\tID+\t10\t100\t0\t90$\t*" - gfa = RGFA.new() - gfa << f - assert_equal(:gfa2, gfa.version) - end - - def test_version_by_GFA2_specific_line_O - o = "O\tX\tA+ B- C+" - gfa = RGFA.new() - gfa << o - assert_equal(:gfa2, gfa.version) - end - - def test_version_by_GFA2_specific_line_U - u = "U\tX\tA B C" - gfa = RGFA.new() - gfa << u - assert_equal(:gfa2, gfa.version) - end - - def test_version_guess_GFA1_specific_line_L - str = "L\tA\t-\tB\t+\t*" - gfa = RGFA.new() - gfa << str - gfa.process_line_queue - assert_equal(:gfa1, gfa.version) - end - - def test_version_guess_GFA1_specific_line_C - str = "C\tA\t+\tB\t-\t10\t*" - gfa = RGFA.new() - gfa << str - gfa.process_line_queue - assert_equal(:gfa1, gfa.version) - end - - def test_version_guess_GFA1_specific_line_P - str = "P\t1\ta-,b+\t*" - gfa = RGFA.new() - gfa << str - gfa.process_line_queue - 
assert_equal(:gfa1, gfa.version) - end - - def test_version_guess_default - gfa = RGFA.new() - gfa.process_line_queue - assert_equal(:gfa2, gfa.version) - end - - def test_header_version - assert_equal(:generic, "H\tVN:Z:1.0".to_rgfa_line.version) - assert_equal(:gfa1, "H\tVN:Z:1.0".to_rgfa_line(version: :gfa1).version) - assert_equal(:gfa2, "H\tVN:Z:1.0".to_rgfa_line(version: :gfa2).version) - end - - def test_comment_version - assert_equal(:generic, "# VN:Z:1.0".to_rgfa_line.version) - assert_equal(:gfa1, "# VN:Z:1.0".to_rgfa_line(version: :gfa1).version) - assert_equal(:gfa2, "# VN:Z:1.0".to_rgfa_line(version: :gfa2).version) - end - - def test_segment_version - assert_equal(:gfa1, "S\tA\tNNNN".to_rgfa_line.version) - assert_equal(:gfa2, "S\tA\t1\tNNNN".to_rgfa_line.version) - assert_equal(:gfa1, "S\tA\tNNNN".to_rgfa_line(version: :gfa1).version) - assert_equal(:gfa2, "S\tA\t1\tNNNN".to_rgfa_line(version: :gfa2).version) - assert_raises(RGFA::FormatError){ - "S\tA\t1\tNNNN".to_rgfa_line(version: :gfa1)} - assert_raises(RGFA::FormatError){ - "S\tA\tNNNN".to_rgfa_line(version: :gfa2)} - end - - def test_link_version - str = "L\tA\t+\tB\t-\t*" - assert_equal(:gfa1, str.to_rgfa_line.version) - assert_equal(:gfa1, str.to_rgfa_line(version: :gfa1).version) - assert_raises(RGFA::VersionError){str.to_rgfa_line(version: :gfa2)} - assert_raises(RGFA::VersionError){ - RGFA::Line::Edge::Link.new(["A","+","B","-","*"], version: :gfa2)} - end - - def test_containment_version - str = "C\tA\t+\tB\t-\t10\t*" - assert_equal(:gfa1, str.to_rgfa_line.version) - assert_equal(:gfa1, str.to_rgfa_line(version: :gfa1).version) - assert_raises(RGFA::VersionError){str.to_rgfa_line(version: :gfa2)} - assert_raises(RGFA::VersionError){ - RGFA::Line::Edge::Containment.new(["A","+","B","-","10","*"], - version: :gfa2)} - end - - def test_edge_version - assert_equal(:gfa2, "E\t*\tA-\tB+\t0\t100\t0\t100\t*".to_rgfa_line.version) - assert_equal(:gfa2, 
"E\t*\tA-\tB+\t0\t100\t0\t100\t*".to_rgfa_line(version: - :gfa2).version) - assert_raises(RGFA::VersionError){ - "E\t*\tA-\tB+\t0\t100\t0\t100\t*".to_rgfa_line(version: :gfa1)} - assert_raises(RGFA::VersionError){ - RGFA::Line::Edge::GFA2.new(["A-","B+", "0", "100", "0", "100", "*"], - version: :gfa1)} - end - - def test_gap_version - assert_equal(:gfa2, "G\t*\tA-\tB+\t100\t*".to_rgfa_line.version) - assert_equal(:gfa2, "G\t*\tA-\tB+\t100\t*".to_rgfa_line(version: - :gfa2).version) - assert_raises(RGFA::VersionError){ - "G\t*\tA-\tB+\t100\t*".to_rgfa_line(version: :gfa1)} - assert_raises(RGFA::VersionError){ - RGFA::Line::Gap.new(["A-","B+", "100", "*"], version: :gfa1)} - end - - def test_fragment_version - assert_equal(:gfa2, "F\tA\tread1-\t0\t100\t0\t100\t*".to_rgfa_line.version) - assert_equal(:gfa2, "F\tA\tread1-\t0\t100\t0\t100\t*".to_rgfa_line(version: - :gfa2).version) - assert_raises(RGFA::VersionError){ - "F\tA\tread1-\t0\t100\t0\t100\t*".to_rgfa_line(version: :gfa1)} - assert_raises(RGFA::VersionError){ - RGFA::Line::Fragment.new(["A","read-", "0", "100", "0", "100", "*"], - version: :gfa1)} - end - - def test_custom_record_version - assert_equal(:gfa2, "X\tVN:Z:1.0".to_rgfa_line.version) - assert_equal(:gfa2, "X\tVN:Z:1.0".to_rgfa_line(version: :gfa2).version) - assert_raises(RGFA::VersionError){ - "X\tVN:Z:1.0".to_rgfa_line(version: :gfa1)} - assert_raises(RGFA::VersionError){ - RGFA::Line::CustomRecord.new(["X","VN:Z:1.0"], version: :gfa1)} - end - - def test_path_version - str = "P\t1\tA+,B-\t*" - assert_equal(:gfa1, str.to_rgfa_line.version) - assert_equal(:gfa1, str.to_rgfa_line(version: :gfa1).version) - assert_raises(RGFA::VersionError){str.to_rgfa_line(version: :gfa2)} - str = "O\t1\tA+ B-" - assert_equal(:gfa2, str.to_rgfa_line.version) - assert_equal(:gfa2, str.to_rgfa_line(version: :gfa2).version) - assert_raises(RGFA::VersionError){str.to_rgfa_line(version: :gfa1)} - end - - def test_set_version - str = "U\t1\tA B C" - assert_equal(:gfa2, 
str.to_rgfa_line.version) - assert_equal(:gfa2, str.to_rgfa_line(version: :gfa2).version) - assert_raises(RGFA::VersionError){str.to_rgfa_line(version: :gfa1)} - end - - def test_unknown_record_version - assert_equal(:gfa2, RGFA::Line::Unknown.new(["A"]).version) - assert_equal(:gfa2, RGFA::Line::Unknown.new(["A"], version: :gfa2).version) - assert_raises(RGFA::VersionError){ - RGFA::Line::Unknown.new(["A"], version: :gfa1)} - end - -end diff --git a/test/test_api_version_conversion.rb b/test/test_api_version_conversion.rb deleted file mode 100644 index d57ed37..0000000 --- a/test/test_api_version_conversion.rb +++ /dev/null @@ -1,216 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestAPI ||= Module.new - -class TestAPI::VersionConversion < Test::Unit::TestCase - - def test_header_conversion - gfa1str = "H\tVN:Z:1.0" - gfa2str = "H\tVN:Z:2.0" - assert_equal(gfa1str, gfa2str.to_rgfa_line.to_gfa1.to_s) - assert_equal(gfa1str, gfa2str.to_rgfa_line.to_gfa1.to_s) - assert_equal(gfa2str, gfa1str.to_rgfa_line.to_gfa2.to_s) - assert_equal(gfa2str, gfa2str.to_rgfa_line.to_gfa2.to_s) - end - - def test_comment_conversion - assert_equal("# comment", - "# comment".to_rgfa_line(version: :gfa1).to_gfa1.to_s) - assert_equal("# comment", - "# comment".to_rgfa_line(version: :gfa2).to_gfa1.to_s) - assert_equal("# comment", - "# comment".to_rgfa_line(version: :gfa1).to_gfa2.to_s) - assert_equal("# comment", - "# comment".to_rgfa_line(version: :gfa2).to_gfa2.to_s) - end - - def test_segment_conversion - assert_equal("S\tA\tNNNN", "S\tA\tNNNN".to_rgfa_line.to_gfa1.to_s) - assert_equal("S\tA\t4\tNNNN", "S\tA\tNNNN".to_rgfa_line.to_gfa2.to_s) - assert_equal("S\tA\tNNNN\tLN:i:4", - "S\tA\t4\tNNNN".to_rgfa_line.to_gfa1.to_s) - assert_equal("S\tA\t4\tNNNN", "S\tA\t4\tNNNN".to_rgfa_line.to_gfa2.to_s) - # wrong sequence alphabet for GFA2->GFA1 - assert_equal("S\tA\t4\t[[]]", "S\tA\t4\t[[]]".to_rgfa_line.to_gfa2.to_s) - 
assert_raise(RGFA::FormatError){"S\tA\t4\t[[]]".to_rgfa_line.to_gfa1} - # wrong identifier for GFA2->GFA1 - assert_equal("S\tA+,\t3\tNNN", "S\tA+,\t3\tNNN".to_rgfa_line.to_gfa2.to_s) - assert_raise(RGFA::FormatError){"S\tA+,\t3\tNNN".to_rgfa_line.to_gfa1} - # sequence not available but LN for GFA1->GFA2 - assert_equal("S\tA\t4\t*", "S\tA\t*\tLN:i:4".to_rgfa_line.to_gfa2.to_s) - # both sequence and LN not available for GFA1->GFA2 - assert_raise(RGFA::NotFoundError){"S\tA\t*".to_rgfa_line.to_gfa2} - end - - def test_link_conversion - gfa1str = "L\tA\t+\tB\t-\t100M" - gfa1str_noov = "L\tA\t+\tB\t+\t*" - gfa2str = "E\t*\tA+\tB-\t100\t200$\t100\t200$\t100M" - # not connected - assert_raise(RGFA::RuntimeError) {gfa1str.to_rgfa_line.to_gfa2} - # connected - g = RGFA.new - g << "S\tA\t*\tLN:i:200" - g << "S\tB\t*\tLN:i:200" - g << (gfa1line = gfa1str.to_rgfa_line) - g << (gfa1line_noov = gfa1str_noov.to_rgfa_line) - assert_equal(gfa2str, gfa1line.to_gfa2.to_s) - assert_equal(gfa1str, gfa1line.to_gfa1.to_s) - # placeholder overlap - assert_raise(RGFA::ValueError) {gfa1line_noov.to_rgfa_line.to_gfa2} - # TODO check if the alignment is compatible with the segment length - end - - def test_containment_conversion - gfa1str = "C\tA\t+\tB\t-\t20\t100M" - gfa1str_noov = "C\tA\t+\tB\t+\t20\t*" - gfa2str = "E\t*\tA+\tB-\t20\t120\t0\t100$\t100M" - # not connected - assert_raise(RGFA::RuntimeError) {gfa1str.to_rgfa_line.to_gfa2} - # connected - g = RGFA.new - g << "S\tA\t*\tLN:i:200" - g << "S\tB\t*\tLN:i:100" - g << (gfa1line = gfa1str.to_rgfa_line) - g << (gfa1line_noov = gfa1str_noov.to_rgfa_line) - assert_equal(gfa2str, gfa1line.to_gfa2.to_s) - assert_equal(gfa1str, gfa1line.to_gfa1.to_s) - # placeholder overlap - assert_raise(RGFA::ValueError) {gfa1line_noov.to_rgfa_line.to_gfa2} - # TODO check if the alignment is compatible with the segment length - end - - def test_edge_conversion - dovetail = "E\t*\tA+\tB-\t100\t200$\t100\t200$\t100M" - dovetail_gfa1 = "L\tA\t+\tB\t-\t100M" - 
containment = "E\t*\tA+\tB-\t20\t120\t0\t100$\t100M" - containment_gfa1 = "C\tA\t+\tB\t-\t20\t100M" - internal = "E\t*\tA+\tB-\t20\t110\t10\t100$\t90M" - assert_equal(dovetail_gfa1, dovetail.to_rgfa_line.to_gfa1.to_s) - assert_equal(containment_gfa1, containment.to_rgfa_line.to_gfa1.to_s) - assert_raise(RGFA::ValueError){internal.to_rgfa_line.to_gfa1} - end - - def test_L_to_E - g = RGFA.new(version: :gfa1) - g << "S\t1\t*\tLN:i:100" - g << "S\t2\t*\tLN:i:100" - g << "S\t3\t*\tLN:i:100" - g << "S\t4\t*\tLN:i:100" - g << "L\t1\t+\t2\t+\t10M" - g << "L\t1\t-\t2\t-\t20M" - g << "L\t3\t-\t4\t+\t30M" - g << "L\t3\t+\t4\t-\t40M" - assert_equal("E * 1+ 2+ 90 100$ 0 10 10M", - g.dovetails[0].to_gfa2_s) - assert_equal("E * 1- 2- 0 20 80 100$ 20M", - g.dovetails[1].to_gfa2_s) - assert_equal("E * 3- 4+ 0 30 0 30 30M", - g.dovetails[2].to_gfa2_s) - assert_equal("E * 3+ 4- 60 100$ 60 100$ 40M", - g.dovetails[3].to_gfa2_s) - assert_equal(RGFA::Line::Edge::Link, g.dovetails[0].to_gfa1.class) - assert_equal(RGFA::Line::Edge::GFA2, g.dovetails[0].to_gfa2.class) - end - - def test_E_to_L - e1 = "E\t*\t1+\t2+\t90\t100$\t0\t10\t10M".to_rgfa_line - l1 = "L\t1\t+\t2\t+\t10M" - assert_equal(l1, e1.to_gfa1_s) - e2 = "E\t*\t1+\t2+\t0\t20\t80\t100$\t20M".to_rgfa_line - l2 = "L\t2\t+\t1\t+\t20M" - assert_equal(l2, e2.to_gfa1_s) - e3 = "E\t*\t3-\t4+\t0\t30\t0\t30\t30M".to_rgfa_line - l3 = "L\t3\t-\t4\t+\t30M" - assert_equal(l3, e3.to_gfa1_s) - e4 = "E\t*\t3+\t4-\t60\t100$\t60\t100$\t40M".to_rgfa_line - l4 = "L\t3\t+\t4\t-\t40M" - assert_equal(l4, e4.to_gfa1_s) - end - - def test_path_conversion - path_gfa1 = "P\t1\ta+,b-\t100M" - path_gfa2 = "O\t1\ta+ a_to_b+ b-" - # gfa1 => gfa2 - l1 = "L\ta\t+\tb\t-\t100M\tid:Z:a_to_b" - g1 = RGFA.new - g1 << (path_gfa1_line = path_gfa1.to_rgfa_line) - g1 << l1 - g1.process_line_queue - # not connected - assert_raise(RGFA::RuntimeError) {path_gfa1.to_rgfa_line.to_gfa2} - # connected - assert_equal(path_gfa1, path_gfa1_line.to_gfa1.to_s) - 
assert_equal(path_gfa2, path_gfa1_line.to_gfa2.to_s) - # gfa2 => gfa1 - e = "E\ta_to_b\ta+\tb-\t100\t200$\t100\t200$\t100M" - sA = "S\ta\t200\t*" - sB = "S\tb\t200\t*" - g2 = RGFA.new - g2 << (path_gfa2_line = path_gfa2.to_rgfa_line) - g2 << e - g2 << sA - g2 << sB - # not connected - assert_raise(RGFA::RuntimeError) {path_gfa2.to_rgfa_line.to_gfa1} - # connected - assert_equal(path_gfa1, path_gfa2_line.to_gfa1.to_s) - assert_equal(path_gfa2, path_gfa2_line.to_gfa2.to_s) - end - - def test_gap_conversion - str = "G\t*\tA-\tB+\t100\t*" - assert_equal(str, str.to_rgfa_line.to_gfa2.to_s) - assert_raises(RGFA::VersionError){str.to_rgfa_line.to_gfa1} - end - - def test_fragment_conversion - str = "F\tA\tread1-\t0\t100\t0\t100\t*" - assert_equal(str, str.to_rgfa_line.to_gfa2.to_s) - assert_raises(RGFA::VersionError){str.to_rgfa_line.to_gfa1} - end - - def test_set_conversion - str = "U\t1\tA B C" - assert_equal(str, str.to_rgfa_line.to_gfa2.to_s) - assert_raises(RGFA::VersionError){str.to_rgfa_line.to_gfa1} - end - - def test_custom_record_conversion - str = "X\tx1\tA\tC" - assert_equal(str, str.to_rgfa_line.to_gfa2.to_s) - assert_raises(RGFA::VersionError){str.to_rgfa_line.to_gfa1} - end - - def test_unknown_record_conversion - record = RGFA::Line::Unknown.new(["A"]) - assert_equal(record, record.to_gfa2) - assert_raises(RGFA::VersionError){record.to_gfa1} - end - - def test_gfa_conversion - gfa1_str =<<-END -# comment -H\tVN:Z:1.0 -S\tA\t*\tLN:i:200 -S\tB\t*\tLN:i:200 -S\tC\t*\tLN:i:100 -C\tA\t+\tC\t-\t20\t100M -L\tA\t+\tB\t-\t100M\tid:Z:a_to_b -P\t1\tA+,B-\t100M - END - gfa2_str =<<-END -# comment -H\tVN:Z:2.0 -S\tA\t200\t* -S\tB\t200\t* -S\tC\t100\t* -E\ta_to_b\tA+\tB-\t100\t200$\t100\t200$\t100M -E\t*\tA+\tC-\t20\t120\t0\t100$\t100M -O\t1\tA+ a_to_b+ B- - END - assert_equal(gfa2_str, gfa1_str.to_rgfa.to_gfa2_s) - assert_equal(gfa1_str, gfa2_str.to_rgfa.to_gfa1_s) - end - -end diff --git a/test/test_internals_field_parser.rb b/test/test_internals_field_parser.rb 
deleted file mode 100644 index cbcc97b..0000000 --- a/test/test_internals_field_parser.rb +++ /dev/null @@ -1,63 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -# Test the methods in RGFA::Field::Parser using different data types -# and examples of valid and invalid data for each datatype - -# XXX: positionals -# XXX: invalid data -# XXX: coordinate with validator/writer and with test_api_tags and -# test_api_positionals - -class (TestInternals||=Module.new)::FieldParser < Test::Unit::TestCase - - def test_parse_gfa_tag - o = "AA:i:1" - assert_equal([:AA,:i,"1"], o.parse_gfa_tag) - assert_raise(RGFA::FormatError) do - "1A:A:A".parse_gfa_tag - end - assert_raise(RGFA::FormatError) do - "_A:A:A".parse_gfa_tag - end - assert_raise(RGFA::FormatError) do - "A:A:A".parse_gfa_tag - end - assert_raise(RGFA::FormatError) do - "AAA:A:A".parse_gfa_tag - end - assert_raise(RGFA::FormatError) {"AA:C:1".parse_gfa_tag} - assert_raise(RGFA::FormatError) {"AA:AA:1".parse_gfa_tag} - assert_raise(RGFA::FormatError) {"AA:a:1".parse_gfa_tag} - end - - def test_parse_gfa_field_A - assert_equal("1", "1".parse_gfa_field(:A)) - end - - def test_parse_gfa_field_i - assert_equal(12, "12".parse_gfa_field(:i)) - end - - def test_parse_gfa_field_f - assert_equal(1.2, "1.2".parse_gfa_field(:f)) - end - - def test_parse_gfa_field_Z - assert_equal("1.2", "1.2".parse_gfa_field(:Z)) - end - - def test_parse_gfa_field_H - assert_equal([26], "1A".parse_gfa_field(:H)) - end - - def test_parse_gfa_field_B - assert_equal([12,12,12], "c,12,12,12".parse_gfa_field(:B)) - assert_equal([1.2,1.2,1.2], "f,1.2,1.2,1.2".parse_gfa_field(:B)) - end - - def test_parse_gfa_field_J - assert_equal({"1" => 2}, "{\"1\":2}".parse_gfa_field(:J)) - end - -end diff --git a/test/test_internals_field_validator.rb b/test/test_internals_field_validator.rb deleted file mode 100644 index 57fe273..0000000 --- a/test/test_internals_field_validator.rb +++ /dev/null @@ -1,57 +0,0 @@ -require_relative 
"../lib/rgfa.rb" -require "test/unit" - -# XXX: see parser -class (TestInternals||=Module.new)::FieldValidator < Test::Unit::TestCase - - def test_field_gfa_field_validate_i - assert_nothing_raised { "1".validate_gfa_field(:i) } - assert_nothing_raised { "12".validate_gfa_field(:i) } - assert_nothing_raised { "-12".validate_gfa_field(:i) } - assert_raise(RGFA::FormatError) {"1A".validate_gfa_field(:i)} - assert_raise(RGFA::FormatError) {"A1".validate_gfa_field(:i)} - assert_raise(RGFA::FormatError) {"2.1".validate_gfa_field(:i)} - end - - def test_field_gfa_field_validate_A - assert_nothing_raised { "A".validate_gfa_field(:A) } - assert_raise(RGFA::FormatError) {"AA".validate_gfa_field(:A)} - end - - def test_field_gfa_field_validate_f - assert_nothing_raised { "-12.1".validate_gfa_field(:f) } - assert_nothing_raised { "-12.1E-2".validate_gfa_field(:f) } - assert_raise(RGFA::FormatError) do - "2.1X".validate_gfa_field(:f) - end - end - - def test_field_gfa_field_validate_Z - assert_nothing_raised { "-12.1E-2".validate_gfa_field(:Z) } - end - - def test_field_gfa_field_validate_H - assert_nothing_raised { "0A12121EFF".validate_gfa_field(:H) } - assert_raise(RGFA::FormatError) do - "21X1".validate_gfa_field(:H) - end - end - - def test_field_gfa_field_validate_B - assert_nothing_raised { "i,12,-5".validate_gfa_field(:B) } - assert_raise(RGFA::FormatError) do - "C,X1".validate_gfa_field(:B) - end - assert_raise(RGFA::FormatError) do - "f.1.1".validate_gfa_field(:B) - end - end - - def test_field_gfa_field_validate_J - assert_nothing_raised {"{\"1\":2}".validate_gfa_field(:J) } - assert_raise(RGFA::FormatError) do - "1\t2".validate_gfa_field(:J) - end - end - -end diff --git a/test/test_internals_field_writer.rb b/test/test_internals_field_writer.rb deleted file mode 100644 index 4b1c394..0000000 --- a/test/test_internals_field_writer.rb +++ /dev/null @@ -1,46 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -# XXX: see parser -class 
(TestInternals||=Module.new)::FieldWriter < Test::Unit::TestCase - - def test_field_writer_i - assert_equal("13", 13.to_gfa_field) - end - - def test_field_writer_f - assert_equal("1.3", 1.3.to_gfa_field) - end - - def test_field_writer_Z - assert_equal("1B", "1B".to_gfa_field) - end - - def test_field_writer_H - assert_equal("0D0D0D", [13,13,13].to_byte_array.to_gfa_field) - assert_raise(RGFA::ValueError) do - [13,13,1.3].to_byte_array.to_gfa_field - end - assert_raise(RGFA::ValueError) do - [13,13,350].to_byte_array.to_gfa_field - end - end - - def test_field_writer_B - assert_equal("C,13,13,13", [13,13,13].to_gfa_field) - assert_equal("f,1.3,1.3,1.3", [1.3,1.3,1.3].to_gfa_field) - assert_raise(RGFA::ValueError) do - [13,1.3,1.3].to_gfa_field(datatype: :B) - end - end - - def test_field_writer_J - assert_equal("[\"A\",12]", ["A", 12].to_gfa_field) - assert_equal("{\"A\":12}", {"A" => 12}.to_gfa_field) - end - - def test_field_writer_as_tag - assert_equal("AA:i:13", 13.to_gfa_tag(:AA)) - end - -end diff --git a/test/test_internals_tag_datatype.rb b/test/test_internals_tag_datatype.rb deleted file mode 100644 index cae4259..0000000 --- a/test/test_internals_tag_datatype.rb +++ /dev/null @@ -1,26 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestInternals ||= Module.new -class TestInternals::TagDatatype < Test::Unit::TestCase - - def test_datatype_value_independent - assert_equal(:Z, "string".default_gfa_tag_datatype) - assert_equal(:i, 1.default_gfa_tag_datatype) - assert_equal(:f, 1.0.default_gfa_tag_datatype) - assert_equal(:H, RGFA::ByteArray.new([]).default_gfa_tag_datatype) - assert_equal(:B, RGFA::NumericArray.new([]).default_gfa_tag_datatype) - assert_equal(:J, {}.default_gfa_tag_datatype) - end - - def test_datatype_arrays - assert_equal(:B, [1,1].default_gfa_tag_datatype) - assert_equal(:B, [1.0,1.0].default_gfa_tag_datatype) - assert_equal(:J, [1,1.0].default_gfa_tag_datatype) - assert_equal(:J, ["1",1].default_gfa_tag_datatype) - 
assert_equal(:J, [1.0,"1.0"].default_gfa_tag_datatype) - assert_equal(:J, ["z","z"].default_gfa_tag_datatype) - assert_equal(:J, [[1,2,3],[3,4,5]].default_gfa_tag_datatype) - end - -end diff --git a/test/test_rgfatools.rb b/test/test_rgfatools.rb deleted file mode 100644 index aef0047..0000000 --- a/test/test_rgfatools.rb +++ /dev/null @@ -1,11 +0,0 @@ -require_relative "../lib/rgfatools.rb" -require "test/unit" - -class TestRGFATools < Test::Unit::TestCase - - def test_basics - assert_nothing_raised { RGFA.new } - assert_nothing_raised { RGFA.included_modules.include?(RGFATools) } - end - -end diff --git a/test/test_rgfatools_artifacts.rb b/test/test_rgfatools_artifacts.rb deleted file mode 100644 index 6197598..0000000 --- a/test/test_rgfatools_artifacts.rb +++ /dev/null @@ -1,38 +0,0 @@ -require_relative "../lib/rgfatools.rb" -require "test/unit" - -class TestRGFAToolsArtifacts < Test::Unit::TestCase - - def test_remove_small_components - ["gfa", "gfa2"].each do |sfx| - g = RGFA.from_file("testdata/two_components.#{sfx}") - assert_equal(2, g.connected_components.size) - g.remove_small_components(1000) - assert_equal(2, g.connected_components.size) - g.remove_small_components(3000) - assert_equal(1, g.connected_components.size) - g.remove_small_components(10000) - assert_equal(0, g.connected_components.size) - end - end - - def test_remove_dead_ends - ["gfa", "gfa2"].each do |sfx| - g = RGFA.from_file("testdata/dead_ends.#{sfx}") - assert_equal(6, g.segments.size) - g.remove_dead_ends(100) - assert_equal(6, g.segments.size) - g.remove_dead_ends(1500) - assert_equal(5, g.segments.size) - g.remove_dead_ends(1500) - assert_equal(5, g.segments.size) - g.remove_dead_ends(150000) - assert_equal(3, g.segments.size) - g.remove_dead_ends(150000) - assert_equal(2, g.segments.size) - g.remove_dead_ends(1500000) - assert_equal(0, g.segments.size) - end - end - -end diff --git a/test/test_rgfatools_copy_number.rb b/test/test_rgfatools_copy_number.rb deleted file mode 100644 
index 85386ea..0000000 --- a/test/test_rgfatools_copy_number.rb +++ /dev/null @@ -1,42 +0,0 @@ -require_relative "../lib/rgfatools.rb" -require "test/unit" - -class TestRGFAToolsCopyNumber < Test::Unit::TestCase - - def test_delete_low_coverage_segments - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/copynum.1.#{sfx}") - assert_equal([:"0",:"1",:"2"], gfa.segment_names) - gfa.delete_low_coverage_segments(10) - assert_equal([:"1",:"2"], gfa.segment_names) - gfa.delete_low_coverage_segments(100) - assert_equal([:"2"], gfa.segment_names) - gfa.delete_low_coverage_segments(1000) - assert_equal([], gfa.segment_names) - end - end - - def test_compute_copy_numbers - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/copynum.2.#{sfx}") - assert_nothing_raised { gfa.compute_copy_numbers(9) } - assert_equal(0, gfa.segment!("0").cn) - assert_equal(1, gfa.segment!("1").cn) - assert_equal(2, gfa.segment!("2").cn) - assert_equal(3, gfa.segment!("3").cn) - end - end - - def test_apply_copy_number - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/copynum.2.#{sfx}") - assert_equal([:"0",:"1",:"2",:"3"], gfa.segment_names) - gfa.compute_copy_numbers(9) - gfa.apply_copy_numbers - assert_equal([:"1",:"2",:"3",:"2*2",:"3*2",:"3*3"], gfa.segment_names) - gfa.compute_copy_numbers(9) - assert(gfa.segments.map(&:cn).all?{|cn|cn == 1}) - end - end - -end diff --git a/test/test_rgfatools_linear_paths.rb b/test/test_rgfatools_linear_paths.rb deleted file mode 100644 index 7b9230d..0000000 --- a/test/test_rgfatools_linear_paths.rb +++ /dev/null @@ -1,34 +0,0 @@ -require_relative "../lib/rgfatools.rb" -require "test/unit" - -class TestRGFAToolsLinearPaths < Test::Unit::TestCase - - def test_linear_path_merging - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/linear_merging.2.#{sfx}") - gfa.merge_linear_path([["0", :R],["1", :R],["2", :L],["3", :R]], - enable_tracking: true) - assert_nothing_raised {gfa.segment!("0_1_2^_3")} - 
assert_equal("ACGACGACGTCGA", gfa.segment("0_1_2^_3").sequence) - gfa = RGFA.from_file("testdata/linear_merging.2.#{sfx}") - gfa.enable_extensions - gfa.merge_linear_path([["0", :R],["1", :R],["2", :L],["3", :R]]) - assert_nothing_raised {gfa.segment!("0_1_2^_3")} - assert_equal("ACGACGACGTCGA", gfa.segment("0_1_2^_3").sequence) - end - end - - def test_linear_path_merge_all - ["gfa", "gfa2"].each do |sfx| - gfa = RGFA.from_file("testdata/linear_merging.3.#{sfx}") - gfa.enable_extensions - gfa.merge_linear_paths - assert_equal([:"0_1_2^_3"], gfa.segment_names) - gfa = RGFA.from_file("testdata/linear_merging.4.#{sfx}") - gfa.enable_extensions - gfa.merge_linear_paths - assert_equal([:"0",:"3",:"1_2^"], gfa.segments.map(&:name)) - end - end - -end diff --git a/test/test_unit_alignment.rb b/test/test_unit_alignment.rb deleted file mode 100644 index 0c13f24..0000000 --- a/test/test_unit_alignment.rb +++ /dev/null @@ -1,70 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -# note: API public methods are tested in test_api_alignment -class (TestUnit||=Module.new)::Alignment < Test::Unit::TestCase - - @@cigar_1 = RGFA::Alignment::CIGAR.new([ - RGFA::Alignment::CIGAR::Operation.new(12,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:D), - RGFA::Alignment::CIGAR::Operation.new(2,:I), - RGFA::Alignment::CIGAR::Operation.new(0,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:P)]) - - @@cigar_1_a = [ - RGFA::Alignment::CIGAR::Operation.new(12,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:D), - RGFA::Alignment::CIGAR::Operation.new(2,:I), - RGFA::Alignment::CIGAR::Operation.new(0,:M), - RGFA::Alignment::CIGAR::Operation.new(1,:P)] - - @@cigar_1_s = "12M1D2I0M1P" - - @@trace_1 = RGFA::Alignment::Trace.new([12,12,0]) - @@trace_1_s = "12,12,0" - @@trace_1_a = [12,12,0] - - def test_array_to_alignment - assert_kind_of(RGFA::Alignment::Placeholder, [].to_alignment) - assert_equal(@@cigar_1, @@cigar_1_a.to_alignment) - assert_raise(RGFA::VersionError) 
{@@trace_1_a.to_alignment} - assert_equal(@@trace_1, @@trace_1_a.to_alignment(version: :gfa2)) - assert_raise(RGFA::VersionError) {@@cigar_1_a.to_alignment(version: :gfaX)} - assert_raise(RGFA::FormatError) {["x",2,1].to_alignment} - # only the first element is checked, therefore: - malformed1 = [1,2,"x"] - assert_nothing_raised {malformed1.to_alignment(version: :gfa2)} - assert_kind_of(RGFA::Alignment::Trace, - malformed1.to_alignment(version: :gfa2)) - assert_raise(RGFA::TypeError) { - malformed1.to_alignment(version: :gfa2).validate } - malformed2 = [RGFA::Alignment::CIGAR::Operation.new(12,:M),2,"x"] - assert_nothing_raised {malformed2.to_alignment} - assert_kind_of(RGFA::Alignment::CIGAR, malformed2.to_alignment) - assert_raise(RGFA::TypeError) { malformed2.to_alignment.validate } - end - - def test_to_cigar - assert_equal(@@cigar_1, @@cigar_1.to_cigar) - assert_equal(@@cigar_1, @@cigar_1_s.to_cigar) - assert_equal(RGFA::Alignment::Placeholder, "*".to_cigar.class) - assert_equal(@@cigar_1, @@cigar_1_a.to_cigar) - assert_equal(RGFA::Alignment::Placeholder, - RGFA::Alignment::Placeholder.new.to_cigar.class) - end - - def test_to_cigar_operation - op = RGFA::Alignment::CIGAR::Operation.new(12,:M) - assert_equal(op, [12, :M].to_cigar_operation) - assert_equal(op, op.to_cigar_operation) - end - - def test_to_trace - assert_equal(@@trace_1, @@trace_1_s.to_trace) - assert_equal(@@trace_1, @@trace_1.to_trace) - assert_equal(RGFA::Alignment::Placeholder, - RGFA::Alignment::Placeholder.new.to_trace.class) - assert_raise(RGFA::FormatError) {"A,1,2".to_trace} - end - -end diff --git a/test/test_unit_field_array.rb b/test/test_unit_field_array.rb deleted file mode 100644 index a426e52..0000000 --- a/test/test_unit_field_array.rb +++ /dev/null @@ -1,75 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -class (TestUnit||=Module.new)::FieldArray < Test::Unit::TestCase - - def test_initialize - assert_nothing_raised {RGFA::FieldArray.new(:i, [1,2,3])} - # no 
validations performed by default: - assert_nothing_raised {RGFA::FieldArray.new(:i, [1,2,:a])} - assert_nothing_raised {RGFA::FieldArray.new(:WRONG, [1,2])} - end - - def test_datatype - fa = RGFA::FieldArray.new(:i, [1,2,3]) - assert_equal(:i, fa.datatype) - end - - def test_validate - assert_nothing_raised {RGFA::FieldArray.new(:i, [1,2,3]).validate} - assert_raise(RGFA::TypeError) { - RGFA::FieldArray.new(:i, [1,2,:a]).validate } - assert_raise(RGFA::TypeError) { - RGFA::FieldArray.new(:WRONG, [1,2]).validate } - end - - def test_validate_gfa_field - assert_nothing_raised { - RGFA::FieldArray.new(:i, [1,2,3]).validate_gfa_field(:i) } - assert_raise(RGFA::TypeError) { - RGFA::FieldArray.new(:i, [1,2,3]).validate_gfa_field(:J) } - assert_raise(RGFA::TypeError) { - RGFA::FieldArray.new(:i, [1,2,:a]).validate_gfa_field(:i) } - assert_nothing_raised { - RGFA::FieldArray.new(:WRONG, [1,2]).validate_gfa_field(:i) } - end - - def test_default_gfa_tag_datatype - fa = RGFA::FieldArray.new(:Z, ["1","2","3"]) - assert_equal(:Z, fa.default_gfa_tag_datatype) - # it does not depend on the values: same values, but :i - fa = RGFA::FieldArray.new(:i, ["1","2","3"]) - assert_equal(:i, fa.default_gfa_tag_datatype) - end - - def test_to_gfa_field - fa = RGFA::FieldArray.new(:i, [1,2,3]) - assert_equal("1\t2\t3", fa.to_gfa_field) - end - - def test_to_gfa_tag - fa = RGFA::FieldArray.new(:i, [1,2,3]) - assert_equal("xx:i:1\txx:i:2\txx:i:3", fa.to_gfa_tag("xx")) - end - - def test_vpush - assert_raise(RGFA::FormatError) { - RGFA::FieldArray.new(:i, [1,2,3]).vpush("x") } - assert_raise(RGFA::TypeError) { - RGFA::FieldArray.new(:i, [1,2,3]).vpush(2.0) } - assert_raise(RGFA::InconsistencyError) { - RGFA::FieldArray.new(:i, [1,2,3]).vpush("x", :Z) } - assert_nothing_raised { - RGFA::FieldArray.new(:i, [1,2,3]).vpush("x", :i) } - end - - def test_to_rgfa_field_array - fa = RGFA::FieldArray.new(:i, [1,2,3]) - assert_equal(fa, fa.to_rgfa_field_array(:Z)) - faz = RGFA::FieldArray.new(:Z, 
["1","2","3"]) - assert_not_equal(faz.class, fa.map(&:to_s).class) - assert_equal(Array, fa.map(&:to_s).class) - assert_equal(faz, fa.map(&:to_s).to_rgfa_field_array(:Z)) - end - -end diff --git a/test/test_unit_header.rb b/test/test_unit_header.rb deleted file mode 100644 index 9193b3d..0000000 --- a/test/test_unit_header.rb +++ /dev/null @@ -1,124 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::Header < Test::Unit::TestCase - - def test_new - assert_nothing_raised { - RGFA::Line::Header.new(["VN:Z:1.0", "xx:i:11"]) } - end - - def test_string_to_rgfa_line - assert_nothing_raised { "H\tVN:Z:1.0".to_rgfa_line } - assert_equal(RGFA::Line::Header, "H\tVN:Z:1.0".to_rgfa_line.class) - assert_equal(RGFA::Line::Header.new(["VN:Z:1.0", "xx:i:11"]), - "H\tVN:Z:1.0\txx:i:11".to_rgfa_line) - assert_raises(RGFA::FormatError) do - "H\tH2\tVN:Z:1.0".to_rgfa_line - end - assert_raises(RGFA::TypeError) do - "H\tVN:i:1.0".to_rgfa_line - end - end - - def test_to_s - assert_equal("H\tVN:Z:1.0\txx:i:11", - RGFA::Line::Header.new(["VN:Z:1.0", "xx:i:11"]).to_s) - end - - def test_tag_reading - assert_equal("1.0", - RGFA::Line::Header.new(["VN:Z:1.0", "xx:i:11"]).VN) - end - - def test_tag_writing - assert_nothing_raised{ - RGFA::Line::Header.new(["VN:Z:1.0", "xx:i:11"]).VN = "2.0"} - end - - def test_connection - assert(!RGFA::Line::Header.new([]).connected?) - assert(RGFA.new.header.connected?) 
- assert_raise(RGFA::RuntimeError) { - RGFA::Line::Header.new([]).connect(RGFA.new) } - end - - def test_to_gfa1a - line = "H\tVN:Z:1.0\txx:i:1".to_rgfa_line - assert_equal(["H","VN:Z:1.0", "xx:i:1"], line.to_gfa1_a) - assert_equal(["H","VN:Z:2.0", "xx:i:1"], line.to_gfa2_a) - end - - def test_to_gfa2_a - line = "H\tVN:Z:2.0\txx:i:1".to_rgfa_line - assert_equal(["H","VN:Z:1.0", "xx:i:1"], line.to_gfa1_a) - assert_equal(["H","VN:Z:2.0", "xx:i:1"], line.to_gfa2_a) - end - - def test_add - line = "H\tVN:Z:2.0\txx:i:1".to_rgfa_line - line.add(:yy, "test") - assert_equal("test", line.yy) - line.add(:yy, "test") - assert_equal(["test","test"], line.yy) - line.add(:yy, "test") - assert_equal(["test","test","test"], line.yy) - line.add(:VN, "2.0") - assert_equal("2.0", line.VN) - assert_raise(RGFA::InconsistencyError) { - line.add(:VN, "1.0") } - line.add(:TS, "120") - assert_equal(120, line.TS) - assert_nothing_raised { - line.add(:TS, 120) } - assert_nothing_raised { - line.add(:TS, "120") } - assert_raise(RGFA::InconsistencyError) { - line.add(:TS, 130) } - assert_raise(RGFA::InconsistencyError) { - line.add(:TS, "140") } - end - - def test_field_to_s - line = "H\tVN:Z:1.0\txx:i:1".to_rgfa_line - line.add(:xx, 2) - assert_equal("1.0", line.field_to_s(:VN)) - assert_equal("1\t2", line.field_to_s(:xx)) - assert_equal("VN:Z:1.0", line.field_to_s(:VN, tag: true)) - assert_equal("xx:i:1\txx:i:2", line.field_to_s(:xx, tag: true)) - end - - def test_n_duptags - line = "H\tVN:Z:1.0\txx:i:1".to_rgfa_line - assert_equal(0, line.n_duptags) - line.add(:xx, 2) - assert_equal(1, line.n_duptags) - line.add(:xx, 2) - assert_equal(1, line.n_duptags) - line.add(:zz, 2) - assert_equal(1, line.n_duptags) - line.add(:zz, 2) - assert_equal(2, line.n_duptags) - end - - def test_split - line = "H\tVN:Z:1.0\txx:i:1".to_rgfa_line - line.add(:xx, 2) - assert_equal(3, line.split.size) - line.split.each {|s| assert_equal(RGFA::Line::Header, s.class) } - assert_equal(["H\tVN:Z:1.0", "H\txx:i:1", 
"H\txx:i:2"], - line.split.map(&:to_s)) - end - - def test_merge - line1 = "H\tVN:Z:1.0\txx:i:1".to_rgfa_line - line2 = "H\txx:i:2\tyy:f:1.0".to_rgfa_line - line1.merge(line2) - assert_equal("1.0", line1.VN) - assert_equal([1,2], line1.xx) - assert_equal(1.0, line1.yy) - end - -end diff --git a/test/test_unit_line.rb b/test/test_unit_line.rb deleted file mode 100644 index b3ce28f..0000000 --- a/test/test_unit_line.rb +++ /dev/null @@ -1,114 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::Line < Test::Unit::TestCase - - def test_initialize_not_enough_positional_fields - assert_nothing_raised do - RGFA::Line::Segment::Factory.new(["1","*"]) - end - assert_raise(RGFA::FormatError) do - RGFA::Line::Segment::Factory.new(["1"]) - end - end - - def test_initialize_too_many_positionals - assert_raise(RGFA::FormatError) do - RGFA::Line::Segment::Factory.new(["1","*","*"]) - end - end - - def test_initialize_predefined_tag_wrong_type - assert_nothing_raised do - RGFA::Line::Header.new(["VN:Z:1"]) - end - assert_raise(RGFA::TypeError) do - RGFA::Line::Header.new(["VN:i:1"]) - end - end - - def test_initialize_wrong_tag_format - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["VN i:1"]) - end - end - - def test_initialize_positional_field_type_error - assert_raise(RGFA::FormatError) do - RGFA::Line::Segment::Factory.new(["1\t1","*","*"]) - end - end - - def test_initialize_tag_type_error - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["zz:i:1A"]) - end - end - - def test_initialize_duplicate_tag - assert_raise(RGFA::NotUniqueError) do - RGFA::Line::Header.new(["zz:i:1","zz:i:2"]) - end - assert_raise(RGFA::NotUniqueError) do - RGFA::Line::Header.new(["zz:i:1", "VN:Z:1", "zz:i:2"]) - end - end - - def test_initialize_custom_tag - assert_raise(RGFA::FormatError) do - RGFA::Line::Header.new(["ZZ:Z:1"]) - end - end - - def test_record_type - l = RGFA::Line::Header.new(["xx:i:13","VN:Z:HI"]) - 
assert_equal(:H, l.record_type) - assert_raise(NoMethodError) { l.record_type = "S" } - end - - def test_add_tag - l = RGFA::Line::Header.new(["xx:i:13","VN:Z:HI"]) - assert_equal(nil, l.xy) - l.set(:xy, "HI") - assert_equal("HI", l.xy) - end - - def test_unknown_record_type - assert_raise(RGFA::VersionError) { - "Z\txxx".to_rgfa_line(version: :gfa1)} - assert_nothing_raised { - "Z\txxx".to_rgfa_line(version: :gfa2)} - assert_nothing_raised { - "Z\txxx".to_rgfa_line} - end - - def test_to_rgfa_line - str = "H\tVN:Z:1.0" - l = str.to_rgfa_line - assert_equal(RGFA::Line::Header, l.class) - assert_equal(RGFA::Line::Header, l.to_rgfa_line.class) - assert_equal(str, l.to_rgfa_line.to_s) - assert_equal(l, l.to_rgfa_line) - end - - def test_field_alias - s = "S\tA\t*".to_rgfa_line - assert_equal(:A, s.name) - assert_equal(:A, s.sid) - assert_equal(:A, s.get(:name)) - assert_equal(:A, s.get(:sid)) - s.set(:name, :B) - assert_equal(:B, s.get(:sid)) - s.set(:sid, :C) - assert_equal(:C, s.name) - end - - def test_to_s - fields = ["xx:i:13","VN:Z:HI"] - l = RGFA::Line::Header.new(fields.clone) - assert_equal((["H"]+fields).join("\t"),l.to_s) - end - -end diff --git a/test/test_unit_line_cloning.rb b/test/test_unit_line_cloning.rb deleted file mode 100644 index f72984e..0000000 --- a/test/test_unit_line_cloning.rb +++ /dev/null @@ -1,64 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::LineCloning < Test::Unit::TestCase - - def test_clone_tags - l = "H\tVN:Z:1.0".to_rgfa_line - l1 = l - l2 = l.clone - assert_equal(RGFA::Line::Header, l.class) - assert_equal(RGFA::Line::Header, l2.class) - l2.VN="2.0" - assert_equal("2.0", l2.VN) - assert_equal("1.0", l.VN) - l1.VN="2.0" - assert_equal("2.0", l.VN) - end - - def test_clone_deep_string - s = "S\t1\tCAGCTTG".to_rgfa_line - s_clone = s.clone - assert_equal(s_clone.sequence, s.sequence) - assert_not_equal(s_clone.sequence.object_id, s.sequence.object_id) - s_clone.sequence 
<< "CCC" - assert_not_equal(s_clone.sequence, s.sequence) - end - - def test_clone_deep_posfield_array - u = "U\t*\t1 2 3".to_rgfa_line - u_clone = u.clone - assert_equal(u_clone.items, u.items) - assert_not_equal(u_clone.items.object_id, u.items.object_id) - u_clone.items << "4" - assert_not_equal(u_clone.items, u.items) - end - - def test_clone_deep_J_field - h = "H\txx:J:[1,2,3]".to_rgfa_line - h_clone = h.clone - assert_equal(h_clone.xx, h.xx) - assert_not_equal(h_clone.xx.object_id, h.xx.object_id) - h_clone.xx[0] += 1 - assert_not_equal(h_clone.xx, h.xx) - end - - def test_clone_disconnected - g = RGFA.new - g << (sA = "S\tA\t7\tCAGCTTG".to_rgfa_line) - g << (u = "U\t*\tA B C".to_rgfa_line) - assert(u.connected?) - assert_equal([u], sA.sets) - assert_equal([u], g.sets) - u_clone = u.clone - assert(!u_clone.connected?) - assert_equal([u], sA.sets) - assert_equal([u], g.sets) - assert_not_equal([:A, :B, :C], u.items) - assert_equal([:A, :B, :C], u.items.map(&:name)) - assert_equal([:A, :B, :C], u_clone.items) - end - -end diff --git a/test/test_unit_line_connection.rb b/test/test_unit_line_connection.rb deleted file mode 100644 index b6fb6ef..0000000 --- a/test/test_unit_line_connection.rb +++ /dev/null @@ -1,172 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::LineConnection < Test::Unit::TestCase - - def test_connected_and_rgfa - s1 = "S\t1\tACCAT".to_rgfa_line - assert(!s1.connected?) - assert_nil(s1.rgfa) - g = RGFA.new - g << s1 - assert(s1.connected?) - assert_equal(g, s1.rgfa) - end - - def test_connect - s2 = "S\t2\tACCAT".to_rgfa_line - assert(!s2.connected?) - assert_nil(s2.rgfa) - g = RGFA.new - s2.connect(g) - assert(s2.connected?) 
- assert_equal(g, s2.rgfa) - end - - def test_connect_registers_line - s2 = "S\t2\tACCAT".to_rgfa_line - g = RGFA.new - assert_equal([], g.segments) - s2.connect(g) - assert_equal([s2], g.segments) - end - - def test_disconnect - s1 = "S\t1\tACCAT".to_rgfa_line - g = RGFA.new - g << s1 - assert(s1.connected?) - assert_equal(g, s1.rgfa) - s1.disconnect - assert(!s1.connected?) - assert_nil(s1.rgfa) - end - - def test_disconnect_unregisters_line - s1 = "S\t1\tACCAT".to_rgfa_line - g = RGFA.new - g << s1 - assert_equal([s1], g.segments) - s1.disconnect - assert_equal([], g.segments) - end - - def test_disconnect_removes_field_backreferences - s1 = "S\t1\tACCAT".to_rgfa_line - l = "L\t1\t+\t2\t-\t*".to_rgfa_line - g = RGFA.new - g << s1 - g << l - assert_equal([l], s1.dovetails) - l.disconnect - assert_equal([], s1.dovetails) - end - - def test_disconnect_removes_field_references - s1 = "S\t1\tACCAT".to_rgfa_line - l = "L\t1\t+\t2\t-\t*".to_rgfa_line - g = RGFA.new - g << s1 - g << l - assert(l.from.eql?(s1)) - l.disconnect - assert(!l.from.eql?(s1)) - assert_equal(:"1", l.from) - end - - def test_disconnect_disconnects_dependent_lines - s1 = "S\t1\tACCAT".to_rgfa_line - l = "L\t1\t+\t2\t-\t*".to_rgfa_line - g = RGFA.new - g << s1 - g << l - assert(l.connected?) - s1.disconnect - assert(!l.connected?) 
- end - - def test_disconnect_removes_nonfield_backreferences - s1 = "S\t1\tACCAT".to_rgfa_line - s2 = "S\t2\tCATGG".to_rgfa_line - s3 = "S\t3\tTGGAA".to_rgfa_line - l12 = "L\t1\t+\t2\t+\t*".to_rgfa_line - l23 = "L\t2\t+\t3\t+\t*".to_rgfa_line - p4 = "P\t4\t1+,2+,3+\t*".to_rgfa_line - g = RGFA.new - [s1, s2, s3, l12, l23, p4].each do |line| - g << line - end - assert_equal([p4], l12.paths) - p4.disconnect - assert_equal([], l12.paths) - end - - def test_disconnect_removes_nonfield_references - s1 = "S\t1\tACCAT".to_rgfa_line - s2 = "S\t2\tCATGG".to_rgfa_line - s3 = "S\t3\tTGGAA".to_rgfa_line - l12 = "L\t1\t+\t2\t+\t*".to_rgfa_line - l23 = "L\t2\t+\t3\t+\t*".to_rgfa_line - p4 = "P\t4\t1+,2+,3+\t*".to_rgfa_line - g = RGFA.new - [s1, s2, s3, l12, l23, p4].each do |line| - g << line - end - assert_equal([OL[l12,:+],OL[l23,:+]], p4.links) - p4.disconnect - assert_equal([], p4.links) - end - - def test_add_reference - s1 = "S\t1\tACCAT".to_rgfa_line - assert_equal([], s1.gaps_L) - s1.add_reference(:X, :gaps_L) - assert_equal([:X], s1.gaps_L) - s1.add_reference(:Y, :gaps_L) - assert_equal([:X, :Y], s1.gaps_L) - s1.add_reference(:Z, :gaps_L, append: false) - assert_equal([:Z, :X, :Y], s1.gaps_L) - end - - def test_delete_reference - s1 = "S\t1\tACCAT".to_rgfa_line - s1.add_reference(:A, :gaps_L) - s1.add_reference(:B, :gaps_L) - s1.add_reference(:C, :gaps_L) - s1.add_reference(:D, :gaps_L) - s1.add_reference(:E, :gaps_L) - assert_equal([:A, :B, :C, :D, :E], s1.gaps_L) - s1.delete_reference(:C, :gaps_L) - assert_equal([:A, :B, :D, :E], s1.gaps_L) - s1.delete_first_reference(:gaps_L) - assert_equal([:B, :D, :E], s1.gaps_L) - s1.delete_last_reference(:gaps_L) - assert_equal([:B, :D], s1.gaps_L) - end - - def test_update_references - s1 = "S\t1\tACCAT".to_rgfa_line - gA = RGFA::Line::Gap.new({}) - gnewA = RGFA::Line::Gap.new({}) - gB = RGFA::Line::Gap.new({}) - gC = RGFA::Line::Gap.new({}) - gD = RGFA::Line::Gap.new({}) - gE = RGFA::Line::Gap.new({}) - gX = 
RGFA::Line::Gap.new({}) - s1.add_reference(gA, :gaps_L) - s1.add_reference(gB, :gaps_L) - s1.add_reference(gC, :gaps_L) - s1.add_reference(gD, :gaps_L) - s1.add_reference(gE, :gaps_L) - assert_equal([gA, gB, gC, gD, gE], s1.gaps_L) - s1.update_references(gA, gnewA, :sid1) - assert_equal([gnewA, gB, gC, gD, gE], s1.gaps_L) - s1.update_references(gX, :newX, :sid1) - assert_equal([gnewA, gB, gC, gD, gE], s1.gaps_L) - s1.update_references(gB, nil, :sid1) - assert_equal([gnewA, gC, gD, gE], s1.gaps_L) - end - -end diff --git a/test/test_unit_line_dynamic_fields.rb b/test/test_unit_line_dynamic_fields.rb deleted file mode 100644 index 25b1d66..0000000 --- a/test/test_unit_line_dynamic_fields.rb +++ /dev/null @@ -1,91 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::DynamicFields < Test::Unit::TestCase - - def test_respond_to - l = RGFA::Line::Edge::Link.new(["1","+","2","-","*","zz:Z:yes","KC:i:100"]) - # record_type - assert(l.respond_to?(:record_type)) - # reqfields - assert(l.respond_to?(:from)) - assert(l.respond_to?(:from=)) - # predefined tags - assert(l.respond_to?(:KC)) - assert(l.respond_to?(:KC!)) - assert(l.respond_to?(:KC=)) - # custom tags - assert(l.respond_to?(:zz)) - assert(l.respond_to?(:zz!)) - assert(l.respond_to?(:zz=)) - # not-yet-existing tags - assert(l.respond_to?(:aa)) - assert(l.respond_to?(:aa!)) - assert(l.respond_to?(:aa=)) - end - - def test_field_getters_positional_fields - l = RGFA::Line::Segment::Factory.new(["12","*","xx:i:13","KC:i:10"]) - assert_equal(:"12", l.name) - assert_raise(NoMethodError) { l.zzz } - end - - def test_field_getters_existing_tags - l = RGFA::Line::Segment::Factory.new(["12","*","xx:i:13","KC:i:10"]) - assert_equal(:xx, l.tagnames[0]) - assert_equal("13", l.field_to_s(:xx)) - assert_equal(13, l.xx) - assert_equal(13, l.xx!) - assert_equal("10", l.field_to_s(:KC)) - assert_equal(10, l.KC) - assert_equal(10, l.KC!) 
- end - - def test_field_getters_not_existing_tags - l = RGFA::Line::Header.new(["xx:i:13","VN:Z:HI"]) - assert_equal(nil, l.zz) - assert_raise(RGFA::NotFoundError) { l.zz! } - end - - def test_field_setters_positional_fields - l = RGFA::Line::Segment::Factory.new(["12","*","xx:i:13","KC:i:1200"]) - assert_raise(RGFA::FormatError) { l.name = "A\t1"; - l.validate_field(:name) } - l.name = "14" - assert_equal(:"14", l.name) - end - - def test_field_setters_existing_tags - l = RGFA::Line::Header.new(["xx:i:13","VN:Z:HI"], vlevel: 3) - assert_equal(13, l.xx) - l.xx = 15 - assert_equal(15, l.xx) - assert_raise(RGFA::FormatError) { l.xx = "1A" } - assert_nothing_raised { l.set_datatype(:xx, :Z); l.xx = "1A" } - assert_equal("HI", l.VN) - l.VN = "HO" - assert_equal("HO", l.VN) - end - - def test_field_setters_not_existing_tags - l = RGFA::Line::Header.new(["xx:i:13","VN:Z:HI"]) - assert_nothing_raised { l.zz="1" } - assert_equal("1", l.zz) - assert_nothing_raised { l.zi=1 } - assert_equal(1, l.zi) - assert_nothing_raised { l.zf=1.0 } - assert_equal(1.0, l.zf) - assert_nothing_raised { l.bf=[1.0,1.0] } - assert_equal([1.0,1.0], l.bf) - assert_nothing_raised { l.bi=[1.0,1.0] } - assert_equal([1,1], l.bi) - assert_nothing_raised { l.ba=[1.0,1] } - assert_equal([1.0,1], l.ba) - assert_nothing_raised { l.bh={:a => 1.0, :b => 1} } - assert_equal({"a"=>1.0,"b"=>1}, l.to_s.to_rgfa_line.bh) - assert_raise(NoMethodError) { l.zzz="1" } - end - -end diff --git a/test/test_unit_line_equivalence.rb b/test/test_unit_line_equivalence.rb deleted file mode 100644 index 2cdee8e..0000000 --- a/test/test_unit_line_equivalence.rb +++ /dev/null @@ -1,160 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::LineEquivalence < Test::Unit::TestCase - - @@a = "S\tA\t*\tLN:i:8\txx:Z:a".to_rgfa_line - @@b = "S\tB\t*\tLN:i:10".to_rgfa_line - @@c = "C\tA\t+\tB\t+\t10\t*".to_rgfa_line - @@l = "L\tA\t+\tB\t+\t*".to_rgfa_line - @@e = 
"E\t1\tA+\tB-\t0\t100$\t20\t121\t*".to_rgfa_line - - @@a_ln = "S\tA\t*\tLN:i:10\txx:Z:a".to_rgfa_line - @@a_seq = "S\tA\tACCTTCGT\tLN:i:8\txx:Z:a".to_rgfa_line - @@a_gfa2 = "S\tA\t8\tACCTTCGT\txx:Z:a".to_rgfa_line - @@a_noxx = "S\tA\t*\tLN:i:8".to_rgfa_line - @@a_yy = "S\tA\t*\tLN:i:8\txx:Z:a\tyy:Z:b".to_rgfa_line - @@l_from = "L\tC\t+\tB\t+\t*".to_rgfa_line - @@e_name = "E\t2\tA+\tB-\t0\t100$\t20\t121\t*".to_rgfa_line - - @@h_a = {:record_type => :S, - :name => :A, - :LN => 8, - :xx => "a"} - @@h_a_rt = @@h_a.clone - @@h_a_rt[:record_type] = :X - @@h_a_pl = @@h_a.clone - @@h_a_pl[:name] = RGFA::Placeholder.new - @@h_a_name = @@h_a.clone - @@h_a_name[:name] = :B - @@h_a_seq = @@h_a.clone - @@h_a_seq[:sequence] = "ACCTTCGT" - @@h_a_ln = @@h_a.clone - @@h_a_ln[:LN] = 10 - @@h_a_LNstr = @@h_a.clone - @@h_a_LNstr[:LN] = "8" - @@h_a_noxx = @@h_a.clone - @@h_a_noxx.delete(:xx) - @@h_a_yy = @@h_a.clone - @@h_a_yy[:yy] = "b" - @@h_a_gfa2 = {:record_type => :S, - :sid => :A, - :slen => 8, - :xx => "a"} - - - def test_line_placeholder - assert(!@@a.placeholder?) - assert(!@@b.placeholder?) 
- end - - def test_line_diff_two_segments - adiffb = [[:different, :positional_field, :name, "A", "B"], - [:exclusive, :<, :tag, :xx, :Z, "a"], - [:different, :tag, :LN, :i, "8", :i, "10"]] - assert_equal(adiffb, @@a.diff(@@b)) - bdiffa = [[:different, :positional_field, :name, "B", "A"], - [:exclusive, :>, :tag, :xx, :Z, "a"], - [:different, :tag, :LN, :i, "10", :i, "8"]] - assert_equal(bdiffa, @@b.diff(@@a)) - assert_equal([], @@a.diff(@@a)) - assert_equal([], @@b.diff(@@b)) - end - - def test_line_diffscript_two_segments - acpy = @@a.clone - eval(acpy.diffscript(@@b, "acpy")) - assert_not_equal(@@b.to_s, @@a.to_s) - assert_equal(@@b.to_s, acpy.to_s) - bcpy = @@b.clone - eval(bcpy.diffscript(@@a, "bcpy")) - assert_not_equal(@@a.to_s, @@b.to_s) - assert_equal(@@a.to_s, bcpy.to_s) - end - - def test_equal - # == - assert(!(@@a == @@b)) - assert(!(@@a == @@a_ln)) - assert(!(@@a == @@a_seq)) - assert(!(@@a == @@a_gfa2)) - assert(!(@@a == @@a_noxx)) - assert(@@b == @@b.clone) - assert(@@a == @@a.clone) - end - - def test_pointer_equality - # eql? - assert(@@a.eql?(@@a)) - assert(!@@a.eql?(@@a.clone)) - # equal? 
- assert(@@a.equal?(@@a)) - assert(!@@a.equal?(@@a.clone)) - end - - def test_eql_fields - # same object - assert(@@a.eql_fields?(@@a)) - # clone - assert(@@a.eql_fields?(@@a.clone)) - # positional field difference - assert(!@@l.eql_fields?(@@l_from)) - assert(@@l.eql_fields?(@@l_from, [:from])) - # positional field difference: name alias - assert(!@@e.eql_fields?(@@e_name)) - assert(@@e.eql_fields?(@@e_name, [:eid])) - assert(@@e.eql_fields?(@@e_name, [:name])) - # positional field difference: placeholder in line - assert(@@a.eql_fields?(@@a_seq)) - # positional field difference: placeholder in reference - assert(@@a_seq.eql_fields?(@@a)) - # tag difference - assert(!@@a.eql_fields?(@@a_ln)) - assert(@@a.eql_fields?(@@a_ln, [:LN])) - # additional tag in line - assert(@@a.eql_fields?(@@a_noxx)) - assert(!@@a_noxx.eql_fields?(@@a)) - # missing tag in line - assert(!@@a.eql_fields?(@@a_yy)) - assert(@@a_yy.eql_fields?(@@a)) - assert(@@a.eql_fields?(@@a_yy, [:yy])) - # gfa1 vs gfa2 - assert(@@a.eql_fields?(@@a_gfa2, [:slen])) - assert(@@a_gfa2.eql_fields?(@@a, [:LN])) - # record_type - assert(!@@c.eql_fields?(@@l)) - assert(!@@l.eql_fields?(@@c)) - assert(@@c.eql_fields?(@@l, [:record_type])) - assert(@@l.eql_fields?(@@c, [:record_type, :pos])) - end - - def test_field_values - assert(@@a.field_values?(@@h_a)) - # record_type difference - assert(!@@a.field_values?(@@h_a_rt)) - assert(@@a.field_values?(@@h_a_rt, [:record_type])) - # positional field difference - assert(!@@a.field_values?(@@h_a_name)) - assert(@@a.field_values?(@@h_a_name, [:name])) - # positional field difference: placeholder in line - assert(@@a.field_values?(@@h_a_seq)) - # positional field difference: placeholder in hash is compared - assert(!@@a.field_values?(@@h_a_pl)) - assert(@@a.field_values?(@@h_a_pl, [:name])) - # tag difference - assert(!@@a.field_values?(@@h_a_ln)) - assert(@@a.field_values?(@@h_a_ln, [:LN])) - # encoded value - assert(@@a.field_values?(@@h_a_LNstr)) - # additional tag in 
line - assert(@@a.field_values?(@@h_a_noxx)) - # missing tag in line - assert(!@@a.field_values?(@@h_a_yy)) - assert(@@a.field_values?(@@h_a_yy, [:yy])) - # gfa1 vs gfa2 - assert(@@a.field_values?(@@h_a_gfa2, [:slen])) - end - -end diff --git a/test/test_unit_lines_finders.rb b/test/test_unit_lines_finders.rb deleted file mode 100644 index 3b399d3..0000000 --- a/test/test_unit_lines_finders.rb +++ /dev/null @@ -1,77 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestUnit ||= Module.new - -# note: public methods are tested in test_api_lines_finders -class TestUnit::LinesFinders < Test::Unit::TestCase - - @@l_gfa1 = ["S\t1\t*", - "S\t2\t*", - "S\t3\t*", - "S\t4\tCGAT", - "L\t1\t+\t2\t+\t*", - "L\t1\t-\t3\t+\t10M", - "C\t1\t-\t4\t-\t1\t*", - "P\tp1\t1+,2+\t*"].map(&:to_rgfa_line) - @@l_gfa2 = ["S\t1\t100\t*", - "S\t2\t110\t*", - "E\te1\t1+\t2-\t0\t100$\t10\t110$\t*", - "G\tg1\t1-\t2-\t1000\t*", - "O\to1\t1+ 2-", - "U\tu1\t1 e1", - "F\t1\tread1-\t0\t10\t102\t122\t*", - "F\t1\tread1-\t30\t100$\t180\t255\t*", - "F\t2\tread1-\t40\t50\t52\t64\t*", - "X\tx1\txx:Z:A", - "X\tx2", - "G\t*\t1+\t2+\t2000\t*"].map(&:to_rgfa_line) - @@gfa1 = @@l_gfa1.to_rgfa - @@gfa2 = @@l_gfa2.to_rgfa - - def test_search_link - # search using the direct link - assert_equal(@@l_gfa1[4], @@gfa1.search_link(OL[:"1",:+], OL[:"2",:+], "*")) - # search using the complement link - assert_equal(@@l_gfa1[4], @@gfa1.search_link(OL[:"2",:-], OL[:"1",:-], "*")) - # with cigar parameter, but placeholder in line - assert_equal(@@l_gfa1[4], - @@gfa1.search_link(OL[:"1",:+], OL[:"2",:+], "10M")) - # with cigar parameter, and cigar in line - assert_equal(@@l_gfa1[5], - @@gfa1.search_link(OL[:"1",:-], OL[:"3",:+], "10M")) - assert_equal(nil, - @@gfa1.search_link(OL[:"1",:-], OL[:"3",:+], "12M")) - # with placeholder parameter, and cigar in line - assert_equal(@@l_gfa1[5], - @@gfa1.search_link(OL[:"1",:-], OL[:"3",:+], "*")) - end - - def test_search_duplicate_gfa1 - # link - 
assert_equal(@@l_gfa1[4], @@gfa1.search_duplicate(@@l_gfa1[4])) - # complement link - assert_equal(@@l_gfa1[4], @@gfa1.search_duplicate(@@l_gfa1[4].complement)) - # containment - assert_equal(nil, @@gfa1.search_duplicate(@@l_gfa1[6])) - # segment - assert_equal(@@l_gfa1[0], @@gfa1.search_duplicate(@@l_gfa1[0])) - # path - assert_equal(@@l_gfa1[7], @@gfa1.search_duplicate(@@l_gfa1[7])) - end - - def test_search_duplicate_gfa2 - # line with mandatory name - assert_equal(@@l_gfa2[0], @@gfa2.search_duplicate(@@l_gfa2[0])) - # line with optional name, present - assert_equal(@@l_gfa2[2], @@gfa2.search_duplicate(@@l_gfa2[2])) - assert_equal(@@l_gfa2[3], @@gfa2.search_duplicate(@@l_gfa2[3])) - assert_equal(@@l_gfa2[4], @@gfa2.search_duplicate(@@l_gfa2[4])) - assert_equal(@@l_gfa2[5], @@gfa2.search_duplicate(@@l_gfa2[5])) - # line with optional name, not present - assert_equal(nil, @@gfa2.search_duplicate(@@l_gfa2[11])) - # line with no name - assert_equal(nil, @@gfa2.search_duplicate(@@l_gfa2[6])) - assert_equal(nil, @@gfa2.search_duplicate(@@l_gfa2[9])) - end - -end diff --git a/test/test_unit_multiplication.rb b/test/test_unit_multiplication.rb deleted file mode 100644 index e393ed8..0000000 --- a/test/test_unit_multiplication.rb +++ /dev/null @@ -1,52 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" -TestUnit ||= Module.new - -class TestUnit::Multiplication < Test::Unit::TestCase - - def test_auto_select_distribute_end_lB_eq_lE - g = RGFA.new - # lB == lE == 1 - assert_equal(nil, g.send(:auto_select_distribute_end, 4, 1, 1, false)) - # lB == lE == factor - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 4, 4, false)) - # lB == lE; factor - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 2, 2, false)) - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 6, 6, false)) - end - - def test_auto_select_distribute_end_l_1 - g = RGFA.new - # lB or lE == 1, other factor - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 2, 1, false)) - 
assert_equal(:L, g.send(:auto_select_distribute_end, 4, 4, 1, false)) - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 6, 1, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 1, 2, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 1, 4, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 1, 6, false)) - end - - def test_auto_select_distribute_end_eq_factor - g = RGFA.new - # one =, one > factor - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 4, 5, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 5, 4, false)) - # one =, one < factor - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 4, 3, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 3, 4, false)) - end - - def test_auto_select_distribute_end_diff_factor - g = RGFA.new - # both > 1; both < factor - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 3, 2, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 2, 3, false)) - # both > 1; both > factor - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 5, 6, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 6, 5, false)) - # both > 1; one <, one > factor - assert_equal(:L, g.send(:auto_select_distribute_end, 4, 3, 5, false)) - assert_equal(:R, g.send(:auto_select_distribute_end, 4, 5, 3, false)) - end - -end diff --git a/test/test_unit_numeric_array.rb b/test/test_unit_numeric_array.rb deleted file mode 100644 index ce8438d..0000000 --- a/test/test_unit_numeric_array.rb +++ /dev/null @@ -1,26 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -# note: API public methods are tested in test_api_tags -class TestUnit::NumericArray < Test::Unit::TestCase - - def test_integer_type - v = {} - [8,16,32,64,128].each do |b| - v[b] = 1 << (b/2) - end - assert_equal("C", RGFA::NumericArray.integer_type(0..v[8])) - assert_equal("c", RGFA::NumericArray.integer_type(-1..v[8])) - assert_equal("S", 
RGFA::NumericArray.integer_type(0..v[16])) - assert_equal("s", RGFA::NumericArray.integer_type(-1..v[16])) - assert_equal("I", RGFA::NumericArray.integer_type(0..v[32])) - assert_equal("i", RGFA::NumericArray.integer_type(-1..v[32])) - assert_raise(RGFA::ValueError) {RGFA::NumericArray.integer_type(0..v[64])} - assert_raise(RGFA::ValueError) {RGFA::NumericArray.integer_type(-1..v[64])} - assert_raise(RGFA::ValueError) {RGFA::NumericArray.integer_type(0..v[128])} - assert_raise(RGFA::ValueError) {RGFA::NumericArray.integer_type(-1..v[128])} - end - -end diff --git a/test/test_unit_oriented_line.rb b/test/test_unit_oriented_line.rb deleted file mode 100644 index 1511d6b..0000000 --- a/test/test_unit_oriented_line.rb +++ /dev/null @@ -1,126 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::OrientedLine < Test::Unit::TestCase - - @@sym = :a - @@ref = "S\ta\t*\txx:Z:1.0".to_rgfa_line - @@invalid_sym = :"a\ta" - @@invalid_ref = [] - @@ol_s = RGFA::OrientedLine.new(@@sym, :+) - @@ol_r = RGFA::OrientedLine.new(@@ref, :-) - @@ol_inv = RGFA::OrientedLine.new(@@ref, :x) - @@ol_s_str = "a+" - @@ol_r_str = "a-" - - def test_new - assert_nothing_raised { RGFA::OrientedLine.new(@@sym, :+) } - # no validation on creation: - assert_nothing_raised { RGFA::OrientedLine.new(@@invalid_sym, :X) } - end - - def test_OL - assert_equal(@@ol_s, OL[:a, :+]) - assert_kind_of(RGFA::OrientedLine, OL[:a, :+]) - assert_raise(ArgumentError) {OL[:a]} - assert_raise(ArgumentError) {OL[:a,:+,:+]} - end - - def test_to_oriented_line - assert_equal(@@ol_s, @@ol_s.to_oriented_line) - assert(@@ol_s.eql?(@@ol_s.to_oriented_line)) - assert_equal(@@ol_s, "a+".to_oriented_line) - assert_kind_of(RGFA::OrientedLine, "a+".to_oriented_line) - assert_equal(@@ol_s, [:a, :+].to_oriented_line) - assert_kind_of(RGFA::OrientedLine, [:a, :+].to_oriented_line) - end - - def test_line - assert_equal(@@sym, @@ol_s.line) - assert_equal(@@ref, @@ol_r.line) - ol2 = 
RGFA::OrientedLine.new(@@sym, :-) - ol2.line = @@ref - assert_equal(@@ref, ol2.line) - end - - def test_orient - assert_equal(:+, @@ol_s.orient) - assert_equal(:-, @@ol_r.orient) - ol2 = RGFA::OrientedLine.new(@@sym, :+) - ol2.orient = :- - assert_equal(:-, ol2.orient) - end - - def test_name - assert_equal(@@sym, @@ol_s.name) - assert_equal(@@sym, @@ol_r.name) - end - - def test_validate - assert_nothing_raised { @@ol_s.validate } - assert_nothing_raised { @@ol_r.validate } - ol1 = RGFA::OrientedLine.new(:a, :X) - ol2 = RGFA::OrientedLine.new(@@invalid_ref, :+) - ol3 = RGFA::OrientedLine.new(@@invalid_sym, :+) - assert_raise(RGFA::ValueError) { ol1.validate } - assert_raise(RGFA::TypeError) { ol2.validate } - assert_raise(RGFA::FormatError) { ol3.validate } - end - - def test_invert - inv_s = @@ol_s.invert - assert_equal(@@ol_s.line, inv_s.line) - assert_equal(:-, inv_s.orient) - inv_r = @@ol_r.invert - assert_equal(@@ol_r.line, inv_r.line) - assert_equal(:+, inv_r.orient) - assert_raise(RGFA::ValueError) { @@ol_inv.invert } - end - - def test_to_s - assert_equal(@@ol_s_str, @@ol_s.to_s) - assert_equal(@@ol_r_str, @@ol_r.to_s) - end - - def test_equal - ol2 = RGFA::OrientedLine.new(@@sym, :+) - ol3 = RGFA::OrientedLine.new(@@ref, :-) - assert(ol2 == @@ol_s) - assert(ol3 == @@ol_r) - # only name and orient equivalence is checked, not line - assert(@@ol_r != @@ol_s) - assert(@@ol_r.invert == @@ol_s) - # equivalence to string - assert(@@ol_s == "a+") - assert(@@ol_r == "a-") - # equivalence to symbol - assert(@@ol_s == :"a+") - assert(@@ol_r == :"a-") - # equivalence to array - assert(@@ol_s == [:a, :+]) - assert(@@ol_r == [:a, :-]) - end - - def test_block - ol = RGFA::OrientedLine.new(:a, :+) - assert_nothing_raised {ol.line = :b} - assert_nothing_raised {ol.orient = :-} - ol.block - assert_raise(RGFA::RuntimeError) {ol.line = :b} - assert_raise(RGFA::RuntimeError) {ol.orient = :-} - ol.unblock - assert_nothing_raised {ol.line = :b} - assert_nothing_raised 
{ol.orient = :-} - end - - def test_delegate_methods - assert_equal("*", @@ol_r.field_to_s(:sequence)) - assert_equal("1.0", @@ol_r.xx) - ol = RGFA::OrientedLine.new("S\ta\t*".to_rgfa_line, "+") - ol.set("xx", 1) - assert_equal("S\ta\t*\txx:i:1", ol.line.to_s) - end - -end diff --git a/test/test_unit_rgfa_lines.rb b/test/test_unit_rgfa_lines.rb deleted file mode 100644 index 5fc10ee..0000000 --- a/test/test_unit_rgfa_lines.rb +++ /dev/null @@ -1,68 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::RGFALines < Test::Unit::TestCase - - def test_register_line_merge - g = RGFA.new(version: :gfa1) - l = RGFA::Line::Header.new({:xx => 1}, version: :gfa1) - l.instance_variable_set("@rgfa", g) - assert_nothing_raised { g.register_line(l) } - assert_equal(1, g.header.xx) - assert_raise(RGFA::AssertionError) { g.unregister_line(l) } - end - - def test_register_line_name_present - g = RGFA.new(version: :gfa1) - l = RGFA::Line::Segment::GFA1.new({:name => :x}, version: :gfa1) - l.instance_variable_set("@rgfa", g) - assert_nothing_raised { g.register_line(l) } - assert_equal([l], g.segments) - assert_equal(l, g.line(:x)) - assert_equal([:x], g.segment_names) - assert_nothing_raised { g.unregister_line(l) } - assert_equal([], g.segments) - assert_equal(nil, g.line(:x)) - assert_equal([], g.segment_names) - end - - def test_register_line_name_absent - g = RGFA.new(version: :gfa2) - l = RGFA::Line::Edge::GFA2.new({:eid => RGFA::Placeholder.new}, - version: :gfa2) - l.instance_variable_set("@rgfa", g) - assert_nothing_raised { g.register_line(l) } - assert_equal([l], g.edges) - assert_equal([], g.edge_names) - assert_nothing_raised { g.unregister_line(l) } - assert_equal([], g.edges) - end - - def test_register_line_external - g = RGFA.new(version: :gfa2) - l = RGFA::Line::Fragment.new({:external => OL[:x, :+]}, - version: :gfa2) - l.instance_variable_set("@rgfa", g) - assert_nothing_raised { g.register_line(l) } - 
assert_equal([l], g.fragments) - assert_equal([l], g.fragments_for_external(:x)) - assert_equal([:x], g.external_names) - assert_nothing_raised { g.unregister_line(l) } - assert_equal([], g.fragments) - assert_equal([], g.fragments_for_external(:x)) - assert_equal([], g.external_names) - end - - def test_register_line_unnamed - g = RGFA.new(version: :gfa1) - l = RGFA::Line::Edge::Link.new({}, version: :gfa1) - l.instance_variable_set("@rgfa", g) - assert_nothing_raised { g.register_line(l) } - assert_equal([l], g.dovetails) - assert_nothing_raised { g.unregister_line(l) } - assert_equal([], g.dovetails) - end - -end diff --git a/test/test_unit_segment_end.rb b/test/test_unit_segment_end.rb deleted file mode 100644 index b319498..0000000 --- a/test/test_unit_segment_end.rb +++ /dev/null @@ -1,113 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::SegmentEnd < Test::Unit::TestCase - - @@sym = :a - @@ref = "S\ta\t*\txx:Z:1.0".to_rgfa_line - @@invalid_sym = :"a\ta" - @@invalid_ref = [] - @@se_s = RGFA::SegmentEnd.new(@@sym, :L) - @@se_r = RGFA::SegmentEnd.new(@@ref, :R) - @@se_s_str = "aL" - @@se_r_str = "aR" - @@se_s_sym = :"aL" - @@se_r_sym = :"aR" - - def test_new - assert_nothing_raised { RGFA::SegmentEnd.new(@@sym, :L) } - # no validation on creation: - assert_nothing_raised { RGFA::SegmentEnd.new(@@invalid_sym, :X) } - end - - def test_to_segment_end - assert_equal(@@se_s, @@se_s.to_segment_end) - assert(@@se_s.eql?(@@se_s.to_segment_end)) - assert_equal(@@se_s, [:a, :L].to_segment_end) - assert_kind_of(RGFA::SegmentEnd, [:a, :L].to_segment_end) - assert_raise(RGFA::ValueError) {[:a, :L, :L].to_segment_end} - # to_segment_end from array performs validation: - assert_raise(RGFA::ValueError) {[:a, :X].to_segment_end} - end - - def test_segment - assert_equal(@@sym, @@se_s.segment) - assert_equal(@@ref, @@se_r.segment) - se2 = RGFA::SegmentEnd.new(@@sym, :R) - se2.segment = @@ref - assert_equal(@@ref, 
se2.segment) - end - - def test_end_type - assert_equal(:L, @@se_s.end_type) - assert_equal(:R, @@se_r.end_type) - se2 = RGFA::SegmentEnd.new(@@sym, :L) - se2.end_type = :R - assert_equal(:R, se2.end_type) - end - - def test_name - assert_equal(@@sym, @@se_s.name) - assert_equal(@@sym, @@se_r.name) - end - - def test_validate - assert_nothing_raised { @@se_s.validate } - assert_nothing_raised { @@se_r.validate } - se1 = RGFA::SegmentEnd.new(:a, :X) - assert_raise(RGFA::ValueError) { se1.validate } - end - - def test_invert - inv_s = @@se_s.invert - assert_equal(@@se_s.segment, inv_s.segment) - assert_equal(:R, inv_s.end_type) - inv_r = @@se_r.invert - assert_equal(@@se_r.segment, inv_r.segment) - assert_equal(:L, inv_r.end_type) - end - - def test_to_s - assert_equal(@@se_s_str, @@se_s.to_s) - assert_equal(@@se_r_str, @@se_r.to_s) - end - - def test_to_sym - assert_equal(@@se_s_sym, @@se_s.to_sym) - assert_equal(@@se_r_sym, @@se_r.to_sym) - end - - def to_a - assert_equal([:a, :L], @@se_s.to_a) - end - - def test_equal - se2 = RGFA::SegmentEnd.new(@@sym, :L) - se3 = RGFA::SegmentEnd.new(@@ref, :R) - assert(se2 == @@se_s) - assert(se3 == @@se_r) - # only name and end_type equivalence is checked, not segment - assert(@@se_r != @@se_s) - assert(@@se_r.invert == @@se_s) - # equivalence to array - assert(@@se_s == [:a,:L]) - assert(@@se_r == [:a,:R]) - end - - def test_comparison - assert_equal(-1, [:a,:L].to_segment_end <=> [:b,:L].to_segment_end) - assert_equal(0, [:a,:L].to_segment_end <=> [:a,:L].to_segment_end) - assert_equal(1, [:b,:L].to_segment_end <=> [:a,:L].to_segment_end) - assert_equal(-1, [:a,:L].to_segment_end <=> [:a,:R].to_segment_end) - assert_equal(0, [:a,:R].to_segment_end <=> [:a,:R].to_segment_end) - assert_equal(1, [:a,:R].to_segment_end <=> [:a,:L].to_segment_end) - end - - def test_segment_ends_path - sep = RGFA::SegmentEndsPath.new([[:a,:L],[:b,:R]].map(&:to_segment_end)) - assert_equal([[:b,:L],[:a,:R]], sep.reverse) - end - -end diff --git 
a/test/test_unit_symbol_invert.rb b/test/test_unit_symbol_invert.rb deleted file mode 100644 index a953a76..0000000 --- a/test/test_unit_symbol_invert.rb +++ /dev/null @@ -1,22 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::SymbolInvert < Test::Unit::TestCase - - def test_invert_orientations - assert_equal(:+, :-.invert) - assert_equal(:-, :+.invert) - end - - def test_invert_segment_ends - assert_equal(:L, :R.invert) - assert_equal(:R, :L.invert) - end - - def test_invert_invalid - assert_raise(RGFA::ValueError) { :xx.invert } - end - -end diff --git a/test/test_unit_unknown.rb b/test/test_unit_unknown.rb deleted file mode 100644 index 2c6e40d..0000000 --- a/test/test_unit_unknown.rb +++ /dev/null @@ -1,29 +0,0 @@ -require_relative "../lib/rgfa.rb" -require "test/unit" - -TestUnit ||= Module.new - -class TestUnit::Unknown < Test::Unit::TestCase - - @@u = RGFA::Line::Unknown.new(["a"]) - - def test_new - assert_nothing_raised { RGFA::Line::Unknown.new(["a"]) } - end - - def test_to_s - assert_equal("?record_type?\ta\tco:Z:line_created_by_RGFA", @@u.to_s) - end - - def test_tags - assert_raise(NoMethodError) {@@u.xx} - assert_nil(@@u.get(:xx)) - assert_raise(NoMethodError) {@@u.xx = 1} - assert_raise(RGFA::RuntimeError) {@@u.set(:xx, 1)} - end - - def test_virtual - assert(@@u.virtual?) 
- end - -end diff --git a/gfapy_tests/__init__.py b/tests/__init__.py similarity index 100% rename from gfapy_tests/__init__.py rename to tests/__init__.py diff --git a/gfapy_tests/extension.py b/tests/extension.py similarity index 100% rename from gfapy_tests/extension.py rename to tests/extension.py diff --git a/gfapy_tests/test_api_alignment.py b/tests/test_api_alignment.py similarity index 100% rename from gfapy_tests/test_api_alignment.py rename to tests/test_api_alignment.py diff --git a/gfapy_tests/test_api_comments.py b/tests/test_api_comments.py similarity index 100% rename from gfapy_tests/test_api_comments.py rename to tests/test_api_comments.py diff --git a/gfapy_tests/test_api_custom_records.py b/tests/test_api_custom_records.py similarity index 100% rename from gfapy_tests/test_api_custom_records.py rename to tests/test_api_custom_records.py diff --git a/gfapy_tests/test_api_extensions.py b/tests/test_api_extensions.py similarity index 100% rename from gfapy_tests/test_api_extensions.py rename to tests/test_api_extensions.py diff --git a/gfapy_tests/test_api_gfa1_lines.py b/tests/test_api_gfa1_lines.py similarity index 100% rename from gfapy_tests/test_api_gfa1_lines.py rename to tests/test_api_gfa1_lines.py diff --git a/gfapy_tests/test_api_gfa2_lines.py b/tests/test_api_gfa2_lines.py similarity index 100% rename from gfapy_tests/test_api_gfa2_lines.py rename to tests/test_api_gfa2_lines.py diff --git a/gfapy_tests/test_api_gfa_basics.py b/tests/test_api_gfa_basics.py similarity index 100% rename from gfapy_tests/test_api_gfa_basics.py rename to tests/test_api_gfa_basics.py diff --git a/gfapy_tests/test_api_header.py b/tests/test_api_header.py similarity index 100% rename from gfapy_tests/test_api_header.py rename to tests/test_api_header.py diff --git a/gfapy_tests/test_api_linear_paths.py b/tests/test_api_linear_paths.py similarity index 100% rename from gfapy_tests/test_api_linear_paths.py rename to tests/test_api_linear_paths.py diff --git 
a/gfapy_tests/test_api_linear_paths_extended.py b/tests/test_api_linear_paths_extended.py similarity index 100% rename from gfapy_tests/test_api_linear_paths_extended.py rename to tests/test_api_linear_paths_extended.py diff --git a/gfapy_tests/test_api_lines_collections.py b/tests/test_api_lines_collections.py similarity index 100% rename from gfapy_tests/test_api_lines_collections.py rename to tests/test_api_lines_collections.py diff --git a/gfapy_tests/test_api_lines_creators.py b/tests/test_api_lines_creators.py similarity index 100% rename from gfapy_tests/test_api_lines_creators.py rename to tests/test_api_lines_creators.py diff --git a/gfapy_tests/test_api_lines_destructors.py b/tests/test_api_lines_destructors.py similarity index 100% rename from gfapy_tests/test_api_lines_destructors.py rename to tests/test_api_lines_destructors.py diff --git a/gfapy_tests/test_api_lines_finders.py b/tests/test_api_lines_finders.py similarity index 100% rename from gfapy_tests/test_api_lines_finders.py rename to tests/test_api_lines_finders.py diff --git a/gfapy_tests/test_api_multiplication.py b/tests/test_api_multiplication.py similarity index 100% rename from gfapy_tests/test_api_multiplication.py rename to tests/test_api_multiplication.py diff --git a/gfapy_tests/test_api_placeholders.py b/tests/test_api_placeholders.py similarity index 100% rename from gfapy_tests/test_api_placeholders.py rename to tests/test_api_placeholders.py diff --git a/gfapy_tests/test_api_positionals.py b/tests/test_api_positionals.py similarity index 100% rename from gfapy_tests/test_api_positionals.py rename to tests/test_api_positionals.py diff --git a/gfapy_tests/test_api_positions.py b/tests/test_api_positions.py similarity index 100% rename from gfapy_tests/test_api_positions.py rename to tests/test_api_positions.py diff --git a/gfapy_tests/test_api_references_edge_gfa1.py b/tests/test_api_references_edge_gfa1.py similarity index 100% rename from 
gfapy_tests/test_api_references_edge_gfa1.py rename to tests/test_api_references_edge_gfa1.py diff --git a/gfapy_tests/test_api_references_edge_gfa2.py b/tests/test_api_references_edge_gfa2.py similarity index 100% rename from gfapy_tests/test_api_references_edge_gfa2.py rename to tests/test_api_references_edge_gfa2.py diff --git a/gfapy_tests/test_api_references_f_g_lines.py b/tests/test_api_references_f_g_lines.py similarity index 100% rename from gfapy_tests/test_api_references_f_g_lines.py rename to tests/test_api_references_f_g_lines.py diff --git a/gfapy_tests/test_api_references_groups.py b/tests/test_api_references_groups.py similarity index 100% rename from gfapy_tests/test_api_references_groups.py rename to tests/test_api_references_groups.py diff --git a/gfapy_tests/test_api_references_virtual.py b/tests/test_api_references_virtual.py similarity index 100% rename from gfapy_tests/test_api_references_virtual.py rename to tests/test_api_references_virtual.py diff --git a/gfapy_tests/test_api_rename_lines.py b/tests/test_api_rename_lines.py similarity index 100% rename from gfapy_tests/test_api_rename_lines.py rename to tests/test_api_rename_lines.py diff --git a/gfapy_tests/test_api_tags.py b/tests/test_api_tags.py similarity index 100% rename from gfapy_tests/test_api_tags.py rename to tests/test_api_tags.py diff --git a/gfapy_tests/test_api_version.py b/tests/test_api_version.py similarity index 100% rename from gfapy_tests/test_api_version.py rename to tests/test_api_version.py diff --git a/gfapy_tests/test_api_version_conversion.py b/tests/test_api_version_conversion.py similarity index 100% rename from gfapy_tests/test_api_version_conversion.py rename to tests/test_api_version_conversion.py diff --git a/gfapy_tests/test_gfapy_alignment.py b/tests/test_gfapy_alignment.py similarity index 100% rename from gfapy_tests/test_gfapy_alignment.py rename to tests/test_gfapy_alignment.py diff --git a/gfapy_tests/test_gfapy_byte_array.py 
b/tests/test_gfapy_byte_array.py similarity index 100% rename from gfapy_tests/test_gfapy_byte_array.py rename to tests/test_gfapy_byte_array.py diff --git a/gfapy_tests/test_gfapy_cigar.py b/tests/test_gfapy_cigar.py similarity index 100% rename from gfapy_tests/test_gfapy_cigar.py rename to tests/test_gfapy_cigar.py diff --git a/gfapy_tests/test_gfapy_line_containment.py b/tests/test_gfapy_line_containment.py similarity index 100% rename from gfapy_tests/test_gfapy_line_containment.py rename to tests/test_gfapy_line_containment.py diff --git a/gfapy_tests/test_gfapy_line_edge.py b/tests/test_gfapy_line_edge.py similarity index 100% rename from gfapy_tests/test_gfapy_line_edge.py rename to tests/test_gfapy_line_edge.py diff --git a/gfapy_tests/test_gfapy_line_header.py b/tests/test_gfapy_line_header.py similarity index 100% rename from gfapy_tests/test_gfapy_line_header.py rename to tests/test_gfapy_line_header.py diff --git a/gfapy_tests/test_gfapy_line_link.py b/tests/test_gfapy_line_link.py similarity index 100% rename from gfapy_tests/test_gfapy_line_link.py rename to tests/test_gfapy_line_link.py diff --git a/gfapy_tests/test_gfapy_line_path.py b/tests/test_gfapy_line_path.py similarity index 100% rename from gfapy_tests/test_gfapy_line_path.py rename to tests/test_gfapy_line_path.py diff --git a/gfapy_tests/test_gfapy_line_segment.py b/tests/test_gfapy_line_segment.py similarity index 100% rename from gfapy_tests/test_gfapy_line_segment.py rename to tests/test_gfapy_line_segment.py diff --git a/gfapy_tests/test_gfapy_line_version.py b/tests/test_gfapy_line_version.py similarity index 100% rename from gfapy_tests/test_gfapy_line_version.py rename to tests/test_gfapy_line_version.py diff --git a/gfapy_tests/test_gfapy_numeric_array.py b/tests/test_gfapy_numeric_array.py similarity index 100% rename from gfapy_tests/test_gfapy_numeric_array.py rename to tests/test_gfapy_numeric_array.py diff --git a/gfapy_tests/test_gfapy_segment_references.py 
b/tests/test_gfapy_segment_references.py similarity index 100% rename from gfapy_tests/test_gfapy_segment_references.py rename to tests/test_gfapy_segment_references.py diff --git a/gfapy_tests/test_gfapy_sequence.py b/tests/test_gfapy_sequence.py similarity index 100% rename from gfapy_tests/test_gfapy_sequence.py rename to tests/test_gfapy_sequence.py diff --git a/gfapy_tests/test_gfapy_trace.py b/tests/test_gfapy_trace.py similarity index 100% rename from gfapy_tests/test_gfapy_trace.py rename to tests/test_gfapy_trace.py diff --git a/gfapy_tests/test_graphop_artifacts.py b/tests/test_graphop_artifacts.py similarity index 100% rename from gfapy_tests/test_graphop_artifacts.py rename to tests/test_graphop_artifacts.py diff --git a/gfapy_tests/test_graphop_copy_number.py b/tests/test_graphop_copy_number.py similarity index 100% rename from gfapy_tests/test_graphop_copy_number.py rename to tests/test_graphop_copy_number.py diff --git a/gfapy_tests/test_internals_field_parser.py b/tests/test_internals_field_parser.py similarity index 100% rename from gfapy_tests/test_internals_field_parser.py rename to tests/test_internals_field_parser.py diff --git a/gfapy_tests/test_internals_field_validator.py b/tests/test_internals_field_validator.py similarity index 100% rename from gfapy_tests/test_internals_field_validator.py rename to tests/test_internals_field_validator.py diff --git a/gfapy_tests/test_internals_field_writer.py b/tests/test_internals_field_writer.py similarity index 100% rename from gfapy_tests/test_internals_field_writer.py rename to tests/test_internals_field_writer.py diff --git a/gfapy_tests/test_internals_tag_datatype.py b/tests/test_internals_tag_datatype.py similarity index 100% rename from gfapy_tests/test_internals_tag_datatype.py rename to tests/test_internals_tag_datatype.py diff --git a/gfapy_tests/test_unit_alignment.py b/tests/test_unit_alignment.py similarity index 100% rename from gfapy_tests/test_unit_alignment.py rename to 
tests/test_unit_alignment.py diff --git a/gfapy_tests/test_unit_field_array.py b/tests/test_unit_field_array.py similarity index 100% rename from gfapy_tests/test_unit_field_array.py rename to tests/test_unit_field_array.py diff --git a/gfapy_tests/test_unit_gfa_lines.py b/tests/test_unit_gfa_lines.py similarity index 100% rename from gfapy_tests/test_unit_gfa_lines.py rename to tests/test_unit_gfa_lines.py diff --git a/gfapy_tests/test_unit_header.py b/tests/test_unit_header.py similarity index 100% rename from gfapy_tests/test_unit_header.py rename to tests/test_unit_header.py diff --git a/gfapy_tests/test_unit_line.py b/tests/test_unit_line.py similarity index 100% rename from gfapy_tests/test_unit_line.py rename to tests/test_unit_line.py diff --git a/gfapy_tests/test_unit_line_cloning.py b/tests/test_unit_line_cloning.py similarity index 100% rename from gfapy_tests/test_unit_line_cloning.py rename to tests/test_unit_line_cloning.py diff --git a/gfapy_tests/test_unit_line_connection.py b/tests/test_unit_line_connection.py similarity index 100% rename from gfapy_tests/test_unit_line_connection.py rename to tests/test_unit_line_connection.py diff --git a/gfapy_tests/test_unit_line_dynamic_fields.py b/tests/test_unit_line_dynamic_fields.py similarity index 100% rename from gfapy_tests/test_unit_line_dynamic_fields.py rename to tests/test_unit_line_dynamic_fields.py diff --git a/gfapy_tests/test_unit_line_equivalence.py b/tests/test_unit_line_equivalence.py similarity index 100% rename from gfapy_tests/test_unit_line_equivalence.py rename to tests/test_unit_line_equivalence.py diff --git a/gfapy_tests/test_unit_lines_finders.py b/tests/test_unit_lines_finders.py similarity index 100% rename from gfapy_tests/test_unit_lines_finders.py rename to tests/test_unit_lines_finders.py diff --git a/gfapy_tests/test_unit_multiplication.py b/tests/test_unit_multiplication.py similarity index 100% rename from gfapy_tests/test_unit_multiplication.py rename to 
tests/test_unit_multiplication.py diff --git a/gfapy_tests/test_unit_numeric_array.py b/tests/test_unit_numeric_array.py similarity index 100% rename from gfapy_tests/test_unit_numeric_array.py rename to tests/test_unit_numeric_array.py diff --git a/gfapy_tests/test_unit_oriented_line.py b/tests/test_unit_oriented_line.py similarity index 100% rename from gfapy_tests/test_unit_oriented_line.py rename to tests/test_unit_oriented_line.py diff --git a/gfapy_tests/test_unit_segment_end.py b/tests/test_unit_segment_end.py similarity index 100% rename from gfapy_tests/test_unit_segment_end.py rename to tests/test_unit_segment_end.py diff --git a/gfapy_tests/test_unit_symbol_invert.py b/tests/test_unit_symbol_invert.py similarity index 100% rename from gfapy_tests/test_unit_symbol_invert.py rename to tests/test_unit_symbol_invert.py diff --git a/gfapy_tests/test_unit_unknown.py b/tests/test_unit_unknown.py similarity index 100% rename from gfapy_tests/test_unit_unknown.py rename to tests/test_unit_unknown.py