* 2001/06/21

* completely rewritten to use db.rb * renamed/added/deleted some methods to satisfy the API of db.rb
bioruby · Jun 21, 2001 · 75a041e · 75a041e
1 parent d5f08e5
commit 75a041e
Showing 1 changed file with 68 additions and 166 deletions.
diff --git a/lib/bio/db/kegg/genes.rb b/lib/bio/db/kegg/genes.rb
@@ -1,7 +1,7 @@
 #
-# bio/db/genes.rb - KEGG/GENES database class
+# bio/db/kegg/genes.rb - KEGG/GENES database class
 #
-#   Copyright (C) 2000, 2001 KATAYAMA Toshiaki <k@bioruby.org>
+#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
 #
 #  This library is free software; you can redistribute it and/or
 #  modify it under the terms of the GNU Library General Public
@@ -13,238 +13,140 @@
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 #  Library General Public License for more details.
 #
+#  $Id: genes.rb,v 0.7 2001/06/21 02:39:23 katayama Exp $
+#
 
-require 'bio/sequence'
+require 'bio/db'
 
-class GENES
+class GENES < KEGGDB
 
-  DELIMITER = "\n///\n"
-  TAGSIZE = 12
+  DELIMITER	= RS = "\n///\n"
+  TAGSIZE	= 12
 
   def initialize(entry)
-    @orig = {}					# Hash of the original entry
-    @data = {}					# Hash of the parsed entry
-
-    tag = ''					# temporal key
-    @orig[tag] = ''
-
-    entry.each_line do |line|
-      next if line =~ /^$/
-
-      if line =~ /^\w/
-        tag = tag_get(line)
-        @orig[tag] = '' unless @orig[tag]	# String
-      end
-      @orig[tag] << line
-    end
+    super(entry, TAGSIZE)
   end
 
-
-  ### general method to return block of the tag and contens as is
-  def get(tag)
-    @orig[tag]			# returns nil when not found
-  end
-
-
-  ### general method to return contens without tag and extra white spaces
-  def fetch(tag)
-    if get(tag)
-      str = ''
-      get(tag).each_line do |line|
-        str << tag_cut(line)
-      end
-      return truncate(str)
-    else
-      return nil		# compatible with get()
-    end
-  end
-
-
   def entry(key = nil)
     unless @data['ENTRY']
-      @data['ENTRY'] = {}
-
-      if @orig['ENTRY']
-        @data['ENTRY']['id']      = @orig['ENTRY'][12..29].strip
-        @data['ENTRY']['type']    = @orig['ENTRY'][30..39].strip
-        @data['ENTRY']['species'] = @orig['ENTRY'][40..80].strip
+      hash = {}
+      if @orig['ENTRY'].length > 30
+	hash['id']       = @orig['ENTRY'][12..29].strip
+	hash['division'] = @orig['ENTRY'][30..39].strip
+	hash['organism'] = @orig['ENTRY'][40..80].strip
       end
+      @data['ENTRY'] = hash
     end
 
-    if key
-      @data['ENTRY'][key]
-    elsif block_given?
+    if block_given?
       @data['ENTRY'].each do |k, v|
-        yield(k, v)
+        yield(k, v)			# each contents of ENTRY
       end
+    elsif key
+      @data['ENTRY'][key]		# contents of key's ENTRY
     else
-      @data['ENTRY']
+      @data['ENTRY']			# Hash of ENTRY
     end
   end
-  alias each_entry entry
-
   def id
-    entry('id')
+    entry('id')				# ENTRY ID
+  end
+  def division
+    entry('division')			# CDS, tRNA etc.
+  end
+  def organism
+    entry('organism')			# H.sapiens etc.
   end
 
   def name
-    @data['NAME'] = fetch('NAME') unless @data['NAME']
-    @data['NAME']
+    field_fetch('NAME')
+  end
+  def gene
+    name.split(', ')
   end
 
   def definition
-    @data['DEFINITION'] = fetch('DEFINITION') unless @data['DEFINITION']
-    @data['DEFINITION']
+    field_fetch('DEFINITION')
   end
 
-  def class
-    @data['CLASS'] = fetch('CLASS') unless @data['CLASS']
-    @data['CLASS']
+  def keggclass
+    field_fetch('CLASS')
   end
 
   def position
-    @data['POSITION'] = fetch('POSITION') unless @data['POSITION']
-    @data['POSITION'].gsub!(/\s+/, '') if @data['POSITION']
+    unless @data['POSITION']
+      @data['POSITION'] = field_fetch('POSITION').gsub(/\s/, '')
+    end
     @data['POSITION']
   end
 
-  def dblinks(key = nil)
+  def dblinks(db = nil)
     unless @data['DBLINKS']
-      @data['DBLINKS'] = {}
-      if @orig['DBLINKS']
-        @orig['DBLINKS'].scan(/(\S+):\s+(\S+)\n/).each do |k, v|
-          @data['DBLINKS'][k] = v
-        end
+      hash = {}
+      @orig['DBLINKS'].scan(/(\S+):\s*(\S+)\n/).each do |k, v|
+	hash[k] = v
       end
+      @data['DBLINKS'] = hash
     end
 
-    if key
-      @data['DBLINKS'][key]
-    elsif block_given?
+    if block_given?
       @data['DBLINKS'].each do |k, v|
-        yield(k, v)
+        yield(k, v)			# each DB:ID pair in DBLINKS
       end
+    elsif db
+      @data['DBLINKS'][db]		# ID of the DB
     else
-      @data['DBLINKS']
+      @data['DBLINKS']			# Hash of DB:ID in DBLINKS (default)
     end
   end
-  alias each_link dblinks
 
   def codon_usage(codon = nil)
     unless @data['CODON_USAGE']
-      return @data['CODON_USAGE'] unless @orig['CODON_USAGE']	# nil
-
-      @data['CODON_USAGE'] = []					# data in Array
-
-      @orig['CODON_USAGE'].sub(/.*/,'').each_line do |l|	# cut 1st line
-        l.chomp.sub(/^.{11}/, '').scan(/.{4}/) do |x|
-          @data['CODON_USAGE'].push(x.to_i)
+      ary = []
+      @orig['CODON_USAGE'].sub(/.*/,'').each_line do |line|	# cut 1st line
+        line.scan(/\d+/).each do |cu|
+          ary.push(cu.to_i)
         end
       end
+      @data['CODON_USAGE'] = ary
     end
 
-    h = { 't' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
-
-    if codon
-      codon.downcase!
-      key = h[codon[0].chr] * 16 + h[codon[1].chr] * 4 + h[codon[2].chr]
-      @data['CODON_USAGE'][key]
-    elsif block_given?
-      @data['CODON_USAGE'].each do |x|
-        yield(x)
+    if block_given?
+      @data['CODON_USAGE'].each do |cu|
+        yield(cu)			# each CODON_USAGE
       end
+    elsif codon
+      h = { 't' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
+      x, y, z = codon.downcase.scan(/\w/)
+      codon_num = h[x] * 16 + h[y] * 4 + h[z]
+      @data['CODON_USAGE'][codon_num]	# CODON_USAGE of the codon
     else
-      return @data['CODON_USAGE']
+      return @data['CODON_USAGE']	# Array of CODON_USAGE (default)
     end
   end
-  alias cu codon_usage
 
   def aaseq
     unless @data['AASEQ']
-      if @orig['AASEQ']
-        @data['AASEQ'] = fetch('AASEQ').gsub(/[\s\d\/]+/, '')
-      else
-        @data['AASEQ'] = ''
-      end
+      @data['AASEQ'] = AAseq.new(field_fetch('AASEQ').gsub(/[\s\d\/]+/, ''))
     end
-    return AAseq.new(@data['AASEQ'])
+    @data['AASEQ']
   end
-  alias aa aaseq
 
   def aalen
-    unless @data['AALEN']
-      if @orig['AASEQ']
-        @data['AALEN'] = tag_cut(@orig['AASEQ'][/.*/]).to_i
-      else
-        @data['AALEN'] = 0
-      end
-    end
-    @data['AALEN']
+    @data['AALEN'] = aaseq.length
   end
 
   def ntseq
     unless @data['NTSEQ']
-      if @orig['NTSEQ']
-        @data['NTSEQ'] = fetch('NTSEQ').gsub(/[\s\d\/]+/, '')
-      else
-        @data['NTSEQ'] = ''
-      end
+      @data['NTSEQ'] = NAseq.new(field_fetch('NTSEQ').gsub(/[\s\d\/]+/, ''))
     end
-    return NAseq.new(@data['NTSEQ'])
+    @data['NTSEQ']
   end
-  alias na ntseq
+  alias naseq ntseq
 
   def ntlen
-    unless @data['NTLEN']
-      if @orig['NTSEQ']
-        @data['NTLEN'] = tag_cut(@orig['NTSEQ'][/.*/]).to_i
-      else
-        @data['NTLEN'] = 0
-      end
-    end
-    @data['NTLEN']
+    @data['NTLEN'] = ntseq.length
   end
   alias nalen ntlen
 
-
-  ### change the default to private method below the line
-  private
-
-  # remove extra white spaces
-  def truncate(str)
-    return str.gsub(/\s+/, ' ').strip
-  end
-
-  def truncate!(str)
-    # do not chain these lines to avoid performing on nil
-    str.gsub!(/\s+/, ' ')
-    str.strip!
-    return str
-  end
-
-
-  # remove tag field from the line
-  def tag_cut(str)
-    if str.length > TAGSIZE
-      return str[TAGSIZE..str.length]
-    else
-      return ''			# to avoid returning nil
-    end
-  end
-
-  def tag_cut!(str)
-    str[0,tag_size] = ''
-    return str
-  end
-
-
-  # get tag field of the line
-  def tag_get(str)
-    if str.length > TAGSIZE
-      return str[0,TAGSIZE].strip
-    else
-      return ''			# to avoid returning nil
-    end
-  end
-
 end