In [None]:
require 'bio'
require 'rest-client'   # this is how you access the Web

In [None]:
def fetch(url, headers = {accept: "*/*"}, user = "", pass="")
	response = RestClient::Request.execute({
	  method: :get,
	  url: url.to_s,
	  user: user,
	  password: pass,
	  headers: headers})
	return response
	
	rescue RestClient::ExceptionWithResponse => e
	  $stderr.puts e.inspect
	  response = false
	  return response  # now we are returning 'False', and we will check that with an \"if\" statement in our main code
	rescue RestClient::Exception => e
	  $stderr.puts e.inspect
	  response = false
	  return response  # now we are returning 'False', and we will check that with an \"if\" statement in our main code
	rescue Exception => e
	  $stderr.puts e.inspect
	  response = false
	  return response  # now we are returning 'False', and we will check that with an \"if\" statement in our main code
  end

In [None]:
address = 'http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=ensemblgenomesgene&format=embl&id=At3g54340'
response = fetch(address)
record = response.body
# create a local file with this data
File.open('./embl_records/At3g54340.embl', 'w') do |myfile|  # w makes it writable
    myfile.puts record
  end
end

In [None]:
datafile2 = Bio::FlatFile.auto('./embl_records/At3g54340.embl')
puts datafile2.class 

datafile2.each_entry do |entry| # the FILE is not the same as the RECORD - multiple records can exist in a file
    # shows accession and organism
      next unless entry.accession # Specific DB ID. 
    
      puts entry.class # Bio::EMBL
      puts "# #{entry.accession} - #{entry.species}"
    
      # iterates over each element in 'features' # features method finds all features
      entry.features.each do |feature|
        next unless feature.feature == "exon"
        position = feature.position # Look at Bio::Feature object
        puts "\n\n\n\nPOSITION = #{position}"
        puts position.class
        puts "\n\n\n\nFeature = #{feature.feature}"
        qual = feature.assoc            # feature.assoc gives you a hash of Bio::Feature::Qualifier objects 
                                        # i.e. qualifier['key'] = value  for example qualifier['gene'] = "CYP450")
        puts "Associations = #{qual}"
        # skips the entry if "/translation=" is not found
        # next unless qual['translation']    # this is an indication that the feature is a transcript
    
        # collects gene name and so on and joins it into a string
        gene_info = [
          qual['gene'], qual['product'], qual['note'], qual['function']
        ].compact.join(', ')
        puts "TRANSCRIPT FOUND!\nGene Info:  #{gene_info}"
        # shows nucleic acid sequence
        puts "\n\n>Exon('#{position}') : #{gene_info}" # Transcripts are the only things that have translation. 
        # Each transcript position tells the splice structure relative to entire seq
        puts entry.naseq.class   # this is a Bio::Sequence::NA    Look at the documentation to understand the .splicing() method
        puts entry.naseq.splice(position)  # http://bioruby.org/rdoc/Bio/Sequence/Common.html#method-i-splice
      end
    
      
      puts "\n\nNumber of features #{entry.features.length}" # How many features are there
      
    
    end

In [1]:
require './Gene.rb'
gene = Gene.new(id: "AT2G46340")
puts




In [7]:
gene.embl.accession

"chromosome:TAIR10:2:19022154:19027528:1"

In [13]:
acc_regex = Regexp.new(/chromosome:TAIR10:(\d):(\d*):(\d*):1/)
match = acc_regex.match(gene.embl.accession)
puts match[1]

2


In [17]:
puts 19022154 + 1990 - 1
puts 19022154 + 1995 - 1

19024143
19024148


In [30]:
from, to = gene.features['exons']['AT2G46340.1.exon1'].locations.span

[5009, 5375]

In [41]:
puts from
puts to

1394
1540


In [31]:
seq = gene.sequence.splicing(gene.features['exons']['AT2G46340.1.exon1'].position)

"cccgaagatcccagatatcgagagagatcacaatcagatcatacaccgaagcggtgtggagaagaaacctcttgaagaaagcaaattcaaaaacacacagaagaagaagaagaagaagaagaaaggtgaggctataaatttgtcatatactattgataattacatataatatctcttttgtattctgtgtatagtcgaagaaacacaaatgaatctctctttctctatagacttttgcacaattcgttcttcattccttattttctgattttttattttaatgattaatttaggctttgaagccacatatcaaattttctttattctggcggacaattttttgaacattattttatttaattttttc"

In [36]:
gene.sequence.splicing("#{101 + 5009 - 1}..#{106 + 5009 - 1}")

"aagaag"

In [10]:
gene.sequence.splicing("complement(F10M23_4:122538..122685)")

TypeError: no implicit conversion of nil into String

In [7]:
seq.splicing("5121	5126")

NameError: undefined local variable or method `seq' for #<Object:0x0000000113bc88d8>
Did you mean?  send

In [16]:
gene.features['exons'].each do |id, value|
    to, from = value.locations.span
    puts to
end
puts

918
1394
1
771
3265
4814
5009
1743
10
2806



In [45]:
repeats_array = Array.new
gene.sequence.features.each do |feature|
    next unless feature.feature == 'repeat'
    puts feature.assoc['note'].class
    repeats << feature.assoc['note']
    # next if gene.features['exons'].key?(idx)
    # puts gene.features['exons'][idx]
end
puts

{"note"=>"AT2G46340.2.exon6"}


NameError: undefined local variable or method `repeats' for #<Object:0x0000000113188e18>
Did you mean?  repeat

In [57]:
puts repeats.uniq.join(' ')
puts 
puts gene.features["exons"].keys.join(' ')

AT2G46340.2.exon6 AT2G46340.1.exon8 AT2G46340.2.exon1 AT2G46340.1.exon1 AT2G46340.2.exon4 AT2G46340.2.exon8

AT2G46340.2.exon6 AT2G46340.2.exon5 AT2G46340.1.exon8 AT2G46340.2.exon7 AT2G46340.2.exon2 AT2G46340.2.exon1 AT2G46340.1.exon1 AT2G46340.2.exon4 AT2G46340.2.exon8 AT2G46340.2.exon3


In [58]:
gene.features["exons"].keys - repeats.uniq

["AT2G46340.2.exon5", "AT2G46340.2.exon7", "AT2G46340.2.exon2", "AT2G46340.2.exon3"]

In [46]:
gene.features['exons']["AT2G46340.2.exon7"]

#<Bio::Feature:0x0000000158b93508 @feature="exon", @position="complement(771..830)", @qualifiers=[#<Bio::Feature::Qualifier:0x0000000158b93288 @qualifier="note", @value="exon_id=AT2G46340.2.exon7">]>

In [43]:
gene.features["exons"].keys

["AT2G46340.2.exon6", "AT2G46340.2.exon5", "AT2G46340.1.exon8", "AT2G46340.2.exon7", "AT2G46340.2.exon2", "AT2G46340.2.exon1", "AT2G46340.1.exon1", "AT2G46340.2.exon4", "AT2G46340.2.exon8", "AT2G46340.2.exon3"]

In [None]:
# Search exons for CTTCTT
repeat = "CTTCTT"
repeat_regex = Regexp.new(/cttctt/i)
gene.features["exons"].each do |exon_id, exon|
    # sequence = gene.embl.naseq.splicing(exon.position)
    # For testing
    sequence = "atgcttcttaaacttctt"
    matches = repeat_regex.match(sequence)
    if matches
        puts matches.to_a
        abort
        matches.each do |match|

            puts match.offset[1]
            puts sequence[matches.offset[0]...matches.offset[1]]
        end
    end
end
puts

In [None]:
repeat = Bio::Sequence::NA.new("CTTCTT")
puts repeat.complement

In [77]:
# Search exons for CTTCTT
repeat = Bio::Sequence::NA.new("CTTCTT")
reverse_comp = "aagaag"
repeat_regex = Regexp.new(/(?=#{repeat})/i)
reverse_repeat_regex = Regexp.new(/(?=#{repeat.complement})/i)
gene.features["exons"].each do |exon_id, exon|
    #sequence = gene.sequence.splice(exon.position)
    # For testing
    sequence = "cttaacttcttctt"
    # sequence = "aagaagaagtttaag"
    match_datas = sequence.to_enum(:scan, repeat_regex).map {Regexp.last_match}
    if match_datas
        match_datas.each do |match|
            # puts match.offset(0)[0]
            # puts match.offset(0)[1]
            position = "#{match.offset(0)[0]+1}..#{match.offset(0)[0]+1 + 5}"
            puts position
            # puts sequence.splicing(position)
            # repeat_ft = Bio::Feature::new('repeat', position)
            # repeat_ft.append(Bio::Feature::Qualifier.new('note', exon_id))

            # gene.sequence.features << repeat_ft 
            # puts exon.position
            # puts match.offset(0)
            # puts sequence[match.offset(0)[0]...match.offset(0)[1]]
            # puts sequence[match.offset(0)[0]...match.offset(0)[1]]
            # puts
       end
    end

    rev_match_datas = sequence.to_enum(:scan, reverse_repeat_regex).map {Regexp.last_match}
    if rev_match_datas
        rev_match_datas.each do |match|
            position = "complement(#{match.offset(0)[0]+1}..#{match.offset(0)[0]+1 + 5})"
            puts position
            # # puts sequence.splicing(position)
            # rev_repeat_ft = Bio::Feature::new('repeat', position)
            # rev_repeat_ft.append(Bio::Feature::Qualifier.new('note', exon_id))

            # gene.sequence.features << rev_repeat_ft
            # puts exon.position
            # puts match.offset(0)
            # puts sequence[match.offset(0)[0]...match.offset(0)[1]]
            # puts sequence[match.offset(0)[0]...match.offset(0)[1]]
            # puts
       end
    end
end
puts

5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14
5
5
6..11
8
8
9..14



In [27]:
def matching_substrings(string, regex)
    string.size.times.each_with_object([]) do |start_index, maching_substrings|
      start_index.upto(string.size.pred) do |end_index|
        substring = string[start_index..end_index]
        maching_substrings.push(substring) if substring =~ /^#{regex}$/
      end
    end
end

:matching_substrings

In [50]:
tests = 'cttcttcttctt'
repeat_regex = Regexp.new(/ctt/i)
match_datas = tests.to_enum(:scan, repeat_regex).map {Regexp.last_match}
match_datas.each_cons(2).to_a

[#<MatchData "ctt">, #<MatchData "ctt">]

In [None]:
gene.sequence.features.each do |feature|
    puts feature
end
puts

In [None]:
puts sequence.length
puts sequence.splicing('complement(258..263)').complement

In [None]:
repeat = "CTTCTT"
sequence = "atgcttcttaaacttctt"
repeat_regex = Regexp.new(/#{repeat]/i)

In [None]:
s = Bio::Sequence::NA.new("atgcttcttaaacttctt")
puts s
puts s.splicing('4..9')
puts s.splicing('complement(4..9)')

In [None]:
# iterates over each element in 'features' # features method finds all features
features_hash = Hash.new

entry.features.each do |feature|
    # FILTER EXONS
    next unless feature.feature == "exon"
    puts feature.class
    puts "FEATURE = #{feature.feature}"
    position = feature.position # Look at Bio::Feature object
    puts "POSITION = #{position}"
    qual = feature.assoc # 
    puts "Associations = #{qual}"
    puts "Associations note = #{qual["note"]}"

    exon_id_regex = Regexp.new(/exon_id=(.*)/)
    if exon_id_regex.match(qual["note"])
        features_hash[$1] = feature
    end
    puts
end
puts 'end'

In [None]:
puts features_hash.keys

In [None]:
features_hash["AT2G46340.2.exon8"].class

In [1]:
require './Gene.rb'
gene = Gene.new(id: "AT5G15850")
puts

990..995
993..998
1657..1662
complement(1449..1454)
291..296
381..386
384..389



In [2]:
puts gene.write_report


