# Assignment 2. Intensive integration using Web APIs
## Gema Castillo García

### Files I have created:
Scripts defining each Class:

    Interaction_Network.rb
    Uso_General_Annotation.rb

**The 2 Classes are interconnected. The values of some Object Properties are other Objects: the networks are the key to link Interaction_Network > Uso_General_Annotation.** 


The main script that uses the previous 2 scripts to make the report is:

    make_report.rb


### Final program
    $ ruby make_report.rb
    
    
    

In [1]:
require 'rest-client'  

#creating a function called "fetch" that we can re-use everywhere in our code ()
def fetch(url, headers = {accept: "*/*"}, user = "", pass="") #the definition of fetch was written by Mark Wilkinson
  response = RestClient::Request.execute({
    method: :get,
    url: url.to_s,
    user: user,
    password: pass,
    headers: headers})
  return response
  
  rescue RestClient::ExceptionWithResponse => e
    $stderr.puts e.inspect
    response = false
    return response  #now we are returning 'False', and we will check that with an \"if\" statement in our main code
  rescue RestClient::Exception => e
    $stderr.puts e.inspect
    response = false
    return response  #now we are returning 'False', and we will check that with an \"if\" statement in our main code
  rescue Exception => e
    $stderr.puts e.inspect
    response = false
    return response  #now we are returning 'False', and we will check that with an \"if\" statement in our main code
end

:fetch

In [2]:
#the code of this box is in Interaction_Network.rb file
#'ArabidopsisSubNetwork_GeneList.txt' contains 168 genes

#beginning a class definition to show the Interaction Network
class Interaction_Network
  #creating "attribute accessors" to read and write objects' properties
  attr_accessor :agi_locus1
  attr_accessor :agi_locus2
  attr_accessor :interactors
  
  #initialization inside the class
  def initialize(agi_locus1 ,agi_locus2 ,interactors ) 
    @agi_locus1 = agi_locus1 #this attribute is for the genes of the list that I have used to do the search in BAR database
    @agi_locus2 = agi_locus2 #this attribute is for the genes of the list that have some interactors in common with agi_locus1
    @interactors = interactors #this attribute is for the interactors of each gene that are not present in the list
  end
  
  
  def Interaction_Network.retrieve_networks #defining a method to store the interactors of each gene in an array called 'gene_interactors'
    puts "\nSearching for interactions in BAR database from UToronto...\n"
    agi = [] #creating a new array that will contain the genes' AGI of "ArabidopsisSubNetwork_GeneList.txt"
    @@networks = [] #creating an array to store the Interaction_Network Objects
    agi = File.read("ArabidopsisSubNetwork_GeneList.txt").split #retrieving the genes' AGI of "ArabidopsisSubNetwork_GeneList.txt"
    taxid = 'taxid:3702' #taxon ID for A.thaliana
    cutoff = 0.485 #MIscore cutoff value for optimal score predictions found by calcuating the maximal Matthews correlation coefficient (MCC) in https://europepmc.org/article/MED/25652942
    for $g in 0..agi.length-1 #for each gene of the list do...
      agi[$g]["T"]= "t" #changing 'T' for 't' to follow the BAR database nomenclature 
      gene_interactors = [] #creating an array for the AGI code of each interactor
      res = fetch("http://bar.utoronto.ca:9090/psicquic/webservices/current/search/query/#{agi[$g]}?format=tab25") #accessing genes URLs
      if res  #res is either the response object, or False, so you can test it with 'if'
        body = res.body  #getting the "body" of the response
        if body.empty?
          puts "There is no entry for #{agi[$g]} in BAR database from UToronto." #printing a friendly message if the gene has not any interactor
        else #formatting the URL content to retrieve the required information
          lines = res.body.split("\n")
          lines.each do |l|
            fields = l.split("\t")
            if fields[9].include? taxid and fields[10].include? taxid and fields[14][15..19].to_f > cutoff #selecting A.thaliana genes and interactions with optimal quality
              if fields[0] != fields[1] #not including the interactor if the protein interacts with itself
                if fields[2] =~ /tair:([^"]+)/ #searching for reg expressions in the first column of interactors
                  if $1 != agi[$g] #selecting only the interactors, NOT the query
                    gene_interactors << $1
                  end
                end
                if fields[3] =~ /tair:([^"]+)/ #searching for reg expressions in the second column of interactors
                  if $1 != agi[$g]  #selecting only the interactors, NOT the query
                    gene_interactors << $1
                  end
                end
              end
            end
          end
        end
      end
      gene_interactors = gene_interactors.uniq #removing repeated interactors
      #gene_interactors = agi & gene_interactors ##to remove AGI not present in "ArabidopsisSubNetwork_GeneList.txt"
                                                 ##(it is hidden because I am also considering the interactors that are not present in the list)  
      for $i in 0..gene_interactors.length-1
        #puts "#{agi[$g]} interacts with #{gene_interactors[$i]}"
        @@networks << Interaction_Network.new(agi[$g],"none",gene_interactors) #creating an array with the new Objects
      end
    end
  end
  
  
  def Interaction_Network.show_networks #defining a method to work with Interaction_Network Class objects
    return @@networks
  end
  
  
  def Interaction_Network.binding_networks #defining a method to connect different objects to make bigger networks
    puts "\nBinding networks...\n"
    @@all_networks = [] #creating an array to store the new Interaction_Network Objects
    common_interactors = [] #creating an array to store the genes of the list that interact with each other
    Interaction_Network.show_networks.each do |n1|
      Interaction_Network.show_networks.each do |n2|
        unless n1.agi_locus1 == n2.agi_locus1 #ignoring comparisons from a gene with itself
          unless n1.interactors & n2.interactors == [] #dismissing the genes of the list without interactors in common
            if n2.interactors.include? n1.agi_locus1 #searching direct interactions between the genes of the list
              common_interactors << [n1.agi_locus1, n2.agi_locus1, [n1.interactors & n2.interactors, "direct"]]
            else #searching indirect interactions between the genes of the list
              common_interactors << [n1.agi_locus1, n2.agi_locus1, n1.interactors & n2.interactors]
            end
          else #searching direct interactions between the genes of the list without interactors in common
            if n2.interactors.include? n1.agi_locus1
              common_interactors << [n1.agi_locus1, n2.agi_locus1, "direct"]
            end   
          end
        end 
  
      end
    end

    common_interactors = common_interactors.uniq #deleting the duplicated networks
    common_interactors2 = [] #creating an array to store definitive networks without duplicates
    common_interactors.each do |c|
      common_interactors2 << c.sort_by do |cc| #sorting by alphabet the members of each network to compare different networks and delete the duplicates
        cc.class == Array ? cc.to_s : cc
      end
    end
    
    common_interactors2 = common_interactors2.uniq
    common_interactors2.each do |ci|
      @@all_networks << Interaction_Network.new(ci[0],ci[1],ci[2]) #creating an array with the new Objects with bigger networks
    end
  end
  
  
  def Interaction_Network.show_all_networks #defining a method to work with Interaction_Network Class networks
    return @@all_networks
  end
  
  
end

:show_all_networks

In [3]:
#the code of this box is in Uso_General_Annotation.rb file 
##BONUS: this Class can hold any functional annotation as long as you create the corresponding attribute and function

require 'json'

class Uso_General_Annotation < Interaction_Network #Uso_General_Annotation is inheriting the properties of Interaction_Network
  #creating "attribute accessors" to read and write objects' properties
  attr_accessor :network_members 
  attr_accessor :kegg 
  attr_accessor :go 
  
  #initialization inside the class
  def initialize(network_members ,kegg ,go ) 
    @network_members = network_members #this attribute will contain the objects with the networks created in Interaction_Network Class
    @kegg = kegg #this attribute will contain the KEGG pathways of all members of each network
    @go = go #this attribute will contain the GO terms of all members of each network
  end
  
  
  def Uso_General_Annotation.GO_annotation
    puts "\nRetrieving GO terms...\n"
    goterms = [] #creating an array for the GO terms of each interactor
    @@allgoterms = [] #creating an array for the GO terms of each network
    Interaction_Network.show_all_networks.each do |net|
      res = fetch("https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb&id=#{net.agi_locus1}&style=raw") #accessing the URL of the first gene of the networks
      if res#res is either the response object, or False, so you can test it with 'if'
        body = res.body #getting the "body" of the response
        if body.empty?
          goterms = []
        else
          lines = res.body.split("\n")
          lines.each do |l|
            if l =~ /GO:([^"]+); P:([^"]+);/ #selecting the GO terms from the biological process part of the GO Ontology
              goterms << ["#{$1}", "#{$2}"] #storing the GO IDs and Terms, respectively
              goterms = goterms.uniq #deleting duplicated GO terms present in the same URL
            end
          end
        end
      end
      
      res = fetch("https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb&id=#{net.agi_locus2}&style=raw") #accessing the URL of the second gene of the networks
      if res #res is either the response object, or False, so you can test it with 'if'
        body = res.body #getting the "body" of the response
        if body.empty?
          goterms = []
        else
          lines = res.body.split("\n")
          lines.each do |l|
            if l =~ /GO:([^"]+); P:([^"]+);/ #selecting the GO terms from the biological process part of the GO Ontology
              goterms << ["#{$1}", "#{$2}"] #storing the GO IDs and Terms, respectively
              goterms = goterms.uniq #deleting duplicated GO terms present in the same URL
            end
          end
        end
      end
      
      #this block is very long because it has to access all the interactors (they are in a different format)
      if net.interactors.class == Array #accessing the URLs of the interactors in common of the networks
        net.interactors.each do |int|
          unless int.class == String
            int.each do |intt|
              res = fetch("https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb&id=#{intt}&style=raw") #accessing interactor's URLs
              if res #res is either the response object, or False, so you can test it with 'if'
                body = res.body #getting the "body" of the response
                if body.empty?
                  goterms = []
                else
                  lines = res.body.split("\n")
                  lines.each do |l|
                    if l =~ /GO:([^"]+); P:([^"]+);/ #selecting the GO terms from the biological process part of the GO Ontology
                      goterms << ["#{$1}", "#{$2}"] #storing the GO IDs and Terms, respectively
                      goterms = goterms.uniq #deleting duplicated GO terms present in the same URL
                    end
                  end
                end
              end
            end
          else
            unless int == "direct"
              res = fetch("https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb&id=#{int}&style=raw") #accessing interactor's URLs
              if res#res is either the response object, or False, so you can test it with 'if'
                body = res.body #getting the "body" of the response
                if body.empty?
                  goterms = []
                else
                  lines = res.body.split("\n")
                  lines.each do |l|
                    if l =~ /GO:([^"]+); P:([^"]+);/ #selecting the GO terms from the biological process part of the GO Ontology
                      goterms << ["#{$1}", "#{$2}"] #storing the GO IDs and Terms, respectively
                      goterms = goterms.uniq #deleting duplicated GO terms present in the same URL
                    end
                  end
                end
              end
            end
          end
        end
      end
      @@allgoterms << goterms #storing the GO terms of each network in an array
      goterms = []
    end
  end
  
  
  def Interaction_Network.show_go #defining a method to work with GO annotations
    return @@allgoterms
  end
  
  
  def Uso_General_Annotation.KEGG_annotation
    puts "\nRetrieving KEGG pathways...\n"
    kegg = [] #creating an array for the KEGG pathways of each interactor
    @@allkegg = [] #creating an array for the KEGG pathways of each network
    Interaction_Network.show_all_networks.each do |net|
      address = "http://togows.org/entry/kegg-genes/ath:#{net.agi_locus1}/pathways.json" #accessing the URL of the first gene of the networks
      response = RestClient::Request.execute(
        method: :get,
        url: address)
      kegg_data = JSON.parse(response.body)
      for elem in kegg_data[0].each
        if kegg_data[0].empty?
          kegg = []
        elsif kegg_data[0]
          kegg << [elem[0],elem[1]] #storing the KEGG IDs and Pathways, respectively
        end
      end
      
      address = "http://togows.org/entry/kegg-genes/ath:#{net.agi_locus2}/pathways.json" #accessing the URL of the second gene of the networks
      response = RestClient::Request.execute(
        method: :get,
        url: address)
      kegg_data = JSON.parse(response.body)
      for elem in kegg_data[0].each
        if kegg_data[0].empty?
          kegg = []
        elsif kegg_data[0]
          kegg << [elem[0],elem[1]] #storing the KEGG IDs and Pathways, respectively
        end
      end
      
      if net.interactors.class == Array
        net.interactors.each do |int|
          unless int.class == String
            int.each do |intt|
              address = "http://togows.org/entry/kegg-genes/ath:#{intt}/pathways.json" #accessing interactor's URLs
              response = RestClient::Request.execute(
                method: :get,
                url: address)
              kegg_data = JSON.parse(response.body)
              for elem in kegg_data[0].each
                if kegg_data[0].empty?
                  kegg = []
                elsif kegg_data[0]
                  kegg << [elem[0],elem[1]] #storing the KEGG IDs and Pathways, respectively
                end
              end
            end
          else
            unless int == "direct"
              address = "http://togows.org/entry/kegg-genes/ath:#{int}/pathways.json" #accessing interactor's URLs
              response = RestClient::Request.execute(
                method: :get,
                url: address)
              kegg_data = JSON.parse(response.body)
              for elem in kegg_data[0].each
                if kegg_data[0].empty?
                  kegg = []
                elsif kegg_data[0]
                  kegg << [elem[0],elem[1]] #storing the KEGG IDs and Pathways, respectively
                end
              end
            end
          end
        end
      end
      kegg = kegg.uniq #deleting duplicated KEGGs
      @@allkegg << kegg #storing the KEGG pathways of each network in an array
      kegg = []
    end
  end
  
  
  def Interaction_Network.show_kegg #defining a method to work with KEGG annotations
    return @@allkegg
  end
  
  
  def Uso_General_Annotation.object_annotation #defining a method to create objects with a network and its GO/KEGG annotations
    @@annotated_objects = [] #creating an array to store the Uso_General_Annotation Objects
    for i in 0..Interaction_Network.show_all_networks.length-1
      @@annotated_objects << Uso_General_Annotation.new(Interaction_Network.show_all_networks[i],Uso_General_Annotation.show_kegg[i],Uso_General_Annotation.show_go[i]) 
    end
  end
  
  
  def Interaction_Network.show_annotated_objects #defining a method to work with Uso_General_Annotation Class objects
    return @@annotated_objects
  end
  
  
end  

:show_annotated_objects

## Create an “InteractionNetwork” Object to contain the members of each network and	annotate it with any KEGG Pathways the interaction network members are part of

In [None]:
#only run if you haven't run the boxes above
require './Interaction_Network.rb' #using Interaction_Network Class
require './Uso_General_Annotation.rb' #using Uso_General_Annotation Class

In [4]:
Interaction_Network.retrieve_networks #creating objects from 'SubNetwork.txt' (it only contains a few genes, puts $ruby make_report.rb in terminal to see the interactions of the 168 genes) 
Interaction_Network.binding_networks #connecting the previous objects to make bigger networks and introducing them into new objects
Uso_General_Annotation.KEGG_annotation #retrieving KEGG pathways
Uso_General_Annotation.GO_annotation #retrieving GO terms
Uso_General_Annotation.object_annotation #creating new objects with the final networks and its annotations


Searching for interactions in BAR database from UToronto...

There is no entry for At4g27030 in BAR database from UToronto.
There is no entry for At1g21400 in BAR database from UToronto.
There is no entry for At5g19120 in BAR database from UToronto.
There is no entry for At1g22690 in BAR database from UToronto.
There is no entry for At4g09650 in BAR database from UToronto.
There is no entry for At5g55620 in BAR database from UToronto.


#<Errno::ECONNREFUSED: Failed to open TCP connection to bar.utoronto.ca:9090 (Connection refused - connect(2) for "bar.utoronto.ca" port 9090)>


There is no entry for At3g28270 in BAR database from UToronto.
There is no entry for At5g64040 in BAR database from UToronto.
There is no entry for At1g29660 in BAR database from UToronto.
There is no entry for At1g64500 in BAR database from UToronto.
There is no entry for At2g20670 in BAR database from UToronto.
There is no entry for At3g47470 in BAR database from UToronto.
There is no entry for At5g64410 in BAR database from UToronto.
There is no entry for At1g15980 in BAR database from UToronto.
There is no entry for At1g12250 in BAR database from UToronto.
There is no entry for At2g21210 in BAR database from UToronto.
There is no entry for At4g28660 in BAR database from UToronto.
There is no entry for At1g52230 in BAR database from UToronto.
There is no entry for At3g18890 in BAR database from UToronto.
There is no entry for At2g04039 in BAR database from UToronto.
There is no entry for At3g48420 in BAR database from UToronto.
There is no entry for At5g07020 in BAR database from UT

0..150

In [5]:
Interaction_Network.show_annotated_objects

[#<Uso_General_Annotation:0x0000561fbcc0d2a8 @network_members=#<Interaction_Network:0x0000561fbd819b50 @agi_locus1="At2g45170", @agi_locus2="At5g54270", @interactors=["At4g37930", "At5g26780"]>, @kegg=[["ath04136", "Autophagy - other"], ["ath00196", "Photosynthesis - antenna proteins"], ["ath01100", "Metabolic pathways"], ["ath00260", "Glycine, serine and threonine metabolism"], ["ath00460", "Cyanoamino acid metabolism"], ["ath00630", "Glyoxylate and dicarboxylate metabolism"], ["ath00670", "One carbon pool by folate"], ["ath01110", "Biosynthesis of secondary metabolites"], ["ath01200", "Carbon metabolism"], ["ath01230", "Biosynthesis of amino acids"], ["ath01240", "Biosynthesis of cofactors"]], @go=[["0006914", "autophagy"], ["0006995", "cellular response to nitrogen starvation"], ["0009267", "cellular response to starvation"], ["0015031", "protein transport"], ["0045471", "response to ethanol"], ["0015979", "photosynthesis"], ["0009768", "photosynthesis, light harvesting in photosyst

In [6]:
##creating the final report with the networks and its annotations

File.open("report.txt", "a") { |f| f.write("
### ========================================================================== REPORT ========================================================================== ###

Analysis of interactions between the 168 co-expressed genes of the list. It has been applied filters for species and MIscore cutoff to the results of BAR database:
    taxon ID for Arabidopsis thaliana = taxid:3702
    cutoff = 0.485 (value for optimal score predictions found by calcuating the maximal Matthews correlation coefficient (MCC) in https://europepmc.org/article/MED/25652942)

There are two kinds of interactions:
    direct: between two or more genes of the list
    indirect: two genes of the list has one or more interactors (not in the list) in common

Although some genes of the list are present in more than one of the following networks (probably because they form a protein complex with many proteins of the list), I have created NETWORKS THAT HAVE ONLY 2 GENES OF THE LIST that interact with each other directly or/and indirectly through other interactors.


### ========================================================================= NETWORKS ========================================================================= ###
") }



for i in 0..Interaction_Network.show_annotated_objects.length-1
  if Interaction_Network.show_annotated_objects[i].network_members.interactors == "direct"
    File.open("report.txt", "a") { |f| f.write("\n\nNETWORK ##{i+1}") }
    File.open("report.txt", "a") { |f| f.write("\nGene #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus1} interacts directly with #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus2}")}
    Interaction_Network.show_annotated_objects[i].kegg.each do |k|
      File.open("report.txt", "a") { |f| f.write("\n\tKEGG ID: #{k[0]}  Pathway Name: #{k[1]}") }
    end
    Interaction_Network.show_annotated_objects[i].go.each do |g|
      File.open("report.txt", "a") { |f| f.write("\n\tGO ID: #{g[0]} GO Term: #{g[1]}") }
    end
  else
    File.open("report.txt", "a") { |f| f.write("\n\nNETWORK ##{i+1}") }
    Interaction_Network.show_annotated_objects[i].network_members.interactors.each do |intn|
      if intn.class == Array
        intn.each do |intt|
          if Interaction_Network.show_annotated_objects[i].network_members.interactors[0].class == String
            File.open("report.txt", "a") { |f| f.write("\nGene #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus1} interacts indirectly with #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus2} by #{Interaction_Network.show_annotated_objects[i].network_members.interactors[0]}") }
            Interaction_Network.show_annotated_objects[i].kegg.each do |k|
              File.open("report.txt", "a") { |f| f.write("\n\tKEGG ID: #{k[0]}  Pathway Name: #{k[1]}") }
            end
            Interaction_Network.show_annotated_objects[i].go.each do |g|
              File.open("report.txt", "a") { |f| f.write("\n\tGO ID: #{g[0]} GO Term: #{g[1]}") }
            end
          else
            File.open("report.txt", "a") { |f| f.write("\nGene #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus1} interacts indirectly with #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus2} by #{Interaction_Network.show_annotated_objects[i].network_members.interactors[0].join(", ")}") }
          end
          break
        end
      elsif intn == "direct"
        File.open("report.txt", "a") { |f| f.write("\nGene #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus1} interacts indirectly with #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus2} by #{Interaction_Network.show_annotated_objects[i].network_members.interactors[0]}") }
        Interaction_Network.show_annotated_objects[i].kegg.each do |k|
          File.open("report.txt", "a") { |f| f.write("\n\tKEGG ID: #{k[0]}  Pathway Name: #{k[1]}") }
        end
        Interaction_Network.show_annotated_objects[i].go.each do |g|
          File.open("report.txt", "a") { |f| f.write("\n\tGO ID: #{g[0]} GO Term: #{g[1]}") }
        end
        break
      else
        File.open("report.txt", "a") { |f| f.write("\nGene #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus1} interacts indirectly with #{Interaction_Network.show_annotated_objects[i].network_members.agi_locus2} by #{Interaction_Network.show_annotated_objects[i].network_members.interactors.join(", ")}") }
        Interaction_Network.show_annotated_objects[i].kegg.each do |k|
          File.open("report.txt", "a") { |f| f.write("\n\tKEGG ID: #{k[0]}  Pathway Name: #{k[1]}") }
        end
        Interaction_Network.show_annotated_objects[i].go.each do |g|
          File.open("report.txt", "a") { |f| f.write("\n\tGO ID: #{g[0]} GO Term: #{g[1]}") }
        end
        break
      end
    end
  end
end

puts "\nReport is ready! Look at 'report.txt' file\n"


Report is ready! Look at 'report.txt' file

