In [1]:
require 'json'
require 'net/http'
require 'uri'

# One gene from the input gene table (columns: Gene_ID, Gene_name,
# mutant_phenotype). Every instance registers itself in a class-wide lookup
# table keyed by gene ID.
class Gene
  attr_accessor :geneid
  attr_accessor :name

  # Registry of every Gene created so far: { geneid => Gene }.
  @@all_genes = Hash.new

  # @param geneid [String] gene identifier; must contain an AGI-style locus
  #   code (matched case-insensitively against /at\dg\d+/)
  # @param name [String] gene name
  # @param phenotype [String] mutant phenotype description
  #
  # Terminates the program (Kernel#abort) when the ID does not look like an
  # AGI locus code.
  def initialize(geneid:, name:, phenotype:)
    @geneid = geneid
    # to_s keeps a nil ID on the intended "abort" path instead of raising NoMethodError.
    abort "\ngene id does not match AGI locus code.  Aborting\n" unless @geneid.to_s.match?(/at\dg\d+/i)
    @name = name
    @phenotype = phenotype

    @@all_genes[geneid] = self # add to the class-wide registry of instances
    @linked_genes = Hash.new   # linked Gene objects and their chi-square values; starts empty
  end

  # @return [Hash{String => Gene}] the registry of all instances
  def self.all_genes
    @@all_genes
  end

  # Looks up a gene by its ID.
  #
  # @param id [String] gene ID exactly as it was given to the constructor
  # @return [Gene, nil] the registered gene, or nil when the ID is unknown
  def self.find_gene_by_id(id)
    # Direct hash lookup; iterating with #each returned the registry itself on a miss.
    @@all_genes[id]
  end

  # UniProt ID for this gene, fetched on first use and then cached.
  #
  # @return [String, nil] the first ID in the service reply, or nil when the
  #   reply lists none (a nil answer is cached as well)
  def uniprot_id
    return @uniprot_id if defined?(@uniprot_id)

    response = fetch_uniprot_id(@geneid)
    results = response.is_a?(Hash) ? response['Result'] : nil
    first_hit = results.is_a?(Array) ? results.first : nil
    @uniprot_id = first_hit.is_a?(Hash) ? first_hit['id'] : nil
  end

  private

  # Performs the HTTP GET and parses the JSON reply.
  # NOTE(review): the '/agi-locus/<id>' path and the 'Result'/'id' reply layout
  # are taken as-is from the original code -- confirm against the service docs.
  # Network and JSON parsing errors are not rescued here.
  def fetch_uniprot_id(agi_id)
    base_url = 'https://www.ebi.ac.uk/proteins/api'
    endpoint = "/agi-locus/#{agi_id}"

    body = Net::HTTP.get(URI(base_url + endpoint))
    JSON.parse(body)
  end
end


:fetch_uniprot_id

In [2]:
require 'rest-client'  

# Create a function called "fetch" that we can re-use everywhere in our code

# Performs an HTTP GET through RestClient and never raises for ordinary
# failures.
#
# @param url [#to_s] address to fetch
# @param headers [Hash] request headers
# @param user [String] user name handed to RestClient
# @param pass [String] password handed to RestClient
# @return [Object, false] whatever RestClient::Request.execute returns, or
#   false when the request failed (the error is written to $stderr); callers
#   are expected to test the result with `if`
def fetch(url, headers = {accept: "*/*"}, user = "", pass="")
  RestClient::Request.execute({
    method: :get,
    url: url.to_s,
    user: user,
    password: pass,
    headers: headers})
rescue StandardError => e
  # StandardError replaces the three original branches, which were identical.
  # Interrupt and SystemExit are deliberately no longer swallowed, so Ctrl-C
  # and `exit` keep working while a request is in flight.
  $stderr.puts e.inspect
  false
end


:fetch

In [3]:
require 'rest-client'

# Bare-bones gene record: holds an ID and remembers every instance created.
class Gene
  # Every Gene built through this initializer, in creation order.
  @@genes = []

  attr_accessor :id

  # @return [Array<Gene>] all instances created so far
  def self.all_genes
    @@genes
  end

  # @param id [Object] identifier stored on the instance
  def initialize(id:)
    @id = id
    @@genes.push(self)
  end
end


:all_genes

In [4]:
require 'rest-client'

# A gene together with the name of its protein, plus a class-level helper
# that maps the gene IDs listed in a file to UniProt/Swiss-Prot accessions
# through the EBI dbfetch service.
class AnnotatedGene
  # Pulls the accession out of an EMBL `db_xref="Uniprot/SWISSPROT:..."` qualifier.
  SWISSPROT_XREF = /db_xref="Uniprot\/SWISSPROT\:([^"]+)"/.freeze

  attr_reader :id, :protein_name

  # @param id [String] gene identifier
  # @param protein_name [String] name of the encoded protein
  def initialize(id:, protein_name:)
    @id = id
    @protein_name = protein_name
  end

  # Reads a tab-separated file whose first line is a header and whose first
  # column is a gene ID, and looks up the Swiss-Prot accession of every gene.
  # Progress is printed for each gene. Genes without a Swiss-Prot
  # cross-reference are reported and left out of the result.
  #
  # @param filename [String] path of the gene list
  # @return [Array<String>] accessions found, in file order
  #
  # Aborts the program when a record cannot be downloaded.
  def self.retrieve_genes_from_file(filename)
    # drop(1) skips the header and yields [] for an empty file, where
    # `lines[1..]` would be nil.
    IO.readlines(filename).drop(1).each_with_object([]) do |line, protein_ids|
      geneid = line.split("\t").first.to_s.strip
      next if geneid.empty? # blank row: nothing to look up

      record = fetch_embl_record(geneid)

      if (xref = record.match(SWISSPROT_XREF))
        protein_id = xref[1]
        puts "the protein ID of #{geneid} is #{protein_id}"
        protein_ids << protein_id
      else
        puts "couldn't find the protein ID of #{geneid}"
      end
    end
  end

  # Placeholder kept from the original: nothing is registered yet, so this
  # currently returns nil.
  def self.all_genes
    # TODO: return all annotated genes once instances are tracked
  end

  # Downloads the raw EMBL record of one gene.
  # RestClient signals failures by raising rather than by returning a falsy
  # value, so the intended "abort on failure" is implemented with a rescue.
  def self.fetch_embl_record(geneid)
    res = RestClient.get("https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=ensemblgenomesgene&format=embl&id=#{geneid}&style=raw")
    res.body.to_s
  rescue RestClient::Exception => e
    abort "failed to retrieve #{geneid}: #{e.message}"
  end
  private_class_method :fetch_embl_record
end




:all_genes

In [5]:
require 'csv'

# Tab-separated gene list; its first row is treated as a header.
filename = 'ArabidopsisSubNetwork_GeneList-head5.txt'
array_of_lines = File.readlines(filename)

# Print the first column (the gene ID) of every row after the header.
array_of_lines[1..].each { |row| puts row.split("\t").first.chomp }

AT4g27030
AT5g54270
AT1g21400
AT5g19120
AT2g13360
AT4g05180
AT1g22690
AT2g45170
AT4g09650
AT5g55620
AT1g31330
AT1g80440
AT3g28270
AT4g12800
AT5g04140
AT5g64040
AT1g29660
AT1g15820
AT1g64500
AT1g03130
AT2g20670
AT4g17090
AT1g23740
AT2g46340
AT4g17460
AT1g07010
AT1g32220
AT2g03750
AT3g47470
AT4g12830
AT5g05690
AT5g64410
AT2g21330
AT1g15980
AT2g01890
AT1g12250
AT2g21210
AT4g28660
AT1g52230
AT3g18890
AT4g22890
AT1g11850
AT1g37130
AT2g04039
AT3g48420
AT4g19170
AT5g07020
AT5g65010
AT3g01500
AT1g25230


["AT4g27030\n", "AT5g54270\n", "AT1g21400\n", "AT5g19120\n", "AT2g13360\n", "AT4g05180\n", "AT1g22690\n", "AT2g45170\n", "AT4g09650\n", "AT5g55620\n", "AT1g31330\n", "AT1g80440\n", "AT3g28270\n", "AT4g12800\n", "AT5g04140\n", "AT5g64040\n", "AT1g29660\n", "AT1g15820\n", "AT1g64500\n", "AT1g03130\n", "AT2g20670\n", "AT4g17090\n", "AT1g23740\n", "AT2g46340\n", "AT4g17460\n", "AT1g07010\n", "AT1g32220\n", "AT2g03750\n", "AT3g47470\n", "AT4g12830\n", "AT5g05690\n", "AT5g64410\n", "AT2g21330\n", "AT1g15980\n", "AT2g01890\n", "AT1g12250\n", "AT2g21210\n", "AT4g28660\n", "AT1g52230\n", "AT3g18890\n", "AT4g22890\n", "AT1g11850\n", "AT1g37130\n", "AT2g04039\n", "AT3g48420\n", "AT4g19170\n", "AT5g07020\n", "AT5g65010\n", "AT3g01500\n", "AT1g25230"]

In [6]:
# Look up the Swiss-Prot accession of every gene listed in `filename`
# (one web request per gene). Genes without a Swiss-Prot cross-reference are
# reported on screen and left out of the returned array.
protein_ids = AnnotatedGene.retrieve_genes_from_file(filename)




the protein ID of AT4g27030 is Q9SZ42
the protein ID of AT5g54270 is F4K0A6
the protein ID of AT1g21400 is Q9LPL5
couldn't find the protein ID of AT5g19120
the protein ID of AT2g13360 is Q56YA5
the protein ID of AT4g05180 is Q41932
the protein ID of AT1g22690 is Q8GWK5
the protein ID of AT2g45170 is Q8S926
the protein ID of AT4g09650 is Q9SSS9
couldn't find the protein ID of AT5g55620
the protein ID of AT1g31330 is Q9SHE8
the protein ID of AT1g80440 is Q9M8L2
the protein ID of AT3g28270 is Q9LHD9
the protein ID of AT4g12800 is Q9SUI4
the protein ID of AT5g04140 is Q9ZNZ7
the protein ID of AT5g64040 is Q8L7V3
the protein ID of AT1g29660 is Q9C7N5
couldn't find the protein ID of AT1g15820
couldn't find the protein ID of AT1g64500
the protein ID of AT1g03130 is Q9SA56
couldn't find the protein ID of AT2g20670
the protein ID of AT4g17090 is O23553
the protein ID of AT1g23740 is Q9ZUC2
the protein ID of AT2g46340 is Q9SYX2
the protein ID of AT4g17460 is P46600
the protein ID of AT1g07010 is

["Q9SZ42", "F4K0A6", "Q9LPL5", "Q56YA5", "Q41932", "Q8GWK5", "Q8S926", "Q9SSS9", "Q9SHE8", "Q9M8L2", "Q9LHD9", "Q9SUI4", "Q9ZNZ7", "Q8L7V3", "Q9C7N5", "Q9SA56", "O23553", "Q9ZUC2", "Q9SYX2", "P46600", "Q8L774", "Q9FVR6", "Q8RV79", "Q9SN90", "Q42569", "Q9FME8", "Q9SJU4", "Q9S9N5", "Q8VYZ2", "Q8H1Q1", "Q8W0Y8", "Q9M811", "Q8H0U5", "Q8H112", "P11035", "Q9SIA5", "Q5XF07", "O49675", "Q9FL44", "Q9LV77", "P27140", "Q8VYU7"]

In [7]:
# Echo the collected accessions as this cell's value.
protein_ids

["Q9SZ42", "F4K0A6", "Q9LPL5", "Q56YA5", "Q41932", "Q8GWK5", "Q8S926", "Q9SSS9", "Q9SHE8", "Q9M8L2", "Q9LHD9", "Q9SUI4", "Q9ZNZ7", "Q8L7V3", "Q9C7N5", "Q9SA56", "O23553", "Q9ZUC2", "Q9SYX2", "P46600", "Q8L774", "Q9FVR6", "Q8RV79", "Q9SN90", "Q42569", "Q9FME8", "Q9SJU4", "Q9S9N5", "Q8VYZ2", "Q8H1Q1", "Q8W0Y8", "Q9M811", "Q8H0U5", "Q8H112", "P11035", "Q9SIA5", "Q5XF07", "O49675", "Q9FL44", "Q9LV77", "P27140", "Q8VYU7"]

In [8]:
require 'rest-client'

# Builds a two-step interaction map for a list of UniProt accessions from the
# IntAct PSICQUIC service (tab25 output).
#
# Result shape of #build_interactome:
#   { query => { partner => [partners of that partner, ...] } }
class InteractomeBuilder
  PSICQUIC_BASE_URL = 'http://www.ebi.ac.uk/Tools/webservices/psicquic/intact/webservices/current/search/query/'

  # Confidence value inside the last column; integer scores such as "1" are accepted too.
  MISCORE_PATTERN = /intact-miscore:(\d+(?:\.\d+)?)/.freeze
  # Database prefix of the identifiers this class works with.
  UNIPROT_PREFIX = 'uniprotkb:'

  # @param gene_list [Array<String>] UniProt accessions to use as queries
  def initialize(gene_list)
    @gene_list = gene_list
    # Parsed replies keyed by [accession, threshold], so no accession is
    # downloaded more than once per builder.
    @interaction_cache = {}
  end

  # Fetches the partners of every query, then the partners of each partner.
  #
  # @return [Hash{String => Hash{String => Array<String>}}]
  def build_interactome
    first_level = @gene_list.each_with_object({}) do |query_gene, result|
      result[query_gene] = fetch_interactions(query_gene)
    end

    expand_interactome(first_level)
  end

  private

  # Downloads and parses the interactions of one accession.
  #
  # @return [Array<String>] partner accessions; [] when the request fails
  #   (the failure is printed and not cached)
  def fetch_interactions(query_gene, quality_threshold = 0.60)
    cache_key = [query_gene, quality_threshold]

    unless @interaction_cache.key?(cache_key)
      url = "#{PSICQUIC_BASE_URL}#{query_gene}?format=tab25"
      response = RestClient.get(url)
      @interaction_cache[cache_key] = parse_interactions(response.body, query_gene, quality_threshold)
    end

    # Hand out a copy so callers cannot alter the cached list.
    @interaction_cache[cache_key].dup
  rescue RestClient::Exception => e
    puts "Error fetching interactions for #{query_gene}: #{e.message}"
    []
  end

  # Extracts the distinct partners of query_gene from a tab25 reply.
  #
  # A row is used when its miscore reaches quality_threshold, the query is
  # one of the two interactors, and the other interactor is a UniProt
  # accession different from the query.
  def parse_interactions(response_body, query_gene, quality_threshold)
    query_tag = "#{UNIPROT_PREFIX}#{query_gene}"

    response_body.lines.each_with_object([]) do |line, interactions|
      fields = line.strip.split("\t")
      next if fields.size < 2 # need both interactor columns

      score = fields.last[MISCORE_PATTERN, 1]
      next if score.nil? || score.to_f < quality_threshold

      partner = partner_of(fields, query_tag)
      # Non-UniProt identifiers cannot be used as accessions further on.
      next unless partner&.start_with?(UNIPROT_PREFIX)

      interacting_gene = partner.split(":").last
      next if interacting_gene == query_gene # self-interaction

      interactions << interacting_gene unless interactions.include?(interacting_gene)
    end
  end

  # Returns the interactor column that is not the query, or nil when the
  # query is in neither column. The query may be interactor A or B, so both
  # positions are checked.
  def partner_of(fields, query_tag)
    if fields[0] == query_tag
      fields[1]
    elsif fields[1] == query_tag
      fields[0]
    end
  end

  # Adds, for every first-level partner, the list of its own partners.
  def expand_interactome(interactome)
    interactome.each_with_object({}) do |(query_gene, interactions), expanded|
      expanded[query_gene] = interactions.each_with_object({}) do |gene, partners|
        partners[gene] = fetch_interactions(gene)
      end
    end
  end
end




:expand_interactome

In [9]:
# Example usage: build the two-step interactome for the accessions collected above.
# (Commented-out alternative: a small hand-picked accession list for quick runs.)
#gene_list = ['Q9SYX2', 'Q41932', 'Q94BM7','Q8GWK5','F4K0A6', 'Q41932','Q9LPL5','Q39057']
interactome_builder = InteractomeBuilder.new(protein_ids)
interactome = interactome_builder.build_interactome

# Print the interactome (for demonstration purposes).
# Outer loop: each query accession and its hash of direct partners.
# Inner loop: each partner and the list of that partner's own partners.
interactome.each do |query_gene, second_level_interactions|
  puts "**1st-level**: #{query_gene} interacts with:"
  second_level_interactions.each do |second_gene, third_level_interactions|
    # The string literal spans two source lines, so the output contains a line break.
    puts "2nd-level:  #{second_gene}, which interacts with:  
    3rd-level: #{third_level_interactions.join(', ')}"
  end
end

**1st-level**: Q9SZ42 interacts with:
**1st-level**: F4K0A6 interacts with:
**1st-level**: Q9LPL5 interacts with:
**1st-level**: Q56YA5 interacts with:
**1st-level**: Q41932 interacts with:
**1st-level**: Q8GWK5 interacts with:
**1st-level**: Q8S926 interacts with:
**1st-level**: Q9SSS9 interacts with:
**1st-level**: Q9SHE8 interacts with:
**1st-level**: Q9M8L2 interacts with:
**1st-level**: Q9LHD9 interacts with:
**1st-level**: Q9SUI4 interacts with:
**1st-level**: Q9ZNZ7 interacts with:
**1st-level**: Q8L7V3 interacts with:
**1st-level**: Q9C7N5 interacts with:
**1st-level**: Q9SA56 interacts with:
**1st-level**: O23553 interacts with:
**1st-level**: Q9ZUC2 interacts with:
**1st-level**: Q9SYX2 interacts with:
2nd-level:  P43254, which interacts with:  
    3rd-level: Q9LJR3, Q94BM7, Q9SYX2, Q9SPL2, Q9FE22, Q39057, O24646, Q96524, Q43125
2nd-level:  O50055, which interacts with:  
    3rd-level: Q9SYX2, Q9LJR3, Q94BM7
**1st-level**: P46600 interacts with:
2nd-level:  O04292, which in

{"Q9SZ42"=>{}, "F4K0A6"=>{}, "Q9LPL5"=>{}, "Q56YA5"=>{}, "Q41932"=>{}, "Q8GWK5"=>{}, "Q8S926"=>{}, "Q9SSS9"=>{}, "Q9SHE8"=>{}, "Q9M8L2"=>{}, "Q9LHD9"=>{}, "Q9SUI4"=>{}, "Q9ZNZ7"=>{}, "Q8L7V3"=>{}, "Q9C7N5"=>{}, "Q9SA56"=>{}, "O23553"=>{}, "Q9ZUC2"=>{}, "Q9SYX2"=>{"P43254"=>["Q9LJR3", "Q94BM7", "Q9SYX2", "Q9SPL2", "Q9FE22", "Q39057", "O24646", "Q96524", "Q43125"], "O50055"=>["Q9SYX2", "Q9LJR3", "Q94BM7"]}, "P46600"=>{"O04292"=>["P46600", "Q9LEZ3", "Q9SAD4"]}, "Q8L774"=>{}, "Q9FVR6"=>{}, "Q8RV79"=>{}, "Q9SN90"=>{}, "Q42569"=>{}, "Q9FME8"=>{}, "Q9SJU4"=>{}, "Q9S9N5"=>{}, "Q8VYZ2"=>{}, "Q8H1Q1"=>{}, "Q8W0Y8"=>{}, "Q9M811"=>{}, "Q8H0U5"=>{}, "Q8H112"=>{}, "P11035"=>{"Q84MB2"=>["Q9FHZ1", "Q8L9Y3", "Q9LV27", "O80513", "O80397", "P11035"], "O80931"=>["O04479", "P11035"]}, "Q9SIA5"=>{}, "Q5XF07"=>{}, "O49675"=>{}, "Q9FL44"=>{}, "Q9LV77"=>{}, "P27140"=>{}, "Q8VYU7"=>{}}

In [10]:
# Counts the separate interaction networks in an interactome of the form
#   { query => { partner => [partners of that partner, ...] } }
# Interactions are treated as undirected, so two queries that share a partner
# (directly or through a partner's partner) belong to the same network.
class NetworkCounter
  # @param interactome [Hash] nested interaction map as described above
  def initialize(interactome)
    @interactome = interactome
    @visited_genes = {}
  end

  # Counts the networks that contain at least one query with interactions.
  # Visited state is reset on every call, so repeated calls give the same answer.
  #
  # @return [Integer] number of networks
  def count_networks
    adjacency = build_adjacency
    @visited_genes = {}

    @interactome.count do |query_gene, interactions|
      # Queries without partners never start a network; already-visited
      # queries belong to a network that has been counted.
      next false if interactions.empty? || @visited_genes.key?(query_gene)

      dfs(query_gene, adjacency)
      true
    end
  end

  private

  # Builds the undirected graph: query<->partner and partner<->partner's partner.
  def build_adjacency
    adjacency = Hash.new { |graph, gene| graph[gene] = [] }

    @interactome.each do |query_gene, interactions|
      interactions.each do |neighbor_gene, third_level_genes|
        connect(adjacency, query_gene, neighbor_gene)
        Array(third_level_genes).each { |gene| connect(adjacency, neighbor_gene, gene) }
      end
    end

    adjacency
  end

  # Records one undirected edge.
  def connect(adjacency, gene_a, gene_b)
    adjacency[gene_a] << gene_b
    adjacency[gene_b] << gene_a
  end

  # Marks every gene reachable from `gene`. Uses an explicit stack rather
  # than recursion so large networks cannot exhaust the call stack.
  def dfs(gene, adjacency = build_adjacency)
    pending = [gene]

    until pending.empty?
      current = pending.pop
      next if @visited_genes.key?(current)

      @visited_genes[current] = true
      # fetch avoids creating entries through the default block.
      pending.concat(adjacency.fetch(current, []))
    end
  end
end

# Example usage: count the networks in the interactome built earlier.
network_counter = NetworkCounter.new(interactome)
# `networks` holds an Integer here (the count), not a list of networks.
networks = network_counter.count_networks

puts "Number of networks with more than one gene: #{networks}"


Number of networks with more than one gene: 3


In [26]:
require 'rest-client'
require 'json'

# Turns a nested interactome
#   { query => { partner => [partners of that partner, ...] } }
# into networks: lists of accessions that are connected through shared members.
class InteractomeProcessor
  # @return [Array<Array<String>>] networks found by the last #process call
  attr_reader :networks

  # @param interactome [Hash] nested interaction map as described above
  def initialize(interactome)
    @interactome = interactome
    @lists = create_lists
    @networks = []
  end

  # Flattens each query with all of its first- and second-step partners into
  # one duplicate-free list; queries without any partner are dropped.
  #
  # @return [Array<Array<String>>]
  def create_lists
    lists = @interactome.map do |outer_key, inner_hash|
      members = [outer_key]

      inner_hash.each do |inner_key, inner_values|
        members << inner_key
        members.concat(inner_values)
      end

      members.uniq
    end

    lists.reject { |list| list.size <= 1 } # a lone query is not a network
  end

  # Merges lists that share at least one accession. The merge is transitive:
  # a list that overlaps several existing groups fuses all of them into one.
  # The stored lists are left untouched, so the method can be called again.
  #
  # @return [Array<Array<String>>] merged groups, in first-appearance order
  def join
    joined_lists = []

    @lists.each do |list|
      overlapping = joined_lists.select { |group| (group & list).any? }

      if overlapping.empty?
        joined_lists << list.dup
        next
      end

      # Fold every other overlapping group, then the new list, into the first one.
      target = overlapping.first
      overlapping.drop(1).each do |group|
        target.concat(group)
        joined_lists.delete_if { |candidate| candidate.equal?(group) }
      end
      target.concat(list)
      target.uniq!
    end

    joined_lists
  end

  # Computes the networks and stores them in #networks.
  #
  # @return [Array<Array<String>>] the networks (always an array)
  def process
    @networks = join.reject { |network| network.size <= 1 } # Remove networks with only one gene
  end

  # Prints the number of networks followed by the members of each one.
  def print_network_report
    puts "#{@networks.size} networks have been identified:"

    @networks.each_with_index do |network, index|
      puts "Network #{index + 1}:"
      print_components(network)

      # The KEGG annotations will be handled by InteractomeAnnotator
      puts "\n"
    end
  end

  private

  # Prints one accession per line under a "Components:" heading.
  def print_components(network)
    puts "Components:"
    network.each do |gene|
      puts "#{gene}"
    end
  end
end



:print_components

In [27]:
# Create an instance of InteractomeProcessor (the per-query member lists are
# built in the constructor)
processor = InteractomeProcessor.new(interactome)

# Process the interactome: merge overlapping lists into networks
processor.process

# Print the network report
processor.print_network_report

# Access the networks from the processor; from here on `networks` is an
# array of accession lists
networks = processor.networks



3 networks have been identified:
Network 1:
Components:
Q9SYX2
P43254
Q9LJR3
Q94BM7
Q9SPL2
Q9FE22
Q39057
O24646
Q96524
Q43125
O50055

Network 2:
Components:
P46600
O04292
Q9LEZ3
Q9SAD4

Network 3:
Components:
P11035
Q84MB2
Q9FHZ1
Q8L9Y3
Q9LV27
O80513
O80397
O80931
O04479



[["Q9SYX2", "P43254", "Q9LJR3", "Q94BM7", "Q9SPL2", "Q9FE22", "Q39057", "O24646", "Q96524", "Q43125", "O50055"], ["P46600", "O04292", "Q9LEZ3", "Q9SAD4"], ["P11035", "Q84MB2", "Q9FHZ1", "Q8L9Y3", "Q9LV27", "O80513", "O80397", "O80931", "O04479"]]

In [61]:
require 'rest-client'
require 'json'

# Annotates the members of each network with GO terms and KEGG pathways
# (both obtained through TogoWS) and prints a per-network report.
class InteractomeAnnotator
  # @param networks [Array<Array<String>>] lists of UniProt accessions
  def initialize(networks)
    @networks = networks
    # { go_id => { count:, genes: [{ gene:, network:, go_name: }] } }
    @go_annotations = Hash.new { |hash, key| hash[key] = { count: 0, genes: [] } }
    # { pathway_id => { count:, genes: [{ gene:, network: }], pathway_id:, pathway_description: } }
    @kegg_annotations = Hash.new { |hash, key| hash[key] = { count: 0, genes: [], pathway_id: nil, pathway_description: nil } }
    # GO and KEGG cross-references come from the same document, so it is
    # downloaded once per accession and reused.
    @cross_reference_cache = {}
  end

  # Collects the annotations for every network member, then prints the report.
  def annotate_with_go_and_kegg
    annotate_networks
    print_network_report
  end

  private

  # Downloads the cross-reference section of a UniProt entry.
  #
  # @return [Hash] first element of the reply, or {} when it is absent or
  #   not a Hash
  # Request and JSON errors are left to the caller.
  def fetch_cross_references(protein_id)
    @cross_reference_cache[protein_id] ||= begin
      address = "http://togows.dbcls.jp/entry/uniprot/#{protein_id}/dr.json"
      response = RestClient::Request.execute(method: :get, url: address)
      entry = JSON.parse(response.body)[0]
      entry.is_a?(Hash) ? entry : {}
    end
  end

  # GO terms backed by IDA or IMP evidence.
  #
  # @return [Hash{String => String}, nil] go_id => go_name; nil when the entry
  #   has no GO list; {} when the request failed
  def retrieve_go_annotations(protein_id)
    go_terms = fetch_cross_references(protein_id)["GO"]
    return nil unless go_terms.is_a?(Array)

    go_terms.each_with_object({}) do |go, hash|
      next unless go[2].to_s.match?(/IDA:|IMP:/)

      hash[go[0]] = go[1]
    end
  rescue RestClient::Exception, JSON::ParserError => e
    puts "Error retrieving GO annotations for #{protein_id}: #{e.message}"
    {}
  end

  # KEGG pathways of every KEGG gene cross-referenced by the entry.
  #
  # @return [Array<Hash>] distinct { pathway_id:, pathway_description: }
  #   hashes; [] when the entry has no KEGG list or the request failed
  def retrieve_kegg_annotations(gene_id)
    puts "Retrieving KEGG annotations for gene #{gene_id}"

    kegg_terms = fetch_cross_references(gene_id)["KEGG"]
    unless kegg_terms.is_a?(Array)
      puts "No KEGG annotations found for gene #{gene_id}."
      return []
    end

    # uniq: two KEGG genes of one protein may point at the same pathway.
    kegg_annotations = kegg_terms.flat_map { |kegg_info| retrieve_kegg_pathways(kegg_info[0], gene_id) }.uniq
    puts "Retrieved KEGG annotations: #{kegg_annotations.inspect}"
    kegg_annotations
  rescue RestClient::Exception, JSON::ParserError => e
    puts "Error retrieving KEGG annotations for #{gene_id}: #{e.message}"
    []
  end

  # Pathways of one KEGG gene; [] when there are none or the request failed.
  def retrieve_kegg_pathways(kegg_id, gene_id)
    address = "http://togows.org/entry/kegg-genes/#{kegg_id}/pathways.json"
    response = RestClient::Request.execute({
      method: :get,
      url: address
    })
    pathways = JSON.parse(response.body)[0]
    return [] unless pathways.respond_to?(:map) && pathways.any?

    # Each element is read as an [id, description] pair.
    pathways.map { |line| { pathway_id: line[0], pathway_description: line[1] } }
  rescue RestClient::Exception, JSON::ParserError => e
    puts "Error retrieving KEGG annotations for #{gene_id}: #{e.message}"
    []
  end

  # Records the GO terms and KEGG pathways of one network member.
  # A missing GO list no longer prevents the KEGG lookup.
  def annotate_gene(gene, network_index)
    go_terms = retrieve_go_annotations(gene)

    if go_terms.nil?
      puts "No GO annotations found for #{gene}."
    else
      go_terms.each do |go_id, go_name|
        @go_annotations[go_id][:count] += 1
        @go_annotations[go_id][:genes] << { gene: gene, network: network_index + 1, go_name: go_name }
      end
    end

    retrieve_kegg_annotations(gene).each do |kegg_entry|
      # Pathway entries carry :pathway_id and :pathway_description only, so
      # the pathway ID is the aggregation key.
      pathway_id = kegg_entry[:pathway_id]
      record = @kegg_annotations[pathway_id]

      record[:count] += 1
      record[:pathway_id] = pathway_id
      record[:pathway_description] = kegg_entry[:pathway_description]
      record[:genes] << { gene: gene, network: network_index + 1 }
    end
  end

  def annotate_network(network, network_index)
    network.each do |gene|
      annotate_gene(gene, network_index)
    end
  end

  # Starts from empty tallies so a repeated run does not double the counts.
  def annotate_networks
    @go_annotations.clear
    @kegg_annotations.clear

    @networks.each_with_index do |network, index|
      annotate_network(network, index)
    end
  end

  def print_network_report
    puts "Network Report:"
    puts " "
    @networks.each_with_index do |network, index|
      puts "Network #{index + 1}:"
      print_components(network)
      print_go_terms(network)
      print_kegg_terms(network)
      puts "\n"
    end
  end

  def print_components(network)
    puts "Components:"
    network.each do |gene|
      puts "#{gene}"
    end
  end

  # Lists the GO terms carried by members of `network`, most frequent first.
  # The occurrence count covers all networks; the gene list is limited to
  # this network.
  def print_go_terms(network)
    puts "GO terms:"
    sorted_go_annotations = @go_annotations.sort_by { |_, info| -info[:count] }

    sorted_go_annotations.each do |go_id, info|
      next unless info[:genes].any? { |gene_info| network.include?(gene_info[:gene]) }

      puts "#{go_id} (#{info[:count]} occurrences): #{info[:genes].first[:go_name]}"
      info[:genes].each do |gene_info|
        next unless network.include?(gene_info[:gene])

        puts "  Gene: #{gene_info[:gene]}"
      end
    end
  end

  # Lists the KEGG pathways hit by members of `network`, most frequent first.
  def print_kegg_terms(network)
    puts "KEGG terms:"
    sorted_kegg_annotations = @kegg_annotations.sort_by { |_, info| -info[:count] }

    sorted_kegg_annotations.each do |_pathway_id, info|
      next unless info[:genes].any? { |gene_info| network.include?(gene_info[:gene]) }

      if info[:pathway_id] && info[:pathway_description]
        puts "#{info[:pathway_id]} (#{info[:count]} occurrences): #{info[:pathway_description]}"
        info[:genes].each do |gene_info|
          next unless network.include?(gene_info[:gene])

          puts "  Gene: #{gene_info[:gene]}"
        end
      else
        puts "Invalid KEGG entry for pathway_id: #{info[:pathway_id]}"
      end
    end
  end
end


:print_kegg_terms

In [62]:

# Create an instance of InteractomeAnnotator for the networks found above
annotator = InteractomeAnnotator.new(networks)

# Annotate every network member (web requests per accession) and print the
# network report
annotator.annotate_with_go_and_kegg

Retrieving KEGG annotations for gene Q9SYX2
Retrieved KEGG annotations: [{:pathway_id=>"ath04712", :pathway_description=>"Circadian rhythm - plant"}]
Retrieving KEGG annotations for gene P43254
Retrieved KEGG annotations: [{:pathway_id=>"ath04120", :pathway_description=>"Ubiquitin mediated proteolysis"}, {:pathway_id=>"ath04712", :pathway_description=>"Circadian rhythm - plant"}]
Retrieving KEGG annotations for gene Q9LJR3
Retrieved KEGG annotations: [{:pathway_id=>"ath04712", :pathway_description=>"Circadian rhythm - plant"}]
Retrieving KEGG annotations for gene Q94BM7
Retrieved KEGG annotations: [{:pathway_id=>"ath04712", :pathway_description=>"Circadian rhythm - plant"}]
Retrieving KEGG annotations for gene Q9SPL2
Retrieved KEGG annotations: []
Retrieving KEGG annotations for gene Q9FE22
Retrieved KEGG annotations: []
Retrieving KEGG annotations for gene Q39057
Retrieved KEGG annotations: [{:pathway_id=>"ath04712", :pathway_description=>"Circadian rhythm - plant"}]
Retrieving KEGG a

  Gene: Q43125
GO:0010117 (1 occurrences): P:photoprotection
  Gene: Q43125
GO:0099402 (1 occurrences): P:plant organ development
  Gene: Q43125
GO:1901529 (1 occurrences): P:positive regulation of anion channel activity
  Gene: Q43125
GO:1900426 (1 occurrences): P:positive regulation of defense response to bacterium
  Gene: Q43125
GO:1902448 (1 occurrences): P:positive regulation of shade avoidance
  Gene: Q43125
GO:1901672 (1 occurrences): P:positive regulation of systemic acquired resistance
  Gene: Q43125
GO:0046777 (1 occurrences): P:protein autophosphorylation
  Gene: Q43125
GO:0010468 (1 occurrences): P:regulation of gene expression
  Gene: Q43125
GO:0010310 (1 occurrences): P:regulation of hydrogen peroxide metabolic process
  Gene: Q43125
GO:2000377 (1 occurrences): P:regulation of reactive oxygen species metabolic process
  Gene: Q43125
GO:2000652 (1 occurrences): P:regulation of secondary cell wall biogenesis
  Gene: Q43125
GO:0051510 (1 occurrences): P:regulation of unidime

[["Q9SYX2", "P43254", "Q9LJR3", "Q94BM7", "Q9SPL2", "Q9FE22", "Q39057", "O24646", "Q96524", "Q43125", "O50055"], ["P46600", "O04292", "Q9LEZ3", "Q9SAD4"], ["P11035", "Q84MB2", "Q9FHZ1", "Q8L9Y3", "Q9LV27", "O80513", "O80397", "O80931", "O04479"]]

In [55]:
### Alternative annotator: KEGG terms were not appearing in the report (original note: "no sale KEGG")
require 'rest-client'
require 'json'

# Variant annotator: GO terms are tallied per GO ID and KEGG results per KEGG
# gene ID (each with the pathways of that gene). Data comes from TogoWS and
# the result is printed as a per-network report.
class MyInteractomeAnnotator
  # @param networks [Array<Array<String>>] lists of UniProt accessions
  def initialize(networks)
    @networks = networks
    # { go_id => { count:, genes: [{ gene:, network:, go_name: }] } }
    @go_annotations = Hash.new { |hash, key| hash[key] = { count: 0, genes: [] } }
    # { kegg_id => { count:, genes: [{ gene:, network:, kegg_data: }], pathway_description: } }
    @kegg_annotations = Hash.new { |hash, key| hash[key] = { count: 0, genes: [] } }
    # GO and KEGG cross-references come from the same document, so it is
    # downloaded once per accession and reused.
    @cross_reference_cache = {}
  end

  # Collects the annotations for every network member, then prints the report.
  def annotate_with_go_and_kegg
    annotate_networks
    print_network_report
  end

  private

  # Downloads the cross-reference section of a UniProt entry.
  #
  # @return [Hash] first element of the reply, or {} when it is absent or
  #   not a Hash
  # Request and JSON errors are left to the caller.
  def fetch_cross_references(protein_id)
    @cross_reference_cache[protein_id] ||= begin
      address = "http://togows.dbcls.jp/entry/uniprot/#{protein_id}/dr.json"
      response = RestClient::Request.execute(method: :get, url: address)
      entry = JSON.parse(response.body)[0]
      entry.is_a?(Hash) ? entry : {}
    end
  end

  # GO terms backed by IDA or IMP evidence.
  #
  # @return [Hash{String => String}, nil] go_id => go_name; nil when the entry
  #   has no GO list; {} when the request failed
  def retrieve_go_annotations(protein_id)
    go_terms = fetch_cross_references(protein_id)["GO"]
    return nil unless go_terms.is_a?(Array)

    go_terms.each_with_object({}) do |go, hash|
      next unless go[2].to_s.match?(/IDA:|IMP:/)

      hash[go[0]] = go[1]
    end
  rescue RestClient::Exception, JSON::ParserError => e
    puts "Error retrieving GO annotations for #{protein_id}: #{e.message}"
    {}
  end

  # KEGG genes cross-referenced by the entry, each with its pathway data.
  #
  # @return [Array<Hash>, nil] { 'id' => kegg_id, 'description' => pathways }
  #   hashes; nil when the entry has no KEGG list; [] when the request failed
  def retrieve_kegg_annotations(gene_id)
    kegg_terms = fetch_cross_references(gene_id)["KEGG"]
    return nil unless kegg_terms.is_a?(Array)

    kegg_terms.each_with_object([]) do |kegg_info, result|
      kegg_id = kegg_info[0]
      pathways = retrieve_kegg_pathways(kegg_id, gene_id)

      result << { 'id' => kegg_id, 'description' => pathways } if pathways
    end
  rescue RestClient::Exception, JSON::ParserError => e
    puts "Error retrieving KEGG annotations for #{gene_id}: #{e.message}"
    []
  end

  # Pathway data of one KEGG gene (first element of the reply), or nil when
  # it is empty or the request failed.
  def retrieve_kegg_pathways(kegg_id, gene_id)
    address = "http://togows.org/entry/kegg-genes/#{kegg_id}/pathways.json"
    response = RestClient::Request.execute(method: :get, url: address)
    pathways = JSON.parse(response.body)[0]

    pathways if pathways.respond_to?(:any?) && pathways.any?
  rescue RestClient::Exception, JSON::ParserError => e
    puts "Error retrieving KEGG annotations for #{gene_id}: #{e.message}"
    nil
  end

  # Renders pathway data as "id: description; id: description".
  def describe_pathways(kegg_data)
    return kegg_data.to_s unless kegg_data.respond_to?(:map)

    kegg_data.map { |pathway_id, description| "#{pathway_id}: #{description}" }.join("; ")
  end

  # Records the GO terms and KEGG genes of one network member.
  # A missing GO list no longer prevents the KEGG lookup.
  def annotate_gene(gene, network_index)
    go_terms = retrieve_go_annotations(gene)

    if go_terms.nil?
      puts "No GO annotations found for #{gene}."
    else
      go_terms.each do |go_id, go_name|
        @go_annotations[go_id][:count] += 1
        @go_annotations[go_id][:genes] << { gene: gene, network: network_index + 1, go_name: go_name }
      end
    end

    kegg_terms = retrieve_kegg_annotations(gene)

    if kegg_terms.nil?
      puts "No KEGG annotations found for #{gene}."
      return
    end

    kegg_terms.each do |kegg_info|
      kegg_id = kegg_info['id']
      kegg_data = kegg_info['description']
      record = @kegg_annotations[kegg_id]

      record[:count] += 1
      # The report prints :pathway_description, so it has to be filled in here.
      record[:pathway_description] = describe_pathways(kegg_data)
      record[:genes] << { gene: gene, network: network_index + 1, kegg_data: kegg_data }
    end
  end

  def annotate_network(network, network_index)
    network.each do |gene|
      annotate_gene(gene, network_index)
    end
  end

  # Starts from empty tallies so a repeated run does not double the counts.
  def annotate_networks
    @go_annotations.clear
    @kegg_annotations.clear

    @networks.each_with_index do |network, index|
      annotate_network(network, index)
    end
  end

  def print_network_report
    puts "Network Report:"
    puts " "
    @networks.each_with_index do |network, index|
      puts "Network #{index + 1}:"
      print_components(network)
      print_go_terms(network)
      print_kegg_terms(network)
      puts "\n"
    end
  end

  def print_components(network)
    puts "Components:"
    network.each do |gene|
      puts "#{gene}"
    end
  end

  # Lists the GO terms carried by members of `network`, most frequent first.
  # The occurrence count covers all networks; the gene list is limited to
  # this network.
  def print_go_terms(network)
    puts "GO terms:"
    sorted_go_annotations = @go_annotations.sort_by { |_, info| -info[:count] }

    sorted_go_annotations.each do |go_id, info|
      next unless info[:genes].any? { |gene_info| network.include?(gene_info[:gene]) }

      puts "#{go_id} (#{info[:count]} occurrences): #{info[:genes].first[:go_name]}"
      info[:genes].each do |gene_info|
        next unless network.include?(gene_info[:gene])

        puts "  Gene: #{gene_info[:gene]}"
      end
    end
  end

  # Lists the KEGG genes found for members of `network`, most frequent first.
  def print_kegg_terms(network)
    puts "KEGG terms:"
    sorted_kegg_annotations = @kegg_annotations.sort_by { |_, info| -info[:count] }

    sorted_kegg_annotations.each do |kegg_id, info|
      next unless info[:genes].any? { |gene_info| network.include?(gene_info[:gene]) }

      puts "#{kegg_id} (#{info[:count]} occurrences): #{info[:pathway_description]}"
      info[:genes].each do |gene_info|
        next unless network.include?(gene_info[:gene])

        puts "  Gene: #{gene_info[:gene]}"
      end
    end
  end
end

:print_kegg_terms

In [56]:
# Create an instance of MyInteractomeAnnotator (the variant that tallies KEGG
# results per KEGG gene ID) for the networks found above
annotator = MyInteractomeAnnotator.new(networks)

# Annotate every network member and print the network report
annotator.annotate_with_go_and_kegg

Network Report:
 
Network 1:
Components:
Q9SYX2
P43254
Q9LJR3
Q94BM7
Q9SPL2
Q9FE22
Q39057
O24646
Q96524
Q43125
O50055
GO terms:
GO:0005634 (12 occurrences): C:nucleus
  Gene: Q9SYX2
  Gene: P43254
  Gene: Q9FE22
  Gene: Q39057
  Gene: O24646
  Gene: Q96524
  Gene: Q43125
GO:0016604 (4 occurrences): C:nuclear body
  Gene: Q9SYX2
  Gene: P43254
  Gene: Q96524
  Gene: Q43125
GO:0009637 (4 occurrences): P:response to blue light
  Gene: Q9SYX2
  Gene: Q94BM7
  Gene: Q96524
  Gene: Q43125
GO:0005737 (3 occurrences): C:cytoplasm
  Gene: Q9SPL2
  Gene: Q96524
  Gene: Q43125
GO:0010218 (3 occurrences): P:response to far red light
  Gene: Q9FE22
  Gene: Q39057
  Gene: Q43125
GO:0009909 (3 occurrences): P:regulation of flower development
  Gene: Q39057
  Gene: Q96524
  Gene: O50055
GO:0009416 (3 occurrences): P:response to light stimulus
  Gene: Q96524
  Gene: Q43125
GO:0009640 (2 occurrences): P:photomorphogenesis
  Gene: Q9SYX2
  Gene: Q43125
GO:0010017 (2 occurrences): P:red or far-red light s

  Gene: Q9FHZ1
  Gene: O80931
  Gene: O04479
GO:0009416 (3 occurrences): P:response to light stimulus
  Gene: P11035
GO:0009944 (3 occurrences): P:polarity specification of adaxial/abaxial axis
  Gene: O80931
  Gene: O04479
GO:0009943 (2 occurrences): P:adaxial/abaxial axis specification
  Gene: O04479
GO:0003729 (1 occurrences): F:mRNA binding
  Gene: P11035
GO:0009703 (1 occurrences): F:nitrate reductase (NADH) activity
  Gene: P11035
GO:0008940 (1 occurrences): F:nitrate reductase activity
  Gene: P11035
GO:0042128 (1 occurrences): P:nitrate assimilation
  Gene: P11035
GO:0006809 (1 occurrences): P:nitric oxide biosynthetic process
  Gene: P11035
GO:0090610 (1 occurrences): P:bundle sheath cell fate specification
  Gene: Q9FHZ1
GO:0048366 (1 occurrences): P:leaf development
  Gene: Q9FHZ1
GO:0005768 (1 occurrences): C:endosome
  Gene: Q9LV27
GO:0016020 (1 occurrences): C:membrane
  Gene: Q9LV27
GO:0048571 (1 occurrences): P:long-day photoperiodism
  Gene: Q9LV27
GO:1900088 (1 occurr

[["Q9SYX2", "P43254", "Q9LJR3", "Q94BM7", "Q9SPL2", "Q9FE22", "Q39057", "O24646", "Q96524", "Q43125", "O50055"], ["P46600", "O04292", "Q9LEZ3", "Q9SAD4"], ["P11035", "Q84MB2", "Q9FHZ1", "Q8L9Y3", "Q9LV27", "O80513", "O80397", "O80931", "O04479"]]