In [2]:
###------ Read the genefile and append genes to a file -----###

gene_list = Array.new
File.foreach("./documents/ArabidopsisSubNetwork_GeneList.txt"){ |line|
    gene = line.gsub("\n",'')        # We eliminate metacharacter \n
    unless gene.match(/AT\dG\d{5}/i) # Check if genes belong to Arabidopsis and save each gene in the array.
        abort("ERROR: the gene list have some errors. #{gene} has not correct format") 
    end
    gene_list <<  gene.upcase}              
                                
print gene_list

["AT4G27030", "AT5G54270", "AT1G21400", "AT5G19120", "AT2G13360", "AT4G05180", "AT1G22690", "AT2G45170", "AT4G09650", "AT5G55620", "AT1G31330", "AT1G80440", "AT3G28270", "AT4G12800", "AT5G04140", "AT5G64040", "AT1G29660", "AT1G15820", "AT1G64500", "AT1G03130", "AT2G20670", "AT4G17090", "AT1G23740", "AT2G46340", "AT4G17460", "AT1G07010", "AT1G32220", "AT2G03750", "AT3G47470", "AT4G12830", "AT5G05690", "AT5G64410", "AT2G21330", "AT1G15980", "AT2G01890", "AT1G12250", "AT2G21210", "AT4G28660", "AT1G52230", "AT3G18890", "AT4G22890", "AT1G11850", "AT1G37130", "AT2G04039", "AT3G48420", "AT4G19170", "AT5G07020", "AT5G65010", "AT3G01500", "AT1G25230", "AT2G34430", "AT1G18060", "AT2G30600", "AT4G33010", "AT1G65490", "AT3G21670", "AT4G27260", "AT1G12780", "AT1G55480", "AT2G17880", "AT3G49160", "AT4G21210", "AT5G14740", "AT4G37980", "AT1G29395", "AT2G39730", "AT1G20020", "AT2G34420", "AT4G37800", "AT1G70820", "AT3G26740", "AT4G28780", "AT1G12900", "AT1G64680", "AT2G25200", "AT3G55800", "AT4G21280"

In [3]:
require 'rest-client'

def self.find_interactions(gene_list)

  intact_hash = Hash.new

  gene_list.each do |gene_id|
    gene_id.upcase!       # Convert gene ID to upercase
    address = "http://www.ebi.ac.uk/Tools/webservices/psicquic/intact/webservices/current/search/interactor/#{gene_id}?format=tab25"
    response = RestClient::Request.execute(method: :get, url: address)
    intact_data = response.body.split(/\n/)

    record_list = Array.new

    intact_data.each do |record|
      intact_gene1 = record.split(/\t/)[2].split(/\|/).grep(/^ensemblplants/)  # We filter to extract ensembleplants id
      intact_gene2 = record.split(/\t/)[3].split(/\|/).grep(/^ensemblplants/)  # from columns 2 and 3 of each record.
      intact_genes = intact_gene1 + intact_gene2                               # Combine records from both columns. 

      score = record.split(/\t/).pop.split(/\:/).pop   # Extract the interaction score of each record.

      # For some genes, we obtain interaction reports for different splicing variants (.1, .2, .3)
      # For each gene record, we extract only the gene ID using regular expressions and convert gene IDs
      # into symbol format.

      intact_genes_filtered = Array.new
      intact_genes.each {|splicing| intact_genes_filtered << splicing.match(/AT\dG\d*/).to_s.to_sym}
        #if gene_list.include?(gene) then
        #end}
        
      # Finally, we remove duplicates from our final interaction records.

      intact_genes_filtered.uniq!

  ''' With this process, we obtain a set of inteaction records for each gene, each record containing one
      or more interactors. Different records might share common interactors and even contain the query gene.
      We want to obtain a final list of unique interactors for each query gene, so we need to remove interactor
      redundancy and interaction of the query gene with itself.
  '''
      # Introduce a quality filter

      unless score.to_f < 0.5
        record_list += intact_genes_filtered        # Combine the interactors from different records
        record_list.uniq!                           # Remove redundant interactors
        record_list.delete(gene_id.to_sym)          # Remove the query gene from its interactor list
        record_list.delete("".to_sym)               # Remove empty gene IDs (observed within the results)
        
        # Append the results for every query gene to a Hash
        intact_hash[gene_id.to_sym] = record_list unless record_list.empty?  
      end
    end

    #print "\n\n", gene_id.to_s, " interactors: ", record_list.length
    #print "\n", Hash[gene_id.to_sym => record_list]

  end

  return intact_hash
  # Results are contained in intact_hash
  # We can omit genes without reported interactions:
  # clean_interactions = first_interactions.delete_if {|key,value| value.empty? }
end

interaction_dict = find_interactions(gene_list)

{:AT4G09650=>[:AT4G37260, :AT4G32570], :AT5G04140=>[:AT4G37930], :AT1G03130=>[:AT4G37260], :AT4G17460=>[:AT1G70920, :AT2G44910, :AT3G60390, :AT3G15030, :AT2G22800], :AT5G05690=>[:AT3G54130], :AT1G37130=>[:AT5G18930, :AT1G31880, :AT4G32570, :AT2G37630, :AT5G60120], :AT4G19170=>[:AT5G17790], :AT5G15850=>[:AT2G46340, :AT3G15354, :AT1G53090, :AT4G10240], :AT5G17300=>[:AT5G24520], :AT1G54040=>[:AT4G23810], :AT3G21870=>[:AT3G48750], :AT3G48360=>[:AT4G09570], :AT2G43010=>[:AT1G09530, :AT2G18790, :AT1G09570], :AT1G71030=>[:AT4G09820, :AT1G60250], :AT3G45780=>[:AT5G64330], :AT3G62410=>[:AT3G26650], :AT1G76080=>[:AT5G19000, :AT5G67300, :AT1G31880], :AT4G02770=>[:AT4G37260, :AT1G31880], :AT4G39710=>[:AT4G03280], :AT3G62550=>[:AT1G25490, :AT4G32010], :AT1G14280=>[:AT1G35560, :AT1G31880]}

In [4]:
interaction_dict.values

[[:AT4G37260, :AT4G32570], [:AT4G37930], [:AT4G37260], [:AT1G70920, :AT2G44910, :AT3G60390, :AT3G15030, :AT2G22800], [:AT3G54130], [:AT5G18930, :AT1G31880, :AT4G32570, :AT2G37630, :AT5G60120], [:AT5G17790], [:AT2G46340, :AT3G15354, :AT1G53090, :AT4G10240], [:AT5G24520], [:AT4G23810], [:AT3G48750], [:AT4G09570], [:AT1G09530, :AT2G18790, :AT1G09570], [:AT4G09820, :AT1G60250], [:AT5G64330], [:AT3G26650], [:AT5G19000, :AT5G67300, :AT1G31880], [:AT4G37260, :AT1G31880], [:AT4G03280], [:AT1G25490, :AT4G32010], [:AT1G35560, :AT1G31880]]

Find interactions between proteins from our gene_list. We develop a recursive function that 

In [13]:
max_depth = 0

def self.build_networks(gene, interaction_dict)
 
    max_depth = max_depth + 1
    
    return [] if max_depth > 2
    
    network = [gene.to_sym]
    interactors = interaction_dict[gene.to_sym]

    unless interactors.nil?
      network += interactors
      interactors.each {|int| network += build_networks(int, interaction_dict)}
    end
    return network.uniq

end

network_list = Array.new


gene_list.each {|gene|
  network = build_networks(gene = gene, interaction_dict = interaction_dict)
  network_list << network
  print network
  puts
}

NoMethodError: undefined method `+' for nil:NilClass

In [22]:
network_list = Array.new


gene_list.each {|gene|
  network = build_networks(gene = gene, interaction_dict = interaction_dict)
  network_list << network
  print network
  puts
}




NoMethodError: undefined method `+' for nil:NilClass

In [19]:
network_list.each {|network| print network if network.length > 2}

[]