Skip to content
This repository
Browse code

* Addition of the :required_attributes option to a biomart search. [Closes #10]

* Cleanup and expansion of the test suite.
  • Loading branch information...
commit 3a2f0f541f8657630d750855bdd25ee36a81535c 1 parent 556462f
authored June 10, 2010
65  lib/biomart/dataset.rb
@@ -64,7 +64,7 @@ def list_attributes
64 64
     # optional arguments:
65 65
     # 
66 66
     #   {
67  
-    #     :timeout => integer,      # set a timeout length for the request (secs)
  67
+    #     :timeout => integer,     # set a timeout length for the request (secs)
68 68
     #     :filters => {}           # hash of key-value pairs (filter => search term)
69 69
     #   }
70 70
     def count( args={} )
@@ -72,6 +72,10 @@ def count( args={} )
72 72
         raise Biomart::ArgumentError, "You cannot federate a count query."
73 73
       end
74 74
       
  75
+      if args[:required_attributes]
  76
+        raise Biomart::ArgumentError, "The :required_attributes option is not allowed on count queries."
  77
+      end
  78
+      
75 79
       result = request(
76 80
         :method  => 'post',
77 81
         :url     => @url,
@@ -90,10 +94,11 @@ def count( args={} )
90 94
     # optional arguments:
91 95
     # 
92 96
     #   {
93  
-    #     :process_results => true/false,   # convert search results to object
94  
-    #     :timeout         => integer,      # set a timeout length for the request (secs)
95  
-    #     :filters         => {},           # hash of key-value pairs (filter => search term)
96  
-    #     :attributes      => [],           # array of attributes to retrieve
  97
+    #     :process_results     => true/false,   # convert search results to object
  98
+    #     :timeout             => integer,      # set a timeout length for the request (secs)
  99
+    #     :filters             => {},           # hash of key-value pairs (filter => search term)
  100
+    #     :attributes          => [],           # array of attributes to retrieve
  101
+    #     :required_attributes => [],           # array of attributes that are required
97 102
     #     :federate => [
98 103
     #       {
99 104
     #         :dataset    => Biomart::Dataset, # A dataset object to federate with
@@ -102,9 +107,13 @@ def count( args={} )
102 107
     #       }
103 108
     #     ]
104 109
     #   }
  110
+    #
105 111
     # Note, if you do not pass any filters or attributes arguments, the defaults 
106 112
     # for the dataset shall be used.
107 113
     #
  114
+    # Also, using the :required_attributes option - this performs AND logic and will require 
  115
+    # data to be returned in all of the listed attributes in order for it to be returned.
  116
+    #
108 117
     # By default will return a hash with the following:
109 118
     # 
110 119
     #   {
@@ -115,6 +124,10 @@ def count( args={} )
115 124
     # But with the :process_results option will return an array of hashes, 
116 125
     # where each hash represents a row of results (keyed by the attribute name).
117 126
     def search( args={} )
  127
+      if args[:required_attributes] and !args[:required_attributes].is_a?(Array)
  128
+        raise Biomart::ArgumentError, "The :required_attributes option must be passed as an array."
  129
+      end
  130
+      
118 131
       response = request(
119 132
         :method  => 'post',
120 133
         :url     => @url,
@@ -123,10 +136,50 @@ def search( args={} )
123 136
       )
124 137
       
125 138
       result = process_tsv( args, response )
  139
+      result = filter_data_rows( args, result ) if args[:required_attributes]
126 140
       result = conv_results_to_a_of_h( result ) if args[:process_results]
127 141
       return result
128 142
     end
129 143
     
  144
+    def filter_data_rows( args, result )
  145
+      # Get the list of attributes searched for...
  146
+      attributes = args[:attributes] ? args[:attributes] : []
  147
+      if attributes.empty?
  148
+        self.attributes.each do |name,attribute|
  149
+          if attribute.default?
  150
+            attributes.push(name)
  151
+          end
  152
+        end
  153
+      end
  154
+      
  155
+      # Work out which attribute positions we need to test...
  156
+      positions_to_test = []
  157
+      attributes.each_index do |index|
  158
+        if args[:required_attributes].include?(attributes[index])
  159
+          positions_to_test.push(index)
  160
+        end
  161
+      end
  162
+      
  163
+      # Now go through the results and filter out the unwanted data...
  164
+      filtered_data = []
  165
+      result[:data].each do |data_row|
  166
+        save_row_count = 0
  167
+        
  168
+        positions_to_test.each do |position|
  169
+          save_row_count = save_row_count + 1 unless data_row[position].nil?
  170
+        end
  171
+        
  172
+        if save_row_count == positions_to_test.size
  173
+          filtered_data.push(data_row)
  174
+        end
  175
+      end
  176
+      
  177
+      return {
  178
+        :headers => result[:headers],
  179
+        :data    => filtered_data
  180
+      }
  181
+    end
  182
+    
130 183
     # Utility function to build the Biomart query XML
131 184
     def generate_xml( args={} )
132 185
       biomart_xml = ""
@@ -204,7 +257,7 @@ def process_xml_args( args={} )
204 257
             raise Biomart::ArgumentError, "The :federate option must be passed as an array."
205 258
           end
206 259
 
207  
-          unless args[:federate].size === 1
  260
+          unless args[:federate].size == 1
208 261
             raise Biomart::ArgumentError, "Sorry, we can only federate two datasets at present.  This limitation shall be lifted in version 0.8 of biomart."
209 262
           end
210 263
 
215  test/test_biomart.rb
@@ -26,9 +26,11 @@ def setup
26 26
     end
27 27
     
28 28
     should "have basic metadata" do
  29
+      true_false  = [true,false]
29 30
       assert( @htgt_database.display_name, "Biomart::Database does not have a 'display name'." )
30 31
       assert( @htgt_database.name, "Biomart::Database does not have a 'name'." )
31 32
       assert( @htgt_database.visible != nil, "Biomart::Database does not have a 'visible' flag." )
  33
+      assert( true_false.include?( @htgt_database.visible? ), "Biomart::Database.visible? is not returning true/false." )
32 34
     end
33 35
     
34 36
     should "have datasets" do
@@ -45,6 +47,7 @@ def setup
45 47
       @kermits   = @htgt.datasets["kermits"]
46 48
       @ensembl   = @htgt.datasets["mmusculus_gene_ensembl"]
47 49
       @emma      = Biomart::Dataset.new( "http://www.emmanet.org/biomart", { :name => "strains" } )
  50
+      @dcc       = Biomart::Dataset.new( "http://www.i-dcc.org/biomart", { :name => "dcc" } )
48 51
     end
49 52
     
50 53
     should "have basic metadata" do
@@ -65,9 +68,34 @@ def setup
65 68
       assert( @kermits.attributes["ensembl_gene_id"].is_a?( Biomart::Attribute ), "Biomart::Dataset is not creating Biomart::Attribute objects." )
66 69
     end
67 70
     
68  
-    should "perform count/search queries" do
69  
-      perform_count_queries()
70  
-      perform_search_queries()
  71
+    should "perform count queries" do
  72
+      htgt_count = @htgt_targ.count()
  73
+      assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
  74
+      assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )
  75
+
  76
+      htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
  77
+      assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
  78
+      assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )
  79
+
  80
+      htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
  81
+      assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
  82
+      assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
  83
+    end
  84
+    
  85
+    should "perform search queries" do
  86
+      search = @htgt_trap.search()
  87
+      assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
  88
+      assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
  89
+
  90
+      search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
  91
+      assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
  92
+      assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
  93
+      assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )
  94
+
  95
+      search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
  96
+      assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
  97
+      assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
  98
+      assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
71 99
     end
72 100
     
73 101
     should "perform search queries whilst altering the timeout property" do
@@ -115,7 +143,7 @@ def setup
115 143
     end
116 144
     
117 145
     should "perform federated search queries" do
118  
-      results = @htgt_targ.search(
  146
+      search_opts = {
119 147
         :filters => {
120 148
           "status" => [
121 149
             "Mice - Genotype confirmed", "Mice - Germline transmission",
@@ -130,43 +158,135 @@ def setup
130 158
             :attributes => []
131 159
           }
132 160
         ]
133  
-      )
  161
+      }
  162
+      
  163
+      results = @htgt_targ.search( search_opts )
134 164
       
135 165
       assert( results.is_a?(Hash), "Biomart::Dataset.search is not returning a hash. [federated search]" )
136 166
       assert( results[:data].is_a?(Array), "Biomart::Dataset.search[:data] is not returning an array. [federated search]" )
137 167
       assert( results[:data][0].size === 3, "Biomart::Dataset.search[:data] is not returning 3 attributes. [federated search]" )
138 168
       assert( results[:headers].size === 3, "Biomart::Dataset.search[:headers] is not returning 3 elements. [federated search]" )
  169
+
  170
+      assert_raise( Biomart::ArgumentError ) { @htgt_targ.count( search_opts ) }
  171
+      
  172
+      assert_raise Biomart::ArgumentError do
  173
+        search_opts[:federate] = [
  174
+          {
  175
+            :dataset => "mmusculus_gene_ensembl",
  176
+            :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
  177
+            :attributes => []
  178
+          }
  179
+        ]
  180
+        results = @htgt_targ.search( search_opts )
  181
+      end
  182
+      
  183
+      assert_raise Biomart::ArgumentError do
  184
+        search_opts[:federate] = {
  185
+          :dataset => "mmusculus_gene_ensembl",
  186
+          :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
  187
+          :attributes => []
  188
+        }
  189
+        results = @htgt_targ.search( search_opts )
  190
+      end
  191
+      
  192
+      assert_raise Biomart::ArgumentError do
  193
+        search_opts[:federate] = [
  194
+          {
  195
+            :dataset => @ensembl,
  196
+            :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
  197
+            :attributes => []
  198
+          },
  199
+          {
  200
+            :dataset => @ensembl,
  201
+            :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
  202
+            :attributes => []
  203
+          }
  204
+        ]
  205
+        results = @htgt_targ.search( search_opts )
  206
+      end
  207
+    end
  208
+    
  209
+    should "perform search queries with the :required_attributes option" do
  210
+      search_opts = {
  211
+        :filters => {
  212
+          "chromosome_name" => "1",
  213
+          "start"           => "1",
  214
+          "end"             => "10000000"
  215
+        },
  216
+        :attributes => [
  217
+          "ensembl_gene_id", "ensembl_transcript_id",
  218
+          "mouse_paralog_ensembl_gene", "mouse_paralog_chromosome"
  219
+        ],
  220
+        :required_attributes => ["mouse_paralog_ensembl_gene"]
  221
+      }
  222
+      
  223
+      results = @ensembl.search( search_opts )
  224
+      
  225
+      assert( results.is_a?(Hash), "Biomart::Dataset.search is not returning a hash. [required_attributes search]" )
  226
+      assert( results[:data].is_a?(Array), "Biomart::Dataset.search[:data] is not returning an array. [required_attributes search]" )
  227
+      results[:data].each do |data_row|
  228
+        assert_equal( false, data_row[2].nil?, "The required_attributes search has not filtered out nil values." )
  229
+      end
  230
+      
  231
+      assert_raise( Biomart::ArgumentError ) { @ensembl.count( search_opts ) }
  232
+      assert_raise Biomart::ArgumentError do
  233
+        search_opts[:required_attributes] = "mouse_paralog_ensembl_gene"
  234
+        @ensembl.search( search_opts )
  235
+      end
  236
+      
  237
+      results = @dcc.search(
  238
+        :filters => {
  239
+          "marker_symbol" => [
  240
+            "Lrrc32", "Dub3", "Hs3st4", "Hs3st4", "Hs3st4", "Hs3st4",
  241
+            "Hs3st4", "Hs3st4", "Hs3st4", "Tcrg-C", "Gm5195", "Gm5198",
  242
+            "Gm5199", "Gm5625", "Rpl13-ps2", "Gm5664", "Gm5928", "Gm6035",
  243
+            "Gm6049"
  244
+          ]
  245
+        },
  246
+        :required_attributes => ["ikmc_project","ikmc_project_id"],
  247
+        :process_results => true
  248
+      )
  249
+      
  250
+      results.each do |data_row|
  251
+        assert_equal( false, data_row["ikmc_project"].nil?, "The required_attributes search has not filtered out nil values." )
  252
+        assert_equal( false, data_row["ikmc_project_id"].nil?, "The required_attributes search has not filtered out nil values." )
  253
+      end
139 254
     end
140 255
   end
141 256
   
142  
-  def perform_count_queries()
143  
-    htgt_count = @htgt_targ.count()
144  
-    assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
145  
-    assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )
146  
-    
147  
-    htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
148  
-    assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
149  
-    assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )
150  
-    
151  
-    htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
152  
-    assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
153  
-    assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
  257
+  context "A Biomart::Attribute instance" do
  258
+    setup do
  259
+      @kermits = @htgt.datasets["kermits"]
  260
+    end
  261
+    
  262
+    should "have basic metadata" do
  263
+      true_false  = [true,false]
  264
+      ens_gene_id = @kermits.attributes["ensembl_gene_id"]
  265
+      
  266
+      assert( !ens_gene_id.name.nil?, "Biomart::Attribute.name is nil." )
  267
+      assert( !ens_gene_id.display_name.nil?, "Biomart::Attribute.display_name is nil." )
  268
+      
  269
+      assert( true_false.include?( ens_gene_id.hidden? ), "Biomart::Attribute.hidden? is not returning true/false." )
  270
+      assert( true_false.include?( ens_gene_id.default? ), "Biomart::Attribute.default? is not returning true/false." )
  271
+    end
154 272
   end
155 273
   
156  
-  def perform_search_queries()
157  
-    search = @htgt_trap.search()
158  
-    assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
159  
-    assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
160  
-    
161  
-    search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
162  
-    assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
163  
-    assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
164  
-    assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )
165  
-    
166  
-    search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
167  
-    assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
168  
-    assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
169  
-    assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
  274
+  context "A Biomart::Filter instance" do
  275
+    setup do
  276
+      @kermits = @htgt.datasets["kermits"]
  277
+    end
  278
+    
  279
+    should "have basic metadata" do
  280
+      true_false  = [true,false]
  281
+      ens_gene_id = @kermits.filters["ensembl_gene_id"]
  282
+      
  283
+      assert( !ens_gene_id.name.nil?, "Biomart::Filter.name is nil." )
  284
+      assert( !ens_gene_id.display_name.nil?, "Biomart::Filter.display_name is nil." )
  285
+      
  286
+      assert( true_false.include?( ens_gene_id.hidden? ), "Biomart::Filter.hidden? is not returning true/false." )
  287
+      assert( true_false.include?( ens_gene_id.default? ), "Biomart::Filter.default? is not returning true/false." )
  288
+      assert( true_false.include?( ens_gene_id.multiple_values? ), "Biomart::Filter.multiple_values? is not returning true/false." )
  289
+    end
170 290
   end
171 291
   
172 292
   context "The Biomart module" do
@@ -184,33 +304,13 @@ def perform_search_queries()
184 304
     end
185 305
     
186 306
     should "handle user/configuration errors (i.e. incorrect URLs etc)" do
187  
-      begin
188  
-        @not_biomart.list_databases
189  
-      rescue Biomart::HTTPError => e
190  
-        http_error = e
191  
-      end
192  
-      
193  
-      assert( http_error.is_a?( Biomart::HTTPError ), "Biomart.request is not processing HTTP errors correctly." )
  307
+      assert_raise( Biomart::HTTPError ) { @not_biomart.list_databases }
194 308
     end
195 309
     
196 310
     should "handle biomart server errors gracefully" do
197  
-      begin
198  
-        @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } )
199  
-      rescue Biomart::FilterError => e
200  
-        filter_error = e
201  
-      end
202  
-      
203  
-      begin
204  
-        @htgt_targ.search( :attributes => ["wibbleblibbleblip"] )
205  
-      rescue Biomart::AttributeError => e
206  
-        attribute_error = e
207  
-      end
208  
-      
209  
-      begin
210  
-        @bad_dataset.count()
211  
-      rescue Biomart::DatasetError => e
212  
-        dataset_error = e
213  
-      end
  311
+      assert_raise( Biomart::FilterError )    { @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } ) }
  312
+      assert_raise( Biomart::AttributeError ) { @htgt_targ.search( :attributes => ["wibbleblibbleblip"] ) }
  313
+      assert_raise( Biomart::DatasetError )   { @bad_dataset.count() }
214 314
       
215 315
       begin
216 316
         @bad_dataset.count()
@@ -218,10 +318,7 @@ def perform_search_queries()
218 318
         general_error = e
219 319
       end
220 320
       
221  
-      assert( filter_error.is_a?( Biomart::FilterError ), "Biomart.request is not handling Biomart filter errors correctly." )
222  
-      assert( attribute_error.is_a?( Biomart::AttributeError ), "Biomart.request is not handling Biomart attribute errors correctly." )
223  
-      assert( dataset_error.is_a?( Biomart::DatasetError ), "Biomart.request is not handling Biomart dataset errors correctly." )
224  
-      assert( general_error.is_a?( Biomart::BiomartError ), "Biomart.request is not handling general Biomart errors correctly." )
  321
+      assert( general_error.is_a?(Biomart::BiomartError), "Biomart.request is not handling general Biomart errors correctly." )
225 322
     end
226 323
   end
227 324
 end

0 notes on commit 3a2f0f5

Please sign in to comment.
Something went wrong with that request. Please try again.