Changing the name for the new statisticaltest resource

bigmlcom · Aug 25, 2015 · 5a74ef6 · 5a74ef6
1 parent a94e860
commit 5a74ef6
Show file tree

Hide file tree

Showing 12 changed files with 309 additions and 265 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -9,6 +9,8 @@ History
 - Adding REST methods to manage new type of resource: correlations.
 - Adding REST methods to manage new type of resource: tests.
 - Adding min and max values predictions for regression models and ensembles.
+- Fixing bug: Fields object was not retrieving objective id from the
+  resource info.
 
 4.1.7 (2015-08-15)
 ~~~~~~~~~~~~~~~~~~

diff --git a/bigml/api.py b/bigml/api.py
@@ -57,7 +57,7 @@
 from bigml.projecthandler import ProjectHandler
 from bigml.samplehandler import SampleHandler
 from bigml.correlationhandler import CorrelationHandler
-from bigml.testhandler import TestHandler
+from bigml.statisticaltesthandler import StatisticalTestHandler
 
 # Repeating constants and functions for backwards compatibility
 
@@ -79,7 +79,7 @@
     BATCH_PREDICTION_PATH, CLUSTER_PATH, CENTROID_PATH, BATCH_CENTROID_PATH,
     ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH,
     SAMPLE_PATH, SAMPLE_RE, CORRELATION_PATH, CORRELATION_RE,
-    TEST_PATH, TEST_RE)
+    STATISTICAL_TEST_PATH, STATISTICAL_TEST_RE)
 
 
 from bigml.resourcehandler import (
@@ -89,7 +89,7 @@
     get_prediction_id, get_batch_prediction_id, get_batch_centroid_id,
     get_batch_anomaly_score_id, get_resource_id, resource_is_ready,
     get_status, check_resource, http_ok, get_project_id, get_sample_id,
-    get_correlation_id, get_test_id)
+    get_correlation_id, get_statistical_test_id)
 
 # Map status codes to labels
 STATUSES = {
@@ -114,7 +114,7 @@ def count(listing):
         return listing['meta']['query_total']
 
 
-class BigML(TestHandler, CorrelationHandler, SampleHandler, ProjectHandler,
+class BigML(StatisticalTestHandler, CorrelationHandler, SampleHandler, ProjectHandler,
             BatchAnomalyScoreHandler, BatchCentroidHandler,
             BatchPredictionHandler, EvaluationHandler, AnomalyScoreHandler,
             AnomalyHandler, CentroidHandler, ClusterHandler, PredictionHandler,
@@ -177,7 +177,7 @@ def __init__(self, username=None, api_key=None, dev_mode=False,
         ProjectHandler.__init__(self)
         SampleHandler.__init__(self)
         CorrelationHandler.__init__(self)
-        TestHandler.__init__(self)
+        StatisticalTestHandler.__init__(self)
 
         self.getters = {}
         for resource_type in RESOURCE_RE:
@@ -233,8 +233,8 @@ def _get_fields_key(resource):
                     return resource['object']['clusters']['fields']
                 elif CORRELATION_RE.match(resource_id):
                     return resource['object']['correlations']['fields']
-                elif TEST_RE.match(resource_id):
-                    return resource['object']['tests']['fields']
+                elif STATISTICAL_TEST_RE.match(resource_id):
+                    return resource['object']['statistical_tests']['fields']
                 elif SAMPLE_RE.match(resource_id):
                     return dict([(field['id'], field) for field in
                                  resource['object']['sample']['fields']])

diff --git a/bigml/fields.py b/bigml/fields.py
@@ -56,11 +56,11 @@
 ANOMALY_TYPE = 'anomaly'
 SAMPLE_TYPE = 'sample'
 CORRELATION_TYPE = 'correlation'
-TEST_TYPE = 'test'
+STATISTICAL_TEST_TYPE = 'statisticaltest'
 
 RESOURCES_WITH_FIELDS = [SOURCE_TYPE, DATASET_TYPE, MODEL_TYPE,
                          PREDICTION_TYPE, CLUSTER_TYPE, ANOMALY_TYPE,
-                         SAMPLE_TYPE, CORRELATION_TYPE, TEST_TYPE]
+                         SAMPLE_TYPE, CORRELATION_TYPE, STATISTICAL_TEST_TYPE]
 DEFAULT_MISSING_TOKENS = ["", "N/A", "n/a", "NULL", "null", "-", "#DIV/0",
                           "#REF!", "#NAME?", "NIL", "nil", "NA", "na",
                           "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?"]
@@ -93,16 +93,26 @@ def get_fields_structure(resource):
             fields = resource['object']['clusters']['fields']
         elif resource_type == CORRELATION_TYPE:
             fields = resource['object']['correlations']['fields']
-        elif resource_type == TEST_TYPE:
-            fields = resource['object']['tests']['fields']
+        elif resource_type == STATISTICAL_TEST_TYPE:
+            fields = resource['object']['statistical_tests']['fields']
         elif resource_type == SAMPLE_TYPE:
             fields = dict([(field['id'], field) for field in
                            resource['object']['sample']['fields']])
         else:
             fields = resource['object']['fields']
-        return fields, resource_locale, missing_tokens
+        # Check whether there's an objective id
+        objective_column = None
+        if resource_type == DATASET_TYPE:
+            objective_column = resource['object'].get( \
+                'objective_field', {}).get('id')
+        elif resource_type == MODEL_TYPE:
+            objective_id = resource['object'].get( \
+                'objective_fields', [None])[0]
+            objective_column = fields.get( \
+                objective_id, {}).get('column_number')
+        return fields, resource_locale, missing_tokens, objective_column
     else:
-        return None, None, None
+        return None, None, None, None
 
 
 class Fields(object):
@@ -121,7 +131,8 @@ def __init__(self, resource_or_fields, missing_tokens=None,
             resource_info = get_fields_structure(resource_or_fields)
             (self.fields,
              resource_locale,
-             resource_missing_tokens) = resource_info
+             resource_missing_tokens,
+             objective_column) = resource_info
             if data_locale is None:
                 data_locale = resource_locale
             if missing_tokens is None:
@@ -135,6 +146,7 @@ def __init__(self, resource_or_fields, missing_tokens=None,
                 data_locale = DEFAULT_LOCALE
             if missing_tokens is None:
                 missing_tokens = DEFAULT_MISSING_TOKENS
+            objective_column = None
         if self.fields is None:
             raise ValueError("No fields structure was found.")
         self.fields_by_name = invert_dictionary(self.fields, 'name')
@@ -152,6 +164,11 @@ def __init__(self, resource_or_fields, missing_tokens=None,
         self.objective_field = None
         self.objective_field_present = None
         self.filtered_indexes = None
+        # if the objective field is not set by the user
+        # use the one extracted from the resource info
+        if not objective_field and objective_column is not None:
+            objective_field = objective_column
+            objective_field_present = True
         self.update_objective_field(objective_field, objective_field_present)
 
     def update_objective_field(self, objective_field, objective_field_present,

diff --git a/bigml/resourcehandler.py b/bigml/resourcehandler.py
@@ -44,7 +44,7 @@
 PROJECT_PATH = 'project'
 SAMPLE_PATH = 'sample'
 CORRELATION_PATH = 'correlation'
-TEST_PATH = 'test'
+STATISTICAL_TEST_PATH = 'statisticaltest'
 
 # Resource Ids patterns
 ID_PATTERN = '[a-f0-9]{24}'
@@ -72,7 +72,8 @@
 PROJECT_RE = re.compile(r'^%s/%s$' % (PROJECT_PATH, ID_PATTERN))
 SAMPLE_RE = re.compile(r'^%s/%s$' % (SAMPLE_PATH, ID_PATTERN))
 CORRELATION_RE = re.compile(r'^%s/%s$' % (CORRELATION_PATH, ID_PATTERN))
-TEST_RE = re.compile(r'^%s/%s$' % (TEST_PATH, ID_PATTERN))
+STATISTICAL_TEST_RE = re.compile(r'^%s/%s$' % \
+    (STATISTICAL_TEST_PATH, ID_PATTERN))
 
 RESOURCE_RE = {
     'source': SOURCE_RE,
@@ -91,13 +92,15 @@
     'project': PROJECT_RE,
     'sample': SAMPLE_RE,
     'correlation': CORRELATION_RE,
-    'test': TEST_RE}
+    'statisticaltest': STATISTICAL_TEST_RE}
 
 RENAMED_RESOURCES = {
     'batchprediction': 'batch_prediction',
     'batchcentroid': 'batch_centroid',
     'anomalyscore': 'anomaly_score',
-    'batchanomalyscore': 'batch_anomaly_score'}
+    'batchanomalyscore': 'batch_anomaly_score',
+    'statisticaltest': 'statistical_test'
+}
 
 NO_QS = [EVALUATION_RE, PREDICTION_RE, BATCH_PREDICTION_RE,
          CENTROID_RE, BATCH_CENTROID_RE, ANOMALY_SCORE_RE,
@@ -296,11 +299,11 @@ def get_correlation_id(correlation):
     return get_resource(CORRELATION_RE, correlation)
 
 
-def get_test_id(test):
-    """Returns a test/id.
+def get_statistical_test_id(statistical_test):
+    """Returns a statisticaltest/id.
 
     """
-    return get_resource(TEST_RE, test)
+    return get_resource(STATISTICAL_TEST_RE, statistical_test)
 
 
 def get_resource_id(resource):

diff --git a/bigml/statisticaltesthandler.py b/bigml/statisticaltesthandler.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+#
+# Copyright 2015 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""Base class for statisticaltests' REST calls
+
+   https://bigml.com/developers/statisticaltests
+
+"""
+
+try:
+    import simplejson as json
+except ImportError:
+    import json
+
+
+from bigml.resourcehandler import ResourceHandler
+from bigml.resourcehandler import (check_resource_type,
+                                   get_statistical_test_id, get_resource_type,
+                                   get_dataset_id, check_resource)
+from bigml.resourcehandler import (STATISTICAL_TEST_PATH, DATASET_PATH,
+                                   TINY_RESOURCE)
+
+
+class StatisticalTestHandler(ResourceHandler):
+    """This class is used by the BigML class as
+       a mixin that provides the statistical tests' REST calls. It should not
+       be instantiated independently.
+
+    """
+    def __init__(self):
+        """Initializes the StatisticalTestHandler. This class is intended to be
+           used as a mixin on ResourceHandler, which inherits its
+           attributes and basic methods from BigMLConnection, and must not be
+           instantiated independently.
+           Sets `statistical_test_url` by appending the resource path to `url`.
+        """
+        self.statistical_test_url = self.url + STATISTICAL_TEST_PATH
+
+    def create_statistical_test(self, dataset, args=None, wait_time=3, retries=10):
+        """Creates a statistical test from a `dataset`.
+           Raises an Exception when `dataset` is not a dataset resource.
+        """
+        dataset_id = None
+        resource_type = get_resource_type(dataset)
+        if resource_type == DATASET_PATH:
+            dataset_id = get_dataset_id(dataset)
+            check_resource(dataset_id,
+                           query_string=TINY_RESOURCE,
+                           wait_time=wait_time, retries=retries,
+                           raise_on_error=True, api=self)
+        else:
+            raise Exception("A dataset id is needed to create a"
+                            " statistical test. %s found." % resource_type)
+
+        create_args = {}
+        if args is not None:
+            create_args.update(args)
+        create_args.update({
+            "dataset": dataset_id})
+
+        body = json.dumps(create_args)
+        return self._create(self.statistical_test_url, body)
+
+    def get_statistical_test(self, statistical_test, query_string=''):
+        """Retrieves a statistical test.
+
+           The statistical test parameter should be a string containing the
+           statisticaltest id or the dict returned by create_statistical_test.
+           As a statistical test is an evolving object that is processed
+           until it reaches the FINISHED or FAULTY state, the function will
+           return a dict that encloses the statistical test values and state
+           info available at the time it is called.
+        """
+        check_resource_type(statistical_test, STATISTICAL_TEST_PATH,
+                            message="A statistical test id is needed.")
+        statistical_test_id = get_statistical_test_id(statistical_test)
+        if statistical_test_id:
+            return self._get("%s%s" % (self.url, statistical_test_id),
+                             query_string=query_string)
+
+    def list_statistical_tests(self, query_string=''):
+        """Lists all your statistical tests.
+           `query_string` is passed along with the listing request.
+        """
+        return self._list(self.statistical_test_url, query_string)
+
+    def update_statistical_test(self, statistical_test, changes):
+        """Updates a statistical test.
+           `changes` is serialized to JSON and sent as the request body.
+        """
+        check_resource_type(statistical_test, STATISTICAL_TEST_PATH,
+                            message="A statistical test id is needed.")
+        statistical_test_id = get_statistical_test_id(statistical_test)
+        if statistical_test_id:
+            body = json.dumps(changes)
+            return self._update("%s%s" % (self.url, statistical_test_id), body)
+
+    def delete_statistical_test(self, statistical_test):
+        """Deletes a statistical test.
+
+        """
+        check_resource_type(statistical_test, STATISTICAL_TEST_PATH,
+                            message="A statistical test id is needed.")
+        statistical_test_id = get_statistical_test_id(statistical_test)
+        if statistical_test_id:
+            return self._delete("%s%s" % (self.url, statistical_test_id))