Adding REST API calls for samples

bigmlcom · Feb 12, 2015 · f983766 · f983766
1 parent 8e04727
commit f983766
Show file tree

Hide file tree

Showing 32 changed files with 291 additions and 8 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -3,6 +3,11 @@
 History
 -------
 
+2.0.0 (2015-02-12)
+~~~~~~~~~~~~~~~~~~
+
+- Adding REST API calls for samples.
+
 1.10.8 (2015-02-10)
 ~~~~~~~~~~~~~~~~~~~
 

diff --git a/bigml/__init__.py b/bigml/__init__.py
@@ -1 +1 @@
-__version__ = '1.10.8'
+__version__ = '2.0.0'
diff --git a/bigml/api.py b/bigml/api.py
@@ -61,6 +61,7 @@
 from bigml.batchcentroidhandler import BatchCentroidHandler
 from bigml.batchanomalyscorehandler import BatchAnomalyScoreHandler
 from bigml.projecthandler import ProjectHandler
+from bigml.samplehandler import SampleHandler
 
 # Repeating constants and functions for backwards compatibility
 
@@ -80,7 +81,8 @@
     BATCH_ANOMALY_SCORE_RE, ANOMALY_SCORE_RE, PROJECT_RE, SOURCE_PATH,
     DATASET_PATH, MODEL_PATH, PREDICTION_PATH, EVALUATION_PATH, ENSEMBLE_PATH,
     BATCH_PREDICTION_PATH, CLUSTER_PATH, CENTROID_PATH, BATCH_CENTROID_PATH,
-    ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH)
+    ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH,
+    SAMPLE_PATH, SAMPLE_RE)
 
 
 from bigml.resourcehandler import (
@@ -89,7 +91,7 @@
     get_cluster_id, get_centroid_id, get_anomaly_id, get_anomaly_score_id,
     get_prediction_id, get_batch_prediction_id, get_batch_centroid_id,
     get_batch_anomaly_score_id, get_resource_id, resource_is_ready,
-    get_status, check_resource, http_ok, get_project_id)
+    get_status, check_resource, http_ok, get_project_id, get_sample_id)
 
 # Map status codes to labels
 STATUSES = {
@@ -133,13 +135,14 @@ def count(listing):
         return listing['meta']['query_total']
 
 
-class BigML(ProjectHandler, BatchAnomalyScoreHandler, BatchCentroidHandler,
+class BigML(SampleHandler, ProjectHandler, BatchAnomalyScoreHandler,
+            BatchCentroidHandler,
             BatchPredictionHandler, EvaluationHandler, AnomalyScoreHandler,
             AnomalyHandler, CentroidHandler, ClusterHandler, PredictionHandler,
             EnsembleHandler, ModelHandler, DatasetHandler,
             SourceHandler, ResourceHandler, BigMLConnection):
     """Entry point to create, retrieve, list, update, and delete
-    sources, datasets, models and predictions.
+    sources, datasets, models, predictions, evaluations, etc.
 
     Full API documentation on the API can be found from BigML at:
         https://bigml.com/developers
@@ -193,6 +196,7 @@ def __init__(self, username=None, api_key=None, dev_mode=False,
         BatchCentroidHandler.__init__(self)
         BatchAnomalyScoreHandler.__init__(self)
         ProjectHandler.__init__(self)
+        SampleHandler.__init__(self)
 
         self.getters = {}
         for resource_type in RESOURCE_RE:

diff --git a/bigml/resourcehandler.py b/bigml/resourcehandler.py
@@ -42,6 +42,7 @@
 ANOMALY_SCORE_PATH = 'anomalyscore'
 BATCH_ANOMALY_SCORE_PATH = 'batchanomalyscore'
 PROJECT_PATH = 'project'
+SAMPLE_PATH = 'sample'
 
 
 # Resource Ids patterns
@@ -67,7 +68,7 @@
 BATCH_ANOMALY_SCORE_RE = re.compile(r'^%s/%s$' % (BATCH_ANOMALY_SCORE_PATH,
                                                   ID_PATTERN))
 PROJECT_RE = re.compile(r'^%s/%s$' % (PROJECT_PATH, ID_PATTERN))
-
+SAMPLE_RE = re.compile(r'^%s/%s$' % (SAMPLE_PATH, ID_PATTERN))
 
 RESOURCE_RE = {
     'source': SOURCE_RE,
@@ -83,7 +84,8 @@
     'anomaly': ANOMALY_RE,
     'anomalyscore': ANOMALY_SCORE_RE,
     'batchanomalyscore': BATCH_ANOMALY_SCORE_RE,
-    'project': PROJECT_RE}
+    'project': PROJECT_RE,
+    'sample': SAMPLE_RE}
 
 RENAMED_RESOURCES = {
     'batchprediction': 'batch_prediction',
@@ -274,6 +276,13 @@ def get_project_id(project):
     return get_resource(PROJECT_RE, project)
 
 
+def get_sample_id(sample):
+    """Returns the sample/id obtained by applying get_resource with the
+       SAMPLE_RE pattern to `sample`.
+
+    """
+    sample_id = get_resource(SAMPLE_RE, sample)
+    return sample_id
+
+
 def get_resource_id(resource):
     """Returns the resource id if it falls in one of the registered types
 

diff --git a/bigml/samplehandler.py b/bigml/samplehandler.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+#
+# Copyright 2015 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""Base class for samples' REST calls
+
+   https://bigml.com/developers/samples
+
+"""
+
+try:
+    import simplejson as json
+except ImportError:
+    import json
+
+
+from bigml.resourcehandler import ResourceHandler
+from bigml.resourcehandler import (check_resource_type, resource_is_ready,
+                                   get_sample_id, get_resource_type,
+                                   get_dataset_id, check_resource)
+from bigml.resourcehandler import (SAMPLE_PATH, DATASET_PATH, LOGGER,
+                                   TINY_RESOURCE)
+
+
+class SampleHandler(ResourceHandler):
+    """Mixin used by the BigML class that provides the REST calls
+       for samples. It should not be instantiated independently.
+
+    """
+    def __init__(self):
+        """Stores the base URL used by the sample REST calls.
+
+           Reads `self.url`, so the object this mixin is combined with
+           must already provide that attribute.
+
+        """
+        self.sample_url = self.url + SAMPLE_PATH
+
+    def create_sample(self, dataset, args=None, wait_time=3, retries=10):
+        """Creates a sample from a `dataset`.
+
+           `dataset` must be recognized as a dataset resource; otherwise
+           an Exception is raised. The dataset is checked with
+           `check_resource` (using `wait_time` and `retries`) before the
+           creation request is sent. The optional `args` are added to
+           the request body, where the "dataset" key always holds the
+           dataset id.
+
+        """
+        resource_type = get_resource_type(dataset)
+        if resource_type != DATASET_PATH:
+            raise Exception("A dataset id is needed to create a"
+                            " sample. %s found." % resource_type)
+
+        dataset_id = get_dataset_id(dataset)
+        check_resource(dataset_id,
+                       query_string=TINY_RESOURCE,
+                       wait_time=wait_time, retries=retries,
+                       raise_on_error=True, api=self)
+
+        create_args = {}
+        if args is not None:
+            create_args.update(args)
+        # the dataset id is set last so that it overrides any "dataset"
+        # key given in args
+        create_args["dataset"] = dataset_id
+
+        return self._create(self.sample_url, json.dumps(create_args))
+
+    def get_sample(self, sample, query_string=''):
+        """Retrieves a sample.
+
+           `sample` should be a string containing the sample id or the
+           dict returned by create_sample. A sample is an evolving
+           object that is processed until it reaches the FINISHED or
+           FAULTY state, so the returned dict encloses the sample values
+           and state info available at the time of the call.
+
+        """
+        check_resource_type(sample, SAMPLE_PATH,
+                            message="A sample id is needed.")
+        sample_id = get_sample_id(sample)
+        if not sample_id:
+            return None
+        sample_url = "%s%s" % (self.url, sample_id)
+        return self._get(sample_url, query_string=query_string)
+
+    def list_samples(self, query_string=''):
+        """Lists all your samples.
+
+        """
+        listing = self._list(self.sample_url, query_string)
+        return listing
+
+    def update_sample(self, sample, changes):
+        """Updates a sample.
+
+           `changes` is serialized to JSON and sent as the body of the
+           update request.
+
+        """
+        check_resource_type(sample, SAMPLE_PATH,
+                            message="A sample id is needed.")
+        sample_id = get_sample_id(sample)
+        if not sample_id:
+            return None
+        sample_url = "%s%s" % (self.url, sample_id)
+        return self._update(sample_url, json.dumps(changes))
+
+    def delete_sample(self, sample):
+        """Deletes a sample.
+
+        """
+        check_resource_type(sample, SAMPLE_PATH,
+                            message="A sample id is needed.")
+        sample_id = get_sample_id(sample)
+        if not sample_id:
+            return None
+        sample_url = "%s%s" % (self.url, sample_id)
+        return self._delete(sample_url)
diff --git a/docs/index.rst b/docs/index.rst
@@ -851,6 +851,23 @@ Each node in an isolation tree can have multiple predicates.
 For the node to be a valid branch when evaluated with a data point, all of its
 predicates must be true.
 
+Samples
+-------
+
+To provide quick access to your row data you can create a ``sample``. Samples
+are in-memory objects that can be queried for subsets of data by limiting
+their size, the fields or the rows returned. Their structure is described in
+the `developers documentation <https://bigml.com/developers/samples>`_.
+
+Samples are not permanent objects. Once they are created, they will be
+available as long as GETs are requested within periods smaller than
+a pre-established TTL (Time to Live). The expiration timer of a sample is
+reset every time a new GET is received.
+
+If requested, a sample can also perform linear regression and compute
+Pearson's and Spearman's correlations for either one numeric field
+against all other numeric fields or between two specific numeric fields.
+
 Creating Resources
 ------------------
 
@@ -911,6 +928,8 @@ You can query the status of any resource with the ``status`` method::
     api.status(anomaly)
     api.status(anomaly_score)
     api.status(batch_anomaly_score)
+    api.status(project)
+    api.status(sample)
 
 Before invoking the creation of a new resource, the library checks that
 the status of the resource that is passed as a parameter is

diff --git a/tests/features/create_prediction.feature → tests/features/01.create_prediction.feature b/tests/features/create_prediction.feature → tests/features/01.create_prediction.feature
@@ -76,7 +76,7 @@ Feature: Create Predictions
 
         Examples:
         | data                | time_1  | time_2 | time_3 | data_input    | centroid  |
-        | ../data/diabetes.csv | 10      | 10     | 10     | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 6 |
+        | ../data/diabetes.csv | 10      | 20     | 20     | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 6 |
 
     Scenario: Successfully creating an anomaly score:
         Given I create a data source uploading a "<data>" file

diff --git a/tests/features/create_prediction_dev.feature → ...features/02.create_prediction_dev.feature b/tests/features/create_prediction_dev.feature → ...features/02.create_prediction_dev.feature
diff --git a/.../features/create_local_prediction.feature → ...atures/03.create_local_prediction.feature b/.../features/create_local_prediction.feature → ...atures/03.create_local_prediction.feature
diff --git a/...res/compute_multivote_predictions.feature → .../04.compute_multivote_predictions.feature b/...res/compute_multivote_predictions.feature → .../04.compute_multivote_predictions.feature
diff --git a/tests/features/compare_predictions.feature → ...s/features/05.compare_predictions.feature b/tests/features/compare_predictions.feature → ...s/features/05.compare_predictions.feature
diff --git a/.../features/create_batch_prediction.feature → ...atures/06.create_batch_prediction.feature b/.../features/create_batch_prediction.feature → ...atures/06.create_batch_prediction.feature
diff --git a/...eate_batch_prediction_multi_model.feature → ...eate_batch_prediction_multi_model.feature b/...eate_batch_prediction_multi_model.feature → ...eate_batch_prediction_multi_model.feature
diff --git a/tests/features/create_multimodel.feature → tests/features/08.create_multimodel.feature b/tests/features/create_multimodel.feature → tests/features/08.create_multimodel.feature
diff --git a/...atures/create_prediction_ensemble.feature → ...res/09.create_prediction_ensemble.feature b/...atures/create_prediction_ensemble.feature → ...res/09.create_prediction_ensemble.feature
diff --git a/.../create_prediction_local_ensemble.feature → ....create_prediction_local_ensemble.feature b/.../create_prediction_local_ensemble.feature → ....create_prediction_local_ensemble.feature
diff --git a/...res/create_prediction_multi_model.feature → .../11.create_prediction_multi_model.feature b/...res/create_prediction_multi_model.feature → .../11.create_prediction_multi_model.feature
diff --git a/...es/create_prediction_public_model.feature → ...12.create_prediction_public_model.feature b/...es/create_prediction_public_model.feature → ...12.create_prediction_public_model.feature
diff --git a/...es/create_prediction_shared_model.feature → ...13.create_prediction_shared_model.feature b/...es/create_prediction_shared_model.feature → ...13.create_prediction_shared_model.feature
diff --git a/tests/features/create_public_dataset.feature → ...features/14.create_public_dataset.feature b/tests/features/create_public_dataset.feature → ...features/14.create_public_dataset.feature
diff --git a/tests/features/create_evaluation.feature → tests/features/15.create_evaluation.feature b/tests/features/create_evaluation.feature → tests/features/15.create_evaluation.feature
diff --git a/tests/features/download_dataset.feature → tests/features/16.download_dataset.feature b/tests/features/download_dataset.feature → tests/features/16.download_dataset.feature
diff --git a/tests/features/17.sample_dataset.feature b/tests/features/17.sample_dataset.feature
@@ -0,0 +1,18 @@
+Feature: Create and update a sample from a dataset
+    In order to create a sample from a dataset
+    I need to create an origin dataset
+
+    Scenario: Successfully creating a sample from a dataset:
+        Given I create a data source uploading a "<data>" file
+        And I wait until the source is ready less than <time_1> secs
+        And I create a dataset
+        And I wait until the dataset is ready less than <time_2> secs
+        And I create a sample from a dataset
+        And I wait until the sample is ready less than <time_3> secs
+        And I update the sample name to "<sample_name>"
+        When I wait until the sample is ready less than <time_4> secs
+        Then the sample name is "<sample_name>"
+
+        Examples:
+        | data                | time_1  | time_2 | time_3 | time_4 | sample_name |
+        | ../data/iris.csv | 10      | 10     | 10     | 10 | my new sample name |
diff --git a/tests/features/split_dataset.feature → tests/features/18.split_dataset.feature b/tests/features/split_dataset.feature → tests/features/18.split_dataset.feature
diff --git a/tests/features/create_anomaly.feature → tests/features/19.create_anomaly.feature b/tests/features/create_anomaly.feature → tests/features/19.create_anomaly.feature
diff --git a/tests/features/missing_and_errors.feature → tests/features/20.missing_and_errors.feature b/tests/features/missing_and_errors.feature → tests/features/20.missing_and_errors.feature
diff --git a/.../features/rename_duplicated_names.feature → ...atures/21.rename_duplicated_names.feature b/.../features/rename_duplicated_names.feature → ...atures/21.rename_duplicated_names.feature
diff --git a/tests/features/manage_project.feature → tests/features/22.manage_project.feature b/tests/features/manage_project.feature → tests/features/22.manage_project.feature
diff --git a/tests/features/common_steps.py b/tests/features/common_steps.py
@@ -90,3 +90,7 @@ def i_want_api_dev_mode(step):
     projects = world.api.list_projects()
     assert projects['code'] == HTTP_OK
     world.init_projects_count = projects['meta']['total_count']
+
+    samples = world.api.list_samples()
+    assert samples['code'] == HTTP_OK
+    world.init_samples_count = samples['meta']['total_count']
diff --git a/tests/features/create_sample-steps.py b/tests/features/create_sample-steps.py
@@ -0,0 +1,65 @@
+import time
+import json
+import os
+from datetime import datetime, timedelta
+from lettuce import step, world
+
+from bigml.api import HTTP_CREATED
+from bigml.api import HTTP_ACCEPTED
+from bigml.api import FINISHED
+from bigml.api import FAULTY
+from bigml.api import get_status
+
+
+@step(r'the sample name is "(.*)"')
+def i_check_sample_name(step, name):
+    """Checks that the name of the sample stored in `world.sample` is
+       the expected `name`.
+
+    """
+    sample_name = world.sample['name']
+    assert name == sample_name, ("The sample name is %s "
+                                 "and the expected name is %s" %
+                                 (sample_name, name))
+
+
+@step(r'I create a sample from a dataset$')
+def i_create_a_sample_from_dataset(step):
+    """Creates a sample from the dataset in `world.dataset`.
+
+       The response code, location and sample object are stored in
+       `world`, and the `resource` value of the response is appended to
+       `world.samples`. The step fails unless the response code is
+       HTTP_CREATED.
+
+    """
+    dataset = world.dataset.get('resource')
+    resource = world.api.create_sample(dataset, {'name': 'new sample'})
+    world.status = resource['code']
+    assert world.status == HTTP_CREATED
+    world.location = resource['location']
+    world.sample = resource['object']
+    world.samples.append(resource['resource'])
+
+
+@step(r'I update the sample name to "(.*)"$')
+def i_update_sample_name(step, name):
+    """Updates the name of the sample in `world.sample` to `name`.
+
+       The response code, location and sample object are stored in
+       `world`. The step fails unless the response code is
+       HTTP_ACCEPTED.
+
+    """
+    resource = world.api.update_sample(world.sample['resource'],
+                                       {'name': name})
+    world.status = resource['code']
+    assert world.status == HTTP_ACCEPTED
+    world.location = resource['location']
+    world.sample = resource['object']
+
+
+@step(r'I wait until the sample status code is either (\d) or (-\d) less than (\d+)')
+def wait_until_sample_status_code_is(step, code1, code2, secs):
+    """Polls the sample in `world.sample` until its status code is
+       `code1` or `code2`.
+
+       The sample is retrieved again every 3 seconds. The step fails if
+       `secs` seconds or more have elapsed after a wait without reaching
+       one of the codes, or if the final status code is not `code1`.
+
+    """
+    # arguments captured by the step regex arrive as strings, so they
+    # are converted once here instead of on every loop iteration
+    expected_code = int(code1)
+    other_code = int(code2)
+    timeout = timedelta(seconds=int(secs))
+    start = datetime.utcnow()
+    sample_id = world.sample['resource']
+    step.given('I get the sample "{id}"'.format(id=sample_id))
+    status = get_status(world.sample)
+    while status['code'] not in (expected_code, other_code):
+        time.sleep(3)
+        assert datetime.utcnow() - start < timeout, (
+            "The sample %s did not reach status code %s or %s"
+            " in less than %s secs" %
+            (sample_id, expected_code, other_code, secs))
+        step.given('I get the sample "{id}"'.format(id=sample_id))
+        status = get_status(world.sample)
+    assert status['code'] == expected_code, (
+        "The sample %s status code is %s and the expected code is %s" %
+        (sample_id, status['code'], expected_code))
+
+
+@step(r'I wait until the sample is ready less than (\d+)')
+def the_sample_is_finished_in_less_than(step, secs):
+    """Waits for the sample by delegating to
+       wait_until_sample_status_code_is, with FINISHED and FAULTY as
+       the two status codes and `secs` as the time limit.
+
+    """
+    ready_code, faulty_code = FINISHED, FAULTY
+    wait_until_sample_status_code_is(step, ready_code, faulty_code, secs)
diff --git a/tests/features/read_sample-steps.py b/tests/features/read_sample-steps.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+#
+# Copyright 2015 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from lettuce import step, world
+from bigml.api import HTTP_OK
+
+@step(r'I get the sample "(.*)"')
+def i_get_the_sample(step, resource):
+    """Retrieves the sample `resource` through the API, checks that the
+       response code is HTTP_OK and stores the response code and the
+       sample object in `world`.
+
+    """
+    response = world.api.get_sample(resource)
+    status_code = response['code']
+    world.status = status_code
+    assert status_code == HTTP_OK
+    world.sample = response['object']