Skip to content

Commit

Permalink
Adding REST API calls for samples
Browse files Browse the repository at this point in the history
  • Loading branch information
mmerce committed Feb 12, 2015
1 parent 8e04727 commit f983766
Show file tree
Hide file tree
Showing 32 changed files with 291 additions and 8 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
History
-------

2.0.0 (2015-02-12)
~~~~~~~~~~~~~~~~~~

- Adding REST API calls for samples.

1.10.8 (2015-02-10)
~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion bigml/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.10.8'
__version__ = '2.0.0'
12 changes: 8 additions & 4 deletions bigml/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from bigml.batchcentroidhandler import BatchCentroidHandler
from bigml.batchanomalyscorehandler import BatchAnomalyScoreHandler
from bigml.projecthandler import ProjectHandler
from bigml.samplehandler import SampleHandler

# Repeating constants and functions for backwards compatibility

Expand All @@ -80,7 +81,8 @@
BATCH_ANOMALY_SCORE_RE, ANOMALY_SCORE_RE, PROJECT_RE, SOURCE_PATH,
DATASET_PATH, MODEL_PATH, PREDICTION_PATH, EVALUATION_PATH, ENSEMBLE_PATH,
BATCH_PREDICTION_PATH, CLUSTER_PATH, CENTROID_PATH, BATCH_CENTROID_PATH,
ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH)
ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH,
SAMPLE_PATH, SAMPLE_RE)


from bigml.resourcehandler import (
Expand All @@ -89,7 +91,7 @@
get_cluster_id, get_centroid_id, get_anomaly_id, get_anomaly_score_id,
get_prediction_id, get_batch_prediction_id, get_batch_centroid_id,
get_batch_anomaly_score_id, get_resource_id, resource_is_ready,
get_status, check_resource, http_ok, get_project_id)
get_status, check_resource, http_ok, get_project_id, get_sample_id)

# Map status codes to labels
STATUSES = {
Expand Down Expand Up @@ -133,13 +135,14 @@ def count(listing):
return listing['meta']['query_total']


class BigML(ProjectHandler, BatchAnomalyScoreHandler, BatchCentroidHandler,
class BigML(SampleHandler, ProjectHandler, BatchAnomalyScoreHandler,
BatchCentroidHandler,
BatchPredictionHandler, EvaluationHandler, AnomalyScoreHandler,
AnomalyHandler, CentroidHandler, ClusterHandler, PredictionHandler,
EnsembleHandler, ModelHandler, DatasetHandler,
SourceHandler, ResourceHandler, BigMLConnection):
"""Entry point to create, retrieve, list, update, and delete
sources, datasets, models and predictions.
sources, datasets, models, predictions, evaluations, etc.
Full API documentation on the API can be found from BigML at:
https://bigml.com/developers
Expand Down Expand Up @@ -193,6 +196,7 @@ def __init__(self, username=None, api_key=None, dev_mode=False,
BatchCentroidHandler.__init__(self)
BatchAnomalyScoreHandler.__init__(self)
ProjectHandler.__init__(self)
SampleHandler.__init__(self)

self.getters = {}
for resource_type in RESOURCE_RE:
Expand Down
13 changes: 11 additions & 2 deletions bigml/resourcehandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
ANOMALY_SCORE_PATH = 'anomalyscore'
BATCH_ANOMALY_SCORE_PATH = 'batchanomalyscore'
PROJECT_PATH = 'project'
SAMPLE_PATH = 'sample'


# Resource Ids patterns
Expand All @@ -67,7 +68,7 @@
BATCH_ANOMALY_SCORE_RE = re.compile(r'^%s/%s$' % (BATCH_ANOMALY_SCORE_PATH,
ID_PATTERN))
PROJECT_RE = re.compile(r'^%s/%s$' % (PROJECT_PATH, ID_PATTERN))

SAMPLE_RE = re.compile(r'^%s/%s$' % (SAMPLE_PATH, ID_PATTERN))

RESOURCE_RE = {
'source': SOURCE_RE,
Expand All @@ -83,7 +84,8 @@
'anomaly': ANOMALY_RE,
'anomalyscore': ANOMALY_SCORE_RE,
'batchanomalyscore': BATCH_ANOMALY_SCORE_RE,
'project': PROJECT_RE}
'project': PROJECT_RE,
'sample': SAMPLE_RE}

RENAMED_RESOURCES = {
'batchprediction': 'batch_prediction',
Expand Down Expand Up @@ -274,6 +276,13 @@ def get_project_id(project):
return get_resource(PROJECT_RE, project)


def get_sample_id(sample):
    """Returns a sample/id, as obtained by `get_resource` with the
       `SAMPLE_RE` pattern.

    """
    sample_id = get_resource(SAMPLE_RE, sample)
    return sample_id


def get_resource_id(resource):
"""Returns the resource id if it falls in one of the registered types
Expand Down
120 changes: 120 additions & 0 deletions bigml/samplehandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
#
# Copyright 2015 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for samples' REST calls
https://bigml.com/developers/samples
"""

try:
import simplejson as json
except ImportError:
import json


from bigml.resourcehandler import ResourceHandler
from bigml.resourcehandler import (check_resource_type, resource_is_ready,
get_sample_id, get_resource_type,
get_dataset_id, check_resource)
from bigml.resourcehandler import (SAMPLE_PATH, DATASET_PATH, LOGGER,
TINY_RESOURCE)


class SampleHandler(ResourceHandler):
    """Mixin that gives the BigML class its REST calls for samples.

       It relies on the attributes and basic methods that ResourceHandler
       inherits from BigMLConnection, so it should not be instantiated
       independently.

    """
    def __init__(self):
        """Sets the base url used by the samples' REST calls.

           Reads `self.url`, so that attribute must already be set when
           this initializer is called.

        """
        self.sample_url = self.url + SAMPLE_PATH

    def create_sample(self, dataset, args=None, wait_time=3, retries=10):
        """Creates a sample from a `dataset`.

           The dataset is checked with `check_resource` (using `wait_time`
           and `retries`) before the creation request is sent. The contents
           of `args`, if any, are sent in the request body together with
           the dataset id. An exception is raised when `dataset` is not a
           dataset resource.

        """
        resource_type = get_resource_type(dataset)
        # Guard clause: only datasets can be the origin of a sample
        if resource_type != DATASET_PATH:
            raise Exception("A dataset id is needed to create a"
                            " sample. %s found." % resource_type)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        # The dataset id is set last, so it overrides any "dataset" key
        # the caller might have included in `args`
        create_args = {}
        if args is not None:
            create_args.update(args)
        create_args["dataset"] = dataset_id

        return self._create(self.sample_url, json.dumps(create_args))

    def get_sample(self, sample, query_string=''):
        """Retrieves a sample.

           `sample` should be a string containing the sample id or the
           dict returned by create_sample. A sample is an evolving object
           that is processed until it reaches the FINISHED or FAULTY
           state, so the dict returned encloses the sample values and
           state info available at the time of the call.

        """
        check_resource_type(sample, SAMPLE_PATH,
                            message="A sample id is needed.")
        sample_id = get_sample_id(sample)
        if not sample_id:
            return None
        sample_location = "%s%s" % (self.url, sample_id)
        return self._get(sample_location, query_string=query_string)

    def list_samples(self, query_string=''):
        """Lists all your samples, optionally filtered by `query_string`.

        """
        listing = self._list(self.sample_url, query_string)
        return listing

    def update_sample(self, sample, changes):
        """Updates a sample with the attributes given in `changes`.

           `changes` is serialized to JSON and sent as the request body.

        """
        check_resource_type(sample, SAMPLE_PATH,
                            message="A sample id is needed.")
        sample_id = get_sample_id(sample)
        if not sample_id:
            return None
        body = json.dumps(changes)
        sample_location = "%s%s" % (self.url, sample_id)
        return self._update(sample_location, body)

    def delete_sample(self, sample):
        """Deletes a sample.

        """
        check_resource_type(sample, SAMPLE_PATH,
                            message="A sample id is needed.")
        sample_id = get_sample_id(sample)
        if not sample_id:
            return None
        sample_location = "%s%s" % (self.url, sample_id)
        return self._delete(sample_location)
19 changes: 19 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,23 @@ Each node in an isolation tree can have multiple predicates.
For the node to be a valid branch when evaluated with a data point, all of its
predicates must be true.

Samples
-------

To provide quick access to your row data you can create a ``sample``. Samples
are in-memory objects that can be queried for subsets of data by limiting
their size, the fields or the rows returned. The structure of a sample is
described in the `developers documentation <https://bigml.com/developers/samples>`_.

Samples are not permanent objects. Once they are created, they will be
available as long as GETs are requested within periods smaller than
a pre-established TTL (Time to Live). The expiration timer of a sample is
reset every time a new GET is received.

If requested, a sample can also perform linear regression and compute
Pearson's and Spearman's correlations for either one numeric field
against all other numeric fields or between two specific numeric fields.

Creating Resources
------------------

Expand Down Expand Up @@ -911,6 +928,8 @@ You can query the status of any resource with the ``status`` method::
api.status(anomaly)
api.status(anomaly_score)
api.status(batch_anomaly_score)
api.status(project)
api.status(sample)

Before invoking the creation of a new resource, the library checks that
the status of the resource that is passed as a parameter is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ Feature: Create Predictions

Examples:
| data | time_1 | time_2 | time_3 | data_input | centroid |
| ../data/diabetes.csv | 10 | 10 | 10 | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 6 |
| ../data/diabetes.csv | 10 | 20 | 20 | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 6 |

Scenario: Successfully creating an anomaly score:
Given I create a data source uploading a "<data>" file
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 18 additions & 0 deletions tests/features/17.sample_dataset.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Feature: Create and update a sample from a dataset
In order to create a sample from a dataset
I need to create an origin dataset

Scenario: Successfully creating a sample from a dataset:
Given I create a data source uploading a "<data>" file
And I wait until the source is ready less than <time_1> secs
And I create a dataset
And I wait until the dataset is ready less than <time_2> secs
And I create a sample from a dataset
And I wait until the sample is ready less than <time_3> secs
And I update the sample name to "<sample_name>"
When I wait until the sample is ready less than <time_4> secs
Then the sample name is "<sample_name>"

Examples:
| data | time_1 | time_2 | time_3 | time_4 | sample_name |
| ../data/iris.csv | 10 | 10 | 10 | 10 | my new sample name |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 4 additions & 0 deletions tests/features/common_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,7 @@ def i_want_api_dev_mode(step):
projects = world.api.list_projects()
assert projects['code'] == HTTP_OK
world.init_projects_count = projects['meta']['total_count']

samples = world.api.list_samples()
assert samples['code'] == HTTP_OK
world.init_samples_count = samples['meta']['total_count']
65 changes: 65 additions & 0 deletions tests/features/create_sample-steps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import time
import json
import os
from datetime import datetime, timedelta
from lettuce import step, world

from bigml.api import HTTP_CREATED
from bigml.api import HTTP_ACCEPTED
from bigml.api import FINISHED
from bigml.api import FAULTY
from bigml.api import get_status


@step(r'the sample name is "(.*)"')
def i_check_sample_name(step, name):
    """Checks that the name of the sample stored in `world` is `name`."""
    sample_name = world.sample['name']
    assert name == sample_name, ("The sample name is %s "
                                 "and the expected name is %s" %
                                 (sample_name, name))


@step(r'I create a sample from a dataset$')
def i_create_a_sample_from_dataset(step):
dataset = world.dataset.get('resource')
resource = world.api.create_sample(dataset, {'name': 'new sample'})
world.status = resource['code']
assert world.status == HTTP_CREATED
world.location = resource['location']
world.sample = resource['object']
world.samples.append(resource['resource'])
print "create"


@step(r'I update the sample name to "(.*)"$')
def i_update_sample_name(step, name):
resource = world.api.update_sample(world.sample['resource'],
{'name': name})
world.status = resource['code']
assert world.status == HTTP_ACCEPTED
world.location = resource['location']
world.sample = resource['object']
print "update"


@step(r'I wait until the sample status code is either (\d) or (-\d) less than (\d+)')
def wait_until_sample_status_code_is(step, code1, code2, secs):
    """Polls the sample until its status code is `code1` or `code2`.

    The sample is retrieved again after every 3 seconds sleep. The step
    fails if `secs` seconds or more have elapsed after a sleep, or if the
    final status code is not `code1`.
    """
    started = datetime.utcnow()
    # The step sentence is built once: the sample id does not change
    # while waiting
    get_sample_sentence = 'I get the sample "{id}"'.format(
        id=world.sample['resource'])
    step.given(get_sample_sentence)
    status = get_status(world.sample)
    while not (status['code'] == int(code1) or
               status['code'] == int(code2)):
        time.sleep(3)
        elapsed = datetime.utcnow() - started
        assert elapsed < timedelta(seconds=int(secs))
        step.given(get_sample_sentence)
        status = get_status(world.sample)
    # Ending in `code2` counts as a failure
    assert status['code'] == int(code1)


@step(r'I wait until the sample is ready less than (\d+)')
def the_sample_is_finished_in_less_than(step, secs):
    """Waits up to `secs` seconds for the sample to be FINISHED.

    Delegates to `wait_until_sample_status_code_is`, using FAULTY as the
    alternative status code that also stops the wait.
    """
    expected_code, failure_code = FINISHED, FAULTY
    wait_until_sample_status_code_is(step, expected_code, failure_code, secs)
26 changes: 26 additions & 0 deletions tests/features/read_sample-steps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
#
# Copyright 2015 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from lettuce import step, world
from bigml.api import HTTP_OK

@step(r'I get the sample "(.*)"')
def i_get_the_sample(step, resource):
    """Retrieves the sample `resource` and stores it in `world.sample`.

    The response code is kept in `world.status` and must be HTTP_OK.
    """
    response = world.api.get_sample(resource)
    world.status = response['code']
    assert world.status == HTTP_OK
    world.sample = response['object']

0 comments on commit f983766

Please sign in to comment.