Skip to content

Commit

Permalink
Changing the name for the new statisticaltest resource
Browse files Browse the repository at this point in the history
  • Loading branch information
mmerce committed Aug 25, 2015
1 parent a94e860 commit 5a74ef6
Show file tree
Hide file tree
Showing 12 changed files with 309 additions and 265 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Expand Up @@ -9,6 +9,8 @@ History
- Adding REST methods to manage new type of resource: correlations.
- Adding REST methods to manage new type of resource: tests.
- Adding min and max values predictions for regression models and ensembles.
- Fixing bug: the Fields object was not retrieving the objective id from the
resource info.

4.1.7 (2015-08-15)
~~~~~~~~~~~~~~~~~~
Expand Down
14 changes: 7 additions & 7 deletions bigml/api.py
Expand Up @@ -57,7 +57,7 @@
from bigml.projecthandler import ProjectHandler
from bigml.samplehandler import SampleHandler
from bigml.correlationhandler import CorrelationHandler
from bigml.testhandler import TestHandler
from bigml.statisticaltesthandler import StatisticalTestHandler

# Repeating constants and functions for backwards compatibility

Expand All @@ -79,7 +79,7 @@
BATCH_PREDICTION_PATH, CLUSTER_PATH, CENTROID_PATH, BATCH_CENTROID_PATH,
ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH,
SAMPLE_PATH, SAMPLE_RE, CORRELATION_PATH, CORRELATION_RE,
TEST_PATH, TEST_RE)
STATISTICAL_TEST_PATH, STATISTICAL_TEST_RE)


from bigml.resourcehandler import (
Expand All @@ -89,7 +89,7 @@
get_prediction_id, get_batch_prediction_id, get_batch_centroid_id,
get_batch_anomaly_score_id, get_resource_id, resource_is_ready,
get_status, check_resource, http_ok, get_project_id, get_sample_id,
get_correlation_id, get_test_id)
get_correlation_id, get_statistical_test_id)

# Map status codes to labels
STATUSES = {
Expand All @@ -114,7 +114,7 @@ def count(listing):
return listing['meta']['query_total']


class BigML(TestHandler, CorrelationHandler, SampleHandler, ProjectHandler,
class BigML(StatisticalTestHandler, CorrelationHandler, SampleHandler, ProjectHandler,
BatchAnomalyScoreHandler, BatchCentroidHandler,
BatchPredictionHandler, EvaluationHandler, AnomalyScoreHandler,
AnomalyHandler, CentroidHandler, ClusterHandler, PredictionHandler,
Expand Down Expand Up @@ -177,7 +177,7 @@ def __init__(self, username=None, api_key=None, dev_mode=False,
ProjectHandler.__init__(self)
SampleHandler.__init__(self)
CorrelationHandler.__init__(self)
TestHandler.__init__(self)
StatisticalTestHandler.__init__(self)

self.getters = {}
for resource_type in RESOURCE_RE:
Expand Down Expand Up @@ -233,8 +233,8 @@ def _get_fields_key(resource):
return resource['object']['clusters']['fields']
elif CORRELATION_RE.match(resource_id):
return resource['object']['correlations']['fields']
elif TEST_RE.match(resource_id):
return resource['object']['tests']['fields']
elif STATISTICAL_TEST_RE.match(resource_id):
return resource['object']['statistical_tests']['fields']
elif SAMPLE_RE.match(resource_id):
return dict([(field['id'], field) for field in
resource['object']['sample']['fields']])
Expand Down
31 changes: 24 additions & 7 deletions bigml/fields.py
Expand Up @@ -56,11 +56,11 @@
ANOMALY_TYPE = 'anomaly'
SAMPLE_TYPE = 'sample'
CORRELATION_TYPE = 'correlation'
TEST_TYPE = 'test'
STATISTICAL_TEST_TYPE = 'statisticaltest'

RESOURCES_WITH_FIELDS = [SOURCE_TYPE, DATASET_TYPE, MODEL_TYPE,
PREDICTION_TYPE, CLUSTER_TYPE, ANOMALY_TYPE,
SAMPLE_TYPE, CORRELATION_TYPE, TEST_TYPE]
SAMPLE_TYPE, CORRELATION_TYPE, STATISTICAL_TEST_TYPE]
DEFAULT_MISSING_TOKENS = ["", "N/A", "n/a", "NULL", "null", "-", "#DIV/0",
"#REF!", "#NAME?", "NIL", "nil", "NA", "na",
"#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?"]
Expand Down Expand Up @@ -93,16 +93,26 @@ def get_fields_structure(resource):
fields = resource['object']['clusters']['fields']
elif resource_type == CORRELATION_TYPE:
fields = resource['object']['correlations']['fields']
elif resource_type == TEST_TYPE:
fields = resource['object']['tests']['fields']
elif resource_type == STATISTICAL_TEST_TYPE:
fields = resource['object']['statistical_tests']['fields']
elif resource_type == SAMPLE_TYPE:
fields = dict([(field['id'], field) for field in
resource['object']['sample']['fields']])
else:
fields = resource['object']['fields']
return fields, resource_locale, missing_tokens
# Check whether there's an objective id
objective_column = None
if resource_type == DATASET_TYPE:
objective_column = resource['object'].get( \
'objective_field', {}).get('id')
elif resource_type == MODEL_TYPE:
objective_id = resource['object'].get( \
'objective_fields', [None])[0]
objective_column = fields.get( \
objective_id, {}).get('column_number')
return fields, resource_locale, missing_tokens, objective_column
else:
return None, None, None
return None, None, None, None


class Fields(object):
Expand All @@ -121,7 +131,8 @@ def __init__(self, resource_or_fields, missing_tokens=None,
resource_info = get_fields_structure(resource_or_fields)
(self.fields,
resource_locale,
resource_missing_tokens) = resource_info
resource_missing_tokens,
objective_column) = resource_info
if data_locale is None:
data_locale = resource_locale
if missing_tokens is None:
Expand All @@ -135,6 +146,7 @@ def __init__(self, resource_or_fields, missing_tokens=None,
data_locale = DEFAULT_LOCALE
if missing_tokens is None:
missing_tokens = DEFAULT_MISSING_TOKENS
objective_column = None
if self.fields is None:
raise ValueError("No fields structure was found.")
self.fields_by_name = invert_dictionary(self.fields, 'name')
Expand All @@ -152,6 +164,11 @@ def __init__(self, resource_or_fields, missing_tokens=None,
self.objective_field = None
self.objective_field_present = None
self.filtered_indexes = None
# if the objective field is not set by the user
# use the one extracted from the resource info
if not objective_field and objective_column is not None:
objective_field = objective_column
objective_field_present = True
self.update_objective_field(objective_field, objective_field_present)

def update_objective_field(self, objective_field, objective_field_present,
Expand Down
17 changes: 10 additions & 7 deletions bigml/resourcehandler.py
Expand Up @@ -44,7 +44,7 @@
PROJECT_PATH = 'project'
SAMPLE_PATH = 'sample'
CORRELATION_PATH = 'correlation'
TEST_PATH = 'test'
STATISTICAL_TEST_PATH = 'statisticaltest'

# Resource Ids patterns
ID_PATTERN = '[a-f0-9]{24}'
Expand Down Expand Up @@ -72,7 +72,8 @@
PROJECT_RE = re.compile(r'^%s/%s$' % (PROJECT_PATH, ID_PATTERN))
SAMPLE_RE = re.compile(r'^%s/%s$' % (SAMPLE_PATH, ID_PATTERN))
CORRELATION_RE = re.compile(r'^%s/%s$' % (CORRELATION_PATH, ID_PATTERN))
TEST_RE = re.compile(r'^%s/%s$' % (TEST_PATH, ID_PATTERN))
STATISTICAL_TEST_RE = re.compile(r'^%s/%s$' % \
(STATISTICAL_TEST_PATH, ID_PATTERN))

RESOURCE_RE = {
'source': SOURCE_RE,
Expand All @@ -91,13 +92,15 @@
'project': PROJECT_RE,
'sample': SAMPLE_RE,
'correlation': CORRELATION_RE,
'test': TEST_RE}
'statisticaltest': STATISTICAL_TEST_RE}

RENAMED_RESOURCES = {
'batchprediction': 'batch_prediction',
'batchcentroid': 'batch_centroid',
'anomalyscore': 'anomaly_score',
'batchanomalyscore': 'batch_anomaly_score'}
'batchanomalyscore': 'batch_anomaly_score',
'statisticaltest': 'statistical_test'
}

NO_QS = [EVALUATION_RE, PREDICTION_RE, BATCH_PREDICTION_RE,
CENTROID_RE, BATCH_CENTROID_RE, ANOMALY_SCORE_RE,
Expand Down Expand Up @@ -296,11 +299,11 @@ def get_correlation_id(correlation):
return get_resource(CORRELATION_RE, correlation)


def get_test_id(test):
"""Returns a test/id.
def get_statistical_test_id(statistical_test):
"""Returns a statisticaltest/id.
"""
return get_resource(TEST_RE, test)
return get_resource(STATISTICAL_TEST_RE, statistical_test)


def get_resource_id(resource):
Expand Down
120 changes: 120 additions & 0 deletions bigml/statisticaltesthandler.py
@@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
#
# Copyright 2015 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for statisticaltests' REST calls
https://bigml.com/developers/statisticaltests
"""

try:
import simplejson as json
except ImportError:
import json


from bigml.resourcehandler import ResourceHandler
from bigml.resourcehandler import (check_resource_type,
get_statistical_test_id, get_resource_type,
get_dataset_id, check_resource)
from bigml.resourcehandler import (STATISTICAL_TEST_PATH, DATASET_PATH,
TINY_RESOURCE)


class StatisticalTestHandler(ResourceHandler):
    """Mixin providing the statistical tests' REST calls to the BigML class.

    The methods rely on `self.url` and on the `_create`, `_get`, `_list`,
    `_update` and `_delete` helpers, none of which are defined here, so the
    class should not be instantiated independently.

    """

    def __init__(self):
        """Stores the base URL used for statistical test requests.

        This class is intended to be used as a mixin on ResourceHandler,
        which inherits its attributes and basic methods from
        BigMLConnection; `self.url` must already be set when this runs.

        """
        self.statistical_test_url = self.url + STATISTICAL_TEST_PATH

    def create_statistical_test(self, dataset, args=None, wait_time=3,
                                retries=10):
        """Creates a statistical test from a `dataset`.

        `dataset` must be identified as a dataset resource, otherwise an
        Exception is raised. `check_resource` is called on the dataset id
        (with `wait_time`, `retries` and `raise_on_error=True`) before the
        creation request is sent. The contents of `args`, when given, are
        copied into the request body; its "dataset" key is always set to
        the dataset id.

        """
        resource_type = get_resource_type(dataset)
        # Reject anything that is not a dataset before touching the API.
        if resource_type != DATASET_PATH:
            raise Exception("A dataset id is needed to create a"
                            " statistical test. %s found." % resource_type)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        # Work on a fresh dict so the caller's `args` is never modified.
        payload = {}
        if args is not None:
            payload.update(args)
        payload["dataset"] = dataset_id

        return self._create(self.statistical_test_url, json.dumps(payload))

    def get_statistical_test(self, statistical_test, query_string=''):
        """Retrieves a statistical test.

        The `statistical_test` parameter should be a string containing the
        statisticaltest id or the dict returned by create_statistical_test.
        As a statistical test is an evolving object that is processed
        until it reaches the FINISHED or FAULTY state, the function will
        return a dict that encloses the statistical test values and state
        info available at the time it is called.

        Returns None when no statistical test id can be extracted.

        """
        check_resource_type(statistical_test, STATISTICAL_TEST_PATH,
                            message="A statistical test id is needed.")
        resource_id = get_statistical_test_id(statistical_test)
        if not resource_id:
            return None
        return self._get("%s%s" % (self.url, resource_id),
                         query_string=query_string)

    def list_statistical_tests(self, query_string=''):
        """Lists all your statistical tests.

        `query_string` is passed through unchanged to the listing request.

        """
        return self._list(self.statistical_test_url, query_string)

    def update_statistical_test(self, statistical_test, changes):
        """Updates a statistical test.

        `changes` is serialized to JSON and sent as the body of the update
        request. Returns None when no statistical test id can be extracted.

        """
        check_resource_type(statistical_test, STATISTICAL_TEST_PATH,
                            message="A statistical test id is needed.")
        resource_id = get_statistical_test_id(statistical_test)
        if not resource_id:
            return None
        return self._update("%s%s" % (self.url, resource_id),
                            json.dumps(changes))

    def delete_statistical_test(self, statistical_test):
        """Deletes a statistical test.

        Returns None when no statistical test id can be extracted.

        """
        check_resource_type(statistical_test, STATISTICAL_TEST_PATH,
                            message="A statistical test id is needed.")
        resource_id = get_statistical_test_id(statistical_test)
        if not resource_id:
            return None
        return self._delete("%s%s" % (self.url, resource_id))

0 comments on commit 5a74ef6

Please sign in to comment.