| @@ -0,0 +1,150 @@ | ||
| # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | ||
| # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | ||
| # All Rights Reserved | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a | ||
| # copy of this software and associated documentation files (the | ||
| # "Software"), to deal in the Software without restriction, including | ||
| # without limitation the rights to use, copy, modify, merge, publish, dis- | ||
| # tribute, sublicense, and/or sell copies of the Software, and to permit | ||
| # persons to whom the Software is furnished to do so, subject to the fol- | ||
| # lowing conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included | ||
| # in all copies or substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
| # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | ||
| # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
| # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
| # IN THE SOFTWARE. | ||
| # | ||
| try: | ||
| import simplejson as json | ||
| except ImportError: | ||
| import json | ||
|
|
||
| import boto.exception | ||
| import requests | ||
| import boto | ||
|
|
||
class SearchServiceException(Exception):
    """Generic error type for failures reported by the CloudSearch service."""
|
|
||
|
|
||
class CommitMismatchError(Exception):
    """Raised when a commit response reports a different number of adds or
    deletes than the batch that was submitted."""
|
|
||
|
|
||
class DocumentServiceConnection(object):
    """Collect document add/delete operations and post them as one batch.

    Operations are accumulated in ``documents_batch`` (a list of dicts) and
    serialized to JSON ("SDF") when committed. Alternatively a ready-made
    SDF payload can be loaded from an S3 key, in which case that payload is
    sent instead of the locally accumulated batch.

    :param domain: Object exposing ``doc_service_endpoint``; only consulted
        when ``endpoint`` is not given.
    :type endpoint: str
    :param endpoint: Host name of the document service endpoint.
    """

    def __init__(self, domain=None, endpoint=None):
        self.domain = domain
        self.endpoint = endpoint
        if not self.endpoint:
            self.endpoint = domain.doc_service_endpoint
        self.documents_batch = []
        self._sdf = None

    def add(self, _id, version, fields, lang='en'):
        """Queue an ``add`` operation for the document ``_id``.

        :param _id: Document identifier.
        :param version: Document version to store with the operation.
        :type fields: dict
        :param fields: Field values for the document.
        :type lang: str
        :param lang: Language code stored with the operation.
        """
        d = {'type': 'add', 'id': _id, 'version': version, 'lang': lang,
             'fields': fields}
        self.documents_batch.append(d)

    def delete(self, _id, version):
        """Queue a ``delete`` operation for the document ``_id``.

        :param _id: Document identifier.
        :param version: Document version to store with the operation.
        """
        d = {'type': 'delete', 'id': _id, 'version': version}
        self.documents_batch.append(d)

    def get_sdf(self):
        """Return the payload to commit.

        :return: The SDF loaded via :meth:`add_sdf_from_s3` if there is one,
            otherwise the JSON serialization of ``documents_batch``.
        """
        return self._sdf if self._sdf else json.dumps(self.documents_batch)

    def clear_sdf(self):
        """Discard both the loaded SDF payload and the queued operations."""
        self._sdf = None
        self.documents_batch = []

    def add_sdf_from_s3(self, key_obj):
        """Load a complete SDF payload from an S3 key object.

        @todo (lucas) would be nice if this could just take an s3://uri...

        :param key_obj: Object providing ``get_contents_as_string()``.
        """
        self._sdf = key_obj.get_contents_as_string()

    def commit(self):
        """Post the current SDF payload to the document batch endpoint.

        :rtype: :class:`CommitResponse`
        :return: Wrapper around the HTTP response.
        """
        sdf = self.get_sdf()

        if ': null' in sdf:
            boto.log.error('null value in sdf detected. This will probably raise '
                           '500 error.')
            index = sdf.index(': null')
            # Clamp the lower bound: a negative start would be interpreted
            # as an offset from the end and log the wrong (or no) text.
            boto.log.error(sdf[max(index - 100, 0):index + 100])

        url = "http://%s/2011-02-01/documents/batch" % (self.endpoint)

        # The ``config=`` keyword of requests.post() was removed in
        # requests 1.0; the same pool and retry settings are applied through
        # a transport adapter mounted on a session instead.
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=20,
            pool_maxsize=50,
            max_retries=5
        )
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        try:
            r = session.post(url, data=sdf,
                             headers={'Content-Type': 'application/json'})
        finally:
            session.close()

        return CommitResponse(r, self, sdf)
|
|
||
|
|
||
class CommitResponse(object):
    """Wrapper for response to Cloudsearch document batch commit.

    Parses the JSON body of the response and verifies that the number of
    adds and deletes reported matches the submitted batch.

    :type response: :class:`requests.models.Response`
    :param response: Response from Cloudsearch /documents/batch API

    :type doc_service: :class:`DocumentServiceConnection`
    :param doc_service: Object containing the documents posted and methods to
        retry

    :type sdf: str
    :param sdf: The payload that was posted; only used for error logging.

    :raises: :class:`boto.exception.BotoServerError` if the response body
        is not valid JSON
    :raises: :class:`CommitMismatchError` if the add/delete counts in the
        response differ from the submitted batch
    """
    def __init__(self, response, doc_service, sdf):
        self.response = response
        self.doc_service = doc_service
        self.sdf = sdf

        try:
            self.content = json.loads(response.content)
        except (TypeError, ValueError):
            # JSON decode failures are ValueError subclasses; TypeError
            # covers a body that is not a string at all.
            boto.log.error('Error indexing documents.\nResponse Content:\n{}\n\n'
                           'SDF:\n{}'.format(response.content, self.sdf))
            raise boto.exception.BotoServerError(self.response.status_code, '',
                                                 body=response.content)

        self.status = self.content['status']
        if self.status == 'error':
            self.errors = [e.get('message') for e in self.content.get('errors',
                                                                      [])]
        else:
            self.errors = []

        self.adds = self.content['adds']
        self.deletes = self.content['deletes']
        self._check_num_ops('add', self.adds)
        self._check_num_ops('delete', self.deletes)

    def _check_num_ops(self, type_, response_num):
        """Raise exception if number of ops in response doesn't match commit

        :type type_: str
        :param type_: Type of commit operation: 'add' or 'delete'

        :type response_num: int
        :param response_num: Number of adds or deletes in the response.

        :raises: :class:`CommitMismatchError`
        """
        # NOTE(review): the expected count comes from documents_batch, not
        # from the posted payload, so a payload that was loaded separately
        # (leaving documents_batch empty) will be reported as a mismatch.
        commit_num = sum(1 for d in self.doc_service.documents_batch
                         if d['type'] == type_)

        if response_num != commit_num:
            raise CommitMismatchError(
                'Incorrect number of {}s returned. Commit: {} Response: {}'
                .format(type_, commit_num, response_num))
| @@ -0,0 +1,52 @@ | ||
| # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | ||
| # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | ||
| # All Rights Reserved | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a | ||
| # copy of this software and associated documentation files (the | ||
| # "Software"), to deal in the Software without restriction, including | ||
| # without limitation the rights to use, copy, modify, merge, publish, dis- | ||
| # tribute, sublicense, and/or sell copies of the Software, and to permit | ||
| # persons to whom the Software is furnished to do so, subject to the fol- | ||
| # lowing conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included | ||
| # in all copies or substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
| # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | ||
| # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
| # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
| # IN THE SOFTWARE. | ||
| # | ||
|
|
||
| from .layer1 import Layer1 | ||
| from .domain import Domain | ||
|
|
||
|
|
||
class Layer2(object):
    """Object-oriented facade over :class:`Layer1` that returns
    :class:`Domain` objects instead of raw response data."""

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 host=None, debug=0, session_token=None, region=None):
        # All arguments are forwarded positionally, in this exact order.
        connection_args = (aws_access_key_id, aws_secret_access_key,
                           is_secure, port, proxy, proxy_port,
                           host, debug, session_token, region)
        self.layer1 = Layer1(*connection_args)

    def list_domains(self, domain_names=None):
        """
        Return a list of :class:`boto.cloudsearch.domain.Domain`
        objects, one per entry returned by ``Layer1.describe_domains``.

        :param domain_names: Passed through to ``describe_domains``
            unchanged.
        """
        domains = []
        for entry in self.layer1.describe_domains(domain_names):
            domains.append(Domain(self.layer1, entry))
        return domains

    def create_domain(self, domain_name):
        """
        Create a new CloudSearch domain named ``domain_name`` and wrap the
        returned data in a :class:`boto.cloudsearch.domain.Domain` object.
        """
        created = self.layer1.create_domain(domain_name)
        return Domain(self.layer1, created)
| @@ -0,0 +1,249 @@ | ||
| # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | ||
| # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | ||
| # All Rights Reserved | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a | ||
| # copy of this software and associated documentation files (the | ||
| # "Software"), to deal in the Software without restriction, including | ||
| # without limitation the rights to use, copy, modify, merge, publish, dis- | ||
| # tribute, sublicense, and/or sell copies of the Software, and to permit | ||
| # persons to whom the Software is furnished to do so, subject to the fol- | ||
| # lowing conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included | ||
| # in all copies or substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
| # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | ||
| # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
| # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
| # IN THE SOFTWARE. | ||
| # | ||
|
|
||
| try: | ||
| import simplejson as json | ||
| except ImportError: | ||
| import json | ||
|
|
||
class OptionStatus(dict):
    """
    Presents a combination of status field (defined below) which are
    accessed as attributes and option values which are stored in the
    native Python dictionary.  In this class, the option values are
    merged from a JSON object that is stored as the Option part of
    the object.

    :ivar domain: The domain object this option is associated with; its
        ``name`` is passed to the refresh and save callbacks.
    :ivar creation_date: A timestamp for when this option was created.
    :ivar state: The state of processing a change to an option.
        Possible values:

        * RequiresIndexDocuments: the option's latest value will not
          be visible in searches until IndexDocuments has been called
          and indexing is complete.
        * Processing: the option's latest value is not yet visible in
          all searches but is in the process of being activated.
        * Active: the option's latest value is completely visible.

    :ivar status: Alias of ``state``, kept for backward compatibility.
    :ivar update_date: A timestamp for when this option was updated.
    :ivar update_version: A unique integer that indicates when this
        option was last updated.
    """

    def __init__(self, domain, data=None, refresh_fn=None, save_fn=None):
        self.domain = domain
        self.refresh_fn = refresh_fn
        self.save_fn = save_fn
        self.refresh(data)

    def _update_status(self, status):
        """Copy the status fields of a response dict onto attributes."""
        self.creation_date = status['creation_date']
        # wait_for_state() polls ``state``; ``status`` was the only name
        # set here originally, so both are kept in sync.
        self.state = self.status = status['state']
        self.update_date = status['update_date']
        self.update_version = int(status['update_version'])

    def _update_options(self, options):
        """Merge a JSON-encoded options document into this dict."""
        if options:
            self.update(json.loads(options))

    def refresh(self, data=None):
        """
        Refresh the local state of the object.  You can either pass
        new state data in as the parameter ``data`` or, if that parameter
        is omitted, the state data will be retrieved through
        ``refresh_fn`` (when one was supplied).
        """
        if not data:
            if self.refresh_fn:
                data = self.refresh_fn(self.domain.name)
        if data:
            self._update_status(data['status'])
            self._update_options(data['options'])

    def to_json(self):
        """
        Return the JSON representation of the options as a string.
        """
        return json.dumps(self)

    def startElement(self, name, attrs, connection):
        return None

    def endElement(self, name, value, connection):
        """Store one parsed XML element on the object.

        The legacy attribute names (``created``, ``updated``) are still
        set; the names used by :meth:`_update_status` are set alongside
        them so both parsing paths expose the same attributes.
        """
        if name == 'CreationDate':
            self.created = self.creation_date = value
        elif name == 'State':
            self.state = self.status = value
        elif name == 'UpdateDate':
            self.updated = self.update_date = value
        elif name == 'UpdateVersion':
            self.update_version = int(value)
        elif name == 'Options':
            # Previously called a method that does not exist on this class.
            if value:
                self.update(json.loads(value))
        else:
            setattr(self, name, value)

    def save(self):
        """
        Write the current state of the local object back to the
        CloudSearch service.
        """
        if self.save_fn:
            data = self.save_fn(self.domain.name, self.to_json())
            self.refresh(data)

    def wait_for_state(self, state, interval=5):
        """
        Performs polling of CloudSearch to wait for the ``state``
        of this object to change to the provided state.

        :type state: string
        :param state: The state to wait for.

        :type interval: int or float
        :param interval: Seconds to sleep between refreshes.
        """
        # Imported here because this module does not import ``time``.
        import time

        while getattr(self, 'state', None) != state:
            time.sleep(interval)
            self.refresh()
|
|
||
|
|
||
class IndexFieldStatus(OptionStatus):
    """Option status whose options arrive as a mapping rather than as a
    JSON string, and which cannot be saved back."""

    def _update_options(self, options):
        """Merge ``options`` into this dict as-is (no JSON decoding)."""
        self.update(options)

    def save(self):
        """Do nothing; saving is disabled for this status type."""
|
|
||
|
|
||
class RankExpressionStatus(IndexFieldStatus):
    """Status object for rank expressions; behaves exactly like
    :class:`IndexFieldStatus`."""
|
|
||
class ServicePoliciesStatus(OptionStatus):
    """Access-policy document for a domain, with helpers that add or remove
    source IP addresses for the search and document services."""

    def new_statement(self, arn, ip):
        """
        Build a policy statement that allows the address ``ip`` to access
        the service identified by ``arn``.

        :type arn: string
        :param arn: The Amazon Resource Name (ARN) of the service you wish
            to provide access to.  This would be either the search
            service or the document service.

        :type ip: string
        :param ip: An IP address or CIDR block you wish to grant access
            to.

        :rtype: dict
        :return: The new statement.
        """
        source_ip_condition = {
            "IpAddress": {
                "aws:SourceIp": [ip]
            }
        }
        return {
            "Effect": "Allow",
            "Action": "*",  # Docs say use GET, but denies unless *
            "Resource": arn,
            "Condition": source_ip_condition
        }

    def _allow_ip(self, arn, ip):
        """Ensure ``ip`` is allowed for ``arn``, then save the policy."""
        if 'Statement' not in self:
            self['Statement'] = [self.new_statement(arn, ip)]
            self.save()
            return

        found_ip_condition = False
        for statement in self['Statement']:
            if statement['Resource'] != arn:
                continue
            conditions = statement['Condition']
            if 'IpAddress' in conditions:
                found_ip_condition = True
                allowed = conditions['IpAddress']['aws:SourceIp']
                if ip not in allowed:
                    allowed.append(ip)

        # No statement for this resource carries an IpAddress condition,
        # so a dedicated statement is appended.
        if not found_ip_condition:
            self['Statement'].append(self.new_statement(arn, ip))
        self.save()

    def allow_search_ip(self, ip):
        """
        Allow the given IP address or CIDR block to access the search
        service.

        :type ip: string
        :param ip: An IP address or CIDR block you wish to grant access
            to.
        """
        self._allow_ip(self.domain.search_service_arn, ip)

    def allow_doc_ip(self, ip):
        """
        Allow the given IP address or CIDR block to access the document
        service.

        :type ip: string
        :param ip: An IP address or CIDR block you wish to grant access
            to.
        """
        self._allow_ip(self.domain.doc_service_arn, ip)

    def _disallow_ip(self, arn, ip):
        """Remove ``ip`` from every IpAddress condition on ``arn`` and save
        the policy if anything was removed."""
        if 'Statement' not in self:
            return

        removed = False
        for statement in self['Statement']:
            if statement['Resource'] != arn:
                continue
            conditions = statement['Condition']
            if 'IpAddress' in conditions:
                allowed = conditions['IpAddress']['aws:SourceIp']
                if ip in allowed:
                    allowed.remove(ip)
                    removed = True

        if removed:
            self.save()

    def disallow_search_ip(self, ip):
        """
        Remove the given IP address or CIDR block from the addresses
        allowed to access the search service.

        :type ip: string
        :param ip: An IP address or CIDR block whose access should be
            removed.
        """
        self._disallow_ip(self.domain.search_service_arn, ip)

    def disallow_doc_ip(self, ip):
        """
        Remove the given IP address or CIDR block from the addresses
        allowed to access the document service.

        :type ip: string
        :param ip: An IP address or CIDR block whose access should be
            removed.
        """
        self._disallow_ip(self.domain.doc_service_arn, ip)
| @@ -0,0 +1,298 @@ | ||
| # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | ||
| # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | ||
| # All Rights Reserved | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a | ||
| # copy of this software and associated documentation files (the | ||
| # "Software"), to deal in the Software without restriction, including | ||
| # without limitation the rights to use, copy, modify, merge, publish, dis- | ||
| # tribute, sublicense, and/or sell copies of the Software, and to permit | ||
| # persons to whom the Software is furnished to do so, subject to the fol- | ||
| # lowing conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included | ||
| # in all copies or substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
| # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | ||
| # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
| # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
| # IN THE SOFTWARE. | ||
| # | ||
| from math import ceil | ||
| import time | ||
| import json | ||
| import boto | ||
| import requests | ||
|
|
||
|
|
||
class SearchServiceException(Exception):
    """Raised when a CloudSearch search response reports an error."""
|
|
||
|
|
||
class CommitMismatchError(Exception):
    """Error type for a commit whose reported operation counts do not match
    what was submitted."""
|
|
||
|
|
||
class SearchResults(object):
    """One page of results from a CloudSearch search request.

    Built from the decoded JSON response plus two extra keys added by the
    caller: ``query`` (the query that produced this page) and
    ``search_service`` (a callable that runs a query and returns a new
    ``SearchResults``).

    :ivar hits: Total number of matching documents (``hits.found``).
    :ivar docs: The documents on this page (``hits.hit``).
    :ivar start: Offset of this page (``hits.start``).
    :ivar num_pages_needed: Number of pages of ``query.real_size``
        documents required to hold ``hits`` documents.
    """

    def __init__(self, **attrs):
        self.rid = attrs['info']['rid']
        # self.doc_coverage_pct = attrs['info']['doc-coverage-pct']
        self.cpu_time_ms = attrs['info']['cpu-time-ms']
        self.time_ms = attrs['info']['time-ms']
        self.hits = attrs['hits']['found']
        self.docs = attrs['hits']['hit']
        self.start = attrs['hits']['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Force true division: with integer operands Python 2 would floor
        # the quotient before ceil() runs and drop the last partial page.
        self.num_pages_needed = int(
            ceil(self.hits / float(self.query.real_size)))

    def __len__(self):
        return len(self.docs)

    def __iter__(self):
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances ``query.start`` and ``query.page`` in place before issuing
        the request.

        :rtype: :class:`SearchResults`
        :return: A cloudsearch SearchResults object

        :raises: StopIteration when the current page is the last one
        """
        # Pages are numbered from 0, so the last valid page index is
        # num_pages_needed - 1.
        if self.query.page + 1 < self.num_pages_needed:
            self.query.start += self.query.real_size
            self.query.page += 1
            return self.search_service(self.query)
        else:
            raise StopIteration
|
|
||
|
|
||
class Query(object):
    """Holds the parameters of a CloudSearch search request.

    List and dict arguments default to empty containers when omitted.

    :ivar size: The page size as requested by the caller.
    :ivar real_size: The page size actually sent; see :meth:`update_size`.
    :ivar page: Page counter, initialised to 0.
    """

    # Largest page size sent in a single request.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):

        self.q = q
        self.bq = bq
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or []
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Set the requested page size and derive the size actually sent.

        A size of 0, or one larger than ``RESULTS_PER_PAGE``, is sent as
        ``RESULTS_PER_PAGE``.

        :type new_size: int
        :param new_size: Requested number of results per page.
        """
        self.size = new_size
        self.real_size = Query.RESULTS_PER_PAGE if (
            self.size > Query.RESULTS_PER_PAGE or self.size == 0
        ) else self.size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        Empty values are omitted.  List-valued settings are joined with
        commas; each entry of a dict-valued setting becomes its own
        parameter whose name embeds the dict key.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        if self.q:
            params['q'] = self.q

        if self.bq:
            params['bq'] = self.bq

        if self.rank:
            params['rank'] = ','.join(self.rank)

        if self.return_fields:
            params['return-fields'] = ','.join(self.return_fields)

        if self.facet:
            params['facet'] = ','.join(self.facet)

        # (parameter-name template, source dict) pairs; .items() is used
        # because dict.iteritems() does not exist on Python 3.
        keyed_settings = (
            ('facet-%s-constraints', self.facet_constraints),
            ('facet-%s-sort', self.facet_sort),
            ('facet-%s-top-n', self.facet_top_n),
            ('t-%s', self.t),
        )
        for template, values in keyed_settings:
            for k, v in values.items():
                params[template % k] = v

        return params
|
|
||
|
|
||
class SearchConnection(object):
    """Runs search queries against a CloudSearch search endpoint.

    :param domain: Object exposing ``search_service_endpoint``; only
        consulted when ``endpoint`` is not given.
    :type endpoint: str
    :param endpoint: Host name of the search service endpoint.
    """

    def __init__(self, domain=None, endpoint=None):
        self.domain = domain
        self.endpoint = endpoint
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Return a :class:`Query` built from the given arguments.

        The arguments are the same as for :meth:`search`.
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Query Cloudsearch

        :type q: string
        :param q: Value sent as the ``q`` request parameter.

        :type bq: string
        :param bq: Value sent as the ``bq`` request parameter.

        :type rank: list of strings
        :param rank: Sent comma-joined as the ``rank`` parameter.

        :type return_fields: list of strings
        :param return_fields: Sent comma-joined as ``return-fields``.

        :type size: int
        :param size: Requested number of results per page.

        :type start: int
        :param start: Offset of the first result to return.

        :type facet: list of strings
        :param facet: Sent comma-joined as the ``facet`` parameter.

        :type facet_constraints: dict
        :param facet_constraints: Each ``name: value`` entry is sent as
            ``facet-<name>-constraints``.

        :type facet_sort: dict
        :param facet_sort: Each ``name: value`` entry is sent as
            ``facet-<name>-sort``.

        :type facet_top_n: dict
        :param facet_top_n: Each ``name: value`` entry is sent as
            ``facet-<name>-top-n``.

        :type t: dict
        :param t: Each ``name: value`` entry is sent as ``t-<name>``.

        :rtype: :class:`SearchResults`
        :return: A cloudsearch SearchResults object
        """

        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :rtype: :class:`SearchResults`
        :return: A cloudsearch SearchResults object

        :raises: :class:`SearchServiceException` if the response reports
            an error
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        data = json.loads(r.content)
        # Attached so the results object can request further pages.
        data['query'] = query
        data['search_service'] = self

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                        "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                % (params,), query)

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield successive result pages for ``query``.

        ``query.start`` is advanced in place after each page.  Iteration
        stops once the offset reaches the total hit count reported by the
        most recent page, so no request is made past the last page.  At
        least one request is always made.
        """
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # A non-positive page size can never reach the hit count.
            if query.real_size <= 0 or query.start >= results.hits:
                break

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        ``query`` is modified: its size is set to ``per_page`` and its
        ``start`` offset is advanced as pages are fetched.

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :type per_page: int
        :param per_page: Number of docs in each SearchResults object.

        :rtype: generator
        :return: Generator containing :class:`SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.  ``query.start`` is advanced as pages are fetched.

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        ``query`` is modified: its size is set to 1 so that only a single
        document is fetched.

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits
|
|
||
|
|
||
|
|
| @@ -0,0 +1,75 @@ | ||
| # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | ||
| # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | ||
| # All Rights Reserved | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a | ||
| # copy of this software and associated documentation files (the | ||
| # "Software"), to deal in the Software without restriction, including | ||
| # without limitation the rights to use, copy, modify, merge, publish, dis- | ||
| # tribute, sublicense, and/or sell copies of the Software, and to permit | ||
| # persons to whom the Software is furnished to do so, subject to the fol- | ||
| # lowing conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included | ||
| # in all copies or substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
| # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | ||
| # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
| # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
| # IN THE SOFTWARE. | ||
|
|
||
class SourceAttribute(object):
    """
    Describes how a document source field feeds an index field.
    A maximum of 20 source attributes can be configured for
    each index field.

    :ivar data_copy: Settings dict used with the ``Copy`` data
        function; starts out empty.

    :ivar data_function: The transformation applied when copying
        data from a source attribute.  Must be one of
        ``ValidDataFunctions``; defaults to the first entry.

    :ivar data_map: Settings dict used with the ``Map`` data
        function, with the following keys:
        * cases - A dict that translates source field values
          to custom values.
        * default - An optional default value to use if the
          source attribute is not specified in a document.
        * name - the name of the document source field to add
          to this ``IndexField``

    :ivar data_trim_title: Settings dict used with the ``TrimTitle``
        data function, which trims common title words from a source
        document attribute when populating an ``IndexField``.
        This can be used to create an ``IndexField`` you can
        use for sorting.  The value is a dict with the following
        fields:
        * default - An optional default value.
        * language - an IETF RFC 4646 language code.
        * separator - The separator that follows the text to trim.
        * name - The name of the document source field to add.
    """

    # Accepted values for ``data_function``; the first is the default.
    ValidDataFunctions = ('Copy', 'TrimTitle', 'Map')

    def __init__(self):
        self._data_function = self.ValidDataFunctions[0]
        self.data_copy = {}
        self.data_map = {}
        self.data_trim_title = {}

    @property
    def data_function(self):
        """The currently selected data function name."""
        return self._data_function

    @data_function.setter
    def data_function(self, value):
        if value in self.ValidDataFunctions:
            self._data_function = value
            return
        choices = '|'.join(self.ValidDataFunctions)
        raise ValueError('data_function must be one of: %s' % choices)
|
|