Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String Arithmetics: __add__ ops #68

Merged
merged 5 commits into from
Nov 27, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 144 additions & 92 deletions eland/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -895,100 +895,156 @@ def _resolve_arithmetic_op_fields(self, item, query_params, post_processing):
right_field = item[1][1][1][1]

# https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-api-reference-shared-java-lang.html#painless-api-reference-shared-Math
if isinstance(left_field, str) and isinstance(right_field, str):
"""
(if op_name = '__truediv__')

"script_fields": {
"field_name": {
"script": {
"source": "doc[left_field].value / doc[right_field].value"
}
if not field_name.endswith("||str") and not field_name.startswith("str||"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of the check on field_name, it would be neater to add an 'op_type' to the task, i.e. change
# task = ('arithmetic_op_fields', (field_name, (op_name, (left_field, right_field)))) to
# task = ('arithmetic_op_fields', (field_name, (op_name, op_type), (left_field, right_field)))
or similar. Then op_type could be compared directly, rather than relying on a string match against field_name (a "magic" match).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Soon we should also refactor operations.py so that tasks are subclasses of a task class (using the Bridge pattern or similar). For now, adding op_type to the arithmetic_op_fields task object will move this forward.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this could use a refactor. The task items will quickly become unmanageable as we add more ops. We should consider using dicts or named tuples as items.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

implemented in 5307850 for string types

if isinstance(left_field, str) and isinstance(right_field, str):
"""
(if op_name = '__truediv__')

"script_fields": {
"field_name": {
"script": {
"source": "doc[left_field].value / doc[right_field].value"
}
}
}
}
"""
if op_name == '__add__':
source = "doc['{0}'].value + doc['{1}'].value".format(left_field, right_field)
elif op_name == '__truediv__':
source = "doc['{0}'].value / doc['{1}'].value".format(left_field, right_field)
elif op_name == '__floordiv__':
source = "Math.floor(doc['{0}'].value / doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__pow__':
source = "Math.pow(doc['{0}'].value, doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__mod__':
source = "doc['{0}'].value % doc['{1}'].value".format(left_field, right_field)
elif op_name == '__mul__':
source = "doc['{0}'].value * doc['{1}'].value".format(left_field, right_field)
elif op_name == '__sub__':
source = "doc['{0}'].value - doc['{1}'].value".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
"""
if op_name == '__add__':
source = "doc['{0}'].value + doc['{1}'].value".format(left_field, right_field)
elif op_name == '__truediv__':
source = "doc['{0}'].value / doc['{1}'].value".format(left_field, right_field)
elif op_name == '__floordiv__':
source = "Math.floor(doc['{0}'].value / doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__pow__':
source = "Math.pow(doc['{0}'].value, doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__mod__':
source = "doc['{0}'].value % doc['{1}'].value".format(left_field, right_field)
elif op_name == '__mul__':
source = "doc['{0}'].value * doc['{1}'].value".format(left_field, right_field)
elif op_name == '__sub__':
source = "doc['{0}'].value - doc['{1}'].value".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))

if query_params['query_script_fields'] is None:
query_params['query_script_fields'] = {}
query_params['query_script_fields'][field_name] = {
'script': {
'source': source
if query_params['query_script_fields'] is None:
query_params['query_script_fields'] = {}
query_params['query_script_fields'][field_name] = {
'script': {
'source': source
}
}
}
elif isinstance(left_field, str) and np.issubdtype(np.dtype(type(right_field)), np.number):
"""
(if op_name = '__truediv__')

"script_fields": {
"field_name": {
"script": {
"source": "doc[left_field].value / right_field"
}
elif isinstance(left_field, str) and np.issubdtype(np.dtype(type(right_field)), np.number):
"""
(if op_name = '__truediv__')

"script_fields": {
"field_name": {
"script": {
"source": "doc[left_field].value / right_field"
}
}
}
}
"""
if op_name == '__add__':
source = "doc['{0}'].value + {1}".format(left_field, right_field)
elif op_name == '__truediv__':
source = "doc['{0}'].value / {1}".format(left_field, right_field)
elif op_name == '__floordiv__':
source = "Math.floor(doc['{0}'].value / {1})".format(left_field, right_field)
elif op_name == '__pow__':
source = "Math.pow(doc['{0}'].value, {1})".format(left_field, right_field)
elif op_name == '__mod__':
source = "doc['{0}'].value % {1}".format(left_field, right_field)
elif op_name == '__mul__':
source = "doc['{0}'].value * {1}".format(left_field, right_field)
elif op_name == '__sub__':
source = "doc['{0}'].value - {1}".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
elif np.issubdtype(np.dtype(type(left_field)), np.number) and isinstance(right_field, str):
"""
(if op_name = '__truediv__')

"script_fields": {
"field_name": {
"script": {
"source": "left_field / doc['right_field'].value"
}
"""
if op_name == '__add__':
source = "doc['{0}'].value + {1}".format(left_field, right_field)
elif op_name == '__truediv__':
source = "doc['{0}'].value / {1}".format(left_field, right_field)
elif op_name == '__floordiv__':
source = "Math.floor(doc['{0}'].value / {1})".format(left_field, right_field)
elif op_name == '__pow__':
source = "Math.pow(doc['{0}'].value, {1})".format(left_field, right_field)
elif op_name == '__mod__':
source = "doc['{0}'].value % {1}".format(left_field, right_field)
elif op_name == '__mul__':
source = "doc['{0}'].value * {1}".format(left_field, right_field)
elif op_name == '__sub__':
source = "doc['{0}'].value - {1}".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
elif np.issubdtype(np.dtype(type(left_field)), np.number) and isinstance(right_field, str):
"""
(if op_name = '__truediv__')

"script_fields": {
"field_name": {
"script": {
"source": "left_field / doc['right_field'].value"
}
}
}
}
"""
if op_name == '__add__':
source = "{0} + doc['{1}'].value".format(left_field, right_field)
elif op_name == '__truediv__':
source = "{0} / doc['{1}'].value".format(left_field, right_field)
elif op_name == '__floordiv__':
source = "Math.floor({0} / doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__pow__':
source = "Math.pow({0}, doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__mod__':
source = "{0} % doc['{1}'].value".format(left_field, right_field)
elif op_name == '__mul__':
source = "{0} * doc['{1}'].value".format(left_field, right_field)
elif op_name == '__sub__':
source = "{0} - doc['{1}'].value".format(left_field, right_field)
"""
if op_name == '__add__':
source = "{0} + doc['{1}'].value".format(left_field, right_field)
elif op_name == '__truediv__':
source = "{0} / doc['{1}'].value".format(left_field, right_field)
elif op_name == '__floordiv__':
source = "Math.floor({0} / doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__pow__':
source = "Math.pow({0}, doc['{1}'].value)".format(left_field, right_field)
elif op_name == '__mod__':
source = "{0} % doc['{1}'].value".format(left_field, right_field)
elif op_name == '__mul__':
source = "{0} * doc['{1}'].value".format(left_field, right_field)
elif op_name == '__sub__':
source = "{0} - doc['{1}'].value".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))

else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
else:
raise TypeError("Types for operation inconsistent {} {} {}", type(left_field), type(right_field), op_name)
raise TypeError("Types for operation inconsistent {} {} {}", type(left_field), type(right_field), op_name)

elif field_name.startswith("str||") and field_name.endswith("||str"):
if isinstance(left_field, str) and isinstance(right_field, str):
"""
(if op_name = '__add__')

"script_fields": {
"field_name": {
"script": {
"source": "doc[left_field].value + doc[right_field].value"
}
}
}
"""
if op_name == '__add__':
source = "doc['{0}'].value + doc['{1}'].value".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))

elif field_name.endswith("||str"):
if isinstance(left_field, str) and isinstance(right_field, str):
"""
(if op_name = '__add__')

"script_fields": {
"field_name": {
"script": {
"source": "doc[left_field].value + right_field"
}
}
}
"""
if op_name == '__add__':
source = "doc['{0}'].value + '{1}'".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))

elif field_name.startswith("str||"):
if isinstance(left_field, str) and isinstance(right_field, str):
"""
(if op_name = '__add__')

"script_fields": {
"field_name": {
"script": {
"source": "left_field + doc[right_field].value"
}
}
}
"""
if op_name == '__add__':
source = "'{0}' + doc['{1}'].value".format(left_field, right_field)
else:
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))

if query_params['query_script_fields'] is None:
query_params['query_script_fields'] = {}
Expand All @@ -1000,15 +1056,13 @@ def _resolve_arithmetic_op_fields(self, item, query_params, post_processing):

return query_params, post_processing


def _resolve_post_processing_task(self, item, query_params, post_processing):
# Just do this in post-processing
if item[0] != 'field_names':
post_processing.append(item)

return query_params, post_processing


def _size(self, query_params, post_processing):
# Shrink wrap code around checking if size parameter is set
size = query_params['query_size'] # can be None
Expand All @@ -1023,7 +1077,6 @@ def _size(self, query_params, post_processing):
# This can return None
return size


def info_es(self, buf):
buf.write("Operations:\n")
buf.write(" tasks: {0}\n".format(self._tasks))
Expand All @@ -1044,7 +1097,6 @@ def info_es(self, buf):
buf.write(" body: {0}\n".format(body))
buf.write(" post_processing: {0}\n".format(post_processing))


def update_query(self, boolean_filter):
task = ('boolean_filter', boolean_filter)
self._tasks.append(task)
34 changes: 34 additions & 0 deletions eland/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,40 @@ def check_arithmetics(self, right):
"{0} != {1}".format(self._index_pattern, right._index_pattern)
)

def check_str_arithmetics(self, right, self_field, right_field):
    """
    Check that a string arithmetic operation between two operands is possible.

    In the case of string arithmetics, we need an additional check to ensure that the
    selected fields are aggregatable.

    Parameters
    ----------
    right: ElandQueryCompiler or None
        The query compiler to compare self to. When it is None (or any falsy
        value) the right operand is treated as a raw string and no
        compatibility or aggregatable check is run for it.
    self_field: str
        Name of the field on ``self`` taking part in the operation.
    right_field: str
        Name of the field on ``right``, or the raw string operand when
        ``right`` is not supplied.

    Returns
    -------
    tuple
        ``(self_key, right_key)``: the first key of the mapping returned by
        ``aggregatable_field_names`` for each side. For a raw string operand,
        ``right_key`` is ``right_field`` itself.
        NOTE(review): the keys are presumably the aggregatable field names to
        use in the script - confirm against the mappings implementation.

    Raises
    ------
    TypeError, ValueError
        If string arithmetic operations aren't possible. ValueError is raised
        here when either side has no aggregatable field; ``check_arithmetics``
        may raise as well.
    """
    # Only check compatibility if right is an ElandQueryCompiler,
    # else use the raw string itself as the right-hand name.
    right_agg = {right_field: right_field}
    if right:
        self.check_arithmetics(right)
        right_agg = right._mappings.aggregatable_field_names([right_field])

    self_agg = self._mappings.aggregatable_field_names([self_field])

    if not (self_agg and right_agg):
        # Report the field names that were passed in. `right` can be None on
        # this path, so reading attributes from it would hide the real error
        # behind an AttributeError.
        raise ValueError(
            "Can not perform arithmetic operations on non aggregatable fields. "
            "One of [{}, {}] is not aggregatable.".format(self_field, right_field)
        )

    return next(iter(self_agg)), next(iter(right_agg))

def arithmetic_op_fields(self, new_field_name, op, left_field, right_field):
result = self.copy()

Expand Down
Loading