Skip to content

Commit

Permalink
Merge pull request #138 from georgia-tech-db/short-circuiting
Browse files Browse the repository at this point in the history
Implemented short-circuiting for logical expressions
  • Loading branch information
gaurav274 committed Mar 5, 2021
2 parents d4cdde0 + 95db64b commit ff040b9
Show file tree
Hide file tree
Showing 12 changed files with 210 additions and 36 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,9 @@ evaql_parserVisitor.py
.vscode/*

/api-docs/_autosummary/

# Conda
miniconda.sh

# Datasets
eva_datasets/
2 changes: 1 addition & 1 deletion src/expression/abstract_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def return_type(self, return_type: ExpressionReturnType):
# how about if we maintain *args
# refactor if need be
@abstractmethod
def evaluate(self, *args):
def evaluate(self, *args, **kwargs):
NotImplementedError('Must be implemented in subclasses.')

def __eq__(self, other):
Expand Down
4 changes: 2 additions & 2 deletions src/expression/aggregation_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.INTEGER,
children=children) # can also be a float

def evaluate(self, *args):
batch = self.get_child(0).evaluate(*args)
def evaluate(self, *args, **kwargs):
batch = self.get_child(0).evaluate(*args, **kwargs)
if self.etype == ExpressionType.AGGREGATION_SUM:
return Batch(frames=batch.frames.agg(['sum']))
elif self.etype == ExpressionType.AGGREGATION_COUNT:
Expand Down
6 changes: 3 additions & 3 deletions src/expression/arithmetic_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.FLOAT,
children=children)

def evaluate(self, *args):
vl = self.get_child(0).evaluate(*args).frames
vr = self.get_child(1).evaluate(*args).frames
def evaluate(self, *args, **kwargs):
vl = self.get_child(0).evaluate(*args, **kwargs).frames
vr = self.get_child(1).evaluate(*args, **kwargs).frames

if self.etype == ExpressionType.ARITHMETIC_ADD:
return Batch(pd.DataFrame(vl + vr))
Expand Down
6 changes: 3 additions & 3 deletions src/expression/comparison_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.BOOLEAN,
children=children)

def evaluate(self, *args):
def evaluate(self, *args, **kwargs):
# evaluate always returns a Batch
left_values = self.get_child(0).evaluate(*args).frames
right_values = self.get_child(1).evaluate(*args).frames
left_values = self.get_child(0).evaluate(*args, **kwargs).frames
right_values = self.get_child(1).evaluate(*args, **kwargs).frames

if len(left_values) != len(right_values):
if len(left_values) == 1:
Expand Down
2 changes: 1 addition & 1 deletion src/expression/constant_value_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, value):
super().__init__(ExpressionType.CONSTANT_VALUE)
self._value = value

def evaluate(self, *args):
def evaluate(self, *args, **kwargs):
return Batch(pd.DataFrame([self._value]))

@property
Expand Down
5 changes: 3 additions & 2 deletions src/expression/function_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,10 @@ def function(self):
def function(self, func: Callable):
self._function = func

def evaluate(self, batch: Batch):
def evaluate(self, batch: Batch, **kwargs):
new_batch = batch
child_batches = [child.evaluate(batch) for child in self.children]
child_batches = \
[child.evaluate(batch, **kwargs) for child in self.children]
if len(child_batches):
new_batch = Batch.merge_column_wise(child_batches)

Expand Down
21 changes: 13 additions & 8 deletions src/expression/logical_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,23 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.BOOLEAN,
children=children)

def evaluate(self, *args):
def evaluate(self, *args, **kwargs):
if self.get_children_count() == 2:
left_values = self.get_child(0).evaluate(*args).frames
right_values = self.get_child(1).evaluate(*args).frames
left_values = self.get_child(0).evaluate(*args, **kwargs).frames
if self.etype == ExpressionType.LOGICAL_AND:
return Batch(pd.DataFrame(left_values & right_values))
if (~left_values).all().bool(): # check if all are false
return Batch(left_values)
kwargs["mask"] = left_values[left_values[0]].index.tolist()
elif self.etype == ExpressionType.LOGICAL_OR:
return Batch(pd.DataFrame(left_values | right_values))

if left_values.all().bool(): # check if all are true
return Batch(left_values)
kwargs["mask"] = left_values[~left_values[0]].index.tolist()
right_values = self.get_child(
1).evaluate(*args, **kwargs).frames
left_values.iloc[kwargs["mask"]] = right_values
return Batch(pd.DataFrame(left_values))
else:
values = self.get_child(0).evaluate(*args).frames

values = self.get_child(0).evaluate(*args, **kwargs).frames
if self.etype == ExpressionType.LOGICAL_NOT:
return Batch(pd.DataFrame(~values))

Expand Down
8 changes: 3 additions & 5 deletions src/expression/tuple_value_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,9 @@ def col_object(self) -> DataFrameColumn:
def col_object(self, value: DataFrameColumn):
self._col_object = value

def evaluate(self, batch: Batch, *args):
if args is None:
# error Handling
pass

def evaluate(self, batch: Batch, *args, **kwargs):
if "mask" in kwargs:
batch = batch[kwargs["mask"]]
return batch.project([self.col_name])

def __eq__(self, other):
Expand Down
20 changes: 11 additions & 9 deletions src/models/storage/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,18 +104,15 @@ def __str__(self):
def __eq__(self, other: 'Batch'):
return self.frames.equals(other.frames)

def _get_frames_from_indices(self, required_frame_ids):
new_frames = self.frames.iloc[required_frame_ids, :]
new_batch = Batch(new_frames)
return new_batch

def __getitem__(self, indices) -> 'Batch':
"""
Takes as input the slice for the list
Arguments:
item (list or Slice):
Returns a batch with the desired frames
:return:
Arguments:
indices (list, slice or mask): list must be
a list of indices; mask is boolean array-like
(i.e. list, NumPy array, DataFrame, etc.)
of appropriate size with True for desired frames.
"""
if isinstance(indices, list):
return self._get_frames_from_indices(indices)
Expand All @@ -127,6 +124,11 @@ def __getitem__(self, indices) -> 'Batch':
step = indices.step if indices.step else 1
return self._get_frames_from_indices(range(start, end, step))

def _get_frames_from_indices(self, required_frame_ids):
new_frames = self.frames.iloc[required_frame_ids, :]
new_batch = Batch(new_frames)
return new_batch

def sort(self, by=None):
"""
in_place sort
Expand Down
118 changes: 116 additions & 2 deletions test/expression/test_logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import pandas as pd
from mock import Mock

from src.expression.abstract_expression import ExpressionType
from src.expression.comparison_expression import ComparisonExpression
from src.expression.logical_expression import LogicalExpression
from src.expression.constant_value_expression import ConstantValueExpression
from src.expression.tuple_value_expression import TupleValueExpression
from src.models.storage.batch import Batch


class LogicalExpressionsTest(unittest.TestCase):
Expand Down Expand Up @@ -71,7 +75,9 @@ def test_logical_or(self):
comparison_expression_right
)
self.assertEqual(
[True], logical_expr.evaluate(None).frames[0].tolist())
[True],
logical_expr.evaluate(None).frames[0].tolist()
)

def test_logical_not(self):
const_exp1 = ConstantValueExpression(0)
Expand All @@ -88,4 +94,112 @@ def test_logical_not(self):
comparison_expression_right
)
self.assertEqual(
[True], logical_expr.evaluate(None).frames[0].tolist())
[True],
logical_expr.evaluate(None).frames[0].tolist()
)

def test_short_circuiting_and_complete(self):
    """AND skips the right operand when the left is all False."""
    # Columns 0 and 1 differ in every row, so the left-hand comparison
    # yields False for the whole batch.
    batch = Batch(pd.DataFrame({0: [1, 2, 3], 1: [4, 5, 6]}))

    lhs = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL,
        TupleValueExpression(col_name=0),
        TupleValueExpression(col_name=1),
    )
    # A mock stands in for the right operand so its calls can be observed.
    rhs = Mock(spec=ComparisonExpression)
    conjunction = LogicalExpression(ExpressionType.LOGICAL_AND, lhs, rhs)

    outcome = conjunction.evaluate(batch).frames[0].tolist()

    self.assertEqual([False, False, False], outcome)
    rhs.evaluate.assert_not_called()

def test_short_circuiting_or_complete(self):
    """OR skips the right operand when the left is all True."""
    # Columns 0 and 1 are identical, so the left-hand comparison yields
    # True for the whole batch.
    batch = Batch(pd.DataFrame({0: [1, 2, 3], 1: [1, 2, 3]}))

    lhs = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL,
        TupleValueExpression(col_name=0),
        TupleValueExpression(col_name=1),
    )
    # A mock stands in for the right operand so its calls can be observed.
    rhs = Mock(spec=ComparisonExpression)
    disjunction = LogicalExpression(ExpressionType.LOGICAL_OR, lhs, rhs)

    outcome = disjunction.evaluate(batch).frames[0].tolist()

    self.assertEqual([True, True, True], outcome)
    rhs.evaluate.assert_not_called()

def test_short_circuiting_and_partial(self):
    """AND asks the right operand only about rows where left is True."""
    # Columns match in rows 0 and 1 only; those are the rows whose
    # result still depends on the right operand.
    batch = Batch(pd.DataFrame({0: [1, 2, 3, 4], 1: [1, 2, 5, 6]}))

    lhs = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL,
        TupleValueExpression(col_name=0),
        TupleValueExpression(col_name=1),
    )
    # The mocked right operand answers True for row 0 and False for row 1.
    rhs_answer = Mock(frames=[[True], [False]])
    rhs = Mock(spec=ComparisonExpression)
    rhs.evaluate = Mock(return_value=rhs_answer)
    conjunction = LogicalExpression(ExpressionType.LOGICAL_AND, lhs, rhs)

    outcome = conjunction.evaluate(batch).frames[0].tolist()

    self.assertEqual([True, False, False, False], outcome)
    rhs.evaluate.assert_called_once_with(batch, mask=[0, 1])

def test_short_circuiting_or_partial(self):
    """OR asks the right operand only about rows where left is False."""
    # Columns differ in rows 0 and 1 only; those are the rows whose
    # result still depends on the right operand.
    batch = Batch(pd.DataFrame({0: [1, 2, 3, 4], 1: [5, 6, 3, 4]}))

    lhs = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL,
        TupleValueExpression(col_name=0),
        TupleValueExpression(col_name=1),
    )
    # The mocked right operand answers True for row 0 and False for row 1.
    rhs_answer = Mock(frames=[[True], [False]])
    rhs = Mock(spec=ComparisonExpression)
    rhs.evaluate = Mock(return_value=rhs_answer)
    disjunction = LogicalExpression(ExpressionType.LOGICAL_OR, lhs, rhs)

    outcome = disjunction.evaluate(batch).frames[0].tolist()

    self.assertEqual([True, False, True, True], outcome)
    rhs.evaluate.assert_called_once_with(batch, mask=[0, 1])
48 changes: 48 additions & 0 deletions test/expression/test_tuple_value.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# coding=utf-8
# Copyright 2018-2020 EVA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import pandas as pd

from src.expression.tuple_value_expression import TupleValueExpression
from src.models.storage.batch import Batch


class TupleValueExpressionsTest(unittest.TestCase):
    """Unit tests for ``TupleValueExpression.evaluate`` with a ``mask``."""

    def test_masking(self):
        """Check that ``mask`` limits the projected column to given rows.

        Every case projects one column of the same six-row batch while
        passing ``mask`` as a list of row positions: all rows, every
        other row, and no rows at all.
        """
        tuples = Batch(pd.DataFrame({
            0: [1, 2, 3, 4, 5, 6],
            1: [7, 8, 9, 10, 11, 12],
            2: [13, 14, 15, 16, 17, 18],
        }))
        # Each entry: (column name, mask, expected projected values).
        cases = [
            (0, [0, 1, 2, 3, 4, 5], [1, 2, 3, 4, 5, 6]),
            (1, [0, 2, 4], [7, 9, 11]),
            (2, [], []),
        ]
        for col_name, mask, expected in cases:
            # subTest reports every failing case instead of stopping at
            # the first one, so a broken mask cannot hide the others.
            with self.subTest(col_name=col_name, mask=mask):
                expression = TupleValueExpression(col_name=col_name)
                projected = expression.evaluate(tuples, mask=mask)
                self.assertEqual(
                    expected,
                    projected.frames[col_name].tolist()
                )

0 comments on commit ff040b9

Please sign in to comment.