Merge pull request #138 from georgia-tech-db/short-circuiting

Implemented short-circuiting for logical expressions
georgia-tech-db · Mar 5, 2021 · ff040b9
2 parents d4cdde0 + 95db64b
commit ff040b9
Show file tree

Hide file tree

Showing 12 changed files with 210 additions and 36 deletions.
diff --git a/.gitignore b/.gitignore
@@ -148,3 +148,9 @@ evaql_parserVisitor.py
 .vscode/*
 
 /api-docs/_autosummary/
+
+# Conda
+miniconda.sh
+
+# Datasets
+eva_datasets/
diff --git a/src/expression/abstract_expression.py b/src/expression/abstract_expression.py
@@ -124,7 +124,7 @@ def return_type(self, return_type: ExpressionReturnType):
     # how about if we maintain *args
     # refactor if need be
     @abstractmethod
-    def evaluate(self, *args):
+    def evaluate(self, *args, **kwargs):
         NotImplementedError('Must be implemented in subclasses.')
 
     def __eq__(self, other):

diff --git a/src/expression/aggregation_expression.py b/src/expression/aggregation_expression.py
@@ -30,8 +30,8 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
         super().__init__(exp_type, rtype=ExpressionReturnType.INTEGER,
                          children=children)  # can also be a float
 
-    def evaluate(self, *args):
-        batch = self.get_child(0).evaluate(*args)
+    def evaluate(self, *args, **kwargs):
+        batch = self.get_child(0).evaluate(*args, **kwargs)
         if self.etype == ExpressionType.AGGREGATION_SUM:
             return Batch(frames=batch.frames.agg(['sum']))
         elif self.etype == ExpressionType.AGGREGATION_COUNT:

diff --git a/src/expression/arithmetic_expression.py b/src/expression/arithmetic_expression.py
@@ -30,9 +30,9 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
         super().__init__(exp_type, rtype=ExpressionReturnType.FLOAT,
                          children=children)
 
-    def evaluate(self, *args):
-        vl = self.get_child(0).evaluate(*args).frames
-        vr = self.get_child(1).evaluate(*args).frames
+    def evaluate(self, *args, **kwargs):
+        vl = self.get_child(0).evaluate(*args, **kwargs).frames
+        vr = self.get_child(1).evaluate(*args, **kwargs).frames
 
         if self.etype == ExpressionType.ARITHMETIC_ADD:
             return Batch(pd.DataFrame(vl + vr))

diff --git a/src/expression/comparison_expression.py b/src/expression/comparison_expression.py
@@ -31,10 +31,10 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
         super().__init__(exp_type, rtype=ExpressionReturnType.BOOLEAN,
                          children=children)
 
-    def evaluate(self, *args):
+    def evaluate(self, *args, **kwargs):
         # evaluate always return batch
-        left_values = self.get_child(0).evaluate(*args).frames
-        right_values = self.get_child(1).evaluate(*args).frames
+        left_values = self.get_child(0).evaluate(*args, **kwargs).frames
+        right_values = self.get_child(1).evaluate(*args, **kwargs).frames
 
         if len(left_values) != len(right_values):
             if len(left_values) == 1:

diff --git a/src/expression/constant_value_expression.py b/src/expression/constant_value_expression.py
@@ -27,7 +27,7 @@ def __init__(self, value):
         super().__init__(ExpressionType.CONSTANT_VALUE)
         self._value = value
 
-    def evaluate(self, *args):
+    def evaluate(self, *args, **kwargs):
         return Batch(pd.DataFrame([self._value]))
 
     @property

diff --git a/src/expression/function_expression.py b/src/expression/function_expression.py
@@ -95,9 +95,10 @@ def function(self):
     def function(self, func: Callable):
         self._function = func
 
-    def evaluate(self, batch: Batch):
+    def evaluate(self, batch: Batch, **kwargs):
         new_batch = batch
-        child_batches = [child.evaluate(batch) for child in self.children]
+        child_batches = \
+            [child.evaluate(batch, **kwargs) for child in self.children]
         if len(child_batches):
             new_batch = Batch.merge_column_wise(child_batches)
 

diff --git a/src/expression/logical_expression.py b/src/expression/logical_expression.py
@@ -30,18 +30,23 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
         super().__init__(exp_type, rtype=ExpressionReturnType.BOOLEAN,
                          children=children)
 
-    def evaluate(self, *args):
+    def evaluate(self, *args, **kwargs):
         if self.get_children_count() == 2:
-            left_values = self.get_child(0).evaluate(*args).frames
-            right_values = self.get_child(1).evaluate(*args).frames
+            left_values = self.get_child(0).evaluate(*args, **kwargs).frames
             if self.etype == ExpressionType.LOGICAL_AND:
-                return Batch(pd.DataFrame(left_values & right_values))
+                if (~left_values).all().bool():  # check if all are false
+                    return Batch(left_values)
+                kwargs["mask"] = left_values[left_values[0]].index.tolist()
             elif self.etype == ExpressionType.LOGICAL_OR:
-                return Batch(pd.DataFrame(left_values | right_values))
-
+                if left_values.all().bool():  # check if all are true
+                    return Batch(left_values)
+                kwargs["mask"] = left_values[~left_values[0]].index.tolist()
+            right_values = self.get_child(
+                1).evaluate(*args, **kwargs).frames
+            left_values.iloc[kwargs["mask"]] = right_values
+            return Batch(pd.DataFrame(left_values))
         else:
-            values = self.get_child(0).evaluate(*args).frames
-
+            values = self.get_child(0).evaluate(*args, **kwargs).frames
             if self.etype == ExpressionType.LOGICAL_NOT:
                 return Batch(pd.DataFrame(~values))
 

diff --git a/src/expression/tuple_value_expression.py b/src/expression/tuple_value_expression.py
@@ -66,11 +66,9 @@ def col_object(self) -> DataFrameColumn:
     def col_object(self, value: DataFrameColumn):
         self._col_object = value
 
-    def evaluate(self, batch: Batch, *args):
-        if args is None:
-            # error Handling
-            pass
-
+    def evaluate(self, batch: Batch, *args, **kwargs):
+        if "mask" in kwargs:
+            batch = batch[kwargs["mask"]]
         return batch.project([self.col_name])
 
     def __eq__(self, other):

diff --git a/src/models/storage/batch.py b/src/models/storage/batch.py
@@ -104,18 +104,15 @@ def __str__(self):
     def __eq__(self, other: 'Batch'):
         return self.frames.equals(other.frames)
 
-    def _get_frames_from_indices(self, required_frame_ids):
-        new_frames = self.frames.iloc[required_frame_ids, :]
-        new_batch = Batch(new_frames)
-        return new_batch
-
     def __getitem__(self, indices) -> 'Batch':
         """
-        Takes as input the slice for the list
-        Arguments:
-            item (list or Slice):
+        Returns a batch with the desired frames
 
-        :return:
+        Arguments:
+            indices (list, slice or mask): list must be
+            a list of indices; mask is boolean array-like
+            (i.e. list, NumPy array, DataFrame, etc.)
+            of appropriate size with True for desired frames.
         """
         if isinstance(indices, list):
             return self._get_frames_from_indices(indices)
@@ -127,6 +124,11 @@ def __getitem__(self, indices) -> 'Batch':
             step = indices.step if indices.step else 1
             return self._get_frames_from_indices(range(start, end, step))
 
+    def _get_frames_from_indices(self, required_frame_ids):
+        new_frames = self.frames.iloc[required_frame_ids, :]
+        new_batch = Batch(new_frames)
+        return new_batch
+
     def sort(self, by=None):
         """
         in_place sort

diff --git a/test/expression/test_logical.py b/test/expression/test_logical.py
@@ -13,11 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
+import pandas as pd
+from mock import Mock
 
 from src.expression.abstract_expression import ExpressionType
 from src.expression.comparison_expression import ComparisonExpression
 from src.expression.logical_expression import LogicalExpression
 from src.expression.constant_value_expression import ConstantValueExpression
+from src.expression.tuple_value_expression import TupleValueExpression
+from src.models.storage.batch import Batch
 
 
 class LogicalExpressionsTest(unittest.TestCase):
@@ -71,7 +75,9 @@ def test_logical_or(self):
             comparison_expression_right
         )
         self.assertEqual(
-            [True], logical_expr.evaluate(None).frames[0].tolist())
+            [True],
+            logical_expr.evaluate(None).frames[0].tolist()
+        )
 
     def test_logical_not(self):
         const_exp1 = ConstantValueExpression(0)
@@ -88,4 +94,112 @@ def test_logical_not(self):
             comparison_expression_right
         )
         self.assertEqual(
-            [True], logical_expr.evaluate(None).frames[0].tolist())
+            [True],
+            logical_expr.evaluate(None).frames[0].tolist()
+        )
+
+    def test_short_circuiting_and_complete(self):
+        # tests that the right-hand side is skipped entirely for AND
+        tup_val_exp_l = TupleValueExpression(col_name=0)
+        tup_val_exp_r = TupleValueExpression(col_name=1)
+
+        comp_exp_l = ComparisonExpression(
+            ExpressionType.COMPARE_EQUAL,
+            tup_val_exp_l,
+            tup_val_exp_r
+        )
+        comp_exp_r = Mock(spec=ComparisonExpression)
+
+        logical_exp = LogicalExpression(
+            ExpressionType.LOGICAL_AND,
+            comp_exp_l,
+            comp_exp_r
+        )
+
+        tuples = Batch(pd.DataFrame(
+            {0: [1, 2, 3], 1: [4, 5, 6]}))
+        self.assertEqual(
+            [False, False, False],
+            logical_exp.evaluate(tuples).frames[0].tolist()
+        )
+        comp_exp_r.evaluate.assert_not_called()
+
+    def test_short_circuiting_or_complete(self):
+        # tests that the right-hand side is skipped entirely for OR
+        tup_val_exp_l = TupleValueExpression(col_name=0)
+        tup_val_exp_r = TupleValueExpression(col_name=1)
+
+        comp_exp_l = ComparisonExpression(
+            ExpressionType.COMPARE_EQUAL,
+            tup_val_exp_l,
+            tup_val_exp_r
+        )
+        comp_exp_r = Mock(spec=ComparisonExpression)
+
+        logical_exp = LogicalExpression(
+            ExpressionType.LOGICAL_OR,
+            comp_exp_l,
+            comp_exp_r
+        )
+
+        tuples = Batch(pd.DataFrame(
+            {0: [1, 2, 3], 1: [1, 2, 3]}))
+        self.assertEqual(
+            [True, True, True],
+            logical_exp.evaluate(tuples).frames[0].tolist()
+        )
+        comp_exp_r.evaluate.assert_not_called()
+
+    def test_short_circuiting_and_partial(self):
+        # tests that the right-hand side is evaluated with a row mask for AND
+        tup_val_exp_l = TupleValueExpression(col_name=0)
+        tup_val_exp_r = TupleValueExpression(col_name=1)
+
+        comp_exp_l = ComparisonExpression(
+            ExpressionType.COMPARE_EQUAL,
+            tup_val_exp_l,
+            tup_val_exp_r
+        )
+        comp_exp_r = Mock(spec=ComparisonExpression)
+        comp_exp_r.evaluate = Mock(return_value=Mock(frames=[[True], [False]]))
+
+        logical_exp = LogicalExpression(
+            ExpressionType.LOGICAL_AND,
+            comp_exp_l,
+            comp_exp_r
+        )
+
+        tuples = Batch(pd.DataFrame(
+            {0: [1, 2, 3, 4], 1: [1, 2, 5, 6]}))
+        self.assertEqual(
+            [True, False, False, False],
+            logical_exp.evaluate(tuples).frames[0].tolist()
+        )
+        comp_exp_r.evaluate.assert_called_once_with(tuples, mask=[0, 1])
+
+    def test_short_circuiting_or_partial(self):
+        # tests that the right-hand side is evaluated with a row mask for OR
+        tup_val_exp_l = TupleValueExpression(col_name=0)
+        tup_val_exp_r = TupleValueExpression(col_name=1)
+
+        comp_exp_l = ComparisonExpression(
+            ExpressionType.COMPARE_EQUAL,
+            tup_val_exp_l,
+            tup_val_exp_r
+        )
+        comp_exp_r = Mock(spec=ComparisonExpression)
+        comp_exp_r.evaluate = Mock(return_value=Mock(frames=[[True], [False]]))
+
+        logical_exp = LogicalExpression(
+            ExpressionType.LOGICAL_OR,
+            comp_exp_l,
+            comp_exp_r
+        )
+
+        tuples = Batch(pd.DataFrame(
+            {0: [1, 2, 3, 4], 1: [5, 6, 3, 4]}))
+        self.assertEqual(
+            [True, False, True, True],
+            logical_exp.evaluate(tuples).frames[0].tolist()
+        )
+        comp_exp_r.evaluate.assert_called_once_with(tuples, mask=[0, 1])
diff --git a/test/expression/test_tuple_value.py b/test/expression/test_tuple_value.py
@@ -0,0 +1,48 @@
+# coding=utf-8
+# Copyright 2018-2020 EVA
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import pandas as pd
+
+from src.expression.tuple_value_expression import TupleValueExpression
+from src.models.storage.batch import Batch
+
+
+class TupleValueExpressionsTest(unittest.TestCase):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def test_masking(self):
+        tup_val_exp1 = TupleValueExpression(col_name=0)
+        tup_val_exp2 = TupleValueExpression(col_name=1)
+        tup_val_exp3 = TupleValueExpression(col_name=2)
+        tuples = Batch(pd.DataFrame({
+            0: [1, 2, 3, 4, 5, 6],
+            1: [7, 8, 9, 10, 11, 12],
+            2: [13, 14, 15, 16, 17, 18]
+        }))
+        mask1 = [0, 1, 2, 3, 4, 5]
+        self.assertEqual(
+            [1, 2, 3, 4, 5, 6],
+            tup_val_exp1.evaluate(tuples, mask=mask1).frames[0].tolist()
+        )
+        self.assertEqual(
+            [7, 9, 11],
+            tup_val_exp2.evaluate(tuples, mask=[0, 2, 4]).frames[1].tolist()
+        )
+        self.assertEqual(
+            [],
+            tup_val_exp3.evaluate(tuples, mask=[]).frames[2].tolist()
+        )