deprecated code removal: Outcomes and temp outcomes no longer used

georgia-tech-db · Feb 22, 2021 · 3ec7bc7 · 3ec7bc7
1 parent ca4f18f
commit 3ec7bc7
Show file tree

Hide file tree

Showing 8 changed files with 18 additions and 207 deletions.
diff --git a/src/models/storage/batch.py b/src/models/storage/batch.py
@@ -18,7 +18,6 @@
 
 from typing import List
 from pandas import DataFrame
-from src.models.inference.outcome import Outcome
 from src.utils.logging_manager import LoggingManager, LoggingLevel
 
 
@@ -42,23 +41,15 @@ class Batch:
 
     Arguments:
         frames (DataFrame): pandas Dataframe holding frames data
-        outcomes (Dict[str, List[BasePrediction]]): outcomes of running a udf
-        with name 'x' as key
         identifier_column (str): A column used to uniquely a row
 
 
     """
 
     def __init__(self,
                  frames=pd.DataFrame(),
-                 outcomes=None,
-                 temp_outcomes=None,
                  identifier_column='id'):
         super().__init__()
-        if outcomes is None:
-            outcomes = dict()
-        if temp_outcomes is None:
-            temp_outcomes = dict()
         # store the batch with columns sorted
         if isinstance(frames, DataFrame):
             self._frames = frames[sorted(frames.columns)]
@@ -68,8 +59,6 @@ def __init__(self,
             raise ValueError('Batch constructor not properly called. \
                 Expected pandas.DataFrame')
         self._batch_size = len(frames)
-        self._outcomes = outcomes
-        self._temp_outcomes = temp_outcomes
         self._identifier_column = identifier_column
 
     @property
@@ -113,65 +102,11 @@ def __str__(self):
                % (self._frames, self._batch_size, self.identifier_column)
 
     def __eq__(self, other: 'Batch'):
-        return self.frames.equals(other.frames) and \
-            self._outcomes == other._outcomes and \
-            self._temp_outcomes == other._temp_outcomes
-
-    def set_outcomes(self, name, predictions: List[Outcome],
-                     is_temp: bool = False):
-        """
-        Used for storing outcomes of the UDF predictions
-
-        Arguments:
-            name (str): name of the UDF to which the predictions belong to
-
-            predictions (pandas.DataFrame): Predictions/Outcome after executing
-            the UDF on prediction
-
-            is_temp (bool, default: False): Check if the outcomes are temporary
-
-        """
-        if is_temp:
-            self._temp_outcomes[name] = predictions
-        else:
-            self._outcomes[name] = predictions
-
-    def get_outcomes_for(self, name: str) -> List[Outcome]:
-        """
-        Returns names corresponding to a name
-        Arguments:
-            name (str): name of the udf on which predicate is being executed
-
-        Returns:
-            List[BasePrediction]
-        """
-        if name in self._outcomes:
-            return self._outcomes.get(name, [])
-        else:
-            return self._temp_outcomes.get(name, [])
-
-    def has_outcome(self, name: str):
-        """
-        Method used for checking if the outcome with given name is present.
-        Either in temporary outcomes or actual outcomes.
-
-        Arguments:
-            name (str): name of the outcome to check
-        Returns:
-            bool: True if present else false
-        """
-
-        return name in self._outcomes or name in self._temp_outcomes
+        return self.frames.equals(other.frames)
 
     def _get_frames_from_indices(self, required_frame_ids):
         new_frames = self.frames.iloc[required_frame_ids, :]
         new_batch = Batch(new_frames)
-        for key in self._outcomes:
-            new_batch._outcomes[key] = [self._outcomes[key][i]
-                                        for i in required_frame_ids]
-        for key in self._temp_outcomes:
-            new_batch._temp_outcomes[key] = [self._temp_outcomes[key][i]
-                                             for i in required_frame_ids]
         return new_batch
 
     def __getitem__(self, indices) -> 'Batch':
@@ -236,7 +171,6 @@ def sort_orderby(self, by, sort_type):
     def project(self, cols: []) -> 'Batch':
         """
         Takes as input the column list, returns the projection.
-        Keep the outcomes and temp_outcomes unchanged.
         We do a copy for now.
         """
         verfied_cols = [c for c in cols if c in self._frames]
@@ -245,16 +179,14 @@ def project(self, cols: []) -> 'Batch':
             LoggingManager().log("Unexpected columns %s\n\
                                  Frames: %s" % (unknown_cols, self._frames),
                                  LoggingLevel.WARNING)
-        return Batch(self._frames[verfied_cols], self._outcomes.copy(),
-                     self._temp_outcomes.copy(), self._identifier_column)
+        return Batch(self._frames[verfied_cols], self._identifier_column)
 
     @classmethod
     def merge_column_wise(cls,
                           batches: ['Batch'],
                           auto_renaming=False) -> 'Batch':
         """
         Merge list of batch frames column_wise and return a new batch frame
-        No outcome merge. Add later when there is a actual usage.
         Arguments:
             batches: List[Batch]: lsit of batch objects to be merged
             auto_renaming: if true rename column names if required
@@ -296,18 +228,8 @@ def _unique_keys(dict1, dict2):
             return self
 
         new_frames = self.frames.append(other.frames, ignore_index=True)
-        new_outcomes = {}
-        temp_new_outcomes = {}
-
-        for key in _unique_keys(self._outcomes, other._outcomes):
-            new_outcomes[key] = self._outcomes.get(key, []) + \
-                other._outcomes.get(key, [])
-        for key in _unique_keys(self._temp_outcomes, other._temp_outcomes):
-            temp_new_outcomes[key] = self._temp_outcomes.get(key, []) + \
-                other._temp_outcomes.get(key, [])
-
-        return Batch(new_frames, outcomes=new_outcomes,
-                     temp_outcomes=temp_new_outcomes)
+
+        return Batch(new_frames)
 
     @classmethod
     def concat(cls, batch_list: List['Batch'], copy=True) -> 'Batch':

diff --git a/src/parser/parser_visitor/_create_statements.py b/src/parser/parser_visitor/_create_statements.py
@@ -47,7 +47,6 @@ def visitColumnCreateTable(
                     create_definitions = self.visit(ctx.createDefinitions())
 
             except BaseException:
-                print("Exception")
                 # stop parsing something bad happened
                 return None
 

diff --git a/src/parser/parser_visitor/_insert_statements.py b/src/parser/parser_visitor/_insert_statements.py
@@ -50,7 +50,6 @@ def visitInsertStatement(self, ctx: evaql_parser.InsertStatementContext):
                         # Support only (value1, value2, .... value n)
                         value_list = insrt_value[0]
                 except BaseException:
-                    print("Exception")
                     # stop parsing something bad happened
                     return None
 
@@ -80,7 +79,6 @@ def visitInsertStatementValue(
                         insert_stmt_value.append(expr)
 
                 except BaseException:
-                    print("Exception")
                     # stop parsing something bad happened
                     return None
         return insert_stmt_value
diff --git a/src/udfs/ssd_object_detector.py b/src/udfs/ssd_object_detector.py
@@ -280,10 +280,8 @@ def decode_single(self,
 
         for i, score in enumerate(scores_in.split(1, 1)):
             # skip background
-            # print(score[score>0.90])
             if i == 0:
                 continue
-            # print(i)
 
             score = score.squeeze(1)
             mask = score > 0.05

diff --git a/test/expression/test_expression_tree.py b/test/expression/test_expression_tree.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
-import pandas as pd
 
 from src.expression.abstract_expression import ExpressionType
 from src.expression.comparison_expression import ComparisonExpression
@@ -22,29 +21,9 @@
 from src.expression.aggregation_expression import AggregationExpression
 from src.expression.function_expression import FunctionExpression
 from src.expression.logical_expression import LogicalExpression
-from src.models.inference.outcome import Outcome
-from src.models.storage.batch import Batch
-
-from test.util import create_dataframe
 
 
 class ExpressionEvaluationTest(unittest.TestCase):
-    @unittest.skip("This test case needs to be redesigned")
-    def test_func_expr_with_cmpr_and_const_expr_should_work(self):
-        frames = create_dataframe(1)
-        outcome_1 = Outcome(pd.DataFrame(
-            [{'labels': ["car", "bus"], 'scores': [0.5, 0.6]}]), 'labels')
-
-        func = FunctionExpression(lambda x, y: [outcome_1])
-        value_expr = ConstantValueExpression("car")
-        expression_tree = ComparisonExpression(ExpressionType.COMPARE_EQUAL,
-                                               func,
-                                               value_expr)
-
-        batch = Batch(frames=frames)
-        output = expression_tree.evaluate(batch)
-        self.assertEqual([True, False], output)
-
     def test_if_expr_tree_is_equal(self):
         const_exp1 = ConstantValueExpression(0)
         const_exp2 = ConstantValueExpression(0)

diff --git a/test/expression/test_function_expression.py b/test/expression/test_function_expression.py
@@ -31,17 +31,6 @@ def test_should_work_for_function_without_children_eval_mode(self):
         actual = expression.evaluate(values)
         self.assertEqual(values, actual)
 
-    @unittest.skip("outcome in batch is not used.")
-    def test_should_update_the_batch_with_outcomes_in_exec_mode(self):
-        values = [1, 2, 3]
-        expression = FunctionExpression(lambda x: values,
-                                        mode=ExecutionMode.EXEC, name="test")
-        expected_batch = Batch(frames=pd.DataFrame(),
-                               outcomes={"test": [1, 2, 3]})
-        input_batch = Batch(frames=pd.DataFrame())
-        expression.evaluate(input_batch)
-        self.assertEqual(expected_batch, input_batch)
-
     def test_should_throw_assert_error_when_name_not_provided_exec_mode(self):
         self.assertRaises(AssertionError,
                           lambda _=None:
@@ -66,18 +55,6 @@ def test_should_filter_function_output(self):
         expected = Batch(pd.DataFrame(values['id']) + 1)
         self.assertEqual(expected, actual)
 
-    @unittest.skip("temp outcome in batch is not used.")
-    def test_should_update_temp_outcomes_when_is_temp_set_exec_mode(self):
-        values = [1, 2, 3]
-        expression = FunctionExpression(lambda x: values,
-                                        mode=ExecutionMode.EXEC,
-                                        name="test", is_temp=True)
-        expected_batch = Batch(frames=pd.DataFrame(),
-                               temp_outcomes={"test": [1, 2, 3]})
-        input_batch = Batch(frames=pd.DataFrame())
-        expression.evaluate(input_batch)
-        self.assertEqual(expected_batch, input_batch)
-
     @patch('src.expression.function_expression.Context')
     def test_function_move_the_device_to_gpu_if_compatible(self, context):
         context_instance = context.return_value

diff --git a/test/integration_tests/test_pytorch.py b/test/integration_tests/test_pytorch.py
@@ -39,7 +39,6 @@ def test_should_run_pytorch_and_fastrcnn(self):
         select_query = """SELECT FastRCNNObjectDetector(data) FROM MyVideo
                         WHERE id < 5;"""
         actual_batch = perform_query(select_query)
-        print(actual_batch)
         self.assertEqual(actual_batch.batch_size, 5)
 
     def test_should_run_pytorch_and_ssd(self):