Skip to content

Commit

Permalink
deprecated code removal: Outcomes and temp outcomes no longer used
Browse files Browse the repository at this point in the history
  • Loading branch information
gaurav274 committed Feb 22, 2021
1 parent ca4f18f commit 3ec7bc7
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 207 deletions.
86 changes: 4 additions & 82 deletions src/models/storage/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

from typing import List
from pandas import DataFrame
from src.models.inference.outcome import Outcome
from src.utils.logging_manager import LoggingManager, LoggingLevel


Expand All @@ -42,23 +41,15 @@ class Batch:
Arguments:
frames (DataFrame): pandas Dataframe holding frames data
outcomes (Dict[str, List[BasePrediction]]): outcomes of running a udf
with name 'x' as key
identifier_column (str): A column used to uniquely a row
"""

def __init__(self,
frames=pd.DataFrame(),
outcomes=None,
temp_outcomes=None,
identifier_column='id'):
super().__init__()
if outcomes is None:
outcomes = dict()
if temp_outcomes is None:
temp_outcomes = dict()
# store the batch with columns sorted
if isinstance(frames, DataFrame):
self._frames = frames[sorted(frames.columns)]
Expand All @@ -68,8 +59,6 @@ def __init__(self,
raise ValueError('Batch constructor not properly called. \
Expected pandas.DataFrame')
self._batch_size = len(frames)
self._outcomes = outcomes
self._temp_outcomes = temp_outcomes
self._identifier_column = identifier_column

@property
Expand Down Expand Up @@ -113,65 +102,11 @@ def __str__(self):
% (self._frames, self._batch_size, self.identifier_column)

def __eq__(self, other: 'Batch'):
return self.frames.equals(other.frames) and \
self._outcomes == other._outcomes and \
self._temp_outcomes == other._temp_outcomes

def set_outcomes(self, name, predictions: List[Outcome],
is_temp: bool = False):
"""
Used for storing outcomes of the UDF predictions
Arguments:
name (str): name of the UDF to which the predictions belong to
predictions (pandas.DataFrame): Predictions/Outcome after executing
the UDF on prediction
is_temp (bool, default: False): Check if the outcomes are temporary
"""
if is_temp:
self._temp_outcomes[name] = predictions
else:
self._outcomes[name] = predictions

def get_outcomes_for(self, name: str) -> List[Outcome]:
"""
Returns names corresponding to a name
Arguments:
name (str): name of the udf on which predicate is being executed
Returns:
List[BasePrediction]
"""
if name in self._outcomes:
return self._outcomes.get(name, [])
else:
return self._temp_outcomes.get(name, [])

def has_outcome(self, name: str):
"""
Method used for checking if the outcome with given name is present.
Either in temporary outcomes or actual outcomes.
Arguments:
name (str): name of the outcome to check
Returns:
bool: True if present else false
"""

return name in self._outcomes or name in self._temp_outcomes
return self.frames.equals(other.frames)

def _get_frames_from_indices(self, required_frame_ids):
new_frames = self.frames.iloc[required_frame_ids, :]
new_batch = Batch(new_frames)
for key in self._outcomes:
new_batch._outcomes[key] = [self._outcomes[key][i]
for i in required_frame_ids]
for key in self._temp_outcomes:
new_batch._temp_outcomes[key] = [self._temp_outcomes[key][i]
for i in required_frame_ids]
return new_batch

def __getitem__(self, indices) -> 'Batch':
Expand Down Expand Up @@ -236,7 +171,6 @@ def sort_orderby(self, by, sort_type):
def project(self, cols: []) -> 'Batch':
"""
Takes as input the column list, returns the projection.
Keep the outcomes and temp_outcomes unchanged.
We do a copy for now.
"""
verfied_cols = [c for c in cols if c in self._frames]
Expand All @@ -245,16 +179,14 @@ def project(self, cols: []) -> 'Batch':
LoggingManager().log("Unexpected columns %s\n\
Frames: %s" % (unknown_cols, self._frames),
LoggingLevel.WARNING)
return Batch(self._frames[verfied_cols], self._outcomes.copy(),
self._temp_outcomes.copy(), self._identifier_column)
return Batch(self._frames[verfied_cols], self._identifier_column)

@classmethod
def merge_column_wise(cls,
batches: ['Batch'],
auto_renaming=False) -> 'Batch':
"""
Merge list of batch frames column_wise and return a new batch frame
No outcome merge. Add later when there is a actual usage.
Arguments:
batches: List[Batch]: lsit of batch objects to be merged
auto_renaming: if true rename column names if required
Expand Down Expand Up @@ -296,18 +228,8 @@ def _unique_keys(dict1, dict2):
return self

new_frames = self.frames.append(other.frames, ignore_index=True)
new_outcomes = {}
temp_new_outcomes = {}

for key in _unique_keys(self._outcomes, other._outcomes):
new_outcomes[key] = self._outcomes.get(key, []) + \
other._outcomes.get(key, [])
for key in _unique_keys(self._temp_outcomes, other._temp_outcomes):
temp_new_outcomes[key] = self._temp_outcomes.get(key, []) + \
other._temp_outcomes.get(key, [])

return Batch(new_frames, outcomes=new_outcomes,
temp_outcomes=temp_new_outcomes)

return Batch(new_frames)

@classmethod
def concat(cls, batch_list: List['Batch'], copy=True) -> 'Batch':
Expand Down
1 change: 0 additions & 1 deletion src/parser/parser_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def visitColumnCreateTable(
create_definitions = self.visit(ctx.createDefinitions())

except BaseException:
print("Exception")
# stop parsing something bad happened
return None

Expand Down
2 changes: 0 additions & 2 deletions src/parser/parser_visitor/_insert_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def visitInsertStatement(self, ctx: evaql_parser.InsertStatementContext):
# Support only (value1, value2, .... value n)
value_list = insrt_value[0]
except BaseException:
print("Exception")
# stop parsing something bad happened
return None

Expand Down Expand Up @@ -80,7 +79,6 @@ def visitInsertStatementValue(
insert_stmt_value.append(expr)

except BaseException:
print("Exception")
# stop parsing something bad happened
return None
return insert_stmt_value
2 changes: 0 additions & 2 deletions src/udfs/ssd_object_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,8 @@ def decode_single(self,

for i, score in enumerate(scores_in.split(1, 1)):
# skip background
# print(score[score>0.90])
if i == 0:
continue
# print(i)

score = score.squeeze(1)
mask = score > 0.05
Expand Down
21 changes: 0 additions & 21 deletions test/expression/test_expression_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import pandas as pd

from src.expression.abstract_expression import ExpressionType
from src.expression.comparison_expression import ComparisonExpression
Expand All @@ -22,29 +21,9 @@
from src.expression.aggregation_expression import AggregationExpression
from src.expression.function_expression import FunctionExpression
from src.expression.logical_expression import LogicalExpression
from src.models.inference.outcome import Outcome
from src.models.storage.batch import Batch

from test.util import create_dataframe


class ExpressionEvaluationTest(unittest.TestCase):
@unittest.skip("This test case needs to be redesigned")
def test_func_expr_with_cmpr_and_const_expr_should_work(self):
frames = create_dataframe(1)
outcome_1 = Outcome(pd.DataFrame(
[{'labels': ["car", "bus"], 'scores': [0.5, 0.6]}]), 'labels')

func = FunctionExpression(lambda x, y: [outcome_1])
value_expr = ConstantValueExpression("car")
expression_tree = ComparisonExpression(ExpressionType.COMPARE_EQUAL,
func,
value_expr)

batch = Batch(frames=frames)
output = expression_tree.evaluate(batch)
self.assertEqual([True, False], output)

def test_if_expr_tree_is_equal(self):
const_exp1 = ConstantValueExpression(0)
const_exp2 = ConstantValueExpression(0)
Expand Down
23 changes: 0 additions & 23 deletions test/expression/test_function_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,6 @@ def test_should_work_for_function_without_children_eval_mode(self):
actual = expression.evaluate(values)
self.assertEqual(values, actual)

@unittest.skip("outcome in batch is not used.")
def test_should_update_the_batch_with_outcomes_in_exec_mode(self):
values = [1, 2, 3]
expression = FunctionExpression(lambda x: values,
mode=ExecutionMode.EXEC, name="test")
expected_batch = Batch(frames=pd.DataFrame(),
outcomes={"test": [1, 2, 3]})
input_batch = Batch(frames=pd.DataFrame())
expression.evaluate(input_batch)
self.assertEqual(expected_batch, input_batch)

def test_should_throw_assert_error_when_name_not_provided_exec_mode(self):
self.assertRaises(AssertionError,
lambda _=None:
Expand All @@ -66,18 +55,6 @@ def test_should_filter_function_output(self):
expected = Batch(pd.DataFrame(values['id']) + 1)
self.assertEqual(expected, actual)

@unittest.skip("temp outcome in batch is not used.")
def test_should_update_temp_outcomes_when_is_temp_set_exec_mode(self):
values = [1, 2, 3]
expression = FunctionExpression(lambda x: values,
mode=ExecutionMode.EXEC,
name="test", is_temp=True)
expected_batch = Batch(frames=pd.DataFrame(),
temp_outcomes={"test": [1, 2, 3]})
input_batch = Batch(frames=pd.DataFrame())
expression.evaluate(input_batch)
self.assertEqual(expected_batch, input_batch)

@patch('src.expression.function_expression.Context')
def test_function_move_the_device_to_gpu_if_compatible(self, context):
context_instance = context.return_value
Expand Down
1 change: 0 additions & 1 deletion test/integration_tests/test_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def test_should_run_pytorch_and_fastrcnn(self):
select_query = """SELECT FastRCNNObjectDetector(data) FROM MyVideo
WHERE id < 5;"""
actual_batch = perform_query(select_query)
print(actual_batch)
self.assertEqual(actual_batch.batch_size, 5)

def test_should_run_pytorch_and_ssd(self):
Expand Down
Loading

0 comments on commit 3ec7bc7

Please sign in to comment.