Skip to content

Commit

Permalink
style check passed, private test passed
Browse files Browse the repository at this point in the history
  • Loading branch information
Yifei Wang authored and Yifei Wang committed Nov 25, 2023
1 parent ddf1878 commit 863ba54
Show file tree
Hide file tree
Showing 12 changed files with 341 additions and 125 deletions.
5 changes: 0 additions & 5 deletions evadb/expression/function_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,7 @@ def evaluate(self, batch: Batch, **kwargs) -> Batch:
# note the function might be using cache
with self._stats.timer:
# apply the function and project the required columns
print("function output names",self.function_obj.outputs)
# print("function input names",self.function_obj.inputs)
outcomes = self._apply_function_expression(func, batch, **kwargs)
print("RUN PASSEd THIS POINT?")
# process outcomes only if output is not empty
if outcomes.frames.empty is False:
outcomes = outcomes.project(self.projection_columns)
Expand Down Expand Up @@ -185,7 +182,6 @@ def _apply_function_expression(self, func: Callable, batch: Batch, **kwargs):
func_args = Batch.merge_column_wise(
[child.evaluate(batch, **kwargs) for child in self.children]
)
# print("these are function args!!!",func.forward.tags)
if not self._cache:
return func_args.apply_function_expression(func)

Expand Down Expand Up @@ -231,7 +227,6 @@ def _apply_function_expression(self, func: Callable, batch: Batch, **kwargs):
results[cache_miss] = cache_miss_results.to_numpy()

# 5. return the correct batch
print("results after function", results)
return Batch(pd.DataFrame(results, columns=output_cols))

def __str__(self) -> str:
Expand Down
1 change: 0 additions & 1 deletion evadb/functions/abstract/abstract_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def __init__(self, *args, **kwargs):
self.setup(*args, **kwargs)

def __call__(self, *args, **kwargs):
print("In abstract_function",self.name," name on the left")
return self.forward(args[0])

def __str__(self):
Expand Down
2 changes: 1 addition & 1 deletion evadb/functions/decorators/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def forward(input_signatures: List[IOArgument], output_signatures: List[IOArgume
input_signature (List[IOArgument]): List of input arguments for the function
output_signature ( List[IOArgument])): List of output arguments for the function
"""

def inner_fn(arg_fn):
def wrapper(*args):
# calling the forward function defined by the user inside the function implementation
Expand All @@ -61,7 +62,6 @@ def wrapper(*args):
tags = {}
tags["input"] = input_signatures
tags["output"] = output_signatures
print("tags created here",tags)
wrapper.tags = tags
return wrapper

Expand Down
140 changes: 88 additions & 52 deletions evadb/models/storage/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable, Iterable, List, TypeVar, Union
from datetime import datetime
from decimal import Decimal
from typing import Callable, Iterable, List, TypeVar, Union

import numpy as np
import pandas as pd
from decimal import Decimal
import numbers

from evadb.catalog.catalog_type import NdArrayType
from evadb.expression.abstract_expression import ExpressionType
from evadb.parser.alias import Alias
from evadb.utils.generic_utils import PickleSerializer
from evadb.utils.logging_manager import logger
from evadb.catalog.catalog_type import ColumnType, NdArrayType

Batch = TypeVar("Batch")


Expand Down Expand Up @@ -172,12 +173,11 @@ def apply_function_expression(self, expr: Callable) -> Batch:
"""
Execute function expression on frames.
"""
input_tags = expr.forward.tags['input'][0]
output_tags = expr.forward.tags['output'][0]
input_tags = expr.forward.tags["input"][0]
output_tags = expr.forward.tags["output"][0]

self.drop_column_alias(metadata=(input_tags, output_tags))

self.drop_column_alias(metadata=(input_tags,output_tags))

return Batch(expr(self._frames))

def iterrows(self):
Expand Down Expand Up @@ -440,11 +440,9 @@ def modify_column_alias(self, alias: Union[Alias, str]) -> None:

self._frames.columns = new_col_names

def drop_column_alias(self, metadata =None) -> None:
def drop_column_alias(self, metadata=None) -> None:
# table1.a, table1.b, table1.c -> a, b, c
print(self._frames.info())
input_meta,output_meta=metadata

input_meta, output_meta = metadata

new_col_names = []
for col_name in self.columns:
Expand All @@ -459,6 +457,7 @@ def drop_column_alias(self, metadata =None) -> None:
defined_column_types = [entry for entry in input_meta.column_types]
defined_column_shapes = [entry for entry in input_meta.column_shapes]
column_rename_map = {}

def is_shape_matching(data, expected_shape):
"""
Check if the shape of the data matches the expected shape..
Expand All @@ -473,6 +472,7 @@ def is_shape_matching(data, expected_shape):
return False

return True

def get_basic_element(data):
# Check if the data is iterable (but not a string, as strings are also iterable)
if isinstance(data, Iterable) and not isinstance(data, (str, bytes)):
Expand All @@ -483,8 +483,9 @@ def get_basic_element(data):
return get_basic_element(data[0])
else:
# If the data is not iterable, return it as is
return data
def deduce_and_map_type(element,check_type):
return data

def deduce_and_map_type(element, check_type):
python_type_to_ndarray_type = {
int: NdArrayType.INT64, # Python's int is commonly mapped to NumPy's np.int64
float: NdArrayType.FLOAT64, # Python's float maps to np.float64
Expand All @@ -507,35 +508,85 @@ def deduce_and_map_type(element,check_type):
np.datetime64: NdArrayType.DATETIME,
}
flexible_type_mapping = {
NdArrayType.INT8: [NdArrayType.INT16, NdArrayType.INT32, NdArrayType.INT64, NdArrayType.FLOAT32, NdArrayType.FLOAT64],
NdArrayType.UINT8: [NdArrayType.INT16, NdArrayType.INT32, NdArrayType.INT64, NdArrayType.FLOAT32, NdArrayType.FLOAT64],
NdArrayType.INT16: [NdArrayType.INT8, NdArrayType.INT32, NdArrayType.INT64, NdArrayType.FLOAT32, NdArrayType.FLOAT64],
NdArrayType.INT32: [NdArrayType.INT8, NdArrayType.INT16, NdArrayType.INT64, NdArrayType.FLOAT32, NdArrayType.FLOAT64],
NdArrayType.INT64: [NdArrayType.INT8, NdArrayType.INT16, NdArrayType.INT32, NdArrayType.FLOAT32, NdArrayType.FLOAT64],
NdArrayType.FLOAT32: [NdArrayType.FLOAT64, NdArrayType.INT8, NdArrayType.INT16, NdArrayType.INT32, NdArrayType.INT64],
NdArrayType.FLOAT64: [NdArrayType.FLOAT32, NdArrayType.INT8, NdArrayType.INT16, NdArrayType.INT32, NdArrayType.INT64]
}
NdArrayType.INT8: [
NdArrayType.INT16,
NdArrayType.INT32,
NdArrayType.INT64,
NdArrayType.FLOAT32,
NdArrayType.FLOAT64,
],
NdArrayType.UINT8: [
NdArrayType.INT16,
NdArrayType.INT32,
NdArrayType.INT64,
NdArrayType.FLOAT32,
NdArrayType.FLOAT64,
],
NdArrayType.INT16: [
NdArrayType.INT8,
NdArrayType.INT32,
NdArrayType.INT64,
NdArrayType.FLOAT32,
NdArrayType.FLOAT64,
],
NdArrayType.INT32: [
NdArrayType.INT8,
NdArrayType.INT16,
NdArrayType.INT64,
NdArrayType.FLOAT32,
NdArrayType.FLOAT64,
],
NdArrayType.INT64: [
NdArrayType.INT8,
NdArrayType.INT16,
NdArrayType.INT32,
NdArrayType.FLOAT32,
NdArrayType.FLOAT64,
],
NdArrayType.FLOAT32: [
NdArrayType.FLOAT64,
NdArrayType.INT8,
NdArrayType.INT16,
NdArrayType.INT32,
NdArrayType.INT64,
],
NdArrayType.FLOAT64: [
NdArrayType.FLOAT32,
NdArrayType.INT8,
NdArrayType.INT16,
NdArrayType.INT32,
NdArrayType.INT64,
],
}
element_type = type(element)
if isinstance(element,int):
return check_type in [NdArrayType.INT16, NdArrayType.INT32, NdArrayType.INT64, NdArrayType.FLOAT32, NdArrayType.FLOAT64]
if isinstance(element,float):
if isinstance(element, int):
return check_type in [
NdArrayType.INT16,
NdArrayType.INT32,
NdArrayType.INT64,
NdArrayType.FLOAT32,
NdArrayType.FLOAT64,
]
if isinstance(element, float):
return check_type in [NdArrayType.FLOAT32, NdArrayType.FLOAT64]

# Special handling for numpy types
if isinstance(element, np.generic):
element_type = np.dtype(type(element)).type
print("BEFORE MAP TYPE: ",element_type)
deduced_type = python_type_to_ndarray_type.get(element_type)
if deduced_type==check_type:
deduced_type = python_type_to_ndarray_type.get(element_type)
if deduced_type == check_type:
return True
if deduced_type in flexible_type_mapping and check_type in flexible_type_mapping[deduced_type]:
if (
deduced_type in flexible_type_mapping
and check_type in flexible_type_mapping[deduced_type]
):
return True

return False

for col_name in self.columns:
match = False
for i,def_name in enumerate(list(defined_column_names)):
for i, def_name in enumerate(list(defined_column_names)):
# If the column name matches, keep it as is
if def_name == col_name:
column_rename_map[col_name] = col_name
Expand All @@ -545,32 +596,17 @@ def deduce_and_map_type(element,check_type):
match = True
# if the column name doesnt match
if not match:
for i,def_name in enumerate(list(defined_column_names)):
# check for data type match
print(self._frames.dtypes)
print(self._frames.head(3))

shape_match = False
type_match = False
for i, def_name in enumerate(list(defined_column_names)):
# check if shape match
sample_data = self._frames[col_name].iloc[0]
if is_shape_matching(sample_data,defined_column_shapes[i]):
shape_match=True
print(def_name,"'s shape match with ",col_name)
basic_element=get_basic_element(sample_data)
if deduce_and_map_type(basic_element,defined_column_types[i]):
type_match = True
column_rename_map[col_name]=def_name
if is_shape_matching(sample_data, defined_column_shapes[i]):
basic_element = get_basic_element(sample_data)
if deduce_and_map_type(basic_element, defined_column_types[i]):
column_rename_map[col_name] = def_name
defined_column_names.remove(def_name)
defined_column_types.pop(i)
defined_column_shapes.pop(i)
break
print("all matches")
else:
print("Name and Shape match, type didnt match ")
else:
print("Name and Shape match failed on column: ",col_name)

break

# Rename columns in the dataframe
self._frames.rename(columns=column_rename_map, inplace=True)
Expand Down
1 change: 0 additions & 1 deletion evadb/server/command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def execute_query(

with query_compile_time:
stmt = Parser().parse(query)[0]
print("START EXECUTE HERE",stmt)
res_batch = execute_statement(
evadb, stmt, do_not_raise_exceptions, do_not_print_exceptions, **kwargs
)
Expand Down
19 changes: 16 additions & 3 deletions test/integration_tests/long/functions/ndarray/blob_detector.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -27,9 +41,8 @@ def name(self):
],
output_signatures=[
PandasDataframe(
columns=["num_labels","labeled_im"],
column_types=[NdArrayType.FLOAT32,
NdArrayType.FLOAT32],
columns=["num_labels", "labeled_im"],
column_types=[NdArrayType.FLOAT32, NdArrayType.FLOAT32],
column_shapes=[(None,), (None, None)],
)
],
Expand Down
15 changes: 14 additions & 1 deletion test/integration_tests/long/functions/ndarray/grayscale.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -40,7 +54,6 @@ def forward(self, frame: pd.DataFrame) -> pd.DataFrame:
Returns:
ret (pd.DataFrame): The modified frame.
"""
print("FORWARD IN GRAYSCALE when declare")

def Grayscale(row: pd.Series) -> np.ndarray:
row = row.to_list()
Expand Down
16 changes: 15 additions & 1 deletion test/integration_tests/long/functions/ndarray/high_pass.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -47,7 +61,7 @@ def highpass(row: pd.Series) -> np.ndarray:

import cv2

low_pass = cv2.GaussianBlur(frame, (3,3), 0)
low_pass = cv2.GaussianBlur(frame, (3, 3), 0)
high_pass = cv2.subtract(frame, low_pass)

return high_pass
Expand Down
Loading

0 comments on commit 863ba54

Please sign in to comment.