Skip to content

Commit

Permalink
feat: update notebooks (#892)
Browse files Browse the repository at this point in the history
👋 Thanks for submitting a Pull Request to EvaDB!

🙌 We want to make contributing to EvaDB as easy and transparent as
possible. Here are a few tips to get you started:

- 🔍 Search existing EvaDB
[PRs](https://github.com/georgia-tech-db/eva/pulls) to see if a similar
PR already exists.
- 🔗 Link this PR to an EvaDB
[issue](https://github.com/georgia-tech-db/eva/issues) to help us
understand what bug fix or feature is being implemented.
- 📈 Provide before and after profiling results to help us quantify the
improvement your PR provides (if applicable).

👉 Please see our ✅ [Contributing
Guide](https://evadb.readthedocs.io/en/stable/source/contribute/index.html)
for more details.

---------

Co-authored-by: Gaurav <gaurav21776@gmail.com>
  • Loading branch information
jarulraj and gaurav274 committed Jun 25, 2023
1 parent f435a2f commit 5a65ad0
Show file tree
Hide file tree
Showing 30 changed files with 1,551 additions and 918 deletions.
97 changes: 76 additions & 21 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,64 +7,114 @@ orbs:
workflows:
main:
jobs:
################################
#### PIP
################################
################################
- Pip-PyPI
- Pip-Local
- Docker-Local
- AWS
################################
#### UNIT TESTS
################################
################################
- Linux:
name: "Test | v3.8 | Linux"
v: "3.8"
mode: "COV"
mode: "TEST"
ray: "DISABLED"
- Linux:
name: "Test | v3.9 | Linux"
v: "3.9"
mode: "COV"
mode: "TEST"
ray: "DISABLED"
- Linux:
name: "Test | v3.10 | Linux"
v: "3.10"
mode: "COV"
mode: "TEST"
ray: "DISABLED"
- Linux:
name: "Test | Ray | v3.8 | Linux"
v: "3.8"
mode: "TEST"
ray: "ENABLED"
- Linux:
name: "Test | Ray | v3.9 | Linux"
v: "3.9"
mode: "TEST"
ray: "ENABLED"
- Linux:
name: "Test | Ray | v3.10 | Linux"
v: "3.10"
mode: "TEST"
ray: "ENABLED"
# Ray does not work on 3.11
# https://github.com/ray-project/ray/issues/33232
# - Linux:
# name: "Test | v3.11 | Linux"
# v: "3.11"
# mode: "TEST"
# ray: "DISABLED"
################################
### NOTEBOOKS
################################
################################
- Linux:
name: "Notebook | v3.8 | Linux"
v: "3.8"
mode: "NOTEBOOK"
ray: "DISABLED"
- Linux:
name: "Notebook | v3.9 | Linux"
v: "3.9"
mode: "NOTEBOOK"
ray: "DISABLED"
- Linux:
name: "Notebook | v3.10 | Linux"
v: "3.10"
mode: "NOTEBOOK"
ray: "DISABLED"
- Linux:
name: "Notebook | Ray | v3.8 | Linux"
v: "3.8"
mode: "NOTEBOOK"
ray: "ENABLED"
- Linux:
name: "Notebook | Ray | v3.9 | Linux"
v: "3.9"
mode: "NOTEBOOK"
ray: "ENABLED"
- Linux:
name: "Notebook | Ray | v3.10 | Linux"
v: "3.10"
mode: "NOTEBOOK"
ray: "ENABLED"
# - Linux:
# name: "Notebook | v3.11 | Linux"
# v: "3.11"
# mode: "NOTEBOOK"
################################
## LINTER
################################
################################
- Linux:
name: "Linter | Linux"
v: "3.10"
mode: "LINTER"
### RAY
- Linux:
name: "Test | Ray | v3.10 | Linux"
v: "3.10"
mode: "RAY"
- Windows:
name: "Windows | v3.10"
- MacOS:
name: "MacOS | v3.10"
# missing Torchvision
#- Linux:
# name: "Linux - v3.11"
# v: "3.11"
ray: "DISABLED"
################################
# OTHER OPERATING SYSTEMS
################################
################################
#- Windows:
# name: "Windows | v3.10"
# - MacOS:
# name: "MacOS | v3.10"
################################
# DOCKER AND CLOUD
################################
################################
- Docker-Local
- AWS

jobs:
Linux:
Expand All @@ -75,6 +125,9 @@ jobs:
mode:
type: string
default: "ALL"
ray:
type: string
default: "DISABLED"
resource_class: large
docker:
# https://circleci.com/docs/circleci-images#language-image-variants
Expand All @@ -88,23 +141,25 @@ jobs:
- v1-model_cache-{{ checksum "setup.py" }}

- run:
name: Install EVA package from GitHub repo with all dependencies
name: Install EvaDB package from GitHub repo with all dependencies
command: |
"python<< parameters.v >>" -m venv test_evadb
pip install --upgrade pip
source test_evadb/bin/activate
pip install ".[dev]"
pip uninstall -y ray
# Enable Ray
# Enable Ray (update evadb.yml file and install Ray package)
- when:
condition:
equal: [ RAY, << parameters.mode >> ]
equal: [ ENABLED, << parameters.ray >> ]
steps:
- run:
name: Enable Ray setting in the config.yml file
command: |
source test_evadb/bin/activate
python -c "import yaml;f = open('evadb/evadb.yml', 'r+');config_obj = yaml.load(f, Loader=yaml.FullLoader);config_obj['experimental']['ray'] = True;f.seek(0);f.write(yaml.dump(config_obj));f.truncate();"
pip install ".[dev]"
- run:
name: Test and upload coverage report to coveralls
Expand Down
8 changes: 8 additions & 0 deletions evadb/catalog/catalog_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ def reset(self):
"""
self._clear_catalog_contents()

def close(self):
    """Release the database connections held by this catalog.

    Disposes of the engine exposed by ``self.sql_config`` when a SQL
    configuration is present; does nothing otherwise.
    """
    # Nothing to release when no SQL configuration was set up.
    if self.sql_config is None:
        return
    self.sql_config.engine.dispose()

def _bootstrap_catalog(self):
"""Bootstraps catalog.
This method runs all tasks required for using catalog. Currently,
Expand Down
3 changes: 2 additions & 1 deletion evadb/catalog/sql_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(self, uri):

self.worker_uri = str(uri)
# set echo=True to log SQL
self.engine = create_engine(self.worker_uri)
self.engine = create_engine(self.worker_uri, connect_args={"timeout": 1000})

if self.engine.url.get_backend_name() == "sqlite":
# enforce foreign key constraint and wal logging for sqlite
Expand All @@ -80,3 +80,4 @@ def _enable_sqlite_pragma(dbapi_con, con_record):
event.listen(self.engine, "connect", _enable_sqlite_pragma)
# statements
self.session = scoped_session(sessionmaker(bind=self.engine))
self.session.close()
2 changes: 1 addition & 1 deletion evadb/evadb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ server:
socket_timeout: 60

experimental:
ray: False
ray: False

third_party:
OPENAI_KEY: ""
20 changes: 20 additions & 0 deletions evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
parse_table_clause,
)
from evadb.udfs.udf_bootstrap_queries import init_builtin_udfs
from evadb.utils.generic_utils import is_ray_enabled_and_installed
from evadb.utils.logging_manager import logger


Expand Down Expand Up @@ -508,6 +509,25 @@ def rename(self, table_name, new_table_name, **kwargs) -> EvaDBQuery:
stmt = parse_rename(table_name, new_table_name, **kwargs)
return EvaDBQuery(self._evadb, stmt)

def close(self):
    """
    Close this cursor's connection to the database.

    Closes the catalog first, then shuts Ray down when the
    "experimental" / "ray" setting is enabled and Ray is installed.

    Args: None
    Returns: None
    Examples:
        >>> cursor.close()
    """
    catalog = self._evadb.catalog()
    catalog.close()

    ray_flag = self._evadb.config.get_value("experimental", "ray")
    if not is_ray_enabled_and_installed(ray_flag):
        return

    # Imported lazily: Ray is an optional dependency.
    import ray

    ray.shutdown()


def connect(
evadb_dir: str = EvaDB_DATABASE_DIR, sql_backend: str = None
Expand Down
9 changes: 7 additions & 2 deletions evadb/optimizer/rules/rules_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
XformLateralJoinToLinearFlow,
)
from evadb.optimizer.rules.rules_base import Rule
from evadb.utils.generic_utils import is_ray_available
from evadb.utils.generic_utils import is_ray_enabled_and_installed


class RulesManager:
Expand Down Expand Up @@ -116,8 +116,13 @@ def __init__(self, config: ConfigurationManager):
LogicalVectorIndexScanToPhysical(),
]

# These rules are enabled only if
# (1) ray is installed and (2) ray is enabled
# Ray must be installed using pip
# It must also be enabled in "evadb.yml"
# NOTE: By default, it is not enabled
ray_enabled = config.get_value("experimental", "ray")
if ray_enabled and is_ray_available():
if is_ray_enabled_and_installed(ray_enabled):
self._implementation_rules.extend(
[
LogicalExchangeToPhysical(),
Expand Down
9 changes: 7 additions & 2 deletions evadb/udfs/chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,13 @@ def completion_with_backoff(**kwargs):
],
}

response = completion_with_backoff(**params)
results.append(response.choices[0].message.content)
try:
response = completion_with_backoff(**params)
results.append(response.choices[0].message.content)
# https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance
# ignore API rate limit error etc.
except Exception as e:
results.append(f"{e}")

df = pd.DataFrame({"response": results})

Expand Down
2 changes: 2 additions & 0 deletions evadb/udfs/ocr_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def setup(self):
try_to_import_torch()
try_to_import_torchvision()
try_to_import_transformers()
# https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

from transformers import DonutProcessor, VisionEncoderDecoderModel

self.processor = DonutProcessor.from_pretrained(
Expand Down
15 changes: 10 additions & 5 deletions evadb/udfs/udf_bootstrap_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,17 @@ def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
# "RuntimeError: random_device could not be read"
# The suspicion is that importing torch prior to decord resolves this issue
try:
import torch # noqa
import torch # noqa: F401
except ImportError:
pass

# Enable environment variables
# Relevant for ocr and other transformer-based models
import os

os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# list of UDF queries to load
queries = [
mnistcnn_udf_query,
Expand All @@ -236,10 +243,10 @@ def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
# ocr_udf_query,
# Mvit_udf_query,
Sift_udf_query,
Yolo_udf_query,
yolo8n_query,
]

# if mode is 'debug', add debug UDFs and a smaller Yolo model
# if mode is 'debug', add debug UDFs
if mode == "debug":
queries.extend(
[
Expand All @@ -248,8 +255,6 @@ def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
DummyFeatureExtractor_udf_query,
]
)
queries.remove(Yolo_udf_query)
queries.append(yolo8n_query)

# execute each query in the list of UDF queries
# ignore exceptions during the bootstrapping phase due to missing packages
Expand Down
7 changes: 6 additions & 1 deletion evadb/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,16 @@ def try_to_import_ray():
def is_ray_available() -> bool:
    """Report whether the optional Ray package can be imported.

    Returns:
        True when ``try_to_import_ray()`` completes without raising,
        False when it raises ``ValueError``.
    """
    try:
        try_to_import_ray()
    except ValueError:
        # presumably try_to_import_ray signals a missing package with
        # ValueError -- verify against its definition
        return False
    # The import succeeded, so Ray is usable. (A stale ``return False`` on
    # this path previously made the function always report "unavailable".)
    return True


def is_ray_enabled_and_installed(ray_enabled: bool) -> bool:
    """Report whether Ray-based execution should be used.

    Args:
        ray_enabled: Value of the "experimental" / "ray" configuration
            setting. Any falsy value (False, None, 0, "") counts as disabled.

    Returns:
        True only when the setting is truthy and ``is_ray_available()``
        reports that Ray can be imported; False otherwise.
    """
    # Coerce to a real bool so a missing or non-bool config value is never
    # returned as-is, and short-circuit so Ray is not probed when disabled.
    return bool(ray_enabled) and is_ray_available()


##############################
## VISION
##############################
Expand Down
1 change: 1 addition & 0 deletions script/formatting/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ def format_file(file_path, add_header, strip_header, format_code):

# check the notebooks
def check_notebook_format(notebook_file):
print(notebook_file)
notebook_file_name = os.path.basename(notebook_file)

# Ignore this notebook
Expand Down
Loading

0 comments on commit 5a65ad0

Please sign in to comment.