Merge remote-tracking branch 'origin/staging' into return_plot
americast committed Nov 16, 2023
2 parents eb88e9b + 69b39b8 commit eff6f60
Showing 32 changed files with 1,564 additions and 32 deletions.
4 changes: 2 additions & 2 deletions docs/source/overview/concepts.rst
@@ -92,6 +92,6 @@ After registering ``MnistImageClassifier`` function, you can call the function i
AI-Centric Query Optimization
-----------------------------

EvaDB optimizes the AI queries to save money spent on running models and reduce query execution time. It contains a novel `Cascades-style query optimizer <https://www.cse.iitb.ac.in/infolab/Data/Courses/CS632/Papers/Cascades-graefe.pdf>`__ tailored for AI queries.
EvaDB optimizes the AI queries to save money spent on running models and reduce query execution time. It contains a novel `Cascades-style query optimizer <https://faculty.cc.gatech.edu/~jarulraj/courses/8803-s21/slides/22-cascades.pdf>`__ tailored for AI queries.

Query optimization has powered SQL database systems for several decades. It is the bridge that connects the declarative query language to efficient query execution on hardware. EvaDB accelerates AI queries using a collection of optimizations detailed in the :ref:`optimizations<optimizations>` page.
Query optimization has powered SQL database systems for several decades. It is the bridge that connects the declarative query language to efficient query execution on hardware. EvaDB accelerates AI queries using a collection of optimizations detailed in the :ref:`optimizations<optimizations>` page.
3 changes: 1 addition & 2 deletions docs/source/reference/ai/model-forecasting.rst
@@ -58,7 +58,7 @@ EvaDB's default forecast framework is `statsforecast <https://nixtla.github.io/s
* - LIBRARY (str, default: 'statsforecast')
- We can select one of `statsforecast` (default) or `neuralforecast`. `statsforecast` provides access to statistical forecasting methods, while `neuralforecast` gives access to deep-learning based forecasting methods.
* - MODEL (str, default: 'ARIMA')
- If LIBRARY is `statsforecast`, we can select one of ARIMA, ting, ETS, Theta. The default is ARIMA. Check `Automatic Forecasting <https://nixtla.github.io/statsforecast/src/core/models_intro.html#automatic-forecasting>`_ to learn details about these models. If LIBRARY is `neuralforecast`, we can select one of NHITS or NBEATS. The default is NBEATS. Check `NBEATS docs <https://nixtla.github.io/neuralforecast/models.nbeats.html>`_ for details.
- If LIBRARY is `statsforecast`, we can select one of ARIMA, ting, ETS, Theta. The default is ARIMA. Check `Automatic Forecasting <https://nixtla.mintlify.app/statsforecast/index.html#automatic-forecasting>`_ to learn details about these models. If LIBRARY is `neuralforecast`, we can select one of NHITS or NBEATS. The default is NBEATS. Check `NBEATS docs <https://nixtla.github.io/neuralforecast/models.nbeats.html>`_ for details.
* - AUTO (str, default: 'T')
- If set to 'T', it enables automatic hyperparameter optimization. Must be set to 'T' for the `statsforecast` library. One may set this parameter to 'F' if LIBRARY is `neuralforecast` for faster (but less reliable) results.
* - CONF (int, default: 90)
@@ -103,4 +103,3 @@ Below is an example query with `neuralforecast` with `trend` column as exogenous
LIBRARY 'neuralforecast'
AUTO 'f'
FREQUENCY 'M';
2 changes: 1 addition & 1 deletion docs/source/reference/databases/github.rst
@@ -19,7 +19,7 @@ Required:

Optional:

* ``github_token`` is not required for public repositories. However, the rate limit is lower without a valid github_token. Check the `Rate limits page <https://docs.github.com/en/rest/overview/resources-in-the-rest-api>`_ to learn more about how to check your rate limit status. Check `Managing your personal access tokens page <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens>`_ to learn how to create personal access tokens.
* ``github_token`` is not required for public repositories. However, the rate limit is lower without a valid github_token. Check the `Rate limits page <https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api?apiVersion=2022-11-28>`_ to learn more about how to check your rate limit status. Check `Managing your personal access tokens page <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens>`_ to learn how to create personal access tokens.

Create Connection
-----------------
140 changes: 140 additions & 0 deletions evadb/catalog/catalog_manager.py
@@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import shutil
from pathlib import Path
from typing import Any, List
@@ -39,6 +40,8 @@
FunctionIOCatalogEntry,
FunctionMetadataCatalogEntry,
IndexCatalogEntry,
JobCatalogEntry,
JobHistoryCatalogEntry,
TableCatalogEntry,
drop_all_tables_except_catalog,
init_db,
@@ -61,6 +64,8 @@
FunctionMetadataCatalogService,
)
from evadb.catalog.services.index_catalog_service import IndexCatalogService
from evadb.catalog.services.job_catalog_service import JobCatalogService
from evadb.catalog.services.job_history_catalog_service import JobHistoryCatalogService
from evadb.catalog.services.table_catalog_service import TableCatalogService
from evadb.catalog.sql_config import IDENTIFIER_COLUMN, SQLConfig
from evadb.expression.function_expression import FunctionExpression
Expand All @@ -85,6 +90,10 @@ def __init__(self, db_uri: str):
self._config_catalog_service = ConfigurationCatalogService(
self._sql_config.session
)
self._job_catalog_service = JobCatalogService(self._sql_config.session)
self._job_history_catalog_service = JobHistoryCatalogService(
self._sql_config.session
)
self._table_catalog_service = TableCatalogService(self._sql_config.session)
self._column_service = ColumnCatalogService(self._sql_config.session)
self._function_service = FunctionCatalogService(self._sql_config.session)
@@ -215,6 +224,137 @@ def check_native_table_exists(self, table_name: str, database_name: str):

return True

"Job catalog services"

    def insert_job_catalog_entry(
        self,
        name: str,
        queries: str,
        start_time: datetime.datetime,
        end_time: datetime.datetime,
        repeat_interval: int,
        active: bool,
        next_schedule_run: datetime.datetime,
    ) -> JobCatalogEntry:
"""A new entry is persisted in the job catalog.
Args:
name: job name
queries: job's queries
start_time: job start time
end_time: job end time
repeat_interval: job repeat interval
active: job status
next_schedule_run: next run time as per schedule
"""
job_entry = self._job_catalog_service.insert_entry(
name,
queries,
start_time,
end_time,
repeat_interval,
active,
next_schedule_run,
)

return job_entry

def get_job_catalog_entry(self, job_name: str) -> JobCatalogEntry:
"""
        Returns the job catalog entry for the given job name
Arguments:
job_name (str): name of the job
Returns:
JobCatalogEntry
"""

        job_entry = self._job_catalog_service.get_entry_by_name(job_name)

        return job_entry

def drop_job_catalog_entry(self, job_entry: JobCatalogEntry) -> bool:
"""
This method deletes the job from catalog.
Arguments:
job_entry: job catalog entry to remove
Returns:
True if successfully deleted else False
"""
return self._job_catalog_service.delete_entry(job_entry)

def get_next_executable_job(self, only_past_jobs: bool = False) -> JobCatalogEntry:
"""Get the oldest job that is ready to be triggered by trigger time
Arguments:
only_past_jobs: boolean flag to denote if only jobs with trigger time in
past should be considered
Returns:
            The first job to be triggered
"""
return self._job_catalog_service.get_next_executable_job(only_past_jobs)

    def update_job_catalog_entry(
        self, job_name: str, next_scheduled_run: datetime.datetime, active: bool
    ):
"""Update the next_scheduled_run and active column as per the provided values
Arguments:
job_name (str): job which should be updated
            next_scheduled_run (datetime): the next trigger time for the job
active (bool): the active status for the job
"""
self._job_catalog_service.update_next_scheduled_run(
job_name, next_scheduled_run, active
)

"Job history catalog services"

    def insert_job_history_catalog_entry(
        self,
        job_id: int,
        job_name: str,
        execution_start_time: datetime.datetime,
        execution_end_time: datetime.datetime,
    ) -> JobHistoryCatalogEntry:
"""A new entry is persisted in the job history catalog.
Args:
job_id: job id for the execution entry
job_name: job name for the execution entry
execution_start_time: job execution start time
execution_end_time: job execution end time
"""
job_history_entry = self._job_history_catalog_service.insert_entry(
job_id, job_name, execution_start_time, execution_end_time
)

return job_history_entry

def get_job_history_by_job_id(self, job_id: int) -> List[JobHistoryCatalogEntry]:
"""Returns all the entries present for this job_id on in the history.
Args:
job_id: the id of job whose history should be fetched
"""
return self._job_history_catalog_service.get_entry_by_job_id(job_id)

    def update_job_history_end_time(
        self,
        job_id: int,
        execution_start_time: datetime.datetime,
        execution_end_time: datetime.datetime,
    ) -> List[JobHistoryCatalogEntry]:
"""Updates the execution_end_time for this job history matching job_id and execution_start_time.
Args:
            job_id: id of the job whose history entry should be updated
execution_start_time: the start time for the job history entry
execution_end_time: the end time for the job history entry
"""
return self._job_history_catalog_service.update_entry_end_time(
job_id, execution_start_time, execution_end_time
)

"Table catalog services"

def insert_table_catalog_entry(
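The job-catalog methods above imply a scheduler contract: after each run, `update_job_catalog_entry` receives a fresh `next_scheduled_run` and an `active` flag. A minimal sketch of how a caller might derive those two values — assuming `repeat_interval` is in seconds, which this diff does not state:

```python
from datetime import datetime, timedelta
from typing import Optional, Tuple


def compute_next_run(
    last_run: datetime,
    repeat_interval: Optional[int],
    end_time: Optional[datetime],
) -> Tuple[Optional[datetime], bool]:
    """Return (next_scheduled_run, active) for a job after one run.

    A one-shot job (no repeat_interval) is deactivated after it runs;
    a repeating job advances by its interval until end_time is passed.
    """
    if not repeat_interval:
        return None, False
    next_run = last_run + timedelta(seconds=repeat_interval)
    if end_time is not None and next_run > end_time:
        return None, False
    return next_run, True
```

The pair returned here maps directly onto the `next_scheduled_run` and `active` arguments of `update_job_catalog_entry`; the interval unit and the deactivation policy are assumptions for illustration.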
92 changes: 92 additions & 0 deletions evadb/catalog/models/job_catalog.py
@@ -0,0 +1,92 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import json

from sqlalchemy import Boolean, Column, DateTime, Index, Integer, String
from sqlalchemy.orm import relationship

from evadb.catalog.models.base_model import BaseModel
from evadb.catalog.models.utils import JobCatalogEntry


class JobCatalog(BaseModel):
"""The `JobCatalog` catalog stores information about all the created Jobs.
`_row_id:` an autogenerated unique identifier.
`_name:` the job name.
`_queries:` the queries to run as part of this job
`_start_time:` the job's start time
`_end_time:` the job's end time
`_repeat_interval:` the job's repeat interval
`_active:` is the job active/deleted
`_next_scheduled_run:` the next trigger time for the job as per the schedule
`_created_at:` entry creation time
`_updated_at:` entry last update time
"""

__tablename__ = "job_catalog"

_name = Column("name", String(100), unique=True)
_queries = Column("queries", String, nullable=False)
_start_time = Column("start_time", DateTime, default=datetime.datetime.now)
_end_time = Column("end_ts", DateTime)
_repeat_interval = Column("repeat_interval", Integer)
_active = Column("active", Boolean, default=True)
_next_scheduled_run = Column("next_scheduled_run", DateTime)

_created_at = Column("created_at", DateTime, default=datetime.datetime.now)
_updated_at = Column(
"updated_at",
DateTime,
default=datetime.datetime.now,
onupdate=datetime.datetime.now,
)

_next_run_index = Index("_next_run_index", _next_scheduled_run)
_job_history_catalog = relationship("JobHistoryCatalog", cascade="all, delete")

def __init__(
self,
name: str,
queries: str,
        start_time: datetime.datetime,
        end_time: datetime.datetime,
        repeat_interval: int,
        active: bool,
        next_schedule_run: datetime.datetime,
):
self._name = name
self._queries = queries
self._start_time = start_time
self._end_time = end_time
self._repeat_interval = repeat_interval
self._active = active
self._next_scheduled_run = next_schedule_run

def as_dataclass(self) -> "JobCatalogEntry":
return JobCatalogEntry(
row_id=self._row_id,
name=self._name,
queries=json.loads(self._queries),
start_time=self._start_time,
end_time=self._end_time,
repeat_interval=self._repeat_interval,
active=self._active,
next_scheduled_run=self._next_scheduled_run,
created_at=self._created_at,
updated_at=self._updated_at,
)
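`as_dataclass` above calls `json.loads` on `_queries`, which implies the column holds a JSON-encoded list rather than raw SQL text. A small sketch of the round trip a caller would perform (the variable names and sample queries are illustrative, not from this diff):

```python
import json

# _queries is a single String column, so a job's list of queries is
# serialized to JSON on the way in and parsed on the way out
# (mirroring the json.loads call in as_dataclass).
queries = ["SELECT 1;", "SELECT 2;"]
stored = json.dumps(queries)   # value written to the _queries column
restored = json.loads(stored)  # value exposed as JobCatalogEntry.queries
```

This keeps the schema simple (one text column) at the cost of not being able to filter on individual queries in SQL.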
73 changes: 73 additions & 0 deletions evadb/catalog/models/job_history_catalog.py
@@ -0,0 +1,73 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime

from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, UniqueConstraint

from evadb.catalog.models.base_model import BaseModel
from evadb.catalog.models.utils import JobHistoryCatalogEntry


class JobHistoryCatalog(BaseModel):
"""The `JobHistoryCatalog` stores the execution history of jobs .
`_row_id:` an autogenerated unique identifier.
`_job_id:` job id.
`_job_name:` job name.
`_execution_start_time:` start time of this run
`_execution_end_time:` end time for this run
`_created_at:` entry creation time
`_updated_at:` entry last update time
"""

__tablename__ = "job_history_catalog"

_job_id = Column(
"job_id", Integer, ForeignKey("job_catalog._row_id", ondelete="CASCADE")
)
_job_name = Column("job_name", String(100))
_execution_start_time = Column("execution_start_time", DateTime)
_execution_end_time = Column("execution_end_time", DateTime)
_created_at = Column("created_at", DateTime, default=datetime.datetime.now)
_updated_at = Column(
"updated_at",
DateTime,
default=datetime.datetime.now,
onupdate=datetime.datetime.now,
)

__table_args__ = (UniqueConstraint("job_id", "execution_start_time"), {})

def __init__(
self,
job_id: int,
job_name: str,
        execution_start_time: datetime.datetime,
        execution_end_time: datetime.datetime,
):
self._job_id = job_id
self._job_name = job_name
self._execution_start_time = execution_start_time
self._execution_end_time = execution_end_time

def as_dataclass(self) -> "JobHistoryCatalogEntry":
return JobHistoryCatalogEntry(
row_id=self._row_id,
job_id=self._job_id,
job_name=self._job_name,
execution_start_time=self._execution_start_time,
execution_end_time=self._execution_end_time,
created_at=self._created_at,
updated_at=self._updated_at,
)
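The `UniqueConstraint` on `(job_id, execution_start_time)`, together with the separate `update_job_history_end_time` path in the catalog manager, suggests a two-phase protocol: insert a row when a run starts, then fill in the end time when it finishes. A minimal in-memory sketch of that protocol (function and variable names are illustrative):

```python
from datetime import datetime
from typing import Dict, Optional, Tuple

# (job_id, execution_start_time) -> execution_end_time (None while running)
history: Dict[Tuple[int, datetime], Optional[datetime]] = {}


def record_start(job_id: int, start: datetime) -> None:
    key = (job_id, start)
    if key in history:
        # mirrors the UNIQUE constraint on (job_id, execution_start_time)
        raise ValueError("duplicate job run")
    history[key] = None


def record_end(job_id: int, start: datetime, end: datetime) -> None:
    # mirrors update_job_history_end_time: locate the row by
    # (job_id, execution_start_time) and set its end time
    history[(job_id, start)] = end
```

A row with a `None` end time thus represents a run that is still in progress (or one that crashed before completing).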