Merge branch 'master' into ashmita/license-plate

georgia-tech-db · Apr 28, 2023 · 75877cb · 75877cb
2 parents c974a48 + c93075c
commit 75877cb
Show file tree

Hide file tree

Showing 27 changed files with 2,453 additions and 341 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -2,61 +2,68 @@ version: 2.1
 
 orbs:
   win: circleci/windows@2.2.0
+  macos: circleci/macos@2.3.4
 
 workflows:
   main:
     jobs:
-        - test:
+        #### UNIT TESTS
+        - Linux:
             name: "Test | v3.7 | Linux"
             v: "3.7"
             mode: "TEST"
-        - test:
+        - Linux:
             name: "Test | v3.8 | Linux"
             v: "3.8"
             mode: "TEST"
-        - test:
+        - Linux:
             name: "Test | v3.9 | Linux"
             v: "3.9"
             mode: "TEST"
-        - test:
+        - Linux:
             name: "Test | v3.10 | Linux"
             v: "3.10"
             mode: "TEST"
         ### NOTEBOOKS
-        - test:
+        - Linux:
             name: "Notebook | v3.7 | Linux"
             v: "3.7"
             mode: "NOTEBOOK"
-        - test:
+        - Linux:
             name: "Notebook | v3.8 | Linux"
             v: "3.8"
             mode: "NOTEBOOK"
-        - test:
+        - Linux:
             name: "Notebook | v3.9 | Linux"
             v: "3.9"
             mode: "NOTEBOOK"
-        - test:
+        - Linux:
             name: "Notebook | v3.10 | Linux"
             v: "3.10"
             mode: "NOTEBOOK"
         ### LINTER
-        - test:
+        - Linux:
             name: "Linter | Linux"
             v: "3.10"
             mode: "LINTER"
         ### RAY
-        - test:
+        - Linux:
             name: "Test | Ray | v3.10 | Linux"
             v: "3.10"
             mode: "RAY"
         - Pip
-        - Windows
-        #- test:
-        #    name: "Linux -  v3.11"  # missing Torchvision
+        - Windows:
+            name: "Windows | v3.10"
+        # test_create_index doesn't work on MacOS
+        # - MacOS:
+        #    name: "MacOS | v3.10"
+        # missing Torchvision
+        #- Linux:
+        #    name: "Linux -  v3.11"  
         #    v: "3.11"
 
 jobs:
-  test:
+  Linux:
     parameters:
       v:
         type: string
@@ -76,6 +83,7 @@ jobs:
           name: Install EVA package from GitHub repo with all dependencies
           command: |
             "python<< parameters.v >>" -m venv test_evadb
+            pip install --upgrade pip
             source test_evadb/bin/activate
             pip install ".[dev]"
 
@@ -97,35 +105,55 @@ jobs:
             bash script/test/test.sh -m "<< parameters.mode >>"
 
+  # Windows job: installs Python 3.10.8 with Chocolatey, creates a virtualenv,
+  # installs the package with its "dev" extras and runs script\test\test.sh.
   Windows:
-    executor: win/default
-    parameters:
-      v:
-        type: string
-        default: "3.10"
-    steps:
-      - checkout
+      executor: win/default
+      steps:
+        - checkout
+        - run: 
+            name: Install EVA package from GitHub repo and run tests
+            command: |
+              choco install python --version=3.10.8 -y
+              python --version
+              pip --version
+              pip install virtualenv
+              virtualenv test_evadb
+              test_evadb\Scripts\activate
+              pip install ".[dev]"
+              bash script\test\test.sh
 
-      - run:
-          name: Test windows
-          command: |
-            Set-StrictMode -Version Latest
-            $ErrorActionPreference = 'Continue'
-            pip install virtualenv
-            virtualenv test_evadb
-            test_evadb\Scripts\activate
-            pip install ".[dev]"
-            bash script\test\test.sh
+  # macOS job: installs Python 3.10.8 through pyenv, then installs the package
+  # with its "dev" extras into a virtualenv and runs script/test/test.sh.
+  # The workflow entry for this job is currently commented out.
+  MacOS:
+      macos:
+        xcode: "14.2.0"
+      steps:
+        - run:
+            name: Setup Python
+            command: |
+              brew update
+              brew install pyenv git
+              pyenv install 3.10.8
+              pyenv global 3.10.8
+              eval "$(pyenv init -)"
+              python --version
+              pip --version
+        - checkout
+        - run:
+            name: Install EVA package from GitHub repo and run tests
+            command: |
+              # Every run step starts a new shell, so pyenv must be
+              # re-initialised here for `python` to resolve to 3.10.8.
+              eval "$(pyenv init -)"
+              python -m venv test_evadb
+              source test_evadb/bin/activate
+              pip install --upgrade pip
+              pip debug --verbose
+              pip install ".[dev]"
+              bash script/test/test.sh
 
   Pip:
     resource_class: large
     docker:
       - image: "cimg/python:3.10"
     steps:
-
       - checkout
-
       - run:
-          name: Install EVA package from pip and start server
+          name: Install EVA package from PIP and start server
           command: |
             pip install --upgrade pip
             pip install evadb

diff --git a/.gitignore b/.gitignore
@@ -179,6 +179,8 @@ test_evadb/
 tutorials/*.py
 *.pth
 *.pt
+tutorials/bddtest.zip
+bddtest/
 
 # benchmark
 .benchmarks

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### [Deprecated]
 ### [Removed]
 
+##  [0.2.1] - 2023-04-25
+### [Added]
+
+* PR #664: Add Tutorial Notebook for HuggingFace 
+* PR #668: fix: Remove detoxify to fix pip install evadb 
+* PR #660: Bump Eva-Decord to Not Need ffmpeg 
+* PR #657: docs: benefit of caching and predicate reordering 
+
+##  [0.2.0] - 2023-04-16
+### [Added]
+
+* PR #647: feat: LOAD CSV Notebook 
+* PR #626: docs: Documentation for creating UDFs using Decorators. 
+* PR #599: feat: EVA x HuggingFace 
+* PR #621: feat: Ray integration
+
+### [Changed]
+
+* PR #649: fix: Expr bugs 
+* PR #628: test: adding support for pytest-xdist 
+* PR #633: fix: Install Decord from EVA-Fork 
+* PR #646: update doc for extending eva 
+* PR #642: Build fix 
+* PR #641: fix: Unnest bug  
+
 ##  [0.1.6] - 2023-04-05
 ### [Added]
 

diff --git a/README.md b/README.md
@@ -4,6 +4,9 @@
   </a>
   <div>
         <h3>Try It Out!</h3>
+        <a href="https://github.com/georgia-tech-db/eva">
+            <img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/georgia-tech-db/eva/master/docs/images/eva/v1.json" alt="Logo"/>
+        </a>
         <a href="https://colab.research.google.com/github/georgia-tech-db/eva/blob/master/tutorials/03-emotion-analysis.ipynb">
             <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open EVA on Colab"/>
         </a>
@@ -21,23 +24,31 @@
 
 # EVA AI-Relational Database System
 
-EVA is an open-source **AI-relational database with first-class support for deep learning models**. It aims to support AI-powered database applications that operate on both structured (tables) and unstructured data (videos, text, podcasts, PDFs, etc.) with deep learning models.
+- ⚡️ 10-100x faster AI pipelines using SQL-like queries 
+- 💰 Save money spent on GPU-driven inference
+- 📦 Built-in caching to avoid re-running deep learning models across queries
+- 📏 Over 20 AI-centric query optimization rules
+- ⌨️ First-party integrations for PyTorch and HuggingFace models
+- 🐍 Installable via pip
+- 🤝 Fully implemented in Python
+
+EVA is an open-source **AI-relational database with first-class support for deep learning models**. It supports next-generation AI-powered database applications that operate on structured (tables) and unstructured data (videos, text, podcasts, PDFs, etc.) with deep learning models.
 
-EVA accelerates AI pipelines using a collection of optimizations inspired by relational database systems including function caching, sampling, and cost-based operator reordering. It comes with a wide range of models for analyzing unstructured data including image classification, object detection, OCR, face detection, etc. It is fully implemented in Python, and licensed under the Apache license.
+EVA accelerates AI pipelines by 10-100x using a collection of optimizations inspired by relational database systems, including function caching, sampling, and cost-based predicate reordering. It comes with a wide range of models for analyzing unstructured data, including models for image classification, object detection, OCR, text sentiment classification, face detection, etc. It is fully implemented in Python and licensed under the Apache license.
 
-EVA supports a AI-oriented query language for analysing unstructured data. Here are some illustrative applications:
+EVA supports an AI-oriented query language tailored for analyzing unstructured data. Here are some illustrative applications:
 
  * <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/03-emotion-analysis.html">Examining the emotion palette of actors in a movie</a>
  * <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/02-object-detection.html">Analysing traffic flow at an intersection </a>
  * <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/01-mnist.html">Classifying images based on their content</a>
- * <a href="https://github.com/georgia-tech-db/license-plate-recognition">Recogizing license plates </a>
+ * <a href="https://github.com/georgia-tech-db/license-plate-recognition">Recognizing license plates </a>
  * <a href="https://github.com/georgia-tech-db/toxicity-classification">Analysing toxicity of social media memes </a>
 
-If you are wondering why you might need a AI-relational database system, start with the page on <a href="https://evadb.readthedocs.io/en/stable/source/overview/video.html#">Video Database Systems</a>. It describes how EVA lets you easily make use of deep learning models and you can save money spent on inference on large image or video datasets.
+If you are wondering why you might need an AI-relational database system, start with the page on <a href="https://evadb.readthedocs.io/en/stable/source/overview/video.html#">Video Database Systems</a>. It describes how EVA lets you easily use deep learning models and save money spent on GPU-driven inference on large image or video datasets.
 
 The <a href="https://evadb.readthedocs.io/en/stable/source/overview/installation.html">Getting Started</a> page shows how you can use EVA for different computer vision tasks: image classification, object detection, action recognition, and how you can easily extend EVA to support your custom deep learning model in the form of user-defined functions.
 
-The <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/index.html">User Guides</a> section contains Jupyter Notebooks that demonstrate how to use various features of EVA. Each notebook includes a link to Google Colab, where you can run the code by yourself.
+The <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/index.html">User Guides</a> section contains Jupyter Notebooks that demonstrate how to use various features of EVA. Each notebook includes a link to Google Colab to run the code.
 
 ## Why EVA? ##
 
@@ -48,12 +59,12 @@ The <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/index.html"
 
 <details>
   <summary><b>Speed up queries and save money spent on model inference</b></summary>
-  EVA comes with a collection of built-in sampling, caching, and filtering optimizations inspired by time-tested relational database systems.
+  EVA has built-in sampling, caching, and filtering optimizations inspired by time-tested relational database systems.
 </details>
 
 <details>
   <summary><b>Extensible by design to support custom deep learning models </b></summary>
-  EVA has first-class support for user-defined functions that wrap around your deep learning models in PyTorch.
+  EVA has first-class support for user-defined functions that wrap around your deep learning models in PyTorch and HuggingFace.
 </details>
 
 ## Links
@@ -64,52 +75,51 @@ The <a href="https://evadb.readthedocs.io/en/stable/source/tutorials/index.html"
 
 ## Quick Start
 
-1. To install EVA, we recommend using the pip package manager (EVA supports Python versions 3.7+).
+- Install EVA using the pip package manager. EVA supports Python versions 3.7+.
 
 ```shell
 pip install evadb
 ```
 
-2. EVA is based on a client-server architecture. It works in Jupyter notebooks (illustrative notebooks are available in the [Tutorials](https://github.com/georgia-tech-db/eva/blob/master/tutorials/03-emotion-analysis.ipynb) folder) and also supports a terminal-based client. To start the EVA server and a terminal-based client, use the following commands:
+- To start and connect to an EVA server in a Jupyter notebook, check out this [illustrative emotion analysis notebook](https://github.com/georgia-tech-db/eva/blob/master/tutorials/03-emotion-analysis.ipynb):
 ```shell
-eva_server &   # launch server
-eva_client     # launch client
+cursor = connect_to_server()
 ```
 
-3. Load a video onto the EVA server from the client (we use [ua_detrac.mp4](data/ua_detrac/ua_detrac.mp4) video as an example):
+- Load a video onto the EVA server (we use [ua_detrac.mp4](data/ua_detrac/ua_detrac.mp4) for illustration):
 
 ```mysql
-LOAD VIDEO "data/ua_detrac/ua_detrac.mp4" INTO MyVideo;
+LOAD VIDEO "data/ua_detrac/ua_detrac.mp4" INTO UADETRAC;
 ```
 
-4. That's it! You can now run queries over the loaded video:
+- That's it! You can now run queries over the loaded video:
 
 ```mysql
-SELECT id, data FROM MyVideo WHERE id < 5;
+SELECT id, data FROM UADETRAC WHERE id < 5;
 ```
 
-5. Search for frames in the video that contain a car
+- Search for frames in the video that contain a car
 
 ```mysql
-SELECT id, data FROM MyVideo WHERE ['car'] <@ FastRCNNObjectDetector(data).labels;
+SELECT id, data FROM UADETRAC WHERE ['car'] <@ YoloV5(data).labels;
 ```
 | Source Video  | Query Result |
 |---------------|--------------|
 |<img alt="Source Video" src="https://github.com/georgia-tech-db/eva/releases/download/v0.1.0/traffic-input.webp" width="300"> |<img alt="Query Result" src="https://github.com/georgia-tech-db/eva/releases/download/v0.1.0/traffic-output.webp" width="300"> |
 
-6. Search for frames in the video that contain a pedestrian and a car
+- Search for frames in the video that contain a pedestrian and a car
 
 ```mysql
-SELECT id, data FROM MyVideo WHERE ['pedestrian', 'car'] <@ FastRCNNObjectDetector(data).labels;
+SELECT id, data FROM UADETRAC WHERE ['pedestrian', 'car'] <@ YoloV5(data).labels;
 ```
 
-7. Search for frames in the video with more than 3 cars
+- Search for frames with more than three cars
 
 ```mysql
-SELECT id, data FROM MyVideo WHERE ArrayCount(FastRCNNObjectDetector(data).labels, 'car') > 3;
+SELECT id, data FROM UADETRAC WHERE ArrayCount(YoloV5(data).labels, 'car') > 3;
 ```
 
-8. You can create a new user-defined function (UDF) that wraps around your custom vision model or an off-the-shelf model like FastRCNN:
+- You can **create a custom user-defined function (UDF)** that wraps around a fine-tuned or off-the-shelf deep learning model:
 ```mysql
 CREATE UDF IF NOT EXISTS MyUDF
 INPUT  (frame NDARRAY UINT8(3, ANYDIM, ANYDIM))
@@ -119,14 +129,40 @@ TYPE  Classification
 IMPL  'eva/udfs/fastrcnn_object_detector.py';
 ```
 
-9. You can combine multiple user-defined functions in a single query to accomplish more complicated tasks.
+- **Compose multiple user-defined functions in a single query** to accomplish complicated AI pipelines.
 ```mysql
    -- Analyse emotions of faces in a video
    SELECT id, bbox, EmotionDetector(Crop(data, bbox)) 
    FROM MyVideo JOIN LATERAL UNNEST(FaceDetector(data)) AS Face(bbox, conf)  
    WHERE id < 15;
 ```
 
+- Besides making it easy to write queries for complex AI pipelines, EVA **speeds up query execution using its AI-centric query optimizer**. Two illustrative optimizations are:
+
+   💾 **Caching**: EVA automatically caches and reuses previous query results (especially model inference results), eliminating redundant computation and reducing query processing time.
+
+   🎯 **Predicate Reordering**: EVA optimizes the order in which the query predicates are evaluated (e.g., runs the faster, more selective model first), leading to faster queries and lower inference costs.
+
+Consider these two exploratory queries on a dataset of dog images:
+<img align="right" style="display:inline;" width="40%" src="https://github.com/georgia-tech-db/eva/blob/master/data/assets/eva_performance_comparison.png?raw=true">
+
+```mysql
+  -- Query 1: Find all images of black-colored dogs
+  SELECT id, bbox FROM dogs 
+  JOIN LATERAL UNNEST(YoloV5(data)) AS Obj(label, bbox, score) 
+  WHERE Obj.label = 'dog' 
+    AND Color(Crop(data, bbox)) = 'black'; 
+
+  -- Query 2: Find all Great Danes that are black-colored
+  SELECT id, bbox FROM dogs 
+  JOIN LATERAL UNNEST(YoloV5(data)) AS Obj(label, bbox, score) 
+  WHERE Obj.label = 'dog' 
+    AND DogBreedClassifier(Crop(data, bbox)) = 'great dane' 
+    AND Color(Crop(data, bbox)) = 'black';
+```
+
+By reusing the results of the first query and reordering the predicates based on available cached results, EVA runs the second query **10x faster**!
+
 ## Illustrative EVA Applications 
 
 ### Traffic Analysis (Object Detection Model)
@@ -176,12 +212,11 @@ Join the EVA community on [Slack](https://join.slack.com/t/eva-db/shared_invite/
 [![Coverage Status](https://coveralls.io/repos/github/georgia-tech-db/eva/badge.svg?branch=master)](https://coveralls.io/github/georgia-tech-db/eva?branch=master)
 [![Documentation Status](https://readthedocs.org/projects/evadb/badge/?version=stable)](https://evadb.readthedocs.io/en/stable/index.html)
 
-We welcome all kinds of contributions to EVA.
-To file a bug or request a feature, please use <a href="https://github.com/georgia-tech-db/eva/issues">GitHub issues</a>. <a href="https://github.com/georgia-tech-db/eva/pulls">Pull requests</a> are welcome.
+EVA is the beneficiary of many [contributors](https://github.com/georgia-tech-db/eva/graphs/contributors). All kinds of contributions to EVA are appreciated. To file a bug or to request a feature, please use <a href="https://github.com/georgia-tech-db/eva/issues">GitHub issues</a>. <a href="https://github.com/georgia-tech-db/eva/pulls">Pull requests</a> are welcome.
 
-For more information on contributing to EVA, see our
+For more information, see our
 [contribution guide](https://evadb.readthedocs.io/en/stable/source/contribute/index.html).
 
 ## License
-Copyright (c) 2018-2023 [Georgia Tech Database Group](http://db.cc.gatech.edu/)
+Copyright (c) 2018-2023 [Georgia Tech Database Group](http://db.cc.gatech.edu/).
 Licensed under [Apache License](LICENSE).
diff --git a/data/assets/eva_performance_comparison.png b/data/assets/eva_performance_comparison.png