From 335d913bfc92863c65aade4087a3f9b8db184be7 Mon Sep 17 00:00:00 2001 From: Anirudh Prabakaran Date: Mon, 31 Oct 2022 23:26:51 -0400 Subject: [PATCH 1/4] Tutorial notebook to demonstrate downloading/loading dataset --- tutorials/05-loading-data.ipynb | 216 ++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 tutorials/05-loading-data.ipynb diff --git a/tutorials/05-loading-data.ipynb b/tutorials/05-loading-data.ipynb new file mode 100644 index 000000000..5cabdf0c0 --- /dev/null +++ b/tutorials/05-loading-data.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9aa3c1b1-1587-45de-baaf-e0ccec1ddd8f", + "metadata": {}, + "source": [ + "# LOADING DATA" + ] + }, + { + "cell_type": "markdown", + "id": "573bbbff-e4e9-4a5e-a43f-8feb51d3124f", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + "
\n", + " Run on Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "27d7ebfa-c0b6-4a15-bd17-59a4e1cb7c77", + "metadata": {}, + "source": [ + "### Start EVA Server\n", + "We are reusing the start server notebook for launching the EVA server" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c758b60b-e75e-4128-805d-46a210638daf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File ‘00-start-eva-server.ipynb’ already there; not retrieving.\n", + "\n", + "[ -z \"$(lsof -ti:5432)\" ] || kill -9 \"$(lsof -ti:5432)\"\n", + "nohup eva_server > eva.log 2>&1 &\n", + "\n", + "\u001b[33mWARNING: You are using pip version 22.0.4; however, version 22.3 is available.\n", + "You should consider upgrading via the '/home/anip/codes/eva/test_eva_db/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "!wget -nc \"https://raw.githubusercontent.com/georgia-tech-db/eva/master/tutorials/00-start-eva-server.ipynb\"\n", + "%run 00-start-eva-server.ipynb\n", + "cursor = connect_to_server()" + ] + }, + { + "cell_type": "markdown", + "id": "ca54c9e5-6ec9-4aea-b421-ad1fa3ddcdd1", + "metadata": {}, + "source": [ + "### Video Files\n", + " - Download dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "62def7ce-3f83-4fa0-b9fd-2e553b3919ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File ‘bddtest.zip’ already there; not retrieving.\n", + "\n", + "Archive: bddtest.zip\n" + ] + } + ], + "source": [ + "# sample dataset of 2 videos\n", + "!wget -nc https://www.dropbox.com/s/jnkil8py0ng5za5/bddtest.zip\n", + "\n", + "# unzip\n", + "!unzip -n bddtest.zip" + ] + }, + { + "cell_type": "markdown", + "id": "463ce1f8-8333-4e11-ab0a-dda6d9bb067b", + "metadata": {}, + "source": [ + "- Load the 2 videos" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "95b1e0a4-d9f5-40f2-830f-69c8c9f21172", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@status: 0\n", + "@batch: Batch Object:\n", + "@dataframe: 0\n", + "0 Video successfully added at location: bddtest/...\n", + "@batch_size: 1\n", + "@identifier_column: None\n", + "@query_time: 0.03893153000001348\n", + "@status: 0\n", + "@batch: Batch Object:\n", + "@dataframe: 0\n", + "0 Video successfully added at location: bddtest/...\n", + "@batch_size: 1\n", + "@identifier_column: None\n", + "@query_time: 0.021008542000004127\n" + ] + } + ], + "source": [ + "cursor.execute('LOAD FILE \"./bddtest/videos/bddtest_00a2e3ca5c856cde.mp4\" INTO bddtest_1;')\n", + "response = cursor.fetch_all()\n", + "print(response)\n", + "\n", + "cursor.execute('LOAD FILE \"./bddtest/videos/bddtest_00a395fed60c0b47.mp4\" INTO bddtest_2;')\n", + "response = cursor.fetch_all()\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "8ace1640-fb0c-4e70-a595-f7e9befdf757", + "metadata": {}, + "source": [ + "## Query from the dataset\n", + "- Run a test query on the loaded video" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "69e378d4-4d30-47cf-84c8-8fe56afe517e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@status: -1\n", + "@batch: None\n", + "@error: CUDA error: no kernel image is available for execution on the device\n", + "CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.\n", + "For debugging consider passing CUDA_LAUNCH_BLOCKING=1.\n" + ] + } + ], + "source": [ + "cursor.execute(\"\"\"SELECT id, FastRCNNObjectDetector(data) \n", + " FROM bddtest_1 \n", + " WHERE id < 20\"\"\")\n", + "response = cursor.fetch_all()\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2990038a-00ec-4d36-aae2-82a789f2389a", + 
"metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c470dc7-ac2b-4e57-9027-06786a73e141", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 9fbabc9f11f3676905e7de9f415ba55b7e2f0726 Mon Sep 17 00:00:00 2001 From: Anirudh Prabakaran Date: Wed, 2 Nov 2022 22:23:06 -0400 Subject: [PATCH 2/4] Fix path bug in load csv executor --- eva/executor/load_csv_executor.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/eva/executor/load_csv_executor.py b/eva/executor/load_csv_executor.py index 0cc8e9d6e..de7ae97cf 100644 --- a/eva/executor/load_csv_executor.py +++ b/eva/executor/load_csv_executor.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import os +from pathlib import Path import pandas as pd @@ -22,6 +22,7 @@ from eva.planner.load_data_plan import LoadDataPlan from eva.readers.csv_reader import CSVReader from eva.storage.storage_engine import StorageEngine +from eva.utils.logging_manager import logger class LoadCSVExecutor(AbstractExecutor): @@ -39,11 +40,28 @@ def exec(self): using storage engine """ + csv_file_path = None + # Validate file_path + if Path(self.node.file_path).exists(): + csv_file_path = self.node.file_path + # check in the upload directory + else: + csv_path = Path(Path(self.upload_dir) / self.node.file_path) + if csv_path.exists(): + csv_file_path = csv_path + + if csv_file_path is None: + error = "Failed to find a video file at location: {}".format( + self.node.file_path + ) + logger.error(error) + raise RuntimeError(error) + # Read the CSV file # converters is a dictionary of functions that convert the values # in the column to the desired type csv_reader = CSVReader( - os.path.join(self.upload_dir, self.node.file_path), + csv_file_path, column_list=self.node.column_list, batch_mem_size=self.node.batch_mem_size, ) @@ -58,7 +76,7 @@ def exec(self): df_yield_result = Batch( pd.DataFrame( { - "CSV": str(self.node.file_path), + "CSV": csv_file_path, "Number of loaded frames": num_loaded_frames, }, index=[0], From e167a12b627ff6d5f824ed8dc6d40b08a1fce4da Mon Sep 17 00:00:00 2001 From: Anirudh Prabakaran Date: Wed, 2 Nov 2022 22:48:51 -0400 Subject: [PATCH 3/4] Updated notebook to demonstrate loading and querying CSVs --- tutorials/05-loading-data.ipynb | 306 ++++++++++++++++++++++++++++---- 1 file changed, 276 insertions(+), 30 deletions(-) diff --git a/tutorials/05-loading-data.ipynb b/tutorials/05-loading-data.ipynb index 5cabdf0c0..e2e445a1f 100644 --- a/tutorials/05-loading-data.ipynb +++ b/tutorials/05-loading-data.ipynb @@ -37,31 +37,34 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "c758b60b-e75e-4128-805d-46a210638daf", 
"metadata": {}, 
- "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File ‘00-start-eva-server.ipynb’ already there; not retrieving.\n", - "\n", - "[ -z \"$(lsof -ti:5432)\" ] || kill -9 \"$(lsof -ti:5432)\"\n", - "nohup eva_server > eva.log 2>&1 &\n", - "\n", - "\u001b[33mWARNING: You are using pip version 22.0.4; however, version 22.3 is available.\n", - "You should consider upgrading via the '/home/anip/codes/eva/test_eva_db/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "!wget -nc \"https://raw.githubusercontent.com/georgia-tech-db/eva/master/tutorials/00-start-eva-server.ipynb\"\n", "%run 00-start-eva-server.ipynb\n", "cursor = connect_to_server()" ] }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c2c02070-1635-4b09-9916-c7ae828eeda2", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.insert(0,'..')\n", + "from eva.server.db_api import connect\n", + "\n", + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "connection = connect(host = '0.0.0.0', port = 5432) # hostname, port of the server where EVADB is running\n", + "\n", + "cursor = connection.cursor()" + ] + }, { "cell_type": "markdown", "id": "ca54c9e5-6ec9-4aea-b421-ad1fa3ddcdd1", @@ -107,7 +110,9 @@ "cell_type": "code", "execution_count": 3, "id": "95b1e0a4-d9f5-40f2-830f-69c8c9f21172", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -119,14 +124,14 @@ "0 Video successfully added at location: bddtest/...\n", "@batch_size: 1\n", "@identifier_column: None\n", - "@query_time: 0.03893153000001348\n", + "@query_time: 0.035945682000601664\n", "@status: 0\n", "@batch: Batch Object:\n", "@dataframe: 0\n", "0 Video successfully added at location: bddtest/...\n", "@batch_size: 1\n", "@identifier_column: None\n", - "@query_time: 0.021008542000004127\n" + "@query_time: 
0.02204383199568838\n" ] } ], @@ -140,6 +145,83 @@ "print(response)" ] }, + { + "cell_type": "markdown", + "id": "d7b7c9d5-2eae-4d57-a3f1-78792fca0b83", + "metadata": { + "tags": [] + }, + "source": [ + "- Create a table to store the metadata information " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8897b9bb-0993-4eb0-959d-6484a651a90f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@status: 0\n", + "@batch: Batch Object:\n", + "@dataframe: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "@batch_size: 0\n", + "@identifier_column: None\n", + "@query_time: 0.026316527000744827\n" + ] + } + ], + "source": [ + "create_table_query = f\"\"\" \n", + "\n", + " CREATE TABLE IF NOT EXISTS bddtestmeta(\n", + " id INTEGER UNIQUE,\n", + " frame_id INTEGER,\n", + " video_id INTEGER,\n", + " dataset_name TEXT(30),\n", + " label TEXT(30),\n", + " bbox NDARRAY FLOAT32(4),\n", + " object_id INTEGER\n", + " );\n", + " \n", + " \"\"\"\n", + "\n", + "cursor.execute(create_table_query)\n", + "response = cursor.fetch_all()\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "574f8c04-c934-4654-8558-576e34e8735d", + "metadata": {}, + "source": [ + "- Load the 2 corresponding CSV files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf8415ac-f9e0-4bee-b2aa-b4e104b10a4b", + "metadata": {}, + "outputs": [], + "source": [ + "cursor.execute(\"LOAD FILE 'bddtest/info/bddtest_00a2e3ca5c856cde.csv' INTO bddtestmeta WITH FORMAT CSV;\")\n", + "response = cursor.fetch_all()\n", + "print(response)\n", + "\n", + "cursor.execute(\"LOAD FILE 'bddtest/info/bddtest_00a395fed60c0b47.csv' INTO bddtestmeta WITH FORMAT CSV;\")\n", + "response = cursor.fetch_all()\n", + "print(response)" + ] + }, { "cell_type": "markdown", "id": "8ace1640-fb0c-4e70-a595-f7e9befdf757", @@ -151,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, 
"id": "69e378d4-4d30-47cf-84c8-8fe56afe517e", "metadata": {}, "outputs": [ @@ -159,29 +241,193 @@ "name": "stdout", "output_type": "stream", "text": [ - "@status: -1\n", - "@batch: None\n", - "@error: CUDA error: no kernel image is available for execution on the device\n", - "CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.\n", - "For debugging consider passing CUDA_LAUNCH_BLOCKING=1.\n" + "@status: 0\n", + "@batch: Batch Object:\n", + "@dataframe: bddtest_1.id fastrcnnobjectdetector.labels \\\n", + "0 0 [stop sign, car, stop sign, car, car, car, car... \n", + "1 1 [stop sign, car, car, stop sign, car, car, car... \n", + "2 2 [stop sign, car, car, stop sign, car, car, car... \n", + "3 3 [stop sign, car, car, stop sign, car, car, car... \n", + "4 4 [stop sign, car, car, car, car, car, stop sign... \n", + "5 5 [stop sign, car, car, car, car, stop sign, car... \n", + "6 6 [stop sign, car, car, car, car, stop sign, car... \n", + "7 7 [stop sign, car, car, car, car, stop sign, car... \n", + "8 8 [stop sign, car, car, car, car, stop sign, car... \n", + "9 9 [stop sign, car, car, car, car, car, stop sign... \n", + "10 10 [stop sign, car, car, car, car, car, stop sign... \n", + "11 11 [stop sign, car, car, car, car, car, stop sign... \n", + "12 12 [stop sign, car, car, car, car, stop sign, car... \n", + "13 13 [stop sign, car, car, car, car, car, stop sign... \n", + "14 14 [stop sign, car, car, car, car, car, car, car,... \n", + "15 15 [stop sign, car, car, car, car, stop sign, car... \n", + "16 16 [stop sign, car, car, stop sign, car, car, car... \n", + "17 17 [stop sign, car, car, stop sign, car, car, car... \n", + "18 18 [stop sign, car, car, stop sign, car, car, car... \n", + "19 19 [stop sign, car, car, stop sign, car, car, car... \n", + "20 0 [stop sign, car, stop sign, car, car, car, car... \n", + "21 1 [stop sign, car, car, stop sign, car, car, car... 
\n", + "22 2 [stop sign, car, car, stop sign, car, car, car... \n", + "23 3 [stop sign, car, car, stop sign, car, car, car... \n", + "24 4 [stop sign, car, car, car, car, car, stop sign... \n", + "25 5 [stop sign, car, car, car, car, stop sign, car... \n", + "26 6 [stop sign, car, car, car, car, stop sign, car... \n", + "27 7 [stop sign, car, car, car, car, stop sign, car... \n", + "28 8 [stop sign, car, car, car, car, stop sign, car... \n", + "29 9 [stop sign, car, car, car, car, car, stop sign... \n", + "30 10 [stop sign, car, car, car, car, car, stop sign... \n", + "31 11 [stop sign, car, car, car, car, car, stop sign... \n", + "32 12 [stop sign, car, car, car, car, stop sign, car... \n", + "33 13 [stop sign, car, car, car, car, car, stop sign... \n", + "34 14 [stop sign, car, car, car, car, car, car, car,... \n", + "35 15 [stop sign, car, car, car, car, stop sign, car... \n", + "36 16 [stop sign, car, car, stop sign, car, car, car... \n", + "37 17 [stop sign, car, car, stop sign, car, car, car... \n", + "38 18 [stop sign, car, car, stop sign, car, car, car... \n", + "39 19 [stop sign, car, car, stop sign, car, car, car... \n", + "\n", + " fastrcnnobjectdetector.bboxes \\\n", + "0 [[343.097442627, 195.4741821289, 405.721130371... \n", + "1 [[334.6392822266, 191.1281433105, 398.79653930... \n", + "2 [[327.6994934082, 189.3657836914, 392.40979003... \n", + "3 [[323.3418884277, 187.5710754395, 388.48910522... \n", + "4 [[319.9740600586, 186.371383667, 385.494506835... \n", + "5 [[315.4874267578, 184.0178833008, 382.19509887... \n", + "6 [[311.832244873, 184.0472717285, 379.105621337... \n", + "7 [[307.4016418457, 183.0279998779, 374.34979248... \n", + "8 [[302.5659790039, 180.6338806152, 370.37249755... \n", + "9 [[298.9287719727, 179.3368682861, 368.86962890... \n", + "10 [[296.3753051758, 177.8909454346, 366.65948486... \n", + "11 [[290.900604248, 176.7446594238, 361.315673828... \n", + "12 [[288.2717895508, 174.8643493652, 358.72824096... 
\n", + "13 [[284.2260131836, 174.2529602051, 355.75827026... \n", + "14 [[279.9111633301, 172.367767334, 354.378875732... \n", + "15 [[275.9497375488, 170.4543914795, 351.43450927... \n", + "16 [[273.2353820801, 167.7679138184, 347.34454345... \n", + "17 [[268.1208496094, 165.2581787109, 343.94210815... \n", + "18 [[265.0013427734, 163.6615753174, 340.21258544... \n", + "19 [[259.8805541992, 161.1826782227, 339.23223876... \n", + "20 [[343.097442627, 195.4741821289, 405.721130371... \n", + "21 [[334.6392822266, 191.1281433105, 398.79653930... \n", + "22 [[327.6994934082, 189.3657836914, 392.40979003... \n", + "23 [[323.3418884277, 187.5710754395, 388.48910522... \n", + "24 [[319.9740600586, 186.371383667, 385.494506835... \n", + "25 [[315.4874267578, 184.0178833008, 382.19509887... \n", + "26 [[311.832244873, 184.0472717285, 379.105621337... \n", + "27 [[307.4016418457, 183.0279998779, 374.34979248... \n", + "28 [[302.5659790039, 180.6338806152, 370.37249755... \n", + "29 [[298.9287719727, 179.3368682861, 368.86962890... \n", + "30 [[296.3753051758, 177.8909454346, 366.65948486... \n", + "31 [[290.900604248, 176.7446594238, 361.315673828... \n", + "32 [[288.2717895508, 174.8643493652, 358.72824096... \n", + "33 [[284.2260131836, 174.2529602051, 355.75827026... \n", + "34 [[279.9111633301, 172.367767334, 354.378875732... \n", + "35 [[275.9497375488, 170.4543914795, 351.43450927... \n", + "36 [[273.2353820801, 167.7679138184, 347.34454345... \n", + "37 [[268.1208496094, 165.2581787109, 343.94210815... \n", + "38 [[265.0013427734, 163.6615753174, 340.21258544... \n", + "39 [[259.8805541992, 161.1826782227, 339.23223876... \n", + "\n", + " fastrcnnobjectdetector.scores \n", + "0 [0.9981474876, 0.9964281917000001, 0.994714677... \n", + "1 [0.9978869557000001, 0.9953299761000001, 0.994... \n", + "2 [0.9990385771, 0.9947265983, 0.9944388866, 0.9... \n", + "3 [0.9989394546, 0.9953050613000001, 0.994326889... \n", + "4 [0.9987252355, 0.9948284030000001, 0.992198407... 
\n", + "5 [0.9990416169, 0.9954633117, 0.9914832711, 0.9... \n", + "6 [0.9990183115000001, 0.9958524704, 0.994458437... \n", + "7 [0.9987213016, 0.9967773557, 0.995477736000000... \n", + "8 [0.9987788796, 0.9963755012000001, 0.994629442... \n", + "9 [0.9989332557, 0.9951727986000001, 0.992852389... \n", + "10 [0.9988330007, 0.9963194132000001, 0.993861198... \n", + "11 [0.9984756112000001, 0.9955522418, 0.995419502... \n", + "12 [0.9988971949000001, 0.9956197143000001, 0.995... \n", + "13 [0.998880446, 0.9963383675, 0.9951064587, 0.99... \n", + "14 [0.9992144108000001, 0.996243, 0.9955233335, 0... \n", + "15 [0.9992043376, 0.9949402809000001, 0.993791997... \n", + "16 [0.9990880489, 0.9960907102000001, 0.994690418... \n", + "17 [0.9988904595, 0.99519521, 0.995152235, 0.9935... \n", + "18 [0.9989151955000001, 0.9953479171, 0.994166851... \n", + "19 [0.9991793036000001, 0.9965285659, 0.995107770... \n", + "20 [0.9981474876, 0.9964281917000001, 0.994714677... \n", + "21 [0.9978869557000001, 0.9953299761000001, 0.994... \n", + "22 [0.9990385771, 0.9947265983, 0.9944388866, 0.9... \n", + "23 [0.9989394546, 0.9953050613000001, 0.994326889... \n", + "24 [0.9987252355, 0.9948284030000001, 0.992198407... \n", + "25 [0.9990416169, 0.9954633117, 0.9914832711, 0.9... \n", + "26 [0.9990183115000001, 0.9958524704, 0.994458437... \n", + "27 [0.9987213016, 0.9967773557, 0.995477736000000... \n", + "28 [0.9987788796, 0.9963755012000001, 0.994629442... \n", + "29 [0.9989332557, 0.9951727986000001, 0.992852389... \n", + "30 [0.9988330007, 0.9963194132000001, 0.993861198... \n", + "31 [0.9984756112000001, 0.9955522418, 0.995419502... \n", + "32 [0.9988971949000001, 0.9956197143000001, 0.995... \n", + "33 [0.998880446, 0.9963383675, 0.9951064587, 0.99... \n", + "34 [0.9992144108000001, 0.996243, 0.9955233335, 0... \n", + "35 [0.9992043376, 0.9949402809000001, 0.993791997... \n", + "36 [0.9990880489, 0.9960907102000001, 0.994690418... 
\n", + "37 [0.9988904595, 0.99519521, 0.995152235, 0.9935... \n", + "38 [0.9989151955000001, 0.9953479171, 0.994166851... \n", + "39 [0.9991793036000001, 0.9965285659, 0.995107770... \n", + "@batch_size: 40\n", + "@identifier_column: None\n", + "@query_time: 161.77714671699505\n" ] } ], "source": [ "cursor.execute(\"\"\"SELECT id, FastRCNNObjectDetector(data) \n", " FROM bddtest_1 \n", - " WHERE id < 20\"\"\")\n", + " WHERE id < 20;\"\"\")\n", "response = cursor.fetch_all()\n", "print(response)" ] }, + { + "cell_type": "markdown", + "id": "55551f98-6e3f-4088-a02f-6852b56dfc63", + "metadata": {}, + "source": [ + "- Run a test query on the loaded metadata (CSV)" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "2990038a-00ec-4d36-aae2-82a789f2389a", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@status: 0\n", + "@batch: Batch Object:\n", + "@dataframe: bddtestmeta.id bddtestmeta.frame_id bddtestmeta.video_id\n", + "0 9881 583 4\n", + "1 9882 583 4\n", + "2 9883 589 4\n", + "3 9884 589 4\n", + "4 9885 589 4\n", + "... ... ... 
...\n", + "17000 9876 583 4\n", + "17001 9877 583 4\n", + "17002 9878 583 4\n", + "17003 9879 583 4\n", + "17004 9880 583 4\n", + "\n", + "[17005 rows x 3 columns]\n", + "@batch_size: 17005\n", + "@identifier_column: None\n", + "@query_time: 2.022062427000492\n" + ] + } + ], + "source": [ + "cursor.execute(\"\"\"SELECT id, frame_id, video_id \n", + " FROM bddtestmeta;\"\"\")\n", + "response = cursor.fetch_all()\n", + "print(response)" + ] }, { "cell_type": "code", From d5efb8db3e2e4cb487c354f87d9309216954b1f5 Mon Sep 17 00:00:00 2001 From: Anirudh Prabakaran Date: Fri, 4 Nov 2022 10:54:11 -0400 Subject: [PATCH 4/4] Fix testcases --- eva/executor/load_csv_executor.py | 12 +----- test/executor/test_load_executor.py | 62 ++++++++++++++++++++++++++- test/executor/test_upload_executor.py | 11 ++++- 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/eva/executor/load_csv_executor.py b/eva/executor/load_csv_executor.py index de7ae97cf..f541137ee 100644 --- a/eva/executor/load_csv_executor.py +++ b/eva/executor/load_csv_executor.py @@ -73,14 +73,6 @@ def exec(self): num_loaded_frames += len(batch) # yield result - df_yield_result = Batch( - pd.DataFrame( - { - "CSV": csv_file_path, - "Number of loaded frames": num_loaded_frames, - }, - index=[0], - ) + yield Batch( + pd.DataFrame([f"CSV successfully loaded at location: {csv_file_path}"]) ) - - yield df_yield_result diff --git a/test/executor/test_load_executor.py b/test/executor/test_load_executor.py index 8f8e4ab79..fbeb0fb5a 100644 --- a/test/executor/test_load_executor.py +++ b/test/executor/test_load_executor.py @@ -125,6 +125,32 @@ def test_should_fail_to_find_file(self, write_mock, create_mock): with self.assertRaises(RuntimeError): next(load_executor.exec()) + @patch("eva.executor.load_video_executor.StorageEngine.write") + def test_should_fail_to_find_csv_file(self, write_mock, create_mock): + file_path = "csv" + table_metainfo = "info" + batch_mem_size = 3000 + file_options = {} + 
file_options["file_format"] = FileFormatType.CSV + column_list = None + plan = type( + "LoadDataPlan", + (), + { + "table_metainfo": table_metainfo, + "file_path": file_path, + "batch_mem_size": batch_mem_size, + "column_list": column_list, + "file_options": file_options, + }, + ) + + load_executor = LoadDataExecutor(plan) + with patch.object(Path, "exists") as mock_exists: + mock_exists.side_effect = [False, False] + with self.assertRaises(RuntimeError): + next(load_executor.exec()) + @patch("eva.storage.storage_engine.StorageEngine.write") def test_should_call_csv_reader_and_storage_engine(self, write_mock): batch_frames = [list(range(5))] * 2 @@ -132,6 +158,9 @@ def test_should_call_csv_reader_and_storage_engine(self, write_mock): # creates a dummy.csv create_sample_csv() + self.upload_path = Path( + ConfigurationManager().get_value("storage", "upload_dir") + ) file_path = "dummy.csv" table_metainfo = "info" batch_mem_size = 3000 @@ -164,8 +193,39 @@ def test_should_call_csv_reader_and_storage_engine(self, write_mock): # Note: We call exec() from the child classes. 
self.assertEqual( batch, - Batch(pd.DataFrame([{"CSV": file_path, "Number of loaded frames": 20}])), + Batch( + pd.DataFrame( + [ + f"CSV successfully loaded at location: {self.upload_path / file_path}" + ] + ) + ), ) # remove the dummy.csv file_remove("dummy.csv") + + def test_should_fail_to_find_csv_file(self): + file_path = "dummy" + table_metainfo = "info" + batch_mem_size = 3000 + file_options = {} + file_options["file_format"] = FileFormatType.CSV + column_list = None + plan = type( + "LoadDataPlan", + (), + { + "table_metainfo": table_metainfo, + "file_path": file_path, + "batch_mem_size": batch_mem_size, + "column_list": column_list, + "file_options": file_options, + }, + ) + + load_executor = LoadDataExecutor(plan) + with patch.object(Path, "exists") as mock_exists: + mock_exists.side_effect = [False, False] + with self.assertRaises(RuntimeError): + next(load_executor.exec()) diff --git a/test/executor/test_upload_executor.py b/test/executor/test_upload_executor.py index 136ca6d78..acaa511b3 100644 --- a/test/executor/test_upload_executor.py +++ b/test/executor/test_upload_executor.py @@ -141,6 +141,9 @@ def test_should_call_csv_reader_and_storage_engine(self, write_mock): # creates a dummy.csv csv_blob = create_sample_csv_as_blob() + self.upload_path = Path( + ConfigurationManager().get_value("storage", "upload_dir") + ) file_path = "dummy.csv" table_metainfo = "info" batch_mem_size = 3000 @@ -173,7 +176,13 @@ def test_should_call_csv_reader_and_storage_engine(self, write_mock): # Note: We call exec() from the child classes. self.assertEqual( batch, - Batch(pd.DataFrame([{"CSV": file_path, "Number of loaded frames": 20}])), + Batch( + pd.DataFrame( + [ + f"CSV successfully loaded at location: {self.upload_path / file_path}" + ] + ) + ), ) # remove the dummy.csv