From a362b7348acbbc22e88660bbce69e1a279ea0e65 Mon Sep 17 00:00:00 2001 From: Andy Xu Date: Wed, 13 Sep 2023 23:20:56 -0700 Subject: [PATCH 1/3] Created using Colaboratory --- tutorials/16-homesale-forecasting.ipynb | 1669 +++++++++++++++++++++++ 1 file changed, 1669 insertions(+) create mode 100644 tutorials/16-homesale-forecasting.ipynb diff --git a/tutorials/16-homesale-forecasting.ipynb b/tutorials/16-homesale-forecasting.ipynb new file mode 100644 index 0000000000..5febe7dda2 --- /dev/null +++ b/tutorials/16-homesale-forecasting.ipynb @@ -0,0 +1,1669 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true, + "authorship_tag": "ABX9TyMRs4OhSD+xFDErR5d6n38S", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Setup\n", + "We first set up the backend postgres database and EvaDB to bring AI inside database systems." + ], + "metadata": { + "id": "GHToaA_NKiHY" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Start Postgres" + ], + "metadata": { + "id": "f1GutjuqECBh" + } + }, + { + "cell_type": "code", + "source": [ + "!apt install postgresql\n", + "!service postgresql start" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z7PodOEEEDsQ", + "outputId": "2e1039a1-415f-47c0-bae6-784b15d10714" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... 
Done\n", + "The following additional packages will be installed:\n", + " libcommon-sense-perl libjson-perl libjson-xs-perl libtypes-serialiser-perl\n", + " logrotate netbase postgresql-14 postgresql-client-14\n", + " postgresql-client-common postgresql-common ssl-cert sysstat\n", + "Suggested packages:\n", + " bsd-mailx | mailx postgresql-doc postgresql-doc-14 isag\n", + "The following NEW packages will be installed:\n", + " libcommon-sense-perl libjson-perl libjson-xs-perl libtypes-serialiser-perl\n", + " logrotate netbase postgresql postgresql-14 postgresql-client-14\n", + " postgresql-client-common postgresql-common ssl-cert sysstat\n", + "0 upgraded, 13 newly installed, 0 to remove and 16 not upgraded.\n", + "Need to get 18.3 MB of archives.\n", + "After this operation, 51.5 MB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 logrotate amd64 3.19.0-1ubuntu1.1 [54.3 kB]\n", + "Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 netbase all 6.3 [12.9 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libcommon-sense-perl amd64 3.75-2build1 [21.1 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu jammy/main amd64 libjson-perl all 4.04000-1 [81.8 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libtypes-serialiser-perl all 1.01-1 [11.6 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu jammy/main amd64 libjson-xs-perl amd64 4.030-1build3 [87.2 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu jammy/main amd64 postgresql-client-common all 238 [29.6 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 postgresql-client-14 amd64 14.9-0ubuntu0.22.04.1 [1,222 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu jammy/main amd64 ssl-cert all 1.1.2 [17.4 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu jammy/main amd64 postgresql-common all 238 [169 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 postgresql-14 amd64 
14.9-0ubuntu0.22.04.1 [16.1 MB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu jammy/main amd64 postgresql all 14+238 [3,288 B]\n", + "Get:13 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 sysstat amd64 12.5.2-2ubuntu0.2 [487 kB]\n", + "Fetched 18.3 MB in 2s (12.2 MB/s)\n", + "Preconfiguring packages ...\n", + "Selecting previously unselected package logrotate.\n", + "(Reading database ... 120901 files and directories currently installed.)\n", + "Preparing to unpack .../00-logrotate_3.19.0-1ubuntu1.1_amd64.deb ...\n", + "Unpacking logrotate (3.19.0-1ubuntu1.1) ...\n", + "Selecting previously unselected package netbase.\n", + "Preparing to unpack .../01-netbase_6.3_all.deb ...\n", + "Unpacking netbase (6.3) ...\n", + "Selecting previously unselected package libcommon-sense-perl:amd64.\n", + "Preparing to unpack .../02-libcommon-sense-perl_3.75-2build1_amd64.deb ...\n", + "Unpacking libcommon-sense-perl:amd64 (3.75-2build1) ...\n", + "Selecting previously unselected package libjson-perl.\n", + "Preparing to unpack .../03-libjson-perl_4.04000-1_all.deb ...\n", + "Unpacking libjson-perl (4.04000-1) ...\n", + "Selecting previously unselected package libtypes-serialiser-perl.\n", + "Preparing to unpack .../04-libtypes-serialiser-perl_1.01-1_all.deb ...\n", + "Unpacking libtypes-serialiser-perl (1.01-1) ...\n", + "Selecting previously unselected package libjson-xs-perl.\n", + "Preparing to unpack .../05-libjson-xs-perl_4.030-1build3_amd64.deb ...\n", + "Unpacking libjson-xs-perl (4.030-1build3) ...\n", + "Selecting previously unselected package postgresql-client-common.\n", + "Preparing to unpack .../06-postgresql-client-common_238_all.deb ...\n", + "Unpacking postgresql-client-common (238) ...\n", + "Selecting previously unselected package postgresql-client-14.\n", + "Preparing to unpack .../07-postgresql-client-14_14.9-0ubuntu0.22.04.1_amd64.deb ...\n", + "Unpacking postgresql-client-14 (14.9-0ubuntu0.22.04.1) ...\n", + "Selecting previously unselected 
package ssl-cert.\n", + "Preparing to unpack .../08-ssl-cert_1.1.2_all.deb ...\n", + "Unpacking ssl-cert (1.1.2) ...\n", + "Selecting previously unselected package postgresql-common.\n", + "Preparing to unpack .../09-postgresql-common_238_all.deb ...\n", + "Adding 'diversion of /usr/bin/pg_config to /usr/bin/pg_config.libpq-dev by postgresql-common'\n", + "Unpacking postgresql-common (238) ...\n", + "Selecting previously unselected package postgresql-14.\n", + "Preparing to unpack .../10-postgresql-14_14.9-0ubuntu0.22.04.1_amd64.deb ...\n", + "Unpacking postgresql-14 (14.9-0ubuntu0.22.04.1) ...\n", + "Selecting previously unselected package postgresql.\n", + "Preparing to unpack .../11-postgresql_14+238_all.deb ...\n", + "Unpacking postgresql (14+238) ...\n", + "Selecting previously unselected package sysstat.\n", + "Preparing to unpack .../12-sysstat_12.5.2-2ubuntu0.2_amd64.deb ...\n", + "Unpacking sysstat (12.5.2-2ubuntu0.2) ...\n", + "Setting up logrotate (3.19.0-1ubuntu1.1) ...\n", + "Created symlink /etc/systemd/system/timers.target.wants/logrotate.timer → /lib/systemd/system/logrotate.timer.\n", + "Setting up libcommon-sense-perl:amd64 (3.75-2build1) ...\n", + "Setting up ssl-cert (1.1.2) ...\n", + "Setting up libtypes-serialiser-perl (1.01-1) ...\n", + "Setting up libjson-perl (4.04000-1) ...\n", + "Setting up netbase (6.3) ...\n", + "Setting up sysstat (12.5.2-2ubuntu0.2) ...\n", + "\n", + "Creating config file /etc/default/sysstat with new version\n", + "update-alternatives: using /usr/bin/sar.sysstat to provide /usr/bin/sar (sar) in auto mode\n", + "Created symlink /etc/systemd/system/sysstat.service.wants/sysstat-collect.timer → /lib/systemd/system/sysstat-collect.timer.\n", + "Created symlink /etc/systemd/system/sysstat.service.wants/sysstat-summary.timer → /lib/systemd/system/sysstat-summary.timer.\n", + "Created symlink /etc/systemd/system/multi-user.target.wants/sysstat.service → /lib/systemd/system/sysstat.service.\n", + "Setting up 
postgresql-client-common (238) ...\n", + "Setting up libjson-xs-perl (4.030-1build3) ...\n", + "Setting up postgresql-client-14 (14.9-0ubuntu0.22.04.1) ...\n", + "update-alternatives: using /usr/share/postgresql/14/man/man1/psql.1.gz to provide /usr/share/man/man1/psql.1.gz (psql.1.gz) in auto mode\n", + "Setting up postgresql-common (238) ...\n", + "Adding user postgres to group ssl-cert\n", + "\n", + "Creating config file /etc/postgresql-common/createcluster.conf with new version\n", + "Building PostgreSQL dictionaries from installed myspell/hunspell packages...\n", + "Removing obsolete dictionary files:\n", + "Created symlink /etc/systemd/system/multi-user.target.wants/postgresql.service → /lib/systemd/system/postgresql.service.\n", + "Setting up postgresql-14 (14.9-0ubuntu0.22.04.1) ...\n", + "Creating new PostgreSQL cluster 14/main ...\n", + "/usr/lib/postgresql/14/bin/initdb -D /var/lib/postgresql/14/main --auth-local peer --auth-host scram-sha-256 --no-instructions\n", + "The files belonging to this database system will be owned by user \"postgres\".\n", + "This user must also own the server process.\n", + "\n", + "The database cluster will be initialized with locale \"en_US.UTF-8\".\n", + "The default database encoding has accordingly been set to \"UTF8\".\n", + "The default text search configuration will be set to \"english\".\n", + "\n", + "Data page checksums are disabled.\n", + "\n", + "fixing permissions on existing directory /var/lib/postgresql/14/main ... ok\n", + "creating subdirectories ... ok\n", + "selecting dynamic shared memory implementation ... posix\n", + "selecting default max_connections ... 100\n", + "selecting default shared_buffers ... 128MB\n", + "selecting default time zone ... Etc/UTC\n", + "creating configuration files ... ok\n", + "running bootstrap script ... ok\n", + "performing post-bootstrap initialization ... ok\n", + "syncing data to disk ... 
ok\n", + "update-alternatives: using /usr/share/postgresql/14/man/man1/postmaster.1.gz to provide /usr/share/man/man1/postmaster.1.gz (postmaster.1.gz) in auto mode\n", + "invoke-rc.d: could not determine current runlevel\n", + "invoke-rc.d: policy-rc.d denied execution of start.\n", + "Setting up postgresql (14+238) ...\n", + "Processing triggers for man-db (2.10.2-1) ...\n", + " * Starting PostgreSQL 14 database server\n", + " ...done.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create User and Database" + ], + "metadata": { + "id": "hfBwvBTfEWIR" + } + }, + { + "cell_type": "code", + "source": [ + "!sudo -u postgres psql -c \"CREATE USER eva WITH SUPERUSER PASSWORD 'password'\"\n", + "!sudo -u postgres psql -c \"CREATE DATABASE evadb\"" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UrlfWZOkEa4V", + "outputId": "dcbf46f5-489d-41bd-ad76-ce9254d3692c" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "CREATE ROLE\n", + "CREATE DATABASE\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Prettify Output" + ], + "metadata": { + "id": "H2m43vfZE8x6" + } + }, + { + "cell_type": "code", + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "from IPython.core.display import display, HTML\n", + "def pretty_print(df):\n", + " return display(HTML( df.to_html().replace(\"\\\\n\",\"
\")))" + ], + "metadata": { + "id": "EZf65ZkcFIX7" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Install EvaDB\n", + "\n", + "We install EvaDB with extra postgres and forecasting dependency." + ], + "metadata": { + "id": "CpYS09iMEhaT" + } + }, + { + "cell_type": "code", + "source": [ + "%pip install --quiet \"evadb[postgres,forecasting] @ git+https://github.com/georgia-tech-db/evadb.git@a40c72ed6cb18993e2ae5bda28c7195f4de4f109\"\n", + "\n", + "import evadb\n", + "cursor = evadb.connect().cursor()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NoAykveeElqm", + "outputId": "456ac4a4-0b88-4c86-b183-4fbd2f5a2b0c" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[33mWARNING: Skipping evadb as it is not installed.\u001b[0m\u001b[33m\n", + "\u001b[0m Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.6/137.6 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.9/110.9 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m275.0/275.0 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.0/57.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.2/169.2 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.6/154.6 kB\u001b[0m \u001b[31m18.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m303.2/303.2 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.2/144.2 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.3/135.3 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for evadb (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for fugue-sql-antlr (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Downloading: \"http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/mnist-b07bb66b.pth\" to /root/.cache/torch/hub/checkpoints/mnist-b07bb66b.pth\n", + "100%|██████████| 1.03M/1.03M [00:01<00:00, 1.05MB/s]\n", + "Downloading: \"https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Prepare Data\n", + "We then prepare the dataset used in this time series forecasting use case." + ], + "metadata": { + "id": "mUN-rlV8LHxN" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Create Data Source in EvaDB\n", + "We use a data source to connect EvaDB directly to underlying database systems like Postgres." 
+ ], + "metadata": { + "id": "nKwAY9eMFoc8" + } + }, + { + "cell_type": "code", + "source": [ + "params = {\n", + " \"user\": \"eva\",\n", + " \"password\": \"password\",\n", + " \"host\": \"localhost\",\n", + " \"port\": \"5432\",\n", + " \"database\": \"evadb\",\n", + "}\n", + "query = f\"CREATE DATABASE postgres_data WITH ENGINE = 'postgres', PARAMETERS = {params};\"\n", + "cursor.query(query).df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81 + }, + "id": "IsP6rLZ2Ftxo", + "outputId": "4f337261-1a04-4a1f-b8a3-f521536e4171" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "0 The database postgres_data has been successful..." + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
0The database postgres_data has been successful...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Load the Datasets\n", + "We load the [House Property Sales Time Series](https://www.kaggle.com/datasets/htagholdings/property-sales?resource=download) into our PostgreSQL database." + ], + "metadata": { + "id": "Lwls48NQJb6G" + } + }, + { + "cell_type": "code", + "source": [ + "!mkdir -p content\n", + "!wget -qnc -O /content/home_sales.csv https://www.dropbox.com/scl/fi/ww9qejjd3u9gc0m7dagiz/home_sales.csv?rlkey=3gy7yo3michjyumnhi8z24pys&dl=0" + ], + "metadata": { + "id": "msbHcP_xJpFV" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "cursor.query(\"\"\"\n", + " USE postgres_data {\n", + " CREATE TABLE IF NOT EXISTS home_sales (saledate VARCHAR(64), MA INT, type VARCHAR(64), bedrooms INT)\n", + " }\n", + "\"\"\").df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81 + }, + "id": "MhYfovbzNB-k", + "outputId": "ee87f25a-a6a6-4459-e808-8031ee356a27" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " status\n", + "0 success" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
status
0success
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "cursor.query(\"\"\"\n", + " USE postgres_data {\n", + " COPY home_sales(saledate, MA, type, bedrooms)\n", + " FROM '/content/home_sales.csv'\n", + " DELIMITER ',' CSV HEADER\n", + " }\n", + "\"\"\").df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81 + }, + "id": "OH8Fxn0SNYO3", + "outputId": "1473db0e-2e2c-462b-c46a-85a425012545" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " status\n", + "0 success" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
status
0success
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Preview the Data\n", + "The `home_sales` table contains 4 columns.\n", + "- saledate: the date the home was sold\n", + "- ma: moving average of the historical median price of the home\n", + "- type: whether the home is a house or a unit\n", + "- bedrooms: number of bedrooms" + ], + "metadata": { + "id": "vKXHBLtxNsbg" + } + }, + { + "cell_type": "code", + "source": [ + "cursor.query(\"SELECT * FROM postgres_data.home_sales LIMIT 3;\").df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "20_IEby7N1Pe", + "outputId": "979ac973-b746-4b2c-b28f-c9672f4f4b8f" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " home_sales.ma home_sales.bedrooms home_sales.saledate home_sales.type\n", + "0 441854 2 30/09/2007 house\n", + "1 441854 2 31/12/2007 house\n", + "2 441854 2 31/03/2008 house" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
home_sales.mahome_sales.bedroomshome_sales.saledatehome_sales.type
0441854230/09/2007house
1441854231/12/2007house
2441854231/03/2008house
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "TPpLZnydO2Ti" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Analyze Data with EvaDB\n", + "\n", + "We then use EvaDB to train a model to forecast the home price." + ], + "metadata": { + "id": "7DX411TFO-Lp" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Train the Forecast Model\n", + "We use the [statsforecast](https://github.com/Nixtla/statsforecast) engine to train a time series forecast model for sale prices of homes with two bedrooms." + ], + "metadata": { + "id": "hoLzadFlPOvh" + } + }, + { + "cell_type": "code", + "source": [ + "cursor.query(\"\"\"\n", + " CREATE FUNCTION IF NOT EXISTS HomeSaleForecast FROM\n", + " (\n", + " SELECT type, saledate, ma\n", + " FROM postgres_data.home_sales\n", + " WHERE bedrooms = 2\n", + " )\n", + " TYPE Forecasting\n", + " PREDICT 'ma'\n", + " TIME 'saledate'\n", + " ID 'type'\n", + " FREQUENCY 'M';\n", + "\"\"\").df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81 + }, + "id": "IFstPBI8UINv", + "outputId": "1499f130-ce52-44ec-d398-5dddc3f5d3b6" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "0 Function HomeSaleForecast successfully added t..." + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
0Function HomeSaleForecast successfully added t...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Use the Forecast Model\n", + "We then use the `HomeSaleForecast` model to predict the sale price for homes with two bedrooms for the next three months." + ], + "metadata": { + "id": "AhItlDfBUHOo" + } + }, + { + "cell_type": "code", + "source": [ + "cursor.query(\"SELECT HomeSaleForecast(3);\").df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "id": "_7m-QQG5U3_C", + "outputId": "86383f52-ea94-47aa-847c-363bc65b5d56" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " homesaleforecast.type homesaleforecast.saledate homesaleforecast.ma\n", + "0 house 2019-10-31 510712.00000\n", + "1 house 2019-11-30 510712.00000\n", + "2 house 2019-12-31 510712.00000\n", + "3 unit 2019-10-31 423431.37500\n", + "4 unit 2019-11-30 422450.78125\n", + "5 unit 2019-12-31 421470.15625" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
homesaleforecast.typehomesaleforecast.saledatehomesaleforecast.ma
0house2019-10-31510712.00000
1house2019-11-30510712.00000
2house2019-12-31510712.00000
3unit2019-10-31423431.37500
4unit2019-11-30422450.78125
5unit2019-12-31421470.15625
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can use `ORDER BY` to find out the type of home and months that have lower market price." + ], + "metadata": { + "id": "cShJsgoRVmXU" + } + }, + { + "cell_type": "code", + "source": [ + "cursor.query(\"SELECT HomeSaleForecast(3) ORDER BY ma;\").df()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 355 + }, + "id": "abmrZiOzVO5S", + "outputId": "14324529-618b-4158-9930-cf47b01e2196" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "09-14-2023 06:17:03 ERROR [statement_binder_context:statement_binder_context.py:raise_error:0147] Found invalid column ma\n", + "ERROR:evadb.utils.logging_manager:Found invalid column ma\n" + ] + }, + { + "output_type": "error", + "ename": "BinderError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBinderError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcursor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"SELECT HomeSaleForecast(3).ma ORDER BY MA;\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/interfaces/relational/relation.py\u001b[0m in \u001b[0;36mdf\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m \"\"\"\n\u001b[0;32m--> 110\u001b[0;31m \u001b[0mbatch\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 111\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"relation execute failed\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mframes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/interfaces/relational/relation.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0mBatch\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mevadb\u001b[0m \u001b[0mBatch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \"\"\"\n\u001b[0;32m--> 120\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexecute_statement\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_evadb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_query_node\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 121\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mframes\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/server/command_handler.py\u001b[0m in 
\u001b[0;36mexecute_statement\u001b[0;34m(evadb, stmt, do_not_raise_exceptions, do_not_print_exceptions, **kwargs)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0mplan_generator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"plan_generator\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPlanGenerator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevadb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstmt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSKIP_BINDER_AND_OPTIMIZER_STATEMENTS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0mStatementBinder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mStatementBinderContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevadb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcatalog\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstmt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0mlogical_plan\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStatementToPlanConverter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstmt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0mphysical_plan\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplan_generator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlogical_plan\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/functools.py\u001b[0m in \u001b[0;36m_method\u001b[0;34m(*args, 
**kwargs)\u001b[0m\n\u001b[1;32m 924\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 926\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 927\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0m_method\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__isabstractmethod__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__isabstractmethod__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/binder/statement_binder.py\u001b[0m in \u001b[0;36m_bind_select_statement\u001b[0;34m(self, node)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0morderby_list\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mexpr\u001b[0m 
\u001b[0;32min\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0morderby_list\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 208\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 209\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munion_link\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mcurrent_context\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_binder_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/functools.py\u001b[0m in \u001b[0;36m_method\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 924\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 926\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 927\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0m_method\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__isabstractmethod__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__isabstractmethod__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/binder/statement_binder.py\u001b[0m in \u001b[0;36m_bind_tuple_expr\u001b[0;34m(self, node)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregister\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mTupleValueExpression\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_bind_tuple_expr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTupleValueExpression\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m table_alias, col_obj = self._binder_context.get_binded_column(\n\u001b[0m\u001b[1;32m 297\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtable_alias\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 298\u001b[0m )\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/binder/statement_binder_context.py\u001b[0m in \u001b[0;36mget_binded_column\u001b[0;34m(self, col_name, alias)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0malias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol_obj\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 161\u001b[0;31m \u001b[0mraise_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 162\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_table_alias_map\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mColumnCatalogEntry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/evadb/binder/statement_binder_context.py\u001b[0m in \u001b[0;36mraise_error\u001b[0;34m()\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0merr_msg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"Found invalid column {col_name}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mBinderError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0malias\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mBinderError\u001b[0m: Found invalid column ma" + ] + } + ] + } + ] +} \ No newline at end of file From fd3432a0c3f2c24a126a5675b952f3782af86f26 Mon Sep 17 00:00:00 2001 From: 
Andy Xu Date: Wed, 13 Sep 2023 23:26:19 -0700 Subject: [PATCH 2/3] Update the title --- tutorials/16-homesale-forecasting.ipynb | 29 ++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tutorials/16-homesale-forecasting.ipynb b/tutorials/16-homesale-forecasting.ipynb index 5febe7dda2..a6fa3c3044 100644 --- a/tutorials/16-homesale-forecasting.ipynb +++ b/tutorials/16-homesale-forecasting.ipynb @@ -5,8 +5,10 @@ "colab": { "provenance": [], "toc_visible": true, - "authorship_tag": "ABX9TyMRs4OhSD+xFDErR5d6n38S", - "include_colab_link": true + "collapsed_sections": [ + "GHToaA_NKiHY" + ], + "authorship_tag": "ABX9TyPDYDZy7r2CJmf1GcxtVkx9" }, "kernelspec": { "name": "python3", @@ -19,13 +21,24 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, "source": [ - "\"Open" - ] + "# Home Sale Forecasting\n", + "In this tutorial, we demonstrate how to use the forecasting capability of EvaDB to predict the home sale price.\n", + "\n", + " \n", + " \n", + " \n", + " 
\n", + " Run on Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "


" + ], + "metadata": { + "id": "QjHMHmrUW0Vg" + } }, { "cell_type": "markdown", From 3b1b7b6fed2829510ac739215dbc7004d2370e17 Mon Sep 17 00:00:00 2001 From: Andy Xu Date: Thu, 14 Sep 2023 22:53:53 -0700 Subject: [PATCH 3/3] Having a basic version --- tutorials/16-homesale-forecasting.ipynb | 608 +++++++++++++++++------- 1 file changed, 433 insertions(+), 175 deletions(-) diff --git a/tutorials/16-homesale-forecasting.ipynb b/tutorials/16-homesale-forecasting.ipynb index a6fa3c3044..f2d81c9ff7 100644 --- a/tutorials/16-homesale-forecasting.ipynb +++ b/tutorials/16-homesale-forecasting.ipynb @@ -8,7 +8,7 @@ "collapsed_sections": [ "GHToaA_NKiHY" ], - "authorship_tag": "ABX9TyPDYDZy7r2CJmf1GcxtVkx9" + "authorship_tag": "ABX9TyPOmDfDbnc8CP+70g/FkjHR" }, "kernelspec": { "name": "python3", @@ -62,7 +62,7 @@ { "cell_type": "code", "source": [ - "!apt install postgresql\n", + "!apt -qq install postgresql\n", "!service postgresql start" ], "metadata": { @@ -70,7 +70,7 @@ "base_uri": "https://localhost:8080/" }, "id": "Z7PodOEEEDsQ", - "outputId": "2e1039a1-415f-47c0-bae6-784b15d10714" + "outputId": "0dcaa531-ae05-4c13-ab74-6dacdf6d8739" }, "execution_count": 1, "outputs": [ @@ -78,9 +78,6 @@ "output_type": "stream", "name": "stdout", "text": [ - "Reading package lists... Done\n", - "Building dependency tree... Done\n", - "Reading state information... 
Done\n", "The following additional packages will be installed:\n", " libcommon-sense-perl libjson-perl libjson-xs-perl libtypes-serialiser-perl\n", " logrotate netbase postgresql-14 postgresql-client-14\n", @@ -91,23 +88,9 @@ " libcommon-sense-perl libjson-perl libjson-xs-perl libtypes-serialiser-perl\n", " logrotate netbase postgresql postgresql-14 postgresql-client-14\n", " postgresql-client-common postgresql-common ssl-cert sysstat\n", - "0 upgraded, 13 newly installed, 0 to remove and 16 not upgraded.\n", + "0 upgraded, 13 newly installed, 0 to remove and 18 not upgraded.\n", "Need to get 18.3 MB of archives.\n", "After this operation, 51.5 MB of additional disk space will be used.\n", - "Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 logrotate amd64 3.19.0-1ubuntu1.1 [54.3 kB]\n", - "Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 netbase all 6.3 [12.9 kB]\n", - "Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libcommon-sense-perl amd64 3.75-2build1 [21.1 kB]\n", - "Get:4 http://archive.ubuntu.com/ubuntu jammy/main amd64 libjson-perl all 4.04000-1 [81.8 kB]\n", - "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libtypes-serialiser-perl all 1.01-1 [11.6 kB]\n", - "Get:6 http://archive.ubuntu.com/ubuntu jammy/main amd64 libjson-xs-perl amd64 4.030-1build3 [87.2 kB]\n", - "Get:7 http://archive.ubuntu.com/ubuntu jammy/main amd64 postgresql-client-common all 238 [29.6 kB]\n", - "Get:8 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 postgresql-client-14 amd64 14.9-0ubuntu0.22.04.1 [1,222 kB]\n", - "Get:9 http://archive.ubuntu.com/ubuntu jammy/main amd64 ssl-cert all 1.1.2 [17.4 kB]\n", - "Get:10 http://archive.ubuntu.com/ubuntu jammy/main amd64 postgresql-common all 238 [169 kB]\n", - "Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 postgresql-14 amd64 14.9-0ubuntu0.22.04.1 [16.1 MB]\n", - "Get:12 http://archive.ubuntu.com/ubuntu jammy/main amd64 postgresql all 14+238 [3,288 B]\n", - "Get:13 
http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 sysstat amd64 12.5.2-2ubuntu0.2 [487 kB]\n", - "Fetched 18.3 MB in 2s (12.2 MB/s)\n", "Preconfiguring packages ...\n", "Selecting previously unselected package logrotate.\n", "(Reading database ... 120901 files and directories currently installed.)\n", @@ -228,7 +211,7 @@ "base_uri": "https://localhost:8080/" }, "id": "UrlfWZOkEa4V", - "outputId": "dcbf46f5-489d-41bd-ad76-ce9254d3692c" + "outputId": "1fc62319-0d3f-4f2a-bcc4-e408587e50fb" }, "execution_count": 2, "outputs": [ @@ -291,7 +274,7 @@ "base_uri": "https://localhost:8080/" }, "id": "NoAykveeElqm", - "outputId": "456ac4a4-0b88-4c86-b183-4fbd2f5a2b0c" + "outputId": "de6547e5-670d-4fba-d081-30ffecc74849" }, "execution_count": 4, "outputs": [ @@ -299,25 +282,25 @@ "output_type": "stream", "name": "stdout", "text": [ - "\u001b[33mWARNING: Skipping evadb as it is not installed.\u001b[0m\u001b[33m\n", - "\u001b[0m Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.6/137.6 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.9/110.9 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m275.0/275.0 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.0/57.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.2/169.2 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.6/154.6 kB\u001b[0m \u001b[31m18.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 kB\u001b[0m 
\u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.6/137.6 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.9/110.9 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m275.0/275.0 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m32.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[33mWARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))': /simple/triad/\u001b[0m\u001b[33m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.0/57.0 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.2/169.2 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.6/154.6 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m303.2/303.2 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.2/144.2 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.3/135.3 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m303.2/303.2 kB\u001b[0m \u001b[31m32.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.2/144.2 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.3/135.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Building wheel for evadb (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for fugue-sql-antlr (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" ] @@ -327,7 +310,7 @@ "name": "stderr", "text": [ "Downloading: \"http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/mnist-b07bb66b.pth\" to /root/.cache/torch/hub/checkpoints/mnist-b07bb66b.pth\n", - "100%|██████████| 1.03M/1.03M [00:01<00:00, 1.05MB/s]\n", + "100%|██████████| 1.03M/1.03M [00:01<00:00, 898kB/s]\n", "Downloading: \"https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\n" ] } @@ -372,7 +355,7 @@ "height": 81 }, "id": "IsP6rLZ2Ftxo", - "outputId": "4f337261-1a04-4a1f-b8a3-f521536e4171" + "outputId": "eee82699-fd4f-4aa8-edac-a9f0e0575e98" }, "execution_count": 5, "outputs": [ @@ -385,7 +368,7 @@ ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
T.propertytypeT.datesoldT.price
0unit2019-01-06402112.96875
1unit2018-12-30409601.65625
2unit2018-12-23417229.78125
3house2019-07-21766572.93750
4house2019-07-28766572.93750
5house2019-08-04766572.93750
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 42 } ] }