# This notebook demonstrates an end-to-end use case for EVA

## Launch EVA DB
Run the command `python eva.py` on the server where you want to deploy EVA.

In [None]:
import cv2
import json
import nest_asyncio
import os
import sys
from tqdm import tqdm

import numpy as np
import pandas as pd

# eva lib
sys.path.insert(0,'..')
from eva.server.db_api import connect

## Establish connection with EVA

In [None]:
# NOTE(review): nest_asyncio is presumably applied because the notebook already runs an event loop — confirm
nest_asyncio.apply()
connection = connect(host = '0.0.0.0', port = 5432) # hostname, port of the server where EVADB is running
# module-level cursor; the utility methods below send all their queries through it
cursor = connection.cursor()

## Download a dataset of choice from cloud
- Available datasets can be found at the link [here](https://drive.google.com/drive/folders/1UpVOYje3llxE9EDrMNOrjtjzFAsnnjQt)
- Refer to the [documentation](https://evagatech.readthedocs.io/en/latest/guide/packaging.html) to learn how to package datasets or models.

In [None]:
%%bash
# run the repo's dataset download script for "bddtest", then show the directory tree
# of ~/.eva/data/datasets/bddtest (presumably where the script places the files)
sh ../script/data/download_dataset.sh bddtest
tree ~/.eva/data/datasets/bddtest

## Download a custom model of choice from cloud
- Available models can be found at the link [here](https://drive.google.com/drive/folders/1E5pEl01HmvoPJltVblcHZnIzMYxb9yxS)
- You need to do this only if your UDF requires a custom model for execution. For example, our object_detection UDF uses a model from torchvision, so this step is not required for it.

In [None]:
%%bash
# run the repo's model download script for "vehicle_make_predictor", then show the directory
# tree of ~/.eva/data/models/vehicle_make_predictor (presumably where the script places the files)
sh ../script/data/download_model.sh vehicle_make_predictor
tree ~/.eva/data/models/vehicle_make_predictor

## Utility Methods

### Method to create the meta table
- The given sample query is specific to BDD. If your video dataset requires a different schema, change the query accordingly

In [None]:
def create_meta_table(dataset_name):
    """
    Creates the meta table for the given dataset if it does not already exist.

    The schema in the query is specific to BDD; change the query if your
    dataset needs different columns.

    Args:
        dataset_name (string) - name of dataset; the table is named {dataset_name}meta

    Returns:
        True if the server reported status 0 for the CREATE TABLE query, False otherwise.
    """

    # by default the table name is {dataset_name}meta
    table_name = dataset_name + "meta"

    create_table_query = f""" 

    CREATE TABLE IF NOT EXISTS {table_name} (
        id INTEGER UNIQUE,
        frame_id INTEGER,
        video_id INTEGER,
        dataset_name TEXT(30),
        label TEXT(30),
        bbox NDARRAY FLOAT32(4),
        object_id INTEGER
    );
    
    """

    cursor.execute(create_table_query)
    response = cursor.fetch_all()

    # Compare the status numerically (as load_video does) so the check gives the
    # same answer whether the status arrives as 0 or as '0'.
    return int(response.status) == 0

### Method to load a video

In [None]:
def _upload_and_load(source_path, table_name, data_format, label):
    """
    Uploads one local file to the EVA server and loads it into a table.

    Args:
        source_path (string) - local path of the file to upload
        table_name (string) - table the uploaded file is loaded into
        data_format (string) - value of the WITH FORMAT clause (CSV or VIDEO)
        label (string) - what is being loaded; only used in the failure message

    Returns:
        True if the LOAD query reported status 0, False otherwise.
    """
    # the file is uploaded under its own base name and loaded from there
    file_name = os.path.basename(source_path)

    cursor.execute(f'UPLOAD INFILE "{source_path}" PATH "{file_name}";')
    upload_response = cursor.fetch_all()

    cursor.execute(f'LOAD DATA INFILE "{file_name}" INTO {table_name} WITH FORMAT {data_format};')
    load_response = cursor.fetch_all()

    # only the LOAD status decides success; the upload response is printed for diagnosis
    if int(load_response.status) != 0:
        print(f"Loading {label} failed! \nUpload response: {upload_response} \nLoad response: {load_response}")
        return False

    return True


def load_video(dataset_name, video_path, info_path):
    """
    Takes the path to 1 video and its corresponding meta file.
    Uploads both files to the server, loads the meta file (CSV) into the
    dataset's meta table and the video into a table of its own.

    Args:
        dataset_name (string) - name of the dataset this video belongs to. The table {dataset_name}meta should already exist
        video_path (string) - path of the video to be loaded
        info_path (string) - path of the meta file that contains info about the video

    Returns:
        True if both the meta file and the video were loaded, False otherwise.
        The video is not attempted when loading the meta file fails.
    """

    # load meta into the dataset-wide meta table
    if not _upload_and_load(info_path, dataset_name + "meta", "CSV", "meta"):
        return False

    # load video into a table named after the file name up to its first '.'
    video_name = os.path.basename(video_path).split('.')[0]
    return _upload_and_load(video_path, video_name, "VIDEO", "video")

### Method to load a dataset

In [None]:
def load_dataset(dataset_name):
    """
    This method iterates over each video and csv in the dataset and loads them into EVA
    A folder named dataset_name is expected to be inside ~/.eva/data/datasets. This folder
    should contain 2 other folders named info and videos, where every file in videos has
    a csv of the same base name in info.

    Args:
        dataset_name (string) - name of the dataset

    Returns:
        True if all videos have been loaded succesfully
        False if the meta table could not be created, a folder or info file is missing,
        or loading any video failed
    """

    # a folder for eva will be created at ~/.eva
    eva_root_folder = os.path.join(os.path.expanduser('~'), '.eva')

    # dataset_name must be your folder name
    dataset_path = os.path.join(eva_root_folder, 'data', 'datasets', dataset_name)

    # first create a meta table for this dataset if it doesnt exist
    if not create_meta_table(dataset_name):
        return False
    print(f"Table created successfully for {dataset_name} (or already exists)")

    videos_dir = os.path.join(dataset_path, 'videos')
    info_dir = os.path.join(dataset_path, 'info')
    for required_dir in (videos_dir, info_dir):
        if not os.path.isdir(required_dir):
            print(f"Expected dataset folder not found: {required_dir}")
            return False

    info_file_names = set(os.listdir(info_dir))

    # Pair every video with the info file that shares its base name. Pairing by name
    # (rather than by position in two sorted listings) keeps the pairs correct even
    # when the info folder contains extra files.
    video_info_pairs = []
    for video_file_name in sorted(os.listdir(videos_dir)):
        video_name = os.path.splitext(video_file_name)[0]
        info_file_name = video_name + '.csv'
        if info_file_name not in info_file_names:
            print("Each video under videos should have a corresponding info file under info.")
            return False
        video_info_pairs.append(
            (os.path.join(videos_dir, video_file_name), os.path.join(info_dir, info_file_name))
        )

    # loop through each video and load them one by one, stopping at the first failure
    for video_path, info_path in tqdm(video_info_pairs):
        # load this video along with its meta info
        if not load_video(dataset_name, video_path, info_path):
            print(f"Loading video: {video_path} failed! ")
            return False

    return True

## Load the above dataset into EVA

- This step may be time-consuming, as each video is loaded into EVA one by one along with its corresponding meta info.

In [None]:
# name of the dataset folder to load
dataset_name = "bddtest"

# load the dataset and report whether every video made it in
print(
    "Dataset loaded successfully!"
    if load_dataset(dataset_name)
    else "One or more video loads failed! "
)