Skip to content

Commit

Permalink
Updates evadb apps (#1123)
Browse files Browse the repository at this point in the history
Update evadb apps
1. Rename `udf` to `function` (SQL statements, implementation paths, and log messages)
2. Replace cursor helper-method calls with raw SQL passed to `cursor.query("...")`

---------

Co-authored-by: Jiashen Cao <caojiashen24@gmail.com>
  • Loading branch information
pchunduri6 and jiashenC committed Sep 18, 2023
1 parent 22feed0 commit 11fa571
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 162 deletions.
37 changes: 18 additions & 19 deletions apps/privategpt/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,37 @@ def load_data(source_folder_path: str):
cursor = evadb.connect(path).cursor()

# Drop function if it already exists
cursor.drop_function("embedding").execute()

cursor.query("DROP FUNCTION IF EXISTS embedding;").execute()
# Create function from Python file
# This function is a sentence feature extractor
embedding_udf = cursor.create_function(
udf_name="embedding",
if_not_exists=True,
impl_path=f"{path}/udfs/sentence_feature_extractor.py",
)
embedding_udf.execute()
text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS embedding
IMPL '{path}/functions/sentence_feature_extractor.py';
"""
print(text_feat_function_query)
cursor.query(text_feat_function_query).execute()

print("🧹 Dropping existing tables in EvaDB")
cursor.drop_table("data_table").execute()
cursor.drop_table("embedding_table").execute()
cursor.query("DROP TABLE IF EXISTS data_table;").execute()
cursor.query("DROP TABLE IF EXISTS embedding_table;").execute()

print("📄 Loading PDFs into EvaDB")
cursor.load(
file_regex=f"{source_folder_path}/*.pdf", format="PDF", table_name="data_table"
).execute()
text_load_query = f"""LOAD PDF '{source_folder_path}/*.pdf' INTO data_table;"""
print(text_load_query)
cursor.query(text_load_query).execute()

print("🤖 Extracting Feature Embeddings. This may take some time ...")
cursor.query(
"CREATE TABLE IF NOT EXISTS embedding_table AS SELECT embedding(data), data FROM data_table;"
).execute()

print("🔍 Building FAISS Index ...")
cursor.create_vector_index(
index_name="embedding_index",
table_name="embedding_table",
expr="features",
using="FAISS",
)
cursor.query(
"""
CREATE INDEX embedding_index
ON embedding_table (features)
USING FAISS;
"""
).execute()


def main():
Expand Down
20 changes: 12 additions & 8 deletions apps/privategpt/privateGPT.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@


def query(question):
context_docs = (
cursor.table("embedding_table")
.order(f"Similarity(embedding('{question}'), features)")
.limit(3)
.select("data")
.df()
)
context_docs = cursor.query(
f"""
SELECT data
FROM embedding_table
ORDER BY Similarity(embedding('{question}'), features)
ASC LIMIT 3;
"""
).df()

# Merge all context information.
context = "; \n".join(context_docs["embedding_table.data"])

Expand All @@ -51,8 +53,10 @@ def query(question):
print("\n>> Context: ")
print(context)


print(
"🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before running this file."
"🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before"
" running this file."
)

## Take input of queries from user in a loop
Expand Down
28 changes: 15 additions & 13 deletions apps/story_qa/evadb_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from time import perf_counter

from gpt4all import GPT4All
from unidecode import unidecode
from util import download_story, read_text_line, try_execute
from util import download_story, read_text_line

import evadb


def ask_question(path):
def ask_question(story_path: str):
# Initialize early to exclude download time.
llm = GPT4All("ggml-gpt4all-j-v1.3-groovy")

path = os.path.dirname(evadb.__file__)
cursor = evadb.connect().cursor()

story_table = "TablePPText"
Expand All @@ -35,17 +37,17 @@ def ask_question(path):
t_i = 0

timestamps[t_i] = perf_counter()
print("Setup UDF")
print("Setup Function")

Text_feat_udf_query = """CREATE UDF IF NOT EXISTS SentenceFeatureExtractor
IMPL 'evadb/udfs/sentence_feature_extractor.py';
Text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
IMPL '{path}/functions/sentence_feature_extractor.py';
"""

cursor.query("DROP UDF IF EXISTS SentenceFeatureExtractor;").execute()
cursor.query(Text_feat_udf_query).execute()
cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
cursor.query(Text_feat_function_query).execute()

try_execute(cursor, f"DROP TABLE IF EXISTS {story_table};")
try_execute(cursor, f"DROP TABLE IF EXISTS {story_feat_table};")
cursor.query(f"DROP TABLE IF EXISTS {story_table};").execute()
cursor.query(f"DROP TABLE IF EXISTS {story_feat_table};").execute()

t_i = t_i + 1
timestamps[t_i] = perf_counter()
Expand All @@ -56,7 +58,7 @@ def ask_question(path):
cursor.query(f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));").execute()

# Insert text chunk by chunk.
for i, text in enumerate(read_text_line(path)):
for i, text in enumerate(read_text_line(story_path)):
print("text: --" + text + "--")
ascii_text = unidecode(text)
cursor.query(
Expand Down Expand Up @@ -84,7 +86,7 @@ def ask_question(path):

# Create search index on extracted features.
cursor.query(
f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING FAISS;"
f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING" " FAISS;"
).execute()

t_i = t_i + 1
Expand Down Expand Up @@ -139,9 +141,9 @@ def ask_question(path):


def main():
path = download_story()
story_path = download_story()

ask_question(path)
ask_question(story_path)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion apps/youtube_channel_qa/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ pip install -r requirements.txt
## Usage
Run script:
```bat
python multi_youtube_video_qa.py
python youtube_channel_qa.py
```

Loading

0 comments on commit 11fa571

Please sign in to comment.