In [2]:
import os
from llmware.library import Library
from llmware.retrieval import Query
from llmware.prompts import Prompt
from llmware.setup import Setup

# Read the OpenAI API key from the OPENAI_API_KEY environment variable.
# The lookup raises KeyError when the variable is not set, so export it before running this cell.
# Prefer the environment variable over pasting the key here, so the key is not saved with the notebook.
openai_api_key = os.environ["OPENAI_API_KEY"]
OPENAI_MODEL_NAME = "gpt-3.5-turbo" # or "gpt-4"

In [3]:
# Build a new library and fill it with the "Agreements" folder of the llmware sample files.
library_name = "Agreements"
print(f"\n > Creating library '{library_name}'...")
library = Library().create_new_library(library_name)

# load_sample_files() hands back the base path of the samples; the agreements live in a sub-folder.
sample_files_path = Setup().load_sample_files()
agreements_folder = os.path.join(sample_files_path, "Agreements")

# Left as the last expression so the value returned by add_files is rendered as the cell output.
library.add_files(agreements_folder)


 > Creating library 'Agreements'...


{'docs_added': 15,
 'blocks_added': 1272,
 'images_added': 0,
 'pages_added': 204,
 'tables_added': 0,
 'rejected_files': ['.DS_Store']}

In [4]:
# Embed the library with the "industry-bert-contracts" model, using Milvus as the vector database.
embedding_model = "industry-bert-contracts"
print(f"\n > Generating vector embeddings using embedding model: '{embedding_model}'...")

# Left as the last expression so the value returned by install_new_embedding is rendered as the cell output.
library.install_new_embedding(
    embedding_model_name=embedding_model,
    vector_db="milvus",
)


 > Generating vector embeddings using embedding model: 'industry-bert-contracts'...


{'embeddings_created': 1272}

In [None]:
# Run a semantic search over the library; the results are later attached to the LLM prompt as evidence.
print("\n > Performing a semantic query...")

# Avoid a HuggingFace tokenizer warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Search for "Termination", requesting 20 results.
library_query = Query(library)
query_results = library_query.semantic_query("Termination", result_count=20)


 > Performing a semantic query...




In [None]:
# Inspect the semantic query results (a bare last expression is rendered as the cell output).
query_results

In [None]:
# Set up a prompter backed by the model named in OPENAI_MODEL_NAME, then attach the
# query_results captured above as source material for the prompt.
prompt_text = "Summarize the termination provisions"
print(f"\n > Prompting LLM with '{prompt_text}'")

blank_prompt = Prompt()
prompter = blank_prompt.load_model(OPENAI_MODEL_NAME, api_key=openai_api_key)

# Keep the return value of add_source_query_results available for inspection.
sources = prompter.add_source_query_results(query_results)

In [None]:
# Send the prompt text together with the attached sources, using the "summarize_with_bullets" prompt.
responses = prompter.prompt_with_source(
    prompt_text,
    prompt_name="summarize_with_bullets",
)

# Each response is indexed by "llm_response" to get the text to print.
for response in responses:
    llm_text = response["llm_response"]
    print("\n > LLM response\n" + llm_text)

In [None]:
# Last step: have the prompter produce a CSV report that can be shared for review.
print("\n > Generating CSV report...")
report_data = prompter.send_to_human_for_review()

# "report_fp" is presumably the path of the generated report file -- confirm against the llmware docs.
report_path = report_data["report_fp"]
print("File: " + report_path + "\n")