In [None]:
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

from chromadb.config import Settings

import time
import os
import statistics
import re
# Pin this process to a single GPU index and a world size of one. This runs
# ahead of the `import torch` that follows (presumably so that torch only ever
# sees that one device -- TODO confirm the index is right for the target host).
os.environ.update({"CUDA_VISIBLE_DEVICES": "5", "WORLD_SIZE": "1"})
import torch

In [None]:
# Read the demo CSV into LangChain documents (presumably one document per CSV
# row -- confirm against the CSVLoader version in use).
CSV_PATH = "./8_entry_demo_ready.csv"
loader = CSVLoader(CSV_PATH)
documents = loader.load()

In [None]:
# Build the embedding model and the persistent Chroma index, then run one
# smoke-test similarity search against it.

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

embedding_function = HuggingFaceEmbeddings(
    model_name='all-MiniLM-L6-v2',
    model_kwargs={"device": device},
    cache_folder='/local/work/baheryilmaz/.cache'
)

PERSIST_DIRECTORY = "./chroma"
query = "speeding with road bikes"

# Give every document a deterministic id derived from its metadata. Without
# explicit ids each re-run of this cell adds a fresh copy of every row to the
# persisted collection, so searches start returning duplicates. The position
# fallback keeps ids unique if a loader does not set 'source'/'row'.
document_ids = [
    f"{doc.metadata.get('source', 'document')}:{doc.metadata.get('row', position)}"
    for position, doc in enumerate(documents)
]

db = Chroma.from_documents(
    documents,
    embedding_function,
    ids=document_ids,
    persist_directory=PERSIST_DIRECTORY,
)

# Smoke test: show the best hit, but do not crash if the index returned nothing.
docs_vanilla = db.similarity_search(query)
if docs_vanilla:
    print(docs_vanilla[0].page_content)
else:
    print("No documents matched the smoke-test query.")



In [None]:
# Load the instruction-tuned Mistral model as a LangChain text-generation LLM.
llm_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Pipeline device index: 0 is the first GPU visible to this process, -1 is the
# CPU. A hard-coded 0 fails on hosts without a visible GPU, so pick the index
# from what torch actually reports.
llm_device = 0 if torch.cuda.is_available() else -1

llm = HuggingFacePipeline.from_model_id(
    model_id=llm_name,
    task="text-generation",
    model_kwargs={
        "temperature": 0,
        "max_length": 1024,
        "trust_remote_code": True,
        "cache_dir": "/local/work/baheryilmaz/.cache",
    },
    device=llm_device,
)


In [None]:
# Chroma client settings pointing at the same persistence directory as the index.
# NOTE(review): no visible cell passes CHROMA_SETTINGS to Chroma, and
# 'duckdb+parquet' looks like a legacy backend name in newer chromadb releases
# -- confirm whether this cell is still needed.
_chroma_options = {
    "chroma_db_impl": 'duckdb+parquet',
    "persist_directory": PERSIST_DIRECTORY,
    "anonymized_telemetry": False,
}
CHROMA_SETTINGS = Settings(**_chroma_options)


In [None]:
# Interactive questions and answers: for each query, retrieve the closest
# documents from the vector store and summarise them with the LLM.
# Type "exit" to leave the loop; empty input is ignored.

# "id" followed by optional non-word characters and a number, e.g. "id 12" or
# "id: 12". The leading \b stops words that merely end in "id" ("valid 3")
# from being read as a patient reference. Group 3 is the numeric id.
PATIENT_ID_PATTERN = re.compile(r"\b(id)(\W)*([0-9]+)")

# The prompt and the chain do not depend on the query, so build them once
# instead of on every loop iteration.
prompt_template = (
    "Write a concise and short summary of the following text at the end. Keep it short and simple:\n"
    "    \n"
    "    {text}\n"
    "    \n"
    "    CONCISE SUMMARY:"
)
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
# Hand the custom prompt to the chain; previously it was built but never used,
# so the chain silently ran with its default prompt.
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)

while True:
    query = input("\nEnter a query: ").strip()
    if query == "exit":
        break
    if not query:
        continue

    start = time.time()

    # If the query names a patient id, restrict retrieval to that patient's
    # source file; when several ids are mentioned the last one wins.
    # None (rather than an empty dict) means "no metadata restriction", and the
    # name avoids shadowing the builtin `filter` for the rest of the kernel.
    # NOTE(review): documents loaded from the CSV presumably carry the CSV path
    # as 'source', so this filter may match nothing for that index -- confirm.
    metadata_filter = None
    for match in PATIENT_ID_PATTERN.finditer(query):
        metadata_filter = {'source': f'source_documents/patient_{match[3]}.txt'}

    docs_with_score = db.similarity_search_with_score(query, k=2, filter=metadata_filter)
    if not docs_with_score:
        # statistics.mean() raises on an empty sequence, so bail out early.
        print("\n> No matching documents found for this query.")
        continue

    # Keep only the hits whose score is at or below the mean score of the
    # returned hits (assumes a lower score means a closer match -- confirm
    # for the configured distance function).
    mean = statistics.mean([score for _, score in docs_with_score])
    docs = [(doc, score) for doc, score in docs_with_score if score <= mean]
    doc_page_contents = [doc for doc, _ in docs]

    answer = chain.run(doc_page_contents)
    end = time.time()

    # Print the result
    print("\n\n> Question:")
    print(query)
    print(f"\n> Answer (took {round(end - start, 2)} s.):")
    print(answer)

    # Print the relevant sources used for the answer
    for source_doc, score in docs:
        print("\n> " + source_doc.metadata["source"] + ":")
        print(source_doc.page_content)
        print("\n> Score: " + str(score))
