In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_google_vertexai import ChatVertexAI
from langchain_google_vertexai import VertexAI
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough

In [11]:
# Model identifiers, kept together so they are easy to swap.
# Explicit kwargs such as project="som-rit-phi-starr-dev", location="us-west1"
# are currently not passed to the constructors below.
CHAT_MODEL_NAME = "gemini-1.5-pro-preview-0409"
EMBEDDING_MODEL_NAME = "textembedding-gecko"

# Chat model that generates the final answer in the RAG chain.
llm = ChatVertexAI(model=CHAT_MODEL_NAME)
# Embedding model used to vectorize the document chunks for retrieval.
embeddings = VertexAIEmbeddings(EMBEDDING_MODEL_NAME)

In [12]:
# Read the raw help text from disk; `docs` is the list returned by the loader.
loader = TextLoader("stetson-help.txt")
docs = loader.load()
print(f"{len(docs)} documents loaded")

1 documents loaded


In [13]:
# Break the loaded documents into chunks no larger than 500 (measured by the
# splitter's default length function); every other splitter option keeps its
# library default.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500)
all_splits = text_splitter.split_documents(docs)

print(f"Total Chunks {len(all_splits)}")

Total Chunks 47


In [14]:
# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
)

# Similarity-search retriever configured with k=6 results per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

In [15]:
# Prompt text for the RAG chain. It has two placeholders:
#   {context}  - filled with the formatted retrieved chunks
#   {question} - filled with the user's question
# NOTE: the instructions tell the model to fall back on its own knowledge when
# it does not know the answer, so replies are not restricted to the supplied
# context.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, use your own knowledge to provide a helpful response. 
Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

# Wrap the raw text in a PromptTemplate so it can be piped into the chain.
custom_rag_prompt = PromptTemplate.from_template(template)


In [16]:
def format_docs(inp_docs) -> str:
    """Concatenate the text of several documents into one context string.

    Args:
        inp_docs: Iterable of objects exposing a ``page_content`` string
            attribute (here, the documents produced by the retriever).

    Returns:
        The ``page_content`` of each document, in iteration order, separated
        by a blank line. An empty iterable yields an empty string.
    """
    # The result is returned directly rather than bound to a local named
    # `str`, which would shadow the builtin inside this function.
    return "\n\n".join(doc.page_content for doc in inp_docs)

In [17]:
# Inputs for the prompt: the incoming question is sent through the retriever
# and flattened into text for "context", and passed through unchanged for
# "question".
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build prompt inputs -> render prompt -> call LLM -> plain string.
rag_chain = chain_inputs | custom_rag_prompt | llm | StrOutputParser()

In [18]:
# Greet the user, read one question from stdin, and answer it via the RAG chain.
print("Welcome to Stetson AI Chatbot.\n")

question = input("Enter your question: ")
result = rag_chain.invoke(question)
print(f"Answer: {result}")


Welcome to Stetson AI Chatbot.

Answer: ## Downloading PHI Data at Stanford Medicine: A Step-by-Step Guide

**Here's how to download PHI data for research at Stanford Medicine:**

1. **IRB/DPA Approval:** Ensure you have a valid IRB (Institutional Review Board) approval with an associated Data Protection Agreement (DPA). This is the foundational step for any research involving PHI.
2. **PHI Download Exemption:** Stanford Medicine has a policy prohibiting self-service download of PHI. You'll need an exemption to this policy. Fill out the survey at https://redcap.stanford.edu/webauth/surveys/?s=RKFR9PWAP9R3X493 to request this exemption. 
3. **Specify Minimum Necessary PHI:**  Once you have the necessary approvals, you'll need to specify the minimum PHI required for your research. This aligns with HIPAA's Minimum Necessary requirement, ensuring you only access the data essential for your study.
4. **Timeline:**  Expect the approval process for PHI download to take several weeks, assuming

In [19]:
# Final cleanup step: delete the collection behind `vectorstore`.
vectorstore.delete_collection()