In [2]:
import os
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser

from dotenv import load_dotenv
# Presumably loads variables from a local .env file into the process environment so that
# os.environ["OPENAI_API_KEY"] in the config cell resolves -- TODO confirm the .env provides it.
load_dotenv()

True

In [3]:
# PDF report that is indexed and queried by the cells below.
FILENAME = "report.pdf"

# System prompt; the `{docs}` placeholder receives the retrieved report excerpts.
# NOTE(review): the prompt contains a duplicated word ("that that"); it is runtime
# text sent to the model, so it is left unchanged here.
SYSTEM_TEMPLATE = """
You are a helpful medical assistant that that can answer questions about a patient based on their report: {docs}

Only use the factual information from the report to answer the question.

If you feel like you don't have enough information to answer the question, say "I don't know".
"""

# Human prompt; the `{question}` placeholder receives the user's query.
HUMAN_TEMPLATE = """
Answer the following question: {question}
"""

# Chat model shared by every query cell. The API key is read from the environment,
# so a missing OPENAI_API_KEY fails here with KeyError rather than at request time.
MODEL = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.2,
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

# Embedding model used when building the FAISS index.
EMBEDDINGS = OpenAIEmbeddings()

In [4]:
def pdf_to_text(filename="proj4.pdf"):
    """Extract the text of every page of a PDF and return it as one string.

    The previous body called ``PdfReader``, which is never imported in this
    notebook, so every call raised ``NameError``. Pages are now read with
    ``PyPDFLoader``, which the notebook already imports and uses in
    ``get_embeddings_db``.

    Args:
        filename: Path to the PDF file to read.

    Returns:
        The text of all loaded pages concatenated in order, with no separator
        between pages. A page without text contributes an empty string.
    """
    pages = PyPDFLoader(filename).load()
    # `or ""` keeps the join safe if a page's content comes back empty/None.
    return "".join(page.page_content or "" for page in pages)


def get_embeddings_db(filename="proj4.pdf", chunk_size=2000, chunk_overlap=200):
    """Build a FAISS vector store from the text chunks of a PDF.

    The PDF is loaded with ``PyPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` and embedded with the module-level
    ``EMBEDDINGS`` object.

    Args:
        filename: Path to the PDF file to index.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If the PDF produced no text chunks (for example a scanned,
            image-only document). Without this guard the failure surfaces
            inside ``FAISS.from_documents`` as an unrelated-looking error.
    """
    pages = PyPDFLoader(filename).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = splitter.split_documents(pages)

    # Fail early with an actionable message instead of indexing nothing.
    if not chunks:
        raise ValueError(
            f"No text could be extracted from {filename!r}; cannot build an embeddings index."
        )

    return FAISS.from_documents(chunks, EMBEDDINGS)

def get_system_message_prompt(system_template):
    """Return a ``SystemMessagePromptTemplate`` built from ``system_template``."""
    return SystemMessagePromptTemplate.from_template(system_template)

def get_human_message_prompt(human_template):
    """Return a ``HumanMessagePromptTemplate`` built from ``human_template``."""
    return HumanMessagePromptTemplate.from_template(human_template)

def get_chat_prompt(system_message_prompt, human_message_prompt):
    """Combine the system and human prompts into one ``ChatPromptTemplate``.

    The system message is placed first, followed by the human message.
    """
    messages = [system_message_prompt, human_message_prompt]
    return ChatPromptTemplate.from_messages(messages)

def generate_response(model, query, db, k=4):
    """Answer ``query`` using the ``k`` most similar chunks stored in ``db``.

    The previous version built an ``LLMChain`` and overwrote it on the next
    statement with the ``prompt | model | parser`` pipeline; that unused
    construction has been removed.

    Args:
        model: Chat model placed in the middle of the pipeline.
        query: The user's question; used both for the similarity search and
            as the ``{question}`` value of the human prompt.
        db: Vector store exposing ``similarity_search(query, k=...)``.
        k: Number of chunks to retrieve.

    Returns:
        The model output after ``StrOutputParser`` (the answer as text).
    """
    # Assemble the chat prompt from the module-level templates.
    chat_prompt = get_chat_prompt(
        get_system_message_prompt(SYSTEM_TEMPLATE),
        get_human_message_prompt(HUMAN_TEMPLATE),
    )

    # Retrieve the most relevant chunks and flatten them into one context string.
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    # prompt -> model -> plain-string output
    chain = chat_prompt | model | StrOutputParser()

    return chain.invoke(
        {
            "question": query,
            "docs": docs_page_content,
        }
    )

In [8]:
# Index the report with small chunks (size 1000, overlap 20), then ask one question.
DB = get_embeddings_db(filename=FILENAME, chunk_size=1000, chunk_overlap=20)

user_query = "What is the patient's past medical history?"
response = generate_response(model=MODEL, query=user_query, db=DB, k=4)

# Bare expression so the notebook displays the answer as the cell output.
response

'Based on the provided information, the patient, Mr. Tan Ah Kow, has a past medical history of dementia and stroke.'

In [6]:
# Rebuild the index with larger chunks (size 2000, overlap 200) and repeat the same
# question; note that this overwrites the DB/response names set by the previous cell.
DB = get_embeddings_db(filename=FILENAME, chunk_size=2000, chunk_overlap=200)

user_query = "What is the patient's past medical history?"
response = generate_response(model=MODEL, query=user_query, db=DB, k=4)
print(response)

The patient, Mr. Tan Ah Kow, has a history of hypertension and hyperlipidemia since 1990. He has also suffered several strokes in 2005, leading to subsequent issues such as cardiomyopathy, cardiac failure, and chronic renal disease. Additionally, he was diagnosed with dementia and has experienced a gradual deterioration in his cognitive ability and physical state over the years.
