In [1]:
import os
from environs import Env

# Pull settings from a local ".env" file (when one exists) so the OpenAI key
# does not have to be hardcoded in the notebook.
env = Env()
env.read_env(".env")

# Resolves to None when OPENAI_API_KEY is not defined.
api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_openai.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFium2Loader
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

In [3]:
# Load the Practice Act PDF; `data` is whatever the loader returns for it.
PDF_PATH = "docs/OT-Practice-Act-Sept-2023.pdf"
loader = PyPDFium2Loader(PDF_PATH)
data = loader.load()

In [4]:
# Split the loaded documents with chunk_size=1000 and no overlap between
# consecutive chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
texts = text_splitter.split_documents(data)

# OpenAI clients: one for embeddings, one for text completion.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
llm = OpenAI(openai_api_key=api_key)

# Index the chunks in a Chroma vector store and expose it as a retriever.
docsearch = Chroma.from_documents(texts, embeddings)
retriever = docsearch.as_retriever()

# Question-answering chain that answers from the retrieved chunks.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [5]:
# Sanity check: ask the retrieval chain a broad question about the PDF.
overview_question = "What is this document about?"
qa.invoke(overview_question)

{'query': 'What is this document about?',
 'result': ' This document is the Texas Occupational Therapy Practice Act, which outlines the requirements and regulations for practicing occupational therapy in the state of Texas. It includes information on licensing, complaints and investigations, and rules and procedures for practicing occupational therapy.'}

In [6]:
# Prompt text for the conversational question-writing chain. Two placeholders:
#   {chat_history} - earlier turns of the conversation
#   {human_input}  - the current request
# Everything inside the triple quotes is sent to the model verbatim, so the
# instruction sentence must not carry stray quote characters.
template_string = """You are OTReviewerGPT, an AI assistant that writes random questions based on the Texas Occupational Therapy Practice Act document and generates multiple answers based on those questions.

{chat_history}
Human: {human_input}
Chatbot: """

In [7]:
# Conversation memory; its contents are supplied under the "chat_history" key,
# matching the placeholder of the same name in the template.
memory = ConversationBufferMemory(memory_key="chat_history")

# Bind the template text to the two variables it expects.
prompt = PromptTemplate(
    template=template_string,
    input_variables=["chat_history", "human_input"],
)

In [8]:
# Completion client plus a chain that ties together the prompt, the model,
# and the conversation memory.
llm = OpenAI(openai_api_key=api_key)
chain = LLMChain(memory=memory, prompt=prompt, llm=llm, verbose=False)

In [9]:
# Generate the seed questions through the retrieval chain (`qa`) instead of
# the conversational `chain`. `chain`'s prompt contains only the chat history
# and the request, so it never receives any text from the PDF; `qa` passes
# retrieved document chunks to the model, so the questions come from the
# actual document.
# Consequence: this request is not recorded in `chain`'s memory. The later
# cells restate each question in their own request, so they do not rely on it.
question_request = "Write 5 random questions based on the document."
# `qa.invoke` returns a dict with "query" and "result"; strip the surrounding
# whitespace the completion may carry.
response = qa.invoke(question_request)["result"].strip()

In [10]:
# One question per non-empty line. Model output can contain leading, trailing,
# or blank separator lines; dropping them (and trimming whitespace) prevents an
# empty "question" from being sent to the model in the later cells.
responses = [line.strip() for line in response.splitlines() if line.strip()]
responses

['1. What is the purpose of the Texas Occupational Therapy Practice Act?',
 '2. What are the requirements for obtaining an occupational therapy license in Texas?',
 '3. What are the responsibilities of an occupational therapist in Texas?',
 '4. How does the Texas Occupational Therapy Practice Act define occupational therapy?',
 '5. What is the process for reporting a violation of the Texas Occupational Therapy Practice Act?']

In [11]:
# Print each question followed by four model-generated answer choices
# (the correct one is not identified).
print("UNMARKED REVIEW QUESTIONS")
print("====================================\n\n")
for question in responses:
    # Use a dedicated name for the request text: assigning it to `prompt` would
    # overwrite the PromptTemplate of the same name that the LLMChain cell
    # uses, breaking that cell if it is re-run.
    choices_request = (
        f"Based on the question {question}, generate 4 possible answers "
        "including one correct answer and three false answers."
    )
    print("Question: " + question)
    print("Choices:")
    choices = chain.predict(human_input=choices_request)
    print(choices)
    print("\n\n")

UNMARKED REVIEW QUESTIONS


Question: 1. What is the purpose of the Texas Occupational Therapy Practice Act?
Choices:
1. The purpose of the Texas Occupational Therapy Practice Act is to promote and protect the health, welfare, and safety of the public by regulating the practice of occupational therapy in the state.
2. The purpose of the Texas Occupational Therapy Practice Act is to ensure that occupational therapists are properly educated and trained to provide quality healthcare services to patients.
3. The purpose of the Texas Occupational Therapy Practice Act is to limit the number of occupational therapists practicing in the state in order to maintain a competitive job market.
4. The purpose of the Texas Occupational Therapy Practice Act is to provide tax breaks for occupational therapists working in underserved areas of the state.



Question: 2. What are the requirements for obtaining an occupational therapy license in Texas?
Choices:
1. To obtain an occupational therapy license 

In [12]:
# Print each question followed by four model-generated answer choices, asking
# the model to label the correct one with "CORRECT".
print("MARKED REVIEW QUESTIONS")
print("====================================\n\n")
for question in responses:
    # Use a dedicated name for the request text: assigning it to `prompt` would
    # overwrite the PromptTemplate of the same name that the LLMChain cell
    # uses, breaking that cell if it is re-run.
    marked_choices_request = (
        f"Based on the question {question}, generate 4 possible answers "
        "including one correct answer and three false answers. "
        "Mark the correct answer as CORRECT."
    )
    print("Question: " + question)
    print("Choices:")
    choices = chain.predict(human_input=marked_choices_request)
    print(choices)
    print("\n\n")

MARKED REVIEW QUESTIONS


Question: 1. What is the purpose of the Texas Occupational Therapy Practice Act?
Choices:
1. CORRECT: The purpose of the Texas Occupational Therapy Practice Act is to promote and protect the health, welfare, and safety of the public by regulating the practice of occupational therapy in the state.
2. The purpose of the Texas Occupational Therapy Practice Act is to ensure that occupational therapists are properly educated and trained to provide quality healthcare services to patients.
3. The purpose of the Texas Occupational Therapy Practice Act is to limit the number of occupational therapists practicing in the state in order to maintain a competitive job market.
4. The purpose of the Texas Occupational Therapy Practice Act is to provide tax breaks for occupational therapists working in underserved areas of the state.



Question: 2. What are the requirements for obtaining an occupational therapy license in Texas?
Choices:
1. CORRECT: To obtain an occupational 