In [3]:
import os

from dotenv import load_dotenv, find_dotenv
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Locate the project's .env file and load its entries into the process
# environment so the OpenAI-backed classes below can pick up credentials.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from — confirm.
env_file = find_dotenv("../config/.env")
load_dotenv(env_file)

True

In [4]:
# Document Loading
# Read the cleaned course text from disk into LangChain documents.

loader = TextLoader("../data/clean-sc1015.txt")
data = loader.load()

# Document Splitting
# Chunks are capped at chunk_size=500 with no overlap between neighbours.

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
all_splits = text_splitter.split_documents(data)

# Storing into VectorDB (ChromaDB)
persist_directory = "chroma_db"
embeddings = OpenAIEmbeddings()

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # A persisted store already exists: reopen it instead of embedding again.
    # Calling Chroma.from_documents on every run would add the same chunks to
    # the existing collection a second time (duplicate retrieval hits) and
    # re-pay for the embedding calls.
    # Delete the directory to force a rebuild after the source text changes.
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    # First run (or the store was deleted): embed every chunk and write the
    # collection to disk.
    vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory=persist_directory)
    vectorstore.persist()

In [5]:
# Re-open the Chroma collection persisted under ./chroma_db. The path is
# spelled out literally, so this cell does not depend on variables from the
# cell that builds the store.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
)

In [6]:
# Build system prompt
# The template has two placeholders, {context} and {question}, which are
# filled in when the prompt is formatted. The wording below is sent to the
# model verbatim, so typos here are part of the bot's instructions.

template = """ 
You are AskNarelle, a FAQ (Frequently Asked Questions) chatbot that is designed to answer course-related queries by undergraduate students.
You are to use the provided pieces of context to answer any questions. 
If you do not know the answer, just reply with "Sorry, I'm not sure.", do not try to make up your own answer.
You are also required to keep the answers as concise as possible.
Always end with "Thanks for using AskNarelle!" at the end of your answer.

{context}
Question: {question}
Helpful Answer:
"""

In [11]:
# Wrap the raw template string; the chain fills in {context} and {question}.
QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Chat model used to generate the answers (temperature fixed at 0).
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# Retrieval QA chain: documents come from the Chroma store's default
# retriever and the custom prompt is handed to the underlying 'stuff' chain.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [12]:
# Test out bot with the 5/30 questions
# Each non-empty line of the questions file is treated as one question; the
# answers are collected in `outputs`, keyed by the question text.
outputs = {}

with open('../data/5questions.txt', 'r') as file:
    for line in file:
        question = line.strip()
        if not question:
            # Skip blank lines: an empty string would still trigger a full
            # chain call and leave a meaningless "" entry in `outputs`.
            continue
        result = qa.run(question)
        outputs[question] = result

# Print every question/answer pair, separated by a blank line.
for key, val in outputs.items():
    print("Q:", key)
    print("A:", val, "\n")

Q: Can you explain the grading criteria and rubrics for assignments and exams?
A: The grading criteria for this course are as follows:
- Quizzes within LAMS: 5%
- Theory Quizzes: 40%
- Lab Exercises: 25%
- Mini-Project: 30%

The rubrics for assignments and exams will be provided by your instructor. Please refer to the course materials and instructions for specific details on how your work will be assessed. Thanks for using AskNarelle! 

Q: What flavor of ice-cream will I get if I were to score a A+ for the course?
A: Sorry, I'm not sure. Thanks for using AskNarelle! 

Q: Who are my professors for the course, and how do I contact them?
A: The professors for the course are Associate Prof Bo AN and Dr Sourav SEN GUPTA. You can contact them via email at boan@ntu.edu.sg and sg.sourav@ntu.edu.sg respectively. Thanks for using AskNarelle! 

Q: What are the course requirements and prerequisites?
A: The course requirements and prerequisites may vary depending on the specific course. It is best 

In [13]:
# Ad-hoc check: push one hand-written question through the QA chain.
query = "What are the programming language(s) that I will be using in this course?"

result = qa.run(query)  # chain's answer for this single question
print(result)

The programming language used in this course is Python. Thanks for using AskNarelle!
