# Chatbot

In [1]:
import os

from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain

In [2]:
# Location of the converted GPT4All model file used by both the LLM and the
# embedding model below. Set the GPT4ALL_MODEL_PATH environment variable to
# point at a different file; otherwise the original machine-specific path is used.
GPT4ALL_MODEL_PATH = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "E:\\gpt4all-main\\chat\\gpt4all-converted.bin",
)

In [3]:
# Prompt text with a single {question} placeholder; the answer line nudges the
# model to reason step by step. The leading and trailing newlines are kept.
template = (
    "\n"
    "Question: {question}\n"
    "Answer: Let's think step by step.\n"
)

prompt = PromptTemplate(input_variables=["question"], template=template)

In [4]:
# Load the local model file through LangChain's llama.cpp wrapper.
llm = LlamaCpp(model_path=GPT4ALL_MODEL_PATH)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [5]:
# Plain prompt -> LLM chain (no retrieval). The later cells visible in this
# notebook use `qa` and `index` instead of this chain.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [6]:
from langchain.embeddings import LlamaCppEmbeddings

# Embedding model backed by the same local model file as `llm`.
llama_embeddings = LlamaCppEmbeddings(model_path=GPT4ALL_MODEL_PATH)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [10]:
from urllib.parse import urldefrag

from langchain.document_loaders import TextLoader
from langchain.document_loaders import WebBaseLoader
import nest_asyncio

# Loader for a local text file. It has its own name so that the web loader
# created below does not silently overwrite it; the later cells visible in
# this notebook only use `loader` (the web loader).
text_loader = TextLoader("C:\\Users\\PC\\Downloads\\NLP_A5\\data\\textual_FAST.txt")

# Pages to scrape, one per line. Some entries differ only by their #fragment.
candidate_urls = [
    'https://www.nu.edu.pk/',
    'https://www.nu.edu.pk/Degree-Programs',
    'https://www.nu.edu.pk/Admissions/Schedule',
    'https://www.nu.edu.pk/Admissions/HowToApply',
    'https://www.nu.edu.pk/Admissions/EligibilityCriteria',
    'https://www.nu.edu.pk/Admissions/Scholarship',
    'https://www.nu.edu.pk/Admissions/TestPattern',
    'https://www.nu.edu.pk/Admissions/FeeStructure',
    'http://isb.nu.edu.pk/',
    'http://isb.nu.edu.pk/Faculty/allfaculty#cs',
    'http://isb.nu.edu.pk/Faculty/allfaculty#ms',
    'http://isb.nu.edu.pk/Faculty/allfaculty#ee',
    'http://isb.nu.edu.pk/Faculty/allfaculty#sh',
    'http://isb.nu.edu.pk/Student/Grading',
    'https://nu.edu.pk/Student/Calender',
    'https://nu.edu.pk/Student/Conduct',
    'https://nu.edu.pk/Student/HECEquivalence',
    'https://nu.edu.pk/Student/FinancialRules',
    'https://nu.edu.pk/University/History',
    'https://nu.edu.pk/University/Foundation',
    'https://nu.edu.pk/University/Chancellor',
    'https://nu.edu.pk/vision-and-mission',
    'https://nu.edu.pk/University/Trustees',
    'https://nu.edu.pk/University/Governers',
    'https://nu.edu.pk/University/Officers',
    'https://nu.edu.pk/University/Headquarters',
    'https://nu.edu.pk/University/PhDFaculty',
    'https://nu.edu.pk/University/HECSupervisors',
    'https://nu.edu.pk/home/ContactUs',
]

# A URL fragment is never sent to the server, so entries that differ only by
# fragment fetch the same page. Strip fragments and drop the resulting
# duplicates (order preserved) so each page is downloaded and embedded once.
urls = list(dict.fromkeys(urldefrag(url).url for url in candidate_urls))

# Patch asyncio so the loader's async fetch can run inside the notebook's
# already-running event loop.
nest_asyncio.apply()

loader = WebBaseLoader(urls)
scrape_data = loader.aload()

In [11]:
from langchain.indexes import VectorstoreIndexCreator

In [12]:
# NOTE: a persist_directory must be specified on creation for the collection
# to be persisted.
index_creator = VectorstoreIndexCreator(
    vectorstore_kwargs={"persist_directory": "db"},
    embedding=llama_embeddings,
)
index = index_creator.from_loaders([loader])

Using embedded DuckDB with persistence: data will be stored in: db


In [13]:
# Sample question reused by the retrieval-QA cell further down.
query = "What is FAST NUCES?"

In [14]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Splitter configured for chunk_size=500 with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)

# Load the documents from `loader` and cut them into chunks.
documents = loader.load()
texts = text_splitter.split_documents(documents)

# TODO: this store is built without a persist_directory; persist it and work
# out how to reuse it instead of re-embedding on every run.
docsearch = Chroma.from_documents(texts, llama_embeddings)

Using embedded DuckDB without persistence: data will be transient


In [15]:
# A single retrieved document is enough context for the lookup.
MIN_DOCS = 1

# Retriever that returns the top MIN_DOCS chunks from the Chroma store.
retriever = docsearch.as_retriever(search_kwargs={"k": MIN_DOCS})

qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

In [18]:
# Echo the question; the cell's last expression then displays the chain's answer.
print(query)

qa.run(query)

What is FAST NUCES?
 Founded as a Federally Chartered University in July 2000, the National University of Computer and Emerging Sciences is a premiere University of Pakistan, renowned for quality and impact of its students in the development of local software and other industries. The university has five modern campuses at Karachi, Lahore, Islamabad, Peshawar and Chiniot-Faisalabad. These campuses provide world class educational environment and recreational facilities to about over 11,000 students, around one quarter are female and over 500 skilled faculty members.


In [19]:
import json
class Chatbot:
    """Small question-answering bot that forwards each question to an index.

    The bot keeps a running list of ``{"role", "content"}`` messages in
    ``chat_history`` and can save it to / load it from a JSON file.

    NOTE(review): the history is only recorded. It is not sent to the index
    as conversational context; each question is queried on its own.
    """

    def __init__(self, api_key=None, index=None):
        """Create a bot around ``index``.

        Args:
            api_key: Accepted for backward compatibility only; it is not used
                or stored.
            index: Object exposing ``query(text)``. Required.

        Raises:
            TypeError: If ``index`` is not supplied.
        """
        if index is None:
            raise TypeError("Chatbot requires an 'index' argument")
        self.index = index
        self.chat_history = []

    def generate_response(self, user_input):
        """Query the index with ``user_input`` and record the exchange.

        Args:
            user_input: The user's question, passed unchanged to
                ``self.index.query``.

        Returns:
            The assistant message dict, ``{"role": "assistant", "content": str}``.
        """
        # Use the index this instance was given, not a module-level global.
        result = self.index.query(user_input)

        # Some index implementations return the answer as a plain string,
        # others wrap it in an object with a ``.response`` attribute; accept
        # both. The content is stored as a string so the history stays
        # JSON-serializable.
        content = getattr(result, "response", result)
        content = "" if content is None else str(content)

        message = {"role": "assistant", "content": content}
        self.chat_history.append({"role": "user", "content": user_input})
        self.chat_history.append(message)
        return message

    def load_chat_history(self, filename):
        """Replace ``chat_history`` with the JSON content of ``filename``.

        A missing file is not an error: the current history is left as is.
        """
        try:
            with open(filename, 'r', encoding="utf-8") as f:
                self.chat_history = json.load(f)
        except FileNotFoundError:
            # First run: there is nothing to restore yet.
            pass

    def save_chat_history(self, filename):
        """Write ``chat_history`` to ``filename`` as JSON, overwriting it."""
        with open(filename, 'w', encoding="utf-8") as f:
            json.dump(self.chat_history, f)

In [None]:
# Interactive chat loop: restore any saved history, answer questions until the
# user says goodbye, then save the history back to the same file.
HISTORY_FILE = "chat_history.json"

# api_key is passed explicitly because Chatbot's constructor declares it;
# the class does not use the value.
bot = Chatbot(api_key=None, index=index)
bot.load_chat_history(HISTORY_FILE)

while True:
    try:
        user_input = input("You: ").strip()
    except (KeyboardInterrupt, EOFError):
        # Interrupting the input prompt ends the session like a goodbye, so
        # the history collected so far is not lost.
        user_input = "bye"
    if user_input.lower() in ["bye", "goodbye"]:
        print("Bot: Goodbye!")
        bot.save_chat_history(HISTORY_FILE)
        break
    response = bot.generate_response(user_input)
    # NOTE(review): replies are labelled "RAZ" while the farewell says "Bot" — confirm which is intended.
    print(f"RAZ: {response['content']}")