In [None]:
!pip install langchain-huggingface
!pip install huggingface_hub
!pip install transformers
!pip install accelerate
!pip install  bitsandbytes
!pip install langchain
pip install langchain-community
pip install chromadb

In [None]:
import os
from time import time

import torch
import transformers
from torch import cuda, bfloat16
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEndpoint

In [None]:
# Read the UTF-8 text file into a list of LangChain documents.
loader = TextLoader("/FinalScraptedData.txt", encoding="utf8")
documents = loader.load()

# Show a short preview only: echoing `documents` itself would write the whole
# file into the notebook output.
print(f"Loaded {len(documents)} document(s)")
documents[0].page_content[:500] if documents else None

In [None]:
# Cut the loaded documents into chunks of at most 1000 characters, with 200
# characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

# Preview the first few chunks only; echoing the full list floods the output.
splits[:3]

In [None]:
# Report how many chunks the splitter produced.
num_splits = len(splits)
print(f"Number of splits: {num_splits}")


In [None]:
# Sentence-embedding model used to vectorise the chunks and the queries.
model_name = "sentence-transformers/all-MiniLM-L12-v2"
# Use the GPU when one is visible and fall back to CPU otherwise, instead of
# failing on a hard-coded "cuda" device.
model_kwargs = {"device": "cuda" if cuda.is_available() else "cpu"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

In [None]:
# Echo the embeddings object so its repr is shown as the cell output.
embeddings

In [None]:
import chromadb

# Give every chunk a deterministic id. Without explicit ids each run of this
# cell inserts the same chunks again under fresh random ids, so the persisted
# "chroma_db" collection accumulates duplicates that then crowd the retrieval
# results. With stable ids a re-run overwrites the existing entries instead.
# NOTE(review): entries left over from an earlier, larger corpus (or from runs
# made before ids were stable) are not removed; delete the "chroma_db"
# directory to rebuild the index from scratch.
chunk_ids = [f"chunk-{i}" for i in range(len(splits))]

vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="chroma_db",
)

In [None]:
# Wrap the vector store in a retriever; no arguments are passed, so the
# store's default retriever settings apply.
retriever = vectordb.as_retriever()
# Echo the retriever so its repr is shown as the cell output.
retriever

In [None]:
# Run the Hugging Face CLI `login` command as a shell command.
!huggingface-cli login

In [None]:
# Run the Hugging Face CLI `whoami` command as a shell command.
!huggingface-cli whoami

In [None]:
model_id = "meta-llama/Llama-3.1-8B"

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# SECURITY: the access token must never be written into the notebook. It is read
# from the HF_TOKEN environment variable; when that is unset, `token=None` lets
# the Hub client fall back to the credentials stored by `huggingface-cli login`.
# The token that used to be embedded in this cell has been exposed and must be
# revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

# 4-bit NF4 quantisation with double quantisation; matmuls are computed in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# NOTE(review): `trust_remote_code=True` permits executing code shipped in the
# model repository; confirm it is actually required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

In [None]:
from langchain.prompts.prompt import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain import LLMChain, PromptTemplate

# Prompt sent to the LLM: `{context}` and `{question}` are filled in per query.
# NOTE(review): the `[INST]` / `<>` markers look like Llama-2 chat tags, while
# the model loaded in this notebook is the Llama-3.1 base checkpoint; confirm
# this is the intended prompt format.
RESPONSE_TEMPLATE = """[INST]
<>Context:
    {context}

INSTRUCTION:
    Using the aforementioned contexts, answer the following question in short. DO NOT make things by your own. If any question is irrelevant, say that you DON'T know. But, feel free to answer some general questions.
    Do not answer any query related to any other institution other than NSU (North South University).

    Question: {question}[/INST]
    Helpful Answer:
    """


# Build the template once with explicit input variables. The original cell also
# called `PromptTemplate.from_template` first, but that result was overwritten
# immediately and never used.
PROMPT = PromptTemplate(template=RESPONSE_TEMPLATE, input_variables=["context", "question"])
PROMPT

In [None]:
# The model is loaded with AutoModelForCausalLM (decoder-only), so the matching
# pipeline task is "text-generation". "text2text-generation" is the task for
# encoder-decoder (seq2seq) models and does not list causal LMs as supported.
# NOTE(review): depending on the wrapper version the returned text may or may
# not include the prompt; the answer-cleaning cells already cope with both by
# splitting on "Helpful Answer:".
query_pipeline = transformers.pipeline(
    "text-generation",
    max_new_tokens=200,  # upper bound on the length of each generated answer
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Expose the transformers pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=query_pipeline)

In [None]:
# Options forwarded to the underlying "stuff" chain: quiet logging and the
# custom prompt template.
stuff_chain_options = {"verbose": False, "prompt": PROMPT}

# Retrieval QA chain: the retriever supplies the context documents and the
# LLM answers from them using the prompt above.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    chain_type_kwargs=stuff_chain_options,
)

In [None]:
def test_rag(qa, query):
    result = qa.run(query)
    return result

In [None]:
# One fixed question sent through the QA chain.
query = "who are the faculty of NorthSouth University in ece department?"
result = test_rag(qa=qa_chain, query=query)

In [None]:
# Echo the current value of `result` as the cell output.
result

In [None]:
# Default to the raw result; when it is a string containing the
# "Helpful Answer" marker, keep only the text after the first
# "Helpful Answer:" and trim surrounding whitespace.
cleaned_answer = result
if isinstance(result, str) and "Helpful Answer" in result:
    cleaned_answer = result.split("Helpful Answer:", 1)[-1].strip()

print("\nAnswer: ", cleaned_answer)

In [None]:
# Interactive question loop: keep answering until the user types "stop"
# (compared case-insensitively).
while True:
    query = input("Question:")
    if query.lower() == 'stop':
        break

    result = test_rag(qa_chain, query)

    # Keep only the text after the first "Helpful Answer:" when the marker is
    # present in a string result; otherwise pass the result through unchanged.
    has_marker = isinstance(result, str) and "Helpful Answer" in result
    cleaned_answer = result.split("Helpful Answer:", 1)[-1].strip() if has_marker else result

    print(f"Answer: {cleaned_answer}")