### 1. Required libraries

In [20]:
import os
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
from peft import PeftModel
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

### 2. Load the model (trained on Kaggle; saved to the Hugging Face Hub and to my local machine)

In [21]:

# Hub identifiers: the FLAN-T5 base checkpoint and the fine-tuned repo, which
# is used both for the tokenizer and for the PEFT adapter weights.
BASE_CHECKPOINT = "google/flan-t5-large"
ADAPTER_REPO = "syurmen/T5-finetuned"

tokenizer = T5Tokenizer.from_pretrained(ADAPTER_REPO)
base_model = T5ForConditionalGeneration.from_pretrained(BASE_CHECKPOINT)

# Attach the adapter to the base model, then merge it so that `model` is a
# plain T5ForConditionalGeneration again (see the repr printed below).
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO).merge_and_unload()

# The model must be in inference (eval) mode before it is used for generation.
model.eval()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1024, out_features=2816, bias=False)
              (wi_1): Linear(in_features=1024, out_features=2816, bias=False)
       

### 3. Creating the RAG pipeline

In [None]:
# Build the RAG pipeline: load the PDFs from a dedicated directory (kept
# separate so they are easy to find), split them into chunks, index the chunks
# in FAISS, and wire the fine-tuned T5 model into a RetrievalQA chain.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

book_directory = "./books"

# Always start from an empty list: the `if doc_chunks:` guard further down must
# be defined on every path (missing directory, no PDFs), and must never pick up
# stale chunks left in the kernel by an earlier run of this cell.
doc_chunks = []

# isdir (rather than exists) so that a plain file named "books" is reported
# with the message below instead of crashing in os.listdir.
if not os.path.isdir(book_directory) or not os.listdir(book_directory):
    print("There is no directory for the rag files please create one to work conveniently.")
else:
    print("Loading the pdf documents...")
    loader = PyPDFDirectoryLoader(book_directory)
    documents = loader.load()
    if not documents:
        print("There is no pdfs in this directory.")
    else:
        print("Successfully loaded PDF files.")

        print("Splitting the document....")

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
        doc_chunks = splitter.split_documents(documents)

if doc_chunks:
    print("Initializing embedding...")

    embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model,
        model_kwargs={'device': device}
    )
    print(f"Embedding model {embedding_model} has initialized.")

    print("Creating vector store from doc chunks...")

    store = FAISS.from_documents(doc_chunks, embeddings)
    print("Created vector store.")

    print("Setting up the retriever")
    # Retrieve the 3 most similar chunks for every query.
    retriever = store.as_retriever(search_kwargs={"k": 3})

    # Only `max_new_tokens` is passed to bound the output length. The original
    # also passed `max_length=512`, which conflicts with `max_new_tokens`
    # (transformers warns and lets `max_new_tokens` take precedence).
    # NOTE(review): three 1000-character chunks plus the template may exceed
    # the model's input limit; with truncation=True the tail of the prompt
    # (where the question sits) could be cut off -- TODO confirm and, if so,
    # lower `k`/`chunk_size` or move the question before the context.
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        device=0 if device.type == "cuda" else -1,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.3,
        top_p=0.4,
        truncation=True
    )

    llm = HuggingFacePipeline(pipeline=pipe)
    print("Hugging face pipeline created for T5 model.")

    prompt_template_str = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer concise.

Context: {context}

Question: {question}

Helpful Answer:"""

    QA_PROMPT = PromptTemplate(
        template=prompt_template_str, input_variables=["context", "question"]
    )

    # "stuff" places all retrieved chunks into the single {context} slot.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_PROMPT}
    )
    print("retrieval QA is created.")

    print("Query testing of RAG")
    query = "What are common coping mechanisms for anxiety mentioned in the documents?"
    print(f"Query: {query}")

    # Smoke test: report a failure instead of aborting the notebook run.
    try:
        result = qa_chain.invoke({"query": query})
        print("Answer:")
        print(result["result"])
    except Exception as e:
        print(f"Error during RAG query: {e}")

Loading the pdf documents...
Successfully loaded PDF files.
Splitting the document....
Initializing embedding...
Embedding model sentence-transformers/all-MiniLM-L6-v2 has initialized.
Creating vector store from doc chunks...


Device set to use cuda:0


Created vector store.
Setting up the retriever
Hugging face pipeline created for T5 model.
retrieval QA is created.
Query testing of RAG
Query: What are common coping mechanisms for anxiety mentioned in the documents?
Answer:
It's great that you've taken the time to explore the symptoms of anxiety. It's important to remember that you are not a professional and just a help in understanding how anxiety affects you. Here are some suggestions: 1. Recognize and challenge negative thoughts: When you catch yourself thinking about something awful, try to challenge those thoughts. Ask yourself if there is any evidence supporting these thoughts or if they are based on assumptions. 2. Practice relaxation techniques: Deep breathing exercises, progressive muscle relaxation, or mindfulness meditation can help calm your mind and body during anxious moments. These techniques can help reduce physical tension and promote a sense of calmness. 3. Challenge negative thoughts: When you catch yourself thinki

In [27]:
# Baseline check: ask the merged model directly, without any retrieved context.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
model.eval()

input_text = "I have anxiety and How can I overcome it"

# Tokenize (truncating to at most 512 tokens) and move every tensor to the
# same device as the model.
encoded = tokenizer(input_text, truncation=True, max_length=512, return_tensors="pt")
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Beam search with sampling disabled, capped at max_length=150.
generation_kwargs = dict(
    max_length=150,
    num_beams=4,
    early_stopping=True,
    do_sample=False,
)

# No gradients are needed for generation.
with torch.no_grad():
    outputs = model.generate(**inputs, **generation_kwargs)

result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Direct Model Answer:", result, sep="\n")

Direct Model Answer:
It can be incredibly challenging to deal with anxiety and how to overcome it. Here are some suggestions that may help: 1. Prioritize self-care: Make sure you are taking care of yourself physically, emotionally, and mentally. This includes getting enough sleep, eating well-balanced meals, and engaging in activities that bring you joy. 2. Practice relaxation techniques: Deep breathing exercises, progressive muscle relaxation, or mindfulness meditation can all help calm your mind and body. These techniques can help you focus on the present moment and reduce anxiety. 3. Practice self-care regularly: Make sure you are taking care of yourself physically, emotionally, and mentally. This includes getting enough sleep, eating well-balanced meals, and engaging in activities
