<a href="https://colab.research.google.com/github/highplainscomputing/Mistral-Gradio-fine-tuning/blob/main/RAG_Demo_Restricted.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Dependencies

In [None]:
!git clone https://github.com/highplainscomputing/Mistral-Gradio-fine-tuning.git

In [None]:
!pip install -q -r /content/Mistral-Gradio-fine-tuning/requirements.txt
# !pip install -q nemoguardrails==0.8.1

In [None]:
# Mount Google Drive: the document directory, the saved FAISS index and the
# fine-tuned adapter used later in this notebook all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Import Dependencies

In [None]:
import numpy as np
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from io import StringIO
from datasets import load_dataset, Dataset
from langchain.prompts import PromptTemplate, MessagesPlaceholder, ChatPromptTemplate
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import AIMessage, HumanMessage
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.schema.runnable import RunnablePassthrough
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
import os
import gradio as gr
from operator import itemgetter
import shutil
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
from peft import PeftModel
import torch
from huggingface_hub import login
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from threading import Thread
from transformers.utils import logging
logging.get_logger("transformers").setLevel(logging.ERROR)
import langchain_core
# nemo Guardrails

from nemoguardrails import RailsConfig, LLMRails
from nemoguardrails.integrations.langchain.runnable_rails import RunnableRails

# Global Variables

In [None]:
# Quantization settings forwarded to BitsAndBytesConfig.
LOAD_IN_4BIT = True
BNB_4BIT_USE_DOUBLE_QUANT = True
BNB_4BIT_QUANT_TYPE = "nf4"
# Tokenizer settings forwarded to AutoTokenizer.from_pretrained.
ADD_EOS_TOKEN = True
PADDING_SIDE = "left"
ADD_BOS_TOKEN = True
# Access tokens must never be committed to source control. The token is read
# from the environment instead; it is None when neither variable is set.
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY") or os.environ.get("HF_TOKEN")
# Canned reply used when no document is retrieved for a chat message.
FIX_RESPONSE = "I am a Mental health assistant. I don't answer non medical health related question."
# Colang rails definition handed to RailsConfig.from_content. The stray double
# quote that used to follow the second "bot offer help" has been removed so the
# flow step names the same bot intent as in the "doctor" flow.
RAG_COLANG_CONTENT = """
# define limits
define user ask doctor
    "Can you refer me to a doctor?"


define bot answer doctor
    "I'm a Medical health assistant"
    "Sorry I can't recommend a doctor!"

define flow doctor
    user ask doctor
    bot answer doctor
    bot offer help
#
define user ask general
    "what is Artifical Intelligence?"
    "what are Transformer models?"

define bot answer general
    "I'm a Medical health assistant, I can only help you out with medical health issues."
    "Sorry I can't answer that!"

define flow medicine
    user ask general
    bot answer general
    bot offer help

# Basic guardrail against insults.
define flow
  user express insult
  bot express calmly willingness to help

# Basic guardrail against non medical question.
define flow
  user express non medical health question
  bot express apology willingness to help

# define RAG intents and flow
# Here we use the QA chain for anything else.
define flow
  user ...
  $contexts = execute retrieve(query=$last_user_message)
  $answer = execute qa_chain(query=$last_user_message, contexts=$contexts)
  bot $answer
"""

In [None]:
# Create the local "data" directory. Only an already-existing directory is
# reported as such; any other OS failure is printed with its real cause
# instead of being mislabelled as "Already Exists.".
try:
  os.mkdir("data")
except FileExistsError:
  print("Already Exists.")
except OSError as err:
  print(f"Could not create 'data' directory: {err}")

# Parameters

In [None]:
class RAG_Parameters:
  """Holds every user-tunable setting of the RAG demo.

  The attributes are plain values; the ``set_*`` methods exist so that each
  one can be wired to a UI event as a callback.
  """

  def __init__(self, files = None, question_col_name = "Context", answer_col_name = "Response", model_name_choice = None,
               prompt_choices = None,
               embedding_model = "sentence-transformers/all-mpnet-base-v2", temperature = 0.7, max_new_tokens = 128, repetition_penalty = 1.15,
               top_k = 1, top_p = 0.75, k_context = 4, chunk_size = 512, chunk_overlap = 100):
    """Store the given settings.

    ``model_name_choice`` and ``prompt_choices`` default to ``None`` and are
    replaced by a fresh list per instance, so mutating one instance's list
    can no longer leak into other instances through a shared default object.
    """
    # Build the default lists here rather than in the signature (mutable
    # default arguments are created once and shared by every call).
    if model_name_choice is None:
      model_name_choice = ["mistralai/Mistral-7B-Instruct-v0.1", "meta-llama/Llama-2-7b-chat"]
    if prompt_choices is None:
      prompt_choices = ["mistral_prompt", "llama_prompt", "restrict"]

    # Dataset Parameters
    self.files = files
    self.question_col_name = question_col_name
    self.answer_col_name = answer_col_name

    # Model Parameters
    self.model_name_choice = model_name_choice

    # Config Parameters
    self.prompt_choices = prompt_choices
    self.embedding_model = embedding_model
    self.temperature = temperature
    self.max_new_tokens = max_new_tokens
    self.repetition_penalty = repetition_penalty
    self.top_k = top_k
    self.top_p = top_p
    self.k_context = k_context
    self.chunk_size = chunk_size
    self.chunk_overlap = chunk_overlap

  # Functions to modify variables (one per attribute, usable as callbacks)
  def set_files(self, files):
    """Replace the list of uploaded file paths."""
    self.files = files

  def set_model_name_choice(self, model_name_choice):
    """Replace the model choice (the initial list or a single selected name)."""
    self.model_name_choice = model_name_choice

  def set_prompt_choices(self, prompt_choices):
    """Replace the prompt choice (the initial list or a single selected name)."""
    self.prompt_choices = prompt_choices

  def set_embedding_model(self, embedding_model):
    """Set the embedding model name."""
    self.embedding_model = embedding_model

  def set_temperature(self, temperature):
    """Set the sampling temperature."""
    self.temperature = temperature

  def set_max_new_tokens(self, max_new_tokens):
    """Set the maximum number of generated tokens."""
    self.max_new_tokens = max_new_tokens

  def set_repetition_penalty(self, repetition_penalty):
    """Set the repetition penalty."""
    self.repetition_penalty = repetition_penalty

  def set_top_k(self, top_k):
    """Set the top-k sampling value."""
    self.top_k = top_k

  def set_top_p(self, top_p):
    """Set the top-p sampling value."""
    self.top_p = top_p

  def set_k_context(self, k_context):
    """Set how many chunks the retrievers return."""
    self.k_context = k_context

  def set_chunk_size(self, chunk_size):
    """Set the text-splitter chunk size."""
    self.chunk_size = chunk_size

  def set_chunk_overlap(self, chunk_overlap):
    """Set the text-splitter chunk overlap."""
    self.chunk_overlap = chunk_overlap

  def success_msg_params(self):
    """Return the status message shown after parameters are applied."""
    return "Parameters set successfully"



# Data

In [None]:
class Data(RAG_Parameters):
  """Moves uploaded files into the document directory and splits them into chunks."""

  def __init__(self):
    super().__init__()
    self.directory = "/content/drive/MyDrive/MentalHealthFAQ/some_sample_leaflets"

  def loading_dataset(self):
    """Move every path in ``self.files`` into ``self.directory``.

    Duplicate paths are dropped first. When ``self.files`` is ``None`` or
    empty, nothing is moved and the user is asked to upload files.
    """
    # An empty list is treated like None; indexing it below would raise.
    if not self.files:
      print("-------------------------------------------------------------------------------------------------------------------")
      print("Please Upload Files in UI")
      return

    print(f"Files duplicates : {self.files}")
    self.files = list(set(self.files)) # remove duplicate path
    print(f"Files without duplicates : {self.files}")
    # NOTE(review): despite its name this stores the first file *path*, not an
    # extension; kept unchanged because other code may read it.
    self.file_extension = self.files[0]
    for file_path in self.files:
      file_name = os.path.basename(file_path)
      destination_path = os.path.join(self.directory, file_name)
      shutil.move(file_path, destination_path)
      print(f"Moved {file_name} to {self.directory}")

  def get_file_types(self):
    """Summarise the contents of ``self.directory``.

    Returns:
      A tuple ``(file_types_string, number_of_files, size_of_files)``: the
      comma-separated lower-cased extensions of the regular files, the number
      of directory entries, and the summed size of the regular files
      formatted in KB. The same values are stored on ``self``.
    """
    file_types = set()
    total_bytes = 0
    entries = os.listdir(self.directory)
    for filename in entries:
      entry_path = os.path.join(self.directory, filename)
      if os.path.isfile(entry_path):
        file_types.add(filename.split(".")[-1].lower())
        total_bytes += os.path.getsize(entry_path)

    # Computed after the loop so the attributes exist even when the
    # directory is empty.
    self.size_of_files = "{:.2f} KB".format(total_bytes / 1024)
    self.file_types_string = ", ".join(sorted(file_types))
    self.number_of_files = len(entries)

    return self.file_types_string, self.number_of_files, self.size_of_files

  def create_vector_store_index(self, file_extension=None):
    """Load the ``.md`` / ``.pdf`` files of ``self.directory`` and chunk them.

    Args:
      file_extension: Ignored; the file types are always re-read from the
        directory. Optional so the method can be called without arguments.

    Raises:
      ValueError: If the directory contains neither ``.md`` nor ``.pdf`` files.

    The chunks are stored in ``self.documents``.
    """
    file_types, number_of_files, size = self.get_file_types()
    print(f"File Extension : {file_types}")
    print(f"Directory : {number_of_files}")
    print(f"Size : {size}")

    # get_file_types joins several extensions with ", ", so test membership
    # instead of comparing the whole string to a single extension.
    present = {item.strip() for item in file_types.split(",")}
    loaders = []
    if "md" in present:
      # Imported here because the notebook's import cell does not provide it.
      from langchain_community.document_loaders import TextLoader
      loaders.append(DirectoryLoader(
          self.directory,
          glob="*.md",
          loader_cls=TextLoader,
          show_progress=True,
          ))
    if "pdf" in present:
      loaders.append(DirectoryLoader(
          self.directory,
          glob="*.pdf",
          loader_cls=PyPDFLoader,
          show_progress=True,
          silent_errors=True,
          ))
    if not loaders:
      raise ValueError(f"No .md or .pdf files found in {self.directory} (found: {file_types!r})")

    pages = []
    for loader in loaders:
      pages.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = self.chunk_size,
        chunk_overlap = self.chunk_overlap,
        )
    self.documents = text_splitter.split_documents(pages)

  def success_msg_data(self):
    """Return the status message shown after documents are created."""
    return "Documents created successfully"

# Create Database

In [None]:
class Create_vector(Data):
  """Builds, loads and extends the FAISS vector store and its retrievers."""

  def __init__(self):
    super().__init__()

  def _build_retrievers(self, store):
    """Return ``(plain_retriever, thresholded_retriever)`` for a vector store.

    Both return up to ``self.k_context`` chunks; the second one additionally
    applies a similarity score threshold of 0.1.
    """
    plain = store.as_retriever(search_kwargs={"k": self.k_context})
    thresholded = store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.1, "k": self.k_context},
        )
    return plain, thresholded

  def load_embedding_model(self):
    """Load embeddings from ``model/embedding_model/``, else by configured name."""
    try:
      self.embeddings = HuggingFaceEmbeddings(model_name="model/embedding_model/")
    except Exception as err:
      # Local copy missing or unusable: fall back to self.embedding_model.
      print(f"Local embedding model not used ({err}); loading {self.embedding_model}")
      self.embeddings = HuggingFaceEmbeddings(model_name=self.embedding_model)

  def create_db(self):
    """Build a new FAISS store from ``self.documents`` and its retrievers."""
    self.vectordb = FAISS.from_documents(self.documents, self.embeddings)
    # self.vectordb.save_local("vectorDB_for_mental_health")
    # Built here so that all parameters can be set in the UI first.
    self.retriever_qa, self.retriever = self._build_retrievers(self.vectordb)

  def load_db(self):
    """Load the saved FAISS store from Drive and build the local retrievers."""
    # NOTE(review): allow_dangerous_deserialization=True enables pickle-based
    # loading; only point this at an index file you created yourself.
    self.loadDB = FAISS.load_local("/content/drive/MyDrive/vectorDB_for_mental_health", self.embeddings, allow_dangerous_deserialization=True)
    num_documents_before = len(self.loadDB.index_to_docstore_id)
    print(f"Total number of documents before adding docs: {num_documents_before}")
    print(type(self.loadDB))
    self.local_retriever_qa, self.local_retriever = self._build_retrievers(self.loadDB)

  def reindex_db(self):
    """Add ``self.documents`` to the loaded store and rebuild the local retrievers."""
    # add Docs
    self.mergeDB = self.loadDB.add_documents(self.documents)
    num_documents_after = len(self.loadDB.index_to_docstore_id)
    print(f"Total number of documents after adding docs: {num_documents_after}")
    self.local_retriever_qa, self.local_retriever = self._build_retrievers(self.loadDB)

  def success_msg_data_db(self):
    """Return the status message shown after the data store is ready."""
    return "Split data into chunks. Download embedding model, create vector database and retriever chain."

# Model

In [None]:
class Model(Create_vector):
  """Loads the quantized causal language model and tokenizer for the chosen model name."""

  def __init__(self):
    super().__init__()
    self.config = None

  def set_config(self):
    """Create the bitsandbytes quantization config used by ``load_model``."""
    self.bnb_config = BitsAndBytesConfig(
        load_in_4bit = LOAD_IN_4BIT,
        bnb_4bit_use_double_quant = BNB_4BIT_USE_DOUBLE_QUANT,
        bnb_4bit_quant_type = BNB_4BIT_QUANT_TYPE,
        bnb_4bit_compute_dtype = torch.bfloat16,
        )

  def _load_tokenizer(self, source):
    """Return the tokenizer for ``source`` with EOS reused as the pad token."""
    tokenizer = AutoTokenizer.from_pretrained(
        source,
        padding_side=PADDING_SIDE,
        add_eos_token=ADD_EOS_TOKEN,
        add_bos_token=ADD_BOS_TOKEN,
        )
    tokenizer.pad_token = tokenizer.eos_token
    return tokenizer

  def _load_quantized_model(self, source):
    """Return the causal LM at ``source`` loaded with ``self.bnb_config``."""
    return AutoModelForCausalLM.from_pretrained(source, quantization_config = self.bnb_config, low_cpu_mem_usage=True)

  def load_model(self):
    """Load ``self.model`` and ``self.tokenizer`` for ``self.model_name_choice``.

    Mistral: the base model is loaded once and the fine-tuned adapter from
    Drive is applied on top; if the adapter cannot be loaded the base model is
    used as is. Llama: the local ``model/llama/`` copy is tried first, then
    the hub name.

    Raises:
      ValueError: If ``self.model_name_choice`` is not one of the two
        supported model names.
    """
    # Make the method usable even when set_config was not called first.
    if getattr(self, "bnb_config", None) is None:
      self.set_config()

    if self.model_name_choice == "mistralai/Mistral-7B-Instruct-v0.1":
      hub_name = "mistralai/Mistral-7B-Instruct-v0.1"
      print("-------------------------BEFORE LOADING MODEL LOCAL TRY----------------------------")
      # Load the base weights only once: reloading them after an adapter
      # failure would hold two copies of the model at the same time.
      self.load_base_model = self._load_quantized_model(hub_name)
      try:
        self.model = PeftModel.from_pretrained(self.load_base_model, "/content/drive/MyDrive/mistral_instruct_r32a32/Mistral-7B-v0.1-Fine_tuned_model_dir/checkpoint-100")
        print("-------------------------AFTER LOADING MODEL LOCAL TRY----------------------------")
      except Exception as err:
        print("-------------------------BEFORE LOADING MODEL EXCEPT----------------------------")
        print(f"Fine-tuned adapter not loaded ({err}); using the base model.")
        self.model = self.load_base_model
        print("-------------------------AFTER LOADING MODEL EXCEPT----------------------------")
      self.tokenizer = self._load_tokenizer(hub_name)

    elif self.model_name_choice == "meta-llama/Llama-2-7b-chat":
      try:
        print("-------------------------BEFORE LOADING MODEL LOCAL TRY----------------------------")
        self.model = self._load_quantized_model("model/llama/")
        self.tokenizer = self._load_tokenizer("model/llama/")
        print("-------------------------AFTER LOADING MODEL LOCAL TRY----------------------------")
      except Exception as err:
        print("-------------------------BEFORE LOADING MODEL EXCEPT----------------------------")
        print(f"Local Llama copy not loaded ({err}); loading from the hub.")
        self.model = self._load_quantized_model("meta-llama/Llama-2-7b-chat")
        self.tokenizer = self._load_tokenizer("meta-llama/Llama-2-7b-chat")
        print("-------------------------AFTER LOADING MODEL EXCEPT----------------------------")

    else:
      # Previously an unknown choice was ignored silently and only surfaced
      # later as a missing attribute.
      raise ValueError(f"Unsupported model name: {self.model_name_choice!r}")

  def success_msg_load(self):
    """Return the status message shown after the model is loaded."""
    return "Successfully load Model and Tokenizer"

# Pipeline and LLM

In [None]:
class Pipeline_and_llm(Model):
  """Builds the generation pipeline, the prompts and the retrieval chains."""

  def __init__(self):
    super().__init__()
    self.qa = None

  def create_pipeline(self):
    """Wrap the model and tokenizer in a text-generation pipeline stored in ``self.pipeline``."""
    pipe = pipeline(
        model=self.model,
        task='text-generation',
        tokenizer=self.tokenizer,
        temperature=self.temperature,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
        max_new_tokens=self.max_new_tokens,  # max number of tokens to generate in the output
        repetition_penalty=self.repetition_penalty,  # without this output begins repeating
        top_k=self.top_k,
        top_p=self.top_p,
    )
    self.pipeline = HuggingFacePipeline(pipeline=pipe)


  def prompt_template(self):
    """Build ``self.history_prompt`` and ``self.diagnose_prompt`` for ``self.prompt_choices``.

    Returns:
      The history prompt template that was built (or the previously built
      one when ``self.prompt_choices`` matches no known name).
    """
    if self.prompt_choices == "mistral_prompt":
      template_history = """Context information is below.
                            ---------------------
                            {context}
                            ---------------------
                            Given the context information and not prior knowledge, answer the query.
                            Query: {question}
                            Answer:"""

      template_diagnose = """[INST] You are a medical assistant chatbot having a conversation with human. Answer the question based on the context below, and if the question can't be answered based on the context, say "I don't know".
                    Context: {context}
                    Human: {question}
                    Chatbot:[/INST]"""
      self.history_prompt = PromptTemplate.from_template(template_history)
      self.diagnose_prompt = PromptTemplate.from_template(template_diagnose)

    elif self.prompt_choices == "llama_prompt":
      # The history and diagnose prompts share the same text for Llama.
      template_diagnose = """[INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>>
                    Question: {question}
                    Context: {context}
                    Answer: [/INST]"""
      self.history_prompt = PromptTemplate.from_template(template_diagnose)
      self.diagnose_prompt = PromptTemplate.from_template(template_diagnose)

    elif self.prompt_choices == "restrict":
      template_history = """[INST]You are a very helpful Medical health assistant. Your goal is to answer each question, using the following documents as context, as truthfully as you can. If you cannot answer the question or find relevant meaning in the presented texts, tell the user to try re-phrasing the question.
                    Context: {context}
                    History: {chat_history}
                    Question: {question}
                    Answer: [/INST]"""

      template_diagnose = """[INST]You are a very helpful Medical health assistant. Your goal is to answer each question, using the following documents as context, as truthfully as you can. If you cannot answer the question or find relevant meaning in the presented texts, tell the user to try re-phrasing the question.
                    Context: {context}
                    Question: {question}
                    Answer: [/INST]"""
      self.history_prompt = PromptTemplate.from_template(template_history)
      self.diagnose_prompt = PromptTemplate.from_template(template_diagnose)

    return self.history_prompt


  def llm_chain(self):
    """Create the diagnose LLM chain and the summarising conversation memory."""
    #------------------------Diagnose prompt------------------------#
    self.diagnose_chain = LLMChain(llm=self.pipeline, prompt=self.diagnose_prompt)

    self.memory = ConversationSummaryBufferMemory(
        llm=self.pipeline, memory_key="chat_history", return_messages=True, max_token_limit=1000
    )


  def retriever_chain(self):
    """Compose ``self.local_retriever`` with the diagnose chain into ``self.retrieval_chain``."""
    self.retrieval_chain = (
      {"context": self.local_retriever, "question": RunnablePassthrough()}
      | self.diagnose_chain
    )

  def create_guardrails(self):
    """Build the rails config from ``RAG_COLANG_CONTENT`` and wrap it as a runnable."""
    print("---------------------------CREATE CONFIG-------------------------------------")
    self.config = RailsConfig.from_content(RAG_COLANG_CONTENT)
    self.guardrails = RunnableRails(self.config)
    print("---------------------------CREATE CONFIG DONE-------------------------------------")

  def history_retriever_chain(self, query=None):
    """Create the conversational retrieval chain ``self.qa``.

    Args:
      query: Unused. Optional so the method can be called without arguments,
        which is how the UI event chains invoke it.
    """
    condense_prompt = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. Preserve the original question in the answer sentiment during rephrasing. If the question is not related to medical health. Just say "Sorry! I don't know.
    Chat History:
    {chat_history}
    Follow Up Input: {question}
    Standalone question:"""
    question_generator_prompt = PromptTemplate.from_template(condense_prompt)
    # NOTE(review): this chain is stored but not passed to the chain built
    # below — confirm whether it was meant to be used for condensing.
    self.question_generator_chain = LLMChain(llm=self.pipeline, prompt=question_generator_prompt)


    self.qa = ConversationalRetrievalChain.from_llm(
        self.pipeline,
        retriever=self.local_retriever,
        memory=self.memory,
        verbose=True,
        combine_docs_chain_kwargs={"prompt": self.history_prompt}
    )
    # self.chain_with_guardrails = RunnableRails(self.config, runnable=self.qa)



  def success_msg_pipeline(self):
    """Return the status message shown after the pipeline and chains are built."""
    return "Successfully load model, tokenizer. Created Pipeline and chain"


  def diagnose_inference(self, query):
    """Answer ``query`` with the retrieval chain and expose the retrieved chunks.

    Returns:
      A tuple ``(context_processed, meta_data, text)``: the retrieved chunk
      texts with newlines replaced by spaces, their metadata, and the
      chain's ``"text"`` output.

    Raises:
      ValueError: If ``query`` is ``None``.
    """
    self.query = query

    if self.query is None:
        raise ValueError("Query is not set. Please provide a query.")
    answer = self.retrieval_chain.invoke(self.query)
    documents = answer.get("context")
    context_processed = [document.page_content.replace("\n", " ") for document in documents]
    meta_data = [document.metadata for document in documents]
    return context_processed, meta_data, answer.get("text")

# Instance

In [None]:
# Single shared instance: its bound methods are used as the UI callbacks and
# it is read by predict().
Chatbot = Pipeline_and_llm()

In [None]:
def predict(message, history):
  """Chat callback: answer ``message`` with the conversational retrieval chain.

  Args:
    message: The user's chat message.
    history: Chat history supplied by the chat UI; unused because the chain
      keeps its own memory.

  Returns:
    The chain's answer, or ``FIX_RESPONSE`` when the thresholded retriever
    finds no document for the message.
  """
  gpt_response = Chatbot.qa(message)
  retrieve_content = Chatbot.local_retriever.invoke(message)

  if retrieve_content:
    return gpt_response["answer"]

  # Nothing relevant was retrieved: overwrite the stored AI turn so the
  # generated answer is replaced by the fixed refusal.
  # NOTE(review): this only affects later turns if the returned
  # "chat_history" list is the memory's own message list — confirm.
  chat_history = gpt_response["chat_history"]
  if chat_history and isinstance(chat_history[-1], langchain_core.messages.ai.AIMessage):
    chat_history[-1].content = FIX_RESPONSE
    gpt_response["answer"] = FIX_RESPONSE
  return FIX_RESPONSE

# Login

In [None]:
class login_setup:
  """Small helper that logs in to the Hugging Face Hub."""

  def HF_login(self, hf_token):
    """Remember ``hf_token``, log in with it and return a status message."""
    self.hf_token = hf_token
    login_options = {"token": self.hf_token, "add_to_git_credential": True}
    login(**login_options)
    status_message = "Successfully Login into HuggingFace"
    return status_message

In [None]:
# Log in to the Hugging Face Hub with the configured token before any model
# is loaded.
LOGIN = login_setup()
LOGIN.HF_login(HUGGINGFACE_API_KEY)

# manually create vectordb

Note: the file `Learning Disability.pdf` produces an error.

In [None]:
# Chatbot.loading_dataset()
# file_extension, directory, _ = Chatbot.get_file_types()
# print(f"file Extention : {file_extension}")
# print("-------------------------------------")
# print(f"number of files : {directory}")
# print("-------------------------------------")
# print(f"Size : {_}")
# print("-------------------------------------")


In [None]:
# Chatbot.create_vector_store_index(file_extension)
# Chatbot.load_embedding_model()
# Chatbot.create_db()

# UI

Note: the chatbot generates long answers because of `ConversationBufferMemory`.

In [None]:
# Custom CSS passed to gr.Blocks; each class below is attached to a layout
# column through its elem_classes argument.
css = """
.feedback {margin-top: 140px !important}
.file_st {height: 50px; width: 10px;}
.feed {height: 25px; width: 70px; margin-top: 10px}
.fortextbox {width: 70px;}
.forprompt {height: 25px; width: 70px; margin-top: 200px}
.forparameter {margin-top: 50px}
.forqabutton {margin-top: 65px}
"""
#  margin-left: 80px; margin-right: 150px;

In [None]:
# Gradio UI. Every button runs a chain of Chatbot methods; the order of the
# steps matters because later steps read attributes set by earlier ones.
with gr.Blocks(theme = "finlaymacklon/smooth_slate", css=css) as demo:
  gr.Markdown("""
                # Mental health assistant
              """)

  with gr.Row(equal_height=False):
    # UI for Model Parameters
    with gr.Column(scale = 16):
      model_name = gr.Dropdown(choices = Chatbot.model_name_choice, label = "Select a model", info = "Please select model. i.e mistralai/Mistral-7B-v0.1", value = Chatbot.model_name_choice[0])
      model_and_tokenizer_load = gr.components.Textbox(label = "Message")
    with gr.Column(scale = 4, elem_classes="feedback"):
      model_and_tokenizer_load_btn = gr.Button("Reload", size='lg', min_width=10)
      (
        model_and_tokenizer_load_btn.click(Chatbot.set_model_name_choice, inputs=[model_name])
        .then(Chatbot.set_config)
        .then(Chatbot.load_model)
        .then(Chatbot.create_pipeline)
        .then(Chatbot.success_msg_load, outputs=model_and_tokenizer_load, show_progress=True)
      )

  with gr.Tab("Inference"):


    with gr.Tab("QA with history"):
      with gr.Row():
        with gr.Column():
          inference_checkbox = gr.Checkbox(label="Only look for answer in provided private data")
          chat = gr.ChatInterface(predict,
                                  chatbot=gr.Chatbot(render=False, height=1000))

    with gr.Tab("Diagnostic QA"):
      with gr.Row(equal_height=False):
        with gr.Column(scale=16):
          inference_input = gr.components.Textbox(label = "Query", info = "Write query related to your docs.")
        with gr.Column(scale=4, elem_classes="forqabutton"):
          btn = gr.Button("Generate")
      with gr.Row():
        with gr.Column():
          inference_output_question = gr.components.Textbox(label = "Relevant Text", lines=5)
          inference_output_context = gr.components.Textbox(label = "Metadata", lines=3)
          inference_output_text = gr.components.Textbox(label = "Answer")

      btn.click(Chatbot.diagnose_inference, inputs=[inference_input], outputs=[inference_output_question, inference_output_context, inference_output_text], show_progress=True)



  with gr.Tab("Prompts and Data Store"):

    with gr.Row(equal_height=False):
      with gr.Column(scale=16):
        prompt = gr.Dropdown(choices = Chatbot.prompt_choices, label = "Select Prompt", info = "Please provide your own prompt template if you want.", value = Chatbot.prompt_choices[0])
        prompt_text = gr.components.Textbox(label = "Prompt Text", lines = 4)
      with gr.Column(scale=2, elem_classes="forprompt"):
        prompts_btn = gr.Button("Set Prompt")
        # prompt_template must run before the chains are rebuilt: llm_chain
        # and history_retriever_chain read the prompts it creates, so running
        # it last left the chains on the previously selected prompt.
        (
          prompts_btn.click(Chatbot.set_prompt_choices, inputs=[prompt])
          .then(Chatbot.prompt_template, outputs=[prompt_text])
          .then(Chatbot.llm_chain)
          .then(Chatbot.retriever_chain)
          .then(Chatbot.history_retriever_chain)
        )


    gr.Markdown("File must be PDF")
    gr.Markdown("Please update chunk size and chunk overlap if you want to change it according to your use case.")
    with gr.Row(equal_height=False):

      with gr.Column(scale = 16, elem_classes="file_st"):
        files_upload = gr.File(type='filepath', file_count="multiple")

      with gr.Column(scale = 2, elem_classes="feed"):
        data_btn = gr.Button("Reindex")


    with gr.Column():
      file_types_string = gr.components.Textbox(label = "File type")
      number_of_files = gr.components.Textbox(label = "Number of files")
      size_of_files = gr.components.Textbox(label = "Size of files")
      data_success_msg_db = gr.components.Textbox(label = "Message")

      (
        data_btn.click(Chatbot.set_files, inputs=[files_upload])
        .then(Chatbot.loading_dataset)
        .then(Chatbot.get_file_types, outputs=[file_types_string, number_of_files, size_of_files])
        .then(Chatbot.create_vector_store_index)
        .then(Chatbot.load_embedding_model)
        .then(Chatbot.load_db)
        .then(Chatbot.reindex_db)
        .then(Chatbot.llm_chain)
        .then(Chatbot.retriever_chain)
        .then(Chatbot.history_retriever_chain)
        .then(Chatbot.success_msg_data_db, outputs=data_success_msg_db, show_progress=True)
      )


  with gr.Tab("Parameters"):

    with gr.Row():
      with gr.Column():
        embedding_model = gr.components.Textbox(label = "Embedding Model", info = "Please provide a embedding model from Huggingface.", value = "sentence-transformers/all-mpnet-base-v2")

    gr.Markdown("## Model Parameters")
    with gr.Row():
      with gr.Column():
        temperature = gr.Slider(minimum = 0, maximum = 1, label = "Temperature", info = " Close to 1 general, close to zero specific to data.", value = 0.2, step = 0.1)
        max_new_tokens = gr.Slider(minimum = 100, maximum = 1024, label = "Max New Tokens", info = "Number of max new token you wan.", value = 100, step = 1)
        repetition_penalty = gr.Slider(minimum = 0.1, maximum = 100, label = "Repetition Penalty", info = "", value = 1.15, step = 0.01)

    with gr.Row():
      with gr.Column():
        top_k = gr.Slider(minimum = 1, maximum = 1000, label = "Top k", info = "", value = 100, step = 1)
        top_p = gr.Slider(minimum = 0.1, maximum = 1.0, label = "Top p", info = "", value = 0.95, step = 0.01)

    gr.Markdown("## Data Parameters")
    with gr.Row():
      with gr.Column():
        k_context = gr.Slider(minimum = 1, maximum = 10, label = "K context", info = "", value = 2, step = 1)
        chunk_size = gr.Slider(minimum = 100, maximum = 2048, label = "Chunk Size", info = "", value = 1024, step = 1)
        chunk_overlap = gr.Slider(minimum = 100, maximum = 2048, label = "Chunk Overlap", info = "", value = 100, step = 1)

    with gr.Row(equal_height=False):
      with gr.Column(scale=16):
        success_message = gr.components.Textbox(label = "Message")
      with gr.Column(scale=4, elem_classes="forparameter"):
        parameters_btn = gr.Button("Set Parameter")
    # Every slider is applied, including Top p, which was previously shown
    # in the UI but never written to the Chatbot.
    (
      parameters_btn.click(Chatbot.set_embedding_model, inputs = [embedding_model])
      .then(Chatbot.set_temperature, inputs = [temperature])
      .then(Chatbot.set_max_new_tokens, inputs = [max_new_tokens])
      .then(Chatbot.set_repetition_penalty, inputs = [repetition_penalty])
      .then(Chatbot.set_top_k, inputs = [top_k])
      .then(Chatbot.set_top_p, inputs = [top_p])
      .then(Chatbot.set_k_context, inputs=[k_context])
      .then(Chatbot.set_chunk_size, inputs = [chunk_size])
      .then(Chatbot.set_chunk_overlap, inputs = [chunk_overlap])
      .then(Chatbot.create_pipeline)
      .then(Chatbot.llm_chain)
      .then(Chatbot.retriever_chain)
      .then(Chatbot.history_retriever_chain)
      .then(Chatbot.success_msg_params, outputs=[success_message])
    )


  #----------------------------------------------------------------------------------------------------------------------------------------------------#
  # Initial setup on page load; .success stops the chain at the first
  # failing step.
  (
    demo.load(Chatbot.set_model_name_choice, inputs=[model_name])
    .success(Chatbot.set_config)
    .success(Chatbot.load_model)
    .success(Chatbot.create_pipeline)
    .success(Chatbot.success_msg_load, outputs=model_and_tokenizer_load, show_progress=True)
    .success(Chatbot.set_prompt_choices, inputs=[prompt])
    .success(Chatbot.prompt_template, outputs=[prompt_text])
    .success(Chatbot.load_embedding_model)
    .success(Chatbot.load_db)
    .success(Chatbot.llm_chain)
    .success(Chatbot.retriever_chain)
    .success(Chatbot.history_retriever_chain)
    .success(Chatbot.success_msg_data_db, outputs=data_success_msg_db, show_progress=True)
  )
  #----------------------------------------------------------------------------------------------------------------------------------------------------#
if __name__ == "__main__":
  demo.launch(share=True, debug=True)


# Insights


In [None]:
# def get_score(self, query):
#   docs = self.vectordb.similarity_search_with_relevance_scores(query)
#   print(docs)
#   # similarity_scores = docs.similarity_scores
#   # for i, doc in enumerate(docs):
#   #   print(f"Document {i+1}:")
#   #   print(f"Content: {doc.page_content}\n")
#   #   print(f"Similarity Score: {doc.similarity_scores[i]}\n")
#   return docs

In [None]:
# docs = Chatbot.vectordb.similarity_search_with_relevance_scores("What is mental health?")
# [item for item in docs if item[1] > 0]

In [None]:
# DirectoryLoader??

In [None]:
# s = """[INST] You are an Computer Science Conversational assistant. You strickly answers from provided context only. if no context is found. you politely refuse.
# Context: 029 2068 8401 @ncmh_wales (

# Being a teenager can be tough, but it shouldn’t feel hopeless. If you
# have been feeling sad most of the time for a few weeks or longer and you’re not able to concentrate or do the things you used to enjoy, talk to a trusted adult about depression.
# You’re not alone, and help is avail able. You can feel better.
# To get help, call or text the 988 Suicide & Crisis Lifeline at 988 or chat a t 988lifeline.org .
# nimh.nih.gov/depression
# NIMH Identifier No. OM 22-4321

# What is depression?
# Everyone feels sad or low sometimes, but these feelings usually pass with a little time.

# 10 GET EXCITED ABOUT MENTAL HEALTH RESEARCH! 18 12 8 424 80 33 12 24 80 433 56 12 212 24 56 16 33 1256A Cryptic Message About
# Your Future
# Solve the math problems, then use the letters below your answers to decode
# the hidden message in the blanks below.
# 12 + 12 5 x 0 8 + 8 16 + 16 6 + 6 1 x 9 30 - 2 25 + 8 5 + 1 90 - 9 55 + 8 60 + 20 10 + 8
# A B C D E F G H I J K L M
# 4 + 4 15 + 15 30 - 5 9 + 6 50 + 6 2 x 1 2 + 2 30 - 3 10 + 10 3 x 1 3 + 4 7 + 7 24 + 2
# N O P Q R S T U V W X Y Z
# History:
# Human: I am Huzaifa. I am feeling lost for few days.
# Chatbot:[/INST] Hello Huzaifa, I'm sorry to hear that you've been feeling lost for a few days. Is there anything specific that's troubling you that you would like to talk about?"""

In [None]:
# s.split("Chatbot:[/INST]")[-1]

In [None]:
# ConversationBufferMemory??

In [None]:
# !pip freeze | grep gradio

In [None]:
# loadDB.similarity_search("What is depression?")

In [None]:
  # def history_retriever_chain(self):
  #   condense_prompt = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. Preserve the original question in the answer sentiment during rephrasing. If the question is not related to medical health. Just say "Sorry! I don't know.
  #   Chat History:
  #   {chat_history}
  #   Follow Up Input: {question}
  #   Standalone question:"""
  #   CONDENSE_QUESTION_PROMPT_CUSTOM = PromptTemplate.from_template(condense_prompt)
  #   self.qa = RetrievalQA.from_chain_type(
  #       self.application.llm,
  #       retriever=self.local_retriever_qa,
  #       memory=self.memory,
  #       verbose=True,
  #       combine_docs_chain_kwargs={"prompt": self.history_prompt},
  #       # response_if_no_docs_found = "Sorry! I don't know.",
  #       # rephrase_question = False,
  #       # condense_question_prompt=CONDENSE_QUESTION_PROMPT_CUSTOM
  #   )
  #   self.application.register_action(self.qa, name="qa_chain")

In [None]:
# PeftModel.from_pretrained??

In [None]:
    # if self.prompt_choices == "mistral_prompt":
    #   template_history = """[INST] You are a medical assistant chatbot having a conversation with human. Answer the question based on the context below, and if the question can't be answered based on the context, say "I don't know".
    #                 Context: {context}
    #                 History: {chat_history}
    #                 Human: {question}
    #                 Chatbot:[/INST]"""

    #   template_diagnose = """[INST] You are a medical assistant chatbot having a conversation with human. Answer the question based on the context below, and if the question can't be answered based on the context, say "I don't know".
    #                 Context: {context}
    #                 Human: {question}
    #                 Chatbot:[/INST]"""
    #   self.history_prompt = PromptTemplate.from_template(template_history)
    #   self.diagnose_prompt = PromptTemplate.from_template(template_diagnose)

# 1

In [None]:
# Chatbot.set_model_name_choice("mistralai/Mistral-7B-Instruct-v0.1")
# Chatbot.set_prompt_choices("mistral_prompt")
# Chatbot.set_config()
# Chatbot.load_model()
# Chatbot.create_pipeline()
# Chatbot.prompt_template()
# Chatbot.load_embedding_model()
# Chatbot.load_db()
# Chatbot.llm_chain()
# Chatbot.retriever_chain()
# Chatbot.create_guardrails()

In [None]:
# Chatbot.diagnose_inference("What does Glycogen Phosphorylase do?")
# query = "What is SAD?"

In [None]:
# Chatbot.chain_with_guardrails.get_prompts()
# import nest_asyncio
# nest_asyncio.apply()
# Chatbot.qa({"question" : query})

```python
[HumanMessage(content='What would I experience during a clinical trial?'),

AIMessage(content=' During a clinical trial, the study team will track your health. Participating in a clinical trial may take more time than standard treatment, and you may have more tests and treatments than you would if you weren’t in a clinical trial. The study team also may ask you to keep a log of symptoms or other health measures, fill out forms about how you feel, or complete other tasks. You may need to travel or reside away from home to take part in a study. Clinical trials are research studies that look at new ways to prevent, detect, or treat diseases and conditions. Although individuals may benefit from being part of a')]
```

In [None]:
# La = [HumanMessage(content='What would I experience during a clinical trial?'),
#       AIMessage(content=' During a clinical trial, the study team will track your health. Participating in a clinical trial may take more time than standard treatment, and you may have more tests and treatments than you would if you weren’t in a clinical trial. The study team also may ask you to keep a log of symptoms or other health measures, fill out forms about how you feel, or complete other tasks. You may need to travel or reside away from home to take part in a study. Clinical trials are research studies that look at new ways to prevent, detect, or treat diseases and conditions. Although individuals may benefit from being part of a')
#       ]

In [None]:
# [i.content for i in La if type(i) == langchain_core.messages.ai.AIMessage]

In [None]:
# for i in La:
#   if type(i) == langchain_core.messages.ai.AIMessage:
#     i.content = "I don't know"

In [None]:
# AI = La[1]

In [None]:
# def predict(message, history):
#   global Chatbot
#   history_langchain_format = []
#   for human, ai in history:
#     history_langchain_format.append(HumanMessage(content=human))
#     history_langchain_format.append(AIMessage(content=ai))
#   history_langchain_format.append(HumanMessage(content=message))
#   gpt_response = Chatbot.qa(message)         # , "chat_history": history_langchain_format
#   retrieve_content = Chatbot.local_retriever.invoke(message)
#   print("------------------------RETRIEVE CONTENT-----------------------------------------------------")
#   print(retrieve_content)
#   # print("---------------------------------------HISTORY----------------------------------------------")
#   print(type(history_langchain_format))
#   print("------------------------------------------------------------------------------------------")
#   print(gpt_response)
#   print("------------------------------------------------------------------------------------------")
#   # print("------------------------------------------------------------------------------------------")
#   return gpt_response["answer"]  # .split("Chatbot")[-1]

In [None]:
# alp = {'question': 'What is SAD?', 'chat_history': [HumanMessage(content='What is sad'), AIMessage(content=" Sadness is a normal human emotion that is experienced when something disappointing, upsetting, or threatening happens. It's also known as depression. Everyone feels sad or low sometimes, but these feelings usually pass with a little time. However, if someone experiences persistent sadness or loss of interest or pleasure in activities for several weeks or longer, it may be a sign of depression. Depression is a serious illness that affects millions of people worldwide. There are different types of depression, including major depressive disorder, persistent depressive disorder, postpartum depression, seasonal affective disorder, and bipolar disorder. Symptoms"), HumanMessage(content='What is SAD?'), AIMessage(content='  Seasonal Affective Disorder (SAD) is a type of depression characterized by a recurrent seasonal pattern, with symptoms lasting about 4−5 months out of the year. It is more common in people with depression or bipolar disorder, especially bipolar II disorder. Because SAD, like other types of depression, is associated with disturbances in serotonin activity, antidepressant medications called selective serotonin reuptake inhibitors are sometimes used to treat symptoms.')], 'answer': '  Seasonal Affective Disorder (SAD) is a type of depression characterized by a recurrent seasonal pattern, with symptoms lasting about 4−5 months out of the year. It is more common in people with depression or bipolar disorder, especially bipolar II disorder. Because SAD, like other types of depression, is associated with disturbances in serotonin activity, antidepressant medications called selective serotonin reuptake inhibitors are sometimes used to treat symptoms.'}


In [None]:
# alp["chat_history"]

In [None]:
# ConversationalRetrievalChain??