### Initial import of libraries

In [1]:
# Install primary libraries
!pip install -q -U torch==2.3.0 datasets tensorflow==2.15 langchain sentence_transformers faiss-cpu
!pip install transformers -U
!pip install git+https://github.com/huggingface/diffusers

# Install additional libraries for optimization
!pip install peft==0.4.0 bitsandbytes==0.41.3 trl==0.4.7

# Install community extensions for LangChain
!pip install -U langchain-community

# Install gradio for GUI
!pip install gradio typer==0.12.3 click==8.0

Collecting git+https://github.com/huggingface/diffusers
  Cloning https://github.com/huggingface/diffusers to /tmp/pip-req-build-a2ep_6qv
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers /tmp/pip-req-build-a2ep_6qv
  Resolved https://github.com/huggingface/diffusers to commit 2e2684f014a62bdb690e6b439e946068eb304fb6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

### Loading Mistral 7B as primary model

In [3]:
# Import the login function from the huggingface_hub library
from huggingface_hub import login

# Log in to the Hugging Face Hub without embedding the secret in the notebook.
# The token is read from the HF_TOKEN environment variable; when the variable is
# not set, `token=None` makes `login` ask for the token interactively.
# A token can be created in the Hugging Face account settings under "Access Tokens".
# NOTE(security): never paste a token into a cell; any token that has been stored
# in a shared or committed notebook must be considered leaked and revoked.
login(token=os.environ.get("HF_TOKEN"))


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
# Define the model name
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

# Initialize the tokenizer from the pre-trained model.
# `trust_remote_code` is deliberately not enabled: the Mistral architecture is
# implemented in transformers itself, so no code from the model repository has to
# be downloaded and executed.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set the padding token to be the same as the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token

# Set the side on which to apply padding
tokenizer.padding_side = "right"

# Define bitsandbytes configuration parameters for quantization.
# Quantization reduces the memory and computation requirements of the model;
# here the weights are stored with 4-bit precision.
bnb_configuration = BitsAndBytesConfig(
    load_in_4bit=True,  # Load the model weights with 4-bit precision
    bnb_4bit_compute_dtype=torch.float16,  # Use 16-bit floats for computation
    bnb_4bit_quant_type="nf4",  # Use the NF4 (4-bit NormalFloat) quantization data type
    bnb_4bit_use_double_quant=True,  # Also quantize the quantization constants to save additional memory
)

# Load the pre-trained model with the specified quantization configuration
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_configuration,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### Text generation pipeline with Mistral-7B and desired hyperparameters

In [5]:
# Import necessary libraries
from transformers import pipeline

# Create a text generation pipeline from the loaded model and tokenizer.
# The pipeline abstraction bundles tokenization, generation and decoding for a task.
model_pipeline = pipeline(
    model=model,            # The pre-trained model loaded with quantization
    tokenizer=tokenizer,    # The tokenizer associated with the model
    task="text-generation", # Specify the task as text generation
    do_sample=True,         # Enable sampling; without it decoding is greedy and `temperature` is ignored
    temperature=0.2,        # Low temperature: less random, more deterministic sampling
    repetition_penalty=1,   # 1 is the neutral value, i.e. no repetition penalty is applied
    return_full_text=True,  # Return the prompt followed by the generated text
    max_new_tokens=1000,    # Limit the maximum number of new tokens generated in the output
)

In [6]:
# Wrap the transformers text-generation pipeline in LangChain's HuggingFacePipeline
# so it can be passed as the `llm` of a LangChain chain (see the LLMChain cell below).
mistral_llm = HuggingFacePipeline(pipeline=model_pipeline)

  warn_deprecated(


### Load the CSV of abstracts, create embeddings and store them in a FAISS vector store

---



In [7]:
# pandas supplies the DataFrame that holds the abstracts
import pandas as pd

# Location of the CSV file with the COVID-19 abstracts.
# The adjacent string literals are concatenated into a single URL.
url = (
    'https://raw.githubusercontent.com/dlp1004/'
    'Aplicacion_de_chatbot_con_LLM_y_RAG_para_la_gestion_de_informacion_cientifica_de_COVID-19_en_PubMed/'
    'main/data/covid_abstracts.csv'
)

# Download the file and parse it into a DataFrame
df = pd.read_csv(filepath_or_buffer=url)

In [8]:
# Loader that turns the rows of a DataFrame into LangChain documents
from langchain_community.document_loaders import DataFrameLoader

# Configure the loader: the "abstract" column provides the text of each document
loader = DataFrameLoader(
    df,
    page_content_column="abstract",
)

# Materialize the documents, one per DataFrame row
abstracts = loader.load()

In [9]:
# Community implementations of the embedding wrapper and the FAISS vector store
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embed every abstract with a sentence-transformers model and index the vectors in FAISS
vectorstore = FAISS.from_documents(
    documents=abstracts,
    embedding=HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
)

# Expose the index as a retriever that keeps only the documents whose
# similarity score surpasses the configured threshold
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=dict(score_threshold=0.4),
)

  warn_deprecated(


### Prompts and LLMChain


In [10]:
# Prompt sent to the model. It contains two placeholders:
#   {context}  - filled with the abstracts passed to the chain
#   {question} - filled with the user's question
# The instruction is wrapped in [INST] ... [/INST] markers.
# The text is a runtime string: whitespace and wording are part of the prompt.
prompt_template = """
### [INST] Instruction: You are an expert in microbiology. Please answer the question based ONLY on your knowledge of abstracts from a series of articles in this prompt don't use information of any other kind, if there were not any articles passed in this prompt just indicate that:

{context}

### QUESTION:
{question} [/INST]
 """

# Build a PromptTemplate that declares both placeholders as input variables
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create an LLMChain (Language Model Chain): it fills the prompt with the
# inputs it receives and sends the result to the language model
llm_chain = LLMChain(
    llm=mistral_llm, # Language model wrapper created from the Mistral pipeline
    prompt=prompt # Prompt template whose placeholders are filled on every call
)

  warn_deprecated(


### Build RAG Chain

In [11]:
def _format_docs(docs):
    """Turn the retrieved documents into the plain-text context of the prompt.

    Args:
        docs: Documents returned by the retriever; each one exposes its text
            as ``page_content``.

    Returns:
        The abstracts separated by blank lines, or an explicit notice when the
        retriever returned nothing, so the model can say that no article was
        provided.
    """
    if not docs:
        return "No abstracts were retrieved for this question."
    return "\n\n".join(doc.page_content for doc in docs)


# Define the RAG (Retrieval Augmented Generation) chain using LangChain syntax.
# The retriever output is piped through `_format_docs`; otherwise the list of
# Document objects would be interpolated into the prompt as a Python repr
# (including metadata) instead of readable text.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}  # Retrieved text plus the unchanged user question
    | llm_chain  # The LLMChain fills the prompt and calls the model
)

In [12]:
import locale


def getpreferredencoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding, whatever the system locale is.

    ``do_setlocale`` is accepted only so the signature matches
    ``locale.getpreferredencoding``; its value is ignored.
    """
    forced_encoding = "UTF-8"
    return forced_encoding


# Replace the standard-library lookup so that every later call made through
# `locale.getpreferredencoding` gets UTF-8 and data is processed uniformly
setattr(locale, "getpreferredencoding", getpreferredencoding)

In [13]:
import gradio as gr
import random

### Deployment of GUI Demo

In [14]:
# Building of the Gradio GUI demo
def model_response(user_prompt, history):
    """Answer a chat message with the RAG chain.

    Args:
        user_prompt: The message typed by the user; it is sent to the chain as
            the question.
        history: Previous turns supplied by the Gradio chat interface. It is
            not used: every question is answered independently.

    Returns:
        The answer text without the prompt.
    """
    result = rag_chain.invoke(user_prompt)
    generated_text = result['text']
    # The pipeline is configured to return the prompt together with the
    # completion, so the text may still start with the instruction and the
    # retrieved abstracts. Keep only what follows the last "[/INST]" marker;
    # when the marker is absent the text is returned unchanged (apart from
    # surrounding whitespace).
    return generated_text.rpartition("[/INST]")[2].strip()

# Build a chat UI that calls `model_response` for every user message and start it
gr.ChatInterface(model_response).launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://3b8bb9fcb6ff831d54.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


