### Necessary imports

In [2]:
!pip install -q -U torch==2.2.1 datasets transformers==4.36.1 tensorflow==2.15 langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline


from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


### Load quantized Mistal 7B

In [4]:
from huggingface_hub import login
login("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")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.2'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

### Count number of trainable parameters

In [6]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


### Build Mistral text generation pipeline

In [7]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [8]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index

---



In [9]:
!playwright install
!playwright install-deps

Downloading Chromium 124.0.6367.29 (playwright build v1112)[2m from https://playwright.azureedge.net/builds/chromium/1112/chromium-linux.zip[22m
[1G155.3 MiB [] 0% 0.0s[0K[1G155.3 MiB [] 0% 55.1s[0K[1G155.3 MiB [] 0% 18.9s[0K[1G155.3 MiB [] 0% 14.9s[0K[1G155.3 MiB [] 0% 12.4s[0K[1G155.3 MiB [] 0% 9.9s[0K[1G155.3 MiB [] 1% 9.1s[0K[1G155.3 MiB [] 1% 8.5s[0K[1G155.3 MiB [] 1% 8.0s[0K[1G155.3 MiB [] 1% 8.4s[0K[1G155.3 MiB [] 2% 7.9s[0K[1G155.3 MiB [] 2% 7.8s[0K[1G155.3 MiB [] 2% 7.9s[0K[1G155.3 MiB [] 2% 7.8s[0K[1G155.3 MiB [] 3% 7.7s[0K[1G155.3 MiB [] 3% 7.9s[0K[1G155.3 MiB [] 3% 7.5s[0K[1G155.3 MiB [] 4% 7.5s[0K[1G155.3 MiB [] 4% 7.4s[0K[1G155.3 MiB [] 5% 7.2s[0K[1G155.3 MiB [] 5% 7.0s[0K[1G155.3 MiB [] 5% 6.9s[0K[1G155.3 MiB [] 6% 6.8s[0K[1G155.3 MiB [] 6% 6.7s[0K[1G155.3 MiB [] 7% 6.7s[0K[1G155.3 MiB [] 7% 6.6s[0K[1G155.3 MiB [] 7% 6.4s[0K[1G155.3 MiB [] 8% 6.5s[0K[1G155.3 MiB [] 8% 6.4s[0K[1G155.3 MiB [] 9% 6.2s[0K[1G155.3

In [10]:
import nest_asyncio
nest_asyncio.apply()

# Articles to index

articles = ["https://es.euronews.com/tag/inteligencia-artificial"]

# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [11]:
# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=1000,
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [25]:
import pandas as pd
url = 'https://raw.githubusercontent.com/dlp1004/Aplicacion_de_chatbot_con_LLM_y_RAG_para_la_gestion_de_informacion_cientifica_de_COVID-19_en_PubMed/main/covid_abstracts.csv'
df = pd.read_csv(url)
# Dataset is now stored in a Pandas Dataframe



Unnamed: 0,title,abstract,url
0,Real-World Experience with COVID-19 Including...,This article summarizes the experiences of COV...,https://pubmed.ncbi.nlm.nih.gov/35008137
1,Successful outcome of pre-engraftment COVID-19...,Coronavirus disease 2019 COVID-19 caused by...,https://pubmed.ncbi.nlm.nih.gov/35008104
2,The impact of COVID-19 on oncology professiona...,BACKGROUND COVID-19 has had a significant imp...,https://pubmed.ncbi.nlm.nih.gov/35007996
3,ICU admission and mortality classifiers for CO...,The coronavirus disease 2019 COVID-19 which ...,https://pubmed.ncbi.nlm.nih.gov/35007991
4,Clinical evaluation of nasopharyngeal midturb...,In the setting of supply chain shortages of na...,https://pubmed.ncbi.nlm.nih.gov/35007959


In [43]:
### Celda de prueba, no ejecutar normalmente
from langchain_community.document_loaders import DataFrameLoader
loader = DataFrameLoader(df, page_content_column="abstract")
abstracts = loader.load()

In [33]:
text_splitter = CharacterTextSplitter(chunk_size=1000,
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(abstracts)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever()

### Create PromptTemplate and LLMChain

In [49]:
prompt_template = """
### [INST] Instrucción: Eres un experto en COVID-19 responda en español la pregunta según sus conocimientos de abstracts de una serie de artículos publicados en pubMed:

{context}

### PREGUNTA:
{question} (responde en castellano) [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [50]:
llm_chain.invoke({"context": "", "question": " Cuales son los sintomas del Covid-19"})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': '',
 'question': ' Cuales son los sintomas del Covid-19',
 'text': '\n### [INST] Instrucción: Eres un experto en COVID-19 responda en español la pregunta según sus conocimientos de abstracts de una serie de artículos publicados en pubMed:\n\n\n\n### PREGUNTA:\n Cuales son los sintomas del Covid-19 (responde en castellano) [/INST]\n ¡Hola! Soy un asistente de IA especializado en temas relacionados con el COVID-19. Según varios estudios publicados en PubMed, los principales síntomas de COVID-19 son: fiebre, tos seca, cansancio intensa, y dificultad para respirar o disnea. Además, algunas personas pueden presentar dolor de garganta, mal de cabeza, miopía, dolor de articulaciones, dolor de pecho, pérdida de olfato o gusto, erupciones cutáneas, diarrea, vómitos, confusión mental, y en casos graves, hipoxemia, hipotensión arterial, insuficiencia respiratoria y fallo múltiple orgánico. Sin embargo, es importante tener en cuenta que la presentación clínica de COVID-19 puede variar 

### Build RAG Chain

In [51]:
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("¿Cuales son los sintomas del covid-19? presentame los dtos en una lista")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [52]:
result['context']

[Document(page_content='Background and Objectives  Healthcare workers  HCWs  play important roles in mitigating the COVID-19 pandemic and are more likely to become infected with COVID-19  Mexico  among other countries  had a high incidence and prevalence of cases and deaths from this disease  Material and Methods  This retrospective study evaluated the clinical characteristics as well as the geographical distribution of cases  deaths  and active cases of COVID-19 in HCWs and non-HCWs using official information from the Ministry of Health of Mexico  Results  A total of 235 343 cases of COVID-19 were reported in healthcare workers  and 2 094 191 cases were reported in non-healthcare workers  A total of 76 0  of cases in healthcare workers occurred in those who were between 25 and 50 years of age  and 71 4  of deaths occurred in those who were 50 to 69 years of age  Among healthcare workers  the most frequent comorbidities were obesity  15 2    hypertension  10 9    and diabetes  6 8    N

In [53]:
resultado = result['text']

In [54]:
print(resultado)


### [INST] Instrucción: Eres un experto en COVID-19 responda en español la pregunta según sus conocimientos de abstracts de una serie de artículos publicados en pubMed:

[Document(page_content='Background and Objectives  Healthcare workers  HCWs  play important roles in mitigating the COVID-19 pandemic and are more likely to become infected with COVID-19  Mexico  among other countries  had a high incidence and prevalence of cases and deaths from this disease  Material and Methods  This retrospective study evaluated the clinical characteristics as well as the geographical distribution of cases  deaths  and active cases of COVID-19 in HCWs and non-HCWs using official information from the Ministry of Health of Mexico  Results  A total of 235 343 cases of COVID-19 were reported in healthcare workers  and 2 094 191 cases were reported in non-healthcare workers  A total of 76 0  of cases in healthcare workers occurred in those who were between 25 and 50 years of age  and 71 4  of deaths occ