In [1]:
%%capture
!pip install unsloth langchain langchain-huggingface faiss-cpu langchain-community

In [2]:
import shutil
# I unzip the zip file containing the model
shutil.unpack_archive("lora_pharma.zip", "lora_pharma", "zip")

In [5]:
from unsloth import FastLanguageModel
import torch
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [6]:
# I load the model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lora_pharma",
    max_seq_length = 2048,
    load_in_4bit = True,
)

==((====))==  Unsloth 2026.1.4: Fast Gemma3 patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
Unsloth: Gemma3 does not support SDPA - switching to fast eager.


model.safetensors:   0%|          | 0.00/4.56G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

In [7]:
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma3ForConditionalGeneration(
      (model): Gemma3Model(
        (vision_tower): SiglipVisionModel(
          (vision_model): SiglipVisionTransformer(
            (embeddings): SiglipVisionEmbeddings(
              (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
              (position_embedding): Embedding(4096, 1152)
            )
            (encoder): SiglipEncoder(
              (layers): ModuleList(
                (0-26): 27 x SiglipEncoderLayer(
                  (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
                  (self_attn): SiglipAttention(
                    (k_proj): lora.Linear(
                      (base_layer): Linear(in_features=1152, out_features=1152, bias=True)
                      (lora_dropout): ModuleDict(
                        (default): Identity()
                      )
                      (lora_A): ModuleDict(
  

In [8]:
# I load the vector database (FAISS)
embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")

vector_db = FAISS.load_local("faiss_pharmacy", embeddings, allow_dangerous_deserialization=True)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Modelo y Base de Datos cargados correctamente.


In [41]:
def pharmaceutical_question(question, k=5, critical_threshold=1.5):

    # Retrieval
    docs_with_score = vector_db.similarity_search_with_score(question, k=k)

    # I extract the best score (the one of the first document)
    best_score = docs_with_score[0][1]

    # Validation of the threshold
    if best_score > critical_threshold:
        return "The question is out of my domain, and I cannot find any related information.", []

    # If it passes the filter, I prepare the context
    docs = [doc for doc, score in docs_with_score]
    retrieved_context = "\n\n".join([doc.page_content for doc in docs])

    # Prompt Preparation
    messages = [
    {"role": "system", "content": "You are a precise pharmaceutical assistant. If the answer is not in the context, say that you do not know, do not invent."},
    {"role": "user", "content": f"Context: {retrieved_context}\n\nQuestion: {question}"},
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text=prompt, return_tensors="pt").to("cuda")

    #  Generation
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        temperature=0.1
    )

    answer = tokenizer.batch_decode(outputs)[0].split("<start_of_turn>model\n")[-1]
    return answer.replace("<end_of_turn>", "").replace("<eos>", "").strip(), docs

print("Pipeline configured")

Pipeline configured


In [42]:
user_question = "What is the composition of Aciloc 150?"

answer, sources = pharmaceutical_question(user_question)

print(f"\n--- QUESTION ---\n{user_question}")
print(f"\n--- ASSISTANT'S RESPONSE ---\n{answer}")
print("\n--- SOURCES ---")
for i, doc in enumerate(sources):
    print(f"Fuente {i+1}: {doc.metadata.get('source', 'Unknown')}")


--- QUESTION ---
What is the composition of Aciloc 150?

--- ASSISTANT'S RESPONSE ---
The composition of Aciloc 150 is Ranitidine (150mg).

--- SOURCES ---
Fuente 1: Kaggle_Dataset
Fuente 2: Kaggle_Dataset
Fuente 3: Kaggle_Dataset
Fuente 4: Kaggle_Dataset
Fuente 5: Kaggle_Dataset


In [43]:
user_question = "What are the contraindications of Azithromycin?"

answer, sources = pharmaceutical_question(user_question)

print(f"\n--- QUESTION ---\n{user_question}")
print(f"\n--- ASSISTANT'S RESPONSE ---\n{answer}")
print("\n--- SOURCES ---")
for i, doc in enumerate(sources):
    print(f"Fuente {i+1}: {doc.metadata.get('source', 'Unknown')}")


--- QUESTION ---
What are the contraindications of Azithromycin?

--- ASSISTANT'S RESPONSE ---
The provided context does not contain information about the contraindications of Azithromycin. It only mentions the uses and side effects of Azithromycin in the context of Azithral 200 Liquid and ATM 200 Oral Suspension, which are both formulations of Azithromycin. Therefore, I cannot answer the question about the contraindications of Azithromycin based on the given context.

--- SOURCES ---
Fuente 1: Katzung - Basic and Clinical Pharmacology 12th Edition (2012).pdf
Fuente 2: Kaggle_Dataset
Fuente 3: Kaggle_Dataset
Fuente 4: Kaggle_Dataset
Fuente 5: Kaggle_Dataset


In [44]:
user_question = "Who is Messi?"

answer, sources = pharmaceutical_question(user_question)

print(f"\n--- QUESTION ---\n{user_question}")
print(f"\n--- ASSISTANT'S RESPONSE ---\n{answer}")
print("\n--- SOURCES ---")
for i, doc in enumerate(sources):
    print(f"Fuente {i+1}: {doc.metadata.get('source', 'Unknown')}")


--- QUESTION ---
Who is Messi?

--- ASSISTANT'S RESPONSE ---
The question is out of my domain, and I cannot find any related information.

--- SOURCES ---
