In [1]:
!pip install -qU transformers accelerate sentence-transformers
!pip install -qU langchain-community langchain-text-splitters chromadb pypdf


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.6/486.6 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m72.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.5/323.5 kB[0m [31m21.4 MB/s[0m eta [3

In [2]:
from google.colab import drive
import os

drive.mount('/content/drive')

BASE_DIR = "/content/drive/MyDrive/RAG"
RAW_DIR  = f"{BASE_DIR}/data/raw"
PERSIST  = f"{BASE_DIR}/index/chroma"

os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(PERSIST, exist_ok=True)

print("BASE_DIR:", BASE_DIR)
print("Coloca tus PDFs en:", RAW_DIR)


Mounted at /content/drive
BASE_DIR: /content/drive/MyDrive/RAG
Coloca tus PDFs en: /content/drive/MyDrive/RAG/data/raw


In [3]:
# PASO C — Embeddings gratis + Ingesta (PDF → chunks → Chroma persistente)

import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Rutas base (ajusta si usaste otras en pasos previos)
BASE_DIR = "/content/drive/MyDrive/RAG"
RAW_DIR  = f"{BASE_DIR}/data/raw"
PERSIST  = f"{BASE_DIR}/index/chroma"
COLLECTION = "rag_local"

os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(PERSIST, exist_ok=True)

# 1) Cargar todos los PDFs del directorio
pdf_files = [f for f in os.listdir(RAW_DIR) if f.lower().endswith(".pdf")]
assert pdf_files, f"No hay PDFs en {RAW_DIR}. Sube al menos uno."

docs = []
for fname in pdf_files:
    loader = PyPDFLoader(os.path.join(RAW_DIR, fname))
    docs.extend(loader.load())

print(f"Documentos cargados: {len(docs)}")

# 2) Split a chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=120,
    add_start_index=True
)
chunks = splitter.split_documents(docs)
print(f"Chunks generados: {len(chunks)}")

# 3) Embeddings (gratis) con wrapper de LangChain
emb = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    encode_kwargs={"normalize_embeddings": True}
)

# 4) Crear índice persistente en Chroma (OJO: aquí va 'embedding=')
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=emb,
    collection_name=COLLECTION,
    persist_directory=PERSIST,
)
vectorstore.persist()
print("Índice Chroma persistido en:", PERSIST)



Documentos cargados: 941
Chunks generados: 2236


  emb = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Índice Chroma persistido en: /content/drive/MyDrive/RAG/index/chroma


  vectorstore.persist()


In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Modelo DeepSeek distilado chico (viable en Colab sin pagar GPU)
MODEL_IDS = [
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",  # preferido
    "Qwen/Qwen2.5-1.5B-Instruct"                  # respaldo si el anterior no carga
]

model_id = None
for mid in MODEL_IDS:
    try:
        tokenizer = AutoTokenizer.from_pretrained(mid, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            mid,
            trust_remote_code=True,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto"
        )
        model_id = mid
        break
    except Exception as e:
        print(f"No se pudo cargar {mid}: {e}")

assert model_id is not None, "No se pudo cargar un modelo DeepSeek/Qwen 1.5B. (Prueba nuevamente o habilita GPU)."

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.2,
    top_p=0.9,
    do_sample=True
)

device_info = "GPU" if torch.cuda.is_available() else "CPU"
print(f"Modelo cargado: {model_id} | Dispositivo: {device_info}")


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Device set to use cpu


Modelo cargado: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B | Dispositivo: CPU


In [5]:
!pip install -U langchain-chroma

Collecting langchain-chroma
  Downloading langchain_chroma-0.2.6-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_chroma-0.2.6-py3-none-any.whl (12 kB)
Installing collected packages: langchain-chroma
Successfully installed langchain-chroma-0.2.6


In [None]:
# PASO E — Consulta RAG (retrieve → prompt → DeepSeek) con la API moderna

import os
from langchain_chroma import Chroma  # <-- wrapper nuevo
from langchain_community.embeddings import HuggingFaceEmbeddings

# 1) Reconstruir el MISMO objeto de embeddings usado en el Paso C
BASE_DIR = "/content/drive/MyDrive/RAG"
PERSIST  = f"{BASE_DIR}/index/chroma"
COLLECTION = "rag_local"

emb = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    encode_kwargs={"normalize_embeddings": True}
)

# 2) Cargar índice persistente con el nuevo wrapper
vs = Chroma(
    collection_name=COLLECTION,
    embedding_function=emb,          # OJO: aquí va 'embedding_function=' y es un OBJETO, no una función suelta
    persist_directory=PERSIST,
)

# 3) Retriever (API nueva). Evitamos get_relevant_documents (deprecado).
retriever = vs.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# 4) Utilidades para formatear contexto con citas
def format_ctx(docs):
    out = []
    for d in docs:
        src = os.path.basename(d.metadata.get("source","desconocido"))
        page = d.metadata.get("page", None)
        snippet = d.page_content.strip().replace("\n", " ")
        out.append(f"[p.{page}] {snippet[:1200]} (fuente: {src})")
    return "\n\n".join(out)

def build_prompt(question: str, context: str | None = None, *, spanish: bool = True) -> str:
    lang_line = "Responde SIEMPRE en español neutro, técnico y conciso." if spanish else "Respond in clear, concise English."
    grounding = (
        "Responde SOLO con información del contexto proporcionado. "
        "Si algo no está en el contexto, responde literalmente: 'No hay evidencia en las fuentes'. "
        "Incluye citas entre corchetes con número de página [p.X] donde corresponda."
    ) if context else (
        "Responde con precisión y evita inventar datos. Si no tienes certeza, dilo explícitamente."
    )
    system = f"{lang_line} {grounding}"

    if context:
        user = f"Pregunta: {question}\n\nContexto:\n{context}\n\nRespuesta concisa y citada:"
    else:
        user = f"Pregunta: {question}\n\nRespuesta concisa:"

    # Formato simple tipo chat
    return f"<|system|>\n{system}\n<|user|>\n{user}\n<|assistant|>\n"


# 5) Consulta (API nueva: retriever.invoke)
query = "Resúmeme en 5 líneas los puntos clave del documento y cita páginas."
ctx_docs = retriever.invoke(query)
context = format_ctx(ctx_docs)

prompt = build_prompt(query, context)

# 6) Generación con DeepSeek (usa el pipeline 'generator' que cargaste en el Paso D)
out = generator(prompt)[0]["generated_text"]

# 7) Post-procesar: cortar hasta el final de la respuesta del assistant (opcional)
if "<|assistant|>" in out:
    out = out.split("<|assistant|>")[-1].strip()

print(out)

Okay, so I need to respond to the user's query. They provided a context with some documents and a specific question about summarizing key points of a document in five lines. They also included some instructions on how to format the response, like using a specific citation format.

First, I should read through the context carefully to understand the content. The documents seem to be related to electric power transmission lines, possibly from a manual or technical guide. The user is asking for a summary of the key points, so I need to extract the main ideas from the provided text.

Looking at the context, there are multiple entries with the same page number and title, which might be a repetition. The main document appears to be on electric power transmission lines, specifically about the installation and maintenance processes. The user also mentions a program for reviewing inspections, which is a separate section.

The user's question is about summarizing the key points of a document in 

In [None]:
import os
import time
from datetime import datetime

# ---------- util: recuperar con scores (opcional) ----------
def retrieve_with_scores(query: str, k: int = 6):
    """Intenta traer documentos + score. Si no se puede, cae a retriever.invoke."""
    try:
        results = vs.similarity_search_with_relevance_scores(query, k=k)
        # results: list[(Document, score)]
        docs = [d for d, _ in results]
        scores = [float(s) for _, s in results]
        return docs, scores
    except Exception:
        docs = retriever.invoke(query)
        return docs, [None]*len(docs)

def show_sources(docs, scores):
    lines = []
    for i, d in enumerate(docs):
        src = os.path.basename(d.metadata.get("source","desconocido"))
        page = d.metadata.get("page", None)
        sc  = scores[i]
        lines.append(f"- {src} [p.{page}]  score={sc if sc is not None else 'N/A'}")
    return "\n".join(lines)

# ---------- logging opcional a Drive ----------
BASE_DIR = "/content/drive/MyDrive/RAG"
LOG_DIR  = f"{BASE_DIR}/logs"
os.makedirs(LOG_DIR, exist_ok=True)
session_id = datetime.now().strftime("%Y%m%d-%H%M%S")
log_path = f"{LOG_DIR}/chat_{session_id}.txt"

def log(line: str):
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(line.rstrip()+"\n")

print("Chat interactivo listo.")
print("Opciones: [1]=NO-RAG  [2]=RAG  [3]=AMBOS  | Enter vacío para salir.")
print(f"Logging en: {log_path}")

while True:
    q = input("\nPregunta > ").strip()
    if not q:
        print("Salida.")
        break

    mode = input("Modo [1=NO-RAG, 2=RAG, 3=AMBOS] > ").strip() or "3"
    t0 = time.time()

    if mode == "1":
        ans_no = ask_llm_no_rag(q, spanish=True)
        print("\n=== SIN RAG ===")
        print(ans_no)
        log(f"[{datetime.now()}] Q: {q}\n--- NO-RAG ---\n{ans_no}\n")

    elif mode == "2":
        docs, scores = retrieve_with_scores(q, k=6)
        ctx = format_ctx(docs)
        ans_rag = ask_llm_rag(q, spanish=True)
        print("\n=== CON RAG ===")
        print(ans_rag)
        print("\nFuentes:")
        print(show_sources(docs, scores))
        log(f"[{datetime.now()}] Q: {q}\n--- RAG ---\n{ans_rag}\n[SOURCES]\n{show_sources(docs, scores)}\n")

    else:  # "3" ambos
        ans_no = ask_llm_no_rag(q, spanish=True)
        docs, scores = retrieve_with_scores(q, k=6)
        ctx = format_ctx(docs)
        ans_rag = ask_llm_rag(q, spanish=True)

        print("\n=== SIN RAG ===")
        print(ans_no)
        print("\n=== CON RAG ===")
        print(ans_rag)
        print("\nFuentes:")
        print(show_sources(docs, scores))

        log(f"[{datetime.now()}] Q: {q}\n--- NO-RAG ---\n{ans_no}\n--- RAG ---\n{ans_rag}\n[SOURCES]\n{show_sources(docs, scores)}\n")

    dt = time.time() - t0
    print(f"\n Tiempo total: {dt:.2f}s")


Chat interactivo listo.
Opciones: [1]=NO-RAG  [2]=RAG  [3]=AMBOS  | Enter vacío para salir.
Logging en: /content/drive/MyDrive/RAG/logs/chat_20251002-020120.txt


In [None]:
def ask_llm_no_rag(question: str, spanish: bool = True) -> str:
    """Asks the LLM directly without RAG."""
    prompt = build_prompt(question, context=None, spanish=spanish)
    out = generator(prompt)[0]["generated_text"]
    if "<|assistant|>" in out:
        out = out.split("<|assistant|>")[-1].strip()
    return out

def ask_llm_rag(question: str, spanish: bool = True) -> str:
    """Asks the LLM with RAG context."""
    docs, scores = retrieve_with_scores(question, k=6)
    context = format_ctx(docs)
    prompt = build_prompt(question, context=context, spanish=spanish)
    out = generator(prompt)[0]["generated_text"]
    if "<|assistant|>" in out:
        out = out.split("<|assistant|>")[-1].strip()
    return out