<a href="https://colab.research.google.com/github/cbadenes/semantic-report-search/blob/main/data/analysis/44_RAG_from_Excel_with_LangChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
# Step 1: Install dependencies
!pip install langchain langchain_huggingface langchain_community faiss-cpu pandas openpyxl llama-cpp-python tiktoken huggingface_hub


Collecting langchain_huggingface
  Downloading langchain_huggingface-0.3.0-py3-none-any.whl.metadata (996 bytes)
Downloading langchain_huggingface-0.3.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.3.0


In [18]:
# Step 2: Load the Excel spreadsheet
import pandas as pd

df = pd.read_excel("Reporting_Inventory.xlsx", sheet_name="Views")
df.head()


Unnamed: 0,ID Data Product,Report Name,Product Owner,PBIX_File,Report View,Description,Category,Status,Rename,Dimensions,KPIs,Other Terms,Filters,Tags,Priority
0,RPPBI0032,Feeder Market - 2024,Jonathan Shields,LifeReport.pbix,CRITERIA,Methodolody and definition of the algorithim o...,Informative,Productive,,,,,,,Priority 1
1,RPPBI0032,Feeder Market - 2024,Jonathan Shields,LifeReport.pbix,DESTINATION_OF_FEEDER_MARKETS,View focused on understand the performance by ...,Functional,Productive,,"Hotel, month, Feeder Market, Segment, Channel ...","Total Revenue, Room Revenue, RN, Lead Time, Le...",,,,Priority 1
2,RPPBI0032,Feeder Market - 2024,Jonathan Shields,LifeReport.pbix,EXECUTIVE VIEW,Global view to understand Feeder Market Perfor...,Executive,Productive,,"Hotel, month, Feeder Market, Segment, Channel ...","Total Revenue, Room Revenue, RN, Lead Time, Le...",,,,Priority 1
3,RPPBI0032,Feeder Market - 2024,Jonathan Shields,LifeReport.pbix,FEEDER MARKET FLOWS,View focused on understanding the booking beha...,Functional,Productive,,"Hotel, month, Feeder Market, Segment, Channel ...","Total Revenue, Room Revenue, RN, Lead Time, Le...",,,,Priority 1
4,RPPBI0032,Feeder Market - 2024,Jonathan Shields,LifeReport.pbix,FEEDER_MARKET_DETAIL,Detail view of Feeder Markets by Destination i...,Functional,Productive,,"Hotel, month, Feeder Market, Segment, Channel ...","Total Revenue, Room Revenue, RN, Lead Time, Le...",,,,Priority 1


In [19]:
# Step 3: Convert rows to LangChain Documents
from langchain.schema import Document

def row_to_document(row):
    content = "\n".join([f"{col}: {row[col]}" for col in row.index if pd.notnull(row[col])])
    return Document(page_content=content)

documents = [row_to_document(row) for _, row in df.iterrows()]
print(f"{len(documents)} documents created.")
print("Document:\n", documents[0])

1486 documents created.
Document:
 page_content='ID Data Product: RPPBI0032
Report Name: Feeder Market - 2024
Product Owner: Jonathan Shields
PBIX_File: LifeReport.pbix
Report View: CRITERIA
Description: Methodolody and definition of the algorithim of Feeder Market
Category: Informative
Status: Productive
Priority: Priority 1'


In [20]:
# from huggingface_hub import login
# login(token="hf_xxx...")

# Step 4: Embed documents using a local embedding model (default: huggingface-based)
from langchain_huggingface import HuggingFaceEmbeddings

# You can change the model to another one like 'sentence-transformers/all-MiniLM-L6-v2'
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

from langchain.vectorstores import FAISS

vectorstore = FAISS.from_documents(documents, embedding_model)


In [24]:
# Download the GGUF model from Hugging Face:
# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF

from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    #filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    filename="mistral-7b-instruct-v0.1.Q2_K.gguf"
    local_dir="/content"  # o cualquier otra ruta
)

print("Model downloaded in:", model_path)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (<ipython-input-24-1627421904>, line 9)

In [21]:
# Step 5: Load a local LLM using llama-cpp
from langchain.llms import LlamaCpp

# Make sure you have a GGUF model (like mistral-7b-instruct-v0.1.Q4_K_M.gguf)
MODEL_PATH = "/content/mistral-7b-instruct-v0.1.Q4_K_M.gguf"  # Change to your model's path

llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.7,  # Controls the randomness of the model's output. Lower values make responses more deterministic.
    max_tokens=512,   # The maximum number of tokens the model is allowed to generate in its response.
    top_p=0.95,       # Limits the sampling to the smallest possible set of tokens whose cumulative probability is ≥ top_p.
    n_ctx=2048,       # Maximum number of context tokens (prompt + output) that the model can handle. This should not exceed the context length of your model (e.g. 2048, 4096, 8192).
    verbose=True      # If True, prints debug-level logs during inference (token generation steps, etc.).
)


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /content/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 l

In [22]:
# Step 6: Build the RetrievalQA chain
from langchain.chains import RetrievalQA

retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


In [23]:
# Step 7: Ask your question
query = "Which reports are related to financial resources?"
response = qa_chain.invoke({"query": query})

print("Answer:", response)


llama_perf_context_print:        load time =  325599.24 ms
llama_perf_context_print: prompt eval time =  325599.04 ms /   908 tokens (  358.59 ms per token,     2.79 tokens per second)
llama_perf_context_print:        eval time =   21402.47 ms /    32 runs   (  668.83 ms per token,     1.50 tokens per second)
llama_perf_context_print:       total time =  347035.24 ms /   940 tokens


Answer:
{'query': 'Which reports are related to financial resources?', 'result': ' The reports "Budget 2025 Report" and "Daily Revenue Report 2025" are related to financial resources.'}


In [None]:
# Step 7: Ask your question
query = "What views provide KPIs for service performance?"
response = qa_chain.invoke({"query": query})

print("Answer:", response)
