In [8]:
import os
from getpass import getpass

import requests
from langchain_community.document_loaders import DirectoryLoader, JSONLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI

In [9]:
# SECURITY: API keys must never be hard-coded in a notebook. A literal key was
# previously committed in this cell; treat it as compromised and revoke it in
# the OpenRouter dashboard.
# The key is taken from the OPENROUTER_API_KEY environment variable; if it is
# not set, it is requested interactively without echoing it to the output.
if not os.environ.get("OPENROUTER_API_KEY"):
    os.environ["OPENROUTER_API_KEY"] = getpass("OpenRouter API key: ")

# Base URL passed to ChatOpenAI so chat requests are sent to OpenRouter.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# No OpenAI API key is needed for embeddings: this notebook computes them with
# HuggingFaceEmbeddings (sentence-transformers/all-MiniLM-L6-v2).

In [10]:
# Load every *.json file found (recursively) under data/. Each file is parsed
# by JSONLoader with the jq expression "." (the whole JSON value);
# text_content=False lets non-string JSON values be used as document content.
loader = DirectoryLoader(
    "data/",
    glob="**/*.json",
    show_progress=True,
    loader_cls=JSONLoader,
    loader_kwargs=dict(jq_schema=".", text_content=False),
)
docs = loader.load()
print(f"{len(docs)} documents loaded.")

  0%|          | 0/25 [00:00<?, ?it/s]

100%|██████████| 25/25 [00:00<00:00, 58.14it/s]

25 documents loaded.





In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1) Break the loaded documents into overlapping chunks (size 1000, overlap
#    200, measured by the splitter's length function).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.split_documents(docs)

# 2) Embed the chunks with a sentence-transformers model.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# 3) Index the chunks in FAISS and expose a retriever that returns the
#    10 best-matching chunks per query.
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=10))

In [14]:
# Chat model reached through the OpenRouter base URL, authenticated with the
# key stored in the OPENROUTER_API_KEY environment variable.
llm = ChatOpenAI(
    model="openai/gpt-oss-120b:free",
    openai_api_base=OPENROUTER_BASE_URL,
    openai_api_key=os.environ["OPENROUTER_API_KEY"],
    temperature=0.6,
    max_tokens=6000,
    # Extra headers sent with every request; presumably used by OpenRouter
    # for app attribution (confirm against the OpenRouter docs).
    default_headers={
        "HTTP-Referer": "http://localhost:3000",
        "X-Title": "My RAG App",
    },
)

In [15]:
def rag_answer(query: str) -> str:
    """Answer ``query`` using chunks retrieved from the vector store.

    The module-level ``retriever`` fetches the chunks most similar to the
    query, their text is placed in a prompt, and the module-level ``llm``
    generates the answer.

    Args:
        query: The user's question. Surrounding whitespace is ignored.

    Returns:
        The model's answer with Markdown bold markers (``**``) removed.

    Raises:
        ValueError: If ``query`` is empty or contains only whitespace.
    """
    question = query.strip() if query else ""
    if not question:
        # Avoid spending a retrieval and an LLM call on an empty question.
        raise ValueError("query must be a non-empty string")

    relevant_docs = retriever.invoke(question)
    # A blank line between chunks keeps their boundaries visible to the model.
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Instructions come first and the prompt ends on the "Answer:" cue, so the
    # fallback instruction is not mistaken for part of the answer. The prompt
    # is built from adjacent literals so no source indentation leaks into it.
    prompt = (
        "Answer the question based on the context below and give an accurate "
        "result from it. If you don't know the answer, tell the user that you "
        "don't know the answer for the specific question.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n\n"
        "Answer:"
    )

    response = llm.invoke(prompt)
    # Strip Markdown bold markers so the answer prints as plain text.
    return response.content.replace("**", "")

# Example query against the indexed job postings. The spelling of "learning"
# is corrected; the misspelt form weakens embedding-based retrieval.
query = "machine learning engineer jobs list them out"
answer = rag_answer(query)
print(answer)


Machine‑Learning‑Engineer positions found in the provided data

| # | Job Title | Company (if mentioned) | Location (city, state) | Brief note |
|---|-----------|------------------------|------------------------|------------|
| 1 | Senior Machine Learning Engineer | – (not specified) | Chennai, Tamil Nadu | 7‑10 years experience; model development, data engineering, deployment. |
| 2 | ML Engineer | – (not specified) | Hyderabad, Telangana | 8‑12 years experience; design & build high‑performance ML pipelines & MLOps infrastructure. |
| 3 | Machine Learning Engineer | Stier Solutions Inc. | Hyderabad, Telangana | Full‑time, on‑site/hybrid; design, train, and deploy scalable ML models. |
| 4 | Machine Learning Engineer | Eximietas Design | – (India, location not detailed) | Part of a technology‑consulting firm working on chip design, firmware, cloud, cybersecurity & AI/ML. |
| 5 | Machine Learning Engineer | – (appears in multiple listings) | – (various Indian locations) | Generic entry 