In [17]:
import getpass
import os

import requests
from langchain_community.document_loaders import DirectoryLoader, JSONLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [23]:
# SECURITY: never store API keys in notebook source. A key that was
# previously hardcoded in this cell must be treated as compromised: revoke
# it in the OpenRouter dashboard and generate a new one.
#
# The key is taken from the OPENROUTER_API_KEY environment variable. If it
# is not set, prompt for it without echoing, so the secret never ends up in
# the notebook file or its outputs.
if not os.environ.get("OPENROUTER_API_KEY"):
    os.environ["OPENROUTER_API_KEY"] = getpass.getpass("OpenRouter API key: ")

# Base URL of OpenRouter's OpenAI-compatible API; passed to ChatOpenAI below.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# Embeddings in this notebook come from HuggingFaceEmbeddings, not from the
# OpenAI embeddings API, so no OpenAI API key is needed.

In [19]:
# Load every *.json file found (recursively) under data/ with JSONLoader.
# jq_schema "." selects the whole JSON document; text_content=False is passed
# because the selected content is not a plain string.
json_loader_options = {"jq_schema": ".", "text_content": False}

loader = DirectoryLoader(
    "data/",
    glob="**/*.json",
    loader_cls=JSONLoader,
    loader_kwargs=json_loader_options,
    show_progress=True,
)
docs = loader.load()
print(len(docs), "documents loaded.")

100%|██████████| 25/25 [00:00<00:00, 30.66it/s]

25 documents loaded.





In [20]:
# Chunking / retrieval settings, named so they are easy to find and tune.
CHUNK_SIZE = 1000       # upper bound on chunk length passed to the splitter
CHUNK_OVERLAP = 200     # overlap between neighbouring chunks
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
RETRIEVER_TOP_K = 10    # number of chunks the retriever returns per query

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(docs)

# Building a FAISS index from zero chunks fails with an unhelpful error from
# inside the library, so stop here with a message that points at the cause.
if not chunks:
    raise ValueError(
        "No chunks to index: check that 'data/' contains non-empty JSON files."
    )

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

In [27]:
# Chat model reached through OpenRouter's OpenAI-compatible endpoint.
# These headers are attached to every request; presumably OpenRouter uses
# them to attribute traffic to this app (confirm against OpenRouter docs).
openrouter_headers = {
    "HTTP-Referer": "http://localhost:3000",
    "X-Title": "My RAG App",
}

llm = ChatOpenAI(
    model="openai/gpt-oss-120b:free",
    openai_api_base=OPENROUTER_BASE_URL,
    openai_api_key=os.environ["OPENROUTER_API_KEY"],
    default_headers=openrouter_headers,
    temperature=0.6,
    max_tokens=10000,
)

In [29]:
def rag_answer(query: str) -> str:
    """Answer ``query`` using chunks retrieved from the vector store.

    The module-level ``retriever`` fetches the chunks relevant to the query,
    their text is joined into a single context string, and the module-level
    ``llm`` is asked to answer from that context only.

    Args:
        query: The user's natural-language question.

    Returns:
        The model's reply with Markdown bold markers (``**``) removed.
    """
    relevant_docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in relevant_docs)

    # All instructions come first and the "Answer:" cue comes last, so the
    # model's completion starts right after the cue. The prompt is assembled
    # from adjacent literals so source indentation does not leak into it.
    prompt = (
        "Answer the question using only the context below and give an "
        "accurate result. List the matching jobs as bullet points and give "
        "a detailed description of each job. If the context does not "
        "contain the answer, tell the user that you don't know the answer "
        "to this specific question.\n\n"
        f"Context: {context}\n\n"
        f"Question: {query}\n\n"
        "Answer:"
    )

    response = llm.invoke(prompt)
    # Strip Markdown bold markers so the printed answer is plain text.
    return response.content.replace("**", "")

# Example run: ask the RAG pipeline for machine-learning engineer roles
# and print the model's answer.
query = "machine learning engineer jobs list them out"
answer = rag_answer(query=query)
print(answer)


Machine‑Learning‑Engineer positions found in the supplied data

| # | Job title (as listed) | Location / Work‑mode | Employment type | Core responsibilities & highlights |
|---|-----------------------|----------------------|-----------------|--------------------------------------|
| 1 | AI/ML Engineer (Remote) – Contractual (3 months) | Remote – India | 3‑month contract | • Build efficient, data‑driven AI systems that power predictive‑automation features.<br>• Apply strong statistics & programming skills to assess, analyse and organise large data sets.<br>• Design, test and optimise machine‑learning models and algorithms. |
| 2 | Senior Machine Learning Engineer | Chennai, Tamil Nadu, India | Full‑time (permanent) | • Research, design and implement advanced ML algorithms to solve business challenges.<br>• Perform ETL of large data sets into a PostgreSQL data‑warehouse.<br>• Deploy models to production, ensuring performance, scalability and monitoring. |
| 3 | Machine Learning Engineer 